% Journal article: Tsinghua Science and Technology, vol. 31, no. 1 (2026).
% Author names use the "Last, First" form; page range uses the BibTeX "--" separator.
@article{Gao2026,
  author   = {Gao, Binyu and Dong, Qiongye and Tao, Tianqi and Zhu, Congmin and Huang, Jun and Chen, Hui and Yang, Qiuying and Liu, Honglei},
  title    = {Leveraging Large Language Models to Enhance Medical Text Representation for Lung Diagnosis Prediction via Knowledge Infusion},
  journal  = {Tsinghua Science and Technology},
  year     = {2026},
  volume   = {31},
  number   = {1},
  pages    = {418--429},
  doi      = {10.26599/TST.2024.9010153},
  url      = {https://www.sciopen.com/article/10.26599/TST.2024.9010153},
  keywords = {bidirectional encoder representations from transformers (BERT), knowledge infusion, large language models (LLMs), medical text representation, aided diagnosis},
  abstract = {Medical text representation is crucial for medical natural language processing (NLP) applications. Bidirectional encoder representations from transformers (BERT) has achieved the state-of-the-art performance in general domain text representation. However, limited by the design of the pretraining task and the frequency of knowledge occurrence, it lacks understanding of medical knowledge. To overcome these problems, we proposed a selective knowledge extraction and fusion framework to enhance medical text representation. In the knowledge extraction phase, we first designed a semantic importance evaluation metric to extract internal knowledge. We then used large language models (LLMs) to extract external knowledge from systematized nomenclature of medicine clinical term (SNOMED CT). In the knowledge fusion phase, we utilized an attention mechanism and Siamese network to integrate internal knowledge and external knowledge. Extracting knowledge through large language models (LLMs) and integrating it into five different types of BERT models, we achieved significant improvements in the task of pulmonary disease text classification.},
}