{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,8]],"date-time":"2026-03-08T04:42:05Z","timestamp":1772944925343,"version":"3.50.1"},"reference-count":69,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2026,2,1]],"date-time":"2026-02-01T00:00:00Z","timestamp":1769904000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2026,2,1]],"date-time":"2026-02-01T00:00:00Z","timestamp":1769904000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2026,2,1]],"date-time":"2026-02-01T00:00:00Z","timestamp":1769904000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-017"},{"start":{"date-parts":[[2026,2,1]],"date-time":"2026-02-01T00:00:00Z","timestamp":1769904000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"},{"start":{"date-parts":[[2026,2,1]],"date-time":"2026-02-01T00:00:00Z","timestamp":1769904000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-012"},{"start":{"date-parts":[[2026,2,1]],"date-time":"2026-02-01T00:00:00Z","timestamp":1769904000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,2,1]],"date-time":"2026-02-01T00:00:00Z","timestamp":1769904000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-004"}],"funder":[{"DOI":"10.13039\/501100001348","name":"A*STAR Research Entities","doi-asserted-by":"publisher","award":["H20C6a0032"],"award-info":[{"award-number":["H20C6a0032"]}],"id":[{"id":"10.13039\/501100001348","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001348","name":"A*STAR Research Entities","doi-asserted-by":"publisher","award":["A20H4b0141"],"award-info":[{"award-number":["A20H4b0141"]}],"id":[{"id":"10.13039\/501100001348","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001381","name":"National Research Foundation Singapore","doi-asserted-by":"publisher","award":["AISG2-TC-2021-003"],"award-info":[{"award-number":["AISG2-TC-2021-003"]}],"id":[{"id":"10.13039\/501100001381","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Information Fusion"],"published-print":{"date-parts":[[2026,2]]},"DOI":"10.1016\/j.inffus.2025.103631","type":"journal-article","created":{"date-parts":[[2025,8,21]],"date-time":"2025-08-21T02:39:37Z","timestamp":1755743977000},"page":"103631","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":1,"special_numbering":"PB","title":["EH-Benchmark: Ophthalmic hallucination benchmark and agent-driven top-down traceable reasoning 
workflow"],"prefix":"10.1016","volume":"126","author":[{"given":"Xiaoyu","family":"Pan","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3324-0591","authenticated-orcid":false,"given":"Yang","family":"Bai","sequence":"additional","affiliation":[]},{"given":"Ke","family":"Zou","sequence":"additional","affiliation":[]},{"given":"Yang","family":"Zhou","sequence":"additional","affiliation":[]},{"given":"Jun","family":"Zhou","sequence":"additional","affiliation":[]},{"given":"Huazhu","family":"Fu","sequence":"additional","affiliation":[]},{"given":"Yih-Chung","family":"Tham","sequence":"additional","affiliation":[]},{"given":"Yong","family":"Liu","sequence":"additional","affiliation":[]}],"member":"78","reference":[{"issue":"5","key":"10.1016\/j.inffus.2025.103631_b1","doi-asserted-by":"crossref","first-page":"771","DOI":"10.1038\/eye.2017.1","article-title":"Surveillance of sight loss due to delay in ophthalmic treatment or review: frequency, cause and outcome","volume":"31","author":"Foot","year":"2017","journal-title":"Eye"},{"key":"10.1016\/j.inffus.2025.103631_b2","doi-asserted-by":"crossref","DOI":"10.2196\/59505","article-title":"Multimodal large language models in health care: applications, challenges, and future outlook","volume":"26","author":"AlSaad","year":"2024","journal-title":"J. Med. Internet Res."},{"issue":"8","key":"10.1016\/j.inffus.2025.103631_b3","doi-asserted-by":"crossref","first-page":"1930","DOI":"10.1038\/s41591-023-02448-8","article-title":"Large language models in medicine","volume":"29","author":"Thirunavukarasu","year":"2023","journal-title":"Nature Med."},{"key":"10.1016\/j.inffus.2025.103631_b4","series-title":"Self-supervised quantized representation for seamlessly integrating knowledge graphs with large language models","author":"Lin","year":"2025"},{"key":"10.1016\/j.inffus.2025.103631_b5","series-title":"Chatkbqa: A generate-then-retrieve framework for knowledge base question answering with fine-tuned large language models","author":"Luo","year":"2023"},{"key":"10.1016\/j.inffus.2025.103631_b6","first-page":"8250","article-title":"Llm-rg4: Flexible and factual radiology report generation across diverse input contexts","volume":"vol. 39","author":"Wang","year":"2025"},{"issue":"1","key":"10.1016\/j.inffus.2025.103631_b7","doi-asserted-by":"crossref","first-page":"2258","DOI":"10.1038\/s41467-025-57426-0","article-title":"Towards a holistic framework for multimodal LLM in 3D brain CT radiology report generation","volume":"16","author":"Li","year":"2025","journal-title":"Nat. Commun."},{"key":"10.1016\/j.inffus.2025.103631_b8","series-title":"LLM-based personalized recommendations in health","author":"Galitsky","year":"2024"},{"issue":"11","key":"10.1016\/j.inffus.2025.103631_b9","doi-asserted-by":"crossref","DOI":"10.1001\/jamanetworkopen.2023.43689","article-title":"Leveraging large language models for decision support in personalized oncology","volume":"6","author":"Benary","year":"2023","journal-title":"JAMA Netw. Open"},{"key":"10.1016\/j.inffus.2025.103631_b10","article-title":"Knowledge fusion in deep learning-based medical vision-language models: A review","author":"Xu","year":"2025","journal-title":"Inf. 
Fusion"},{"key":"10.1016\/j.inffus.2025.103631_b11","series-title":"An integrated language-vision foundation model for conversational diagnostics and triaging in primary eye care","author":"Da Soh","year":"2025"},{"key":"10.1016\/j.inffus.2025.103631_b12","series-title":"EyecareGPT: Boosting comprehensive ophthalmology understanding with tailored dataset, benchmark and model","author":"Li","year":"2025"},{"key":"10.1016\/j.inffus.2025.103631_b13","doi-asserted-by":"crossref","unstructured":"X. Yue, Y. Ni, K. Zhang, T. Zheng, R. Liu, G. Zhang, S. Stevens, D. Jiang, W. Ren, Y. Sun, et al., Mmmu: A massive multi-discipline multimodal understanding and reasoning benchmark for expert agi, in: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 2024, pp. 9556\u20139567.","DOI":"10.1109\/CVPR52733.2024.00913"},{"key":"10.1016\/j.inffus.2025.103631_b14","series-title":"Mathvista: Evaluating mathematical reasoning of foundation models in visual contexts","author":"Lu","year":"2023"},{"key":"10.1016\/j.inffus.2025.103631_b15","doi-asserted-by":"crossref","DOI":"10.1109\/TPAMI.2024.3437288","article-title":"Unk-vqa: A dataset and a probe into the abstention ability of multi-modal large models","author":"Guo","year":"2024","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"10.1016\/j.inffus.2025.103631_b16","series-title":"LLMEval-Med: A real-world clinical benchmark for medical LLMs with physician validation","author":"Zhang","year":"2025"},{"key":"10.1016\/j.inffus.2025.103631_b17","series-title":"MedHEval: Benchmarking hallucinations and mitigation strategies in medical large vision-language models","author":"Chang","year":"2025"},{"key":"10.1016\/j.inffus.2025.103631_b18","series-title":"Localizing before answering: A hallucination evaluation benchmark for grounded medical multimodal LLMs","author":"Nguyen","year":"2025"},{"key":"10.1016\/j.inffus.2025.103631_b19","series-title":"FunBench: Benchmarking fundus reading skills of MLLMs","author":"Wei","year":"2025"},{"key":"10.1016\/j.inffus.2025.103631_b20","series-title":"Lmod: A large multimodal ophthalmology dataset and benchmark for large vision-language models","author":"Qin","year":"2024"},{"issue":"2","key":"10.1016\/j.inffus.2025.103631_b21","doi-asserted-by":"crossref","DOI":"10.1007\/s11432-024-4222-0","article-title":"The rise and potential of large language model based agents: A survey","volume":"68","author":"Xi","year":"2025","journal-title":"Sci. China Inf. Sci."},{"key":"10.1016\/j.inffus.2025.103631_b22","series-title":"Multi-agent collaboration: Harnessing the power of intelligent llm agents","author":"Talebirad","year":"2023"},{"key":"10.1016\/j.inffus.2025.103631_b23","unstructured":"Z. Liu, Y. Zhang, P. Li, Y. Liu, D. Yang, A dynamic LLM-powered agent network for task-oriented agent collaboration, in: First Conference on Language Modeling, 2024."},{"key":"10.1016\/j.inffus.2025.103631_b24","series-title":"Embodied llm agents learn to cooperate in organized teams","author":"Guo","year":"2024"},{"key":"10.1016\/j.inffus.2025.103631_b25","series-title":"Llm agents making agent tools","author":"W\u00f6lflein","year":"2025"},{"key":"10.1016\/j.inffus.2025.103631_b26","unstructured":"X. Wang, Y. Chen, L. Yuan, Y. Zhang, Y. Li, H. Peng, H. 
Ji, Executable code actions elicit better llm agents, in: Forty-First International Conference on Machine Learning, 2024."},{"key":"10.1016\/j.inffus.2025.103631_b27","doi-asserted-by":"crossref","DOI":"10.1016\/j.inffus.2025.102963","article-title":"A survey of large language models for healthcare: from data, technology, and applications to accountability and ethics","author":"He","year":"2025","journal-title":"Inf. Fusion"},{"key":"10.1016\/j.inffus.2025.103631_b28","article-title":"Has multimodal learning delivered universal intelligence in healthcare? A comprehensive survey","author":"Lin","year":"2024","journal-title":"Inf. Fusion"},{"key":"10.1016\/j.inffus.2025.103631_b29","series-title":"International Conference on Medical Image Computing and Computer-Assisted Intervention","first-page":"368","article-title":"Sam-u: Multi-box prompts triggered uncertainty estimation for reliable sam in medical image","author":"Deng","year":"2023"},{"key":"10.1016\/j.inffus.2025.103631_b30","series-title":"Medsam-u: Uncertainty-guided auto multi-prompt adaptation for reliable medsam","author":"Zhou","year":"2024"},{"key":"10.1016\/j.inffus.2025.103631_b31","first-page":"1","article-title":"A generalist vision\u2013language foundation model for diverse biomedical tasks","author":"Zhang","year":"2024","journal-title":"Nature Med."},{"key":"10.1016\/j.inffus.2025.103631_b32","series-title":"MedGemma hugging face","author":"Google","year":"2025"},{"key":"10.1016\/j.inffus.2025.103631_b33","first-page":"28541","article-title":"Llava-med: Training a large language-and-vision assistant for biomedicine in one day","volume":"36","author":"Li","year":"2023","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.inffus.2025.103631_b34","article-title":"Cross-modal knowledge diffusion-based generation for difference-aware medical VQA","author":"Lin","year":"2025","journal-title":"IEEE Trans. Image Process."},{"key":"10.1016\/j.inffus.2025.103631_b35","series-title":"HuatuoGPT-vision, towards injecting medical visual knowledge into multimodal LLMs at scale","author":"Chen","year":"2024"},{"key":"10.1016\/j.inffus.2025.103631_b36","series-title":"Evaluating the quality of hallucination benchmarks for large vision-language models","author":"Yan","year":"2024"},{"key":"10.1016\/j.inffus.2025.103631_b37","series-title":"Medvh: Towards systematic evaluation of hallucination for large vision language models in the medical context","author":"Gu","year":"2024"},{"key":"10.1016\/j.inffus.2025.103631_b38","series-title":"Detecting and evaluating medical hallucinations in large vision language models","author":"Chen","year":"2024"},{"key":"10.1016\/j.inffus.2025.103631_b39","series-title":"MedHallBench: A new benchmark for assessing hallucination in medical large language models","author":"Zuo","year":"2024"},{"key":"10.1016\/j.inffus.2025.103631_b40","series-title":"2020 25th International Conference on Pattern Recognition","first-page":"7403","article-title":"Learn to segment retinal lesions and beyond","author":"Wei","year":"2021"},{"key":"10.1016\/j.inffus.2025.103631_b41","doi-asserted-by":"crossref","first-page":"511","DOI":"10.1016\/j.ins.2019.06.011","article-title":"Diagnostic assessment of deep learning algorithms for diabetic retinopathy screening","volume":"501","author":"Li","year":"2019","journal-title":"Inform. 
Sci."},{"issue":"3","key":"10.1016\/j.inffus.2025.103631_b42","doi-asserted-by":"crossref","first-page":"25","DOI":"10.3390\/data3030025","article-title":"Indian diabetic retinopathy image dataset (IDRiD): a database for diabetic retinopathy screening research","volume":"3","author":"Porwal","year":"2018","journal-title":"Data"},{"issue":"1","key":"10.1016\/j.inffus.2025.103631_b43","doi-asserted-by":"crossref","first-page":"769","DOI":"10.1038\/s41597-023-02675-1","article-title":"Oimhs: An optical coherence tomography image dataset based on macular hole manual segmentation","volume":"10","author":"Ye","year":"2023","journal-title":"Sci. Data"},{"issue":"1","key":"10.1016\/j.inffus.2025.103631_b44","doi-asserted-by":"crossref","first-page":"267","DOI":"10.1038\/s41597-024-04259-z","article-title":"OCT5k: A dataset of multi-disease and multi-graded annotations for retinal layers","volume":"12","author":"Arikan","year":"2025","journal-title":"Sci. Data"},{"key":"10.1016\/j.inffus.2025.103631_b45","series-title":"2020 International Joint Conference on Neural Networks","first-page":"1","article-title":"G1020: A benchmark retinal fundus image dataset for computer-aided glaucoma detection","author":"Bajwa","year":"2020"},{"key":"10.1016\/j.inffus.2025.103631_b46","series-title":"Medical Image Computing and Computer Assisted Intervention\u2013MICCAI 2018: 21st International Conference, Granada, Spain, September 16-20, 2018, Proceedings, Part II 11","first-page":"65","article-title":"Towards a glaucoma risk index based on simulated hemodynamics from fundus images","author":"Orlando","year":"2018"},{"issue":"3","key":"10.1016\/j.inffus.2025.103631_b47","doi-asserted-by":"crossref","first-page":"818","DOI":"10.1109\/TMI.2020.3037771","article-title":"A benchmark for studying diabetic retinopathy: segmentation, grading, and transferability","volume":"40","author":"Zhou","year":"2020","journal-title":"IEEE Trans. Med. Imaging"},{"key":"10.1016\/j.inffus.2025.103631_b48","series-title":"ODIR-5K - grand challenge","author":"ODIR-5K Dataset","year":"2019"},{"issue":"1","key":"10.1016\/j.inffus.2025.103631_b49","doi-asserted-by":"crossref","first-page":"365","DOI":"10.1038\/s41597-024-03182-7","article-title":"Octdl: Optical coherence tomography dataset for image-based deep learning methods","volume":"11","author":"Kulyabin","year":"2024","journal-title":"Sci. Data"},{"issue":"1","key":"10.1016\/j.inffus.2025.103631_b50","doi-asserted-by":"crossref","first-page":"520","DOI":"10.1038\/s41597-023-02424-4","article-title":"GRAPE: A multi-modal dataset of longitudinal follow-up visual field and fundus images for glaucoma management","volume":"10","author":"Huang","year":"2023","journal-title":"Sci. Data"},{"issue":"1","key":"10.1016\/j.inffus.2025.103631_b51","doi-asserted-by":"crossref","first-page":"291","DOI":"10.1038\/s41597-022-01388-1","article-title":"PAPILA: Dataset with fundus images and clinical data of both eyes of the same patient for glaucoma assessment","volume":"9","author":"Kovalyk","year":"2022","journal-title":"Sci. Data"},{"issue":"6","key":"10.1016\/j.inffus.2025.103631_b52","doi-asserted-by":"crossref","first-page":"290","DOI":"10.4239\/wjd.v4.i6.290","article-title":"Classification of diabetic retinopathy and diabetic macular edema","volume":"4","author":"Wu","year":"2013","journal-title":"World J. 
Diabetes"},{"key":"10.1016\/j.inffus.2025.103631_b53","series-title":"Qwen2.5: A party of foundation models","author":"Team","year":"2024"},{"key":"10.1016\/j.inffus.2025.103631_b54","series-title":"Qwen2 technical report","author":"Yang","year":"2024"},{"key":"10.1016\/j.inffus.2025.103631_b55","series-title":"Qwen2.5-VL technical report","author":"Bai","year":"2025"},{"key":"10.1016\/j.inffus.2025.103631_b56","series-title":"Qwen2-VL: Enhancing vision-language model\u2019s perception of the world at any resolution","author":"Wang","year":"2024"},{"key":"10.1016\/j.inffus.2025.103631_b57","series-title":"Qwen-VL: A versatile vision-language model for understanding, localization, text reading, and beyond","author":"Bai","year":"2023"},{"key":"10.1016\/j.inffus.2025.103631_b58","series-title":"LLaVA-NeXT: Improved reasoning, OCR, and world knowledge","author":"Liu","year":"2024"},{"key":"10.1016\/j.inffus.2025.103631_b59","series-title":"Improved baselines with visual instruction tuning","author":"Liu","year":"2023"},{"key":"10.1016\/j.inffus.2025.103631_b60","unstructured":"H. Liu, C. Li, Q. Wu, Y.J. Lee, Visual Instruction Tuning, in: NeurIPS, 2023."},{"key":"10.1016\/j.inffus.2025.103631_b61","series-title":"Gpt-4 technical report","author":"Achiam","year":"2023"},{"key":"10.1016\/j.inffus.2025.103631_b62","series-title":"Introducing GPT-4.1 in the API","author":"OpenAI","year":"2025"},{"key":"10.1016\/j.inffus.2025.103631_b63","series-title":"Expanding performance boundaries of open-source multimodal models with model, data, and test-time scaling","author":"Chen","year":"2024"},{"key":"10.1016\/j.inffus.2025.103631_b64","series-title":"Mini-internvl: A flexible-transfer pocket multimodal model with 5% parameters and 90% performance","author":"Gao","year":"2024"},{"key":"10.1016\/j.inffus.2025.103631_b65","series-title":"How far are we to GPT-4V? Closing the gap to commercial multimodal models with open-source suites","author":"Chen","year":"2024"},{"key":"10.1016\/j.inffus.2025.103631_b66","doi-asserted-by":"crossref","unstructured":"Z. Chen, J. Wu, W. Wang, W. Su, G. Chen, S. Xing, M. Zhong, Q. Zhang, X. Zhu, L. Lu, et al., Internvl: Scaling up vision foundation models and aligning for generic visual-linguistic tasks, in: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 2024, pp. 
24185\u201324198.","DOI":"10.1109\/CVPR52733.2024.02283"},{"key":"10.1016\/j.inffus.2025.103631_b67","series-title":"Enhancing the reasoning ability of multimodal large language models via mixed preference optimization","author":"Wang","year":"2024"},{"key":"10.1016\/j.inffus.2025.103631_b68","series-title":"DeepSeek-V3 technical report","author":"DeepSeek-AI","year":"2024"},{"key":"10.1016\/j.inffus.2025.103631_b69","series-title":"Healthgpt: A medical large vision-language model for unifying comprehension and generation via heterogeneous knowledge adaptation","author":"Lin","year":"2025"}],"container-title":["Information Fusion"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S1566253525007031?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S1566253525007031?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2026,3,7]],"date-time":"2026-03-07T13:48:18Z","timestamp":1772891298000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S1566253525007031"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,2]]},"references-count":69,"alternative-id":["S1566253525007031"],"URL":"https:\/\/doi.org\/10.1016\/j.inffus.2025.103631","relation":{},"ISSN":["1566-2535"],"issn-type":[{"value":"1566-2535","type":"print"}],"subject":[],"published":{"date-parts":[[2026,2]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"EH-Benchmark: Ophthalmic hallucination benchmark and agent-driven top-down traceable reasoning workflow","name":"articletitle","label":"Article Title"},{"value":"Information Fusion","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.inffus.2025.103631","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2025 Elsevier B.V. All rights are reserved, including those for text and data mining, AI training, and similar technologies.","name":"copyright","label":"Copyright"}],"article-number":"103631"}}
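The record above follows the envelope shape of a Crossref REST API work response (top-level "status", "message-type", and "message", with the bibliographic fields nested under "message"). Below is a minimal sketch, using only the Python standard library, of how a record with this shape could be fetched and a few of the fields shown above read out; the User-Agent contact address is a hypothetical placeholder, and the sketch assumes network access to the public api.crossref.org endpoint.

```python
import json
import urllib.request

# DOI taken from the record above.
DOI = "10.1016/j.inffus.2025.103631"
url = f"https://api.crossref.org/works/{DOI}"

# Crossref asks polite clients to identify themselves; the mailto is a placeholder.
req = urllib.request.Request(
    url, headers={"User-Agent": "example-client/0.1 (mailto:you@example.org)"}
)

with urllib.request.urlopen(req) as resp:
    record = json.load(resp)  # parses the same envelope shown above

# Envelope checks, then the work metadata lives under "message".
assert record["status"] == "ok" and record["message-type"] == "work"
work = record["message"]

print(work["title"][0])            # article title
print(work["container-title"][0])  # journal title ("Information Fusion")
print(work["reference-count"])     # 69
print(work["DOI"])                 # 10.1016/j.inffus.2025.103631

# Each entry in "reference" carries a "key" plus whatever fields the publisher deposited
# (article-title, series-title, or an unstructured citation string).
for ref in work.get("reference", [])[:3]:
    title = ref.get("article-title") or ref.get("series-title") or ref.get("unstructured")
    print(ref["key"], "-", title)
```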