{"id":"https://openalex.org/W4416640124","doi":"https://doi.org/10.48550/arxiv.2511.13169","title":"TCM-5CEval: Extended Deep Evaluation Benchmark for LLM's Comprehensive Clinical Research Competence in Traditional Chinese Medicine","display_name":"TCM-5CEval: Extended Deep Evaluation Benchmark for LLM's Comprehensive Clinical Research Competence in Traditional Chinese Medicine","publication_year":2025,"publication_date":"2025-11-17","ids":{"openalex":"https://openalex.org/W4416640124","doi":"https://doi.org/10.48550/arxiv.2511.13169"},"language":null,"primary_location":{"id":"pmh:oai:arXiv.org:2511.13169","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2511.13169","pdf_url":"https://arxiv.org/pdf/2511.13169","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2511.13169","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Huang, Tianai","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Huang, Tianai","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5012630042","display_name":"Jiayuan Chen","orcid":"https://orcid.org/0000-0003-3122-6726"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Jiayuan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5013657813","display_name":"Lu Lu","orcid":"https://orcid.org/0000-0002-8834-1947"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lu, Lu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101979965","display_name":"Pengcheng Chen","orcid":"https://orcid.org/0000-0003-2018-5594"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Pengcheng","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5015782528","display_name":"T. Li","orcid":"https://orcid.org/0009-0006-6143-5018"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Tianbin","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100690518","display_name":"Bing Han","orcid":"https://orcid.org/0000-0002-6473-0438"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Han, Bing","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5025181835","display_name":"Wenchao Tang","orcid":"https://orcid.org/0000-0002-1889-9013"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tang, Wenchao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5052532450","display_name":"Jie Xu","orcid":"https://orcid.org/0000-0001-9233-4363"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xu, Jie","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5100612727","display_name":"Ming Li","orcid":"https://orcid.org/0000-0002-4411-7791"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Ming","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":9,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12647","display_name":"Traditional Chinese Medicine Studies","score":0.6409000158309937,"subfield":{"id":"https://openalex.org/subfields/2707","display_name":"Complementary and alternative medicine"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},"topics":[{"id":"https://openalex.org/T12647","display_name":"Traditional Chinese Medicine Studies","score":0.6409000158309937,"subfield":{"id":"https://openalex.org/subfields/2707","display_name":"Complementary and alternative medicine"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},{"id":"https://openalex.org/T13702","display_name":"Machine Learning in Healthcare","score":0.18170000612735748,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.01979999989271164,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/competence","display_name":"Competence (human resources)","score":0.5245000123977661},{"id":"https://openalex.org/keywords/consistency","display_name":"Consistency (knowledge bases)","score":0.504800021648407},{"id":"https://openalex.org/keywords/traditional-chinese-medicine","display_name":"Traditional Chinese medicine","score":0.4652000069618225},{"id":"https://openalex.org/keywords/strengths-and-weaknesses","display_name":"Strengths and weaknesses","score":0.42080000042915344},{"id":"https://openalex.org/keywords/literacy","display_name":"Literacy","score":0.4106999933719635},{"id":"https://openalex.org/keywords/core-competency","display_name":"Core competency","score":0.34060001373291016},{"id":"https://openalex.org/keywords/alternative-medicine","display_name":"Alternative medicine","score":0.3303000032901764},{"id":"https://openalex.org/keywords/china","display_name":"China","score":0.31439998745918274}],"concepts":[{"id":"https://openalex.org/C100521375","wikidata":"https://www.wikidata.org/wiki/Q2015382","display_name":"Competence (human resources)","level":2,"score":0.5245000123977661},{"id":"https://openalex.org/C2776436953","wikidata":"https://www.wikidata.org/wiki/Q5163215","display_name":"Consistency (knowledge bases)","level":2,"score":0.504800021648407},{"id":"https://openalex.org/C55587333","wikidata":"https://www.wikidata.org/wiki/Q1133029","display_name":"Engineering ethics","level":1,"score":0.48910000920295715},{"id":"https://openalex.org/C188947578","wikidata":"https://www.wikidata.org/wiki/Q200253","display_name":"Traditional Chinese medicine","level":3,"score":0.4652000069618225},{"id":"https://openalex.org/C63882131","wikidata":"https://www.wikidata.org/wiki/Q17122954","display_name":"Strengths and weaknesses","level":2,"score":0.42080000042915344},{"id":"https://openalex.org/C547764534","wikidata":"https://www.wikidata.org/wiki/Q8236","display_name":"Literacy","level":2,"score":0.4106999933719635},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.3928999900817871},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.36399999260902405},{"id":"https://openalex.org/C539667460","wikidata":"https://www.wikidata.org/wiki/Q2414942","display_name":"Management science","level":1,"score":0.36399999260902405},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.3626999855041504},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3537999987602234},{"id":"https://openalex.org/C71924100","wikidata":"https://www.wikidata.org/wiki/Q11190","display_name":"Medicine","level":0,"score":0.3483999967575073},{"id":"https://openalex.org/C164065428","wikidata":"https://www.wikidata.org/wiki/Q1201929","display_name":"Core competency","level":2,"score":0.34060001373291016},{"id":"https://openalex.org/C204787440","wikidata":"https://www.wikidata.org/wiki/Q188504","display_name":"Alternative medicine","level":2,"score":0.3303000032901764},{"id":"https://openalex.org/C191935318","wikidata":"https://www.wikidata.org/wiki/Q148","display_name":"China","level":2,"score":0.31439998745918274},{"id":"https://openalex.org/C509550671","wikidata":"https://www.wikidata.org/wiki/Q126945","display_name":"Medical education","level":1,"score":0.29490000009536743},{"id":"https://openalex.org/C3018428822","wikidata":"https://www.wikidata.org/wiki/Q7850","display_name":"Chinese language","level":2,"score":0.28540000319480896},{"id":"https://openalex.org/C2778843546","wikidata":"https://www.wikidata.org/wiki/Q3097973","display_name":"Health literacy","level":3,"score":0.2847999930381775},{"id":"https://openalex.org/C2779974597","wikidata":"https://www.wikidata.org/wiki/Q28448986","display_name":"Clinical Practice","level":2,"score":0.28380000591278076},{"id":"https://openalex.org/C56739046","wikidata":"https://www.wikidata.org/wiki/Q192060","display_name":"Knowledge management","level":1,"score":0.2750000059604645},{"id":"https://openalex.org/C86251818","wikidata":"https://www.wikidata.org/wiki/Q816754","display_name":"Benchmarking","level":2,"score":0.27459999918937683},{"id":"https://openalex.org/C2779473830","wikidata":"https://www.wikidata.org/wiki/Q1540899","display_name":"MEDLINE","level":2,"score":0.27059999108314514},{"id":"https://openalex.org/C2779318504","wikidata":"https://www.wikidata.org/wiki/Q1438035","display_name":"Research design","level":2,"score":0.26190000772476196},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.26179999113082886},{"id":"https://openalex.org/C2986762682","wikidata":"https://www.wikidata.org/wiki/Q964754","display_name":"Basic research","level":2,"score":0.25130000710487366}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2511.13169","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2511.13169","pdf_url":"https://arxiv.org/pdf/2511.13169","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"doi:10.48550/arxiv.2511.13169","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2511.13169","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2511.13169","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2511.13169","pdf_url":"https://arxiv.org/pdf/2511.13169","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Large":[0],"language":[1],"models":[2,107,117],"(LLMs)":[3],"have":[4],"demonstrated":[5],"exceptional":[6],"capabilities":[7,186],"in":[8,14,120,140,187,194,216],"general":[9],"domains,":[10],"yet":[11],"their":[12,195],"application":[13],"highly":[15],"specialized":[16],"and":[17,27,43,55,85,104,110,169,202],"culturally-rich":[18],"fields":[19],"like":[20,108],"Traditional":[21],"Chinese":[22,81],"Medicine":[23],"(TCM)":[24],"requires":[25],"rigorous":[26],"nuanced":[28],"evaluation.":[29],"Building":[30],"upon":[31],"prior":[32],"foundational":[33,122],"work":[34],"such":[35],"as":[36],"TCM-3CEval,":[37],"which":[38],"highlighted":[39],"systemic":[40],"knowledge":[41],"gaps":[42],"the":[44,127,147,210,217],"importance":[45],"of":[46,96,130,172],"cultural-contextual":[47],"alignment,":[48],"we":[49],"introduce":[50],"TCM-5CEval,":[51],"a":[52,93,151,163,170,179],"more":[53,180],"granular":[54],"comprehensive":[56],"benchmark.":[57],"TCM-5CEval":[58,175,205],"is":[59],"designed":[60],"to":[61,166,209],"assess":[62],"LLMs":[63],"across":[64],"five":[65],"critical":[66],"dimensions:":[67],"(1)":[68],"Core":[69],"Knowledge":[70],"(TCM-Exam),":[71],"(2)":[72],"Classical":[73],"Literacy":[74],"(TCM-LitQA),":[75],"(3)":[76],"Clinical":[77,87],"Decision-making":[78],"(TCM-MRCD),":[79],"(4)":[80],"Materia":[82],"Medica":[83],"(TCM-CMM),":[84],"(5)":[86],"Non-pharmacological":[88],"Therapy":[89],"(TCM-ClinNPT).":[90],"We":[91],"conducted":[92],"thorough":[94],"evaluation":[95],"fifteen":[97],"prominent":[98],"LLMs,":[99],"revealing":[100],"significant":[101],"performance":[102,153],"disparities":[103],"identifying":[105],"top-performing":[106],"deepseek\\_r1":[109],"gemini\\_2\\_5\\_pro.":[111],"Our":[112],"findings":[113],"show":[114],"that":[115],"while":[116],"exhibit":[118],"proficiency":[119],"recalling":[121],"knowledge,":[123],"they":[124],"struggle":[125],"with":[126,157],"interpretative":[128],"complexities":[129],"classical":[131],"texts.":[132],"Critically,":[133],"permutation-based":[134],"consistency":[135],"testing":[136],"reveals":[137],"widespread":[138],"fragilities":[139],"model":[141],"inference.":[142],"All":[143],"evaluated":[144],"models,":[145],"including":[146],"highest-scoring":[148],"ones,":[149],"displayed":[150],"substantial":[152],"degradation":[154],"when":[155],"faced":[156],"varied":[158],"question":[159],"option":[160],"ordering,":[161],"indicating":[162],"pervasive":[164],"sensitivity":[165],"positional":[167],"bias":[168],"lack":[171],"robust":[173],"understanding.":[174],"not":[176],"only":[177],"provides":[178],"detailed":[181],"diagnostic":[182],"tool":[183],"for":[184,220],"LLM":[185],"TCM":[188,222],"but":[189],"aldso":[190],"exposes":[191],"fundamental":[192],"weaknesses":[193],"reasoning":[196],"stability.":[197],"To":[198],"promote":[199],"further":[200],"research":[201],"standardized":[203],"comparison,":[204],"has":[206],"been":[207],"uploaded":[208],"Medbench":[211],"platform,":[212],"joining":[213],"its":[214],"predecessor":[215],"\"In-depth":[218],"Challenge":[219],"Comprehensive":[221],"Abilities\"":[223],"special":[224],"track.":[225]},"counts_by_year":[],"updated_date":"2026-04-17T18:11:37.981687","created_date":"2025-11-19T00:00:00"}
