{"id":"https://openalex.org/W4414758332","doi":"https://doi.org/10.48550/arxiv.2509.04455","title":"INSEva: A Comprehensive Chinese Benchmark for Large Language Models in Insurance","display_name":"INSEva: A Comprehensive Chinese Benchmark for Large Language Models in Insurance","publication_year":2025,"publication_date":"2025-08-27","ids":{"openalex":"https://openalex.org/W4414758332","doi":"https://doi.org/10.48550/arxiv.2509.04455"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2509.04455","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2509.04455","pdf_url":"https://arxiv.org/pdf/2509.04455","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2509.04455","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5085175204","display_name":"Shisong Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Chen, Shisong","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103011307","display_name":"Qian Zhu","orcid":"https://orcid.org/0009-0001-2932-6202"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhu, Qian","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100745800","display_name":"Wenyan Yang","orcid":"https://orcid.org/0009-0003-5235-4783"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yang, Wenyan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5071511791","display_name":"Chengyi Yang","orcid":"https://orcid.org/0000-0002-7023-7543"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yang, Chengyi","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103240906","display_name":"Wang Zhong","orcid":"https://orcid.org/0009-0005-4244-3496"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Zhong","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100338698","display_name":"Ping Wang","orcid":"https://orcid.org/0000-0003-0033-4150"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Ping","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Lin, Xuan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lin, Xuan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102800174","display_name":"Bo Xu","orcid":"https://orcid.org/0000-0002-3640-0562"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xu, Bo","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5050988122","display_name":"Danni Li","orcid":"https://orcid.org/0009-0007-5595-9138"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Daqian","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5019113664","display_name":"Chao Yuan","orcid":"https://orcid.org/0009-0002-1864-2792"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yuan, Chao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5004450665","display_name":"Li-Cai Qi","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Qi, Licai","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102177776","display_name":"Wanqing Xu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xu, Wanqing","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"zhenxing, sun","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"zhenxing, sun","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5069884799","display_name":"Lin Xin","orcid":"https://orcid.org/0000-0002-4589-8593"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lu, Xin","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5119819106","display_name":"Shiqiang Xiong","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xiong, Shiqiang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101805443","display_name":"Chao Chen","orcid":"https://orcid.org/0000-0003-3911-8711"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Chao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5109262530","display_name":"H. B. Hu","orcid":"https://orcid.org/0000-0003-1089-0302"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hu, Haixiang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5090455375","display_name":"Yanghua Xiao","orcid":"https://orcid.org/0000-0001-8403-9591"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xiao, Yanghua","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":18,"corresponding_author_ids":["https://openalex.org/A5085175204"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12394","display_name":"Insurance and Financial Risk Management","score":0.7918000221252441,"subfield":{"id":"https://openalex.org/subfields/2002","display_name":"Economics and Econometrics"},"field":{"id":"https://openalex.org/fields/20","display_name":"Economics, Econometrics and Finance"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T12394","display_name":"Insurance and Financial Risk Management","score":0.7918000221252441,"subfield":{"id":"https://openalex.org/subfields/2002","display_name":"Economics and Econometrics"},"field":{"id":"https://openalex.org/fields/20","display_name":"Economics, Econometrics and Finance"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.7583000063896179},{"id":"https://openalex.org/keywords/taxonomy","display_name":"Taxonomy (biology)","score":0.47290000319480896},{"id":"https://openalex.org/keywords/completeness","display_name":"Completeness (order theory)","score":0.4586000144481659},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.45249998569488525},{"id":"https://openalex.org/keywords/reliability","display_name":"Reliability (semiconductor)","score":0.42590001225471497},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.3896999955177307},{"id":"https://openalex.org/keywords/component","display_name":"Component (thermodynamics)","score":0.3677000105381012}],"concepts":[{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.7583000063896179},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6761999726295471},{"id":"https://openalex.org/C58642233","wikidata":"https://www.wikidata.org/wiki/Q8269924","display_name":"Taxonomy (biology)","level":2,"score":0.47290000319480896},{"id":"https://openalex.org/C17231256","wikidata":"https://www.wikidata.org/wiki/Q5156540","display_name":"Completeness (order theory)","level":2,"score":0.4586000144481659},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.45249998569488525},{"id":"https://openalex.org/C43214815","wikidata":"https://www.wikidata.org/wiki/Q7310987","display_name":"Reliability (semiconductor)","level":3,"score":0.42590001225471497},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.39489999413490295},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3935999870300293},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.3896999955177307},{"id":"https://openalex.org/C168167062","wikidata":"https://www.wikidata.org/wiki/Q1117970","display_name":"Component (thermodynamics)","level":2,"score":0.3677000105381012},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3546000123023987},{"id":"https://openalex.org/C12725497","wikidata":"https://www.wikidata.org/wiki/Q810247","display_name":"Baseline (sea)","level":2,"score":0.3131999969482422},{"id":"https://openalex.org/C105002631","wikidata":"https://www.wikidata.org/wiki/Q4833645","display_name":"Subject-matter expert","level":3,"score":0.2989000082015991},{"id":"https://openalex.org/C4554734","wikidata":"https://www.wikidata.org/wiki/Q593744","display_name":"Knowledge base","level":2,"score":0.2685999870300293},{"id":"https://openalex.org/C32896092","wikidata":"https://www.wikidata.org/wiki/Q189447","display_name":"Risk management","level":2,"score":0.26759999990463257},{"id":"https://openalex.org/C207685749","wikidata":"https://www.wikidata.org/wiki/Q2088941","display_name":"Domain knowledge","level":2,"score":0.2635999917984009},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.25999999046325684},{"id":"https://openalex.org/C68799949","wikidata":"https://www.wikidata.org/wiki/Q977871","display_name":"Insurance policy","level":2,"score":0.25679999589920044},{"id":"https://openalex.org/C12174686","wikidata":"https://www.wikidata.org/wiki/Q1058438","display_name":"Risk assessment","level":2,"score":0.2563000023365021},{"id":"https://openalex.org/C112930515","wikidata":"https://www.wikidata.org/wiki/Q4389547","display_name":"Risk analysis (engineering)","level":1,"score":0.25099998712539673}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2509.04455","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2509.04455","pdf_url":"https://arxiv.org/pdf/2509.04455","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"doi:10.48550/arxiv.2509.04455","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2509.04455","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2509.04455","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2509.04455","pdf_url":"https://arxiv.org/pdf/2509.04455","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Insurance,":[0],"as":[1],"a":[2,50,67],"critical":[3],"component":[4],"of":[5,13,39,108],"the":[6,34,40],"global":[7],"financial":[8],"system,":[9],"demands":[10],"high":[11],"standards":[12],"accuracy":[14],"and":[15,37,61,78,100],"reliability":[16],"in":[17,63,102,139],"AI":[18,24,58],"applications.":[19],"While":[20,123],"existing":[21],"benchmarks":[22],"evaluate":[23],"capabilities":[25,62],"across":[26,120],"various":[27],"domains,":[28],"they":[29],"often":[30],"fail":[31],"to":[32],"capture":[33],"unique":[35],"characteristics":[36],"requirements":[38],"insurance":[41,128,143],"domain.":[42],"To":[43],"address":[44],"this":[45],"gap,":[46],"we":[47,115],"present":[48],"INSEva,":[49],"comprehensive":[51],"Chinese":[52],"benchmark":[53,91,146],"specifically":[54],"designed":[55],"for":[56,96],"evaluating":[57],"systems'":[59],"knowledge":[60],"insurance.":[64],"INSEva":[65],"features":[66],"multi-dimensional":[68],"evaluation":[69,84,94,107],"taxonomy":[70],"covering":[71],"business":[72],"areas,":[73],"task":[74],"formats,":[75],"difficulty":[76],"levels,":[77],"cognitive-knowledge":[79],"dimension,":[80],"comprising":[81],"38,704":[82],"high-quality":[83],"examples":[85],"sourced":[86],"from":[87],"authoritative":[88],"materials.":[89],"Our":[90],"implements":[92],"tailored":[93],"methods":[95],"assessing":[97],"both":[98],"faithfulness":[99],"completeness":[101],"open-ended":[103],"responses.":[104],"Through":[105],"extensive":[106],"8":[109],"state-of-the-art":[110],"Large":[111],"Language":[112],"Models":[113],"(LLMs),":[114],"identify":[116],"significant":[117],"performance":[118],"variations":[119],"different":[121],"dimensions.":[122],"general":[124],"LLMs":[125],"demonstrate":[126],"basic":[127],"domain":[129],"competency":[130],"with":[131],"average":[132],"scores":[133],"above":[134],"80,":[135],"substantial":[136],"gaps":[137],"remain":[138],"handling":[140],"complex,":[141],"real-world":[142],"scenarios.":[144],"The":[145],"will":[147],"be":[148],"public":[149],"soon.":[150]},"counts_by_year":[],"updated_date":"2026-04-17T18:11:37.981687","created_date":"2025-10-10T00:00:00"}
