{"id":"https://openalex.org/W4393147013","doi":"https://doi.org/10.1609/aaai.v38i17.29923","title":"A Comprehensive Analysis of the Effectiveness of Large Language Models as Automatic Dialogue Evaluators","display_name":"A Comprehensive Analysis of the Effectiveness of Large Language Models as Automatic Dialogue Evaluators","publication_year":2024,"publication_date":"2024-03-24","ids":{"openalex":"https://openalex.org/W4393147013","doi":"https://doi.org/10.1609/aaai.v38i17.29923"},"language":"en","primary_location":{"id":"doi:10.1609/aaai.v38i17.29923","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v38i17.29923","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/29923/31613","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://ojs.aaai.org/index.php/AAAI/article/download/29923/31613","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100374101","display_name":"Chen Zhang","orcid":"https://orcid.org/0000-0002-2406-8734"},"institutions":[{"id":"https://openalex.org/I165932596","display_name":"National University of Singapore","ror":"https://ror.org/01tgyzw49","country_code":"SG","type":"education","lineage":["https://openalex.org/I165932596"]}],"countries":["SG"],"is_corresponding":true,"raw_author_name":"Chen Zhang","raw_affiliation_strings":["National University of Singapore"],"affiliations":[{"raw_affiliation_string":"National University of Singapore","institution_ids":["https://openalex.org/I165932596"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5024161267","display_name":"Luis Fernando D\u2019Haro","orcid":"https://orcid.org/0000-0002-3411-7384"},"institutions":[{"id":"https://openalex.org/I88060688","display_name":"Universidad Polit\u00e9cnica de Madrid","ror":"https://ror.org/03n6nwv02","country_code":"ES","type":"education","lineage":["https://openalex.org/I88060688"]}],"countries":["ES"],"is_corresponding":false,"raw_author_name":"Luis Fernando D'Haro","raw_affiliation_strings":["Speech Technology Group - Universidad Polit\u00e9cnica de Madrid, Spain"],"affiliations":[{"raw_affiliation_string":"Speech Technology Group - Universidad Polit\u00e9cnica de Madrid, Spain","institution_ids":["https://openalex.org/I88060688"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100333361","display_name":"Yiming Chen","orcid":"https://orcid.org/0000-0002-8894-2902"},"institutions":[{"id":"https://openalex.org/I165932596","display_name":"National University of Singapore","ror":"https://ror.org/01tgyzw49","country_code":"SG","type":"education","lineage":["https://openalex.org/I165932596"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Yiming Chen","raw_affiliation_strings":["National University of Singapore"],"affiliations":[{"raw_affiliation_string":"National University of Singapore","institution_ids":["https://openalex.org/I165932596"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5031886937","display_name":"Malu Zhang","orcid":"https://orcid.org/0000-0002-2345-0974"},"institutions":[{"id":"https://openalex.org/I150229711","display_name":"University of Electronic Science and Technology of China","ror":"https://ror.org/04qr3zq92","country_code":"CN","type":"education","lineage":["https://openalex.org/I150229711"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Malu Zhang","raw_affiliation_strings":["University of Electronic Science and Technology of China"],"affiliations":[{"raw_affiliation_string":"University of Electronic Science and Technology of China","institution_ids":["https://openalex.org/I150229711"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5032690182","display_name":"Haizhou Li","orcid":"https://orcid.org/0000-0001-9158-9401"},"institutions":[{"id":"https://openalex.org/I4210116924","display_name":"Chinese University of Hong Kong, Shenzhen","ror":"https://ror.org/02d5ks197","country_code":"CN","type":"education","lineage":["https://openalex.org/I177725633","https://openalex.org/I180726961","https://openalex.org/I4210116924"]},{"id":"https://openalex.org/I165932596","display_name":"National University of Singapore","ror":"https://ror.org/01tgyzw49","country_code":"SG","type":"education","lineage":["https://openalex.org/I165932596"]}],"countries":["CN","SG"],"is_corresponding":false,"raw_author_name":"Haizhou Li","raw_affiliation_strings":["National University of Singapore\nThe Chinese University of Hong Kong (Shenzhen), China"],"affiliations":[{"raw_affiliation_string":"National University of Singapore\nThe Chinese University of Hong Kong (Shenzhen), China","institution_ids":["https://openalex.org/I4210116924","https://openalex.org/I165932596"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5100374101"],"corresponding_institution_ids":["https://openalex.org/I165932596"],"apc_list":null,"apc_paid":null,"fwci":1.6082,"has_fulltext":true,"cited_by_count":11,"citation_normalized_percentile":{"value":0.83677686,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":96,"max":98},"biblio":{"volume":"38","issue":"17","first_page":"19515","last_page":"19524"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.957099974155426,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.957099974155426,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12031","display_name":"Speech and dialogue systems","score":0.9107999801635742,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.593045711517334},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.5104112029075623},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3707125186920166}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.593045711517334},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5104112029075623},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3707125186920166}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1609/aaai.v38i17.29923","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v38i17.29923","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/29923/31613","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1609/aaai.v38i17.29923","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v38i17.29923","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/29923/31613","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"sustainable_development_goals":[{"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4","score":0.46000000834465027}],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":false},"content_urls":{"pdf":"https://content.openalex.org/works/W4393147013.pdf"},"referenced_works_count":57,"referenced_works":["https://openalex.org/W2328886022","https://openalex.org/W2916772188","https://openalex.org/W2963903950","https://openalex.org/W3015487035","https://openalex.org/W3023366413","https://openalex.org/W3034424015","https://openalex.org/W3034808773","https://openalex.org/W3034950505","https://openalex.org/W3036394672","https://openalex.org/W3172728711","https://openalex.org/W3199246459","https://openalex.org/W3201300710","https://openalex.org/W3211384372","https://openalex.org/W4200634294","https://openalex.org/W4205686084","https://openalex.org/W4221138568","https://openalex.org/W4221143046","https://openalex.org/W4221151028","https://openalex.org/W4221160645","https://openalex.org/W4221167741","https://openalex.org/W4224308101","https://openalex.org/W4226120574","https://openalex.org/W4226278401","https://openalex.org/W4229005866","https://openalex.org/W4285188834","https://openalex.org/W4285273040","https://openalex.org/W4285292592","https://openalex.org/W4286987939","https://openalex.org/W4287891007","https://openalex.org/W4288567412","https://openalex.org/W4289447057","https://openalex.org/W4292779060","https://openalex.org/W4307079201","https://openalex.org/W4307479606","https://openalex.org/W4311642023","https://openalex.org/W4311991106","https://openalex.org/W4318719686","https://openalex.org/W4319793767","https://openalex.org/W4322718191","https://openalex.org/W4362598574","https://openalex.org/W4362655261","https://openalex.org/W4362655426","https://openalex.org/W4366735744","https://openalex.org/W4367000491","https://openalex.org/W4378464977","https://openalex.org/W4380136143","https://openalex.org/W4380353763","https://openalex.org/W4384662964","https://openalex.org/W4384918448","https://openalex.org/W4385573116","https://openalex.org/W4385734161","https://openalex.org/W4389009519","https://openalex.org/W4389009545","https://openalex.org/W4389519254","https://openalex.org/W6800875267","https://openalex.org/W6840678358","https://openalex.org/W6898505805"],"related_works":["https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W2358668433","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W2382290278","https://openalex.org/W2478288626","https://openalex.org/W4391913857","https://openalex.org/W2350741829","https://openalex.org/W3204019825"],"abstract_inverted_index":{"Automatic":[0],"evaluation":[1,71,123,173],"is":[2],"an":[3],"integral":[4],"aspect":[5],"of":[6,79,82,86,89,112,125,140,149],"dialogue":[7,22,70,116,134,161],"system":[8],"research.":[9],"The":[10],"traditional":[11],"reference-based":[12],"NLG":[13],"metrics":[14,33],"are":[15,53,72,177],"generally":[16],"found":[17],"to":[18,55],"be":[19,56],"unsuitable":[20],"for":[21,59,68,114],"assessment.":[23],"Consequently,":[24],"recent":[25],"studies":[26],"have":[27],"suggested":[28],"various":[29,154],"unique,":[30],"reference-free":[31],"neural":[32],"that":[34],"better":[35],"align":[36],"with":[37],"human":[38,60],"evaluations.":[39],"Notably":[40],"among":[41],"them,":[42],"large":[43],"language":[44],"models":[45],"(LLMs),":[46],"particularly":[47],"the":[48,80,110,121,147,150,172],"instruction-tuned":[49],"variants":[50],"like":[51],"ChatGPT,":[52],"shown":[54],"promising":[57],"substitutes":[58],"judges.":[61],"Yet,":[62],"existing":[63],"works":[64],"on":[65,109],"utilizing":[66],"LLMs":[67,99,113,129,151],"automatic":[69,115],"limited":[73],"in":[74,77,152],"their":[75],"scope":[76],"terms":[78],"number":[81],"meta-evaluation":[83,142],"datasets,":[84],"mode":[85],"evaluation,":[87],"coverage":[88],"LLMs,":[90],"etc.":[91],"Hence,":[92],"it":[93],"remains":[94],"inconclusive":[95],"how":[96,166],"effective":[97],"these":[98],"are.":[100],"To":[101],"this":[102],"end,":[103],"we":[104,119,145,164],"conduct":[105],"a":[106,137],"comprehensive":[107,138],"study":[108],"application":[111],"evaluation.":[117],"Specifically,":[118],"analyze":[120],"multi-dimensional":[122],"capability":[124],"30":[126],"recently":[127],"emerged":[128],"at":[130,157,179],"both":[131,158],"turn":[132,159],"and":[133,160,168],"levels,":[135],"using":[136],"set":[139],"12":[141],"datasets.":[143],"Additionally,":[144],"probe":[146],"robustness":[148],"handling":[153],"adversarial":[155],"perturbations":[156],"levels.":[162],"Finally,":[163],"explore":[165],"model-level":[167],"dimension-level":[169],"ensembles":[170],"impact":[171],"performance.":[174],"All":[175],"resources":[176],"available":[178],"https://github.com/e0397123/comp-analysis.":[180]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":4},{"year":2024,"cited_by_count":6}],"updated_date":"2026-04-15T08:11:43.952461","created_date":"2025-10-10T00:00:00"}
