{"id":"https://openalex.org/W4398853536","doi":"https://doi.org/10.48550/arxiv.2405.14646","title":"Unveiling the Achilles' Heel of NLG Evaluators: A Unified Adversarial Framework Driven by Large Language Models","display_name":"Unveiling the Achilles' Heel of NLG Evaluators: A Unified Adversarial Framework Driven by Large Language Models","publication_year":2024,"publication_date":"2024-05-23","ids":{"openalex":"https://openalex.org/W4398853536","doi":"https://doi.org/10.48550/arxiv.2405.14646"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2405.14646","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2405.14646","pdf_url":"https://arxiv.org/pdf/2405.14646","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2405.14646","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100333361","display_name":"Yiming Chen","orcid":"https://orcid.org/0000-0002-8894-2902"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Chen, Yiming","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100374175","display_name":"Chen Zhang","orcid":"https://orcid.org/0000-0003-2762-2726"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Chen","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102633211","display_name":"Danqing Luo","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Luo, Danqing","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5024161267","display_name":"Luis Fernando D\u2019Haro","orcid":"https://orcid.org/0000-0002-3411-7384"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"D'Haro, Luis Fernando","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5111887396","display_name":"Robby T. Tan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tan, Robby T.","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5032690182","display_name":"Haizhou Li","orcid":"https://orcid.org/0000-0001-9158-9401"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Haizhou","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5100333361"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.6657000184059143,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.6657000184059143,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13702","display_name":"Machine Learning in Healthcare","score":0.5917999744415283,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/adversarial-system","display_name":"Adversarial system","score":0.7452848553657532},{"id":"https://openalex.org/keywords/heel","display_name":"Heel","score":0.6941893100738525},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.519711971282959},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.4377087354660034},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3882458209991455},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.3266216516494751},{"id":"https://openalex.org/keywords/philosophy","display_name":"Philosophy","score":0.13355901837348938},{"id":"https://openalex.org/keywords/medicine","display_name":"Medicine","score":0.11279299855232239}],"concepts":[{"id":"https://openalex.org/C37736160","wikidata":"https://www.wikidata.org/wiki/Q1801315","display_name":"Adversarial system","level":2,"score":0.7452848553657532},{"id":"https://openalex.org/C2777248721","wikidata":"https://www.wikidata.org/wiki/Q174647","display_name":"Heel","level":2,"score":0.6941893100738525},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.519711971282959},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.4377087354660034},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3882458209991455},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.3266216516494751},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.13355901837348938},{"id":"https://openalex.org/C71924100","wikidata":"https://www.wikidata.org/wiki/Q11190","display_name":"Medicine","level":0,"score":0.11279299855232239},{"id":"https://openalex.org/C105702510","wikidata":"https://www.wikidata.org/wiki/Q514","display_name":"Anatomy","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2405.14646","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2405.14646","pdf_url":"https://arxiv.org/pdf/2405.14646","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},{"id":"doi:10.48550/arxiv.2405.14646","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2405.14646","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2405.14646","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2405.14646","pdf_url":"https://arxiv.org/pdf/2405.14646","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1204765605","display_name":null,"funder_award_id":"CIN/AEI/10.13039/501100011033","funder_id":"https://openalex.org/F4320320300","funder_display_name":"European Commission"},{"id":"https://openalex.org/G1320060331","display_name":null,"funder_award_id":"MCIN/AEI/10.13039/50110001103","funder_id":"https://openalex.org/F4320335598","funder_display_name":"Agencia Estatal de Investigaci\u00f3n"},{"id":"https://openalex.org/G1598597440","display_name":null,"funder_award_id":"MCIN/AEI/10.13039/5011000110","funder_id":"https://openalex.org/F4320335598","funder_display_name":"Agencia Estatal de Investigaci\u00f3n"},{"id":"https://openalex.org/G1622782223","display_name":null,"funder_award_id":"MCIN/AEI/10","funder_id":"https://openalex.org/F4320335322","funder_display_name":"European Regional Development Fund"},{"id":"https://openalex.org/G1643949827","display_name":null,"funder_award_id":"AEI/10.13039/501100011033","funder_id":"https://openalex.org/F4320335598","funder_display_name":"Agencia Estatal de Investigaci\u00f3n"},{"id":"https://openalex.org/G1815864298","display_name":null,"funder_award_id":"CIN/AEI/10.13039/501100011033","funder_id":"https://openalex.org/F4320335322","funder_display_name":"European Regional Development Fund"},{"id":"https://openalex.org/G2262748287","display_name":null,"funder_award_id":"501100011033","funder_id":"https://openalex.org/F4320335598","funder_display_name":"Agencia Estatal de Investigaci\u00f3n"},{"id":"https://openalex.org/G2394899866","display_name":null,"funder_award_id":"MCIN/AEI/10.13039/501100011033","funder_id":"https://openalex.org/F4320322138","funder_display_name":"Universidad Polit\u00e9cnica de Madrid"},{"id":"https://openalex.org/G2571741393","display_name":"Improving social competences of virtual agents through artificial consciousness based on the Attention Schema Theory","funder_award_id":"101071191","funder_id":"https://openalex.org/F4320320300","funder_display_name":"European Commission"},{"id":"https://openalex.org/G2601397997","display_name":null,"funder_award_id":"10.13039/501100011033","funder_id":"https://openalex.org/F4320320300","funder_display_name":"European Commission"},{"id":"https://openalex.org/G2969783435","display_name":null,"funder_award_id":"CIN/AEI/10.13039/501100011033","funder_id":"https://openalex.org/F4320335598","funder_display_name":"Agencia Estatal de Investigaci\u00f3n"},{"id":"https://openalex.org/G300979063","display_name":null,"funder_award_id":"10.13039/501100011033","funder_id":"https://openalex.org/F4320335598","funder_display_name":"Agencia Estatal de Investigaci\u00f3n"},{"id":"https://openalex.org/G3219925899","display_name":null,"funder_award_id":"MCIN/AEI/10.13039/501100011033","funder_id":"https://openalex.org/F4320335598","funder_display_name":"Agencia Estatal de Investigaci\u00f3n"},{"id":"https://openalex.org/G3429648993","display_name":null,"funder_award_id":"PID202","funder_id":"https://openalex.org/F4320335598","funder_display_name":"Agencia Estatal de Investigaci\u00f3n"},{"id":"https://openalex.org/G3480869486","display_name":null,"funder_award_id":"13039","funder_id":"https://openalex.org/F4320335598","funder_display_name":"Agencia Estatal de Investigaci\u00f3n"},{"id":"https://openalex.org/G3681454997","display_name":null,"funder_award_id":"13039/501100011033","funder_id":"https://openalex.org/F4320320300","funder_display_name":"European Commission"},{"id":"https://openalex.org/G4042783231","display_name":null,"funder_award_id":"501100011033","funder_id":"https://openalex.org/F4320320300","funder_display_name":"European Commission"},{"id":"https://openalex.org/G4045093968","display_name":null,"funder_award_id":"MCIN/AEI/10.13039/501100011033","funder_id":"https://openalex.org/F4320335322","funder_display_name":"European Regional Development Fund"},{"id":"https://openalex.org/G410571670","display_name":null,"funder_award_id":"10.13039/501100011033","funder_id":"https://openalex.org/F4320335322","funder_display_name":"European Regional Development Fund"},{"id":"https://openalex.org/G4126322094","display_name":null,"funder_award_id":"01100011033","funder_id":"https://openalex.org/F4320335598","funder_display_name":"Agencia Estatal de Investigaci\u00f3n"},{"id":"https://openalex.org/G451917667","display_name":null,"funder_award_id":"13039/501100011033","funder_id":"https://openalex.org/F4320335322","funder_display_name":"European Regional Development Fund"},{"id":"https://openalex.org/G5080475149","display_name":null,"funder_award_id":"10.13039","funder_id":"https://openalex.org/F4320335598","funder_display_name":"Agencia Estatal de Investigaci\u00f3n"},{"id":"https://openalex.org/G5539308778","display_name":null,"funder_award_id":"AEI/10.13039/501100011033","funder_id":"https://openalex.org/F4320335322","funder_display_name":"European Regional Development Fund"},{"id":"https://openalex.org/G5967599077","display_name":null,"funder_award_id":"501100011033","funder_id":"https://openalex.org/F4320335322","funder_display_name":"European Regional Development Fund"},{"id":"https://openalex.org/G6090738933","display_name":null,"funder_award_id":"ERDF A way of making Europe","funder_id":"https://openalex.org/F4320320300","funder_display_name":"European Commission"},{"id":"https://openalex.org/G6378456299","display_name":null,"funder_award_id":"MCIN/AEI/10.13039/501100011033","funder_id":"https://openalex.org/F4320313831","funder_display_name":"Comunidad de Madrid"},{"id":"https://openalex.org/G661330594","display_name":null,"funder_award_id":"00110","funder_id":"https://openalex.org/F4320335598","funder_display_name":"Agencia Estatal de Investigaci\u00f3n"},{"id":"https://openalex.org/G6685425346","display_name":null,"funder_award_id":"0011033","funder_id":"https://openalex.org/F4320335598","funder_display_name":"Agencia Estatal de Investigaci\u00f3n"},{"id":"https://openalex.org/G6703157376","display_name":null,"funder_award_id":"GENIUS","funder_id":"https://openalex.org/F4320320300","funder_display_name":"European Commission"},{"id":"https://openalex.org/G6823456974","display_name":null,"funder_award_id":"AEI/10.13039/501100011033","funder_id":"https://openalex.org/F4320320300","funder_display_name":"European Commission"},{"id":"https://openalex.org/G6838289232","display_name":null,"funder_award_id":"MCIN/AEI/10.13039/50110001103","funder_id":"https://openalex.org/F4320335322","funder_display_name":"European Regional Development Fund"},{"id":"https://openalex.org/G7084143925","display_name":null,"funder_award_id":"AEI/10","funder_id":"https://openalex.org/F4320335598","funder_display_name":"Agencia Estatal de Investigaci\u00f3n"},{"id":"https://openalex.org/G7266728691","display_name":null,"funder_award_id":"13039/501100011033","funder_id":"https://openalex.org/F4320335598","funder_display_name":"Agencia Estatal de Investigaci\u00f3n"},{"id":"https://openalex.org/G7535663061","display_name":null,"funder_award_id":"AEI/10.","funder_id":"https://openalex.org/F4320335598","funder_display_name":"Agencia Estatal de Investigaci\u00f3n"},{"id":"https://openalex.org/G8260616629","display_name":null,"funder_award_id":"011033","funder_id":"https://openalex.org/F4320335598","funder_display_name":"Agencia Estatal de Investigaci\u00f3n"},{"id":"https://openalex.org/G8638682022","display_name":null,"funder_award_id":"10.13039/50110001103","funder_id":"https://openalex.org/F4320335598","funder_display_name":"Agencia Estatal de Investigaci\u00f3n"},{"id":"https://openalex.org/G8695128545","display_name":null,"funder_award_id":"MCIN/AEI/10.13039/501100011033","funder_id":"https://openalex.org/F4320320300","funder_display_name":"European Commission"}],"funders":[{"id":"https://openalex.org/F4320313831","display_name":"Comunidad de Madrid","ror":null},{"id":"https://openalex.org/F4320320300","display_name":"European Commission","ror":"https://ror.org/00k4n6c32"},{"id":"https://openalex.org/F4320322138","display_name":"Universidad Polit\u00e9cnica de Madrid","ror":"https://ror.org/03n6nwv02"},{"id":"https://openalex.org/F4320335322","display_name":"European Regional Development Fund","ror":"https://ror.org/00k4n6c32"},{"id":"https://openalex.org/F4320335598","display_name":"Agencia Estatal de Investigaci\u00f3n","ror":null},{"id":"https://openalex.org/F4320336569","display_name":"Shenzhen Science and Technology Innovation Program","ror":null}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4398853536.pdf","grobid_xml":"https://content.openalex.org/works/W4398853536.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2748952813","https://openalex.org/W2502115930","https://openalex.org/W2482350142","https://openalex.org/W4246396837","https://openalex.org/W3126451824","https://openalex.org/W1561927205","https://openalex.org/W3191453585","https://openalex.org/W4297672492","https://openalex.org/W4310988119"],"abstract_inverted_index":{"The":[0,143],"automatic":[1],"evaluation":[2,50],"of":[3,29,89,154],"natural":[4],"language":[5,91],"generation":[6,96],"(NLG)":[7],"systems":[8],"presents":[9],"a":[10,59],"long-lasting":[11],"challenge.":[12],"Recent":[13],"studies":[14],"have":[15],"highlighted":[16],"various":[17,155],"neural":[18],"metrics":[19],"that":[20,74,146],"align":[21],"well":[22],"with":[23,116],"human":[24,79],"evaluations.":[25],"Yet,":[26],"the":[27,40,54,86,105,119],"robustness":[28],"these":[30],"evaluators":[31,130],"against":[32,64],"adversarial":[33,45,62],"perturbations":[34],"remains":[35],"largely":[36],"under-explored":[37],"due":[38],"to":[39,71,150],"unique":[41],"challenges":[42],"in":[43,94],"obtaining":[44],"data":[46,73,106,112],"for":[47],"different":[48],"NLG":[49,65,133],"tasks.":[51],"To":[52],"address":[53],"problem,":[55],"we":[56,99],"introduce":[57],"AdvEval,":[58],"novel":[60],"black-box":[61],"framework":[63],"evaluators.":[66,82],"AdvEval":[67,147],"is":[68],"specially":[69],"tailored":[70],"generate":[72],"yield":[75],"strong":[76,101],"disagreements":[77],"between":[78],"and":[80,97,108,121,131,140],"victim":[81,122,129,156],"Specifically,":[83],"inspired":[84],"by":[85],"recent":[87],"success":[88],"large":[90],"models":[92],"(LLMs)":[93],"text":[95],"evaluation,":[98],"adopt":[100],"LLMs":[102],"as":[103],"both":[104],"generator":[107],"gold":[109,120],"evaluator.":[110,123],"Adversarial":[111],"are":[113],"automatically":[114],"optimized":[115],"feedback":[117],"from":[118],"We":[124],"conduct":[125],"experiments":[126],"on":[127],"12":[128],"11":[132],"datasets,":[134],"spanning":[135],"tasks":[136],"including":[137],"dialogue,":[138],"summarization,":[139],"question":[141],"evaluation.":[142],"results":[144],"show":[145],"can":[148],"lead":[149],"significant":[151],"performance":[152],"degradation":[153],"metrics,":[157],"thereby":[158],"validating":[159],"its":[160],"efficacy.":[161]},"counts_by_year":[],"updated_date":"2026-04-21T08:09:41.155169","created_date":"2024-05-25T00:00:00"}
