{"id":"https://openalex.org/W2737041661","doi":"https://doi.org/10.18653/v1/d17-1238","title":"Why We Need New Evaluation Metrics for NLG","display_name":"Why We Need New Evaluation Metrics for NLG","publication_year":2017,"publication_date":"2017-01-01","ids":{"openalex":"https://openalex.org/W2737041661","doi":"https://doi.org/10.18653/v1/d17-1238","mag":"2737041661"},"language":"en","primary_location":{"id":"doi:10.18653/v1/d17-1238","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/d17-1238","pdf_url":"https://www.aclweb.org/anthology/D17-1238.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2017 Conference on Empirical Methods in Natural\n          Language Processing","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.aclweb.org/anthology/D17-1238.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5050825493","display_name":"Jekaterina Novikova","orcid":"https://orcid.org/0000-0003-4754-6126"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Jekaterina Novikova","raw_affiliation_strings":["School of Mathematical and Computer Sciences Heriot-Watt University, Edinburgh","School of Mathematical & Computer Sciences"],"affiliations":[{"raw_affiliation_string":"School of Mathematical and Computer Sciences Heriot-Watt University, Edinburgh","institution_ids":[]},{"raw_affiliation_string":"School of Mathematical & Computer Sciences","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5004829991","display_name":"Ond\u0159ej Du\u0161ek","orcid":"https://orcid.org/0000-0002-1415-1702"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ond\u0159ej Du\u0161ek","raw_affiliation_strings":["School of Mathematical & Computer Sciences"],"affiliations":[{"raw_affiliation_string":"School of Mathematical & Computer Sciences","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5016745814","display_name":"Amanda Cercas Curry","orcid":"https://orcid.org/0000-0002-5576-2550"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Amanda Cercas Curry","raw_affiliation_strings":["School of Mathematical and Computer Sciences Heriot-Watt University, Edinburgh","School of Mathematical & Computer Sciences"],"affiliations":[{"raw_affiliation_string":"School of Mathematical and Computer Sciences Heriot-Watt University, Edinburgh","institution_ids":[]},{"raw_affiliation_string":"School of Mathematical & Computer Sciences","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5076593865","display_name":"Verena Rieser","orcid":"https://orcid.org/0000-0001-6117-4395"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Verena Rieser","raw_affiliation_strings":["School of Mathematical and Computer Sciences Heriot-Watt University, Edinburgh","School of Mathematical & Computer Sciences"],"affiliations":[{"raw_affiliation_string":"School of Mathematical and Computer Sciences Heriot-Watt University, Edinburgh","institution_ids":[]},{"raw_affiliation_string":"School of Mathematical & Computer Sciences","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5050825493"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":3.7415,"has_fulltext":true,"cited_by_count":32,"citation_normalized_percentile":{"value":0.94585128,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"2241","last_page":"2252"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.993399977684021,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8129361867904663},{"id":"https://openalex.org/keywords/metric","display_name":"Metric (unit)","score":0.6742547154426575},{"id":"https://openalex.org/keywords/grammar","display_name":"Grammar","score":0.5674532651901245},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5326839089393616},{"id":"https://openalex.org/keywords/word","display_name":"Word (group theory)","score":0.5205087065696716},{"id":"https://openalex.org/keywords/range","display_name":"Range (aeronautics)","score":0.5099121332168579},{"id":"https://openalex.org/keywords/state","display_name":"State (computer science)","score":0.4701153337955475},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.44412586092948914},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.41929054260253906},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.3812180161476135},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.1464117467403412},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.06404131650924683}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8129361867904663},{"id":"https://openalex.org/C176217482","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Metric (unit)","level":2,"score":0.6742547154426575},{"id":"https://openalex.org/C26022165","wikidata":"https://www.wikidata.org/wiki/Q8091","display_name":"Grammar","level":2,"score":0.5674532651901245},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5326839089393616},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.5205087065696716},{"id":"https://openalex.org/C204323151","wikidata":"https://www.wikidata.org/wiki/Q905424","display_name":"Range (aeronautics)","level":2,"score":0.5099121332168579},{"id":"https://openalex.org/C48103436","wikidata":"https://www.wikidata.org/wiki/Q599031","display_name":"State (computer science)","level":2,"score":0.4701153337955475},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.44412586092948914},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.41929054260253906},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3812180161476135},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.1464117467403412},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.06404131650924683},{"id":"https://openalex.org/C192562407","wikidata":"https://www.wikidata.org/wiki/Q228736","display_name":"Materials science","level":0,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C21547014","wikidata":"https://www.wikidata.org/wiki/Q1423657","display_name":"Operations management","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C159985019","wikidata":"https://www.wikidata.org/wiki/Q181790","display_name":"Composite material","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.18653/v1/d17-1238","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/d17-1238","pdf_url":"https://www.aclweb.org/anthology/D17-1238.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2017 Conference on Empirical Methods in Natural\n          Language Processing","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:1707.06875","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1707.06875","pdf_url":"https://arxiv.org/pdf/1707.06875","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"doi:10.48550/arxiv.1707.06875","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.1707.06875","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article-journal"}],"best_oa_location":{"id":"doi:10.18653/v1/d17-1238","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/d17-1238","pdf_url":"https://www.aclweb.org/anthology/D17-1238.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2017 Conference on Empirical Methods in Natural\n          Language Processing","raw_type":"proceedings-article"},"sustainable_development_goals":[{"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4","score":0.5899999737739563}],"awards":[{"id":"https://openalex.org/G4052565463","display_name":"MaDrIgAL: MultiDimensional Interaction management and Adaptive Learning","funder_award_id":"EP/N017536/1","funder_id":"https://openalex.org/F4320334627","funder_display_name":"Engineering and Physical Sciences Research Council"},{"id":"https://openalex.org/G4466388606","display_name":null,"funder_award_id":"NVIDIA","funder_id":"https://openalex.org/F4320334627","funder_display_name":"Engineering and Physical Sciences Research Council"},{"id":"https://openalex.org/G5709710992","display_name":null,"funder_award_id":"EP/N017536/1","funder_id":"https://openalex.org/F4320334627","funder_display_name":"Engineering and Physical Sciences Research Council"},{"id":"https://openalex.org/G6629039517","display_name":null,"funder_award_id":"EP/M005429/1","funder_id":"https://openalex.org/F4320334627","funder_display_name":"Engineering and Physical Sciences Research Council"},{"id":"https://openalex.org/G8495392564","display_name":"DILiGENt: Domain-Independent Language Generation","funder_award_id":"EP/M005429/1","funder_id":"https://openalex.org/F4320334627","funder_display_name":"Engineering and Physical Sciences Research Council"}],"funders":[{"id":"https://openalex.org/F4320309480","display_name":"Nvidia","ror":"https://ror.org/03jdj4y14"},{"id":"https://openalex.org/F4320334627","display_name":"Engineering and Physical Sciences Research Council","ror":"https://ror.org/0439y7842"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2737041661.pdf","grobid_xml":"https://content.openalex.org/works/W2737041661.grobid-xml"},"referenced_works_count":31,"referenced_works":["https://openalex.org/W638006253","https://openalex.org/W1489525520","https://openalex.org/W1552182777","https://openalex.org/W1956340063","https://openalex.org/W1982897610","https://openalex.org/W2021618504","https://openalex.org/W2034562813","https://openalex.org/W2062295023","https://openalex.org/W2098651115","https://openalex.org/W2101105183","https://openalex.org/W2123891489","https://openalex.org/W2128856065","https://openalex.org/W2140054881","https://openalex.org/W2149327368","https://openalex.org/W2164777277","https://openalex.org/W2250530145","https://openalex.org/W2251165062","https://openalex.org/W2328886022","https://openalex.org/W2418441884","https://openalex.org/W2483402000","https://openalex.org/W2518570122","https://openalex.org/W2530291685","https://openalex.org/W2578330760","https://openalex.org/W2745039414","https://openalex.org/W2949954138","https://openalex.org/W2951718774","https://openalex.org/W2951813108","https://openalex.org/W2962905474","https://openalex.org/W2963912046","https://openalex.org/W2963924362","https://openalex.org/W3105830849"],"related_works":["https://openalex.org/W2963672599","https://openalex.org/W2101105183","https://openalex.org/W3104546989","https://openalex.org/W2963403868","https://openalex.org/W2963341956","https://openalex.org/W2154652894","https://openalex.org/W2130942839","https://openalex.org/W2064675550","https://openalex.org/W3166624321","https://openalex.org/W3186081172","https://openalex.org/W3037186790","https://openalex.org/W2965373594","https://openalex.org/W2963206148","https://openalex.org/W2962883855","https://openalex.org/W2123301721","https://openalex.org/W1522301498","https://openalex.org/W230557333","https://openalex.org/W650221164","https://openalex.org/W3091075079","https://openalex.org/W2971753232"],"abstract_inverted_index":{"The":[0],"majority":[1],"of":[2,31,49],"NLG":[3],"evaluation":[4,24],"relies":[5],"on":[6],"automatic":[7,23,73],"metrics,":[8,32],"such":[9],"as":[10,52],"BLEU.":[11],"In":[12],"this":[13],"paper,":[14],"we":[15],"motivate":[16],"the":[17],"need":[18],"for":[19],"novel,":[20],"system-and":[21],"data-independent":[22],"methods:":[25],"We":[26,58],"investigate":[27],"a":[28,88],"wide":[29],"range":[30],"including":[33],"state-of-the-art":[34],"word-based":[35],"and":[36,40,79],"novel":[37],"grammar-based":[38],"ones,":[39],"demonstrate":[41],"that":[42,61,72],"they":[43],"only":[44],"weakly":[45],"reflect":[46],"human":[47],"judgements":[48],"system":[50,82,89],"outputs":[51],"generated":[53],"by":[54,84],"data-driven,":[55],"end-to-end":[56],"NLG.":[57],"also":[59,70],"show":[60],"metric":[62],"performance":[63],"is":[64],"data-and":[65],"system-specific.":[66],"Nevertheless,":[67],"our":[68],"results":[69],"suggest":[71],"metrics":[74],"perform":[75],"reliably":[76],"at":[77],"system-level":[78],"can":[80],"support":[81],"development":[83],"finding":[85],"cases":[86],"where":[87],"performs":[90],"poorly.":[91]},"counts_by_year":[{"year":2025,"cited_by_count":3},{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":1},{"year":2021,"cited_by_count":9},{"year":2020,"cited_by_count":9},{"year":2019,"cited_by_count":7},{"year":2018,"cited_by_count":1},{"year":2017,"cited_by_count":1}],"updated_date":"2026-04-10T15:06:20.359241","created_date":"2017-07-31T00:00:00"}
