% BibTeX record for a paper in the Findings of EMNLP 2025 (the url field points at aclanthology.org).
% `month` uses the unquoted predefined macro `nov`; every other value is a quoted string.
% `address` carries the location given for the proceedings ("Suzhou, China").
@inproceedings{chen-etal-2025-evaluation,
    title = "An Evaluation Resource for Grounding Translation Errors",
    author = "Chen, Sujin and
      Wang, Kang and
      Zhou, Zixuan and
      Duan, Xiangyu and
      Zhang, Wanqun and
      Yang, Hao and
      Su, Jinsong and
      Zhang, Min",
    editor = "Christodoulopoulos, Christos and
      Chakraborty, Tanmoy and
      Rose, Carolyn and
      Peng, Violet",
    booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2025",
    month = nov,
    year = "2025",
    address = "Suzhou, China",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2025.findings-emnlp.1299/",
    doi = "10.18653/v1/2025.findings-emnlp.1299",
    pages = "23900--23916",
    ISBN = "979-8-89176-335-7",
    abstract = "Current fine-grained error analyses by LLMs gain more and more attention in machine translation, but these analyses do not ground the errors to the reasons why the annotated text spans are erroneous. If LLMs do not know such reasons, the corrections or refinements by LLMs will be untrustworthy. In this paper, we check whether LLMs know such reasons in translation error grounding task. We manually build an evaluation resource through a bi-directional grounding scheme. In the forward direction, we annotate the explanation of the reason for each error span. In the backward direction, we annotate the error span given its explanation, in which the error span is masked. If the error spans of both directions are consistent, we deem the explanation is valid. Such grounding process can regulate the explanation so as to avoid the subjective bias. The evaluation results on this resource show that LLMs perform significantly worse than human in both directions. Furthermore, we apply the error grounding for filtering false alarmed errors, and achieve significant improvement in translation error detection."
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 record for citekey chen-etal-2025-evaluation: eight author names, then the host
     proceedings (relatedItem type="host") with its four editors, publisher, place and ISBN,
     followed by the abstract, identifiers, URL and page extent of the paper itself. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="chen-etal-2025-evaluation">
    <titleInfo>
      <title>An Evaluation Resource for Grounding Translation Errors</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Sujin</namePart>
      <namePart type="family">Chen</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Kang</namePart>
      <namePart type="family">Wang</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Zixuan</namePart>
      <namePart type="family">Zhou</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Xiangyu</namePart>
      <namePart type="family">Duan</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Wanqun</namePart>
      <namePart type="family">Zhang</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Hao</namePart>
      <namePart type="family">Yang</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Jinsong</namePart>
      <namePart type="family">Su</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Min</namePart>
      <namePart type="family">Zhang</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2025-11</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume this paper appears in. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Findings of the Association for Computational Linguistics: EMNLP 2025</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Christos</namePart>
        <namePart type="family">Christodoulopoulos</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Tanmoy</namePart>
        <namePart type="family">Chakraborty</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Carolyn</namePart>
        <namePart type="family">Rose</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Violet</namePart>
        <namePart type="family">Peng</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Suzhou, China</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-335-7</identifier>
    </relatedItem>
    <abstract>Current fine-grained error analyses by LLMs gain more and more attention in machine translation, but these analyses do not ground the errors to the reasons why the annotated text spans are erroneous. If LLMs do not know such reasons, the corrections or refinements by LLMs will be untrustworthy. In this paper, we check whether LLMs know such reasons in translation error grounding task. We manually build an evaluation resource through a bi-directional grounding scheme. In the forward direction, we annotate the explanation of the reason for each error span. In the backward direction, we annotate the error span given its explanation, in which the error span is masked. If the error spans of both directions are consistent, we deem the explanation is valid. Such grounding process can regulate the explanation so as to avoid the subjective bias. The evaluation results on this resource show that LLMs perform significantly worse than human in both directions. Furthermore, we apply the error grounding for filtering false alarmed errors, and achieve significant improvement in translation error detection.</abstract>
    <identifier type="citekey">chen-etal-2025-evaluation</identifier>
    <identifier type="doi">10.18653/v1/2025.findings-emnlp.1299</identifier>
    <location>
      <url>https://aclanthology.org/2025.findings-emnlp.1299/</url>
    </location>
    <part>
      <date>2025-11</date>
      <extent unit="page">
        <start>23900</start>
        <end>23916</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T An Evaluation Resource for Grounding Translation Errors
%A Chen, Sujin
%A Wang, Kang
%A Zhou, Zixuan
%A Duan, Xiangyu
%A Zhang, Wanqun
%A Yang, Hao
%A Su, Jinsong
%A Zhang, Min
%Y Christodoulopoulos, Christos
%Y Chakraborty, Tanmoy
%Y Rose, Carolyn
%Y Peng, Violet
%S Findings of the Association for Computational Linguistics: EMNLP 2025
%D 2025
%8 November
%I Association for Computational Linguistics
%C Suzhou, China
%@ 979-8-89176-335-7
%F chen-etal-2025-evaluation
%X Current fine-grained error analyses by LLMs gain more and more attention in machine translation, but these analyses do not ground the errors to the reasons why the annotated text spans are erroneous. If LLMs do not know such reasons, the corrections or refinements by LLMs will be untrustworthy. In this paper, we check whether LLMs know such reasons in translation error grounding task. We manually build an evaluation resource through a bi-directional grounding scheme. In the forward direction, we annotate the explanation of the reason for each error span. In the backward direction, we annotate the error span given its explanation, in which the error span is masked. If the error spans of both directions are consistent, we deem the explanation is valid. Such grounding process can regulate the explanation so as to avoid the subjective bias. The evaluation results on this resource show that LLMs perform significantly worse than human in both directions. Furthermore, we apply the error grounding for filtering false alarmed errors, and achieve significant improvement in translation error detection.
%R 10.18653/v1/2025.findings-emnlp.1299
%U https://aclanthology.org/2025.findings-emnlp.1299/
%U https://doi.org/10.18653/v1/2025.findings-emnlp.1299
%P 23900-23916
Markdown (Informal)
[An Evaluation Resource for Grounding Translation Errors](https://aclanthology.org/2025.findings-emnlp.1299/) (Chen et al., Findings 2025)
ACL
- Sujin Chen, Kang Wang, Zixuan Zhou, Xiangyu Duan, Wanqun Zhang, Hao Yang, Jinsong Su, and Min Zhang. 2025. An Evaluation Resource for Grounding Translation Errors. In Findings of the Association for Computational Linguistics: EMNLP 2025, pages 23900–23916, Suzhou, China. Association for Computational Linguistics.