@inproceedings{hayakawa-arase-2020-fine,
title = "Fine-Grained Error Analysis on {E}nglish-to-{J}apanese Machine Translation in the Medical Domain",
author = "Hayakawa, Takeshi and
Arase, Yuki",
editor = "Martins, Andr{\'e} and
Moniz, Helena and
Fumega, Sara and
Martins, Bruno and
Batista, Fernando and
Coheur, Luisa and
Parra, Carla and
Trancoso, Isabel and
Turchi, Marco and
Bisazza, Arianna and
Moorkens, Joss and
Guerberof, Ana and
Nurminen, Mary and
Marg, Lena and
Forcada, Mikel L.",
booktitle = "Proceedings of the 22nd Annual Conference of the European Association for Machine Translation",
month = nov,
year = "2020",
address = "Lisboa, Portugal",
publisher = "European Association for Machine Translation",
url = "https://aclanthology.org/2020.eamt-1.17/",
pages = "155--164",
abstract = "We performed a detailed error analysis in domain-specific neural machine translation (NMT) for the English and Japanese language pair with fine-grained manual annotation. Despite its importance for advancing NMT technologies, research on the performance of domain-specific NMT and non-European languages has been limited. In this study, we designed an error typology based on the error types that were typically generated by NMT systems and might cause significant impact in technical translations: {\textquotedblleft}Addition,{\textquotedblright} {\textquotedblleft}Omission,{\textquotedblright} {\textquotedblleft}Mistranslation,{\textquotedblright} {\textquotedblleft}Grammar,{\textquotedblright} and {\textquotedblleft}Terminology.{\textquotedblright} The error annotation was targeted to the medical domain and was performed by experienced professional translators specialized in medicine under careful quality control. The annotation detected 4,912 errors on 2,480 sentences, and the frequency and distribution of errors were analyzed. We found that the major errors in NMT were {\textquotedblleft}Mistranslation{\textquotedblright} and {\textquotedblleft}Terminology{\textquotedblright} rather than {\textquotedblleft}Addition{\textquotedblright} and {\textquotedblleft}Omission,{\textquotedblright} which have been reported as typical problems of NMT. Interestingly, more errors occurred in documents for professionals compared with those for the general public. The results of our annotation work will be published as a parallel corpus with error labels, which are expected to contribute to developing better NMT models, automatic evaluation metrics, and quality estimation models."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="hayakawa-arase-2020-fine">
<titleInfo>
<title>Fine-Grained Error Analysis on English-to-Japanese Machine Translation in the Medical Domain</title>
</titleInfo>
<name type="personal">
<namePart type="given">Takeshi</namePart>
<namePart type="family">Hayakawa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yuki</namePart>
<namePart type="family">Arase</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 22nd Annual Conference of the European Association for Machine Translation</title>
</titleInfo>
<name type="personal">
<namePart type="given">André</namePart>
<namePart type="family">Martins</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Helena</namePart>
<namePart type="family">Moniz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sara</namePart>
<namePart type="family">Fumega</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bruno</namePart>
<namePart type="family">Martins</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Fernando</namePart>
<namePart type="family">Batista</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Luisa</namePart>
<namePart type="family">Coheur</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Carla</namePart>
<namePart type="family">Parra</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Isabel</namePart>
<namePart type="family">Trancoso</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marco</namePart>
<namePart type="family">Turchi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Arianna</namePart>
<namePart type="family">Bisazza</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joss</namePart>
<namePart type="family">Moorkens</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ana</namePart>
<namePart type="family">Guerberof</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mary</namePart>
<namePart type="family">Nurminen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lena</namePart>
<namePart type="family">Marg</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mikel</namePart>
<namePart type="given">L</namePart>
<namePart type="family">Forcada</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Association for Machine Translation</publisher>
<place>
<placeTerm type="text">Lisboa, Portugal</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>We performed a detailed error analysis in domain-specific neural machine translation (NMT) for the English and Japanese language pair with fine-grained manual annotation. Despite its importance for advancing NMT technologies, research on the performance of domain-specific NMT and non-European languages has been limited. In this study, we designed an error typology based on the error types that were typically generated by NMT systems and might cause significant impact in technical translations: “Addition,” “Omission,” “Mistranslation,” “Grammar,” and “Terminology.” The error annotation was targeted to the medical domain and was performed by experienced professional translators specialized in medicine under careful quality control. The annotation detected 4,912 errors on 2,480 sentences, and the frequency and distribution of errors were analyzed. We found that the major errors in NMT were “Mistranslation” and “Terminology” rather than “Addition” and “Omission,” which have been reported as typical problems of NMT. Interestingly, more errors occurred in documents for professionals compared with those for the general public. The results of our annotation work will be published as a parallel corpus with error labels, which are expected to contribute to developing better NMT models, automatic evaluation metrics, and quality estimation models.</abstract>
<identifier type="citekey">hayakawa-arase-2020-fine</identifier>
<location>
<url>https://aclanthology.org/2020.eamt-1.17/</url>
</location>
<part>
<date>2020-11</date>
<extent unit="page">
<start>155</start>
<end>164</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Fine-Grained Error Analysis on English-to-Japanese Machine Translation in the Medical Domain
%A Hayakawa, Takeshi
%A Arase, Yuki
%Y Martins, André
%Y Moniz, Helena
%Y Fumega, Sara
%Y Martins, Bruno
%Y Batista, Fernando
%Y Coheur, Luisa
%Y Parra, Carla
%Y Trancoso, Isabel
%Y Turchi, Marco
%Y Bisazza, Arianna
%Y Moorkens, Joss
%Y Guerberof, Ana
%Y Nurminen, Mary
%Y Marg, Lena
%Y Forcada, Mikel L.
%S Proceedings of the 22nd Annual Conference of the European Association for Machine Translation
%D 2020
%8 November
%I European Association for Machine Translation
%C Lisboa, Portugal
%F hayakawa-arase-2020-fine
%X We performed a detailed error analysis in domain-specific neural machine translation (NMT) for the English and Japanese language pair with fine-grained manual annotation. Despite its importance for advancing NMT technologies, research on the performance of domain-specific NMT and non-European languages has been limited. In this study, we designed an error typology based on the error types that were typically generated by NMT systems and might cause significant impact in technical translations: “Addition,” “Omission,” “Mistranslation,” “Grammar,” and “Terminology.” The error annotation was targeted to the medical domain and was performed by experienced professional translators specialized in medicine under careful quality control. The annotation detected 4,912 errors on 2,480 sentences, and the frequency and distribution of errors were analyzed. We found that the major errors in NMT were “Mistranslation” and “Terminology” rather than “Addition” and “Omission,” which have been reported as typical problems of NMT. Interestingly, more errors occurred in documents for professionals compared with those for the general public. The results of our annotation work will be published as a parallel corpus with error labels, which are expected to contribute to developing better NMT models, automatic evaluation metrics, and quality estimation models.
%U https://aclanthology.org/2020.eamt-1.17/
%P 155-164
Markdown (Informal)
[Fine-Grained Error Analysis on English-to-Japanese Machine Translation in the Medical Domain](https://aclanthology.org/2020.eamt-1.17/) (Hayakawa & Arase, EAMT 2020)
ACL