% Conference paper (COLING 2020); ACL Anthology ID 2020.coling-main.295.
% The proper noun in the title is brace-protected as a whole word so that
% sentence-casing styles keep its capital without breaking kerning.
@inproceedings{sato-heffernan-2020-homonym,
  title     = {Homonym normalisation by word sense clustering: a case in {Japanese}},
  author    = {Sato, Yo and
               Heffernan, Kevin},
  editor    = {Scott, Donia and
               Bel, Nuria and
               Zong, Chengqing},
  booktitle = {Proceedings of the 28th International Conference on Computational Linguistics},
  month     = dec,
  year      = {2020},
  address   = {Barcelona, Spain (Online)},
  publisher = {International Committee on Computational Linguistics},
  url       = {https://aclanthology.org/2020.coling-main.295},
  doi       = {10.18653/v1/2020.coling-main.295},
  pages     = {3324--3332},
  abstract  = {This work presents a method of word sense clustering that differentiates homonyms and merge homophones, taking Japanese as an example, where orthographical variation causes problem for language processing. It uses contextualised embeddings (BERT) to cluster tokens into distinct sense groups, and we use these groups to normalise synonymous instances to a single representative form. We see the benefit of this normalisation in language model, as well as in transliteration.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <!-- MODS v3 record for a single paper; the host relatedItem below describes the proceedings volume. -->
  <mods ID="sato-heffernan-2020-homonym">
    <titleInfo>
      <title>Homonym normalisation by word sense clustering: a case in Japanese</title>
    </titleInfo>
    <!-- Paper authors, in order. -->
    <name type="personal">
      <namePart type="given">Yo</namePart>
      <namePart type="family">Sato</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Kevin</namePart>
      <namePart type="family">Heffernan</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2020-12</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, its editors, publisher and place. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 28th International Conference on Computational Linguistics</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Donia</namePart>
        <namePart type="family">Scott</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Nuria</namePart>
        <namePart type="family">Bel</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Chengqing</namePart>
        <namePart type="family">Zong</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>International Committee on Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Barcelona, Spain (Online)</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>This work presents a method of word sense clustering that differentiates homonyms and merge homophones, taking Japanese as an example, where orthographical variation causes problem for language processing. It uses contextualised embeddings (BERT) to cluster tokens into distinct sense groups, and we use these groups to normalise synonymous instances to a single representative form. We see the benefit of this normalisation in language model, as well as in transliteration.</abstract>
    <!-- Identifiers and access location for the paper. -->
    <identifier type="citekey">sato-heffernan-2020-homonym</identifier>
    <identifier type="doi">10.18653/v1/2020.coling-main.295</identifier>
    <location>
      <url>https://aclanthology.org/2020.coling-main.295</url>
    </location>
    <!-- Page extent of the paper within the host volume. -->
    <part>
      <date>2020-12</date>
      <extent unit="page">
        <start>3324</start>
        <end>3332</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Homonym normalisation by word sense clustering: a case in Japanese
%A Sato, Yo
%A Heffernan, Kevin
%Y Scott, Donia
%Y Bel, Nuria
%Y Zong, Chengqing
%S Proceedings of the 28th International Conference on Computational Linguistics
%D 2020
%8 December
%I International Committee on Computational Linguistics
%C Barcelona, Spain (Online)
%F sato-heffernan-2020-homonym
%X This work presents a method of word sense clustering that differentiates homonyms and merge homophones, taking Japanese as an example, where orthographical variation causes problem for language processing. It uses contextualised embeddings (BERT) to cluster tokens into distinct sense groups, and we use these groups to normalise synonymous instances to a single representative form. We see the benefit of this normalisation in language model, as well as in transliteration.
%R 10.18653/v1/2020.coling-main.295
%U https://aclanthology.org/2020.coling-main.295
%U https://doi.org/10.18653/v1/2020.coling-main.295
%P 3324-3332
Markdown (Informal)
[Homonym normalisation by word sense clustering: a case in Japanese](https://aclanthology.org/2020.coling-main.295) (Sato & Heffernan, COLING 2020)
ACL