@inproceedings{yimam-etal-2020-automatic,
title = "Automatic Compilation of Resources for Academic Writing and Evaluating with Informal Word Identification and Paraphrasing System",
author = "Yimam, Seid Muhie and
Venkatesh, Gopalakrishnan and
Lee, John and
Biemann, Chris",
editor = "Calzolari, Nicoletta and
B{\'e}chet, Fr{\'e}d{\'e}ric and
Blache, Philippe and
Choukri, Khalid and
Cieri, Christopher and
Declerck, Thierry and
Goggi, Sara and
Isahara, Hitoshi and
Maegaard, Bente and
Mariani, Joseph and
Mazo, H{\'e}l{\`e}ne and
Moreno, Asuncion and
Odijk, Jan and
Piperidis, Stelios",
booktitle = "Proceedings of the Twelfth Language Resources and Evaluation Conference",
month = may,
year = "2020",
address = "Marseille, France",
publisher = "European Language Resources Association",
url = "https://aclanthology.org/2020.lrec-1.722",
pages = "5896--5904",
abstract = "We present the first approach to automatically building resources for academic writing. The aim is to build a writing aid system that automatically edits a text so that it better adheres to the academic style of writing. On top of existing academic resources, such as the Corpus of Contemporary American English (COCA) academic Word List, the New Academic Word List, and the Academic Collocation List, we also explore how to dynamically build such resources that would be used to automatically identify informal or non-academic words or phrases. The resources are compiled using different generic approaches that can be extended for different domains and languages. We describe the evaluation of resources with a system implementation. The system consists of an informal word identification (IWI), academic candidate paraphrase generation, and paraphrase ranking components. To generate candidates and rank them in context, we have used the PPDB and WordNet paraphrase resources. We use the Concepts in Context (CoInCO) {``}All-Words{''} lexical substitution dataset both for the informal word identification and paraphrase generation experiments. Our informal word identification component achieves an F-1 score of 82{\%}, significantly outperforming a stratified classifier baseline. The main contribution of this work is a domain-independent methodology to build targeted resources for writing aids.",
language = "English",
ISBN = "979-10-95546-34-4",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="yimam-etal-2020-automatic">
<titleInfo>
<title>Automatic Compilation of Resources for Academic Writing and Evaluating with Informal Word Identification and Paraphrasing System</title>
</titleInfo>
<name type="personal">
<namePart type="given">Seid</namePart>
<namePart type="given">Muhie</namePart>
<namePart type="family">Yimam</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Gopalakrishnan</namePart>
<namePart type="family">Venkatesh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">John</namePart>
<namePart type="family">Lee</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chris</namePart>
<namePart type="family">Biemann</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<language>
<languageTerm type="text">English</languageTerm>
<languageTerm type="code" authority="iso639-2b">eng</languageTerm>
</language>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Twelfth Language Resources and Evaluation Conference</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Frédéric</namePart>
<namePart type="family">Béchet</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Philippe</namePart>
<namePart type="family">Blache</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khalid</namePart>
<namePart type="family">Choukri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Christopher</namePart>
<namePart type="family">Cieri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Thierry</namePart>
<namePart type="family">Declerck</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sara</namePart>
<namePart type="family">Goggi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hitoshi</namePart>
<namePart type="family">Isahara</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bente</namePart>
<namePart type="family">Maegaard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joseph</namePart>
<namePart type="family">Mariani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hélène</namePart>
<namePart type="family">Mazo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Asuncion</namePart>
<namePart type="family">Moreno</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jan</namePart>
<namePart type="family">Odijk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stelios</namePart>
<namePart type="family">Piperidis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association</publisher>
<place>
<placeTerm type="text">Marseille, France</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-10-95546-34-4</identifier>
</relatedItem>
<abstract>We present the first approach to automatically building resources for academic writing. The aim is to build a writing aid system that automatically edits a text so that it better adheres to the academic style of writing. On top of existing academic resources, such as the Corpus of Contemporary American English (COCA) academic Word List, the New Academic Word List, and the Academic Collocation List, we also explore how to dynamically build such resources that would be used to automatically identify informal or non-academic words or phrases. The resources are compiled using different generic approaches that can be extended for different domains and languages. We describe the evaluation of resources with a system implementation. The system consists of an informal word identification (IWI), academic candidate paraphrase generation, and paraphrase ranking components. To generate candidates and rank them in context, we have used the PPDB and WordNet paraphrase resources. We use the Concepts in Context (CoInCO) “All-Words” lexical substitution dataset both for the informal word identification and paraphrase generation experiments. Our informal word identification component achieves an F-1 score of 82%, significantly outperforming a stratified classifier baseline. The main contribution of this work is a domain-independent methodology to build targeted resources for writing aids.</abstract>
<identifier type="citekey">yimam-etal-2020-automatic</identifier>
<location>
<url>https://aclanthology.org/2020.lrec-1.722</url>
</location>
<part>
<date>2020-05</date>
<extent unit="page">
<start>5896</start>
<end>5904</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Automatic Compilation of Resources for Academic Writing and Evaluating with Informal Word Identification and Paraphrasing System
%A Yimam, Seid Muhie
%A Venkatesh, Gopalakrishnan
%A Lee, John
%A Biemann, Chris
%Y Calzolari, Nicoletta
%Y Béchet, Frédéric
%Y Blache, Philippe
%Y Choukri, Khalid
%Y Cieri, Christopher
%Y Declerck, Thierry
%Y Goggi, Sara
%Y Isahara, Hitoshi
%Y Maegaard, Bente
%Y Mariani, Joseph
%Y Mazo, Hélène
%Y Moreno, Asuncion
%Y Odijk, Jan
%Y Piperidis, Stelios
%S Proceedings of the Twelfth Language Resources and Evaluation Conference
%D 2020
%8 May
%I European Language Resources Association
%C Marseille, France
%@ 979-10-95546-34-4
%G English
%F yimam-etal-2020-automatic
%X We present the first approach to automatically building resources for academic writing. The aim is to build a writing aid system that automatically edits a text so that it better adheres to the academic style of writing. On top of existing academic resources, such as the Corpus of Contemporary American English (COCA) academic Word List, the New Academic Word List, and the Academic Collocation List, we also explore how to dynamically build such resources that would be used to automatically identify informal or non-academic words or phrases. The resources are compiled using different generic approaches that can be extended for different domains and languages. We describe the evaluation of resources with a system implementation. The system consists of an informal word identification (IWI), academic candidate paraphrase generation, and paraphrase ranking components. To generate candidates and rank them in context, we have used the PPDB and WordNet paraphrase resources. We use the Concepts in Context (CoInCO) “All-Words” lexical substitution dataset both for the informal word identification and paraphrase generation experiments. Our informal word identification component achieves an F-1 score of 82%, significantly outperforming a stratified classifier baseline. The main contribution of this work is a domain-independent methodology to build targeted resources for writing aids.
%U https://aclanthology.org/2020.lrec-1.722
%P 5896-5904
Markdown (Informal)
[Automatic Compilation of Resources for Academic Writing and Evaluating with Informal Word Identification and Paraphrasing System](https://aclanthology.org/2020.lrec-1.722) (Yimam et al., LREC 2020)
ACL