@inproceedings{chordia-2021-punktuator,
title = "{P}un{K}tuator: A Multilingual Punctuation Restoration System for Spoken and Written Text",
author = "Chordia, Varnith",
editor = "Gkatzia, Dimitra and
Seddah, Djam{\'e}",
booktitle = "Proceedings of the 16th Conference of the European Chapter of the Association for Computational Linguistics: System Demonstrations",
month = apr,
year = "2021",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2021.eacl-demos.37/",
doi = "10.18653/v1/2021.eacl-demos.37",
pages = "312--320",
abstract = "Text transcripts without punctuation or sentence boundaries are hard to comprehend for both humans and machines. Punctuation marks play a vital role by providing meaning to the sentence and incorrect use or placement of punctuation marks can often alter it. This can impact downstream tasks such as language translation and understanding, pronoun resolution, text summarization, etc. for humans and machines. An automated punctuation restoration (APR) system with minimal human intervention can improve comprehension of text and help users write better. In this paper we describe a multitask modeling approach as a system to restore punctuation in multiple high resource {--} Germanic (English and German), Romanic (French){--} and low resource languages {--} Indo-Aryan (Hindi) Dravidian (Tamil) {--} that does not require extensive knowledge of grammar or syntax of a given language for both spoken and written form of text. For German language and the given Indic based languages this is the first towards restoring punctuation and can serve as a baseline for future work."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="chordia-2021-punktuator">
<titleInfo>
<title>PunKtuator: A Multilingual Punctuation Restoration System for Spoken and Written Text</title>
</titleInfo>
<name type="personal">
<namePart type="given">Varnith</namePart>
<namePart type="family">Chordia</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2021-04</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 16th Conference of the European Chapter of the Association for Computational Linguistics: System Demonstrations</title>
</titleInfo>
<name type="personal">
<namePart type="given">Dimitra</namePart>
<namePart type="family">Gkatzia</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Djamé</namePart>
<namePart type="family">Seddah</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Text transcripts without punctuation or sentence boundaries are hard to comprehend for both humans and machines. Punctuation marks play a vital role by providing meaning to the sentence and incorrect use or placement of punctuation marks can often alter it. This can impact downstream tasks such as language translation and understanding, pronoun resolution, text summarization, etc. for humans and machines. An automated punctuation restoration (APR) system with minimal human intervention can improve comprehension of text and help users write better. In this paper we describe a multitask modeling approach as a system to restore punctuation in multiple high resource – Germanic (English and German), Romanic (French)– and low resource languages – Indo-Aryan (Hindi) Dravidian (Tamil) – that does not require extensive knowledge of grammar or syntax of a given language for both spoken and written form of text. For German language and the given Indic based languages this is the first towards restoring punctuation and can serve as a baseline for future work.</abstract>
<identifier type="citekey">chordia-2021-punktuator</identifier>
<identifier type="doi">10.18653/v1/2021.eacl-demos.37</identifier>
<location>
<url>https://aclanthology.org/2021.eacl-demos.37/</url>
</location>
<part>
<date>2021-04</date>
<extent unit="page">
<start>312</start>
<end>320</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T PunKtuator: A Multilingual Punctuation Restoration System for Spoken and Written Text
%A Chordia, Varnith
%Y Gkatzia, Dimitra
%Y Seddah, Djamé
%S Proceedings of the 16th Conference of the European Chapter of the Association for Computational Linguistics: System Demonstrations
%D 2021
%8 April
%I Association for Computational Linguistics
%C Online
%F chordia-2021-punktuator
%X Text transcripts without punctuation or sentence boundaries are hard to comprehend for both humans and machines. Punctuation marks play a vital role by providing meaning to the sentence and incorrect use or placement of punctuation marks can often alter it. This can impact downstream tasks such as language translation and understanding, pronoun resolution, text summarization, etc. for humans and machines. An automated punctuation restoration (APR) system with minimal human intervention can improve comprehension of text and help users write better. In this paper we describe a multitask modeling approach as a system to restore punctuation in multiple high resource – Germanic (English and German), Romanic (French)– and low resource languages – Indo-Aryan (Hindi) Dravidian (Tamil) – that does not require extensive knowledge of grammar or syntax of a given language for both spoken and written form of text. For German language and the given Indic based languages this is the first towards restoring punctuation and can serve as a baseline for future work.
%R 10.18653/v1/2021.eacl-demos.37
%U https://aclanthology.org/2021.eacl-demos.37/
%U https://doi.org/10.18653/v1/2021.eacl-demos.37
%P 312-320
Markdown (Informal)
[PunKtuator: A Multilingual Punctuation Restoration System for Spoken and Written Text](https://aclanthology.org/2021.eacl-demos.37/) (Chordia, EACL 2021)
ACL