@inproceedings{shi-etal-2021-diversity,
title = "Diversity-Aware Batch Active Learning for Dependency Parsing",
author = "Shi, Tianze and
Benton, Adrian and
Malioutov, Igor and
{\.I}rsoy, Ozan",
editor = "Toutanova, Kristina and
Rumshisky, Anna and
Zettlemoyer, Luke and
Hakkani-Tur, Dilek and
Beltagy, Iz and
Bethard, Steven and
Cotterell, Ryan and
Chakraborty, Tanmoy and
Zhou, Yichao",
booktitle = "Proceedings of the 2021 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies",
month = jun,
year = "2021",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2021.naacl-main.207",
doi = "10.18653/v1/2021.naacl-main.207",
pages = "2616--2626",
abstract = "While the predictive performance of modern statistical dependency parsers relies heavily on the availability of expensive expert-annotated treebank data, not all annotations contribute equally to the training of the parsers. In this paper, we attempt to reduce the number of labeled examples needed to train a strong dependency parser using batch active learning (AL). In particular, we investigate whether enforcing diversity in the sampled batches, using determinantal point processes (DPPs), can improve over their diversity-agnostic counterparts. Simulation experiments on an English newswire corpus show that selecting diverse batches with DPPs is superior to strong selection strategies that do not enforce batch diversity, especially during the initial stages of the learning process. Additionally, our diversity-aware strategy is robust under a corpus duplication setting, where diversity-agnostic sampling strategies exhibit significant degradation.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="shi-etal-2021-diversity">
<titleInfo>
<title>Diversity-Aware Batch Active Learning for Dependency Parsing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Tianze</namePart>
<namePart type="family">Shi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Adrian</namePart>
<namePart type="family">Benton</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Igor</namePart>
<namePart type="family">Malioutov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ozan</namePart>
<namePart type="family">İrsoy</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2021-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2021 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies</title>
</titleInfo>
<name type="personal">
<namePart type="given">Kristina</namePart>
<namePart type="family">Toutanova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anna</namePart>
<namePart type="family">Rumshisky</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Luke</namePart>
<namePart type="family">Zettlemoyer</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dilek</namePart>
<namePart type="family">Hakkani-Tur</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Iz</namePart>
<namePart type="family">Beltagy</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Steven</namePart>
<namePart type="family">Bethard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ryan</namePart>
<namePart type="family">Cotterell</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tanmoy</namePart>
<namePart type="family">Chakraborty</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yichao</namePart>
<namePart type="family">Zhou</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>While the predictive performance of modern statistical dependency parsers relies heavily on the availability of expensive expert-annotated treebank data, not all annotations contribute equally to the training of the parsers. In this paper, we attempt to reduce the number of labeled examples needed to train a strong dependency parser using batch active learning (AL). In particular, we investigate whether enforcing diversity in the sampled batches, using determinantal point processes (DPPs), can improve over their diversity-agnostic counterparts. Simulation experiments on an English newswire corpus show that selecting diverse batches with DPPs is superior to strong selection strategies that do not enforce batch diversity, especially during the initial stages of the learning process. Additionally, our diversity-aware strategy is robust under a corpus duplication setting, where diversity-agnostic sampling strategies exhibit significant degradation.</abstract>
<identifier type="citekey">shi-etal-2021-diversity</identifier>
<identifier type="doi">10.18653/v1/2021.naacl-main.207</identifier>
<location>
<url>https://aclanthology.org/2021.naacl-main.207</url>
</location>
<part>
<date>2021-06</date>
<extent unit="page">
<start>2616</start>
<end>2626</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Diversity-Aware Batch Active Learning for Dependency Parsing
%A Shi, Tianze
%A Benton, Adrian
%A Malioutov, Igor
%A İrsoy, Ozan
%Y Toutanova, Kristina
%Y Rumshisky, Anna
%Y Zettlemoyer, Luke
%Y Hakkani-Tur, Dilek
%Y Beltagy, Iz
%Y Bethard, Steven
%Y Cotterell, Ryan
%Y Chakraborty, Tanmoy
%Y Zhou, Yichao
%S Proceedings of the 2021 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies
%D 2021
%8 June
%I Association for Computational Linguistics
%C Online
%F shi-etal-2021-diversity
%X While the predictive performance of modern statistical dependency parsers relies heavily on the availability of expensive expert-annotated treebank data, not all annotations contribute equally to the training of the parsers. In this paper, we attempt to reduce the number of labeled examples needed to train a strong dependency parser using batch active learning (AL). In particular, we investigate whether enforcing diversity in the sampled batches, using determinantal point processes (DPPs), can improve over their diversity-agnostic counterparts. Simulation experiments on an English newswire corpus show that selecting diverse batches with DPPs is superior to strong selection strategies that do not enforce batch diversity, especially during the initial stages of the learning process. Additionally, our diversity-aware strategy is robust under a corpus duplication setting, where diversity-agnostic sampling strategies exhibit significant degradation.
%R 10.18653/v1/2021.naacl-main.207
%U https://aclanthology.org/2021.naacl-main.207
%U https://doi.org/10.18653/v1/2021.naacl-main.207
%P 2616-2626
Markdown (Informal)
[Diversity-Aware Batch Active Learning for Dependency Parsing](https://aclanthology.org/2021.naacl-main.207) (Shi et al., NAACL 2021)
ACL
- Tianze Shi, Adrian Benton, Igor Malioutov, and Ozan İrsoy. 2021. Diversity-Aware Batch Active Learning for Dependency Parsing. In Proceedings of the 2021 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, pages 2616–2626, Online. Association for Computational Linguistics.