@inproceedings{szymanski-gorman-2020-best,
title = "Is the Best Better? {B}ayesian Statistical Model Comparison for Natural Language Processing",
author = "Szyma{\'n}ski, Piotr and
Gorman, Kyle",
editor = "Webber, Bonnie and
Cohn, Trevor and
He, Yulan and
Liu, Yang",
booktitle = "Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP)",
month = nov,
year = "2020",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2020.emnlp-main.172/",
doi = "10.18653/v1/2020.emnlp-main.172",
pages = "2203--2212",
abstract = "Recent work raises concerns about the use of standard splits to compare natural language processing models. We propose a Bayesian statistical model comparison technique which uses k-fold cross-validation across multiple data sets to estimate the likelihood that one model will outperform the other, or that the two will produce practically equivalent results. We use this technique to rank six English part-of-speech taggers across two data sets and three evaluation metrics."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="szymanski-gorman-2020-best">
<titleInfo>
<title>Is the Best Better? Bayesian Statistical Model Comparison for Natural Language Processing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Piotr</namePart>
<namePart type="family">Szymański</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kyle</namePart>
<namePart type="family">Gorman</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Bonnie</namePart>
<namePart type="family">Webber</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Trevor</namePart>
<namePart type="family">Cohn</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yulan</namePart>
<namePart type="family">He</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yang</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Recent work raises concerns about the use of standard splits to compare natural language processing models. We propose a Bayesian statistical model comparison technique which uses k-fold cross-validation across multiple data sets to estimate the likelihood that one model will outperform the other, or that the two will produce practically equivalent results. We use this technique to rank six English part-of-speech taggers across two data sets and three evaluation metrics.</abstract>
<identifier type="citekey">szymanski-gorman-2020-best</identifier>
<identifier type="doi">10.18653/v1/2020.emnlp-main.172</identifier>
<location>
<url>https://aclanthology.org/2020.emnlp-main.172/</url>
</location>
<part>
<date>2020-11</date>
<extent unit="page">
<start>2203</start>
<end>2212</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Is the Best Better? Bayesian Statistical Model Comparison for Natural Language Processing
%A Szymański, Piotr
%A Gorman, Kyle
%Y Webber, Bonnie
%Y Cohn, Trevor
%Y He, Yulan
%Y Liu, Yang
%S Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP)
%D 2020
%8 November
%I Association for Computational Linguistics
%C Online
%F szymanski-gorman-2020-best
%X Recent work raises concerns about the use of standard splits to compare natural language processing models. We propose a Bayesian statistical model comparison technique which uses k-fold cross-validation across multiple data sets to estimate the likelihood that one model will outperform the other, or that the two will produce practically equivalent results. We use this technique to rank six English part-of-speech taggers across two data sets and three evaluation metrics.
%R 10.18653/v1/2020.emnlp-main.172
%U https://aclanthology.org/2020.emnlp-main.172/
%U https://doi.org/10.18653/v1/2020.emnlp-main.172
%P 2203-2212
Markdown (Informal)
[Is the Best Better? Bayesian Statistical Model Comparison for Natural Language Processing](https://aclanthology.org/2020.emnlp-main.172/) (Szymański & Gorman, EMNLP 2020)
ACL