@inproceedings{saeedi-etal-2020-cs,
title = "{CS}-{NLP} Team at {S}em{E}val-2020 Task 4: Evaluation of State-of-the-art {NLP} Deep Learning Architectures on Commonsense Reasoning Task",
author = "Saeedi, Sirwe and
Panahi, Aliakbar and
Saeedi, Seyran and
C Fong, Alvis",
editor = "Herbelot, Aurelie and
Zhu, Xiaodan and
Palmer, Alexis and
Schneider, Nathan and
May, Jonathan and
Shutova, Ekaterina",
booktitle = "Proceedings of the Fourteenth Workshop on Semantic Evaluation",
month = dec,
year = "2020",
address = "Barcelona (online)",
publisher = "International Committee for Computational Linguistics",
url = "https://aclanthology.org/2020.semeval-1.62/",
doi = "10.18653/v1/2020.semeval-1.62",
pages = "507--515",
abstract = "In this paper, we investigate a commonsense inference task that unifies natural language understanding and commonsense reasoning. We describe our attempt at SemEval-2020 Task 4 competition: Commonsense Validation and Explanation (ComVE) challenge. We discuss several state-of-the-art deep learning architectures for this challenge. Our system uses prepared labeled textual datasets that were manually curated for three different natural language inference subtasks. The goal of the first subtask is to test whether a model can distinguish between natural language statements that make sense and those that do not make sense. We compare the performance of several language models and fine-tuned classifiers. Then, we propose a method inspired by question/answering tasks to treat a classification problem as a multiple choice question task to boost the performance of our experimental results (96.06{\%}), which is significantly better than the baseline. For the second subtask, which is to select the reason why a statement does not make sense, we stand within the first six teams (93.7{\%}) among 27 participants with very competitive results. Our result for last subtask of generating reason against the nonsense statement shows many potentials for future researches as we applied the most powerful generative model of language (GPT-2) with 6.1732 BLEU score among first four teams. ."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="saeedi-etal-2020-cs">
<titleInfo>
<title>CS-NLP Team at SemEval-2020 Task 4: Evaluation of State-of-the-art NLP Deep Learning Architectures on Commonsense Reasoning Task</title>
</titleInfo>
<name type="personal">
<namePart type="given">Sirwe</namePart>
<namePart type="family">Saeedi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Aliakbar</namePart>
<namePart type="family">Panahi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Seyran</namePart>
<namePart type="family">Saeedi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alvis</namePart>
<namePart type="family">C Fong</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Fourteenth Workshop on Semantic Evaluation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Aurelie</namePart>
<namePart type="family">Herbelot</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xiaodan</namePart>
<namePart type="family">Zhu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alexis</namePart>
<namePart type="family">Palmer</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nathan</namePart>
<namePart type="family">Schneider</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jonathan</namePart>
<namePart type="family">May</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ekaterina</namePart>
<namePart type="family">Shutova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>International Committee for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Barcelona (online)</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In this paper, we investigate a commonsense inference task that unifies natural language understanding and commonsense reasoning. We describe our attempt at SemEval-2020 Task 4 competition: Commonsense Validation and Explanation (ComVE) challenge. We discuss several state-of-the-art deep learning architectures for this challenge. Our system uses prepared labeled textual datasets that were manually curated for three different natural language inference subtasks. The goal of the first subtask is to test whether a model can distinguish between natural language statements that make sense and those that do not make sense. We compare the performance of several language models and fine-tuned classifiers. Then, we propose a method inspired by question/answering tasks to treat a classification problem as a multiple choice question task to boost the performance of our experimental results (96.06%), which is significantly better than the baseline. For the second subtask, which is to select the reason why a statement does not make sense, we stand within the first six teams (93.7%) among 27 participants with very competitive results. Our result for last subtask of generating reason against the nonsense statement shows many potentials for future researches as we applied the most powerful generative model of language (GPT-2) with 6.1732 BLEU score among first four teams. .</abstract>
<identifier type="citekey">saeedi-etal-2020-cs</identifier>
<identifier type="doi">10.18653/v1/2020.semeval-1.62</identifier>
<location>
<url>https://aclanthology.org/2020.semeval-1.62/</url>
</location>
<part>
<date>2020-12</date>
<extent unit="page">
<start>507</start>
<end>515</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T CS-NLP Team at SemEval-2020 Task 4: Evaluation of State-of-the-art NLP Deep Learning Architectures on Commonsense Reasoning Task
%A Saeedi, Sirwe
%A Panahi, Aliakbar
%A Saeedi, Seyran
%A C Fong, Alvis
%Y Herbelot, Aurelie
%Y Zhu, Xiaodan
%Y Palmer, Alexis
%Y Schneider, Nathan
%Y May, Jonathan
%Y Shutova, Ekaterina
%S Proceedings of the Fourteenth Workshop on Semantic Evaluation
%D 2020
%8 December
%I International Committee for Computational Linguistics
%C Barcelona (online)
%F saeedi-etal-2020-cs
%X In this paper, we investigate a commonsense inference task that unifies natural language understanding and commonsense reasoning. We describe our attempt at SemEval-2020 Task 4 competition: Commonsense Validation and Explanation (ComVE) challenge. We discuss several state-of-the-art deep learning architectures for this challenge. Our system uses prepared labeled textual datasets that were manually curated for three different natural language inference subtasks. The goal of the first subtask is to test whether a model can distinguish between natural language statements that make sense and those that do not make sense. We compare the performance of several language models and fine-tuned classifiers. Then, we propose a method inspired by question/answering tasks to treat a classification problem as a multiple choice question task to boost the performance of our experimental results (96.06%), which is significantly better than the baseline. For the second subtask, which is to select the reason why a statement does not make sense, we stand within the first six teams (93.7%) among 27 participants with very competitive results. Our result for last subtask of generating reason against the nonsense statement shows many potentials for future researches as we applied the most powerful generative model of language (GPT-2) with 6.1732 BLEU score among first four teams. .
%R 10.18653/v1/2020.semeval-1.62
%U https://aclanthology.org/2020.semeval-1.62/
%U https://doi.org/10.18653/v1/2020.semeval-1.62
%P 507-515
Markdown (Informal)
[CS-NLP Team at SemEval-2020 Task 4: Evaluation of State-of-the-art NLP Deep Learning Architectures on Commonsense Reasoning Task](https://aclanthology.org/2020.semeval-1.62/) (Saeedi et al., SemEval 2020)
ACL