@inproceedings{sato-etal-2017-distributed,
title = "Distributed Document and Phrase Co-embeddings for Descriptive Clustering",
author = "Sato, Motoki and
Brockmeier, Austin J. and
Kontonatsios, Georgios and
Mu, Tingting and
Goulermas, John Y. and
Tsujii, Jun{'}ichi and
Ananiadou, Sophia",
editor = "Lapata, Mirella and
Blunsom, Phil and
Koller, Alexander",
booktitle = "Proceedings of the 15th Conference of the {E}uropean Chapter of the Association for Computational Linguistics: Volume 1, Long Papers",
month = apr,
year = "2017",
address = "Valencia, Spain",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/E17-1093",
pages = "991--1001",
abstract = "Descriptive document clustering aims to automatically discover groups of semantically related documents and to assign a meaningful label to characterise the content of each cluster. In this paper, we present a descriptive clustering approach that employs a distributed representation model, namely the paragraph vector model, to capture semantic similarities between documents and phrases. The proposed method uses a joint representation of phrases and documents (i.e., a co-embedding) to automatically select a descriptive phrase that best represents each document cluster. We evaluate our method by comparing its performance to an existing state-of-the-art descriptive clustering method that also uses co-embedding but relies on a bag-of-words representation. Results obtained on benchmark datasets demonstrate that the paragraph vector-based method obtains superior performance over the existing approach in both identifying clusters and assigning appropriate descriptive labels to them.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="sato-etal-2017-distributed">
<titleInfo>
<title>Distributed Document and Phrase Co-embeddings for Descriptive Clustering</title>
</titleInfo>
<name type="personal">
<namePart type="given">Motoki</namePart>
<namePart type="family">Sato</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Austin</namePart>
<namePart type="given">J</namePart>
<namePart type="family">Brockmeier</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Georgios</namePart>
<namePart type="family">Kontonatsios</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tingting</namePart>
<namePart type="family">Mu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">John</namePart>
<namePart type="given">Y</namePart>
<namePart type="family">Goulermas</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jun’ichi</namePart>
<namePart type="family">Tsujii</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sophia</namePart>
<namePart type="family">Ananiadou</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2017-04</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 1, Long Papers</title>
</titleInfo>
<name type="personal">
<namePart type="given">Mirella</namePart>
<namePart type="family">Lapata</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Phil</namePart>
<namePart type="family">Blunsom</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alexander</namePart>
<namePart type="family">Koller</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Valencia, Spain</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Descriptive document clustering aims to automatically discover groups of semantically related documents and to assign a meaningful label to characterise the content of each cluster. In this paper, we present a descriptive clustering approach that employs a distributed representation model, namely the paragraph vector model, to capture semantic similarities between documents and phrases. The proposed method uses a joint representation of phrases and documents (i.e., a co-embedding) to automatically select a descriptive phrase that best represents each document cluster. We evaluate our method by comparing its performance to an existing state-of-the-art descriptive clustering method that also uses co-embedding but relies on a bag-of-words representation. Results obtained on benchmark datasets demonstrate that the paragraph vector-based method obtains superior performance over the existing approach in both identifying clusters and assigning appropriate descriptive labels to them.</abstract>
<identifier type="citekey">sato-etal-2017-distributed</identifier>
<location>
<url>https://aclanthology.org/E17-1093</url>
</location>
<part>
<date>2017-04</date>
<extent unit="page">
<start>991</start>
<end>1001</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Distributed Document and Phrase Co-embeddings for Descriptive Clustering
%A Sato, Motoki
%A Brockmeier, Austin J.
%A Kontonatsios, Georgios
%A Mu, Tingting
%A Goulermas, John Y.
%A Tsujii, Jun’ichi
%A Ananiadou, Sophia
%Y Lapata, Mirella
%Y Blunsom, Phil
%Y Koller, Alexander
%S Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 1, Long Papers
%D 2017
%8 April
%I Association for Computational Linguistics
%C Valencia, Spain
%F sato-etal-2017-distributed
%X Descriptive document clustering aims to automatically discover groups of semantically related documents and to assign a meaningful label to characterise the content of each cluster. In this paper, we present a descriptive clustering approach that employs a distributed representation model, namely the paragraph vector model, to capture semantic similarities between documents and phrases. The proposed method uses a joint representation of phrases and documents (i.e., a co-embedding) to automatically select a descriptive phrase that best represents each document cluster. We evaluate our method by comparing its performance to an existing state-of-the-art descriptive clustering method that also uses co-embedding but relies on a bag-of-words representation. Results obtained on benchmark datasets demonstrate that the paragraph vector-based method obtains superior performance over the existing approach in both identifying clusters and assigning appropriate descriptive labels to them.
%U https://aclanthology.org/E17-1093
%P 991-1001
Markdown (Informal)
[Distributed Document and Phrase Co-embeddings for Descriptive Clustering](https://aclanthology.org/E17-1093) (Sato et al., EACL 2017)
ACL
- Motoki Sato, Austin J. Brockmeier, Georgios Kontonatsios, Tingting Mu, John Y. Goulermas, Jun’ichi Tsujii, and Sophia Ananiadou. 2017. Distributed Document and Phrase Co-embeddings for Descriptive Clustering. In Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 1, Long Papers, pages 991–1001, Valencia, Spain. Association for Computational Linguistics.