@inproceedings{coltekin-2020-dialect,
title = "Dialect Identification under Domain Shift: Experiments with Discriminating {R}omanian and {M}oldavian",
author = {{\c{C}}{\"o}ltekin, {\c{C}}a{\u{g}}r{\i}},
editor = {Zampieri, Marcos and
Nakov, Preslav and
Ljube{\v{s}}i{\'c}, Nikola and
Tiedemann, J{\"o}rg and
Scherrer, Yves},
booktitle = "Proceedings of the 7th Workshop on NLP for Similar Languages, Varieties and Dialects",
month = dec,
year = "2020",
address = "Barcelona, Spain (Online)",
publisher = "International Committee on Computational Linguistics (ICCL)",
url = "https://aclanthology.org/2020.vardial-1.17",
pages = "186--192",
abstract = "This paper describes a set of experiments for discriminating between two closely related language varieties, Moldavian and Romanian, under a substantial domain shift. The experiments were conducted as part of the Romanian dialect identification task in the VarDial 2020 evaluation campaign. Our best system based on linear SVM classifier obtained the first position in the shared task with an F1 score of 0.79, supporting the earlier results showing (unexpected) success of machine learning systems in this task. The additional experiments reported in this paper also show that adapting to the test set is useful when the training data comes from another domain. However, the benefit of adaptation becomes doubtful even when a small amount of data from the target domain is available.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="coltekin-2020-dialect">
<titleInfo>
<title>Dialect Identification under Domain Shift: Experiments with Discriminating Romanian and Moldavian</title>
</titleInfo>
<name type="personal">
<namePart type="given">Çağrı</namePart>
<namePart type="family">Çöltekin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 7th Workshop on NLP for Similar Languages, Varieties and Dialects</title>
</titleInfo>
<name type="personal">
<namePart type="given">Marcos</namePart>
<namePart type="family">Zampieri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Preslav</namePart>
<namePart type="family">Nakov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nikola</namePart>
<namePart type="family">Ljubešić</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jörg</namePart>
<namePart type="family">Tiedemann</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yves</namePart>
<namePart type="family">Scherrer</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>International Committee on Computational Linguistics (ICCL)</publisher>
<place>
<placeTerm type="text">Barcelona, Spain (Online)</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This paper describes a set of experiments for discriminating between two closely related language varieties, Moldavian and Romanian, under a substantial domain shift. The experiments were conducted as part of the Romanian dialect identification task in the VarDial 2020 evaluation campaign. Our best system based on linear SVM classifier obtained the first position in the shared task with an F1 score of 0.79, supporting the earlier results showing (unexpected) success of machine learning systems in this task. The additional experiments reported in this paper also show that adapting to the test set is useful when the training data comes from another domain. However, the benefit of adaptation becomes doubtful even when a small amount of data from the target domain is available.</abstract>
<identifier type="citekey">coltekin-2020-dialect</identifier>
<location>
<url>https://aclanthology.org/2020.vardial-1.17</url>
</location>
<part>
<date>2020-12</date>
<extent unit="page">
<start>186</start>
<end>192</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Dialect Identification under Domain Shift: Experiments with Discriminating Romanian and Moldavian
%A Çöltekin, Çağrı
%Y Zampieri, Marcos
%Y Nakov, Preslav
%Y Ljubešić, Nikola
%Y Tiedemann, Jörg
%Y Scherrer, Yves
%S Proceedings of the 7th Workshop on NLP for Similar Languages, Varieties and Dialects
%D 2020
%8 December
%I International Committee on Computational Linguistics (ICCL)
%C Barcelona, Spain (Online)
%F coltekin-2020-dialect
%X This paper describes a set of experiments for discriminating between two closely related language varieties, Moldavian and Romanian, under a substantial domain shift. The experiments were conducted as part of the Romanian dialect identification task in the VarDial 2020 evaluation campaign. Our best system based on linear SVM classifier obtained the first position in the shared task with an F1 score of 0.79, supporting the earlier results showing (unexpected) success of machine learning systems in this task. The additional experiments reported in this paper also show that adapting to the test set is useful when the training data comes from another domain. However, the benefit of adaptation becomes doubtful even when a small amount of data from the target domain is available.
%U https://aclanthology.org/2020.vardial-1.17
%P 186-192
Markdown (Informal)
[Dialect Identification under Domain Shift: Experiments with Discriminating Romanian and Moldavian](https://aclanthology.org/2020.vardial-1.17) (Çöltekin, VarDial 2020)
ACL