BibTeX
@inproceedings{krasner-etal-2022-revisiting,
title = "Revisiting the Effects of Leakage on Dependency Parsing",
author = "Krasner, Nathaniel and
Wanner, Miriam and
Anastasopoulos, Antonios",
editor = "Muresan, Smaranda and
Nakov, Preslav and
Villavicencio, Aline",
booktitle = "Findings of the Association for Computational Linguistics: ACL 2022",
month = may,
year = "2022",
address = "Dublin, Ireland",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.findings-acl.230",
doi = "10.18653/v1/2022.findings-acl.230",
pages = "2925--2934",
abstract = "Recent work by S{\o}gaard (2020) showed that, treebank size aside, overlap between training and test graphs (termed \textit{leakage}) explains more of the observed variation in dependency parsing performance than other explanations. In this work we revisit this claim, testing it on more models and languages. We find that it only holds for zero-shot cross-lingual settings. We then propose a more fine-grained measure of such leakage which, unlike the original measure, not only explains but also correlates with observed performance variation. Code and data are available here: \url{https://github.com/miriamwanner/reu-nlp-project}",
}
MODS XML
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="krasner-etal-2022-revisiting">
    <titleInfo>
      <title>Revisiting the Effects of Leakage on Dependency Parsing</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Nathaniel</namePart>
      <namePart type="family">Krasner</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Miriam</namePart>
      <namePart type="family">Wanner</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Antonios</namePart>
      <namePart type="family">Anastasopoulos</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2022-05</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Findings of the Association for Computational Linguistics: ACL 2022</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Smaranda</namePart>
        <namePart type="family">Muresan</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Preslav</namePart>
        <namePart type="family">Nakov</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Aline</namePart>
        <namePart type="family">Villavicencio</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Dublin, Ireland</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Recent work by Søgaard (2020) showed that, treebank size aside, overlap between training and test graphs (termed leakage) explains more of the observed variation in dependency parsing performance than other explanations. In this work we revisit this claim, testing it on more models and languages. We find that it only holds for zero-shot cross-lingual settings. We then propose a more fine-grained measure of such leakage which, unlike the original measure, not only explains but also correlates with observed performance variation. Code and data are available here: https://github.com/miriamwanner/reu-nlp-project</abstract>
    <identifier type="citekey">krasner-etal-2022-revisiting</identifier>
    <identifier type="doi">10.18653/v1/2022.findings-acl.230</identifier>
    <location>
      <url>https://aclanthology.org/2022.findings-acl.230</url>
    </location>
    <part>
      <date>2022-05</date>
      <extent unit="page">
        <start>2925</start>
        <end>2934</end>
      </extent>
    </part>
  </mods>
</modsCollection>
Endnote
%0 Conference Proceedings
%T Revisiting the Effects of Leakage on Dependency Parsing
%A Krasner, Nathaniel
%A Wanner, Miriam
%A Anastasopoulos, Antonios
%Y Muresan, Smaranda
%Y Nakov, Preslav
%Y Villavicencio, Aline
%S Findings of the Association for Computational Linguistics: ACL 2022
%D 2022
%8 May
%I Association for Computational Linguistics
%C Dublin, Ireland
%F krasner-etal-2022-revisiting
%X Recent work by Søgaard (2020) showed that, treebank size aside, overlap between training and test graphs (termed leakage) explains more of the observed variation in dependency parsing performance than other explanations. In this work we revisit this claim, testing it on more models and languages. We find that it only holds for zero-shot cross-lingual settings. We then propose a more fine-grained measure of such leakage which, unlike the original measure, not only explains but also correlates with observed performance variation. Code and data are available here: https://github.com/miriamwanner/reu-nlp-project
%R 10.18653/v1/2022.findings-acl.230
%U https://aclanthology.org/2022.findings-acl.230
%U https://doi.org/10.18653/v1/2022.findings-acl.230
%P 2925-2934
Markdown (Informal)
[Revisiting the Effects of Leakage on Dependency Parsing](https://aclanthology.org/2022.findings-acl.230) (Krasner et al., Findings 2022)
ACL
Nathaniel Krasner, Miriam Wanner, and Antonios Anastasopoulos. 2022. Revisiting the Effects of Leakage on Dependency Parsing. In Findings of the Association for Computational Linguistics: ACL 2022, pages 2925–2934, Dublin, Ireland. Association for Computational Linguistics.