@inproceedings{cui-etal-2023-failure,
title = "What does the Failure to Reason with {``}Respectively{''} in Zero/Few-Shot Settings Tell Us about Language Models?",
author = "Cui, Ruixiang and
Lee, Seolhwa and
Hershcovich, Daniel and
S{\o}gaard, Anders",
editor = "Rogers, Anna and
Boyd-Graber, Jordan and
Okazaki, Naoaki",
booktitle = "Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)",
month = jul,
year = "2023",
address = "Toronto, Canada",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.acl-long.489",
doi = "10.18653/v1/2023.acl-long.489",
pages = "8786--8800",
abstract = "Humans can effortlessly understand the coordinate structure of sentences such as {``}Niels Bohr and Kurt Cobain were born in Copenhagen and Seattle, *respectively*{''}. In the context of natural language inference (NLI), we examine how language models (LMs) reason with respective readings (Gawron and Kehler, 2004) from two perspectives: syntactic-semantic and commonsense-world knowledge. We propose a controlled synthetic dataset WikiResNLI and a naturally occurring dataset NatResNLI to encompass various explicit and implicit realizations of {``}respectively{''}. We show that fine-tuned NLI models struggle with understanding such readings without explicit supervision. While few-shot learning is easy in the presence of explicit cues, longer training is required when the reading is evoked implicitly, leaving models to rely on common sense inferences. Furthermore, our fine-grained analysis indicates models fail to generalize across different constructions. To conclude, we demonstrate that LMs still lag behind humans in generalizing to the long tail of linguistic constructions.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="cui-etal-2023-failure">
<titleInfo>
<title>What does the Failure to Reason with “Respectively” in Zero/Few-Shot Settings Tell Us about Language Models?</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ruixiang</namePart>
<namePart type="family">Cui</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Seolhwa</namePart>
<namePart type="family">Lee</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Daniel</namePart>
<namePart type="family">Hershcovich</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anders</namePart>
<namePart type="family">Søgaard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Anna</namePart>
<namePart type="family">Rogers</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jordan</namePart>
<namePart type="family">Boyd-Graber</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Naoaki</namePart>
<namePart type="family">Okazaki</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Toronto, Canada</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Humans can effortlessly understand the coordinate structure of sentences such as “Niels Bohr and Kurt Cobain were born in Copenhagen and Seattle, *respectively*”. In the context of natural language inference (NLI), we examine how language models (LMs) reason with respective readings (Gawron and Kehler, 2004) from two perspectives: syntactic-semantic and commonsense-world knowledge. We propose a controlled synthetic dataset WikiResNLI and a naturally occurring dataset NatResNLI to encompass various explicit and implicit realizations of “respectively”. We show that fine-tuned NLI models struggle with understanding such readings without explicit supervision. While few-shot learning is easy in the presence of explicit cues, longer training is required when the reading is evoked implicitly, leaving models to rely on common sense inferences. Furthermore, our fine-grained analysis indicates models fail to generalize across different constructions. To conclude, we demonstrate that LMs still lag behind humans in generalizing to the long tail of linguistic constructions.</abstract>
<identifier type="citekey">cui-etal-2023-failure</identifier>
<identifier type="doi">10.18653/v1/2023.acl-long.489</identifier>
<location>
<url>https://aclanthology.org/2023.acl-long.489</url>
</location>
<part>
<date>2023-07</date>
<extent unit="page">
<start>8786</start>
<end>8800</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T What does the Failure to Reason with “Respectively” in Zero/Few-Shot Settings Tell Us about Language Models?
%A Cui, Ruixiang
%A Lee, Seolhwa
%A Hershcovich, Daniel
%A Søgaard, Anders
%Y Rogers, Anna
%Y Boyd-Graber, Jordan
%Y Okazaki, Naoaki
%S Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)
%D 2023
%8 July
%I Association for Computational Linguistics
%C Toronto, Canada
%F cui-etal-2023-failure
%X Humans can effortlessly understand the coordinate structure of sentences such as “Niels Bohr and Kurt Cobain were born in Copenhagen and Seattle, *respectively*”. In the context of natural language inference (NLI), we examine how language models (LMs) reason with respective readings (Gawron and Kehler, 2004) from two perspectives: syntactic-semantic and commonsense-world knowledge. We propose a controlled synthetic dataset WikiResNLI and a naturally occurring dataset NatResNLI to encompass various explicit and implicit realizations of “respectively”. We show that fine-tuned NLI models struggle with understanding such readings without explicit supervision. While few-shot learning is easy in the presence of explicit cues, longer training is required when the reading is evoked implicitly, leaving models to rely on common sense inferences. Furthermore, our fine-grained analysis indicates models fail to generalize across different constructions. To conclude, we demonstrate that LMs still lag behind humans in generalizing to the long tail of linguistic constructions.
%R 10.18653/v1/2023.acl-long.489
%U https://aclanthology.org/2023.acl-long.489
%U https://doi.org/10.18653/v1/2023.acl-long.489
%P 8786-8800
Markdown (Informal)
[What does the Failure to Reason with “Respectively” in Zero/Few-Shot Settings Tell Us about Language Models?](https://aclanthology.org/2023.acl-long.489) (Cui et al., ACL 2023)
ACL