@inproceedings{ye-etal-2023-assessing,
title = "Assessing Step-by-Step Reasoning against Lexical Negation: A Case Study on Syllogism",
author = "Ye, Mengyu and
Kuribayashi, Tatsuki and
Suzuki, Jun and
Kobayashi, Goro and
Funayama, Hiroaki",
editor = "Bouamor, Houda and
Pino, Juan and
Bali, Kalika",
booktitle = "Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing",
month = dec,
year = "2023",
address = "Singapore",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.emnlp-main.912",
doi = "10.18653/v1/2023.emnlp-main.912",
pages = "14753--14773",
abstract = "Large language models (LLMs) take advantage of step-by-step reasoning instructions, e.g., chain-of-thought (CoT) prompting. Building on this, their ability to perform CoT-style reasoning robustly is of interest from a probing perspective. In this study, we inspect the step-by-step reasoning ability of LLMs with a focus on negation, which is a core linguistic phenomenon that is difficult to process. In particular, we introduce several controlled settings (e.g., reasoning in case of fictional entities) to evaluate the logical reasoning abilities of the models. We observed that dozens of modern LLMs were not robust against lexical negation (e.g., plausible$\rightarrow$implausible) when performing CoT-style reasoning, and the results highlight unique limitations in each LLM family.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="ye-etal-2023-assessing">
<titleInfo>
<title>Assessing Step-by-Step Reasoning against Lexical Negation: A Case Study on Syllogism</title>
</titleInfo>
<name type="personal">
<namePart type="given">Mengyu</namePart>
<namePart type="family">Ye</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tatsuki</namePart>
<namePart type="family">Kuribayashi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jun</namePart>
<namePart type="family">Suzuki</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Goro</namePart>
<namePart type="family">Kobayashi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hiroaki</namePart>
<namePart type="family">Funayama</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Houda</namePart>
<namePart type="family">Bouamor</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Juan</namePart>
<namePart type="family">Pino</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kalika</namePart>
<namePart type="family">Bali</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Singapore</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Large language models (LLMs) take advantage of step-by-step reasoning instructions, e.g., chain-of-thought (CoT) prompting. Building on this, their ability to perform CoT-style reasoning robustly is of interest from a probing perspective. In this study, we inspect the step-by-step reasoning ability of LLMs with a focus on negation, which is a core linguistic phenomenon that is difficult to process. In particular, we introduce several controlled settings (e.g., reasoning in case of fictional entities) to evaluate the logical reasoning abilities of the models. We observed that dozens of modern LLMs were not robust against lexical negation (e.g., plausible → implausible) when performing CoT-style reasoning, and the results highlight unique limitations in each LLM family.</abstract>
<identifier type="citekey">ye-etal-2023-assessing</identifier>
<identifier type="doi">10.18653/v1/2023.emnlp-main.912</identifier>
<location>
<url>https://aclanthology.org/2023.emnlp-main.912</url>
</location>
<part>
<date>2023-12</date>
<extent unit="page">
<start>14753</start>
<end>14773</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Assessing Step-by-Step Reasoning against Lexical Negation: A Case Study on Syllogism
%A Ye, Mengyu
%A Kuribayashi, Tatsuki
%A Suzuki, Jun
%A Kobayashi, Goro
%A Funayama, Hiroaki
%Y Bouamor, Houda
%Y Pino, Juan
%Y Bali, Kalika
%S Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing
%D 2023
%8 December
%I Association for Computational Linguistics
%C Singapore
%F ye-etal-2023-assessing
%X Large language models (LLMs) take advantage of step-by-step reasoning instructions, e.g., chain-of-thought (CoT) prompting. Building on this, their ability to perform CoT-style reasoning robustly is of interest from a probing perspective. In this study, we inspect the step-by-step reasoning ability of LLMs with a focus on negation, which is a core linguistic phenomenon that is difficult to process. In particular, we introduce several controlled settings (e.g., reasoning in case of fictional entities) to evaluate the logical reasoning abilities of the models. We observed that dozens of modern LLMs were not robust against lexical negation (e.g., plausible → implausible) when performing CoT-style reasoning, and the results highlight unique limitations in each LLM family.
%R 10.18653/v1/2023.emnlp-main.912
%U https://aclanthology.org/2023.emnlp-main.912
%U https://doi.org/10.18653/v1/2023.emnlp-main.912
%P 14753-14773
Markdown (Informal)
[Assessing Step-by-Step Reasoning against Lexical Negation: A Case Study on Syllogism](https://aclanthology.org/2023.emnlp-main.912) (Ye et al., EMNLP 2023)
ACL
Mengyu Ye, Tatsuki Kuribayashi, Jun Suzuki, Goro Kobayashi, and Hiroaki Funayama. 2023. [Assessing Step-by-Step Reasoning against Lexical Negation: A Case Study on Syllogism](https://aclanthology.org/2023.emnlp-main.912). In Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing, pages 14753–14773, Singapore. Association for Computational Linguistics.