@inproceedings{halbe-2020-exploring,
title = "Exploring Weaknesses of {VQA} Models through Attribution Driven Insights",
author = "Halbe, Shaunak",
editor = "Zadeh, Amir and
Morency, Louis-Philippe and
Liang, Paul Pu and
Poria, Soujanya",
booktitle = "Second Grand-Challenge and Workshop on Multimodal Language (Challenge-HML)",
month = jul,
year = "2020",
address = "Seattle, USA",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2020.challengehml-1.9",
doi = "10.18653/v1/2020.challengehml-1.9",
pages = "64--68",
abstract = "Deep Neural Networks have been successfully used for the task of Visual Question Answering for the past few years owing to the availability of relevant large scale datasets. However, these datasets are created in artificial settings and rarely reflect the real world scenario. Recent research effectively applies these VQA models for answering visual questions for the blind. Despite achieving high accuracy, these models appear to be susceptible to variation in input questions. We analyze popular VQA models through the lens of attribution (input{'}s influence on predictions) to gain valuable insights. Further, we use these insights to craft adversarial attacks which inflict significant damage to these systems with negligible change in meaning of the input questions. We believe this will enhance development of systems more robust to the possible variations in inputs when deployed to assist the visually impaired.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="halbe-2020-exploring">
<titleInfo>
<title>Exploring Weaknesses of VQA Models through Attribution Driven Insights</title>
</titleInfo>
<name type="personal">
<namePart type="given">Shaunak</namePart>
<namePart type="family">Halbe</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Second Grand-Challenge and Workshop on Multimodal Language (Challenge-HML)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Amir</namePart>
<namePart type="family">Zadeh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Louis-Philippe</namePart>
<namePart type="family">Morency</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Paul</namePart>
<namePart type="given">Pu</namePart>
<namePart type="family">Liang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Soujanya</namePart>
<namePart type="family">Poria</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Seattle, USA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
  <abstract>Deep Neural Networks have been successfully used for the task of Visual Question Answering for the past few years owing to the availability of relevant large scale datasets. However, these datasets are created in artificial settings and rarely reflect the real world scenario. Recent research effectively applies these VQA models for answering visual questions for the blind. Despite achieving high accuracy, these models appear to be susceptible to variation in input questions. We analyze popular VQA models through the lens of attribution (input’s influence on predictions) to gain valuable insights. Further, we use these insights to craft adversarial attacks which inflict significant damage to these systems with negligible change in meaning of the input questions. We believe this will enhance development of systems more robust to the possible variations in inputs when deployed to assist the visually impaired.</abstract>
<identifier type="citekey">halbe-2020-exploring</identifier>
<identifier type="doi">10.18653/v1/2020.challengehml-1.9</identifier>
<location>
<url>https://aclanthology.org/2020.challengehml-1.9</url>
</location>
<part>
<date>2020-07</date>
<extent unit="page">
<start>64</start>
<end>68</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Exploring Weaknesses of VQA Models through Attribution Driven Insights
%A Halbe, Shaunak
%Y Zadeh, Amir
%Y Morency, Louis-Philippe
%Y Liang, Paul Pu
%Y Poria, Soujanya
%S Second Grand-Challenge and Workshop on Multimodal Language (Challenge-HML)
%D 2020
%8 July
%I Association for Computational Linguistics
%C Seattle, USA
%F halbe-2020-exploring
%X Deep Neural Networks have been successfully used for the task of Visual Question Answering for the past few years owing to the availability of relevant large scale datasets. However, these datasets are created in artificial settings and rarely reflect the real world scenario. Recent research effectively applies these VQA models for answering visual questions for the blind. Despite achieving high accuracy, these models appear to be susceptible to variation in input questions. We analyze popular VQA models through the lens of attribution (input’s influence on predictions) to gain valuable insights. Further, we use these insights to craft adversarial attacks which inflict significant damage to these systems with negligible change in meaning of the input questions. We believe this will enhance development of systems more robust to the possible variations in inputs when deployed to assist the visually impaired.
%R 10.18653/v1/2020.challengehml-1.9
%U https://aclanthology.org/2020.challengehml-1.9
%U https://doi.org/10.18653/v1/2020.challengehml-1.9
%P 64-68
Markdown (Informal)
[Exploring Weaknesses of VQA Models through Attribution Driven Insights](https://aclanthology.org/2020.challengehml-1.9) (Halbe, Challenge-HML 2020)
ACL