@inproceedings{al-sabri-gao-2021-lamad-linguistic,
title = "{LAMAD}: A Linguistic Attentional Model for {A}rabic Text Diacritization",
author = "Al-Sabri, Raeed and
Gao, Jianliang",
editor = "Moens, Marie-Francine and
Huang, Xuanjing and
Specia, Lucia and
Yih, Scott Wen-tau",
booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2021",
month = nov,
year = "2021",
address = "Punta Cana, Dominican Republic",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2021.findings-emnlp.317",
doi = "10.18653/v1/2021.findings-emnlp.317",
pages = "3757--3764",
abstract = "In Arabic Language, diacritics are used to specify meanings as well as pronunciations. However, diacritics are often omitted from written texts, which increases the number of possible meanings and pronunciations. This leads to an ambiguous text and makes the computational process on undiacritized text more difficult. In this paper, we propose a Linguistic Attentional Model for Arabic text Diacritization (LAMAD). In LAMAD, a new linguistic feature representation is presented, which utilizes both word and character contextual features. Then, a linguistic attention mechanism is proposed to capture the important linguistic features. In addition, we explore the impact of the linguistic features extracted from the text on Arabic text diacritization (ATD) by introducing them to the linguistic attention mechanism. The extensive experimental results on three datasets with different sizes illustrate that LAMAD outperforms the existing state-of-the-art models.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="al-sabri-gao-2021-lamad-linguistic">
<titleInfo>
<title>LAMAD: A Linguistic Attentional Model for Arabic Text Diacritization</title>
</titleInfo>
<name type="personal">
<namePart type="given">Raeed</namePart>
<namePart type="family">Al-Sabri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jianliang</namePart>
<namePart type="family">Gao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2021-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: EMNLP 2021</title>
</titleInfo>
<name type="personal">
<namePart type="given">Marie-Francine</namePart>
<namePart type="family">Moens</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xuanjing</namePart>
<namePart type="family">Huang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lucia</namePart>
<namePart type="family">Specia</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Scott</namePart>
<namePart type="given">Wen-tau</namePart>
<namePart type="family">Yih</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Punta Cana, Dominican Republic</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In Arabic Language, diacritics are used to specify meanings as well as pronunciations. However, diacritics are often omitted from written texts, which increases the number of possible meanings and pronunciations. This leads to an ambiguous text and makes the computational process on undiacritized text more difficult. In this paper, we propose a Linguistic Attentional Model for Arabic text Diacritization (LAMAD). In LAMAD, a new linguistic feature representation is presented, which utilizes both word and character contextual features. Then, a linguistic attention mechanism is proposed to capture the important linguistic features. In addition, we explore the impact of the linguistic features extracted from the text on Arabic text diacritization (ATD) by introducing them to the linguistic attention mechanism. The extensive experimental results on three datasets with different sizes illustrate that LAMAD outperforms the existing state-of-the-art models.</abstract>
<identifier type="citekey">al-sabri-gao-2021-lamad-linguistic</identifier>
<identifier type="doi">10.18653/v1/2021.findings-emnlp.317</identifier>
<location>
<url>https://aclanthology.org/2021.findings-emnlp.317</url>
</location>
<part>
<date>2021-11</date>
<extent unit="page">
<start>3757</start>
<end>3764</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T LAMAD: A Linguistic Attentional Model for Arabic Text Diacritization
%A Al-Sabri, Raeed
%A Gao, Jianliang
%Y Moens, Marie-Francine
%Y Huang, Xuanjing
%Y Specia, Lucia
%Y Yih, Scott Wen-tau
%S Findings of the Association for Computational Linguistics: EMNLP 2021
%D 2021
%8 November
%I Association for Computational Linguistics
%C Punta Cana, Dominican Republic
%F al-sabri-gao-2021-lamad-linguistic
%X In Arabic Language, diacritics are used to specify meanings as well as pronunciations. However, diacritics are often omitted from written texts, which increases the number of possible meanings and pronunciations. This leads to an ambiguous text and makes the computational process on undiacritized text more difficult. In this paper, we propose a Linguistic Attentional Model for Arabic text Diacritization (LAMAD). In LAMAD, a new linguistic feature representation is presented, which utilizes both word and character contextual features. Then, a linguistic attention mechanism is proposed to capture the important linguistic features. In addition, we explore the impact of the linguistic features extracted from the text on Arabic text diacritization (ATD) by introducing them to the linguistic attention mechanism. The extensive experimental results on three datasets with different sizes illustrate that LAMAD outperforms the existing state-of-the-art models.
%R 10.18653/v1/2021.findings-emnlp.317
%U https://aclanthology.org/2021.findings-emnlp.317
%U https://doi.org/10.18653/v1/2021.findings-emnlp.317
%P 3757-3764
Markdown (Informal)
[LAMAD: A Linguistic Attentional Model for Arabic Text Diacritization](https://aclanthology.org/2021.findings-emnlp.317) (Al-Sabri & Gao, Findings 2021)
ACL