@inproceedings{holmstrom-etal-2023-bridging,
title = "Bridging the Resource Gap: Exploring the Efficacy of {E}nglish and Multilingual {LLM}s for {S}wedish",
author = {Holmstr{\"o}m, Oskar and
Kunz, Jenny and
Kuhlmann, Marco},
editor = "Ilinykh, Nikolai and
Morger, Felix and
Dann{\'e}lls, Dana and
Dobnik, Simon and
Megyesi, Be{\'a}ta and
Nivre, Joakim",
booktitle = "Proceedings of the Second Workshop on Resources and Representations for Under-Resourced Languages and Domains (RESOURCEFUL-2023)",
month = may,
year = "2023",
address = "T{\'o}rshavn, the Faroe Islands",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.resourceful-1.13",
pages = "92--110",
abstract = "Large language models (LLMs) have substantially improved natural language processing (NLP) performance, but training these models from scratch is resource-intensive and challenging for smaller languages. With this paper, we want to initiate a discussion on the necessity of language-specific pre-training of LLMs.We propose how the {``}one model-many models{''} conceptual framework for task transfer can be applied to language transfer and explore this approach by evaluating the performance of non-Swedish monolingual and multilingual models{'} performance on tasks in Swedish.Our findings demonstrate that LLMs exposed to limited Swedish during training can be highly capable and transfer competencies from English off-the-shelf, including emergent abilities such as mathematical reasoning, while at the same time showing distinct culturally adapted behaviour. Our results suggest that there are resourceful alternatives to language-specific pre-training when creating useful LLMs for small languages.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="holmstrom-etal-2023-bridging">
    <titleInfo>
      <title>Bridging the Resource Gap: Exploring the Efficacy of English and Multilingual LLMs for Swedish</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Oskar</namePart>
      <namePart type="family">Holmström</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Jenny</namePart>
      <namePart type="family">Kunz</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Marco</namePart>
      <namePart type="family">Kuhlmann</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2023-05</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the Second Workshop on Resources and Representations for Under-Resourced Languages and Domains (RESOURCEFUL-2023)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Nikolai</namePart>
        <namePart type="family">Ilinykh</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Felix</namePart>
        <namePart type="family">Morger</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Dana</namePart>
        <namePart type="family">Dannélls</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Simon</namePart>
        <namePart type="family">Dobnik</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Beáta</namePart>
        <namePart type="family">Megyesi</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Joakim</namePart>
        <namePart type="family">Nivre</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Tórshavn, the Faroe Islands</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Large language models (LLMs) have substantially improved natural language processing (NLP) performance, but training these models from scratch is resource-intensive and challenging for smaller languages. With this paper, we want to initiate a discussion on the necessity of language-specific pre-training of LLMs. We propose how the “one model-many models” conceptual framework for task transfer can be applied to language transfer and explore this approach by evaluating the performance of non-Swedish monolingual and multilingual models on tasks in Swedish. Our findings demonstrate that LLMs exposed to limited Swedish during training can be highly capable and transfer competencies from English off-the-shelf, including emergent abilities such as mathematical reasoning, while at the same time showing distinct culturally adapted behaviour. Our results suggest that there are resourceful alternatives to language-specific pre-training when creating useful LLMs for small languages.</abstract>
<identifier type="citekey">holmstrom-etal-2023-bridging</identifier>
<location>
<url>https://aclanthology.org/2023.resourceful-1.13</url>
</location>
<part>
<date>2023-05</date>
<extent unit="page">
<start>92</start>
<end>110</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Bridging the Resource Gap: Exploring the Efficacy of English and Multilingual LLMs for Swedish
%A Holmström, Oskar
%A Kunz, Jenny
%A Kuhlmann, Marco
%Y Ilinykh, Nikolai
%Y Morger, Felix
%Y Dannélls, Dana
%Y Dobnik, Simon
%Y Megyesi, Beáta
%Y Nivre, Joakim
%S Proceedings of the Second Workshop on Resources and Representations for Under-Resourced Languages and Domains (RESOURCEFUL-2023)
%D 2023
%8 May
%I Association for Computational Linguistics
%C Tórshavn, the Faroe Islands
%F holmstrom-etal-2023-bridging
%X Large language models (LLMs) have substantially improved natural language processing (NLP) performance, but training these models from scratch is resource-intensive and challenging for smaller languages. With this paper, we want to initiate a discussion on the necessity of language-specific pre-training of LLMs. We propose how the “one model-many models” conceptual framework for task transfer can be applied to language transfer and explore this approach by evaluating the performance of non-Swedish monolingual and multilingual models on tasks in Swedish. Our findings demonstrate that LLMs exposed to limited Swedish during training can be highly capable and transfer competencies from English off-the-shelf, including emergent abilities such as mathematical reasoning, while at the same time showing distinct culturally adapted behaviour. Our results suggest that there are resourceful alternatives to language-specific pre-training when creating useful LLMs for small languages.
%U https://aclanthology.org/2023.resourceful-1.13
%P 92-110
Markdown (Informal)
[Bridging the Resource Gap: Exploring the Efficacy of English and Multilingual LLMs for Swedish](https://aclanthology.org/2023.resourceful-1.13) (Holmström et al., RESOURCEFUL 2023)
ACL
Oskar Holmström, Jenny Kunz, and Marco Kuhlmann. 2023. [Bridging the Resource Gap: Exploring the Efficacy of English and Multilingual LLMs for Swedish](https://aclanthology.org/2023.resourceful-1.13). In *Proceedings of the Second Workshop on Resources and Representations for Under-Resourced Languages and Domains (RESOURCEFUL-2023)*, pages 92–110, Tórshavn, the Faroe Islands. Association for Computational Linguistics.