% Conference paper in the SLT 2022 proceedings. The conference dates recorded
% in the note field fall in 2023, which matches the publication year below.
@inproceedings{1c12763a5a684811ae82a3808714d457,
  author    = {Nguyen, {Le Minh} and Nayak, Shekhar and Coler, Matt},
  title     = {Improving {Luxembourgish} Speech Recognition with Cross-Lingual Speech Representations},
  booktitle = {2022 {IEEE} Spoken Language Technology Workshop, {SLT} 2022 - Proceedings},
  year      = {2023},
  pages     = {792--797},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  doi       = {10.1109/SLT54892.2023.10022706},
  isbn      = {979-8-3503-9691-1},
  language  = {English},
  keywords  = {language modelling, Luxembourgish, multilingual speech recognition, under-resourced language, wav2vec 2.0 XLSR-53},
  abstract  = {Luxembourgish is a West Germanic language spoken by roughly 390,000 people, mainly in Luxembourg. It is one of Europe's under-described and under-resourced languages, not extensively investigated in the context of speech recognition. We explore the self-supervised multilingual learning of Luxembourgish speech representations for the speech recognition downstream task. We show that learning cross-lingual representations is essential for low-resourced languages such as Luxembourgish. Learning cross-lingual representations and rescoring the output transcriptions with language modelling while using only 4 hours of labelled speech achieves a word error rate of 15.1\% and improves our Transfer Learning baseline model relatively by 33.1\% and absolutely by 7.5\%. Increasing the amount of labelled speech to 14 hours yields a significant performance gain resulting in a 9.3\% word error rate. Models and datasets are available at https://huggingface.co/lemswasabi},
  note      = {Funding Information: We thank the Faculty Board of the University of Groningen for their funding to cover expenses for attending the SLT22 conference. Additionally, we thank the Zenter fir d'L{\"e}tzebuerger Sprooch for their help normalizing the transcriptions of the segmented audio. Publisher Copyright: {\textcopyright} 2023 IEEE.; 2022 IEEE Spoken Language Technology Workshop, SLT 2022 ; Conference date: 09-01-2023 Through 12-01-2023},
}