% Workshop paper from WOAH 2023 (co-located with ACL 2023, per the note field).
% Names use the "Last, First" form; braces in the title protect words that
% must keep their capitalisation under sentence-casing styles.
@inproceedings{af254a9fd179486fb2e8a31012ed3f21,
  author    = {Caselli, Tommaso and van der Veen, Hylke},
  title     = {Benchmarking Offensive and Abusive Language in {Dutch} Tweets},
  editor    = {Chung, Yi-Ling and {Mostafazadeh Davani}, Aida and Nozza, Debora and R{\"o}ttger, Paul and Talat, Zeerak},
  booktitle = {ACL 2023 - 7th Workshop on Online Abuse and Harms, WOAH 2023 - Proceedings of the Workshop},
  series    = {Proceedings of the Annual Meeting of the Association for Computational Linguistics},
  publisher = {Association for Computational Linguistics},
  pages     = {69--84},
  year      = {2023},
  month     = jul,
  doi       = {10.18653/v1/2023.woah-1.7},
  language  = {English},
  note      = {Publisher Copyright: {\textcopyright} 2023 Association for Computational Linguistics.; 7th Workshop on Online Abuse and Harms, WOAH 2023, co-located with ACL 2023 ; Conference date: 13-07-2023},
  abstract  = {We present an extensive evaluation of different fine-tuned models to detect instances of offensive and abusive language in Dutch across three benchmarks: a standard held-out test, a task-agnostic functional benchmark, and a dynamic test set. We also investigate the use of data cartography to identify high quality training data. Our results show a relatively good quality of the manually annotated data used to train the models while highlighting some critical weakness. We have also found a good portability of trained models along the same language phenomena. As for the data cartography, we have found a positive impact only on the functional benchmark and when selecting data per annotated dimension rather than using the entire training material.},
}