% Conference paper (PRAM 2025 proceedings, vol. 1). Percent signs in the abstract are
% escaped as \% so the field stays valid LaTeX if a style prints it; names use "Last, First".
@inproceedings{8434f657c1da48e5a6764b2e51b580c6,
  author    = {Serbanescu, Valentin-Adrian and Dhali, Maruf A.},
  title     = {Deep Learning for Effective Classification and Information Extraction of Financial Documents},
  booktitle = {Proceedings of the 14th International Conference on Pattern Recognition Applications and Methods},
  editor    = {Castrillon-Santana, Modesto and {De Marsico}, Maria and Fred, Ana},
  volume    = {1},
  pages     = {749--756},
  publisher = {SciTePress},
  year      = {2025},
  doi       = {10.5220/0013261000003905},
  language  = {English},
  keywords  = {Deep Learning, Classification, Financial Documents, Optical Character Recognition, CNN, RoBERTa, LayoutLMv3, GraphDoc, Information Extraction, Document Processing},
  abstract  = {The financial and accounting sectors are encountering increased demands to effectively manage large volumes of documents in today{\textquoteright}s digital environment. Meeting this demand is crucial for accurate archiving, maintaining efficiency and competitiveness, and ensuring operational excellence in the industry. This study proposes and analyzes machine learning-based pipelines to effectively classify and extract information from scanned and photographed financial documents, such as invoices, receipts, bank statements, etc. It also addresses the challenges associated with financial document processing using deep learning techniques. This research explores several models, including LeNet5, VGG19, and MobileNetV2 for document classification and RoBERTa, LayoutLMv3, and GraphDoc for information extraction. The models are trained and tested on financial documents from previously available benchmark datasets and a new dataset with financial documents in Romanian. Results show MobileNetV2 excels in classification tasks (with accuracies of 99.24\% with data augmentation and 93.33\% without augmentation), while RoBERTa and LayoutLMv3 lead in extraction tasks (with F1-scores of 0.7761 and 0.7426, respectively). Despite the challenges posed by the imbalanced dataset and cross-language documents, the proposed pipeline shows potential for automating the processing of financial documents in the relevant sectors.},
}