{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,13]],"date-time":"2026-04-13T15:20:21Z","timestamp":1776093621011,"version":"3.50.1"},"reference-count":31,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2024,2,2]],"date-time":"2024-02-02T00:00:00Z","timestamp":1706832000000},"content-version":"vor","delay-in-days":32,"URL":"http:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0\/"}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Procedia Computer Science"],"published-print":{"date-parts":[[2024]]},"DOI":"10.1016\/j.procs.2024.02.074","type":"journal-article","created":{"date-parts":[[2024,3,20]],"date-time":"2024-03-20T23:24:50Z","timestamp":1710977090000},"page":"2560-2569","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":23,"special_numbering":"C","title":["Unveiling embedded features in Wav2vec2 and HuBERT models for Speech Emotion Recognition"],"prefix":"10.1016","volume":"232","author":[{"given":"Adil","family":"CHAKHTOUNA","sequence":"first","affiliation":[]},{"given":"Sara","family":"SEKKATE","sequence":"additional","affiliation":[]},{"given":"Abdellah","family":"ADIB","sequence":"additional","affiliation":[]}],"member":"78","reference":[{"key":"10.1016\/j.procs.2024.02.074_bib0001","doi-asserted-by":"crossref","first-page":"218","DOI":"10.1016\/j.procs.2018.01.117","article-title":"Performance evaluation of feature extraction techniques in mr-brain image 
classification system","volume":"127","author":"Khalil","year":"2018","journal-title":"Procedia Computer Science"},{"key":"10.1016\/j.procs.2024.02.074_bib0002","first-page":"1","article-title":"Speaker and gender dependencies in within\/cross linguistic speech emotion recognition","author":"Chakhtouna","year":"2023","journal-title":"International Journal of Speech Technology"},{"key":"10.1016\/j.procs.2024.02.074_bib0003","series-title":"Proceedings. 11th IEEE International Workshop on Robot and Human Interactive Communication","first-page":"454","article-title":"Using gesture and speech control for commanding a robot assistant","author":"Rogalla","year":"2002"},{"key":"10.1016\/j.procs.2024.02.074_bib0004","series-title":"Interactive Collaborative Robotics: First International Conference, ICR 2016","first-page":"89","article-title":"Human-machine speech-based interfaces with augmented reality and interactive systems for controlling mobile cranes","author":"Majewski","year":"2016"},{"key":"10.1016\/j.procs.2024.02.074_bib0005","doi-asserted-by":"crossref","first-page":"203","DOI":"10.1016\/j.susoc.2022.01.008","article-title":"Understanding the adoption of industry 4.0 technologies in improving environmental sustainability","volume":"3","author":"Javaid","year":"2022","journal-title":"Sustainable Operations and Computers"},{"issue":"5","key":"10.1016\/j.procs.2024.02.074_bib0006","doi-asserted-by":"crossref","first-page":"1367","DOI":"10.1007\/s10639-015-9388-2","article-title":"Towards real-time speech emotion recognition for affective e-learning","volume":"21","author":"Bahreini","year":"2016","journal-title":"Education and information technologies"},{"key":"10.1016\/j.procs.2024.02.074_bib0007","series-title":"2021 43rd Annual International Conference of the IEEE Engineering in Medicine & Biology Society (EMBC)","first-page":"1857","article-title":"Speech based affective analysis of patients embedded in telemedicine 
platforms","author":"Kallipolitis","year":"2021"},{"issue":"2","key":"10.1016\/j.procs.2024.02.074_bib0008","doi-asserted-by":"crossref","first-page":"293","DOI":"10.1109\/TSA.2004.838534","article-title":"Toward detecting emotions in spoken dialogs","volume":"13","author":"Lee","year":"2005","journal-title":"IEEE transactions on speech and audio processing"},{"key":"10.1016\/j.procs.2024.02.074_bib0009","series-title":"2004 IEEE international conference on acoustics, speech, and signal processing","first-page":"I","article-title":"Speech emotion recognition combining acoustic features and linguistic information in a hybrid support vector machine-belief network architecture","volume":"1","author":"Schuller","year":"2004"},{"key":"10.1016\/j.procs.2024.02.074_bib0010","doi-asserted-by":"crossref","first-page":"1875","DOI":"10.1016\/j.procs.2022.01.388","article-title":"Real-time detection of worker's emotions for advanced human-robot interaction during collaborative tasks in smart factories","volume":"200","author":"Chiurco","year":"2022","journal-title":"Procedia Computer Science"},{"key":"10.1016\/j.procs.2024.02.074_bib0011","unstructured":"J. Devlin, M.-W. Chang, K. Lee, K. 
Toutanova, Bert: Pre-training of deep bidirectional transformers for language understanding, arXiv preprint arXiv:1810.04805 (2018)."},{"key":"10.1016\/j.procs.2024.02.074_bib0012","first-page":"12449","article-title":"wav2vec 2.0: A framework for self-supervised learning of speech representations","volume":"33","author":"Baevski","year":"2020","journal-title":"Advances in Neural Information Processing Systems"},{"key":"10.1016\/j.procs.2024.02.074_bib0013","doi-asserted-by":"crossref","first-page":"3451","DOI":"10.1109\/TASLP.2021.3122291","article-title":"Hubert: Self-supervised speech representation learning by masked prediction of hidden units","volume":"29","author":"Hsu","year":"2021","journal-title":"IEEE\/ACM Transactions on Audio, Speech, and Language Processing"},{"key":"10.1016\/j.procs.2024.02.074_bib0014","series-title":"International Conference on Intelligent Systems Design and Applications","first-page":"399","article-title":"Improving speech emotion recognition system using spectral and prosodic features","author":"Chakhtouna","year":"2021"},{"key":"10.1016\/j.procs.2024.02.074_bib0015","series-title":"2019 29th International Conference Radioelektronika (RADIOELEKTRONIKA)","first-page":"1","article-title":"Deep learning techniques for speech emotion recognition: A review","author":"Pandey","year":"2019"},{"key":"10.1016\/j.procs.2024.02.074_bib0016","series-title":"2016 8th International Conference on Information Technology and Electrical Engineering (ICITEE)","first-page":"1","article-title":"A comparison between shallow and deep architecture classifiers on small dataset","author":"Pasupa","year":"2016"},{"key":"10.1016\/j.procs.2024.02.074_bib0017","series-title":"2015 IEEE international conference on acoustics, speech and signal processing (ICASSP)","first-page":"5206","article-title":"Librispeech: an asr corpus based on public domain audio 
books","author":"Panayotov","year":"2015"},{"key":"10.1016\/j.procs.2024.02.074_bib0018","series-title":"ICASSP 2020-2020 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","first-page":"7669","article-title":"Libri-light: A benchmark for asr with limited or no supervision","author":"Kahn","year":"2020"},{"key":"10.1016\/j.procs.2024.02.074_bib0019","series-title":"International Conference On Big Data and Internet of Things","first-page":"399","article-title":"Transformer model and convolutional neural networks (cnns) for arabic to english machine translation","author":"Bensalah","year":"2022"},{"key":"10.1016\/j.procs.2024.02.074_bib0020","doi-asserted-by":"crossref","DOI":"10.1016\/j.jneumeth.2020.109037","article-title":"Mi-eegnet: A novel convolutional neural network for motor imagery classification","volume":"353","author":"Riyad","year":"2021","journal-title":"Journal of Neuroscience Methods"},{"key":"10.1016\/j.procs.2024.02.074_bib0021","series-title":"2021 International Conference on INnovations in Intelligent SysTems and Applications (INISTA)","first-page":"1","article-title":"Convolutional denoising auto-encoder based awgn removal from ecg signal","author":"El Bouny","year":"2021"},{"key":"10.1016\/j.procs.2024.02.074_bib0022","series-title":"Emerging Trends in Intelligent Systems & Network Security","first-page":"136","article-title":"classification of credit applicants using svm variants coupled with filter-based feature selection","author":"Akil","year":"2022"},{"key":"10.1016\/j.procs.2024.02.074_bib0023","first-page":"365","article-title":"Speech emotion recognition using pre-trained and fine-tuned transfer learning approaches","volume":"6","author":"Chakhtouna","year":"2023"},{"key":"10.1016\/j.procs.2024.02.074_bib0024","series-title":"Multimedia Tools and Applications","first-page":"1","article-title":"A statistical feature extraction for deep speech emotion recognition in a bilingual 
scenario","author":"Sekkate","year":"2022"},{"key":"10.1016\/j.procs.2024.02.074_bib0025","doi-asserted-by":"crossref","DOI":"10.1016\/j.eswa.2021.114683","article-title":"Speech emotion recognition using recurrent neural networks with directional self-attention","volume":"173","author":"Li","year":"2021","journal-title":"Expert Systems with Applications"},{"key":"10.1016\/j.procs.2024.02.074_bib0026","series-title":"International Conference on Networking, Intelligent Systems and Security","first-page":"281","article-title":"Improving speaker-dependency\/independency of wavelet-based speech emotion recognition","author":"Chakhtouna","year":"2023"},{"key":"10.1016\/j.procs.2024.02.074_bib0027","doi-asserted-by":"crossref","first-page":"1","DOI":"10.18637\/jss.v036.i11","article-title":"Feature selection with the boruta package","volume":"36","author":"Kursa","year":"2010","journal-title":"Journal of statistical software"},{"key":"10.1016\/j.procs.2024.02.074_bib0028","article-title":"Recognizing more emotions with less data using self-supervised transfer learning","author":"Boigne","year":"2020","journal-title":"arXiv preprint"},{"key":"10.1016\/j.procs.2024.02.074_bib0029","article-title":"Emotion recognition from speech using wav2vec 2.0 embeddings","author":"Pepino","year":"2021","journal-title":"arXiv preprint"},{"issue":"5","key":"10.1016\/j.procs.2024.02.074_bib0030","doi-asserted-by":"crossref","DOI":"10.1371\/journal.pone.0196391","article-title":"The ryerson audio-visual database of emotional speech and song (ravdess): A dynamic, multimodal set of facial and vocal expressions in north american english","volume":"13","author":"Livingstone","year":"2018","journal-title":"PloS one"},{"issue":"1","key":"10.1016\/j.procs.2024.02.074_bib0031","doi-asserted-by":"crossref","first-page":"327","DOI":"10.3390\/app12010327","article-title":"A proposal for multimodal emotion recognition using aural transformers and action units on ravdess 
dataset","volume":"12","author":"Luna-Jim\u00e9nez","year":"2021","journal-title":"Applied Sciences"}],"container-title":["Procedia Computer Science"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S1877050924002515?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S1877050924002515?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2025,1,8]],"date-time":"2025-01-08T04:13:44Z","timestamp":1736309624000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S1877050924002515"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024]]},"references-count":31,"alternative-id":["S1877050924002515"],"URL":"https:\/\/doi.org\/10.1016\/j.procs.2024.02.074","relation":{},"ISSN":["1877-0509"],"issn-type":[{"value":"1877-0509","type":"print"}],"subject":[],"published":{"date-parts":[[2024]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"Unveiling embedded features in Wav2vec2 and HuBERT models for Speech Emotion Recognition","name":"articletitle","label":"Article Title"},{"value":"Procedia Computer Science","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.procs.2024.02.074","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2024 The Author(s). Published by Elsevier B.V.","name":"copyright","label":"Copyright"}]}}