{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,1]],"date-time":"2025-11-01T04:45:22Z","timestamp":1761972322812,"version":"build-2065373602"},"reference-count":31,"publisher":"Institute of Electronics, Information and Communications Engineers (IEICE)","issue":"11","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEICE Trans. Fundamentals"],"published-print":{"date-parts":[[2025,11,1]]},"DOI":"10.1587\/transfun.2025eap1011","type":"journal-article","created":{"date-parts":[[2025,5,12]],"date-time":"2025-05-12T18:06:30Z","timestamp":1747073190000},"page":"1444-1451","source":"Crossref","is-referenced-by-count":0,"title":["Monaural Speech Enhancement with Attention Augmented Dual-Path CRN and Short Time Discrete Cosine Transform"],"prefix":"10.1587","volume":"E108.A","author":[{"given":"Lin","family":"ZHOU","sequence":"first","affiliation":[{"name":"School of Information Science and Engineering, Southeast University"}]},{"given":"Tongjia","family":"YAN","sequence":"additional","affiliation":[{"name":"School of Information Science and Engineering, Southeast University"}]},{"given":"Mingyang","family":"LI","sequence":"additional","affiliation":[{"name":"School of Information Science and Engineering, Southeast University"}]},{"given":"Ao","family":"LI","sequence":"additional","affiliation":[{"name":"School of Information Science and Engineering, Southeast University"}]}],"member":"532","reference":[{"key":"1","doi-asserted-by":"crossref","unstructured":"[1] J. Abdulbaqi, Y. Gu, SH. Chen, and I. Marsic, \u201cResidual recurrent neural network for speech enhancement,\u201d IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp.6659-6663, 2020. 10.1109\/ICASSP40776.2020.9053544","DOI":"10.1109\/ICASSP40776.2020.9053544"},{"key":"2","doi-asserted-by":"crossref","unstructured":"[2] B. Desplanques, J. Thienpondt, and K. Demuynck, \u201cECAPA-TDNN: Emphasized channel attention, propagation and aggregation in TDNN based speaker verification,\u201d INTERSPEECH 2020, pp.3830-3834, 2020. 10.21437\/interspeech.2020-2650","DOI":"10.21437\/Interspeech.2020-2650"},{"key":"3","doi-asserted-by":"crossref","unstructured":"[3] S. Boll, \u201cSuppression of acoustic noise in speech using spectral subtraction,\u201d IEEE Trans. Acoust., Speech, Signal Process., vol.27, no.2, pp.113-120, 1979. 10.1109\/tassp.1979.1163209","DOI":"10.1109\/TASSP.1979.1163209"},{"key":"4","doi-asserted-by":"publisher","unstructured":"[4] Y. Ephraim and D. Malah, \u201cSpeech enhancement using a minimum mean-square error log-spectral amplitude estimator,\u201d IEEE Trans. Acoust., Speech, Signal Process., vol.33, no.2, pp.443-445, 1985. 10.1109\/tassp.1985.1164550","DOI":"10.1109\/TASSP.1985.1164550"},{"key":"5","doi-asserted-by":"crossref","unstructured":"[5] S.R. Park and J. Lee, \u201cA fully convolutional neural network for speech enhancement,\u201d INTERSPEECH 2017, pp.1993-1997, 2017. 10.21437\/interspeech.2017-1465","DOI":"10.21437\/Interspeech.2017-1465"},{"key":"6","doi-asserted-by":"crossref","unstructured":"[6] C. Valentini-Botinhao, X. Wang, S. Takaki, and J. Yamagishi, \u201cInvestigating RNN-based speech enhancement methods for noise-robust Text-to-Speech,\u201d Proc. 9th ISCA Workshop on Speech Synthesis Workshop (SSW 9), pp.146-152, 2016. 10.21437\/ssw.2016-24","DOI":"10.21437\/SSW.2016-24"},{"key":"7","doi-asserted-by":"crossref","unstructured":"[7] K. Tan and D. Wang, \u201cA convolutional recurrent neural network for real-time speech enhancement,\u201d INTERSPEECH 2018, pp.3229-3233, 2018. 10.21437\/interspeech.2018-1405","DOI":"10.21437\/Interspeech.2018-1405"},{"key":"8","unstructured":"[8] H.-S. Choi, J.-H. Kim, J. Huh, A. Kim, J.-W. Ha, and K. Lee, \u201cPhase-aware speech enhancement with deep complex U-Net,\u201d International Conference on Learning Representations, 2018."},{"key":"9","doi-asserted-by":"crossref","unstructured":"[9] Y. Hu, Y. Liu, S. Lv, M. Xing, S. Zhang, Y. Fu, J. Wu, B. Zhang, and L. Xie, \u201cDCCRN: Deep complex convolution recurrent network for phase-aware speech enhancement,\u201d INTERSPEECH 2020, pp.2472-2476, 2020. 10.21437\/interspeech.2020-2537","DOI":"10.21437\/Interspeech.2020-2537"},{"key":"10","doi-asserted-by":"publisher","unstructured":"[10] D. Yin, C. Luo, Z. Xiong, and W. Zeng, \u201cPHASEN: A phase-and-harmonics-aware speech enhancement Network,\u201d Proc. AAAI Conference on Artificial Intelligence, vol.34, no.05, pp.9458-9465, 2020. 10.1609\/aaai.v34i05.6489","DOI":"10.1609\/aaai.v34i05.6489"},{"key":"11","doi-asserted-by":"crossref","unstructured":"[11] C. Geng and L. Wang, \u201cEnd-to-end speech enhancement based on discrete cosine transform,\u201d 2020 IEEE International Conference on Artificial Intelligence and Computer Applications (ICAICA), pp.379-383, 2020. 10.1109\/icaica50127.2020.9182513","DOI":"10.1109\/ICAICA50127.2020.9182513"},{"key":"12","unstructured":"[12] Q. Li, F. Gao, H. Guan, and K. Ma, \u201cReal-time monaural speech enhancement with short-time discrete cosine transform,\u201d arXiv preprint arXiv:2102.04629, 2021. 10.48550\/arXiv.2102.04629"},{"key":"13","doi-asserted-by":"crossref","unstructured":"[13] Y. Luo, Z. Chen, and T. Yoshioka, \u201cDual-path RNN: Efficient long sequence modeling for time-domain single-channel speech separation,\u201d International Conference on Acoustics, Speech, and Signal Processing, pp.46-50, 2020. 10.1109\/icassp40776.2020.9054266","DOI":"10.1109\/ICASSP40776.2020.9054266"},{"key":"14","doi-asserted-by":"crossref","unstructured":"[14] X. Le, H. Chen, K. Chen, and J. Lu, \u201cDPCRN: Dual-path convolution recurrent network for single channel speech enhancement,\u201d INTERSPEECH 2021, pp.2811-2815, 2021. 10.21437\/interspeech.2021-296","DOI":"10.21437\/Interspeech.2021-296"},{"key":"15","doi-asserted-by":"crossref","unstructured":"[15] J. Hu, L. Shen, and G. Sun, \u201cSqueeze-and-excitation networks,\u201d 2018 IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp.7132-7141, 2018. 10.1109\/cvpr.2018.00745","DOI":"10.1109\/CVPR.2018.00745"},{"key":"16","doi-asserted-by":"crossref","unstructured":"[16] S. Woo, J. Park, J. Lee, and I.S. Kweon, \u201cCBAM: Convolutional block attention module,\u201d European Conference on Computer Vision, pp.3-19, 2018. 10.1007\/978-3-030-01234-2_1","DOI":"10.1007\/978-3-030-01234-2_1"},{"key":"17","doi-asserted-by":"crossref","unstructured":"[17] S. Zhao, B. Ma, K.N. Watcharasupat, and W.S. Gan, \u201cFRCRN: Boosting feature representation using frequency recurrence for monaural speech enhancement,\u201d IEEE International Conference on Acoustics, Speech and Signal Processing, pp.9281-9285, 2022. 10.1109\/icassp43922.2022.9747578","DOI":"10.1109\/ICASSP43922.2022.9747578"},{"key":"18","doi-asserted-by":"crossref","unstructured":"[18] W. Shi, J. Caballero, F. Huszar, J. Totz, A.P. Aitken, R. Bishop, D. Rueckert, and Z. Wang, \u201cReal-time single image and video super-resolution using an efficient sub-pixel convolutional neural network,\u201d IEEE Conference on Computer Vision and Pattern Recognition, pp.1874-1883, 2016. 10.1109\/cvpr.2016.207","DOI":"10.1109\/CVPR.2016.207"},{"key":"19","doi-asserted-by":"publisher","unstructured":"[19] J. Park, S. Woo, J.-Y. Lee, and I.S. Kweon, \u201cA simple and light-weight attention module for convolutional neural networks,\u201d Int. J. Comput. Vis., vol.128, no.4, pp.783-798, 2020. 10.1007\/s11263-019-01283-0","DOI":"10.1007\/s11263-019-01283-0"},{"key":"20","doi-asserted-by":"crossref","unstructured":"[20] C. Li, J. Shi, W. Zhang, A.S. Subramanian, X. Chang, N. Kamo, M. Hira, T. Hayashi, C. Boeddeker, Z. Chen, and S. Watanabe, \u201cESPnet-SE: End-to-end speech enhancement and separation toolkit designed for ASR integration,\u201d IEEE Spoken Language Technology Workshop (SLT), pp.785-792. 10.1109\/slt48900.2021.9383615","DOI":"10.1109\/SLT48900.2021.9383615"},{"key":"21","doi-asserted-by":"crossref","unstructured":"[21] P.G. Shivakumar and P. Georgiou, \u201cPerception optimized deep denoising AutoEncoders for speech enhancement,\u201d INTERSPEECH 2016, pp.3743-3747, 2016. 10.21437\/interspeech.2016-1284","DOI":"10.21437\/Interspeech.2016-1284"},{"key":"22","doi-asserted-by":"crossref","unstructured":"[22] S. Braun and I. Tashev, \u201cA consolidated view of loss functions for supervised deep learning-based speech enhancement,\u201d International Conference on Telecommunications and Signal Processing (TSP), pp.72-76, 2021. 10.1109\/tsp52935.2021.9522648","DOI":"10.1109\/TSP52935.2021.9522648"},{"key":"23","doi-asserted-by":"publisher","unstructured":"[23] J.M. Martin-Do\u00f1as, A.M. Gomez, J.A. Gonzalez, and A.M. Peinado, \u201cA deep learning loss function based on the perceptual evaluation of the speech quality,\u201d IEEE Signal Process. Lett., vol.25, no.11, pp.1680-1684, 2018. 10.1109\/lsp.2018.2871419","DOI":"10.1109\/LSP.2018.2871419"},{"key":"24","unstructured":"[24] J.G. Beerends, A.P. Hekstra, A.W. Rix, and M.P. Hollier, \u201cPerceptual evaluation of speech quality (PESQ) the new ITU standard for end-to-end speech quality assessment,\u201d Journal of The Audio Engineering Society, vol.50, no.10, pp.765-778, 2002."},{"key":"25","doi-asserted-by":"publisher","unstructured":"[25] Y. Hu and P.C. Loizou, \u201cEvaluation of objective quality measures for speech enhancement,\u201d IEEE Trans. Audio, Speech, Language Process., vol.16, no.1, pp.229-238, 2008. 10.1109\/tasl.2007.911054","DOI":"10.1109\/TASL.2007.911054"},{"key":"26","doi-asserted-by":"crossref","unstructured":"[26] S. Lv, Y. Hu, S. Zhang, and L. Xie, \u201cDCCRN+: Channel-wise subband DCCRN with SNR estimation for speech enhancement,\u201d INTERSPEECH 2021, pp.2816-2820, 2021. 10.21437\/interspeech.2021-1482","DOI":"10.21437\/Interspeech.2021-1482"},{"key":"27","unstructured":"[27] H. Schr\u00f6ter, T. Rosenkranz, A.N. Escalante-B, and A. Maier, \u201cDeepFilterNet: Perceptually motivated real-time speech enhancement,\u201d INTERSPEECH 2023, pp.2008-2009, 2023."},{"key":"28","unstructured":"[28] W. Shin, B.H. Lee, J.S. Kim, H.J. Park, and S.W. Han, \u201cMetricGAN-OKD: Multi-metric optimization of MetricGAN via online knowledge distillation for speech enhancement,\u201d Proc. 40th International Conference on Machine Learning, pp.31521-31538, 2023."},{"key":"29","doi-asserted-by":"publisher","unstructured":"[29] J. Fan, J. Yang, X. Zhang, and Y. Yao, \u201cRealtime single channel speech enhancement based on causal attention mechanism,\u201d Applied Acoustics, vol.201, p.109084, 2022. 10.1016\/j.apacoust.2022.109084","DOI":"10.1016\/j.apacoust.2022.109084"},{"key":"30","doi-asserted-by":"crossref","unstructured":"[30] S.H. Bae, S.W. Chae, Y. Kim, K. Lee, H. Lim, and L.-H. Kim, \u201cStreaming dual-path transformer for speech enhancement,\u201d Proc. Interspeech 2023, pp.824-828, 2023. 10.21437\/interspeech.2023-751","DOI":"10.21437\/Interspeech.2023-751"},{"key":"31","doi-asserted-by":"publisher","unstructured":"[31] Q. Zhao, Y. Gao, Z. Cai, and S. Ou, \u201cSpeech enhancement based on dual-path cross-parallel conformer network,\u201d IEEE Access, vol.12, pp.198201-198211, 2024. 10.1109\/access.2024.3518540","DOI":"10.1109\/ACCESS.2024.3518540"}],"container-title":["IEICE Transactions on Fundamentals of Electronics, Communications and Computer Sciences"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/www.jstage.jst.go.jp\/article\/transfun\/E108.A\/11\/E108.A_2025EAP1011\/_pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,11,1]],"date-time":"2025-11-01T03:39:35Z","timestamp":1761968375000},"score":1,"resource":{"primary":{"URL":"https:\/\/www.jstage.jst.go.jp\/article\/transfun\/E108.A\/11\/E108.A_2025EAP1011\/_article"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,11,1]]},"references-count":31,"journal-issue":{"issue":"11","published-print":{"date-parts":[[2025]]}},"URL":"https:\/\/doi.org\/10.1587\/transfun.2025eap1011","relation":{},"ISSN":["0916-8508","1745-1337"],"issn-type":[{"type":"print","value":"0916-8508"},{"type":"electronic","value":"1745-1337"}],"subject":[],"published":{"date-parts":[[2025,11,1]]},"article-number":"2025EAP1011"}}