{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,22]],"date-time":"2026-04-22T18:35:21Z","timestamp":1776882921453,"version":"3.51.2"},"reference-count":22,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,4,6]],"date-time":"2025-04-06T00:00:00Z","timestamp":1743897600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,4,6]],"date-time":"2025-04-06T00:00:00Z","timestamp":1743897600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100020487","name":"Nature","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100020487","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,4,6]]},"DOI":"10.1109\/icassp49660.2025.10887968","type":"proceedings-article","created":{"date-parts":[[2025,3,12]],"date-time":"2025-03-12T17:15:02Z","timestamp":1741799702000},"page":"1-5","source":"Crossref","is-referenced-by-count":1,"title":["Anchored Monotonic Alignment and Representation Substitution for Rare Spontaneous Behaviors in Spontaneous Speech Synthesis"],"prefix":"10.1109","author":[{"given":"Ning-Qian","family":"Wu","sequence":"first","affiliation":[{"name":"University of Science and Technology of China,National Engineering Research Center of Speech and Language Information Processing,Hefei,P.R.China"}]},{"given":"Ya-Jun","family":"Hu","sequence":"additional","affiliation":[{"name":"iFLYTEK Co. Ltd.,iFLYTEK Research,China"}]},{"given":"Liping","family":"Chen","sequence":"additional","affiliation":[{"name":"University of Science and Technology of China,National Engineering Research Center of Speech and Language Information Processing,Hefei,P.R.China"}]},{"given":"Zhen-Hua","family":"Ling","sequence":"additional","affiliation":[{"name":"University of Science and Technology of China,National Engineering Research Center of Speech and Language Information Processing,Hefei,P.R.China"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/WSS.2002.1224409"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.21437\/Eurospeech.2003-392"},{"key":"ref3","article-title":"Prosodic analysis and modelling of conversational elements for speech synthesis","volume-title":"Ph.D. dissertation","author":"Mercado","year":"2009"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.3390\/mti2010009"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP40776.2020.9054107"},{"key":"ref6","first-page":"6438","article-title":"Dnn-based speech synthesis using abundant tags of spontaneous speech corpus","volume-title":"Proceedings of The 12th Language Resources and Evaluation Conference, LREC 2020, Marseille, France, May 11-16, 2020","author":"Yamashita"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.21437\/SSW.2019-44"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2021-412"},{"key":"ref9","first-page":"1960","article-title":"Evaluating Sampling-based Filler Insertion with Spontaneous TTS","volume-title":"Proceedings of the Thirteenth Language Resources and Evaluation Conference, LREC 2022, Marseille, France, 20-25 June 2022","author":"Wang"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.23919\/APSIPAASC55919.2022.9979895"},{"key":"ref11","first-page":"385","article-title":"Personalized Filled-pause Generation with Group-wise Prediction Models","volume-title":"Proceedings of the Thirteenth Language Resources and Evaluation Conference, LREC 2022, Marseille, France, 20-25 June 2022","author":"Matsunaga"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2021-584"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2023-1754"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2024-1989"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.21437\/SSW.2021-12"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSPW59220.2023.10193157"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP48485.2024.10445828"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2023-534"},{"key":"ref19","article-title":"Rasc863-a chinese speech corpus with four regional accents","volume-title":"ICSLT-o-COCOSDA","author":"Li"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.findings-emnlp.58"},{"key":"ref21","article-title":"Chatglm: A family of large language models from glm-130b to glm-4 all tools","author":"Zeng","year":"2024"},{"key":"ref22","article-title":"LoRA: Low-Rank Adaptation of Large Language Models","volume-title":"The Tenth International Conference on Learning Representations, ICLR 2022, Virtual Event, April 25-29, 2022","author":"Hu"}],"event":{"name":"ICASSP 2025 - 2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","location":"Hyderabad, India","start":{"date-parts":[[2025,4,6]]},"end":{"date-parts":[[2025,4,11]]}},"container-title":["ICASSP 2025 - 2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/10887540\/10887541\/10887968.pdf?arnumber=10887968","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,3,25]],"date-time":"2026-03-25T05:25:02Z","timestamp":1774416302000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10887968\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,4,6]]},"references-count":22,"URL":"https:\/\/doi.org\/10.1109\/icassp49660.2025.10887968","relation":{},"subject":[],"published":{"date-parts":[[2025,4,6]]}}}