% BibTeX records: Zhijie Yan
% NSDI 2026 conference paper.
@inproceedings{DBLP:conf/nsdi/MiaoZZYLYZJXJMC26,
  author     = {Miao, Congcong and
                Zou, Xianneng and
                Zhang, Chuwen and
                Yang, Shiping and
                Liu, Qihang and
                Yan, Zhijie and
                Zhang, Yanke and
                Jiang, Yong and
                Xiang, Qiao and
                Jin, Xin and
                Meng, Zili and
                Chen, Ang},
  editor     = {Kandula, Srikanth and
                Weatherspoon, Hakim},
  title      = {A Composable Emulation Framework for Whitebox Switches},
  booktitle  = {23rd {USENIX} Symposium on Networked Systems Design and Implementation,
                {NSDI} 2026, Renton, WA, May 4-6, 2026},
  pages      = {1653--1667},
  publisher  = {{USENIX} Association},
  year       = {2026},
  url        = {https://www.usenix.org/conference/nsdi26/presentation/miao-whitebox},
  timestamp  = {Mon, 18 May 2026 16:37:21 +0200},
  biburl     = {https://dblp.org/rec/conf/nsdi/MiaoZZYLYZJXJMC26.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Adv. Eng. Informatics article (2025).
% Title: IndVisSGG and VLM are brace-protected so sentence-casing styles keep their capitals.
@article{DBLP:journals/aei/WangYLL25,
  author     = {Wang, Zuoxu and
                Yan, Zhijie and
                Li, Shufei and
                Liu, Jihong},
  title      = {{IndVisSGG}: {VLM}-based scene graph generation for industrial spatial
                intelligence},
  journal    = {Adv. Eng. Informatics},
  volume     = {65},
  pages      = {103107},
  year       = {2025},
  url        = {https://doi.org/10.1016/j.aei.2024.103107},
  doi        = {10.1016/J.AEI.2024.103107},
  timestamp  = {Fri, 14 Feb 2025 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/aei/WangYLL25.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% IEEE Robotics Autom. Lett. article (2025).
% Title: 3D is brace-protected so sentence-casing styles do not render it as 3d.
@article{DBLP:journals/ral/YanLWWWZCL25,
  author     = {Yan, Zhijie and
                Li, Shufei and
                Wang, Zuoxu and
                Wu, Lixiu and
                Wang, Han and
                Zhu, Jun and
                Chen, Lijiang and
                Liu, Jihong},
  title      = {Dynamic Open-Vocabulary {3D} Scene Graphs for Long-Term Language-Guided
                Mobile Manipulation},
  journal    = {{IEEE} Robotics Autom. Lett.},
  volume     = {10},
  number     = {5},
  pages      = {4252--4259},
  year       = {2025},
  url        = {https://doi.org/10.1109/LRA.2025.3547643},
  doi        = {10.1109/LRA.2025.3547643},
  timestamp  = {Fri, 09 May 2025 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/ral/YanLWWWZCL25.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Robotics Comput. Integr. Manuf. article (2025).
% Title: the system name VLM-MSGraph is brace-protected against sentence-casing.
@article{DBLP:journals/rcim/LiYWG25,
  author     = {Li, Shufei and
                Yan, Zhijie and
                Wang, Zuoxu and
                Gao, Yiping},
  title      = {{VLM-MSGraph}: Vision Language Model-enabled Multi-hierarchical Scene
                Graph for robotic assembly},
  journal    = {Robotics Comput. Integr. Manuf.},
  volume     = {94},
  pages      = {102978},
  year       = {2025},
  url        = {https://doi.org/10.1016/j.rcim.2025.102978},
  doi        = {10.1016/J.RCIM.2025.102978},
  timestamp  = {Tue, 01 Apr 2025 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/rcim/LiYWG25.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% ICML 2025 conference paper.
% Title: OmniAudio is brace-protected against sentence-casing.
% volume: PMLR series volume, taken from the v267 component of the url -- confirm against the proceedings.
@inproceedings{DBLP:conf/icml/LiuLLJSW000LZY025,
  author     = {Liu, Huadai and
                Luo, Tianyi and
                Luo, Kaicheng and
                Jiang, Qikai and
                Sun, Peiwen and
                Wang, Jialei and
                Huang, Rongjie and
                Chen, Qian and
                Wang, Wen and
                Li, Xiangtai and
                Zhang, Shiliang and
                Yan, Zhijie and
                Zhao, Zhou and
                Xue, Wei},
  editor     = {Singh, Aarti and
                Fazel, Maryam and
                Hsu, Daniel and
                Lacoste{-}Julien, Simon and
                Berkenkamp, Felix and
                Maharaj, Tegan and
                Wagstaff, Kiri and
                Zhu, Jerry},
  title      = {{OmniAudio}: Generating Spatial Audio from 360-Degree Video},
  booktitle  = {Forty-second International Conference on Machine Learning, {ICML}
                2025, Vancouver, BC, Canada, July 13-19, 2025},
  series     = {Proceedings of Machine Learning Research},
  volume     = {267},
  publisher  = {{PMLR} / OpenReview.net},
  year       = {2025},
  url        = {https://proceedings.mlr.press/v267/liu25as.html},
  timestamp  = {Wed, 04 Feb 2026 16:54:16 +0100},
  biburl     = {https://dblp.org/rec/conf/icml/LiuLLJSW000LZY025.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR) 2501.06282.
% Title: MinMo is brace-protected against sentence-casing.
@article{DBLP:journals/corr/abs-2501-06282,
  author     = {Chen, Qian and
                Chen, Yafeng and
                Chen, Yanni and
                Chen, Mengzhe and
                Chen, Yingda and
                Deng, Chong and
                Du, Zhihao and
                Gao, Ruize and
                Gao, Changfeng and
                Gao, Zhifu and
                Li, Yabin and
                Lv, Xiang and
                Liu, Jiaqing and
                Luo, Haoneng and
                Ma, Bin and
                Ni, Chongjia and
                Shi, Xian and
                Tang, Jialong and
                Wang, Hui and
                Wang, Hao and
                Wang, Wen and
                Wang, Yuxuan and
                Xu, Yunlan and
                Yu, Fan and
                Yan, Zhijie and
                Yang, Yexin and
                Yang, Baosong and
                Yang, Xian and
                Yang, Guanrou and
                Zhao, Tianyu and
                Zhang, Qinglin and
                Zhang, Shiliang and
                Zhao, Nan and
                Zhang, Pei and
                Zhang, Chong and
                Zhou, Jinren},
  title      = {{MinMo}: {A} Multimodal Large Language Model for Seamless Voice Interaction},
  journal    = {CoRR},
  volume     = {abs/2501.06282},
  year       = {2025},
  url        = {https://doi.org/10.48550/arXiv.2501.06282},
  doi        = {10.48550/ARXIV.2501.06282},
  eprinttype = {arXiv},
  eprint     = {2501.06282},
  timestamp  = {Tue, 24 Feb 2026 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2501-06282.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR) 2502.11094.
% Title: SyncSpeech is brace-protected against sentence-casing.
@article{DBLP:journals/corr/abs-2502-11094,
  author     = {Sheng, Zhengyan and
                Du, Zhihao and
                Zhang, Shiliang and
                Yan, Zhijie and
                Yang, Yexin and
                Ling, Zhenhua},
  title      = {{SyncSpeech}: Low-Latency and Efficient Dual-Stream Text-to-Speech based
                on Temporal Masked Transformer},
  journal    = {CoRR},
  volume     = {abs/2502.11094},
  year       = {2025},
  url        = {https://doi.org/10.48550/arXiv.2502.11094},
  doi        = {10.48550/ARXIV.2502.11094},
  eprinttype = {arXiv},
  eprint     = {2502.11094},
  timestamp  = {Mon, 17 Mar 2025 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2502-11094.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR) 2504.14906.
% Title: OmniAudio is brace-protected against sentence-casing.
@article{DBLP:journals/corr/abs-2504-14906,
  author     = {Liu, Huadai and
                Luo, Tianyi and
                Jiang, Qikai and
                Luo, Kaicheng and
                Sun, Peiwen and
                Wang, Jialei and
                Huang, Rongjie and
                Chen, Qian and
                Wang, Wen and
                Li, Xiangtai and
                Zhang, Shiliang and
                Yan, Zhijie and
                Zhao, Zhou and
                Xue, Wei},
  title      = {{OmniAudio}: Generating Spatial Audio from 360-Degree Video},
  journal    = {CoRR},
  volume     = {abs/2504.14906},
  year       = {2025},
  url        = {https://doi.org/10.48550/arXiv.2504.14906},
  doi        = {10.48550/ARXIV.2504.14906},
  eprinttype = {arXiv},
  eprint     = {2504.14906},
  timestamp  = {Sun, 10 Aug 2025 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2504-14906.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Int. J. Comput. Intell. Syst. article (2024).
@article{DBLP:journals/ijcisys/ZhangWYJWT24,
  author     = {Zhang, Linghao and
                Wang, Luqing and
                Yan, Zhijie and
                Jia, Zhentang and
                Wang, Hongjun and
                Tang, Xinyu},
  title      = {Star Generative Adversarial {VGG} Network-Based Sample Augmentation
                for Insulator Defect Detection},
  journal    = {Int. J. Comput. Intell. Syst.},
  volume     = {17},
  number     = {1},
  pages      = {141},
  year       = {2024},
  url        = {https://doi.org/10.1007/s44196-024-00524-6},
  doi        = {10.1007/S44196-024-00524-6},
  timestamp  = {Mon, 09 Dec 2024 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/ijcisys/ZhangWYJWT24.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Int. J. Comput. Intell. Syst. correction notice (2024).
@article{DBLP:journals/ijcisys/ZhangWYJWT24a,
  author     = {Zhang, Linghao and
                Wang, Luqing and
                Yan, Zhijie and
                Jia, Zhentang and
                Wang, Hongjun and
                Tang, Xinyu},
  title      = {Correction: Star Generative Adversarial {VGG} Network-Based Sample
                Augmentation for Insulator Defect Detection},
  journal    = {Int. J. Comput. Intell. Syst.},
  volume     = {17},
  number     = {1},
  pages      = {149},
  year       = {2024},
  url        = {https://doi.org/10.1007/s44196-024-00558-w},
  doi        = {10.1007/S44196-024-00558-W},
  timestamp  = {Mon, 04 Nov 2024 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/ijcisys/ZhangWYJWT24a.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% CASE 2024 conference paper.
@inproceedings{DBLP:conf/case/LiWYGJZ24,
  author     = {Li, Shufei and
                Wang, Zuoxu and
                Yan, Zhijie and
                Gao, Yiping and
                Jiang, Han and
                Zheng, Pai},
  title      = {Large Language Model for Humanoid Cognition in Proactive Human-Robot
                Collaboration},
  booktitle  = {20th {IEEE} International Conference on Automation Science and Engineering,
                {CASE} 2024, Bari, Italy, August 28 - Sept. 1, 2024},
  pages      = {540--545},
  publisher  = {{IEEE}},
  year       = {2024},
  url        = {https://doi.org/10.1109/CASE59546.2024.10711379},
  doi        = {10.1109/CASE59546.2024.10711379},
  timestamp  = {Thu, 07 Nov 2024 10:23:02 +0100},
  biburl     = {https://dblp.org/rec/conf/case/LiWYGJZ24.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% CASE 2024 conference paper.
% Title: ManufVisSGG is brace-protected against sentence-casing.
@inproceedings{DBLP:conf/case/YanWLLLL24,
  author     = {Yan, Zhijie and
                Wang, Zuoxu and
                Li, Shufei and
                Li, Mingrui and
                Liang, Xinxin and
                Liu, Jihong},
  title      = {{ManufVisSGG}: {A} Vision-Language-Model Approach for Cognitive Scene
                Graph Generation in Manufacturing Systems},
  booktitle  = {20th {IEEE} International Conference on Automation Science and Engineering,
                {CASE} 2024, Bari, Italy, August 28 - Sept. 1, 2024},
  pages      = {1632--1637},
  publisher  = {{IEEE}},
  year       = {2024},
  url        = {https://doi.org/10.1109/CASE59546.2024.10711649},
  doi        = {10.1109/CASE59546.2024.10711649},
  timestamp  = {Sat, 06 Sep 2025 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/case/YanWLLLL24.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% ECCV 2024 conference paper.
% Title: TOD3Cap and 3D are brace-protected against sentence-casing.
% doi/url: stored raw (no backslash before the underscore); url-aware styles handle the underscore themselves.
@inproceedings{DBLP:conf/eccv/JinZLLZHLZYSZJLCZ24,
  author     = {Jin, Bu and
                Zheng, Yupeng and
                Li, Pengfei and
                Li, Weize and
                Zheng, Yuhang and
                Hu, Sujie and
                Liu, Xinyu and
                Zhu, Jinwei and
                Yan, Zhijie and
                Sun, Haiyang and
                Zhan, Kun and
                Jia, Peng and
                Long, Xiaoxiao and
                Chen, Yilun and
                Zhao, Hao},
  editor     = {Leonardis, Ales and
                Ricci, Elisa and
                Roth, Stefan and
                Russakovsky, Olga and
                Sattler, Torsten and
                Varol, G{\"{u}}l},
  title      = {{TOD3Cap}: Towards {3D} Dense Captioning in Outdoor Scenes},
  booktitle  = {Computer Vision - {ECCV} 2024 - 18th European Conference, Milan, Italy,
                September 29-October 4, 2024, Proceedings, Part {XVIII}},
  series     = {Lecture Notes in Computer Science},
  pages      = {367--384},
  publisher  = {Springer},
  year       = {2024},
  url        = {https://doi.org/10.1007/978-3-031-72649-1_21},
  doi        = {10.1007/978-3-031-72649-1_21},
  timestamp  = {Tue, 05 May 2026 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/eccv/JinZLLZHLZYSZJLCZ24.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% IROS 2024 conference paper.
@inproceedings{DBLP:conf/iros/ZhengWYT0ZCG24,
  author     = {Zheng, Xiaoji and
                Wu, Lixiu and
                Yan, Zhijie and
                Tang, Yuanrong and
                Zhao, Hao and
                Zhong, Chen and
                Chen, Bokui and
                Gong, Jiangtao},
  title      = {Large Language Models Powered Context-aware Motion Prediction in Autonomous
                Driving},
  booktitle  = {{IEEE/RSJ} International Conference on Intelligent Robots and Systems,
                {IROS} 2024, Abu Dhabi, United Arab Emirates, October 14-18, 2024},
  pages      = {980--985},
  publisher  = {{IEEE}},
  year       = {2024},
  url        = {https://doi.org/10.1109/IROS58592.2024.10802397},
  doi        = {10.1109/IROS58592.2024.10802397},
  timestamp  = {Sat, 06 Sep 2025 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/iros/ZhengWYT0ZCG24.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR) 2403.11057.
@article{DBLP:journals/corr/abs-2403-11057,
  author     = {Zheng, Xiaoji and
                Wu, Lixiu and
                Yan, Zhijie and
                Tang, Yuanrong and
                Zhao, Hao and
                Zhong, Chen and
                Chen, Bokui and
                Gong, Jiangtao},
  title      = {Large Language Models Powered Context-aware Motion Prediction},
  journal    = {CoRR},
  volume     = {abs/2403.11057},
  year       = {2024},
  url        = {https://doi.org/10.48550/arXiv.2403.11057},
  doi        = {10.48550/ARXIV.2403.11057},
  eprinttype = {arXiv},
  eprint     = {2403.11057},
  timestamp  = {Tue, 30 Jul 2024 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2403-11057.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR) 2403.19589.
% Title: TOD3Cap and 3D are brace-protected against sentence-casing.
@article{DBLP:journals/corr/abs-2403-19589,
  author     = {Jin, Bu and
                Zheng, Yupeng and
                Li, Pengfei and
                Li, Weize and
                Zheng, Yuhang and
                Hu, Sujie and
                Liu, Xinyu and
                Zhu, Jinwei and
                Yan, Zhijie and
                Sun, Haiyang and
                Zhan, Kun and
                Jia, Peng and
                Long, Xiaoxiao and
                Chen, Yilun and
                Zhao, Hao},
  title      = {{TOD3Cap}: Towards {3D} Dense Captioning in Outdoor Scenes},
  journal    = {CoRR},
  volume     = {abs/2403.19589},
  year       = {2024},
  url        = {https://doi.org/10.48550/arXiv.2403.19589},
  doi        = {10.48550/ARXIV.2403.19589},
  eprinttype = {arXiv},
  eprint     = {2403.19589},
  timestamp  = {Tue, 05 May 2026 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2403-19589.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR) 2407.04051.
% Title: FunAudioLLM and LLMs are brace-protected against sentence-casing.
@article{DBLP:journals/corr/abs-2407-04051,
  author     = {An, Keyu and
                Chen, Qian and
                Deng, Chong and
                Du, Zhihao and
                Gao, Changfeng and
                Gao, Zhifu and
                Gu, Yue and
                He, Ting and
                Hu, Hangrui and
                Hu, Kai and
                Ji, Shengpeng and
                Li, Yabin and
                Li, Zerui and
                Lu, Heng and
                Luo, Haoneng and
                Lv, Xiang and
                Ma, Bin and
                Ma, Ziyang and
                Ni, Chongjia and
                Song, Changhe and
                Shi, Jiaqi and
                Shi, Xian and
                Wang, Hao and
                Wang, Wen and
                Wang, Yuxuan and
                Xiao, Zhangyu and
                Yan, Zhijie and
                Yang, Yexin and
                Zhang, Bin and
                Zhang, Qinglin and
                Zhang, Shiliang and
                Zhao, Nan and
                Zheng, Siqi},
  title      = {{FunAudioLLM}: Voice Understanding and Generation Foundation Models
                for Natural Interaction Between Humans and {LLMs}},
  journal    = {CoRR},
  volume     = {abs/2407.04051},
  year       = {2024},
  url        = {https://doi.org/10.48550/arXiv.2407.04051},
  doi        = {10.48550/ARXIV.2407.04051},
  eprinttype = {arXiv},
  eprint     = {2407.04051},
  timestamp  = {Tue, 24 Feb 2026 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2407-04051.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR) 2407.05407.
% Title: CosyVoice is brace-protected against sentence-casing.
@article{DBLP:journals/corr/abs-2407-05407,
  author     = {Du, Zhihao and
                Chen, Qian and
                Zhang, Shiliang and
                Hu, Kai and
                Lu, Heng and
                Yang, Yexin and
                Hu, Hangrui and
                Zheng, Siqi and
                Gu, Yue and
                Ma, Ziyang and
                Gao, Zhifu and
                Yan, Zhijie},
  title      = {{CosyVoice}: {A} Scalable Multilingual Zero-shot Text-to-speech Synthesizer
                based on Supervised Semantic Tokens},
  journal    = {CoRR},
  volume     = {abs/2407.05407},
  year       = {2024},
  url        = {https://doi.org/10.48550/arXiv.2407.05407},
  doi        = {10.48550/ARXIV.2407.05407},
  eprinttype = {arXiv},
  eprint     = {2407.05407},
  timestamp  = {Sun, 15 Jun 2025 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2407-05407.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR) 2409.17750.
% Title: the mid-sentence article A is no longer brace-forced to upper case, and An
% (which opens the second sentence after the question mark) is protected instead.
@article{DBLP:journals/corr/abs-2409-17750,
  author     = {An, Keyu and
                Zhang, Shiliang and
                Yan, Zhijie},
  title      = {Are Transformers in Pre-trained {LM} A Good {ASR} Encoder? {An} Empirical
                Study},
  journal    = {CoRR},
  volume     = {abs/2409.17750},
  year       = {2024},
  url        = {https://doi.org/10.48550/arXiv.2409.17750},
  doi        = {10.48550/ARXIV.2409.17750},
  eprinttype = {arXiv},
  eprint     = {2409.17750},
  timestamp  = {Mon, 21 Oct 2024 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2409-17750.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR) 2410.11989.
% Title: 3D is brace-protected so sentence-casing styles do not render it as 3d.
@article{DBLP:journals/corr/abs-2410-11989,
  author     = {Yan, Zhijie and
                Li, Shufei and
                Wang, Zuoxu and
                Wu, Lixiu and
                Wang, Han and
                Zhu, Jun and
                Chen, Lijiang and
                Liu, Jihong},
  title      = {Dynamic Open-Vocabulary {3D} Scene Graphs for Long-term Language-Guided
                Mobile Manipulation},
  journal    = {CoRR},
  volume     = {abs/2410.11989},
  year       = {2024},
  url        = {https://doi.org/10.48550/arXiv.2410.11989},
  doi        = {10.48550/ARXIV.2410.11989},
  eprinttype = {arXiv},
  eprint     = {2410.11989},
  timestamp  = {Sun, 24 Nov 2024 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2410-11989.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR) 2412.10117.
% Title: CosyVoice is brace-protected against sentence-casing.
@article{DBLP:journals/corr/abs-2412-10117,
  author     = {Du, Zhihao and
                Wang, Yuxuan and
                Chen, Qian and
                Shi, Xian and
                Lv, Xiang and
                Zhao, Tianyu and
                Gao, Zhifu and
                Yang, Yexin and
                Gao, Changfeng and
                Wang, Hui and
                Yu, Fan and
                Liu, Huadai and
                Sheng, Zhengyan and
                Gu, Yue and
                Deng, Chong and
                Wang, Wen and
                Zhang, Shiliang and
                Yan, Zhijie and
                Zhou, Jingren},
  title      = {{CosyVoice} 2: Scalable Streaming Speech Synthesis with Large Language
                Models},
  journal    = {CoRR},
  volume     = {abs/2412.10117},
  year       = {2024},
  url        = {https://doi.org/10.48550/arXiv.2412.10117},
  doi        = {10.48550/ARXIV.2412.10117},
  eprinttype = {arXiv},
  eprint     = {2412.10117},
  timestamp  = {Tue, 24 Feb 2026 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2412-10117.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% ASRU 2023 workshop paper.
% Title: the challenge acronym M2MeT is brace-protected against sentence-casing.
@inproceedings{DBLP:conf/asru/LiangSYLZDCXQWCLYB23,
  author     = {Liang, Yuhao and
                Shi, Mohan and
                Yu, Fan and
                Li, Yangze and
                Zhang, Shiliang and
                Du, Zhihao and
                Chen, Qian and
                Xie, Lei and
                Qian, Yanmin and
                Wu, Jian and
                Chen, Zhuo and
                Lee, Kong Aik and
                Yan, Zhijie and
                Bu, Hui},
  title      = {The Second Multi-Channel Multi-Party Meeting Transcription Challenge
                ({M2MeT} 2.0): {A} Benchmark for Speaker-Attributed {ASR}},
  booktitle  = {{IEEE} Automatic Speech Recognition and Understanding Workshop, {ASRU}
                2023, Taipei, Taiwan, December 16-20, 2023},
  pages      = {1--8},
  publisher  = {{IEEE}},
  year       = {2023},
  url        = {https://doi.org/10.1109/ASRU57964.2023.10389625},
  doi        = {10.1109/ASRU57964.2023.10389625},
  timestamp  = {Tue, 17 Jun 2025 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/asru/LiangSYLZDCXQWCLYB23.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% CASE 2023 conference paper.
% Title: the method name Doc2Vec-GAT is brace-protected against sentence-casing.
@inproceedings{DBLP:conf/case/LiWYL23,
  author     = {Li, Mingrui and
                Wang, Zuoxu and
                Yan, Zhijie and
                Liu, Jihong},
  title      = {Exploiting Patent Documents for Cross-Domain Knowledge Transfer in
                Innovative Engineering Design: {A} {Doc2Vec-GAT}-Based Approach},
  booktitle  = {19th {IEEE} International Conference on Automation Science and Engineering,
                {CASE} 2023, Auckland, New Zealand, August 26-30, 2023},
  pages      = {1--6},
  publisher  = {{IEEE}},
  year       = {2023},
  url        = {https://doi.org/10.1109/CASE56687.2023.10260662},
  doi        = {10.1109/CASE56687.2023.10260662},
  timestamp  = {Sun, 06 Oct 2024 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/case/LiWYL23.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% CICAI 2023 conference paper.
% Title: the whole simulator name (M, superscript 2, Sim) sits in one brace group so
% sentence-casing styles cannot lowercase the trailing Sim.
% doi/url: stored raw (no backslash before the underscore).
@inproceedings{DBLP:conf/cicba/HanYLLSLGSHGZCZZ23,
  author     = {Han, Zhengxiao and
                Yan, Zhijie and
                Li, Yang and
                Li, Pengfei and
                Shi, Yifeng and
                Luo, Nairui and
                Gao, Xu and
                Shi, Yongliang and
                Huang, Pengfei and
                Gong, Jiangtao and
                Zhou, Guyue and
                Chen, Yilun and
                Zhao, Hang and
                Zhao, Hao},
  editor     = {Fang, Lu and
                Pei, Jian and
                Zhai, Guangtao and
                Wang, Ruiping},
  title      = {{M\({}^{\mbox{2}}\)Sim}: {A} Long-Term Interactive Driving Simulator},
  booktitle  = {Artificial Intelligence - Third {CAAI} International Conference, {CICAI}
                2023, Fuzhou, China, July 22-23, 2023, Revised Selected Papers, Part
                {II}},
  series     = {Lecture Notes in Computer Science},
  pages      = {172--176},
  publisher  = {Springer},
  year       = {2023},
  url        = {https://doi.org/10.1007/978-981-99-9119-8_16},
  doi        = {10.1007/978-981-99-9119-8_16},
  timestamp  = {Tue, 14 Oct 2025 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/cicba/HanYLLSLGSHGZCZZ23.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% CICAI 2023 conference paper.
% doi/url: stored raw (no backslash before the underscore); url-aware styles handle the underscore themselves.
@inproceedings{DBLP:conf/cicba/HanYLLSLGSHGZCZZ23a,
  author     = {Han, Zhengxiao and
                Yan, Zhijie and
                Li, Yang and
                Li, Pengfei and
                Shi, Yifeng and
                Luo, Nairui and
                Gao, Xu and
                Shi, Yongliang and
                Huang, Pengfei and
                Gong, Jiangtao and
                Zhou, Guyue and
                Chen, Yilun and
                Zhao, Hang and
                Zhao, Hao},
  editor     = {Fang, Lu and
                Pei, Jian and
                Zhai, Guangtao and
                Wang, Ruiping},
  title      = {Long-Term Interactive Driving Simulation: {MPC} to the Rescue},
  booktitle  = {Artificial Intelligence - Third {CAAI} International Conference, {CICAI}
                2023, Fuzhou, China, July 22-23, 2023, Revised Selected Papers, Part
                {II}},
  series     = {Lecture Notes in Computer Science},
  pages      = {177--188},
  publisher  = {Springer},
  year       = {2023},
  url        = {https://doi.org/10.1007/978-981-99-9119-8_17},
  doi        = {10.1007/978-981-99-9119-8_17},
  timestamp  = {Tue, 14 Oct 2025 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/cicba/HanYLLSLGSHGZCZZ23a.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% ICASSP 2023 conference paper.
% booktitle: comma added before the ICASSP acronym, matching the ICASSP 2022 records in this file.
@inproceedings{DBLP:conf/icassp/ZhangDLYCWYL0Z23,
  author     = {Zhang, Qinglin and
                Deng, Chong and
                Liu, Jiaqing and
                Yu, Hai and
                Chen, Qian and
                Wang, Wen and
                Yan, Zhijie and
                Liu, Jinglin and
                Ren, Yi and
                Zhao, Zhou},
  title      = {Overview of the {ICASSP} 2023 General Meeting Understanding and Generation
                Challenge {(MUG)}},
  booktitle  = {{IEEE} International Conference on Acoustics, Speech and Signal Processing,
                {ICASSP} 2023, Rhodes Island, Greece, June 4-10, 2023},
  pages      = {1--2},
  publisher  = {{IEEE}},
  year       = {2023},
  url        = {https://doi.org/10.1109/ICASSP49357.2023.10433920},
  doi        = {10.1109/ICASSP49357.2023.10433920},
  timestamp  = {Fri, 14 Feb 2025 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/icassp/ZhangDLYCWYL0Z23.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% ICASSP 2023 conference paper.
% booktitle: comma added before the ICASSP acronym, matching the ICASSP 2022 records in this file.
@inproceedings{DBLP:conf/icassp/ZhangDLYCWYLRZ23,
  author     = {Zhang, Qinglin and
                Deng, Chong and
                Liu, Jiaqing and
                Yu, Hai and
                Chen, Qian and
                Wang, Wen and
                Yan, Zhijie and
                Liu, Jinglin and
                Ren, Yi and
                Zhao, Zhou},
  title      = {{MUG:} {A} General Meeting Understanding and Generation Benchmark},
  booktitle  = {{IEEE} International Conference on Acoustics, Speech and Signal Processing,
                {ICASSP} 2023, Rhodes Island, Greece, June 4-10, 2023},
  pages      = {1--5},
  publisher  = {{IEEE}},
  year       = {2023},
  url        = {https://doi.org/10.1109/ICASSP49357.2023.10097149},
  doi        = {10.1109/ICASSP49357.2023.10097149},
  timestamp  = {Fri, 14 Feb 2025 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/icassp/ZhangDLYCWYLRZ23.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% ICCV 2023 conference paper.
@inproceedings{DBLP:conf/iccv/YanLFXSCZLLLLGC23,
  author     = {Yan, Zhijie and
                Li, Pengfei and
                Fu, Zheng and
                Xu, Shaocong and
                Shi, Yongliang and
                Chen, Xiaoxue and
                Zheng, Yuhang and
                Li, Yang and
                Liu, Tianyu and
                Li, Chuxuan and
                Luo, Nairui and
                Gao, Xu and
                Chen, Yilun and
                Wang, Zuoxu and
                Shi, Yifeng and
                Huang, Pengfei and
                Han, Zhengxiao and
                Yuan, Jirui and
                Gong, Jiangtao and
                Zhou, Guyue and
                Zhao, Hang and
                Zhao, Hao},
  title      = {{INT2:} Interactive Trajectory Prediction at Intersections},
  booktitle  = {{IEEE/CVF} International Conference on Computer Vision, {ICCV} 2023,
                Paris, France, October 1-6, 2023},
  pages      = {8502--8513},
  publisher  = {{IEEE}},
  year       = {2023},
  url        = {https://doi.org/10.1109/ICCV51070.2023.00784},
  doi        = {10.1109/ICCV51070.2023.00784},
  timestamp  = {Thu, 13 Nov 2025 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/iccv/YanLFXSCZLLLLGC23.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Interspeech 2023 conference paper.
@inproceedings{DBLP:conf/interspeech/ShiLGZY23,
  author     = {Shi, Xian and
                Luo, Haoneng and
                Gao, Zhifu and
                Zhang, Shiliang and
                Yan, Zhijie},
  editor     = {Harte, Naomi and
                Carson{-}Berndsen, Julie and
                Jones, Gareth},
  title      = {Accurate and Reliable Confidence Estimation Based on Non-Autoregressive
                End-to-End Speech Recognition System},
  booktitle  = {24th Annual Conference of the International Speech Communication Association,
                Interspeech 2023, Dublin, Ireland, August 20-24, 2023},
  pages      = {3247--3251},
  publisher  = {{ISCA}},
  year       = {2023},
  url        = {https://doi.org/10.21437/Interspeech.2023-390},
  doi        = {10.21437/INTERSPEECH.2023-390},
  timestamp  = {Fri, 14 Jun 2024 14:12:12 +0200},
  biburl     = {https://dblp.org/rec/conf/interspeech/ShiLGZY23.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Interspeech 2023 conference paper.
% Title: MMSpeech is brace-protected against sentence-casing.
@inproceedings{DBLP:conf/interspeech/ZhouWCZYZZ23,
  author     = {Zhou, Xiaohuan and
                Wang, Jiaming and
                Cui, Zeyu and
                Zhang, Shiliang and
                Yan, Zhijie and
                Zhou, Jingren and
                Zhou, Chang},
  editor     = {Harte, Naomi and
                Carson{-}Berndsen, Julie and
                Jones, Gareth},
  title      = {{MMSpeech}: Multi-modal Multi-task Encoder-Decoder Pre-training for
                speech recognition},
  booktitle  = {24th Annual Conference of the International Speech Communication Association,
                Interspeech 2023, Dublin, Ireland, August 20-24, 2023},
  pages      = {4943--4947},
  publisher  = {{ISCA}},
  year       = {2023},
  url        = {https://doi.org/10.21437/Interspeech.2023-791},
  doi        = {10.21437/INTERSPEECH.2023-791},
  timestamp  = {Mon, 03 Nov 2025 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/interspeech/ZhouWCZYZZ23.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR) 2301.12343.
@article{DBLP:journals/corr/abs-2301-12343,
  author     = {Shi, Xian and
                Chen, Yanni and
                Zhang, Shiliang and
                Yan, Zhijie},
  title      = {Achieving Timestamp Prediction While Recognizing with Non-Autoregressive
                End-to-End {ASR} Model},
  journal    = {CoRR},
  volume     = {abs/2301.12343},
  year       = {2023},
  url        = {https://doi.org/10.48550/arXiv.2301.12343},
  doi        = {10.48550/ARXIV.2301.12343},
  eprinttype = {arXiv},
  eprint     = {2301.12343},
  timestamp  = {Wed, 01 Feb 2023 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2301-12343.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR) 2303.13932.
@article{DBLP:journals/corr/abs-2303-13932,
  author     = {Zhang, Qinglin and
                Deng, Chong and
                Liu, Jiaqing and
                Yu, Hai and
                Chen, Qian and
                Wang, Wen and
                Yan, Zhijie and
                Liu, Jinglin and
                Ren, Yi and
                Zhao, Zhou},
  title      = {Overview of the {ICASSP} 2023 General Meeting Understanding and Generation
                Challenge {(MUG)}},
  journal    = {CoRR},
  volume     = {abs/2303.13932},
  year       = {2023},
  url        = {https://doi.org/10.48550/arXiv.2303.13932},
  doi        = {10.48550/ARXIV.2303.13932},
  eprinttype = {arXiv},
  eprint     = {2303.13932},
  timestamp  = {Fri, 14 Feb 2025 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2303-13932.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR) 2303.13939.
@article{DBLP:journals/corr/abs-2303-13939,
  author     = {Zhang, Qinglin and
                Deng, Chong and
                Liu, Jiaqing and
                Yu, Hai and
                Chen, Qian and
                Wang, Wen and
                Yan, Zhijie and
                Liu, Jinglin and
                Ren, Yi and
                Zhao, Zhou},
  title      = {{MUG:} {A} General Meeting Understanding and Generation Benchmark},
  journal    = {CoRR},
  volume     = {abs/2303.13939},
  year       = {2023},
  url        = {https://doi.org/10.48550/arXiv.2303.13939},
  doi        = {10.48550/ARXIV.2303.13939},
  eprinttype = {arXiv},
  eprint     = {2303.13939},
  timestamp  = {Fri, 14 Feb 2025 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2303-13939.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR) 2305.10680.
@article{DBLP:journals/corr/abs-2305-10680,
  author     = {Shi, Xian and
                Luo, Haoneng and
                Gao, Zhifu and
                Zhang, Shiliang and
                Yan, Zhijie},
  title      = {Accurate and Reliable Confidence Estimation Based on Non-Autoregressive
                End-to-End Speech Recognition System},
  journal    = {CoRR},
  volume     = {abs/2305.10680},
  year       = {2023},
  url        = {https://doi.org/10.48550/arXiv.2305.10680},
  doi        = {10.48550/ARXIV.2305.10680},
  eprinttype = {arXiv},
  eprint     = {2305.10680},
  timestamp  = {Thu, 25 May 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2305-10680.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR) 2309.13573.
% Title: removed the stray closing parenthesis after M2MeT (the parenthetical is "M2MeT 2.0",
% as in the ASRU 2023 record of the same paper) and brace-protected the acronym.
@article{DBLP:journals/corr/abs-2309-13573,
  author     = {Liang, Yuhao and
                Shi, Mohan and
                Yu, Fan and
                Li, Yangze and
                Zhang, Shiliang and
                Du, Zhihao and
                Chen, Qian and
                Xie, Lei and
                Qian, Yanmin and
                Wu, Jian and
                Chen, Zhuo and
                Lee, Kong Aik and
                Yan, Zhijie and
                Bu, Hui},
  title      = {The second multi-channel multi-party meeting transcription challenge
                ({M2MeT} 2.0): {A} benchmark for speaker-attributed {ASR}},
  journal    = {CoRR},
  volume     = {abs/2309.13573},
  year       = {2023},
  url        = {https://doi.org/10.48550/arXiv.2309.13573},
  doi        = {10.48550/ARXIV.2309.13573},
  eprinttype = {arXiv},
  eprint     = {2309.13573},
  timestamp  = {Tue, 17 Jun 2025 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2309-13573.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR) 2310.04673.
% Title: LauraGPT is brace-protected against sentence-casing.
@article{DBLP:journals/corr/abs-2310-04673,
  author     = {Wang, Jiaming and
                Du, Zhihao and
                Chen, Qian and
                Chu, Yunfei and
                Gao, Zhifu and
                Li, Zerui and
                Hu, Kai and
                Zhou, Xiaohuan and
                Xu, Jin and
                Ma, Ziyang and
                Wang, Wen and
                Zheng, Siqi and
                Zhou, Chang and
                Yan, Zhijie and
                Zhang, Shiliang},
  title      = {{LauraGPT}: Listen, Attend, Understand, and Regenerate Audio with {GPT}},
  journal    = {CoRR},
  volume     = {abs/2310.04673},
  year       = {2023},
  url        = {https://doi.org/10.48550/arXiv.2310.04673},
  doi        = {10.48550/ARXIV.2310.04673},
  eprinttype = {arXiv},
  eprint     = {2310.04673},
  timestamp  = {Thu, 29 Jan 2026 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2310-04673.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR) 2311.07919.
% Title: Qwen-Audio is brace-protected against sentence-casing.
@article{DBLP:journals/corr/abs-2311-07919,
  author     = {Chu, Yunfei and
                Xu, Jin and
                Zhou, Xiaohuan and
                Yang, Qian and
                Zhang, Shiliang and
                Yan, Zhijie and
                Zhou, Chang and
                Zhou, Jingren},
  title      = {{Qwen-Audio}: Advancing Universal Audio Understanding via Unified Large-Scale
                Audio-Language Models},
  journal    = {CoRR},
  volume     = {abs/2311.07919},
  year       = {2023},
  url        = {https://doi.org/10.48550/arXiv.2311.07919},
  doi        = {10.48550/ARXIV.2311.07919},
  eprinttype = {arXiv},
  eprint     = {2311.07919},
  timestamp  = {Fri, 30 Jan 2026 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2311-07919.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint (CoRR) 2312.14860.
@article{DBLP:journals/corr/abs-2312-14860,
  author     = {Zuo, Lingyun and
                An, Keyu and
                Zhang, Shiliang and
                Yan, Zhijie},
  title      = {Advancing {VAD} Systems Based on Multi-Task Learning with Improved
                Model Structures},
  journal    = {CoRR},
  volume     = {abs/2312.14860},
  year       = {2023},
  url        = {https://doi.org/10.48550/arXiv.2312.14860},
  doi        = {10.48550/ARXIV.2312.14860},
  eprinttype = {arXiv},
  eprint     = {2312.14860},
  timestamp  = {Thu, 18 Jan 2024 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2312-14860.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% EMNLP 2022 conference paper.
@inproceedings{DBLP:conf/emnlp/DuZZY22,
  author     = {Du, Zhihao and
                Zhang, Shiliang and
                Zheng, Siqi and
                Yan, Zhi{-}Jie},
  editor     = {Goldberg, Yoav and
                Kozareva, Zornitsa and
                Zhang, Yue},
  title      = {Speaker Overlap-aware Neural Diarization for Multi-party Meeting Analysis},
  booktitle  = {Proceedings of the 2022 Conference on Empirical Methods in Natural
                Language Processing, {EMNLP} 2022, Abu Dhabi, United Arab Emirates,
                December 7-11, 2022},
  pages      = {7458--7469},
  publisher  = {Association for Computational Linguistics},
  year       = {2022},
  url        = {https://doi.org/10.18653/v1/2022.emnlp-main.505},
  doi        = {10.18653/V1/2022.EMNLP-MAIN.505},
  timestamp  = {Thu, 10 Aug 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/emnlp/DuZZY22.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% ICASSP 2022 conference paper.
% Title: restored and brace-protected the acronyms M2MeT and ICASSP, spelled as in the
% other M2MeT / ICASSP records in this file.
@inproceedings{DBLP:conf/icassp/YuZFXZDHGYMXB22,
  author     = {Yu, Fan and
                Zhang, Shiliang and
                Fu, Yihui and
                Xie, Lei and
                Zheng, Siqi and
                Du, Zhihao and
                Huang, Weilong and
                Guo, Pengcheng and
                Yan, Zhijie and
                Ma, Bin and
                Xu, Xin and
                Bu, Hui},
  title      = {{M2MeT}: The {ICASSP} 2022 Multi-Channel Multi-Party Meeting Transcription
                Challenge},
  booktitle  = {{IEEE} International Conference on Acoustics, Speech and Signal Processing,
                {ICASSP} 2022, Virtual and Singapore, 23-27 May 2022},
  pages      = {6167--6171},
  publisher  = {{IEEE}},
  year       = {2022},
  url        = {https://doi.org/10.1109/ICASSP43922.2022.9746465},
  doi        = {10.1109/ICASSP43922.2022.9746465},
  timestamp  = {Tue, 17 Jun 2025 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/icassp/YuZFXZDHGYMXB22.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% ICASSP 2022 conference paper.
% Title: system name restored to ProsoSpeech (as in the arXiv record of the same paper)
% and brace-protected against sentence-casing.
@inproceedings{DBLP:conf/icassp/RenLHZCYZ22,
  author     = {Ren, Yi and
                Lei, Ming and
                Huang, Zhiying and
                Zhang, Shiliang and
                Chen, Qian and
                Yan, Zhijie and
                Zhao, Zhou},
  title      = {{ProsoSpeech}: Enhancing Prosody with Quantized Vector Pre-Training
                in Text-To-Speech},
  booktitle  = {{IEEE} International Conference on Acoustics, Speech and Signal Processing,
                {ICASSP} 2022, Virtual and Singapore, 23-27 May 2022},
  pages      = {7577--7581},
  publisher  = {{IEEE}},
  year       = {2022},
  url        = {https://doi.org/10.1109/ICASSP43922.2022.9746883},
  doi        = {10.1109/ICASSP43922.2022.9746883},
  timestamp  = {Thu, 01 May 2025 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/icassp/RenLHZCYZ22.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% ICASSP 2022 conference paper.
@inproceedings{DBLP:conf/icassp/YuZGFDZHXTWQLYM22,
  author     = {Yu, Fan and
                Zhang, Shiliang and
                Guo, Pengcheng and
                Fu, Yihui and
                Du, Zhihao and
                Zheng, Siqi and
                Huang, Weilong and
                Xie, Lei and
                Tan, Zheng{-}Hua and
                Wang, DeLiang and
                Qian, Yanmin and
                Lee, Kong Aik and
                Yan, Zhijie and
                Ma, Bin and
                Xu, Xin and
                Bu, Hui},
  title      = {Summary on the {ICASSP} 2022 Multi-Channel Multi-Party Meeting Transcription
                Grand Challenge},
  booktitle  = {{IEEE} International Conference on Acoustics, Speech and Signal Processing,
                {ICASSP} 2022, Virtual and Singapore, 23-27 May 2022},
  pages      = {9156--9160},
  publisher  = {{IEEE}},
  year       = {2022},
  url        = {https://doi.org/10.1109/ICASSP43922.2022.9746270},
  doi        = {10.1109/ICASSP43922.2022.9746270},
  timestamp  = {Tue, 17 Jun 2025 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/icassp/YuZGFDZHXTWQLYM22.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% IEA/AIE 2022 conference paper.
% doi/url: stored raw (no backslash before the underscore); url-aware styles handle the underscore themselves.
@inproceedings{DBLP:conf/ieaaie/ZhuSSWHYC22,
  author     = {Zhu, Min and
                Shen, Bingqing and
                Sun, Yan and
                Wang, Chongyu and
                Hou, Guoxin and
                Yan, Zhijie and
                Cai, Hongming},
  editor     = {Fujita, Hamido and
                Fournier{-}Viger, Philippe and
                Ali, Moonis and
                Wang, Yinglin},
  title      = {Surface Defect Detection and Classification Based on Fusing Multiple
                Computer Vision Techniques},
  booktitle  = {Advances and Trends in Artificial Intelligence. Theory and Practices
                in Artificial Intelligence - 35th International Conference on Industrial,
                Engineering and Other Applications of Applied Intelligent Systems,
                {IEA/AIE} 2022, Kitakyushu, Japan, July 19-22, 2022, Proceedings},
  series     = {Lecture Notes in Computer Science},
  pages      = {51--62},
  publisher  = {Springer},
  year       = {2022},
  url        = {https://doi.org/10.1007/978-3-031-08530-7_5},
  doi        = {10.1007/978-3-031-08530-7_5},
  timestamp  = {Wed, 25 Feb 2026 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/ieaaie/ZhuSSWHYC22.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/interspeech/GaoZ0Y22,
  author    = {Zhifu Gao and Shiliang Zhang and Ian McLoughlin and Zhijie Yan},
  editor    = {Hanseok Ko and John H. L. Hansen},
  title     = {Paraformer: Fast and Accurate Parallel Transformer for
               Non-autoregressive End-to-End Speech Recognition},
  booktitle = {23rd Annual Conference of the International Speech Communication
               Association, Interspeech 2022, Incheon, Korea,
               September 18-22, 2022},
  pages     = {2063--2067},
  publisher = {{ISCA}},
  year      = {2022},
  url       = {https://doi.org/10.21437/Interspeech.2022-9996},
  doi       = {10.21437/INTERSPEECH.2022-9996},
  timestamp = {Tue, 11 Jun 2024 16:45:43 +0200},
  biburl    = {https://dblp.org/rec/conf/interspeech/GaoZ0Y22.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2202-03647,
  author     = {Fan Yu and Shiliang Zhang and Pengcheng Guo and Yihui Fu and
                Zhihao Du and Siqi Zheng and Weilong Huang and Lei Xie and
                Zheng{-}Hua Tan and DeLiang Wang and Yanmin Qian and
                Kong Aik Lee and Zhijie Yan and Bin Ma and Xin Xu and Hui Bu},
  title      = {Summary On The {ICASSP} 2022 Multi-Channel Multi-Party Meeting
                Transcription Grand Challenge},
  journal    = {CoRR},
  volume     = {abs/2202.03647},
  year       = {2022},
  url        = {https://arxiv.org/abs/2202.03647},
  eprinttype = {arXiv},
  eprint     = {2202.03647},
  timestamp  = {Tue, 17 Jun 2025 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2202-03647.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Local edit: "ProsoSpeech" is brace-protected so that sentence-casing
% bibliography styles do not fold it to "Prosospeech".
@article{DBLP:journals/corr/abs-2202-07816,
  author     = {Yi Ren and Ming Lei and Zhiying Huang and Shiliang Zhang and
                Qian Chen and Zhijie Yan and Zhou Zhao},
  title      = {{ProsoSpeech}: Enhancing Prosody With Quantized Vector
                Pre-training in Text-to-Speech},
  journal    = {CoRR},
  volume     = {abs/2202.07816},
  year       = {2022},
  url        = {https://arxiv.org/abs/2202.07816},
  eprinttype = {arXiv},
  eprint     = {2202.07816},
  timestamp  = {Fri, 14 Feb 2025 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2202-07816.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2203-09767,
  author     = {Zhihao Du and Shiliang Zhang and Siqi Zheng and Zhijie Yan},
  title      = {Speaker Embedding-aware Neural Diarization: an Efficient
                Framework for Overlapping Speech Diarization in Meeting
                Scenarios},
  journal    = {CoRR},
  volume     = {abs/2203.09767},
  year       = {2022},
  url        = {https://doi.org/10.48550/arXiv.2203.09767},
  doi        = {10.48550/ARXIV.2203.09767},
  eprinttype = {arXiv},
  eprint     = {2203.09767},
  timestamp  = {Mon, 04 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2203-09767.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2206-08317,
  author     = {Zhifu Gao and Shiliang Zhang and Ian McLoughlin and Zhijie Yan},
  title      = {Paraformer: Fast and Accurate Parallel Transformer for
                Non-autoregressive End-to-End Speech Recognition},
  journal    = {CoRR},
  volume     = {abs/2206.08317},
  year       = {2022},
  url        = {https://doi.org/10.48550/arXiv.2206.08317},
  doi        = {10.48550/ARXIV.2206.08317},
  eprinttype = {arXiv},
  eprint     = {2206.08317},
  timestamp  = {Sun, 12 Nov 2023 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2206-08317.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2211-10243,
  author     = {Zhihao Du and Shiliang Zhang and Siqi Zheng and Zhijie Yan},
  title      = {Speaker Overlap-aware Neural Diarization for Multi-party
                Meeting Analysis},
  journal    = {CoRR},
  volume     = {abs/2211.10243},
  year       = {2022},
  url        = {https://doi.org/10.48550/arXiv.2211.10243},
  doi        = {10.48550/ARXIV.2211.10243},
  eprinttype = {arXiv},
  eprint     = {2211.10243},
  timestamp  = {Thu, 24 Nov 2022 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2211-10243.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Local edit: "MMSpeech" is brace-protected so that sentence-casing
% bibliography styles do not fold it to "Mmspeech".
@article{DBLP:journals/corr/abs-2212-00500,
  author     = {Xiaohuan Zhou and Jiaming Wang and Zeyu Cui and
                Shiliang Zhang and Zhijie Yan and Jingren Zhou and Chang Zhou},
  title      = {{MMSpeech}: Multi-modal Multi-task Encoder-Decoder Pre-training
                for Speech Recognition},
  journal    = {CoRR},
  volume     = {abs/2212.00500},
  year       = {2022},
  url        = {https://doi.org/10.48550/arXiv.2212.00500},
  doi        = {10.48550/ARXIV.2212.00500},
  eprinttype = {arXiv},
  eprint     = {2212.00500},
  timestamp  = {Mon, 03 Nov 2025 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2212-00500.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icassp/ZhengHWSFY21,
  author    = {Siqi Zheng and Weilong Huang and Xianliang Wang and
               Hongbin Suo and Jinwei Feng and Zhijie Yan},
  title     = {A Real-Time Speaker Diarization System Based on Spatial Spectrum},
  booktitle = {{IEEE} International Conference on Acoustics, Speech and Signal
               Processing, {ICASSP} 2021, Toronto, ON, Canada, June 6-11, 2021},
  pages     = {7208--7212},
  publisher = {{IEEE}},
  year      = {2021},
  url       = {https://doi.org/10.1109/ICASSP39728.2021.9413544},
  doi       = {10.1109/ICASSP39728.2021.9413544},
  timestamp = {Fri, 09 Jul 2021 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/icassp/ZhengHWSFY21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/interspeech/ZhangZHLSFY21,
  author    = {Shiliang Zhang and Siqi Zheng and Weilong Huang and Ming Lei and
               Hongbin Suo and Jinwei Feng and Zhijie Yan},
  editor    = {Hynek Hermansky and Honza Cernock{\'{y}} and
               Luk{\'{a}}s Burget and Lori Lamel and Odette Scharenborg and
               Petr Motl{\'{\i}}cek},
  title     = {Investigation of Spatial-Acoustic Features for Overlapping
               Speech Detection in Multiparty Meetings},
  booktitle = {22nd Annual Conference of the International Speech Communication
               Association, Interspeech 2021, Brno, Czechia,
               August 30 - September 3, 2021},
  pages     = {3550--3554},
  publisher = {{ISCA}},
  year      = {2021},
  url       = {https://doi.org/10.21437/Interspeech.2021-747},
  doi       = {10.21437/INTERSPEECH.2021-747},
  timestamp = {Tue, 11 Jun 2024 16:45:43 +0200},
  biburl    = {https://dblp.org/rec/conf/interspeech/ZhangZHLSFY21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2107-09321,
  author     = {Siqi Zheng and Weilong Huang and Xianliang Wang and
                Hongbin Suo and Jinwei Feng and Zhijie Yan},
  title      = {A Real-time Speaker Diarization System Based on Spatial
                Spectrum},
  journal    = {CoRR},
  volume     = {abs/2107.09321},
  year       = {2021},
  url        = {https://arxiv.org/abs/2107.09321},
  eprinttype = {arXiv},
  eprint     = {2107.09321},
  timestamp  = {Thu, 29 Jul 2021 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2107-09321.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Local edit: "BeamTransformer" is brace-protected so that sentence-casing
% bibliography styles do not fold it to "Beamtransformer".
@article{DBLP:journals/corr/abs-2109-04049,
  author     = {Siqi Zheng and Shiliang Zhang and Weilong Huang and
                Qian Chen and Hongbin Suo and Ming Lei and Jinwei Feng and
                Zhijie Yan},
  title      = {{BeamTransformer}: Microphone Array-based Overlapping Speech
                Detection},
  journal    = {CoRR},
  volume     = {abs/2109.04049},
  year       = {2021},
  url        = {https://arxiv.org/abs/2109.04049},
  eprinttype = {arXiv},
  eprint     = {2109.04049},
  timestamp  = {Sun, 21 Jul 2024 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2109-04049.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Local edit: the challenge acronym "M2MeT" is brace-protected so that
% sentence-casing bibliography styles do not fold it to "M2met".
@article{DBLP:journals/corr/abs-2110-07393,
  author     = {Fan Yu and Shiliang Zhang and Yihui Fu and Lei Xie and
                Siqi Zheng and Zhihao Du and Weilong Huang and
                Pengcheng Guo and Zhijie Yan and Bin Ma and Xin Xu and Hui Bu},
  title      = {{M2MeT}: The {ICASSP} 2022 Multi-Channel Multi-Party Meeting
                Transcription Challenge},
  journal    = {CoRR},
  volume     = {abs/2110.07393},
  year       = {2021},
  url        = {https://arxiv.org/abs/2110.07393},
  eprinttype = {arXiv},
  eprint     = {2110.07393},
  timestamp  = {Tue, 17 Jun 2025 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2110-07393.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/interspeech/FanLWZCGY20,
  author    = {Kai Fan and Bo Li and Jiayi Wang and Shiliang Zhang and
               Boxing Chen and Niyu Ge and Zhijie Yan},
  editor    = {Helen Meng and Bo Xu and Thomas Fang Zheng},
  title     = {Neural Zero-Inflated Quality Estimation Model for Automatic
               Speech Recognition System},
  booktitle = {21st Annual Conference of the International Speech Communication
               Association, Interspeech 2020, Virtual Event, Shanghai, China,
               October 25-29, 2020},
  pages     = {606--610},
  publisher = {{ISCA}},
  year      = {2020},
  url       = {https://doi.org/10.21437/Interspeech.2020-1881},
  doi       = {10.21437/INTERSPEECH.2020-1881},
  timestamp = {Tue, 21 Apr 2026 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/interspeech/FanLWZCGY20.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/interspeech/ZhangGLLGYX20,
  author    = {Shiliang Zhang and Zhifu Gao and Haoneng Luo and Ming Lei and
               Jie Gao and Zhijie Yan and Lei Xie},
  editor    = {Helen Meng and Bo Xu and Thomas Fang Zheng},
  title     = {Streaming Chunk-Aware Multihead Attention for Online End-to-End
               Speech Recognition},
  booktitle = {21st Annual Conference of the International Speech Communication
               Association, Interspeech 2020, Virtual Event, Shanghai, China,
               October 25-29, 2020},
  pages     = {2142--2146},
  publisher = {{ISCA}},
  year      = {2020},
  url       = {https://doi.org/10.21437/Interspeech.2020-1972},
  doi       = {10.21437/INTERSPEECH.2020-1972},
  timestamp = {Fri, 09 Dec 2022 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/interspeech/ZhangGLLGYX20.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2006-01712,
  author     = {Shiliang Zhang and Zhifu Gao and Haoneng Luo and Ming Lei and
                Jie Gao and Zhijie Yan and Lei Xie},
  title      = {Streaming Chunk-Aware Multihead Attention for Online End-to-End
                Speech Recognition},
  journal    = {CoRR},
  volume     = {abs/2006.01712},
  year       = {2020},
  url        = {https://arxiv.org/abs/2006.01712},
  eprinttype = {arXiv},
  eprint     = {2006.01712},
  timestamp  = {Fri, 09 Dec 2022 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2006-01712.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Local edit: the exported URL carried a double-encoded ampersand (an escaped
% HTML numeric entity for the ampersand character); it is reduced to a single
% escaped ampersand so the query string reads cp=pp, pn=656.
@article{DBLP:journals/jips/SunYLWW19,
  author    = {Zidan Sun and Zhijie Yan and Likai Liang and Ran Wei and
               Wei Wang},
  title     = {Dynamic Thermal Rating of Transmission Line Based on
               Environmental Parameter Estimation},
  journal   = {J. Inf. Process. Syst.},
  volume    = {15},
  number    = {2},
  pages     = {386--398},
  year      = {2019},
  url       = {http://www.jips-k.org/q.jips?cp=pp\&pn=656},
  timestamp = {Fri, 17 Jun 2022 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/journals/jips/SunYLWW19.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Local edit: the acronym "CTC" and the proper noun "Mandarin" are
% brace-protected so that sentence-casing bibliography styles keep their
% capitals.
@inproceedings{DBLP:conf/interspeech/ZhangLY19,
  author    = {Shiliang Zhang and Ming Lei and Zhijie Yan},
  editor    = {Gernot Kubin and Zdravko Kacic},
  title     = {Investigation of Transformer Based Spelling Correction Model for
               {CTC}-Based End-to-End {Mandarin} Speech Recognition},
  booktitle = {20th Annual Conference of the International Speech Communication
               Association, Interspeech 2019, Graz, Austria,
               September 15-19, 2019},
  pages     = {2180--2184},
  publisher = {{ISCA}},
  year      = {2019},
  url       = {https://doi.org/10.21437/Interspeech.2019-1290},
  doi       = {10.21437/INTERSPEECH.2019-1290},
  timestamp = {Tue, 11 Jun 2024 16:45:43 +0200},
  biburl    = {https://dblp.org/rec/conf/interspeech/ZhangLY19.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Local edit: the acronym "CTC" is brace-protected so that sentence-casing
% bibliography styles do not fold it to "ctc".
@article{DBLP:journals/corr/abs-1904-10045,
  author     = {Shiliang Zhang and Ming Lei and Zhijie Yan},
  title      = {Automatic Spelling Correction with Transformer for {CTC}-based
                End-to-End Speech Recognition},
  journal    = {CoRR},
  volume     = {abs/1904.10045},
  year       = {2019},
  url        = {http://arxiv.org/abs/1904.10045},
  eprinttype = {arXiv},
  eprint     = {1904.10045},
  timestamp  = {Sat, 27 Apr 2019 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1904-10045.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/ejwcn/WangTYW18,
  author    = {Yanling Wang and Weihua Tao and Zhijie Yan and Ran Wei},
  title     = {Uncertainty analysis of dynamic thermal rating based on
               environmental parameter estimation},
  journal   = {{EURASIP} J. Wirel. Commun. Netw.},
  volume    = {2018},
  pages     = {167},
  year      = {2018},
  url       = {https://doi.org/10.1186/s13638-018-1181-7},
  doi       = {10.1186/S13638-018-1181-7},
  timestamp = {Sun, 19 Jan 2025 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/journals/ejwcn/WangTYW18.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icassp/BiLZLY18,
  author    = {Mengxiao Bi and Heng Lu and Shiliang Zhang and Ming Lei and
               Zhijie Yan},
  title     = {Deep Feed-Forward Sequential Memory Networks for Speech
               Synthesis},
  booktitle = {2018 {IEEE} International Conference on Acoustics, Speech and
               Signal Processing, {ICASSP} 2018, Calgary, AB, Canada,
               April 15-20, 2018},
  pages     = {4794--4798},
  publisher = {{IEEE}},
  year      = {2018},
  url       = {https://doi.org/10.1109/ICASSP.2018.8461623},
  doi       = {10.1109/ICASSP.2018.8461623},
  timestamp = {Wed, 16 Oct 2019 14:14:52 +0200},
  biburl    = {https://dblp.org/rec/conf/icassp/BiLZLY18.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icassp/HuangLLY18,
  author    = {Zhiying Huang and Heng Lu and Ming Lei and Zhijie Yan},
  title     = {Linear Networks Based Speaker Adaptation for Speech Synthesis},
  booktitle = {2018 {IEEE} International Conference on Acoustics, Speech and
               Signal Processing, {ICASSP} 2018, Calgary, AB, Canada,
               April 15-20, 2018},
  pages     = {5319--5323},
  publisher = {{IEEE}},
  year      = {2018},
  url       = {https://doi.org/10.1109/ICASSP.2018.8462373},
  doi       = {10.1109/ICASSP.2018.8462373},
  timestamp = {Tue, 18 Sep 2018 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/icassp/HuangLLY18.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Local edit: the model name "Deep-FSMN" is brace-protected so that
% sentence-casing bibliography styles do not fold it to "Deep-fsmn".
@inproceedings{DBLP:conf/icassp/ZhangLYD18,
  author    = {Shiliang Zhang and Ming Lei and Zhijie Yan and Lirong Dai},
  title     = {{Deep-FSMN} for Large Vocabulary Continuous Speech Recognition},
  booktitle = {2018 {IEEE} International Conference on Acoustics, Speech and
               Signal Processing, {ICASSP} 2018, Calgary, AB, Canada,
               April 15-20, 2018},
  pages     = {5869--5873},
  publisher = {{IEEE}},
  year      = {2018},
  url       = {https://doi.org/10.1109/ICASSP.2018.8461404},
  doi       = {10.1109/ICASSP.2018.8461404},
  timestamp = {Thu, 27 Aug 2020 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/icassp/ZhangLYD18.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icdsp/XueYYL18,
  author    = {Shaofei Xue and Zhijie Yan and Tao Yu and Zhang Liu},
  title     = {A Study on Improving Acoustic Model for Robust and Far-Field
               Speech Recognition},
  booktitle = {23rd {IEEE} International Conference on Digital Signal
               Processing, {DSP} 2018, Shanghai, China, November 19-21, 2018},
  pages     = {1--5},
  publisher = {{IEEE}},
  year      = {2018},
  url       = {https://doi.org/10.1109/ICDSP.2018.8631862},
  doi       = {10.1109/ICDSP.2018.8631862},
  timestamp = {Mon, 31 Oct 2022 09:05:23 +0100},
  biburl    = {https://dblp.org/rec/conf/icdsp/XueYYL18.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1802-09194,
  author     = {Mengxiao Bi and Heng Lu and Shiliang Zhang and Ming Lei and
                Zhijie Yan},
  title      = {Deep Feed-forward Sequential Memory Networks for Speech
                Synthesis},
  journal    = {CoRR},
  volume     = {abs/1802.09194},
  year       = {2018},
  url        = {http://arxiv.org/abs/1802.09194},
  eprinttype = {arXiv},
  eprint     = {1802.09194},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1802-09194.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1803-02445,
  author     = {Zhiying Huang and Heng Lu and Ming Lei and Zhijie Yan},
  title      = {Linear networks based speaker adaptation for speech synthesis},
  journal    = {CoRR},
  volume     = {abs/1803.02445},
  year       = {2018},
  url        = {http://arxiv.org/abs/1803.02445},
  eprinttype = {arXiv},
  eprint     = {1803.02445},
  timestamp  = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1803-02445.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Local edit: the model name "Deep-FSMN" is brace-protected so that
% sentence-casing bibliography styles do not fold it to "Deep-fsmn".
@article{DBLP:journals/corr/abs-1803-05030,
  author     = {Shiliang Zhang and Ming Lei and Zhijie Yan and Lirong Dai},
  title      = {{Deep-FSMN} for Large Vocabulary Continuous Speech Recognition},
  journal    = {CoRR},
  volume     = {abs/1803.05030},
  year       = {2018},
  url        = {http://arxiv.org/abs/1803.05030},
  eprinttype = {arXiv},
  eprint     = {1803.05030},
  timestamp  = {Thu, 27 Aug 2020 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1803-05030.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/jips/YanWL17,
  author    = {Zhijie Yan and Yanling Wang and Likai Liang},
  title     = {Analysis on Ampacity of Overhead Transmission Lines Being
               Operated},
  journal   = {J. Inf. Process. Syst.},
  volume    = {13},
  number    = {5},
  pages     = {1358--1371},
  year      = {2017},
  url       = {https://doi.org/10.3745/JIPS.04.0044},
  doi       = {10.3745/JIPS.04.0044},
  timestamp = {Tue, 16 Feb 2021 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/journals/jips/YanWL17.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icassp/XueY17,
  author    = {Shaofei Xue and Zhijie Yan},
  title     = {Improving latency-controlled {BLSTM} acoustic models for online
               speech recognition},
  booktitle = {2017 {IEEE} International Conference on Acoustics, Speech and
               Signal Processing, {ICASSP} 2017, New Orleans, LA, USA,
               March 5-9, 2017},
  pages     = {5340--5344},
  publisher = {{IEEE}},
  year      = {2017},
  url       = {https://doi.org/10.1109/ICASSP.2017.7953176},
  doi       = {10.1109/ICASSP.2017.7953176},
  timestamp = {Wed, 16 Oct 2019 14:14:52 +0200},
  biburl    = {https://dblp.org/rec/conf/icassp/XueY17.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/iscslp/HuangXYD16,
  author    = {Zhiying Huang and Shaofei Xue and Zhijie Yan and
               Li{-}Rong Dai},
  title     = {Unsupervised speaker adaptation of {BLSTM-RNN} for {LVCSR}
               based on speaker code},
  booktitle = {10th International Symposium on Chinese Spoken Language
               Processing, {ISCSLP} 2016, Tianjin, China, October 17-20, 2016},
  pages     = {1--5},
  publisher = {{IEEE}},
  year      = {2016},
  url       = {https://doi.org/10.1109/ISCSLP.2016.7918363},
  doi       = {10.1109/ISCSLP.2016.7918363},
  timestamp = {Wed, 18 Sep 2024 12:51:31 +0200},
  biburl    = {https://dblp.org/rec/conf/iscslp/HuangXYD16.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Local edit: the capital in "D-code" is brace-protected so that
% sentence-casing bibliography styles do not fold it to "d-code".
@inproceedings{DBLP:conf/iscslp/XueYHD16,
  author    = {Shaofei Xue and Zhijie Yan and Zhiying Huang and
               Li{-}Rong Dai},
  title     = {Rapid speaker adaptation based on {D}-code extracted from
               {BLSTM-RNN} in {LVCSR}},
  booktitle = {10th International Symposium on Chinese Spoken Language
               Processing, {ISCSLP} 2016, Tianjin, China, October 17-20, 2016},
  pages     = {1--5},
  publisher = {{IEEE}},
  year      = {2016},
  url       = {https://doi.org/10.1109/ISCSLP.2016.7918374},
  doi       = {10.1109/ISCSLP.2016.7918374},
  timestamp = {Thu, 27 Aug 2020 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/iscslp/XueYHD16.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icdar/ChenYH15,
  author    = {Kai Chen and Zhi{-}Jie Yan and Qiang Huo},
  title     = {A context-sensitive-chunk {BPTT} approach to training deep
               {LSTM/BLSTM} recurrent neural networks for offline handwriting
               recognition},
  booktitle = {13th International Conference on Document Analysis and
               Recognition, {ICDAR} 2015, Nancy, France, August 23-26, 2015},
  pages     = {411--415},
  publisher = {{IEEE} Computer Society},
  year      = {2015},
  url       = {https://doi.org/10.1109/ICDAR.2015.7333794},
  doi       = {10.1109/ICDAR.2015.7333794},
  timestamp = {Fri, 24 Mar 2023 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/icdar/ChenYH15.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/interspeech/ChenYH15,
  author    = {Kai Chen and Zhi{-}Jie Yan and Qiang Huo},
  title     = {Training deep bidirectional {LSTM} acoustic model for {LVCSR}
               by a context-sensitive-chunk {BPTT} approach},
  booktitle = {16th Annual Conference of the International Speech Communication
               Association, {INTERSPEECH} 2015, Dresden, Germany,
               September 6-10, 2015},
  pages     = {3600--3604},
  publisher = {{ISCA}},
  year      = {2015},
  url       = {https://doi.org/10.21437/Interspeech.2015-714},
  doi       = {10.21437/INTERSPEECH.2015-714},
  timestamp = {Sun, 19 Jan 2025 13:13:53 +0100},
  biburl    = {https://dblp.org/rec/conf/interspeech/ChenYH15.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/taslp/XuYH14,
  author    = {Jian Xu and Zhi{-}Jie Yan and Qiang Huo},
  title     = {An Unsupervised Adaptation Approach to Leveraging Feedback Loop
               Data by Using i-Vector for Data Clustering and Selection},
  journal   = {{IEEE} {ACM} Trans. Audio Speech Lang. Process.},
  volume    = {22},
  number    = {11},
  pages     = {1581--1589},
  year      = {2014},
  url       = {https://doi.org/10.1109/TASLP.2014.2341911},
  doi       = {10.1109/TASLP.2014.2341911},
  timestamp = {Fri, 26 Jul 2024 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/journals/taslp/XuYH14.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Local edit: the exported journal name was the abbreviation of the older
% "Speech and Audio Processing" title. The DOI prefix 10.1109/TASL and
% volume 21 (2013) correspond to IEEE Transactions on Audio, Speech, and
% Language Processing, so the abbreviation is corrected accordingly.
@article{DBLP:journals/taslp/QianSY13,
  author    = {Yao Qian and Frank K. Soong and Zhi{-}Jie Yan},
  title     = {A Unified Trajectory Tiling Approach to High Quality Speech
               Rendering},
  journal   = {{IEEE} Trans. Audio Speech Lang. Process.},
  volume    = {21},
  number    = {2},
  pages     = {280--290},
  year      = {2013},
  url       = {https://doi.org/10.1109/TASL.2012.2221460},
  doi       = {10.1109/TASL.2012.2221460},
  timestamp = {Sun, 17 May 2020 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/journals/taslp/QianSY13.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icassp/YanHXZ13,
  author    = {Zhi{-}Jie Yan and Qiang Huo and Jian Xu and Yu Zhang},
  title     = {Tied-state based discriminative training of context-expanded
               region-dependent feature transforms for {LVCSR}},
  booktitle = {{IEEE} International Conference on Acoustics, Speech and Signal
               Processing, {ICASSP} 2013, Vancouver, BC, Canada,
               May 26-31, 2013},
  pages     = {6940--6944},
  publisher = {{IEEE}},
  year      = {2013},
  url       = {https://doi.org/10.1109/ICASSP.2013.6639007},
  doi       = {10.1109/ICASSP.2013.6639007},
  timestamp = {Wed, 16 Oct 2019 14:14:52 +0200},
  biburl    = {https://dblp.org/rec/conf/icassp/YanHXZ13.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Local edit: the acronym "DNN" is brace-protected so that sentence-casing
% bibliography styles do not fold it to "dnn".
@inproceedings{DBLP:conf/interspeech/YanHX13,
  author    = {Zhi{-}Jie Yan and Qiang Huo and Jian Xu},
  editor    = {Fr{\'{e}}d{\'{e}}ric Bimbot and Christophe Cerisara and
               C{\'{e}}cile Fougeron and Guillaume Gravier and Lori Lamel and
               Fran{\c{c}}ois Pellegrino and Pascal Perrier},
  title     = {A scalable approach to using {DNN}-derived features in {GMM-HMM}
               based acoustic modeling for {LVCSR}},
  booktitle = {14th Annual Conference of the International Speech Communication
               Association, {INTERSPEECH} 2013, Lyon, France,
               August 25-29, 2013},
  pages     = {104--108},
  publisher = {{ISCA}},
  year      = {2013},
  url       = {https://doi.org/10.21437/Interspeech.2013-47},
  doi       = {10.21437/INTERSPEECH.2013-47},
  timestamp = {Tue, 11 Jun 2024 16:45:43 +0200},
  biburl    = {https://dblp.org/rec/conf/interspeech/YanHX13.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icassp/ZhangXYH12,
  author    = {Yu Zhang and Jian Xu and Zhi{-}Jie Yan and Qiang Huo},
  title     = {A study of discriminative feature extraction for i-vector based
               acoustic sniffing in {IVN} acoustic model training},
  booktitle = {2012 {IEEE} International Conference on Acoustics, Speech and
               Signal Processing, {ICASSP} 2012, Kyoto, Japan,
               March 25-30, 2012},
  pages     = {4077--4080},
  publisher = {{IEEE}},
  year      = {2012},
  url       = {https://doi.org/10.1109/ICASSP.2012.6288814},
  doi       = {10.1109/ICASSP.2012.6288814},
  timestamp = {Wed, 16 Oct 2019 14:14:52 +0200},
  biburl    = {https://dblp.org/rec/conf/icassp/ZhangXYH12.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Local edit: the mixed-case acronym "fMPE" is brace-protected so that
% sentence-casing bibliography styles do not fold it to "fmpe".
@inproceedings{DBLP:conf/iscslp/XuYH12,
  author    = {Jian Xu and Zhi{-}Jie Yan and Qiang Huo},
  title     = {A comparative study of {fMPE} and {RDLT} approaches to {LVCSR}},
  booktitle = {8th International Symposium on Chinese Spoken Language
               Processing, {ISCSLP} 2012, Kowloon Tong, China,
               December 5-8, 2012},
  pages     = {21--24},
  publisher = {{IEEE}},
  year      = {2012},
  url       = {https://doi.org/10.1109/ISCSLP.2012.6423511},
  doi       = {10.1109/ISCSLP.2012.6423511},
  timestamp = {Wed, 18 Sep 2024 12:50:55 +0200},
  biburl    = {https://dblp.org/rec/conf/iscslp/XuYH12.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/iscslp/XuYH12a,
  author    = {Jian Xu and Zhi{-}Jie Yan and Qiang Huo},
  title     = {A feature-transform based approach to unsupervised task
               adaptation and personalization},
  booktitle = {8th International Symposium on Chinese Spoken Language
               Processing, {ISCSLP} 2012, Kowloon Tong, China,
               December 5-8, 2012},
  pages     = {229--232},
  publisher = {{IEEE}},
  year      = {2012},
  url       = {https://doi.org/10.1109/ISCSLP.2012.6423513},
  doi       = {10.1109/ISCSLP.2012.6423513},
  timestamp = {Fri, 19 Jul 2024 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/iscslp/XuYH12a.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Local edit: corrected the misspelled city name in the booktitle (it read
% "San Francsico") and brace-protected the proper noun "Mandarin" in the title
% so that bibliography styles render it capitalised.
@inproceedings{DBLP:conf/mhci/EdgeCWQYS12,
  author    = {Darren Edge and Kai{-}Yin Cheng and Michael Whitney and
               Yao Qian and Zhijie Yan and Frank K. Soong},
  editor    = {Elizabeth F. Churchill and Sriram Subramanian and
               Patrick Baudisch and Kenton O'Hara},
  title     = {Tip tap tones: mobile microtraining of {Mandarin} sounds},
  booktitle = {Mobile {HCI} '12, Companion Proceedings of the 14th
               international conference on Human-computer interaction with
               mobile devices and services, San Francisco, CA, USA,
               September 21-24, 2012},
  pages     = {215--216},
  publisher = {{ACM}},
  year      = {2012},
  url       = {https://doi.org/10.1145/2371664.2371715},
  doi       = {10.1145/2371664.2371715},
  timestamp = {Tue, 06 Nov 2018 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/mhci/EdgeCWQYS12.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Local edit: corrected the misspelled city name in the booktitle (it read
% "San Francsico") and brace-protected the proper noun "Mandarin" in the title
% so that bibliography styles render it capitalised.
@inproceedings{DBLP:conf/mhci/EdgeCWQYS12a,
  author    = {Darren Edge and Kai{-}Yin Cheng and Michael Whitney and
               Yao Qian and Zhijie Yan and Frank K. Soong},
  editor    = {Elizabeth F. Churchill and Sriram Subramanian and
               Patrick Baudisch and Kenton O'Hara},
  title     = {Tip tap tones: mobile microtraining of {Mandarin} sounds},
  booktitle = {Mobile {HCI} '12, Proceedings of the 14th international
               conference on Human-computer interaction with mobile devices and
               services, San Francisco, CA, USA, September 21-24, 2012},
  pages     = {427--430},
  publisher = {{ACM}},
  year      = {2012},
  url       = {https://doi.org/10.1145/2371574.2371640},
  doi       = {10.1145/2371574.2371640},
  timestamp = {Tue, 06 Nov 2018 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/mhci/EdgeCWQYS12a.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icassp/LongYSDG11,
  author    = {Yanhua Long and Zhi{-}Jie Yan and Frank K. Soong and
               Li{-}Rong Dai and Wu Guo},
  title     = {Speaker characterization using spectral subband energy ratio
               based on Harmonic plus Noise Model},
  booktitle = {Proceedings of the {IEEE} International Conference on Acoustics,
               Speech, and Signal Processing, {ICASSP} 2011, May 22-27, 2011,
               Prague Congress Center, Prague, Czech Republic},
  pages     = {4520--4523},
  publisher = {{IEEE}},
  year      = {2011},
  url       = {https://doi.org/10.1109/ICASSP.2011.5947359},
  doi       = {10.1109/ICASSP.2011.5947359},
  timestamp = {Thu, 27 Aug 2020 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/icassp/LongYSDG11.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icassp/ZhangXYH11,
  author    = {Yu Zhang and Jian Xu and Zhi{-}Jie Yan and Qiang Huo},
  title     = {A study of an irrelevant variability normalization based
               discriminative training approach for {LVCSR}},
  booktitle = {Proceedings of the {IEEE} International Conference on Acoustics,
               Speech, and Signal Processing, {ICASSP} 2011, May 22-27, 2011,
               Prague Congress Center, Prague, Czech Republic},
  pages     = {5308--5311},
  publisher = {{IEEE}},
  year      = {2011},
  url       = {https://doi.org/10.1109/ICASSP.2011.5947556},
  doi       = {10.1109/ICASSP.2011.5947556},
  timestamp = {Sun, 01 Apr 2018 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/icassp/ZhangXYH11.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/interspeech/LongYSDG11,
  author    = {Yanhua Long and Zhi{-}Jie Yan and Frank K. Soong and
               Li{-}Rong Dai and Wu Guo},
  title     = {Improvements in Speaker Characterization Using Spectral Subband
               Energy Based on Harmonic plus Noise Model},
  booktitle = {12th Annual Conference of the International Speech Communication
               Association, {INTERSPEECH} 2011, Florence, Italy,
               August 27-31, 2011},
  pages     = {373--376},
  publisher = {{ISCA}},
  year      = {2011},
  url       = {https://doi.org/10.21437/Interspeech.2011-133},
  doi       = {10.21437/INTERSPEECH.2011-133},
  timestamp = {Tue, 11 Jun 2024 16:45:43 +0200},
  biburl    = {https://dblp.org/rec/conf/interspeech/LongYSDG11.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/interspeech/ZhangXYH11,
  author    = {Yu Zhang and Jian Xu and Zhi{-}Jie Yan and Qiang Huo},
  title     = {An i-vector Based Approach to Training Data Clustering for
               Improved Speech Recognition},
  booktitle = {12th Annual Conference of the International Speech Communication
               Association, {INTERSPEECH} 2011, Florence, Italy,
               August 27-31, 2011},
  pages     = {789--792},
  publisher = {{ISCA}},
  year      = {2011},
  url       = {https://doi.org/10.21437/Interspeech.2011-179},
  doi       = {10.21437/INTERSPEECH.2011-179},
  timestamp = {Fri, 23 Jun 2023 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/interspeech/ZhangXYH11.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/interspeech/XuZYH11,
  author    = {Jian Xu and Yu Zhang and Zhi{-}Jie Yan and Qiang Huo},
  title     = {An i-vector Based Approach to Acoustic Sniffing for Irrelevant
               Variability Normalization Based Acoustic Model Training and
               Speech Recognition},
  booktitle = {12th Annual Conference of the International Speech Communication
               Association, {INTERSPEECH} 2011, Florence, Italy,
               August 27-31, 2011},
  pages     = {1701--1704},
  publisher = {{ISCA}},
  year      = {2011},
  url       = {https://doi.org/10.21437/Interspeech.2011-186},
  doi       = {10.21437/INTERSPEECH.2011-186},
  timestamp = {Fri, 23 Jun 2023 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/interspeech/XuZYH11.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/mlsp/0007YH11,
  author    = {Yu Zhang and Zhi{-}Jie Yan and Qiang Huo},
  title     = {A new i-vector approach and its application to irrelevant
               variability normalization based acoustic model training},
  booktitle = {2011 {IEEE} International Workshop on Machine Learning for
               Signal Processing, {MLSP} 2011, Beijing, China,
               September 18-21, 2011},
  pages     = {1--6},
  publisher = {{IEEE}},
  year      = {2011},
  url       = {https://doi.org/10.1109/MLSP.2011.6064637},
  doi       = {10.1109/MLSP.2011.6064637},
  timestamp = {Wed, 16 Oct 2019 14:14:49 +0200},
  biburl    = {https://dblp.org/rec/conf/mlsp/0007YH11.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Local edit: the proper nouns "Microsoft" and "Blizzard Challenge" are
% brace-protected so that sentence-casing bibliography styles keep their
% capitals.
@inproceedings{DBLP:conf/blizzard/QianYWSZW10,
  author    = {Yao Qian and Zhi{-}Jie Yan and Yi{-}Jian Wu and
               Frank K. Soong and Guoliang Zhang and Lijuan Wang},
  title     = {An {HMM} Trajectory Tiling {(HTT)} Approach to High Quality
               {TTS} - {Microsoft} Entry to {Blizzard} {Challenge} 2010},
  booktitle = {The Blizzard Challenge 2010, Kansai Science City, Japan,
               September 25, 2010},
  publisher = {{ISCA}},
  year      = {2010},
  url       = {https://doi.org/10.21437/Blizzard.2010-14},
  doi       = {10.21437/BLIZZARD.2010-14},
  timestamp = {Fri, 20 Sep 2024 10:07:57 +0200},
  biburl    = {https://dblp.org/rec/conf/blizzard/QianYWSZW10.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% ICASSP 2010 paper. The acronym in "HMM-based" is brace-protected so that
% sentence-casing bibliography styles do not render it as "hmm-based".
@inproceedings{DBLP:conf/icassp/ZhangYS10,
  author    = {Yu Zhang and Zhi{-}Jie Yan and Frank K. Soong},
  title     = {Cross-validation based decision tree clustering for
               {HMM}-based {TTS}},
  booktitle = {Proceedings of the {IEEE} International Conference on Acoustics,
               Speech, and Signal Processing, {ICASSP} 2010, 14-19 March 2010,
               Sheraton Dallas Hotel, Dallas, Texas, {USA}},
  pages     = {4602--4605},
  publisher = {{IEEE}},
  year      = {2010},
  url       = {https://doi.org/10.1109/ICASSP.2010.5495560},
  doi       = {10.1109/ICASSP.2010.5495560},
  timestamp = {Fri, 19 May 2017 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/icassp/ZhangYS10.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% ICASSP 2010 paper. The acronym in "HMM-based" is brace-protected so that
% sentence-casing bibliography styles do not render it as "hmm-based".
@inproceedings{DBLP:conf/icassp/ZhangSQYPY10,
  author    = {Qingqing Zhang and Frank K. Soong and Yao Qian and
               Zhijie Yan and Jielin Pan and Yonghong Yan},
  title     = {Improved modeling for {F0} generation and {V/U} decision in
               {HMM}-based {TTS}},
  booktitle = {Proceedings of the {IEEE} International Conference on Acoustics,
               Speech, and Signal Processing, {ICASSP} 2010, 14-19 March 2010,
               Sheraton Dallas Hotel, Dallas, Texas, {USA}},
  pages     = {4606--4609},
  publisher = {{IEEE}},
  year      = {2010},
  url       = {https://doi.org/10.1109/ICASSP.2010.5495561},
  doi       = {10.1109/ICASSP.2010.5495561},
  timestamp = {Fri, 19 May 2017 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/icassp/ZhangSQYPY10.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% ICASSP 2010 paper. The leading word was stored as "RIch-context"; the stray
% capital "I" is corrected so styles that keep title case print "Rich-context".
% NOTE(review): confirm against the publisher record if exact casing matters.
@inproceedings{DBLP:conf/icassp/YanQS10,
  author    = {Zhi{-}Jie Yan and Yao Qian and Frank K. Soong},
  title     = {Rich-context Unit Selection {(RUS)} approach to high quality
               {TTS}},
  booktitle = {Proceedings of the {IEEE} International Conference on Acoustics,
               Speech, and Signal Processing, {ICASSP} 2010, 14-19 March 2010,
               Sheraton Dallas Hotel, Dallas, Texas, {USA}},
  pages     = {4798--4801},
  publisher = {{IEEE}},
  year      = {2010},
  url       = {https://doi.org/10.1109/ICASSP.2010.5495150},
  doi       = {10.1109/ICASSP.2010.5495150},
  timestamp = {Fri, 19 May 2017 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/icassp/YanQS10.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% INTERSPEECH 2010 paper; dblp record, fields aligned and long values reflowed.
@inproceedings{DBLP:conf/interspeech/QianYWSZK10,
  author    = {Yao Qian and Zhi{-}Jie Yan and Yi{-}Jian Wu and
               Frank K. Soong and Xin Zhuang and Shengyi Kong},
  editor    = {Takao Kobayashi and Keikichi Hirose and Satoshi Nakamura},
  title     = {An {HMM} trajectory tiling {(HTT)} approach to high quality
               {TTS}},
  booktitle = {11th Annual Conference of the International Speech Communication
               Association, {INTERSPEECH} 2010, Makuhari, Chiba, Japan,
               September 26-30, 2010},
  pages     = {422--425},
  publisher = {{ISCA}},
  year      = {2010},
  url       = {https://doi.org/10.21437/Interspeech.2010-175},
  doi       = {10.21437/INTERSPEECH.2010-175},
  timestamp = {Tue, 11 Jun 2024 16:45:43 +0200},
  biburl    = {https://dblp.org/rec/conf/interspeech/QianYWSZK10.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% INTERSPEECH 2010 paper. The acronym in "HMM-based" is brace-protected so that
% sentence-casing bibliography styles do not render it as "hmm-based".
@inproceedings{DBLP:conf/interspeech/ChenYS10,
  author    = {Yining Chen and Zhi{-}Jie Yan and Frank K. Soong},
  editor    = {Takao Kobayashi and Keikichi Hirose and Satoshi Nakamura},
  title     = {A perceptual study of acceleration parameters in
               {HMM}-based {TTS}},
  booktitle = {11th Annual Conference of the International Speech Communication
               Association, {INTERSPEECH} 2010, Makuhari, Chiba, Japan,
               September 26-30, 2010},
  pages     = {426--429},
  publisher = {{ISCA}},
  year      = {2010},
  url       = {https://doi.org/10.21437/Interspeech.2010-176},
  doi       = {10.21437/INTERSPEECH.2010-176},
  timestamp = {Fri, 23 Jun 2023 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/interspeech/ChenYS10.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% ICASSP 2009 paper. The plural acronym is written {HMMs}: the braces keep the
% capitals, and the plural "s" stays lowercase instead of the forced "HMMS".
@inproceedings{DBLP:conf/icassp/YanLHJ09,
  author    = {Zhi{-}Jie Yan and Cong Liu and Yu Hu and Hui Jiang},
  title     = {A trust region based optimization for maximum mutual information
               estimation of {HMMs} in speech recognition},
  booktitle = {Proceedings of the {IEEE} International Conference on Acoustics,
               Speech, and Signal Processing, {ICASSP} 2009, 19-24 April 2009,
               Taipei, Taiwan},
  pages     = {3757--3760},
  publisher = {{IEEE}},
  year      = {2009},
  url       = {https://doi.org/10.1109/ICASSP.2009.4960444},
  doi       = {10.1109/ICASSP.2009.4960444},
  timestamp = {Thu, 23 Mar 2023 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/icassp/YanLHJ09.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% INTERSPEECH 2009 paper. The acronym in "HMM-based" is brace-protected so that
% sentence-casing bibliography styles do not render it as "hmm-based".
@inproceedings{DBLP:conf/interspeech/YanQS09,
  author    = {Zhi{-}Jie Yan and Yao Qian and Frank K. Soong},
  title     = {Rich context modeling for high quality {HMM}-based {TTS}},
  booktitle = {10th Annual Conference of the International Speech Communication
               Association, {INTERSPEECH} 2009, Brighton, United Kingdom,
               September 6-10, 2009},
  pages     = {1755--1758},
  publisher = {{ISCA}},
  year      = {2009},
  url       = {https://doi.org/10.21437/Interspeech.2009-142},
  doi       = {10.21437/INTERSPEECH.2009-142},
  timestamp = {Tue, 11 Jun 2024 16:45:43 +0200},
  biburl    = {https://dblp.org/rec/conf/interspeech/YanQS09.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% ICASSP 2008 paper. The plural acronym is written {HMMs}: the braces keep the
% capitals, and the plural "s" stays lowercase instead of the forced "HMMS".
@inproceedings{DBLP:conf/icassp/YanZHW08,
  author    = {Zhi{-}Jie Yan and Bo Zhu and Yu Hu and Ren{-}Hua Wang},
  title     = {Minimum word classification error training of {HMMs} for
               automatic speech recognition},
  booktitle = {Proceedings of the {IEEE} International Conference on Acoustics,
               Speech, and Signal Processing, {ICASSP} 2008,
               March 30 - April 4, 2008, Caesars Palace, Las Vegas, Nevada,
               {USA}},
  pages     = {4521--4524},
  publisher = {{IEEE}},
  year      = {2008},
  url       = {https://doi.org/10.1109/ICASSP.2008.4518661},
  doi       = {10.1109/ICASSP.2008.4518661},
  timestamp = {Wed, 30 Sep 2020 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/icassp/YanZHW08.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% INTERSPEECH 2008 paper; dblp record, fields aligned and long values reflowed.
@inproceedings{DBLP:conf/interspeech/LiYLW08,
  author    = {Jinyu Li and Zhi{-}Jie Yan and Chin{-}Hui Lee and
               Ren{-}Hua Wang},
  title     = {Soft margin estimation with various separation levels for
               {LVCSR}},
  booktitle = {9th Annual Conference of the International Speech Communication
               Association, {INTERSPEECH} 2008, Brisbane, Australia,
               September 22-26, 2008},
  pages     = {269--272},
  publisher = {{ISCA}},
  year      = {2008},
  url       = {https://doi.org/10.21437/Interspeech.2008-100},
  doi       = {10.21437/INTERSPEECH.2008-100},
  timestamp = {Mon, 22 Jul 2024 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/interspeech/LiYLW08.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% ISCSLP 2008 paper. The final title word was stored as "Map"; in this
% adaptation context it is taken to be the acronym MAP and is brace-protected.
% NOTE(review): confirm the acronym reading against the publisher record.
@inproceedings{DBLP:conf/iscslp/ZhuYHWDW08,
  author    = {Bo Zhu and Zhi{-}Jie Yan and Yu Hu and Zhiguo Wang and
               Li{-}Rong Dai and Ren{-}Hua Wang},
  editor    = {Helen M. Meng and Hui Jiang and Jianhua Tao and
               Ren{-}Hua Wang},
  title     = {Investigation on Adaptation Using Different Discriminative
               Training Criteria Based Linear Regression and {MAP}},
  booktitle = {6th International Symposium on Chinese Spoken Language
               Processing, {ISCSLP} 2008, 16-19 December, 2008, Kunming, China},
  pages     = {93--96},
  publisher = {{IEEE}},
  year      = {2008},
  url       = {https://doi.org/10.1109/CHINSL.2008.ECP.35},
  doi       = {10.1109/CHINSL.2008.ECP.35},
  timestamp = {Wed, 18 Sep 2024 12:50:19 +0200},
  biburl    = {https://dblp.org/rec/conf/iscslp/ZhuYHWDW08.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% ASRU 2007 workshop paper; dblp record, fields aligned and long values reflowed.
@inproceedings{DBLP:conf/asru/LiYLW07,
  author    = {Jinyu Li and Zhi{-}Jie Yan and Chin{-}Hui Lee and
               Ren{-}Hua Wang},
  editor    = {Sadaoki Furui and Tatsuya Kawahara},
  title     = {A study on soft margin estimation for {LVCSR}},
  booktitle = {{IEEE} Workshop on Automatic Speech Recognition {\&}
               Understanding, {ASRU} 2007, Kyoto, Japan, December 9-13, 2007},
  pages     = {268--271},
  publisher = {{IEEE}},
  year      = {2007},
  url       = {https://doi.org/10.1109/ASRU.2007.4430122},
  doi       = {10.1109/ASRU.2007.4430122},
  timestamp = {Mon, 22 Jul 2024 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/asru/LiYLW07.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% ICASSP 2007 paper. "USA" in the booktitle is brace-protected to match the
% other ICASSP records in this file and to survive styles that recase it.
@inproceedings{DBLP:conf/icassp/YanSW07,
  author    = {Zhi{-}Jie Yan and Frank K. Soong and Ren{-}Hua Wang},
  title     = {Word Graph Based Feature Enhancement for Noisy Speech
               Recognition},
  booktitle = {Proceedings of the {IEEE} International Conference on Acoustics,
               Speech, and Signal Processing, {ICASSP} 2007, Honolulu, Hawaii,
               {USA}, April 15-20, 2007},
  pages     = {373--376},
  publisher = {{IEEE}},
  year      = {2007},
  url       = {https://doi.org/10.1109/ICASSP.2007.366927},
  doi       = {10.1109/ICASSP.2007.366927},
  timestamp = {Mon, 22 Jun 2020 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/icassp/YanSW07.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% ISCSLP 2006 paper. The language name "Mandarin" is brace-protected so that
% sentence-casing bibliography styles do not lowercase it.
@inproceedings{DBLP:conf/iscslp/0006Y0W06,
  author    = {Cong Liu and Zhijie Yan and Yu Hu and Renhua Wang},
  title     = {A Comparative Study on Confidence Measure in {Mandarin} Command
               Word Recognition},
  booktitle = {5th International Symposium on Chinese Spoken Language
               Processing, {ISCSLP} 2006, Singapore, December 13-16, 2006},
  publisher = {{ISCA}},
  year      = {2006},
  url       = {https://www.isca-archive.org/iscslp\_2006/liu06d\_iscslp.html},
  timestamp = {Thu, 26 Sep 2024 17:06:35 +0200},
  biburl    = {https://dblp.org/rec/conf/iscslp/0006Y0W06.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% ISCSLP 2006 paper. The eponym "Gaussian" is brace-protected so that
% sentence-casing bibliography styles do not lowercase it.
@inproceedings{DBLP:conf/iscslp/Yan0DSW06,
  author    = {Zhijie Yan and Peng Liu and Jun Du and Frank K. Soong and
               Renhua Wang},
  title     = {Training Discriminative {HMM} by Optimal Allocation of
               {Gaussian} Kernels},
  booktitle = {5th International Symposium on Chinese Spoken Language
               Processing, {ISCSLP} 2006, Singapore, December 13-16, 2006},
  publisher = {{ISCA}},
  year      = {2006},
  url       = {https://www.isca-archive.org/iscslp\_2006/yan06b\_iscslp.html},
  timestamp = {Wed, 04 Dec 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/iscslp/Yan0DSW06.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% ISCSLP 2006 selected paper in Springer LNCS. The record named the series but
% not its volume; the LNCS volume number is added so styles can print both.
% NOTE(review): volume 4274 is taken from the Springer listing for this
% proceedings; confirm it against the DOI landing page.
@inproceedings{DBLP:conf/iscslp/YanZSW06,
  author    = {Zhi{-}Jie Yan and Jian{-}Lai Zhou and Frank K. Soong and
               Ren{-}Hua Wang},
  editor    = {Qiang Huo and Bin Ma and Chng Eng Siong and Haizhou Li},
  title     = {Signal Trajectory Based Noise Compensation for Robust Speech
               Recognition},
  booktitle = {Chinese Spoken Language Processing, 5th International Symposium,
               {ISCSLP} 2006, Singapore, December 13-16, 2006, Selected Papers},
  series    = {Lecture Notes in Computer Science},
  volume    = {4274},
  pages     = {335--345},
  publisher = {Springer},
  year      = {2006},
  url       = {https://doi.org/10.1007/11939993\_37},
  doi       = {10.1007/11939993\_37},
  timestamp = {Thu, 26 Sep 2024 14:17:09 +0200},
  biburl    = {https://dblp.org/rec/conf/iscslp/YanZSW06.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
manage site settings
To protect your privacy, all features that rely on external API calls from your browser are turned off by default. You need to opt-in for them to become active. All settings here will be stored as cookies with your web browser. For more information see our F.A.Q.