default search action

combined dblp search
author search
venue search
publication search

ask others

BibTeX records: Zhijie Yan

Name: dblp XML data dump
Creator: Schloss Dagstuhl - Leibniz Center for Informatics
Published: 1993
License: https://creativecommons.org/publicdomain/zero/1.0/
Keywords: dblp, XML, computer science, scholarly publications, metadata

> Home > Persons > Zhijie Yan

download as .bib file

% Conference paper in the NSDI 2026 proceedings (USENIX Association).
@inproceedings{DBLP:conf/nsdi/MiaoZZYLYZJXJMC26,
  author     = {Congcong Miao
                and Xianneng Zou
                and Chuwen Zhang
                and Shiping Yang
                and Qihang Liu
                and Zhijie Yan
                and Yanke Zhang
                and Yong Jiang
                and Qiao Xiang
                and Xin Jin
                and Zili Meng
                and Ang Chen},
  editor     = {Srikanth Kandula
                and Hakim Weatherspoon},
  title      = {A Composable Emulation Framework for Whitebox Switches},
  booktitle  = {23rd {USENIX} Symposium on Networked Systems Design and Implementation, {NSDI} 2026, Renton, WA, May 4-6, 2026},
  pages      = {1653--1667},
  publisher  = {{USENIX} Association},
  year       = {2026},
  url        = {https://www.usenix.org/conference/nsdi26/presentation/miao-whitebox},
  timestamp  = {Mon, 18 May 2026 16:37:21 +0200},
  biburl     = {https://dblp.org/rec/conf/nsdi/MiaoZZYLYZJXJMC26.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}

% Journal article; "pages" holds the article number.
% Title: the coined name IndVisSGG and the acronym VLM are brace-protected so
% that sentence-casing bibliography styles keep their capitals.
@article{DBLP:journals/aei/WangYLL25,
  author     = {Zuoxu Wang
                and Zhijie Yan
                and Shufei Li
                and Jihong Liu},
  title      = {{IndVisSGG}: {VLM}-based scene graph generation for industrial spatial intelligence},
  journal    = {Adv. Eng. Informatics},
  volume     = {65},
  pages      = {103107},
  year       = {2025},
  doi        = {10.1016/J.AEI.2024.103107},
  url        = {https://doi.org/10.1016/j.aei.2024.103107},
  timestamp  = {Fri, 14 Feb 2025 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/aei/WangYLL25.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}

% Journal article (IEEE Robotics Autom. Lett., vol. 10, no. 5).
% Title: "3D" is brace-protected so sentence-casing styles do not render it as "3d".
@article{DBLP:journals/ral/YanLWWWZCL25,
  author     = {Zhijie Yan
                and Shufei Li
                and Zuoxu Wang
                and Lixiu Wu
                and Han Wang
                and Jun Zhu
                and Lijiang Chen
                and Jihong Liu},
  title      = {Dynamic Open-Vocabulary {3D} Scene Graphs for Long-Term Language-Guided Mobile Manipulation},
  journal    = {{IEEE} Robotics Autom. Lett.},
  volume     = {10},
  number     = {5},
  pages      = {4252--4259},
  year       = {2025},
  doi        = {10.1109/LRA.2025.3547643},
  url        = {https://doi.org/10.1109/LRA.2025.3547643},
  timestamp  = {Fri, 09 May 2025 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/ral/YanLWWWZCL25.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}

% Journal article; "pages" holds the article number.
% Title: the coined name VLM-MSGraph is brace-protected so that sentence-casing
% bibliography styles keep its capitals.
@article{DBLP:journals/rcim/LiYWG25,
  author     = {Shufei Li
                and Zhijie Yan
                and Zuoxu Wang
                and Yiping Gao},
  title      = {{VLM-MSGraph}: Vision Language Model-enabled Multi-hierarchical Scene Graph for robotic assembly},
  journal    = {Robotics Comput. Integr. Manuf.},
  volume     = {94},
  pages      = {102978},
  year       = {2025},
  doi        = {10.1016/J.RCIM.2025.102978},
  url        = {https://doi.org/10.1016/j.rcim.2025.102978},
  timestamp  = {Tue, 01 Apr 2025 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/rcim/LiYWG25.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}

% Conference paper, ICML 2025 (PMLR).
% Title: the coined name OmniAudio is brace-protected against sentence-casing.
% volume: 267 is the PMLR volume named in the url path (v267); it pairs with
% "series" so styles can print "volume N of <series>".
% NOTE(review): no page range is recorded for this entry.
@inproceedings{DBLP:conf/icml/LiuLLJSW000LZY025,
  author     = {Huadai Liu
                and Tianyi Luo
                and Kaicheng Luo
                and Qikai Jiang
                and Peiwen Sun
                and Jialei Wang
                and Rongjie Huang
                and Qian Chen
                and Wen Wang
                and Xiangtai Li
                and Shiliang Zhang
                and Zhijie Yan
                and Zhou Zhao
                and Wei Xue},
  editor     = {Aarti Singh
                and Maryam Fazel
                and Daniel Hsu
                and Simon Lacoste{-}Julien
                and Felix Berkenkamp
                and Tegan Maharaj
                and Kiri Wagstaff
                and Jerry Zhu},
  title      = {{OmniAudio}: Generating Spatial Audio from 360-Degree Video},
  booktitle  = {Forty-second International Conference on Machine Learning, {ICML} 2025, Vancouver, BC, Canada, July 13-19, 2025},
  series     = {Proceedings of Machine Learning Research},
  volume     = {267},
  publisher  = {{PMLR} / OpenReview.net},
  year       = {2025},
  url        = {https://proceedings.mlr.press/v267/liu25as.html},
  timestamp  = {Wed, 04 Feb 2026 16:54:16 +0100},
  biburl     = {https://dblp.org/rec/conf/icml/LiuLLJSW000LZY025.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}

% CoRR (arXiv) preprint 2501.06282.
% Title: the coined name MinMo is brace-protected so that sentence-casing
% bibliography styles keep its internal capital.
@article{DBLP:journals/corr/abs-2501-06282,
  author     = {Qian Chen
                and Yafeng Chen
                and Yanni Chen
                and Mengzhe Chen
                and Yingda Chen
                and Chong Deng
                and Zhihao Du
                and Ruize Gao
                and Changfeng Gao
                and Zhifu Gao
                and Yabin Li
                and Xiang Lv
                and Jiaqing Liu
                and Haoneng Luo
                and Bin Ma
                and Chongjia Ni
                and Xian Shi
                and Jialong Tang
                and Hui Wang
                and Hao Wang
                and Wen Wang
                and Yuxuan Wang
                and Yunlan Xu
                and Fan Yu
                and Zhijie Yan
                and Yexin Yang
                and Baosong Yang
                and Xian Yang
                and Guanrou Yang
                and Tianyu Zhao
                and Qinglin Zhang
                and Shiliang Zhang
                and Nan Zhao
                and Pei Zhang
                and Chong Zhang
                and Jinren Zhou},
  title      = {{MinMo}: {A} Multimodal Large Language Model for Seamless Voice Interaction},
  journal    = {CoRR},
  volume     = {abs/2501.06282},
  year       = {2025},
  doi        = {10.48550/ARXIV.2501.06282},
  url        = {https://doi.org/10.48550/arXiv.2501.06282},
  eprinttype = {arXiv},
  eprint     = {2501.06282},
  timestamp  = {Tue, 24 Feb 2026 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2501-06282.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}

% CoRR (arXiv) preprint 2502.11094.
% Title: the coined name SyncSpeech is brace-protected so that sentence-casing
% bibliography styles keep its internal capital.
@article{DBLP:journals/corr/abs-2502-11094,
  author     = {Zhengyan Sheng
                and Zhihao Du
                and Shiliang Zhang
                and Zhijie Yan
                and Yexin Yang
                and Zhenhua Ling},
  title      = {{SyncSpeech}: Low-Latency and Efficient Dual-Stream Text-to-Speech based on Temporal Masked Transformer},
  journal    = {CoRR},
  volume     = {abs/2502.11094},
  year       = {2025},
  doi        = {10.48550/ARXIV.2502.11094},
  url        = {https://doi.org/10.48550/arXiv.2502.11094},
  eprinttype = {arXiv},
  eprint     = {2502.11094},
  timestamp  = {Mon, 17 Mar 2025 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2502-11094.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}

% CoRR (arXiv) preprint 2504.14906; carries the same title as the ICML 2025
% record DBLP:conf/icml/LiuLLJSW000LZY025 in this file.
% Title: the coined name OmniAudio is brace-protected against sentence-casing.
@article{DBLP:journals/corr/abs-2504-14906,
  author     = {Huadai Liu
                and Tianyi Luo
                and Qikai Jiang
                and Kaicheng Luo
                and Peiwen Sun
                and Jialei Wang
                and Rongjie Huang
                and Qian Chen
                and Wen Wang
                and Xiangtai Li
                and Shiliang Zhang
                and Zhijie Yan
                and Zhou Zhao
                and Wei Xue},
  title      = {{OmniAudio}: Generating Spatial Audio from 360-Degree Video},
  journal    = {CoRR},
  volume     = {abs/2504.14906},
  year       = {2025},
  doi        = {10.48550/ARXIV.2504.14906},
  url        = {https://doi.org/10.48550/arXiv.2504.14906},
  eprinttype = {arXiv},
  eprint     = {2504.14906},
  timestamp  = {Sun, 10 Aug 2025 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2504-14906.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}

% Journal article (Int. J. Comput. Intell. Syst., vol. 17, no. 1); "pages" holds a single number.
@article{DBLP:journals/ijcisys/ZhangWYJWT24,
  author     = {Linghao Zhang
                and Luqing Wang
                and Zhijie Yan
                and Zhentang Jia
                and Hongjun Wang
                and Xinyu Tang},
  title      = {Star Generative Adversarial {VGG} Network-Based Sample Augmentation for Insulator Defect Detection},
  journal    = {Int. J. Comput. Intell. Syst.},
  volume     = {17},
  number     = {1},
  pages      = {141},
  year       = {2024},
  doi        = {10.1007/S44196-024-00524-6},
  url        = {https://doi.org/10.1007/s44196-024-00524-6},
  timestamp  = {Mon, 09 Dec 2024 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/ijcisys/ZhangWYJWT24.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}

% Correction notice for DBLP:journals/ijcisys/ZhangWYJWT24 (same journal, volume and number).
@article{DBLP:journals/ijcisys/ZhangWYJWT24a,
  author     = {Linghao Zhang
                and Luqing Wang
                and Zhijie Yan
                and Zhentang Jia
                and Hongjun Wang
                and Xinyu Tang},
  title      = {Correction: Star Generative Adversarial {VGG} Network-Based Sample Augmentation for Insulator Defect Detection},
  journal    = {Int. J. Comput. Intell. Syst.},
  volume     = {17},
  number     = {1},
  pages      = {149},
  year       = {2024},
  doi        = {10.1007/S44196-024-00558-W},
  url        = {https://doi.org/10.1007/s44196-024-00558-w},
  timestamp  = {Mon, 04 Nov 2024 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/ijcisys/ZhangWYJWT24a.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}

% Conference paper, IEEE CASE 2024.
@inproceedings{DBLP:conf/case/LiWYGJZ24,
  author     = {Shufei Li
                and Zuoxu Wang
                and Zhijie Yan
                and Yiping Gao
                and Han Jiang
                and Pai Zheng},
  title      = {Large Language Model for Humanoid Cognition in Proactive Human-Robot Collaboration},
  booktitle  = {20th {IEEE} International Conference on Automation Science and Engineering, {CASE} 2024, Bari, Italy, August 28 - Sept. 1, 2024},
  pages      = {540--545},
  publisher  = {{IEEE}},
  year       = {2024},
  doi        = {10.1109/CASE59546.2024.10711379},
  url        = {https://doi.org/10.1109/CASE59546.2024.10711379},
  timestamp  = {Thu, 07 Nov 2024 10:23:02 +0100},
  biburl     = {https://dblp.org/rec/conf/case/LiWYGJZ24.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}

% Conference paper, IEEE CASE 2024.
% Title: the coined name ManufVisSGG is brace-protected so that sentence-casing
% bibliography styles keep its capitals.
@inproceedings{DBLP:conf/case/YanWLLLL24,
  author     = {Zhijie Yan
                and Zuoxu Wang
                and Shufei Li
                and Mingrui Li
                and Xinxin Liang
                and Jihong Liu},
  title      = {{ManufVisSGG}: {A} Vision-Language-Model Approach for Cognitive Scene Graph Generation in Manufacturing Systems},
  booktitle  = {20th {IEEE} International Conference on Automation Science and Engineering, {CASE} 2024, Bari, Italy, August 28 - Sept. 1, 2024},
  pages      = {1632--1637},
  publisher  = {{IEEE}},
  year       = {2024},
  doi        = {10.1109/CASE59546.2024.10711649},
  url        = {https://doi.org/10.1109/CASE59546.2024.10711649},
  timestamp  = {Sat, 06 Sep 2025 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/case/YanWLLLL24.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}

% Conference paper, ECCV 2024 (Springer LNCS, Part XVIII).
% Title: TOD3Cap and 3D are brace-protected against sentence-casing.
% doi/url: stored raw (no backslash before the underscore); these fields are
% typeset verbatim by url-aware styles, so an escape would end up in the link.
% NOTE(review): "series" is given without an LNCS volume number.
@inproceedings{DBLP:conf/eccv/JinZLLZHLZYSZJLCZ24,
  author     = {Bu Jin
                and Yupeng Zheng
                and Pengfei Li
                and Weize Li
                and Yuhang Zheng
                and Sujie Hu
                and Xinyu Liu
                and Jinwei Zhu
                and Zhijie Yan
                and Haiyang Sun
                and Kun Zhan
                and Peng Jia
                and Xiaoxiao Long
                and Yilun Chen
                and Hao Zhao},
  editor     = {Ales Leonardis
                and Elisa Ricci
                and Stefan Roth
                and Olga Russakovsky
                and Torsten Sattler
                and G{\"{u}}l Varol},
  title      = {{TOD3Cap}: Towards {3D} Dense Captioning in Outdoor Scenes},
  booktitle  = {Computer Vision - {ECCV} 2024 - 18th European Conference, Milan, Italy, September 29-October 4, 2024, Proceedings, Part {XVIII}},
  series     = {Lecture Notes in Computer Science},
  pages      = {367--384},
  publisher  = {Springer},
  year       = {2024},
  doi        = {10.1007/978-3-031-72649-1_21},
  url        = {https://doi.org/10.1007/978-3-031-72649-1_21},
  timestamp  = {Tue, 05 May 2026 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/eccv/JinZLLZHLZYSZJLCZ24.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}

% Conference paper, IEEE/RSJ IROS 2024.
@inproceedings{DBLP:conf/iros/ZhengWYT0ZCG24,
  author     = {Xiaoji Zheng
                and Lixiu Wu
                and Zhijie Yan
                and Yuanrong Tang
                and Hao Zhao
                and Chen Zhong
                and Bokui Chen
                and Jiangtao Gong},
  title      = {Large Language Models Powered Context-aware Motion Prediction in Autonomous Driving},
  booktitle  = {{IEEE/RSJ} International Conference on Intelligent Robots and Systems, {IROS} 2024, Abu Dhabi, United Arab Emirates, October 14-18, 2024},
  pages      = {980--985},
  publisher  = {{IEEE}},
  year       = {2024},
  doi        = {10.1109/IROS58592.2024.10802397},
  url        = {https://doi.org/10.1109/IROS58592.2024.10802397},
  timestamp  = {Sat, 06 Sep 2025 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/iros/ZhengWYT0ZCG24.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}

% CoRR (arXiv) preprint 2403.11057.
@article{DBLP:journals/corr/abs-2403-11057,
  author     = {Xiaoji Zheng
                and Lixiu Wu
                and Zhijie Yan
                and Yuanrong Tang
                and Hao Zhao
                and Chen Zhong
                and Bokui Chen
                and Jiangtao Gong},
  title      = {Large Language Models Powered Context-aware Motion Prediction},
  journal    = {CoRR},
  volume     = {abs/2403.11057},
  year       = {2024},
  doi        = {10.48550/ARXIV.2403.11057},
  url        = {https://doi.org/10.48550/arXiv.2403.11057},
  eprinttype = {arXiv},
  eprint     = {2403.11057},
  timestamp  = {Tue, 30 Jul 2024 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2403-11057.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}

% CoRR (arXiv) preprint 2403.19589; carries the same title as the ECCV 2024
% record DBLP:conf/eccv/JinZLLZHLZYSZJLCZ24 in this file.
% Title: TOD3Cap and 3D are brace-protected against sentence-casing.
@article{DBLP:journals/corr/abs-2403-19589,
  author     = {Bu Jin
                and Yupeng Zheng
                and Pengfei Li
                and Weize Li
                and Yuhang Zheng
                and Sujie Hu
                and Xinyu Liu
                and Jinwei Zhu
                and Zhijie Yan
                and Haiyang Sun
                and Kun Zhan
                and Peng Jia
                and Xiaoxiao Long
                and Yilun Chen
                and Hao Zhao},
  title      = {{TOD3Cap}: Towards {3D} Dense Captioning in Outdoor Scenes},
  journal    = {CoRR},
  volume     = {abs/2403.19589},
  year       = {2024},
  doi        = {10.48550/ARXIV.2403.19589},
  url        = {https://doi.org/10.48550/arXiv.2403.19589},
  eprinttype = {arXiv},
  eprint     = {2403.19589},
  timestamp  = {Tue, 05 May 2026 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2403-19589.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}

% CoRR (arXiv) preprint 2407.04051.
% Title: the coined name FunAudioLLM and the acronym LLMs are brace-protected
% so that sentence-casing bibliography styles keep their capitals.
@article{DBLP:journals/corr/abs-2407-04051,
  author     = {Keyu An
                and Qian Chen
                and Chong Deng
                and Zhihao Du
                and Changfeng Gao
                and Zhifu Gao
                and Yue Gu
                and Ting He
                and Hangrui Hu
                and Kai Hu
                and Shengpeng Ji
                and Yabin Li
                and Zerui Li
                and Heng Lu
                and Haoneng Luo
                and Xiang Lv
                and Bin Ma
                and Ziyang Ma
                and Chongjia Ni
                and Changhe Song
                and Jiaqi Shi
                and Xian Shi
                and Hao Wang
                and Wen Wang
                and Yuxuan Wang
                and Zhangyu Xiao
                and Zhijie Yan
                and Yexin Yang
                and Bin Zhang
                and Qinglin Zhang
                and Shiliang Zhang
                and Nan Zhao
                and Siqi Zheng},
  title      = {{FunAudioLLM}: Voice Understanding and Generation Foundation Models for Natural Interaction Between Humans and {LLMs}},
  journal    = {CoRR},
  volume     = {abs/2407.04051},
  year       = {2024},
  doi        = {10.48550/ARXIV.2407.04051},
  url        = {https://doi.org/10.48550/arXiv.2407.04051},
  eprinttype = {arXiv},
  eprint     = {2407.04051},
  timestamp  = {Tue, 24 Feb 2026 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2407-04051.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}

% CoRR (arXiv) preprint 2407.05407.
% Title: the coined name CosyVoice is brace-protected so that sentence-casing
% bibliography styles keep its internal capital.
@article{DBLP:journals/corr/abs-2407-05407,
  author     = {Zhihao Du
                and Qian Chen
                and Shiliang Zhang
                and Kai Hu
                and Heng Lu
                and Yexin Yang
                and Hangrui Hu
                and Siqi Zheng
                and Yue Gu
                and Ziyang Ma
                and Zhifu Gao
                and Zhijie Yan},
  title      = {{CosyVoice}: {A} Scalable Multilingual Zero-shot Text-to-speech Synthesizer based on Supervised Semantic Tokens},
  journal    = {CoRR},
  volume     = {abs/2407.05407},
  year       = {2024},
  doi        = {10.48550/ARXIV.2407.05407},
  url        = {https://doi.org/10.48550/arXiv.2407.05407},
  eprinttype = {arXiv},
  eprint     = {2407.05407},
  timestamp  = {Sun, 15 Jun 2025 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2407-05407.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}

% CoRR (arXiv) preprint 2409.17750.
@article{DBLP:journals/corr/abs-2409-17750,
  author     = {Keyu An
                and Shiliang Zhang
                and Zhijie Yan},
  title      = {Are Transformers in Pre-trained {LM} {A} Good {ASR} Encoder? An Empirical Study},
  journal    = {CoRR},
  volume     = {abs/2409.17750},
  year       = {2024},
  doi        = {10.48550/ARXIV.2409.17750},
  url        = {https://doi.org/10.48550/arXiv.2409.17750},
  eprinttype = {arXiv},
  eprint     = {2409.17750},
  timestamp  = {Mon, 21 Oct 2024 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2409-17750.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}

% CoRR (arXiv) preprint 2410.11989.
% Title: "3D" is brace-protected so sentence-casing styles do not render it as "3d".
@article{DBLP:journals/corr/abs-2410-11989,
  author     = {Zhijie Yan
                and Shufei Li
                and Zuoxu Wang
                and Lixiu Wu
                and Han Wang
                and Jun Zhu
                and Lijiang Chen
                and Jihong Liu},
  title      = {Dynamic Open-Vocabulary {3D} Scene Graphs for Long-term Language-Guided Mobile Manipulation},
  journal    = {CoRR},
  volume     = {abs/2410.11989},
  year       = {2024},
  doi        = {10.48550/ARXIV.2410.11989},
  url        = {https://doi.org/10.48550/arXiv.2410.11989},
  eprinttype = {arXiv},
  eprint     = {2410.11989},
  timestamp  = {Sun, 24 Nov 2024 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2410-11989.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}

% CoRR (arXiv) preprint 2412.10117.
% Title: the coined name CosyVoice is brace-protected so that sentence-casing
% bibliography styles keep its internal capital.
@article{DBLP:journals/corr/abs-2412-10117,
  author     = {Zhihao Du
                and Yuxuan Wang
                and Qian Chen
                and Xian Shi
                and Xiang Lv
                and Tianyu Zhao
                and Zhifu Gao
                and Yexin Yang
                and Changfeng Gao
                and Hui Wang
                and Fan Yu
                and Huadai Liu
                and Zhengyan Sheng
                and Yue Gu
                and Chong Deng
                and Wen Wang
                and Shiliang Zhang
                and Zhijie Yan
                and Jingren Zhou},
  title      = {{CosyVoice} 2: Scalable Streaming Speech Synthesis with Large Language Models},
  journal    = {CoRR},
  volume     = {abs/2412.10117},
  year       = {2024},
  doi        = {10.48550/ARXIV.2412.10117},
  url        = {https://doi.org/10.48550/arXiv.2412.10117},
  eprinttype = {arXiv},
  eprint     = {2412.10117},
  timestamp  = {Tue, 24 Feb 2026 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2412-10117.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}

% Workshop paper, IEEE ASRU 2023.
% Title: the challenge acronym M2MeT is brace-protected so that sentence-casing
% bibliography styles keep its mixed capitalisation.
@inproceedings{DBLP:conf/asru/LiangSYLZDCXQWCLYB23,
  author     = {Yuhao Liang
                and Mohan Shi
                and Fan Yu
                and Yangze Li
                and Shiliang Zhang
                and Zhihao Du
                and Qian Chen
                and Lei Xie
                and Yanmin Qian
                and Jian Wu
                and Zhuo Chen
                and Kong Aik Lee
                and Zhijie Yan
                and Hui Bu},
  title      = {The Second Multi-Channel Multi-Party Meeting Transcription Challenge ({M2MeT} 2.0): {A} Benchmark for Speaker-Attributed {ASR}},
  booktitle  = {{IEEE} Automatic Speech Recognition and Understanding Workshop, {ASRU} 2023, Taipei, Taiwan, December 16-20, 2023},
  pages      = {1--8},
  publisher  = {{IEEE}},
  year       = {2023},
  doi        = {10.1109/ASRU57964.2023.10389625},
  url        = {https://doi.org/10.1109/ASRU57964.2023.10389625},
  timestamp  = {Tue, 17 Jun 2025 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/asru/LiangSYLZDCXQWCLYB23.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}

% Conference paper, IEEE CASE 2023.
% Title: the method name Doc2Vec-GAT is brace-protected so that sentence-casing
% bibliography styles keep its capitals.
@inproceedings{DBLP:conf/case/LiWYL23,
  author     = {Mingrui Li
                and Zuoxu Wang
                and Zhijie Yan
                and Jihong Liu},
  title      = {Exploiting Patent Documents for Cross-Domain Knowledge Transfer in Innovative Engineering Design: {A} {Doc2Vec-GAT}-Based Approach},
  booktitle  = {19th {IEEE} International Conference on Automation Science and Engineering, {CASE} 2023, Auckland, New Zealand, August 26-30, 2023},
  pages      = {1--6},
  publisher  = {{IEEE}},
  year       = {2023},
  doi        = {10.1109/CASE56687.2023.10260662},
  url        = {https://doi.org/10.1109/CASE56687.2023.10260662},
  timestamp  = {Sun, 06 Oct 2024 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/case/LiWYL23.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}

% Conference paper, CICAI 2023 (Springer LNCS, Part II).
% Title: the whole first word, including its inline-math superscript, is wrapped
% in one brace group so sentence-casing styles leave "Sim" capitalised.
% doi/url: stored raw (no backslash before the underscore); these fields are
% typeset verbatim by url-aware styles, so an escape would end up in the link.
% NOTE(review): "series" is given without an LNCS volume number.
@inproceedings{DBLP:conf/cicba/HanYLLSLGSHGZCZZ23,
  author     = {Zhengxiao Han
                and Zhijie Yan
                and Yang Li
                and Pengfei Li
                and Yifeng Shi
                and Nairui Luo
                and Xu Gao
                and Yongliang Shi
                and Pengfei Huang
                and Jiangtao Gong
                and Guyue Zhou
                and Yilun Chen
                and Hang Zhao
                and Hao Zhao},
  editor     = {Lu Fang
                and Jian Pei
                and Guangtao Zhai
                and Ruiping Wang},
  title      = {{M\({}^{\mbox{2}}\)Sim}: {A} Long-Term Interactive Driving Simulator},
  booktitle  = {Artificial Intelligence - Third {CAAI} International Conference, {CICAI} 2023, Fuzhou, China, July 22-23, 2023, Revised Selected Papers, Part {II}},
  series     = {Lecture Notes in Computer Science},
  pages      = {172--176},
  publisher  = {Springer},
  year       = {2023},
  doi        = {10.1007/978-981-99-9119-8_16},
  url        = {https://doi.org/10.1007/978-981-99-9119-8_16},
  timestamp  = {Tue, 14 Oct 2025 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/cicba/HanYLLSLGSHGZCZZ23.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}

% Conference paper, CICAI 2023 (Springer LNCS, Part II).
% doi/url: stored raw (no backslash before the underscore); these fields are
% typeset verbatim by url-aware styles, so an escape would end up in the link.
% NOTE(review): "series" is given without an LNCS volume number.
@inproceedings{DBLP:conf/cicba/HanYLLSLGSHGZCZZ23a,
  author     = {Zhengxiao Han
                and Zhijie Yan
                and Yang Li
                and Pengfei Li
                and Yifeng Shi
                and Nairui Luo
                and Xu Gao
                and Yongliang Shi
                and Pengfei Huang
                and Jiangtao Gong
                and Guyue Zhou
                and Yilun Chen
                and Hang Zhao
                and Hao Zhao},
  editor     = {Lu Fang
                and Jian Pei
                and Guangtao Zhai
                and Ruiping Wang},
  title      = {Long-Term Interactive Driving Simulation: {MPC} to the Rescue},
  booktitle  = {Artificial Intelligence - Third {CAAI} International Conference, {CICAI} 2023, Fuzhou, China, July 22-23, 2023, Revised Selected Papers, Part {II}},
  series     = {Lecture Notes in Computer Science},
  pages      = {177--188},
  publisher  = {Springer},
  year       = {2023},
  doi        = {10.1007/978-981-99-9119-8_17},
  url        = {https://doi.org/10.1007/978-981-99-9119-8_17},
  timestamp  = {Tue, 14 Oct 2025 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/cicba/HanYLLSLGSHGZCZZ23a.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}

% Conference paper, IEEE ICASSP 2023 (challenge overview).
@inproceedings{DBLP:conf/icassp/ZhangDLYCWYL0Z23,
  author     = {Qinglin Zhang
                and Chong Deng
                and Jiaqing Liu
                and Hai Yu
                and Qian Chen
                and Wen Wang
                and Zhijie Yan
                and Jinglin Liu
                and Yi Ren
                and Zhou Zhao},
  title      = {Overview of the {ICASSP} 2023 General Meeting Understanding and Generation Challenge {(MUG)}},
  booktitle  = {{IEEE} International Conference on Acoustics, Speech and Signal Processing {ICASSP} 2023, Rhodes Island, Greece, June 4-10, 2023},
  pages      = {1--2},
  publisher  = {{IEEE}},
  year       = {2023},
  doi        = {10.1109/ICASSP49357.2023.10433920},
  url        = {https://doi.org/10.1109/ICASSP49357.2023.10433920},
  timestamp  = {Fri, 14 Feb 2025 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/icassp/ZhangDLYCWYL0Z23.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}

% Conference paper, IEEE ICASSP 2023 (MUG benchmark).
@inproceedings{DBLP:conf/icassp/ZhangDLYCWYLRZ23,
  author     = {Qinglin Zhang
                and Chong Deng
                and Jiaqing Liu
                and Hai Yu
                and Qian Chen
                and Wen Wang
                and Zhijie Yan
                and Jinglin Liu
                and Yi Ren
                and Zhou Zhao},
  title      = {{MUG:} {A} General Meeting Understanding and Generation Benchmark},
  booktitle  = {{IEEE} International Conference on Acoustics, Speech and Signal Processing {ICASSP} 2023, Rhodes Island, Greece, June 4-10, 2023},
  pages      = {1--5},
  publisher  = {{IEEE}},
  year       = {2023},
  doi        = {10.1109/ICASSP49357.2023.10097149},
  url        = {https://doi.org/10.1109/ICASSP49357.2023.10097149},
  timestamp  = {Fri, 14 Feb 2025 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/icassp/ZhangDLYCWYLRZ23.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}

% Conference paper, IEEE/CVF ICCV 2023.
@inproceedings{DBLP:conf/iccv/YanLFXSCZLLLLGC23,
  author     = {Zhijie Yan
                and Pengfei Li
                and Zheng Fu
                and Shaocong Xu
                and Yongliang Shi
                and Xiaoxue Chen
                and Yuhang Zheng
                and Yang Li
                and Tianyu Liu
                and Chuxuan Li
                and Nairui Luo
                and Xu Gao
                and Yilun Chen
                and Zuoxu Wang
                and Yifeng Shi
                and Pengfei Huang
                and Zhengxiao Han
                and Jirui Yuan
                and Jiangtao Gong
                and Guyue Zhou
                and Hang Zhao
                and Hao Zhao},
  title      = {{INT2:} Interactive Trajectory Prediction at Intersections},
  booktitle  = {{IEEE/CVF} International Conference on Computer Vision, {ICCV} 2023, Paris, France, October 1-6, 2023},
  pages      = {8502--8513},
  publisher  = {{IEEE}},
  year       = {2023},
  doi        = {10.1109/ICCV51070.2023.00784},
  url        = {https://doi.org/10.1109/ICCV51070.2023.00784},
  timestamp  = {Thu, 13 Nov 2025 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/iccv/YanLFXSCZLLLLGC23.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}

% Conference paper, Interspeech 2023 (ISCA).
@inproceedings{DBLP:conf/interspeech/ShiLGZY23,
  author     = {Xian Shi
                and Haoneng Luo
                and Zhifu Gao
                and Shiliang Zhang
                and Zhijie Yan},
  editor     = {Naomi Harte
                and Julie Carson{-}Berndsen
                and Gareth Jones},
  title      = {Accurate and Reliable Confidence Estimation Based on Non-Autoregressive End-to-End Speech Recognition System},
  booktitle  = {24th Annual Conference of the International Speech Communication Association, Interspeech 2023, Dublin, Ireland, August 20-24, 2023},
  pages      = {3247--3251},
  publisher  = {{ISCA}},
  year       = {2023},
  doi        = {10.21437/INTERSPEECH.2023-390},
  url        = {https://doi.org/10.21437/Interspeech.2023-390},
  timestamp  = {Fri, 14 Jun 2024 14:12:12 +0200},
  biburl     = {https://dblp.org/rec/conf/interspeech/ShiLGZY23.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}

@inproceedings{DBLP:conf/interspeech/ZhouWCZYZZ23,
  author       = {Xiaohuan Zhou and
                  Jiaming Wang and
                  Zeyu Cui and
                  Shiliang Zhang and
                  Zhijie Yan and
                  Jingren Zhou and
                  Chang Zhou},
  editor       = {Naomi Harte and
                  Julie Carson{-}Berndsen and
                  Gareth Jones},
  title        = {{MMSpeech}: Multi-modal Multi-task Encoder-Decoder Pre-training for
                  Speech Recognition},
  booktitle    = {24th Annual Conference of the International Speech Communication Association,
                  Interspeech 2023, Dublin, Ireland, August 20-24, 2023},
  pages        = {4943--4947},
  publisher    = {{ISCA}},
  year         = {2023},
  url          = {https://doi.org/10.21437/Interspeech.2023-791},
  doi          = {10.21437/INTERSPEECH.2023-791},
  timestamp    = {Mon, 03 Nov 2025 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/interspeech/ZhouWCZYZZ23.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@article{DBLP:journals/corr/abs-2301-12343,
  author       = {Shi, Xian and
                  Chen, Yanni and
                  Zhang, Shiliang and
                  Yan, Zhijie},
  title        = {Achieving Timestamp Prediction While Recognizing with Non-Autoregressive
                  End-to-End {ASR} Model},
  journal      = {CoRR},
  volume       = {abs/2301.12343},
  year         = {2023},
  url          = {https://doi.org/10.48550/arXiv.2301.12343},
  doi          = {10.48550/ARXIV.2301.12343},
  eprinttype   = {arXiv},
  eprint       = {2301.12343},
  timestamp    = {Wed, 01 Feb 2023 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2301-12343.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@article{DBLP:journals/corr/abs-2303-13932,
  author       = {Zhang, Qinglin and
                  Deng, Chong and
                  Liu, Jiaqing and
                  Yu, Hai and
                  Chen, Qian and
                  Wang, Wen and
                  Yan, Zhijie and
                  Liu, Jinglin and
                  Ren, Yi and
                  Zhao, Zhou},
  title        = {Overview of the {ICASSP} 2023 General Meeting Understanding and Generation
                  Challenge {(MUG)}},
  journal      = {CoRR},
  volume       = {abs/2303.13932},
  year         = {2023},
  url          = {https://doi.org/10.48550/arXiv.2303.13932},
  doi          = {10.48550/ARXIV.2303.13932},
  eprinttype   = {arXiv},
  eprint       = {2303.13932},
  timestamp    = {Fri, 14 Feb 2025 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2303-13932.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@article{DBLP:journals/corr/abs-2303-13939,
  author       = {Zhang, Qinglin and
                  Deng, Chong and
                  Liu, Jiaqing and
                  Yu, Hai and
                  Chen, Qian and
                  Wang, Wen and
                  Yan, Zhijie and
                  Liu, Jinglin and
                  Ren, Yi and
                  Zhao, Zhou},
  title        = {{MUG:} {A} General Meeting Understanding and Generation Benchmark},
  journal      = {CoRR},
  volume       = {abs/2303.13939},
  year         = {2023},
  url          = {https://doi.org/10.48550/arXiv.2303.13939},
  doi          = {10.48550/ARXIV.2303.13939},
  eprinttype   = {arXiv},
  eprint       = {2303.13939},
  timestamp    = {Fri, 14 Feb 2025 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2303-13939.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@article{DBLP:journals/corr/abs-2305-10680,
  author       = {Shi, Xian and
                  Luo, Haoneng and
                  Gao, Zhifu and
                  Zhang, Shiliang and
                  Yan, Zhijie},
  title        = {Accurate and Reliable Confidence Estimation Based on Non-Autoregressive
                  End-to-End Speech Recognition System},
  journal      = {CoRR},
  volume       = {abs/2305.10680},
  year         = {2023},
  url          = {https://doi.org/10.48550/arXiv.2305.10680},
  doi          = {10.48550/ARXIV.2305.10680},
  eprinttype   = {arXiv},
  eprint       = {2305.10680},
  timestamp    = {Thu, 25 May 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2305-10680.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@article{DBLP:journals/corr/abs-2309-13573,
  author       = {Yuhao Liang and
                  Mohan Shi and
                  Fan Yu and
                  Yangze Li and
                  Shiliang Zhang and
                  Zhihao Du and
                  Qian Chen and
                  Lei Xie and
                  Yanmin Qian and
                  Jian Wu and
                  Zhuo Chen and
                  Kong Aik Lee and
                  Zhijie Yan and
                  Hui Bu},
  title        = {The second multi-channel multi-party meeting transcription challenge
                  ({M2MeT} 2.0): {A} benchmark for speaker-attributed {ASR}},
  journal      = {CoRR},
  volume       = {abs/2309.13573},
  year         = {2023},
  url          = {https://doi.org/10.48550/arXiv.2309.13573},
  doi          = {10.48550/ARXIV.2309.13573},
  eprinttype   = {arXiv},
  eprint       = {2309.13573},
  timestamp    = {Tue, 17 Jun 2025 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2309-13573.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@article{DBLP:journals/corr/abs-2310-04673,
  author       = {Jiaming Wang and
                  Zhihao Du and
                  Qian Chen and
                  Yunfei Chu and
                  Zhifu Gao and
                  Zerui Li and
                  Kai Hu and
                  Xiaohuan Zhou and
                  Jin Xu and
                  Ziyang Ma and
                  Wen Wang and
                  Siqi Zheng and
                  Chang Zhou and
                  Zhijie Yan and
                  Shiliang Zhang},
  title        = {{LauraGPT}: Listen, Attend, Understand, and Regenerate Audio with {GPT}},
  journal      = {CoRR},
  volume       = {abs/2310.04673},
  year         = {2023},
  url          = {https://doi.org/10.48550/arXiv.2310.04673},
  doi          = {10.48550/ARXIV.2310.04673},
  eprinttype   = {arXiv},
  eprint       = {2310.04673},
  timestamp    = {Thu, 29 Jan 2026 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2310-04673.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@article{DBLP:journals/corr/abs-2311-07919,
  author       = {Yunfei Chu and
                  Jin Xu and
                  Xiaohuan Zhou and
                  Qian Yang and
                  Shiliang Zhang and
                  Zhijie Yan and
                  Chang Zhou and
                  Jingren Zhou},
  title        = {{Qwen-Audio}: Advancing Universal Audio Understanding via Unified Large-Scale
                  Audio-Language Models},
  journal      = {CoRR},
  volume       = {abs/2311.07919},
  year         = {2023},
  url          = {https://doi.org/10.48550/arXiv.2311.07919},
  doi          = {10.48550/ARXIV.2311.07919},
  eprinttype   = {arXiv},
  eprint       = {2311.07919},
  timestamp    = {Fri, 30 Jan 2026 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2311-07919.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@article{DBLP:journals/corr/abs-2312-14860,
  author       = {Zuo, Lingyun and
                  An, Keyu and
                  Zhang, Shiliang and
                  Yan, Zhijie},
  title        = {Advancing {VAD} Systems Based on Multi-Task Learning with Improved
                  Model Structures},
  journal      = {CoRR},
  volume       = {abs/2312.14860},
  year         = {2023},
  url          = {https://doi.org/10.48550/arXiv.2312.14860},
  doi          = {10.48550/ARXIV.2312.14860},
  eprinttype   = {arXiv},
  eprint       = {2312.14860},
  timestamp    = {Thu, 18 Jan 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2312-14860.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{DBLP:conf/emnlp/DuZZY22,
  author       = {Du, Zhihao and
                  Zhang, Shiliang and
                  Zheng, Siqi and
                  Yan, Zhi{-}Jie},
  editor       = {Goldberg, Yoav and
                  Kozareva, Zornitsa and
                  Zhang, Yue},
  title        = {Speaker Overlap-aware Neural Diarization for Multi-party Meeting Analysis},
  booktitle    = {Proceedings of the 2022 Conference on Empirical Methods in Natural
                  Language Processing, {EMNLP} 2022, Abu Dhabi, United Arab Emirates,
                  December 7-11, 2022},
  pages        = {7458--7469},
  publisher    = {Association for Computational Linguistics},
  year         = {2022},
  url          = {https://doi.org/10.18653/v1/2022.emnlp-main.505},
  doi          = {10.18653/V1/2022.EMNLP-MAIN.505},
  timestamp    = {Thu, 10 Aug 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/emnlp/DuZZY22.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{DBLP:conf/icassp/YuZFXZDHGYMXB22,
  author       = {Fan Yu and
                  Shiliang Zhang and
                  Yihui Fu and
                  Lei Xie and
                  Siqi Zheng and
                  Zhihao Du and
                  Weilong Huang and
                  Pengcheng Guo and
                  Zhijie Yan and
                  Bin Ma and
                  Xin Xu and
                  Hui Bu},
  title        = {{M2MeT}: The {ICASSP} 2022 Multi-Channel Multi-Party Meeting Transcription
                  Challenge},
  booktitle    = {{IEEE} International Conference on Acoustics, Speech and Signal Processing,
                  {ICASSP} 2022, Virtual and Singapore, 23-27 May 2022},
  pages        = {6167--6171},
  publisher    = {{IEEE}},
  year         = {2022},
  url          = {https://doi.org/10.1109/ICASSP43922.2022.9746465},
  doi          = {10.1109/ICASSP43922.2022.9746465},
  timestamp    = {Tue, 17 Jun 2025 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/icassp/YuZFXZDHGYMXB22.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{DBLP:conf/icassp/RenLHZCYZ22,
  author       = {Yi Ren and
                  Ming Lei and
                  Zhiying Huang and
                  Shiliang Zhang and
                  Qian Chen and
                  Zhijie Yan and
                  Zhou Zhao},
  title        = {{ProsoSpeech}: Enhancing Prosody with Quantized Vector Pre-Training
                  in Text-To-Speech},
  booktitle    = {{IEEE} International Conference on Acoustics, Speech and Signal Processing,
                  {ICASSP} 2022, Virtual and Singapore, 23-27 May 2022},
  pages        = {7577--7581},
  publisher    = {{IEEE}},
  year         = {2022},
  url          = {https://doi.org/10.1109/ICASSP43922.2022.9746883},
  doi          = {10.1109/ICASSP43922.2022.9746883},
  timestamp    = {Thu, 01 May 2025 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/icassp/RenLHZCYZ22.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{DBLP:conf/icassp/YuZGFDZHXTWQLYM22,
  author       = {Yu, Fan and
                  Zhang, Shiliang and
                  Guo, Pengcheng and
                  Fu, Yihui and
                  Du, Zhihao and
                  Zheng, Siqi and
                  Huang, Weilong and
                  Xie, Lei and
                  Tan, Zheng{-}Hua and
                  Wang, DeLiang and
                  Qian, Yanmin and
                  Lee, Kong Aik and
                  Yan, Zhijie and
                  Ma, Bin and
                  Xu, Xin and
                  Bu, Hui},
  title        = {Summary on the {ICASSP} 2022 Multi-Channel Multi-Party Meeting Transcription
                  Grand Challenge},
  booktitle    = {{IEEE} International Conference on Acoustics, Speech and Signal Processing,
                  {ICASSP} 2022, Virtual and Singapore, 23-27 May 2022},
  pages        = {9156--9160},
  publisher    = {{IEEE}},
  year         = {2022},
  url          = {https://doi.org/10.1109/ICASSP43922.2022.9746270},
  doi          = {10.1109/ICASSP43922.2022.9746270},
  timestamp    = {Tue, 17 Jun 2025 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/icassp/YuZGFDZHXTWQLYM22.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{DBLP:conf/ieaaie/ZhuSSWHYC22,
  author       = {Min Zhu and
                  Bingqing Shen and
                  Yan Sun and
                  Chongyu Wang and
                  Guoxin Hou and
                  Zhijie Yan and
                  Hongming Cai},
  editor       = {Hamido Fujita and
                  Philippe Fournier{-}Viger and
                  Moonis Ali and
                  Yinglin Wang},
  title        = {Surface Defect Detection and Classification Based on Fusing Multiple
                  Computer Vision Techniques},
  booktitle    = {Advances and Trends in Artificial Intelligence. Theory and Practices
                  in Artificial Intelligence - 35th International Conference on Industrial,
                  Engineering and Other Applications of Applied Intelligent Systems,
                  {IEA/AIE} 2022, Kitakyushu, Japan, July 19-22, 2022, Proceedings},
  series       = {Lecture Notes in Computer Science},
  volume       = {13343},
  pages        = {51--62},
  publisher    = {Springer},
  year         = {2022},
  url          = {https://doi.org/10.1007/978-3-031-08530-7\_5},
  doi          = {10.1007/978-3-031-08530-7\_5},
  timestamp    = {Wed, 25 Feb 2026 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/ieaaie/ZhuSSWHYC22.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{DBLP:conf/interspeech/GaoZ0Y22,
  author       = {Gao, Zhifu and
                  Zhang, Shiliang and
                  McLoughlin, Ian and
                  Yan, Zhijie},
  editor       = {Ko, Hanseok and
                  Hansen, John H. L.},
  title        = {Paraformer: Fast and Accurate Parallel Transformer for Non-autoregressive
                  End-to-End Speech Recognition},
  booktitle    = {23rd Annual Conference of the International Speech Communication Association,
                  Interspeech 2022, Incheon, Korea, September 18-22, 2022},
  pages        = {2063--2067},
  publisher    = {{ISCA}},
  year         = {2022},
  url          = {https://doi.org/10.21437/Interspeech.2022-9996},
  doi          = {10.21437/INTERSPEECH.2022-9996},
  timestamp    = {Tue, 11 Jun 2024 16:45:43 +0200},
  biburl       = {https://dblp.org/rec/conf/interspeech/GaoZ0Y22.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@article{DBLP:journals/corr/abs-2202-03647,
  author       = {Yu, Fan and
                  Zhang, Shiliang and
                  Guo, Pengcheng and
                  Fu, Yihui and
                  Du, Zhihao and
                  Zheng, Siqi and
                  Huang, Weilong and
                  Xie, Lei and
                  Tan, Zheng{-}Hua and
                  Wang, DeLiang and
                  Qian, Yanmin and
                  Lee, Kong Aik and
                  Yan, Zhijie and
                  Ma, Bin and
                  Xu, Xin and
                  Bu, Hui},
  title        = {Summary On The {ICASSP} 2022 Multi-Channel Multi-Party Meeting Transcription
                  Grand Challenge},
  journal      = {CoRR},
  volume       = {abs/2202.03647},
  year         = {2022},
  url          = {https://arxiv.org/abs/2202.03647},
  eprinttype   = {arXiv},
  eprint       = {2202.03647},
  timestamp    = {Tue, 17 Jun 2025 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2202-03647.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@article{DBLP:journals/corr/abs-2202-07816,
  author       = {Yi Ren and
                  Ming Lei and
                  Zhiying Huang and
                  Shiliang Zhang and
                  Qian Chen and
                  Zhijie Yan and
                  Zhou Zhao},
  title        = {{ProsoSpeech}: Enhancing Prosody With Quantized Vector Pre-training
                  in Text-to-Speech},
  journal      = {CoRR},
  volume       = {abs/2202.07816},
  year         = {2022},
  url          = {https://arxiv.org/abs/2202.07816},
  eprinttype   = {arXiv},
  eprint       = {2202.07816},
  timestamp    = {Fri, 14 Feb 2025 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2202-07816.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@article{DBLP:journals/corr/abs-2203-09767,
  author       = {Du, Zhihao and
                  Zhang, Shiliang and
                  Zheng, Siqi and
                  Yan, Zhijie},
  title        = {Speaker Embedding-aware Neural Diarization: an Efficient Framework
                  for Overlapping Speech Diarization in Meeting Scenarios},
  journal      = {CoRR},
  volume       = {abs/2203.09767},
  year         = {2022},
  url          = {https://doi.org/10.48550/arXiv.2203.09767},
  doi          = {10.48550/ARXIV.2203.09767},
  eprinttype   = {arXiv},
  eprint       = {2203.09767},
  timestamp    = {Mon, 04 Apr 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2203-09767.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@article{DBLP:journals/corr/abs-2206-08317,
  author       = {Gao, Zhifu and
                  Zhang, Shiliang and
                  McLoughlin, Ian and
                  Yan, Zhijie},
  title        = {Paraformer: Fast and Accurate Parallel Transformer for Non-autoregressive
                  End-to-End Speech Recognition},
  journal      = {CoRR},
  volume       = {abs/2206.08317},
  year         = {2022},
  url          = {https://doi.org/10.48550/arXiv.2206.08317},
  doi          = {10.48550/ARXIV.2206.08317},
  eprinttype   = {arXiv},
  eprint       = {2206.08317},
  timestamp    = {Sun, 12 Nov 2023 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2206-08317.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@article{DBLP:journals/corr/abs-2211-10243,
  author       = {Du, Zhihao and
                  Zhang, Shiliang and
                  Zheng, Siqi and
                  Yan, Zhijie},
  title        = {Speaker Overlap-aware Neural Diarization for Multi-party Meeting Analysis},
  journal      = {CoRR},
  volume       = {abs/2211.10243},
  year         = {2022},
  url          = {https://doi.org/10.48550/arXiv.2211.10243},
  doi          = {10.48550/ARXIV.2211.10243},
  eprinttype   = {arXiv},
  eprint       = {2211.10243},
  timestamp    = {Thu, 24 Nov 2022 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2211-10243.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@article{DBLP:journals/corr/abs-2212-00500,
  author       = {Xiaohuan Zhou and
                  Jiaming Wang and
                  Zeyu Cui and
                  Shiliang Zhang and
                  Zhijie Yan and
                  Jingren Zhou and
                  Chang Zhou},
  title        = {{MMSpeech}: Multi-modal Multi-task Encoder-Decoder Pre-training for
                  Speech Recognition},
  journal      = {CoRR},
  volume       = {abs/2212.00500},
  year         = {2022},
  url          = {https://doi.org/10.48550/arXiv.2212.00500},
  doi          = {10.48550/ARXIV.2212.00500},
  eprinttype   = {arXiv},
  eprint       = {2212.00500},
  timestamp    = {Mon, 03 Nov 2025 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2212-00500.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{DBLP:conf/icassp/ZhengHWSFY21,
  author       = {Zheng, Siqi and
                  Huang, Weilong and
                  Wang, Xianliang and
                  Suo, Hongbin and
                  Feng, Jinwei and
                  Yan, Zhijie},
  title        = {A Real-Time Speaker Diarization System Based on Spatial Spectrum},
  booktitle    = {{IEEE} International Conference on Acoustics, Speech and Signal Processing,
                  {ICASSP} 2021, Toronto, ON, Canada, June 6-11, 2021},
  pages        = {7208--7212},
  publisher    = {{IEEE}},
  year         = {2021},
  url          = {https://doi.org/10.1109/ICASSP39728.2021.9413544},
  doi          = {10.1109/ICASSP39728.2021.9413544},
  timestamp    = {Fri, 09 Jul 2021 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/icassp/ZhengHWSFY21.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{DBLP:conf/interspeech/ZhangZHLSFY21,
  author       = {Shiliang Zhang and
                  Siqi Zheng and
                  Weilong Huang and
                  Ming Lei and
                  Hongbin Suo and
                  Jinwei Feng and
                  Zhijie Yan},
  editor       = {Hynek Hermansky and
                  Honza {\v{C}}ernock{\'{y}} and
                  Luk{\'{a}}{\v{s}} Burget and
                  Lori Lamel and
                  Odette Scharenborg and
                  Petr Motl{\'{\i}}{\v{c}}ek},
  title        = {Investigation of Spatial-Acoustic Features for Overlapping Speech
                  Detection in Multiparty Meetings},
  booktitle    = {22nd Annual Conference of the International Speech Communication Association,
                  Interspeech 2021, Brno, Czechia, August 30 - September 3, 2021},
  pages        = {3550--3554},
  publisher    = {{ISCA}},
  year         = {2021},
  url          = {https://doi.org/10.21437/Interspeech.2021-747},
  doi          = {10.21437/INTERSPEECH.2021-747},
  timestamp    = {Tue, 11 Jun 2024 16:45:43 +0200},
  biburl       = {https://dblp.org/rec/conf/interspeech/ZhangZHLSFY21.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@article{DBLP:journals/corr/abs-2107-09321,
  author       = {Zheng, Siqi and
                  Huang, Weilong and
                  Wang, Xianliang and
                  Suo, Hongbin and
                  Feng, Jinwei and
                  Yan, Zhijie},
  title        = {A Real-time Speaker Diarization System Based on Spatial Spectrum},
  journal      = {CoRR},
  volume       = {abs/2107.09321},
  year         = {2021},
  url          = {https://arxiv.org/abs/2107.09321},
  eprinttype   = {arXiv},
  eprint       = {2107.09321},
  timestamp    = {Thu, 29 Jul 2021 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2107-09321.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@article{DBLP:journals/corr/abs-2109-04049,
  author       = {Siqi Zheng and
                  Shiliang Zhang and
                  Weilong Huang and
                  Qian Chen and
                  Hongbin Suo and
                  Ming Lei and
                  Jinwei Feng and
                  Zhijie Yan},
  title        = {{BeamTransformer}: Microphone Array-based Overlapping Speech Detection},
  journal      = {CoRR},
  volume       = {abs/2109.04049},
  year         = {2021},
  url          = {https://arxiv.org/abs/2109.04049},
  eprinttype   = {arXiv},
  eprint       = {2109.04049},
  timestamp    = {Sun, 21 Jul 2024 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2109-04049.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@article{DBLP:journals/corr/abs-2110-07393,
  author       = {Fan Yu and
                  Shiliang Zhang and
                  Yihui Fu and
                  Lei Xie and
                  Siqi Zheng and
                  Zhihao Du and
                  Weilong Huang and
                  Pengcheng Guo and
                  Zhijie Yan and
                  Bin Ma and
                  Xin Xu and
                  Hui Bu},
  title        = {{M2MeT}: The {ICASSP} 2022 Multi-Channel Multi-Party Meeting Transcription
                  Challenge},
  journal      = {CoRR},
  volume       = {abs/2110.07393},
  year         = {2021},
  url          = {https://arxiv.org/abs/2110.07393},
  eprinttype   = {arXiv},
  eprint       = {2110.07393},
  timestamp    = {Tue, 17 Jun 2025 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2110-07393.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{DBLP:conf/interspeech/FanLWZCGY20,
  author       = {Fan, Kai and
                  Li, Bo and
                  Wang, Jiayi and
                  Zhang, Shiliang and
                  Chen, Boxing and
                  Ge, Niyu and
                  Yan, Zhijie},
  editor       = {Meng, Helen and
                  Xu, Bo and
                  Zheng, Thomas Fang},
  title        = {Neural Zero-Inflated Quality Estimation Model for Automatic Speech
                  Recognition System},
  booktitle    = {21st Annual Conference of the International Speech Communication Association,
                  Interspeech 2020, Virtual Event, Shanghai, China, October 25-29, 2020},
  pages        = {606--610},
  publisher    = {{ISCA}},
  year         = {2020},
  url          = {https://doi.org/10.21437/Interspeech.2020-1881},
  doi          = {10.21437/INTERSPEECH.2020-1881},
  timestamp    = {Tue, 21 Apr 2026 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/interspeech/FanLWZCGY20.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{DBLP:conf/interspeech/ZhangGLLGYX20,
  author       = {Zhang, Shiliang and
                  Gao, Zhifu and
                  Luo, Haoneng and
                  Lei, Ming and
                  Gao, Jie and
                  Yan, Zhijie and
                  Xie, Lei},
  editor       = {Meng, Helen and
                  Xu, Bo and
                  Zheng, Thomas Fang},
  title        = {Streaming Chunk-Aware Multihead Attention for Online End-to-End Speech
                  Recognition},
  booktitle    = {21st Annual Conference of the International Speech Communication Association,
                  Interspeech 2020, Virtual Event, Shanghai, China, October 25-29, 2020},
  pages        = {2142--2146},
  publisher    = {{ISCA}},
  year         = {2020},
  url          = {https://doi.org/10.21437/Interspeech.2020-1972},
  doi          = {10.21437/INTERSPEECH.2020-1972},
  timestamp    = {Fri, 09 Dec 2022 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/interspeech/ZhangGLLGYX20.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@article{DBLP:journals/corr/abs-2006-01712,
  author       = {Zhang, Shiliang and
                  Gao, Zhifu and
                  Luo, Haoneng and
                  Lei, Ming and
                  Gao, Jie and
                  Yan, Zhijie and
                  Xie, Lei},
  title        = {Streaming Chunk-Aware Multihead Attention for Online End-to-End Speech
                  Recognition},
  journal      = {CoRR},
  volume       = {abs/2006.01712},
  year         = {2020},
  url          = {https://arxiv.org/abs/2006.01712},
  eprinttype   = {arXiv},
  eprint       = {2006.01712},
  timestamp    = {Fri, 09 Dec 2022 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2006-01712.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@article{DBLP:journals/jips/SunYLWW19,
  author       = {Zidan Sun and
                  Zhijie Yan and
                  Likai Liang and
                  Ran Wei and
                  Wei Wang},
  title        = {Dynamic Thermal Rating of Transmission Line Based on Environmental
                  Parameter Estimation},
  journal      = {J. Inf. Process. Syst.},
  volume       = {15},
  number       = {2},
  pages        = {386--398},
  year         = {2019},
  url          = {http://www.jips-k.org/q.jips?cp=pp\&pn=656},
  timestamp    = {Fri, 17 Jun 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/jips/SunYLWW19.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{DBLP:conf/interspeech/ZhangLY19,
  author       = {Shiliang Zhang and
                  Ming Lei and
                  Zhijie Yan},
  editor       = {Gernot Kubin and
                  Zdravko Ka{\v{c}}i{\v{c}}},
  title        = {Investigation of Transformer Based Spelling Correction Model for {CTC}-Based
                  End-to-End {Mandarin} Speech Recognition},
  booktitle    = {20th Annual Conference of the International Speech Communication Association,
                  Interspeech 2019, Graz, Austria, September 15-19, 2019},
  pages        = {2180--2184},
  publisher    = {{ISCA}},
  year         = {2019},
  url          = {https://doi.org/10.21437/Interspeech.2019-1290},
  doi          = {10.21437/INTERSPEECH.2019-1290},
  timestamp    = {Tue, 11 Jun 2024 16:45:43 +0200},
  biburl       = {https://dblp.org/rec/conf/interspeech/ZhangLY19.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@article{DBLP:journals/corr/abs-1904-10045,
  author       = {Shiliang Zhang and
                  Ming Lei and
                  Zhijie Yan},
  title        = {Automatic Spelling Correction with Transformer for {CTC}-based End-to-End
                  Speech Recognition},
  journal      = {CoRR},
  volume       = {abs/1904.10045},
  year         = {2019},
  url          = {https://arxiv.org/abs/1904.10045},
  eprinttype   = {arXiv},
  eprint       = {1904.10045},
  timestamp    = {Sat, 27 Apr 2019 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1904-10045.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@article{DBLP:journals/ejwcn/WangTYW18,
  author       = {Wang, Yanling and
                  Tao, Weihua and
                  Yan, Zhijie and
                  Wei, Ran},
  title        = {Uncertainty analysis of dynamic thermal rating based on environmental
                  parameter estimation},
  journal      = {{EURASIP} J. Wirel. Commun. Netw.},
  volume       = {2018},
  pages        = {167},
  year         = {2018},
  url          = {https://doi.org/10.1186/s13638-018-1181-7},
  doi          = {10.1186/S13638-018-1181-7},
  timestamp    = {Sun, 19 Jan 2025 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/ejwcn/WangTYW18.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{DBLP:conf/icassp/BiLZLY18,
  author       = {Bi, Mengxiao and
                  Lu, Heng and
                  Zhang, Shiliang and
                  Lei, Ming and
                  Yan, Zhijie},
  title        = {Deep Feed-Forward Sequential Memory Networks for Speech Synthesis},
  booktitle    = {2018 {IEEE} International Conference on Acoustics, Speech and Signal
                  Processing, {ICASSP} 2018, Calgary, AB, Canada, April 15-20, 2018},
  pages        = {4794--4798},
  publisher    = {{IEEE}},
  year         = {2018},
  url          = {https://doi.org/10.1109/ICASSP.2018.8461623},
  doi          = {10.1109/ICASSP.2018.8461623},
  timestamp    = {Wed, 16 Oct 2019 14:14:52 +0200},
  biburl       = {https://dblp.org/rec/conf/icassp/BiLZLY18.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% ICASSP 2018 paper. DBLP:journals/corr/abs-1803-02445 is the CoRR record with the same authors and title (capitalisation differs).
@inproceedings{DBLP:conf/icassp/HuangLLY18,
  author       = {Huang, Zhiying and
                  Lu, Heng and
                  Lei, Ming and
                  Yan, Zhijie},
  title        = {Linear Networks Based Speaker Adaptation for Speech Synthesis},
  booktitle    = {2018 {IEEE} International Conference on Acoustics, Speech and Signal
                  Processing, {ICASSP} 2018, Calgary, AB, Canada, April 15-20, 2018},
  pages        = {5319--5323},
  publisher    = {{IEEE}},
  year         = {2018},
  url          = {https://doi.org/10.1109/ICASSP.2018.8462373},
  doi          = {10.1109/ICASSP.2018.8462373},
  timestamp    = {Tue, 18 Sep 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/icassp/HuangLLY18.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% ICASSP 2018 paper. DBLP:journals/corr/abs-1803-05030 is the CoRR record with the same authors and title.
% Deep-FSMN is braced as a whole word so sentence-casing styles keep the acronym upper-case.
@inproceedings{DBLP:conf/icassp/ZhangLYD18,
  author       = {Zhang, Shiliang and
                  Lei, Ming and
                  Yan, Zhijie and
                  Dai, Lirong},
  title        = {{Deep-FSMN} for Large Vocabulary Continuous Speech Recognition},
  booktitle    = {2018 {IEEE} International Conference on Acoustics, Speech and Signal
                  Processing, {ICASSP} 2018, Calgary, AB, Canada, April 15-20, 2018},
  pages        = {5869--5873},
  publisher    = {{IEEE}},
  year         = {2018},
  url          = {https://doi.org/10.1109/ICASSP.2018.8461404},
  doi          = {10.1109/ICASSP.2018.8461404},
  timestamp    = {Thu, 27 Aug 2020 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/icassp/ZhangLYD18.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% DSP 2018 conference paper (Shanghai).
@inproceedings{DBLP:conf/icdsp/XueYYL18,
  author       = {Xue, Shaofei and
                  Yan, Zhijie and
                  Yu, Tao and
                  Liu, Zhang},
  title        = {A Study on Improving Acoustic Model for Robust and Far-Field Speech
                  Recognition},
  booktitle    = {23rd {IEEE} International Conference on Digital Signal Processing,
                  {DSP} 2018, Shanghai, China, November 19-21, 2018},
  pages        = {1--5},
  publisher    = {{IEEE}},
  year         = {2018},
  url          = {https://doi.org/10.1109/ICDSP.2018.8631862},
  doi          = {10.1109/ICDSP.2018.8631862},
  timestamp    = {Mon, 31 Oct 2022 09:05:23 +0100},
  biburl       = {https://dblp.org/rec/conf/icdsp/XueYYL18.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% arXiv/CoRR record. DBLP:conf/icassp/BiLZLY18 is the ICASSP 2018 record with the same authors and a near-identical title.
@article{DBLP:journals/corr/abs-1802-09194,
  author       = {Bi, Mengxiao and
                  Lu, Heng and
                  Zhang, Shiliang and
                  Lei, Ming and
                  Yan, Zhijie},
  title        = {Deep Feed-forward Sequential Memory Networks for Speech Synthesis},
  journal      = {CoRR},
  volume       = {abs/1802.09194},
  year         = {2018},
  url          = {http://arxiv.org/abs/1802.09194},
  eprinttype   = {arXiv},
  eprint       = {1802.09194},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1802-09194.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% arXiv/CoRR record. DBLP:conf/icassp/HuangLLY18 is the ICASSP 2018 record with the same authors and title (capitalisation differs).
@article{DBLP:journals/corr/abs-1803-02445,
  author       = {Huang, Zhiying and
                  Lu, Heng and
                  Lei, Ming and
                  Yan, Zhijie},
  title        = {Linear networks based speaker adaptation for speech synthesis},
  journal      = {CoRR},
  volume       = {abs/1803.02445},
  year         = {2018},
  url          = {http://arxiv.org/abs/1803.02445},
  eprinttype   = {arXiv},
  eprint       = {1803.02445},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1803-02445.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% arXiv/CoRR record. DBLP:conf/icassp/ZhangLYD18 is the ICASSP 2018 record with the same authors and title.
% Deep-FSMN is braced as a whole word so sentence-casing styles keep the acronym upper-case.
@article{DBLP:journals/corr/abs-1803-05030,
  author       = {Zhang, Shiliang and
                  Lei, Ming and
                  Yan, Zhijie and
                  Dai, Lirong},
  title        = {{Deep-FSMN} for Large Vocabulary Continuous Speech Recognition},
  journal      = {CoRR},
  volume       = {abs/1803.05030},
  year         = {2018},
  url          = {http://arxiv.org/abs/1803.05030},
  eprinttype   = {arXiv},
  eprint       = {1803.05030},
  timestamp    = {Thu, 27 Aug 2020 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1803-05030.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% Journal article, J. Inf. Process. Syst. volume 13, number 5 (2017).
@article{DBLP:journals/jips/YanWL17,
  author       = {Yan, Zhijie and
                  Wang, Yanling and
                  Liang, Likai},
  title        = {Analysis on Ampacity of Overhead Transmission Lines Being Operated},
  journal      = {J. Inf. Process. Syst.},
  volume       = {13},
  number       = {5},
  pages        = {1358--1371},
  year         = {2017},
  url          = {https://doi.org/10.3745/JIPS.04.0044},
  doi          = {10.3745/JIPS.04.0044},
  timestamp    = {Tue, 16 Feb 2021 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/jips/YanWL17.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% ICASSP 2017 paper (New Orleans).
@inproceedings{DBLP:conf/icassp/XueY17,
  author       = {Xue, Shaofei and
                  Yan, Zhijie},
  title        = {Improving latency-controlled {BLSTM} acoustic models for online speech
                  recognition},
  booktitle    = {2017 {IEEE} International Conference on Acoustics, Speech and Signal
                  Processing, {ICASSP} 2017, New Orleans, LA, USA, March 5-9, 2017},
  pages        = {5340--5344},
  publisher    = {{IEEE}},
  year         = {2017},
  url          = {https://doi.org/10.1109/ICASSP.2017.7953176},
  doi          = {10.1109/ICASSP.2017.7953176},
  timestamp    = {Wed, 16 Oct 2019 14:14:52 +0200},
  biburl       = {https://dblp.org/rec/conf/icassp/XueY17.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% ISCSLP 2016 paper (Tianjin).
@inproceedings{DBLP:conf/iscslp/HuangXYD16,
  author       = {Huang, Zhiying and
                  Xue, Shaofei and
                  Yan, Zhijie and
                  Dai, Li{-}Rong},
  title        = {Unsupervised speaker adaptation of {BLSTM-RNN} for {LVCSR} based on
                  speaker code},
  booktitle    = {10th International Symposium on Chinese Spoken Language Processing,
                  {ISCSLP} 2016, Tianjin, China, October 17-20, 2016},
  pages        = {1--5},
  publisher    = {{IEEE}},
  year         = {2016},
  url          = {https://doi.org/10.1109/ISCSLP.2016.7918363},
  doi          = {10.1109/ISCSLP.2016.7918363},
  timestamp    = {Wed, 18 Sep 2024 12:51:31 +0200},
  biburl       = {https://dblp.org/rec/conf/iscslp/HuangXYD16.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% ISCSLP 2016 paper (Tianjin).
% D-code is braced as a whole word so sentence-casing styles do not lower-case the capital D.
@inproceedings{DBLP:conf/iscslp/XueYHD16,
  author       = {Xue, Shaofei and
                  Yan, Zhijie and
                  Huang, Zhiying and
                  Dai, Li{-}Rong},
  title        = {Rapid speaker adaptation based on {D-code} extracted from {BLSTM-RNN}
                  in {LVCSR}},
  booktitle    = {10th International Symposium on Chinese Spoken Language Processing,
                  {ISCSLP} 2016, Tianjin, China, October 17-20, 2016},
  pages        = {1--5},
  publisher    = {{IEEE}},
  year         = {2016},
  url          = {https://doi.org/10.1109/ISCSLP.2016.7918374},
  doi          = {10.1109/ISCSLP.2016.7918374},
  timestamp    = {Thu, 27 Aug 2020 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/iscslp/XueYHD16.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% ICDAR 2015 paper (Nancy).
@inproceedings{DBLP:conf/icdar/ChenYH15,
  author       = {Chen, Kai and
                  Yan, Zhi{-}Jie and
                  Huo, Qiang},
  title        = {A context-sensitive-chunk {BPTT} approach to training deep {LSTM/BLSTM}
                  recurrent neural networks for offline handwriting recognition},
  booktitle    = {13th International Conference on Document Analysis and Recognition,
                  {ICDAR} 2015, Nancy, France, August 23-26, 2015},
  pages        = {411--415},
  publisher    = {{IEEE} Computer Society},
  year         = {2015},
  url          = {https://doi.org/10.1109/ICDAR.2015.7333794},
  doi          = {10.1109/ICDAR.2015.7333794},
  timestamp    = {Fri, 24 Mar 2023 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/icdar/ChenYH15.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% INTERSPEECH 2015 paper (Dresden).
@inproceedings{DBLP:conf/interspeech/ChenYH15,
  author       = {Chen, Kai and
                  Yan, Zhi{-}Jie and
                  Huo, Qiang},
  title        = {Training deep bidirectional {LSTM} acoustic model for {LVCSR} by a
                  context-sensitive-chunk {BPTT} approach},
  booktitle    = {16th Annual Conference of the International Speech Communication Association,
                  {INTERSPEECH} 2015, Dresden, Germany, September 6-10, 2015},
  pages        = {3600--3604},
  publisher    = {{ISCA}},
  year         = {2015},
  url          = {https://doi.org/10.21437/Interspeech.2015-714},
  doi          = {10.21437/INTERSPEECH.2015-714},
  timestamp    = {Sun, 19 Jan 2025 13:13:53 +0100},
  biburl       = {https://dblp.org/rec/conf/interspeech/ChenYH15.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% Journal article, volume 22, number 11 (2014).
@article{DBLP:journals/taslp/XuYH14,
  author       = {Xu, Jian and
                  Yan, Zhi{-}Jie and
                  Huo, Qiang},
  title        = {An Unsupervised Adaptation Approach to Leveraging Feedback Loop Data
                  by Using i-Vector for Data Clustering and Selection},
  journal      = {{IEEE} {ACM} Trans. Audio Speech Lang. Process.},
  volume       = {22},
  number       = {11},
  pages        = {1581--1589},
  year         = {2014},
  url          = {https://doi.org/10.1109/TASLP.2014.2341911},
  doi          = {10.1109/TASLP.2014.2341911},
  timestamp    = {Fri, 26 Jul 2024 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/taslp/XuYH14.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% Journal article, volume 21, number 2 (2013).
% NOTE(review): journal changed from the Speech Audio Process. abbreviation; the TASL DOI prefix and the 2013
% volume point to IEEE Trans. Audio, Speech, and Language Processing. Confirm against the publisher record.
@article{DBLP:journals/taslp/QianSY13,
  author       = {Qian, Yao and
                  Soong, Frank K. and
                  Yan, Zhi{-}Jie},
  title        = {A Unified Trajectory Tiling Approach to High Quality Speech Rendering},
  journal      = {{IEEE} Trans. Audio Speech Lang. Process.},
  volume       = {21},
  number       = {2},
  pages        = {280--290},
  year         = {2013},
  url          = {https://doi.org/10.1109/TASL.2012.2221460},
  doi          = {10.1109/TASL.2012.2221460},
  timestamp    = {Sun, 17 May 2020 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/taslp/QianSY13.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% ICASSP 2013 paper (Vancouver).
@inproceedings{DBLP:conf/icassp/YanHXZ13,
  author       = {Yan, Zhi{-}Jie and
                  Huo, Qiang and
                  Xu, Jian and
                  Zhang, Yu},
  title        = {Tied-state based discriminative training of context-expanded region-dependent
                  feature transforms for {LVCSR}},
  booktitle    = {{IEEE} International Conference on Acoustics, Speech and Signal Processing,
                  {ICASSP} 2013, Vancouver, BC, Canada, May 26-31, 2013},
  pages        = {6940--6944},
  publisher    = {{IEEE}},
  year         = {2013},
  url          = {https://doi.org/10.1109/ICASSP.2013.6639007},
  doi          = {10.1109/ICASSP.2013.6639007},
  timestamp    = {Wed, 16 Oct 2019 14:14:52 +0200},
  biburl       = {https://dblp.org/rec/conf/icassp/YanHXZ13.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% INTERSPEECH 2013 paper (Lyon).
% DNN is braced so sentence-casing styles keep the acronym upper-case, matching the already protected GMM-HMM and LVCSR.
@inproceedings{DBLP:conf/interspeech/YanHX13,
  author       = {Yan, Zhi{-}Jie and
                  Huo, Qiang and
                  Xu, Jian},
  editor       = {Bimbot, Fr{\'{e}}d{\'{e}}ric and
                  Cerisara, Christophe and
                  Fougeron, C{\'{e}}cile and
                  Gravier, Guillaume and
                  Lamel, Lori and
                  Pellegrino, Fran{\c{c}}ois and
                  Perrier, Pascal},
  title        = {A scalable approach to using {DNN}-derived features in {GMM-HMM} based
                  acoustic modeling for {LVCSR}},
  booktitle    = {14th Annual Conference of the International Speech Communication Association,
                  {INTERSPEECH} 2013, Lyon, France, August 25-29, 2013},
  pages        = {104--108},
  publisher    = {{ISCA}},
  year         = {2013},
  url          = {https://doi.org/10.21437/Interspeech.2013-47},
  doi          = {10.21437/INTERSPEECH.2013-47},
  timestamp    = {Tue, 11 Jun 2024 16:45:43 +0200},
  biburl       = {https://dblp.org/rec/conf/interspeech/YanHX13.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% ICASSP 2012 paper (Kyoto).
@inproceedings{DBLP:conf/icassp/ZhangXYH12,
  author       = {Zhang, Yu and
                  Xu, Jian and
                  Yan, Zhi{-}Jie and
                  Huo, Qiang},
  title        = {A study of discriminative feature extraction for i-vector based acoustic
                  sniffing in {IVN} acoustic model training},
  booktitle    = {2012 {IEEE} International Conference on Acoustics, Speech and Signal
                  Processing, {ICASSP} 2012, Kyoto, Japan, March 25-30, 2012},
  pages        = {4077--4080},
  publisher    = {{IEEE}},
  year         = {2012},
  url          = {https://doi.org/10.1109/ICASSP.2012.6288814},
  doi          = {10.1109/ICASSP.2012.6288814},
  timestamp    = {Wed, 16 Oct 2019 14:14:52 +0200},
  biburl       = {https://dblp.org/rec/conf/icassp/ZhangXYH12.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% ISCSLP 2012 paper (Kowloon Tong).
% fMPE is braced so sentence-casing styles preserve its mixed-case spelling, matching the already protected RDLT and LVCSR.
@inproceedings{DBLP:conf/iscslp/XuYH12,
  author       = {Xu, Jian and
                  Yan, Zhi{-}Jie and
                  Huo, Qiang},
  title        = {A comparative study of {fMPE} and {RDLT} approaches to {LVCSR}},
  booktitle    = {8th International Symposium on Chinese Spoken Language Processing,
                  {ISCSLP} 2012, Kowloon Tong, China, December 5-8, 2012},
  pages        = {21--24},
  publisher    = {{IEEE}},
  year         = {2012},
  url          = {https://doi.org/10.1109/ISCSLP.2012.6423511},
  doi          = {10.1109/ISCSLP.2012.6423511},
  timestamp    = {Wed, 18 Sep 2024 12:50:55 +0200},
  biburl       = {https://dblp.org/rec/conf/iscslp/XuYH12.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% ISCSLP 2012 paper (Kowloon Tong).
@inproceedings{DBLP:conf/iscslp/XuYH12a,
  author       = {Xu, Jian and
                  Yan, Zhi{-}Jie and
                  Huo, Qiang},
  title        = {A feature-transform based approach to unsupervised task adaptation
                  and personalization},
  booktitle    = {8th International Symposium on Chinese Spoken Language Processing,
                  {ISCSLP} 2012, Kowloon Tong, China, December 5-8, 2012},
  pages        = {229--232},
  publisher    = {{IEEE}},
  year         = {2012},
  url          = {https://doi.org/10.1109/ISCSLP.2012.6423513},
  doi          = {10.1109/ISCSLP.2012.6423513},
  timestamp    = {Fri, 19 Jul 2024 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/iscslp/XuYH12a.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% Mobile HCI '12 companion-proceedings record (pp. 215--216). DBLP:conf/mhci/EdgeCWQYS12a is the
% main-proceedings record with the same authors and title.
% Mandarin is braced as a proper noun; the city name in booktitle is spelled San Francisco.
@inproceedings{DBLP:conf/mhci/EdgeCWQYS12,
  author       = {Edge, Darren and
                  Cheng, Kai{-}Yin and
                  Whitney, Michael and
                  Qian, Yao and
                  Yan, Zhijie and
                  Soong, Frank K.},
  editor       = {Churchill, Elizabeth F. and
                  Subramanian, Sriram and
                  Baudisch, Patrick and
                  O'Hara, Kenton},
  title        = {Tip tap tones: mobile microtraining of {Mandarin} sounds},
  booktitle    = {Mobile {HCI} '12, Companion Proceedings of the 14th international
                  conference on Human-computer interaction with mobile devices and services,
                  San Francisco, CA, USA, September 21-24, 2012},
  pages        = {215--216},
  publisher    = {{ACM}},
  year         = {2012},
  url          = {https://doi.org/10.1145/2371664.2371715},
  doi          = {10.1145/2371664.2371715},
  timestamp    = {Tue, 06 Nov 2018 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/mhci/EdgeCWQYS12.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% Mobile HCI '12 main-proceedings record (pp. 427--430). DBLP:conf/mhci/EdgeCWQYS12 is the
% companion-proceedings record with the same authors and title.
% Mandarin is braced as a proper noun; the city name in booktitle is spelled San Francisco.
@inproceedings{DBLP:conf/mhci/EdgeCWQYS12a,
  author       = {Edge, Darren and
                  Cheng, Kai{-}Yin and
                  Whitney, Michael and
                  Qian, Yao and
                  Yan, Zhijie and
                  Soong, Frank K.},
  editor       = {Churchill, Elizabeth F. and
                  Subramanian, Sriram and
                  Baudisch, Patrick and
                  O'Hara, Kenton},
  title        = {Tip tap tones: mobile microtraining of {Mandarin} sounds},
  booktitle    = {Mobile {HCI} '12, Proceedings of the 14th international conference
                  on Human-computer interaction with mobile devices and services, San
                  Francisco, CA, USA, September 21-24, 2012},
  pages        = {427--430},
  publisher    = {{ACM}},
  year         = {2012},
  url          = {https://doi.org/10.1145/2371574.2371640},
  doi          = {10.1145/2371574.2371640},
  timestamp    = {Tue, 06 Nov 2018 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/mhci/EdgeCWQYS12a.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% ICASSP 2011 paper (Prague).
@inproceedings{DBLP:conf/icassp/LongYSDG11,
  author       = {Long, Yanhua and
                  Yan, Zhi{-}Jie and
                  Soong, Frank K. and
                  Dai, Li{-}Rong and
                  Guo, Wu},
  title        = {Speaker characterization using spectral subband energy ratio based
                  on Harmonic plus Noise Model},
  booktitle    = {Proceedings of the {IEEE} International Conference on Acoustics, Speech,
                  and Signal Processing, {ICASSP} 2011, May 22-27, 2011, Prague Congress
                  Center, Prague, Czech Republic},
  pages        = {4520--4523},
  publisher    = {{IEEE}},
  year         = {2011},
  url          = {https://doi.org/10.1109/ICASSP.2011.5947359},
  doi          = {10.1109/ICASSP.2011.5947359},
  timestamp    = {Thu, 27 Aug 2020 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/icassp/LongYSDG11.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% ICASSP 2011 paper (Prague).
@inproceedings{DBLP:conf/icassp/ZhangXYH11,
  author       = {Zhang, Yu and
                  Xu, Jian and
                  Yan, Zhi{-}Jie and
                  Huo, Qiang},
  title        = {A study of an irrelevant variability normalization based discriminative
                  training approach for {LVCSR}},
  booktitle    = {Proceedings of the {IEEE} International Conference on Acoustics, Speech,
                  and Signal Processing, {ICASSP} 2011, May 22-27, 2011, Prague Congress
                  Center, Prague, Czech Republic},
  pages        = {5308--5311},
  publisher    = {{IEEE}},
  year         = {2011},
  url          = {https://doi.org/10.1109/ICASSP.2011.5947556},
  doi          = {10.1109/ICASSP.2011.5947556},
  timestamp    = {Sun, 01 Apr 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/icassp/ZhangXYH11.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% INTERSPEECH 2011 paper (Florence).
@inproceedings{DBLP:conf/interspeech/LongYSDG11,
  author       = {Long, Yanhua and
                  Yan, Zhi{-}Jie and
                  Soong, Frank K. and
                  Dai, Li{-}Rong and
                  Guo, Wu},
  title        = {Improvements in Speaker Characterization Using Spectral Subband Energy
                  Based on Harmonic plus Noise Model},
  booktitle    = {12th Annual Conference of the International Speech Communication Association,
                  {INTERSPEECH} 2011, Florence, Italy, August 27-31, 2011},
  pages        = {373--376},
  publisher    = {{ISCA}},
  year         = {2011},
  url          = {https://doi.org/10.21437/Interspeech.2011-133},
  doi          = {10.21437/INTERSPEECH.2011-133},
  timestamp    = {Tue, 11 Jun 2024 16:45:43 +0200},
  biburl       = {https://dblp.org/rec/conf/interspeech/LongYSDG11.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% INTERSPEECH 2011 paper (Florence).
@inproceedings{DBLP:conf/interspeech/ZhangXYH11,
  author       = {Zhang, Yu and
                  Xu, Jian and
                  Yan, Zhi{-}Jie and
                  Huo, Qiang},
  title        = {An i-vector Based Approach to Training Data Clustering for Improved
                  Speech Recognition},
  booktitle    = {12th Annual Conference of the International Speech Communication Association,
                  {INTERSPEECH} 2011, Florence, Italy, August 27-31, 2011},
  pages        = {789--792},
  publisher    = {{ISCA}},
  year         = {2011},
  url          = {https://doi.org/10.21437/Interspeech.2011-179},
  doi          = {10.21437/INTERSPEECH.2011-179},
  timestamp    = {Fri, 23 Jun 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/interspeech/ZhangXYH11.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% INTERSPEECH 2011 paper (Florence).
@inproceedings{DBLP:conf/interspeech/XuZYH11,
  author       = {Xu, Jian and
                  Zhang, Yu and
                  Yan, Zhi{-}Jie and
                  Huo, Qiang},
  title        = {An i-vector Based Approach to Acoustic Sniffing for Irrelevant Variability
                  Normalization Based Acoustic Model Training and Speech Recognition},
  booktitle    = {12th Annual Conference of the International Speech Communication Association,
                  {INTERSPEECH} 2011, Florence, Italy, August 27-31, 2011},
  pages        = {1701--1704},
  publisher    = {{ISCA}},
  year         = {2011},
  url          = {https://doi.org/10.21437/Interspeech.2011-186},
  doi          = {10.21437/INTERSPEECH.2011-186},
  timestamp    = {Fri, 23 Jun 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/interspeech/XuZYH11.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% MLSP 2011 workshop paper (Beijing).
@inproceedings{DBLP:conf/mlsp/0007YH11,
  author       = {Zhang, Yu and
                  Yan, Zhi{-}Jie and
                  Huo, Qiang},
  title        = {A new i-vector approach and its application to irrelevant variability
                  normalization based acoustic model training},
  booktitle    = {2011 {IEEE} International Workshop on Machine Learning for Signal
                  Processing, {MLSP} 2011, Beijing, China, September 18-21, 2011},
  pages        = {1--6},
  publisher    = {{IEEE}},
  year         = {2011},
  url          = {https://doi.org/10.1109/MLSP.2011.6064637},
  doi          = {10.1109/MLSP.2011.6064637},
  timestamp    = {Wed, 16 Oct 2019 14:14:49 +0200},
  biburl       = {https://dblp.org/rec/conf/mlsp/0007YH11.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% Blizzard Challenge 2010 workshop paper; the record carries no pages field.
% Microsoft and Blizzard Challenge are braced as proper nouns so sentence-casing styles keep their capitals.
@inproceedings{DBLP:conf/blizzard/QianYWSZW10,
  author       = {Qian, Yao and
                  Yan, Zhi{-}Jie and
                  Wu, Yi{-}Jian and
                  Soong, Frank K. and
                  Zhang, Guoliang and
                  Wang, Lijuan},
  title        = {An {HMM} Trajectory Tiling {(HTT)} Approach to High Quality {TTS}
                  - {Microsoft} Entry to {Blizzard Challenge} 2010},
  booktitle    = {The Blizzard Challenge 2010, Kansai Science City, Japan, September
                  25, 2010},
  publisher    = {{ISCA}},
  year         = {2010},
  url          = {https://doi.org/10.21437/Blizzard.2010-14},
  doi          = {10.21437/BLIZZARD.2010-14},
  timestamp    = {Fri, 20 Sep 2024 10:07:57 +0200},
  biburl       = {https://dblp.org/rec/conf/blizzard/QianYWSZW10.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% ICASSP 2010 paper (Dallas).
% HMM is braced so sentence-casing styles keep the acronym upper-case, matching the already protected TTS.
@inproceedings{DBLP:conf/icassp/ZhangYS10,
  author       = {Zhang, Yu and
                  Yan, Zhi{-}Jie and
                  Soong, Frank K.},
  title        = {Cross-validation based decision tree clustering for {HMM}-based {TTS}},
  booktitle    = {Proceedings of the {IEEE} International Conference on Acoustics, Speech,
                  and Signal Processing, {ICASSP} 2010, 14-19 March 2010, Sheraton Dallas
                  Hotel, Dallas, Texas, {USA}},
  pages        = {4602--4605},
  publisher    = {{IEEE}},
  year         = {2010},
  url          = {https://doi.org/10.1109/ICASSP.2010.5495560},
  doi          = {10.1109/ICASSP.2010.5495560},
  timestamp    = {Fri, 19 May 2017 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/icassp/ZhangYS10.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% ICASSP 2010 paper (Dallas).
% HMM is braced so sentence-casing styles keep the acronym upper-case, matching the already protected F0, V/U and TTS.
@inproceedings{DBLP:conf/icassp/ZhangSQYPY10,
  author       = {Zhang, Qingqing and
                  Soong, Frank K. and
                  Qian, Yao and
                  Yan, Zhijie and
                  Pan, Jielin and
                  Yan, Yonghong},
  title        = {Improved modeling for {F0} generation and {V/U} decision in {HMM}-based
                  {TTS}},
  booktitle    = {Proceedings of the {IEEE} International Conference on Acoustics, Speech,
                  and Signal Processing, {ICASSP} 2010, 14-19 March 2010, Sheraton Dallas
                  Hotel, Dallas, Texas, {USA}},
  pages        = {4606--4609},
  publisher    = {{IEEE}},
  year         = {2010},
  url          = {https://doi.org/10.1109/ICASSP.2010.5495561},
  doi          = {10.1109/ICASSP.2010.5495561},
  timestamp    = {Fri, 19 May 2017 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/icassp/ZhangSQYPY10.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% ICASSP 2010 paper (Dallas).
% NOTE(review): the first title word was spelled with a capital I as its second letter; normalised to Rich-context
% on the assumption that this was a typo (RUS = Rich-context Unit Selection). Confirm against the publisher record.
@inproceedings{DBLP:conf/icassp/YanQS10,
  author       = {Yan, Zhi{-}Jie and
                  Qian, Yao and
                  Soong, Frank K.},
  title        = {Rich-context Unit Selection {(RUS)} approach to high quality {TTS}},
  booktitle    = {Proceedings of the {IEEE} International Conference on Acoustics, Speech,
                  and Signal Processing, {ICASSP} 2010, 14-19 March 2010, Sheraton Dallas
                  Hotel, Dallas, Texas, {USA}},
  pages        = {4798--4801},
  publisher    = {{IEEE}},
  year         = {2010},
  url          = {https://doi.org/10.1109/ICASSP.2010.5495150},
  doi          = {10.1109/ICASSP.2010.5495150},
  timestamp    = {Fri, 19 May 2017 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/icassp/YanQS10.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% INTERSPEECH 2010 paper (Makuhari, Chiba).
@inproceedings{DBLP:conf/interspeech/QianYWSZK10,
  author       = {Qian, Yao and
                  Yan, Zhi{-}Jie and
                  Wu, Yi{-}Jian and
                  Soong, Frank K. and
                  Zhuang, Xin and
                  Kong, Shengyi},
  editor       = {Kobayashi, Takao and
                  Hirose, Keikichi and
                  Nakamura, Satoshi},
  title        = {An {HMM} trajectory tiling {(HTT)} approach to high quality {TTS}},
  booktitle    = {11th Annual Conference of the International Speech Communication Association,
                  {INTERSPEECH} 2010, Makuhari, Chiba, Japan, September 26-30, 2010},
  pages        = {422--425},
  publisher    = {{ISCA}},
  year         = {2010},
  url          = {https://doi.org/10.21437/Interspeech.2010-175},
  doi          = {10.21437/INTERSPEECH.2010-175},
  timestamp    = {Tue, 11 Jun 2024 16:45:43 +0200},
  biburl       = {https://dblp.org/rec/conf/interspeech/QianYWSZK10.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% INTERSPEECH 2010 paper (Makuhari, Chiba).
% HMM is braced so sentence-casing styles keep the acronym upper-case, matching the already protected TTS.
@inproceedings{DBLP:conf/interspeech/ChenYS10,
  author       = {Chen, Yining and
                  Yan, Zhi{-}Jie and
                  Soong, Frank K.},
  editor       = {Kobayashi, Takao and
                  Hirose, Keikichi and
                  Nakamura, Satoshi},
  title        = {A perceptual study of acceleration parameters in {HMM}-based {TTS}},
  booktitle    = {11th Annual Conference of the International Speech Communication Association,
                  {INTERSPEECH} 2010, Makuhari, Chiba, Japan, September 26-30, 2010},
  pages        = {426--429},
  publisher    = {{ISCA}},
  year         = {2010},
  url          = {https://doi.org/10.21437/Interspeech.2010-176},
  doi          = {10.21437/INTERSPEECH.2010-176},
  timestamp    = {Fri, 23 Jun 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/interspeech/ChenYS10.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% ICASSP 2009 paper (Taipei).
% The plural acronym is written HMMs with a lower-case s; the braces keep that exact casing under any style.
@inproceedings{DBLP:conf/icassp/YanLHJ09,
  author       = {Yan, Zhi{-}Jie and
                  Liu, Cong and
                  Hu, Yu and
                  Jiang, Hui},
  title        = {A trust region based optimization for maximum mutual information estimation
                  of {HMMs} in speech recognition},
  booktitle    = {Proceedings of the {IEEE} International Conference on Acoustics, Speech,
                  and Signal Processing, {ICASSP} 2009, 19-24 April 2009, Taipei, Taiwan},
  pages        = {3757--3760},
  publisher    = {{IEEE}},
  year         = {2009},
  url          = {https://doi.org/10.1109/ICASSP.2009.4960444},
  doi          = {10.1109/ICASSP.2009.4960444},
  timestamp    = {Thu, 23 Mar 2023 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/icassp/YanLHJ09.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% INTERSPEECH 2009 paper (Brighton).
% HMM is braced so sentence-casing styles keep the acronym upper-case, matching the already protected TTS.
@inproceedings{DBLP:conf/interspeech/YanQS09,
  author       = {Yan, Zhi{-}Jie and
                  Qian, Yao and
                  Soong, Frank K.},
  title        = {Rich context modeling for high quality {HMM}-based {TTS}},
  booktitle    = {10th Annual Conference of the International Speech Communication Association,
                  {INTERSPEECH} 2009, Brighton, United Kingdom, September 6-10, 2009},
  pages        = {1755--1758},
  publisher    = {{ISCA}},
  year         = {2009},
  url          = {https://doi.org/10.21437/Interspeech.2009-142},
  doi          = {10.21437/INTERSPEECH.2009-142},
  timestamp    = {Tue, 11 Jun 2024 16:45:43 +0200},
  biburl       = {https://dblp.org/rec/conf/interspeech/YanQS09.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% ICASSP 2008 paper (Las Vegas).
% The plural acronym is written HMMs with a lower-case s; the braces keep that exact casing under any style.
@inproceedings{DBLP:conf/icassp/YanZHW08,
  author       = {Yan, Zhi{-}Jie and
                  Zhu, Bo and
                  Hu, Yu and
                  Wang, Ren{-}Hua},
  title        = {Minimum word classification error training of {HMMs} for automatic
                  speech recognition},
  booktitle    = {Proceedings of the {IEEE} International Conference on Acoustics, Speech,
                  and Signal Processing, {ICASSP} 2008, March 30 - April 4, 2008, Caesars
                  Palace, Las Vegas, Nevada, {USA}},
  pages        = {4521--4524},
  publisher    = {{IEEE}},
  year         = {2008},
  url          = {https://doi.org/10.1109/ICASSP.2008.4518661},
  doi          = {10.1109/ICASSP.2008.4518661},
  timestamp    = {Wed, 30 Sep 2020 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/icassp/YanZHW08.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% INTERSPEECH 2008 paper (Brisbane).
@inproceedings{DBLP:conf/interspeech/LiYLW08,
  author       = {Li, Jinyu and
                  Yan, Zhi{-}Jie and
                  Lee, Chin{-}Hui and
                  Wang, Ren{-}Hua},
  title        = {Soft margin estimation with various separation levels for {LVCSR}},
  booktitle    = {9th Annual Conference of the International Speech Communication Association,
                  {INTERSPEECH} 2008, Brisbane, Australia, September 22-26, 2008},
  pages        = {269--272},
  publisher    = {{ISCA}},
  year         = {2008},
  url          = {https://doi.org/10.21437/Interspeech.2008-100},
  doi          = {10.21437/INTERSPEECH.2008-100},
  timestamp    = {Mon, 22 Jul 2024 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/interspeech/LiYLW08.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% ISCSLP 2008 paper. "MAP" in the title is an acronym (presumably maximum
% a posteriori adaptation), so it is upper-cased and brace-protected.
@inproceedings{DBLP:conf/iscslp/ZhuYHWDW08,
  author       = {Bo Zhu and
                  Zhi{-}Jie Yan and
                  Yu Hu and
                  Zhiguo Wang and
                  Li{-}Rong Dai and
                  Ren{-}Hua Wang},
  editor       = {Helen M. Meng and
                  Hui Jiang and
                  Jianhua Tao and
                  Ren{-}Hua Wang},
  title        = {Investigation on Adaptation Using Different Discriminative Training
                  Criteria Based Linear Regression and {MAP}},
  booktitle    = {6th International Symposium on Chinese Spoken Language Processing,
                  {ISCSLP} 2008, 16-19 December, 2008, Kunming, China},
  pages        = {93--96},
  publisher    = {{IEEE}},
  year         = {2008},
  url          = {https://doi.org/10.1109/CHINSL.2008.ECP.35},
  doi          = {10.1109/CHINSL.2008.ECP.35},
  timestamp    = {Wed, 18 Sep 2024 12:50:19 +0200},
  biburl       = {https://dblp.org/rec/conf/iscslp/ZhuYHWDW08.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% ASRU 2007 workshop paper. The title is stored in Title Case so that styles
% which do not recase titles render it like the other entries.
@inproceedings{DBLP:conf/asru/LiYLW07,
  author       = {Jinyu Li and
                  Zhi{-}Jie Yan and
                  Chin{-}Hui Lee and
                  Ren{-}Hua Wang},
  editor       = {Sadaoki Furui and
                  Tatsuya Kawahara},
  title        = {A Study on Soft Margin Estimation for {LVCSR}},
  booktitle    = {{IEEE} Workshop on Automatic Speech Recognition {\&} Understanding,
                  {ASRU} 2007, Kyoto, Japan, December 9-13, 2007},
  pages        = {268--271},
  publisher    = {{IEEE}},
  year         = {2007},
  url          = {https://doi.org/10.1109/ASRU.2007.4430122},
  doi          = {10.1109/ASRU.2007.4430122},
  timestamp    = {Mon, 22 Jul 2024 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/asru/LiYLW07.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% ICASSP 2007 paper. The country acronym in booktitle is brace-protected,
% matching how the ICASSP 2008 entry in this file writes it.
@inproceedings{DBLP:conf/icassp/YanSW07,
  author       = {Zhi{-}Jie Yan and
                  Frank K. Soong and
                  Ren{-}Hua Wang},
  title        = {Word Graph Based Feature Enhancement for Noisy Speech Recognition},
  booktitle    = {Proceedings of the {IEEE} International Conference on Acoustics, Speech,
                  and Signal Processing, {ICASSP} 2007, Honolulu, Hawaii, {USA}, April
                  15-20, 2007},
  pages        = {373--376},
  publisher    = {{IEEE}},
  year         = {2007},
  url          = {https://doi.org/10.1109/ICASSP.2007.366927},
  doi          = {10.1109/ICASSP.2007.366927},
  timestamp    = {Mon, 22 Jun 2020 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/icassp/YanSW07.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

% ISCSLP 2006 paper hosted on the ISCA archive; the record carries no page
% range or DOI, only the archive URL.
@inproceedings{DBLP:conf/iscslp/0006Y0W06,
  author    = {Cong Liu and Zhijie Yan and Yu Hu and Renhua Wang},
  title     = {A Comparative Study on Confidence Measure in Mandarin Command Word Recognition},
  booktitle = {5th International Symposium on Chinese Spoken Language Processing, {ISCSLP} 2006, Singapore, December 13-16, 2006},
  publisher = {{ISCA}},
  year      = {2006},
  url       = {https://www.isca-archive.org/iscslp\_2006/liu06d\_iscslp.html},
  biburl    = {https://dblp.org/rec/conf/iscslp/0006Y0W06.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Thu, 26 Sep 2024 17:06:35 +0200}
}

% ISCSLP 2006 paper hosted on the ISCA archive; the record carries no page
% range or DOI, only the archive URL.
@inproceedings{DBLP:conf/iscslp/Yan0DSW06,
  author    = {Zhijie Yan and Peng Liu and Jun Du and Frank K. Soong and Renhua Wang},
  title     = {Training Discriminative {HMM} by Optimal Allocation of Gaussian Kernels},
  booktitle = {5th International Symposium on Chinese Spoken Language Processing, {ISCSLP} 2006, Singapore, December 13-16, 2006},
  publisher = {{ISCA}},
  year      = {2006},
  url       = {https://www.isca-archive.org/iscslp\_2006/yan06b\_iscslp.html},
  biburl    = {https://dblp.org/rec/conf/iscslp/Yan0DSW06.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
  timestamp = {Wed, 04 Dec 2024 00:00:00 +0100}
}

% ISCSLP 2006 paper published in Springer's LNCS series.
% NOTE(review): the LNCS volume number was missing from the record; 4274 is
% believed to be the volume for this proceedings -- confirm against the
% Springer record for DOI prefix 10.1007/11939993.
@inproceedings{DBLP:conf/iscslp/YanZSW06,
  author       = {Zhi{-}Jie Yan and
                  Jian{-}Lai Zhou and
                  Frank K. Soong and
                  Ren{-}Hua Wang},
  editor       = {Qiang Huo and
                  Bin Ma and
                  Chng Eng Siong and
                  Haizhou Li},
  title        = {Signal Trajectory Based Noise Compensation for Robust Speech Recognition},
  booktitle    = {Chinese Spoken Language Processing, 5th International Symposium, {ISCSLP}
                  2006, Singapore, December 13-16, 2006, Selected Papers},
  series       = {Lecture Notes in Computer Science},
  volume       = {4274},
  pages        = {335--345},
  publisher    = {Springer},
  year         = {2006},
  url          = {https://doi.org/10.1007/11939993\_37},
  doi          = {10.1007/11939993\_37},
  timestamp    = {Thu, 26 Sep 2024 14:17:09 +0200},
  biburl       = {https://dblp.org/rec/conf/iscslp/YanZSW06.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

manage site settings

To protect your privacy, all features that rely on external API calls from your browser are turned off by default. You need to opt-in for them to become active. All settings here will be stored as cookies with your web browser. For more information see our F.A.Q.