{"id":"https://openalex.org/W4379618854","doi":"https://doi.org/10.1109/access.2023.3283772","title":"MixGAN-TTS: Efficient and Stable Speech Synthesis Based on Diffusion Model","display_name":"MixGAN-TTS: Efficient and Stable Speech Synthesis Based on Diffusion Model","publication_year":2023,"publication_date":"2023-01-01","ids":{"openalex":"https://openalex.org/W4379618854","doi":"https://doi.org/10.1109/access.2023.3283772"},"language":"en","primary_location":{"id":"doi:10.1109/access.2023.3283772","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2023.3283772","pdf_url":"https://ieeexplore.ieee.org/ielx7/6287639/10005208/10145456.pdf","source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://ieeexplore.ieee.org/ielx7/6287639/10005208/10145456.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5079798556","display_name":"Yan Deng","orcid":"https://orcid.org/0000-0002-0778-6144"},"institutions":[{"id":"https://openalex.org/I150807315","display_name":"Guangxi University","ror":"https://ror.org/02c9qn167","country_code":"CN","type":"education","lineage":["https://openalex.org/I150807315"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Yan Deng","raw_affiliation_strings":["School of Computer, Electronics and Information, Guangxi University, Nanning, China"],"raw_orcid":"https://orcid.org/0000-0002-0778-6144","affiliations":[{"raw_affiliation_string":"School of Computer, Electronics and Information, Guangxi University, Nanning, China","institution_ids":["https://openalex.org/I150807315"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5059122235","display_name":"Ning Wu","orcid":"https://orcid.org/0000-0002-4951-6337"},"institutions":[{"id":"https://openalex.org/I4210128032","display_name":"Beibu Gulf University","ror":"https://ror.org/031j0at32","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210128032"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ning Wu","raw_affiliation_strings":["Key Laboratory of Beibu Gulf Offshore Engineering Equipment and Technology, Beibu Gulf University, Qinzhou, China"],"raw_orcid":"https://orcid.org/0000-0002-4951-6337","affiliations":[{"raw_affiliation_string":"Key Laboratory of Beibu Gulf Offshore Engineering Equipment and Technology, Beibu Gulf University, Qinzhou, China","institution_ids":["https://openalex.org/I4210128032"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5042410962","display_name":"Chengjun Qiu","orcid":"https://orcid.org/0009-0001-2264-8866"},"institutions":[{"id":"https://openalex.org/I4210128032","display_name":"Beibu Gulf University","ror":"https://ror.org/031j0at32","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210128032"]},{"id":"https://openalex.org/I4210153248","display_name":"Guangxi Institute of Oceanography","ror":"https://ror.org/052h3h832","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210153248"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chengjun Qiu","raw_affiliation_strings":["College of Mechanical Naval Architecture and Ocean Engineering, Beibu Gulf University, Qinzhou, China","Guangxi Key Laboratory of Ocean Engineering Equipment and Technology, Qinzhou, China"],"raw_orcid":"https://orcid.org/0009-0001-2264-8866","affiliations":[{"raw_affiliation_string":"College of Mechanical Naval Architecture and Ocean Engineering, Beibu Gulf University, Qinzhou, China","institution_ids":["https://openalex.org/I4210128032"]},{"raw_affiliation_string":"Guangxi Key Laboratory of Ocean Engineering Equipment and Technology, Qinzhou, China","institution_ids":["https://openalex.org/I4210153248"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102800579","display_name":"Yangyang Luo","orcid":"https://orcid.org/0009-0005-3533-3619"},"institutions":[{"id":"https://openalex.org/I150807315","display_name":"Guangxi University","ror":"https://ror.org/02c9qn167","country_code":"CN","type":"education","lineage":["https://openalex.org/I150807315"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yangyang Luo","raw_affiliation_strings":["School of Computer, Electronics and Information, Guangxi University, Nanning, China"],"raw_orcid":"https://orcid.org/0009-0005-3533-3619","affiliations":[{"raw_affiliation_string":"School of Computer, Electronics and Information, Guangxi University, Nanning, China","institution_ids":["https://openalex.org/I150807315"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5069796663","display_name":"Yan Chen","orcid":"https://orcid.org/0000-0002-9950-684X"},"institutions":[{"id":"https://openalex.org/I150807315","display_name":"Guangxi University","ror":"https://ror.org/02c9qn167","country_code":"CN","type":"education","lineage":["https://openalex.org/I150807315"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yan Chen","raw_affiliation_strings":["School of Computer, Electronics and Information, Guangxi University, Nanning, China"],"raw_orcid":"https://orcid.org/0000-0002-9950-684X","affiliations":[{"raw_affiliation_string":"School of Computer, Electronics and Information, Guangxi University, Nanning, China","institution_ids":["https://openalex.org/I150807315"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5079798556"],"corresponding_institution_ids":["https://openalex.org/I150807315"],"apc_list":{"value":1850,"currency":"USD","value_usd":1850},"apc_paid":{"value":1850,"currency":"USD","value_usd":1850},"fwci":2.1906,"has_fulltext":true,"cited_by_count":13,"citation_normalized_percentile":{"value":0.89941948,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":94,"max":98},"biblio":{"volume":"11","issue":null,"first_page":"57674","last_page":"57682"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9927999973297119,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9904000163078308,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.751778781414032},{"id":"https://openalex.org/keywords/spectrogram","display_name":"Spectrogram","score":0.7316006422042847},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6317405104637146},{"id":"https://openalex.org/keywords/noise-reduction","display_name":"Noise reduction","score":0.6155099868774414},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.5474047660827637},{"id":"https://openalex.org/keywords/autoregressive-model","display_name":"Autoregressive model","score":0.4977777302265167},{"id":"https://openalex.org/keywords/diffusion","display_name":"Diffusion","score":0.47200700640678406},{"id":"https://openalex.org/keywords/gaussian","display_name":"Gaussian","score":0.4603578448295593},{"id":"https://openalex.org/keywords/word","display_name":"Word (group theory)","score":0.43438422679901123},{"id":"https://openalex.org/keywords/speech-synthesis","display_name":"Speech synthesis","score":0.42952728271484375},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.33880871534347534},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.14446678757667542}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.751778781414032},{"id":"https://openalex.org/C45273575","wikidata":"https://www.wikidata.org/wiki/Q578970","display_name":"Spectrogram","level":2,"score":0.7316006422042847},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6317405104637146},{"id":"https://openalex.org/C163294075","wikidata":"https://www.wikidata.org/wiki/Q581861","display_name":"Noise reduction","level":2,"score":0.6155099868774414},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.5474047660827637},{"id":"https://openalex.org/C159877910","wikidata":"https://www.wikidata.org/wiki/Q2202883","display_name":"Autoregressive model","level":2,"score":0.4977777302265167},{"id":"https://openalex.org/C69357855","wikidata":"https://www.wikidata.org/wiki/Q163214","display_name":"Diffusion","level":2,"score":0.47200700640678406},{"id":"https://openalex.org/C163716315","wikidata":"https://www.wikidata.org/wiki/Q901177","display_name":"Gaussian","level":2,"score":0.4603578448295593},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.43438422679901123},{"id":"https://openalex.org/C14999030","wikidata":"https://www.wikidata.org/wiki/Q16346","display_name":"Speech synthesis","level":2,"score":0.42952728271484375},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.33880871534347534},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.14446678757667542},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C97355855","wikidata":"https://www.wikidata.org/wiki/Q11473","display_name":"Thermodynamics","level":1,"score":0.0},{"id":"https://openalex.org/C149782125","wikidata":"https://www.wikidata.org/wiki/Q160039","display_name":"Econometrics","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/access.2023.3283772","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2023.3283772","pdf_url":"https://ieeexplore.ieee.org/ielx7/6287639/10005208/10145456.pdf","source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},{"id":"pmh:oai:doaj.org/article:51aab411976e412b8a338d0ade84d204","is_oa":true,"landing_page_url":"https://doaj.org/article/51aab411976e412b8a338d0ade84d204","pdf_url":null,"source":{"id":"https://openalex.org/S4306401280","display_name":"DOAJ (DOAJ: Directory of Open Access Journals)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"IEEE Access, Vol 11, Pp 57674-57682 (2023)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.1109/access.2023.3283772","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2023.3283772","pdf_url":"https://ieeexplore.ieee.org/ielx7/6287639/10005208/10145456.pdf","source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4379618854.pdf","grobid_xml":"https://content.openalex.org/works/W4379618854.grobid-xml"},"referenced_works_count":47,"referenced_works":["https://openalex.org/W349236604","https://openalex.org/W2107860279","https://openalex.org/W2133665775","https://openalex.org/W2519091744","https://openalex.org/W2593414223","https://openalex.org/W2767052532","https://openalex.org/W2903739847","https://openalex.org/W2946200149","https://openalex.org/W2963300588","https://openalex.org/W2963609956","https://openalex.org/W2964167449","https://openalex.org/W2964243274","https://openalex.org/W2974381323","https://openalex.org/W2990384275","https://openalex.org/W3026874504","https://openalex.org/W3033411150","https://openalex.org/W3036167779","https://openalex.org/W3092028330","https://openalex.org/W3094002217","https://openalex.org/W3123097577","https://openalex.org/W3158762648","https://openalex.org/W3172148458","https://openalex.org/W3196308991","https://openalex.org/W3198710156","https://openalex.org/W4226063663","https://openalex.org/W4226334828","https://openalex.org/W4226376398","https://openalex.org/W4240592325","https://openalex.org/W4286950013","https://openalex.org/W4287250916","https://openalex.org/W4320930577","https://openalex.org/W4385245566","https://openalex.org/W6687506355","https://openalex.org/W6739901393","https://openalex.org/W6746023985","https://openalex.org/W6763832098","https://openalex.org/W6777694618","https://openalex.org/W6778823374","https://openalex.org/W6779823529","https://openalex.org/W6782760101","https://openalex.org/W6783867762","https://openalex.org/W6785090365","https://openalex.org/W6793578827","https://openalex.org/W6795261426","https://openalex.org/W6802142237","https://openalex.org/W6810926057","https://openalex.org/W6811291704"],"related_works":["https://openalex.org/W2530685530","https://openalex.org/W4375868962","https://openalex.org/W2088854863","https://openalex.org/W2011227383","https://openalex.org/W1976719989","https://openalex.org/W2942893872","https://openalex.org/W2065606036","https://openalex.org/W3179495260","https://openalex.org/W3127543252","https://openalex.org/W2016904525"],"abstract_inverted_index":{"This":[0],"paper":[1],"describes":[2],"MixGAN-TTS,":[3],"an":[4],"efficient":[5],"and":[6,26,37,40,74,99,142],"stable":[7],"non-autoregressive":[8],"speech":[9],"synthesis":[10],"based":[11,21],"on":[12,22],"diffusion":[13,87],"model.":[14,88],"The":[15,114,150],"MixGAN-TTS":[16,131,158],"uses":[17],"a":[18],"linguistic":[19],"encoder":[20],"soft":[23],"phoneme-level":[24],"alignment":[25,29],"hard":[27],"word-level":[28,34],"approach":[30],"which":[31],"explicitly":[32],"extracts":[33],"semantic":[35],"information,":[36],"introduces":[38],"pitch":[39],"energy":[41],"predictors":[42],"to":[43,57,62,68,81,100,110],"optimally":[44],"predict":[45],"the":[46,50,55,59,64,70,76,83,96,104,119,123,130,133,155],"rhythmic":[47],"information":[48],"of":[49,78,86,106,139,157],"audio.":[51],"Specifically,":[52],"we":[53],"use":[54],"GAN":[56,92],"replace":[58],"Gaussian":[60],"function":[61],"model":[63,90],"denoising":[65,71,79,97,148],"distribution,":[66],"aiming":[67],"enlarge":[69],"steps":[72,80],"size":[73],"reduce":[75,95],"number":[77],"accelerate":[82],"sampling":[84],"speed":[85],"Diffusion":[89],"using":[91],"can":[93],"significantly":[94],"steps,":[98],"some":[101],"extent":[102],"solve":[103],"problem":[105],"not":[107],"being":[108],"able":[109],"apply":[111],"in":[112,137],"real-time.":[113],"mel-spectrogram":[115,143],"is":[116,159],"converted":[117],"into":[118],"final":[120],"audio":[121,140],"by":[122],"HiFi-GAN":[124],"vocoder.":[125],"Experimental":[126],"results":[127],"show":[128],"that":[129,154],"outperforms":[132],"other":[134],"models":[135],"compared":[136],"terms":[138],"quality":[141],"modeling":[144],"capability":[145],"for":[146],"4":[147],"steps.":[149],"ablation":[151],"studies":[152],"demonstrate":[153],"structure":[156],"effective.":[160]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":4},{"year":2024,"cited_by_count":6},{"year":2023,"cited_by_count":2}],"updated_date":"2026-05-06T08:25:59.206177","created_date":"2025-10-10T00:00:00"}
