{"id":"https://openalex.org/W4378499140","doi":"https://doi.org/10.48550/arxiv.2305.15719","title":"Efficient Neural Music Generation","display_name":"Efficient Neural Music Generation","publication_year":2023,"publication_date":"2023-05-25","ids":{"openalex":"https://openalex.org/W4378499140","doi":"https://doi.org/10.48550/arxiv.2305.15719"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2305.15719","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2305.15719","pdf_url":"https://arxiv.org/pdf/2305.15719","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-nc-sa","license_id":"https://openalex.org/licenses/cc-by-nc-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2305.15719","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5102313170","display_name":"Max W. Y. Lam","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Lam, Max W. Y.","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103162279","display_name":"Qiao Tian","orcid":"https://orcid.org/0000-0002-4078-1273"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tian, Qiao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100329338","display_name":"Tang Li","orcid":"https://orcid.org/0000-0003-0123-1364"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Tang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101206771","display_name":"Zongyu Yin","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yin, Zongyu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5053373302","display_name":"Siyuan Feng","orcid":"https://orcid.org/0000-0003-2531-8480"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Feng, Siyuan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102334100","display_name":"Ming Tu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tu, Ming","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5104247077","display_name":"Yuliang Ji","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ji, Yuliang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100605555","display_name":"Rui Xia","orcid":"https://orcid.org/0000-0003-3605-509X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xia, Rui","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101022759","display_name":"Mingbo Ma","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ma, Mingbo","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5104098658","display_name":"Xuchen Song","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Song, Xuchen","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5046063537","display_name":"Jitong Chen","orcid":"https://orcid.org/0000-0001-6084-043X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Jitong","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100339128","display_name":"Yu-Ping Wang","orcid":"https://orcid.org/0000-0003-4129-7704"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Yuping","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5100375986","display_name":"Yuxuan Wang","orcid":"https://orcid.org/0000-0002-5743-2029"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Yuxuan","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":13,"corresponding_author_ids":["https://openalex.org/A5102313170"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":true,"cited_by_count":12,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9980000257492065,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9980000257492065,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11349","display_name":"Music Technology and Sound Studies","score":0.9926999807357788,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9884999990463257,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7777693271636963},{"id":"https://openalex.org/keywords/sampling","display_name":"Sampling (signal processing)","score":0.5492253303527832},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.492183119058609},{"id":"https://openalex.org/keywords/waveform","display_name":"Waveform","score":0.46169981360435486},{"id":"https://openalex.org/keywords/diffusion","display_name":"Diffusion","score":0.42442643642425537},{"id":"https://openalex.org/keywords/noise","display_name":"Noise (video)","score":0.41543489694595337},{"id":"https://openalex.org/keywords/state","display_name":"State (computer science)","score":0.4108520746231079},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.36221981048583984},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.2381855845451355},{"id":"https://openalex.org/keywords/detector","display_name":"Detector","score":0.09941971302032471}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7777693271636963},{"id":"https://openalex.org/C140779682","wikidata":"https://www.wikidata.org/wiki/Q210868","display_name":"Sampling (signal processing)","level":3,"score":0.5492253303527832},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.492183119058609},{"id":"https://openalex.org/C197424946","wikidata":"https://www.wikidata.org/wiki/Q1165717","display_name":"Waveform","level":3,"score":0.46169981360435486},{"id":"https://openalex.org/C69357855","wikidata":"https://www.wikidata.org/wiki/Q163214","display_name":"Diffusion","level":2,"score":0.42442643642425537},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.41543489694595337},{"id":"https://openalex.org/C48103436","wikidata":"https://www.wikidata.org/wiki/Q599031","display_name":"State (computer science)","level":2,"score":0.4108520746231079},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.36221981048583984},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.2381855845451355},{"id":"https://openalex.org/C94915269","wikidata":"https://www.wikidata.org/wiki/Q1834857","display_name":"Detector","level":2,"score":0.09941971302032471},{"id":"https://openalex.org/C97355855","wikidata":"https://www.wikidata.org/wiki/Q11473","display_name":"Thermodynamics","level":1,"score":0.0},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C554190296","wikidata":"https://www.wikidata.org/wiki/Q47528","display_name":"Radar","level":2,"score":0.0},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2305.15719","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2305.15719","pdf_url":"https://arxiv.org/pdf/2305.15719","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-nc-sa","license_id":"https://openalex.org/licenses/cc-by-nc-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},{"id":"doi:10.48550/arxiv.2305.15719","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2305.15719","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2305.15719","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2305.15719","pdf_url":"https://arxiv.org/pdf/2305.15719","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-nc-sa","license_id":"https://openalex.org/licenses/cc-by-nc-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4378499140.pdf","grobid_xml":"https://content.openalex.org/works/W4378499140.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W1974895211","https://openalex.org/W2176409448","https://openalex.org/W2129841057","https://openalex.org/W3040712279","https://openalex.org/W2364769705","https://openalex.org/W2056136368","https://openalex.org/W2374664672","https://openalex.org/W4367555392","https://openalex.org/W2883092465","https://openalex.org/W2114441484"],"abstract_inverted_index":{"Recent":[0],"progress":[1],"in":[2,105,182,195],"music":[3,59,93],"generation":[4,60],"has":[5],"been":[6],"remarkably":[7],"advanced":[8],"by":[9,40,156],"the":[10,32,44,116,139,151,158,176],"state-of-the-art":[11,96,197],"MusicLM,":[12,106],"which":[13],"comprises":[14],"a":[15,55,62,69,126],"hierarchy":[16],"of":[17,95,163,178],"three":[18],"LMs,":[19],"respectively,":[20,107],"for":[21,54,79,82,85,108,121],"semantic,":[22],"coarse":[23,152],"acoustic,":[24],"and":[25,52,124,132,153,189,201],"fine":[26,154],"acoustic":[27,46],"modelings.":[28],"Yet,":[29],"sampling":[30,109,187],"with":[31,61,66],"MusicLM":[33,67,120],"requires":[34],"processing":[35],"through":[36],"these":[37],"LMs":[38],"one":[39,41],"to":[42,136,148],"obtain":[43],"fine-grained":[45],"tokens,":[47],"making":[48],"it":[49],"computationally":[50],"expensive":[51],"prohibitive":[53],"real-time":[56],"generation.":[57],"Efficient":[58],"quality":[63,97],"on":[64,186],"par":[65],"remains":[68],"significant":[70],"challenge.":[71],"In":[72],"this":[73],"paper,":[74],"we":[75],"present":[76],"MeLoDy":[77,114],"(M":[78],"music;":[80],"L":[81],"LM;":[83],"D":[84],"diffusion),":[86],"an":[87,133],"LM-guided":[88],"diffusion":[89,129],"model":[90,131,150],"that":[91],"generates":[92],"audios":[94],"meanwhile":[98],"reducing":[99],"95.7%":[100],"or":[101,111],"99.6%":[102],"forward":[103],"passes":[104],"10s":[110],"30s":[112],"music.":[113],"inherits":[115],"highest-level":[117],"LM":[118],"from":[119],"semantic":[122,141,159],"modeling,":[123],"applies":[125],"novel":[127],"dual-path":[128],"(DPD)":[130],"audio":[134,199],"VAE-GAN":[135],"efficiently":[137],"decode":[138],"conditioning":[140],"tokens":[142],"into":[143,161],"waveform.":[144],"DPD":[145],"is":[146],"proposed":[147],"simultaneously":[149],"acoustics":[155],"incorporating":[157],"information":[160],"segments":[162],"latents":[164],"effectively":[165],"via":[166],"cross-attention":[167],"at":[168,208],"each":[169],"denoising":[170],"step.":[171],"Our":[172,204],"experimental":[173],"results":[174],"suggest":[175],"superiority":[177],"MeLoDy,":[179],"not":[180],"only":[181],"its":[183,196],"practical":[184],"advantages":[185],"speed":[188],"infinitely":[190],"continuable":[191],"generation,":[192],"but":[193],"also":[194],"musicality,":[198],"quality,":[200],"text":[202],"correlation.":[203],"samples":[205],"are":[206],"available":[207],"https://Efficient-MeLoDy.github.io/.":[209]},"counts_by_year":[{"year":2025,"cited_by_count":6},{"year":2024,"cited_by_count":6}],"updated_date":"2026-03-13T16:22:10.518609","created_date":"2025-10-10T00:00:00"}
