{"id":"https://openalex.org/W4413145494","doi":"https://doi.org/10.1109/cvpr52734.2025.01468","title":"Autoregressive Distillation of Diffusion Transformers","display_name":"Autoregressive Distillation of Diffusion Transformers","publication_year":2025,"publication_date":"2025-06-10","ids":{"openalex":"https://openalex.org/W4413145494","doi":"https://doi.org/10.1109/cvpr52734.2025.01468"},"language":"en","primary_location":{"id":"doi:10.1109/cvpr52734.2025.01468","is_oa":false,"landing_page_url":"https://doi.org/10.1109/cvpr52734.2025.01468","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101798670","display_name":"Yeongmin Kim","orcid":"https://orcid.org/0000-0001-5239-4070"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Yeongmin Kim","raw_affiliation_strings":["Meta GenAI"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Meta GenAI","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5030953545","display_name":"Sotiris Anagnostidis","orcid":"https://orcid.org/0000-0001-8012-3331"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sotiris Anagnostidis","raw_affiliation_strings":["Meta GenAI"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Meta GenAI","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5031074685","display_name":"Yuming Du","orcid":"https://orcid.org/0009-0001-2132-8027"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yuming Du","raw_affiliation_strings":["Meta GenAI"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Meta GenAI","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5109715046","display_name":"Edgar Sch\u00f6nfeld","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Edgar Sch\u00f6nfeld","raw_affiliation_strings":["Meta GenAI"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Meta GenAI","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5089301599","display_name":"Jonas K\u00f6hler","orcid":"https://orcid.org/0000-0002-7256-2892"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jonas Kohler","raw_affiliation_strings":["Meta GenAI"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Meta GenAI","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5074117139","display_name":"Markos Georgopoulos","orcid":"https://orcid.org/0000-0001-5928-515X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Markos Georgopoulos","raw_affiliation_strings":["Meta GenAI"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Meta GenAI","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5023922146","display_name":"Albert Pumarola","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Albert Pumarola","raw_affiliation_strings":["Meta GenAI"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Meta GenAI","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5026143507","display_name":"Ali Thabet","orcid":"https://orcid.org/0000-0001-7513-0748"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ali Thabet","raw_affiliation_strings":["Meta GenAI"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Meta GenAI","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5053620423","display_name":"Artsiom Sanakoyeu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Artsiom Sanakoyeu","raw_affiliation_strings":["Meta GenAI"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Meta GenAI","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":9,"corresponding_author_ids":["https://openalex.org/A5101798670"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":4.1903,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.94274266,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":98,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"15745","last_page":"15756"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10320","display_name":"Neural Networks and Applications","score":0.7416999936103821,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10320","display_name":"Neural Networks and Applications","score":0.7416999936103821,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/autoregressive-model","display_name":"Autoregressive model","score":0.558525025844574},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.5123013257980347},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.4921678900718689},{"id":"https://openalex.org/keywords/distillation","display_name":"Distillation","score":0.4440971612930298},{"id":"https://openalex.org/keywords/diffusion","display_name":"Diffusion","score":0.41249802708625793},{"id":"https://openalex.org/keywords/econometrics","display_name":"Econometrics","score":0.1972387135028839},{"id":"https://openalex.org/keywords/electrical-engineering","display_name":"Electrical engineering","score":0.1941242218017578},{"id":"https://openalex.org/keywords/chemistry","display_name":"Chemistry","score":0.19012156128883362},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.1760733723640442},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.17269009351730347},{"id":"https://openalex.org/keywords/thermodynamics","display_name":"Thermodynamics","score":0.16665419936180115},{"id":"https://openalex.org/keywords/chromatography","display_name":"Chromatography","score":0.16309991478919983},{"id":"https://openalex.org/keywords/physics","display_name":"Physics","score":0.1030532717704773},{"id":"https://openalex.org/keywords/voltage","display_name":"Voltage","score":0.0891149640083313}],"concepts":[{"id":"https://openalex.org/C159877910","wikidata":"https://www.wikidata.org/wiki/Q2202883","display_name":"Autoregressive model","level":2,"score":0.558525025844574},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.5123013257980347},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.4921678900718689},{"id":"https://openalex.org/C204030448","wikidata":"https://www.wikidata.org/wiki/Q101017","display_name":"Distillation","level":2,"score":0.4440971612930298},{"id":"https://openalex.org/C69357855","wikidata":"https://www.wikidata.org/wiki/Q163214","display_name":"Diffusion","level":2,"score":0.41249802708625793},{"id":"https://openalex.org/C149782125","wikidata":"https://www.wikidata.org/wiki/Q160039","display_name":"Econometrics","level":1,"score":0.1972387135028839},{"id":"https://openalex.org/C119599485","wikidata":"https://www.wikidata.org/wiki/Q43035","display_name":"Electrical engineering","level":1,"score":0.1941242218017578},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.19012156128883362},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.1760733723640442},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.17269009351730347},{"id":"https://openalex.org/C97355855","wikidata":"https://www.wikidata.org/wiki/Q11473","display_name":"Thermodynamics","level":1,"score":0.16665419936180115},{"id":"https://openalex.org/C43617362","wikidata":"https://www.wikidata.org/wiki/Q170050","display_name":"Chromatography","level":1,"score":0.16309991478919983},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.1030532717704773},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.0891149640083313}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/cvpr52734.2025.01468","is_oa":false,"landing_page_url":"https://doi.org/10.1109/cvpr52734.2025.01468","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2171218219","https://openalex.org/W1972271943","https://openalex.org/W2150410159","https://openalex.org/W2390279801","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W4396701345"],"abstract_inverted_index":{"Diffusion":[0],"models":[1,232],"with":[2,237],"transformer":[3,138,169],"architectures":[4],"have":[5,48],"demonstrated":[6],"promising":[7],"capabilities":[8],"in":[9,167,181,196,220,233,241],"generating":[10],"high-fidelity":[11],"images":[12],"and":[13,115,153,173,187,224],"scalability":[14],"for":[15,23,160],"high":[16],"resolution.":[17],"However,":[18],"iterative":[19],"sampling":[20],"process":[21],"required":[22],"synthesis":[24],"is":[25,109],"very":[26],"resource-intensive.":[27],"A":[28],"line":[29],"of":[30,85,122,131,179,216],"work":[31],"has":[32],"focused":[33],"on":[34,54,185,210,218],"distilling":[35],"solutions":[36],"to":[37,65,88,112,145,200,244],"probability":[38],"flow":[39],"ODEs":[40],"into":[41],"few-step":[42],"student":[43],"models.":[44],"Nevertheless,":[45],"existing":[46],"methods":[47,203],"been":[49],"limited":[50],"by":[51,102,140],"their":[52],"reliance":[53],"the":[55,82,86,119,123,136,150,177,201,226,245],"most":[56],"recent":[57],"denoised":[58],"samples":[59],"as":[60,126],"input,":[61],"rendering":[62],"them":[63],"susceptible":[64,111],"exposure":[66,100],"bias.":[67],"To":[68],"address":[69],"this":[70],"limitation,":[71],"we":[72],"propose":[73],"AutoRegressive":[74],"Distillation":[75],"(ARD),":[76],"a":[77,104,127,155,182,193,238],"novel":[78],"approach":[79],"that":[80,108],"leverages":[81,118],"historical":[83,106,164],"trajectory":[84,107,125,151],"ODE":[87,124],"predict":[89],"future":[90],"steps.":[91],"ARD":[92,134,180,213],"offers":[93],"two":[94],"key":[95],"benefits:":[96],"1)":[97],"it":[98,117],"mitigates":[99],"bias":[101],"utilizing":[103],"predicted":[105],"less":[110],"accumulated":[113],"errors,":[114],"2)":[116],"previous":[120],"history":[121,152],"more":[128],"effective":[129],"source":[130],"coarse-grained":[132],"information.":[133],"modifies":[135],"teacher":[137],"architecture":[139],"adding":[141],"token-wise":[142],"time":[143],"embedding":[144],"mark":[146],"each":[147],"input":[148],"from":[149],"employs":[154],"block-wise":[156],"causal":[157],"attention":[158],"mask":[159],"training.":[161],"Furthermore,":[162],"incorporating":[163],"inputs":[165],"only":[166,206],"lower":[168],"layers":[170],"enhances":[171],"performance":[172],"efficiency.":[174],"We":[175],"validate":[176],"effectiveness":[178],"class-conditioned":[183],"generation":[184],"ImageNet":[186],"T2I":[188],"synthesis.":[189],"Our":[190],"model":[191],"achieves":[192],"5\u00d7":[194],"reduction":[195],"FID":[197,215,242],"degradation":[198],"compared":[199,243],"baseline":[202],"while":[204],"requiring":[205],"1.1%":[207],"extra":[208],"FLOPs":[209],"ImageNet-256.":[211],"Moreover,":[212],"reaches":[214],"1.84":[217],"ImageNet-256":[219],"merely":[221],"4":[222],"steps":[223],"outperforms":[225],"publicly":[227],"available":[228],"1024p":[229],"text-to-image":[230],"distilled":[231],"prompt":[234],"adherence":[235],"score":[236],"minimal":[239],"drop":[240],"teacher.":[246],"Project":[247],"page:":[248],"https://github.com/alsdudrla10/ARD.":[249]},"counts_by_year":[{"year":2026,"cited_by_count":2}],"updated_date":"2026-05-05T08:41:31.759640","created_date":"2025-10-10T00:00:00"}
