{"id":"https://openalex.org/W4389116633","doi":"https://doi.org/10.48550/arxiv.2311.16090","title":"Self-correcting LLM-controlled Diffusion Models","display_name":"Self-correcting LLM-controlled Diffusion Models","publication_year":2023,"publication_date":"2023-11-27","ids":{"openalex":"https://openalex.org/W4389116633","doi":"https://doi.org/10.48550/arxiv.2311.16090"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2311.16090","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2311.16090","pdf_url":"https://arxiv.org/pdf/2311.16090","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2311.16090","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101804850","display_name":"Tsung-Han Wu","orcid":"https://orcid.org/0000-0002-5071-0583"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Wu, Tsung-Han","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5114232525","display_name":"Long Lian","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lian, Long","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5072427753","display_name":"Joseph E. Gonzalez","orcid":"https://orcid.org/0000-0003-2921-956X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gonzalez, Joseph E.","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5114686206","display_name":"Boyi Li","orcid":"https://orcid.org/0000-0002-6752-3223"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Boyi","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5029105520","display_name":"Trevor Darrell","orcid":"https://orcid.org/0000-0001-5453-8533"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Darrell, Trevor","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5101804850"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.9585999846458435,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.9585999846458435,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.780624270439148},{"id":"https://openalex.org/keywords/correctness","display_name":"Correctness","score":0.6928697228431702},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.5596886873245239},{"id":"https://openalex.org/keywords/bridging","display_name":"Bridging (networking)","score":0.5109061598777771},{"id":"https://openalex.org/keywords/anisotropic-diffusion","display_name":"Anisotropic diffusion","score":0.44905638694763184},{"id":"https://openalex.org/keywords/diffusion","display_name":"Diffusion","score":0.4338317811489105},{"id":"https://openalex.org/keywords/generative-model","display_name":"Generative model","score":0.4181049168109894},{"id":"https://openalex.org/keywords/generative-grammar","display_name":"Generative grammar","score":0.4092519283294678},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.3684154748916626},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3603839874267578},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.33797192573547363},{"id":"https://openalex.org/keywords/theoretical-computer-science","display_name":"Theoretical computer science","score":0.3207305073738098}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.780624270439148},{"id":"https://openalex.org/C55439883","wikidata":"https://www.wikidata.org/wiki/Q360812","display_name":"Correctness","level":2,"score":0.6928697228431702},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.5596886873245239},{"id":"https://openalex.org/C174348530","wikidata":"https://www.wikidata.org/wiki/Q188635","display_name":"Bridging (networking)","level":2,"score":0.5109061598777771},{"id":"https://openalex.org/C203504353","wikidata":"https://www.wikidata.org/wiki/Q4765461","display_name":"Anisotropic diffusion","level":3,"score":0.44905638694763184},{"id":"https://openalex.org/C69357855","wikidata":"https://www.wikidata.org/wiki/Q163214","display_name":"Diffusion","level":2,"score":0.4338317811489105},{"id":"https://openalex.org/C167966045","wikidata":"https://www.wikidata.org/wiki/Q5532625","display_name":"Generative model","level":3,"score":0.4181049168109894},{"id":"https://openalex.org/C39890363","wikidata":"https://www.wikidata.org/wiki/Q36108","display_name":"Generative grammar","level":2,"score":0.4092519283294678},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.3684154748916626},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3603839874267578},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.33797192573547363},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.3207305073738098},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.0},{"id":"https://openalex.org/C97355855","wikidata":"https://www.wikidata.org/wiki/Q11473","display_name":"Thermodynamics","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2311.16090","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2311.16090","pdf_url":"https://arxiv.org/pdf/2311.16090","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},{"id":"doi:10.48550/arxiv.2311.16090","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2311.16090","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2311.16090","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2311.16090","pdf_url":"https://arxiv.org/pdf/2311.16090","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","score":0.8799999952316284,"display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4389116633.pdf"},"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4365211920","https://openalex.org/W3014948380","https://openalex.org/W4380551139","https://openalex.org/W4317695495","https://openalex.org/W4287117424","https://openalex.org/W4387506531","https://openalex.org/W4238433571","https://openalex.org/W3174044702","https://openalex.org/W2967848559","https://openalex.org/W4299831724"],"abstract_inverted_index":{"Text-to-image":[0],"generation":[1,92,176],"has":[2],"witnessed":[3],"significant":[4],"progress":[5],"with":[6,46,71,115],"the":[7,13,65,72,78,81,101,128,160,163,172],"advent":[8],"of":[9,130,144],"diffusion":[10,21,116,132],"models.":[11,133],"Despite":[12],"ability":[14],"to":[15,26,37,42,125,162],"generate":[16,43],"photorealistic":[17],"images,":[18],"current":[19],"text-to-image":[20,91,175],"models":[22,39,117],"still":[23],"often":[24],"struggle":[25],"accurately":[27],"interpret":[28],"and":[29,74,153,177,190],"follow":[30],"complex":[31],"input":[32,66],"text":[33],"prompts.":[34],"In":[35],"contrast":[36],"existing":[38],"that":[40,60,137],"aim":[41],"images":[44],"only":[45,107],"their":[47],"best":[48],"effort,":[49],"we":[50],"introduce":[51],"Self-correcting":[52],"LLM-controlled":[53],"Diffusion":[54],"(SLD).":[55],"SLD":[56,89,104,165],"is":[57,105],"a":[58,142],"framework":[59],"generates":[61],"an":[62,86,94],"image":[63,168,178],"from":[64],"prompt,":[67,73],"assesses":[68],"its":[69],"alignment":[70],"performs":[75],"self-corrections":[76],"on":[77],"inaccuracies":[79],"in":[80,100,148],"generated":[82],"image.":[83,103],"Steered":[84],"by":[85,157],"LLM":[87],"controller,":[88],"turns":[90],"into":[93],"iterative":[95],"closed-loop":[96],"process,":[97],"ensuring":[98],"correctness":[99],"resulting":[102],"not":[106],"training-free":[108],"but":[109],"can":[110,140,166],"also":[111],"be":[112],"seamlessly":[113],"integrated":[114],"behind":[118],"API":[119],"access,":[120],"such":[121],"as":[122],"DALL-E":[123],"3,":[124],"further":[126],"boost":[127],"performance":[129],"state-of-the-art":[131],"Experimental":[134],"results":[135],"show":[136],"our":[138,184],"approach":[139],"rectify":[141],"majority":[143],"incorrect":[145],"generations,":[146],"particularly":[147],"generative":[149],"numeracy,":[150],"attribute":[151],"binding,":[152],"spatial":[154],"relationships.":[155],"Furthermore,":[156],"simply":[158],"adjusting":[159],"instructions":[161],"LLM,":[164],"perform":[167],"editing":[169,179],"tasks,":[170],"bridging":[171],"gap":[173],"between":[174],"pipelines.":[180],"We":[181],"will":[182],"make":[183],"code":[185],"available":[186],"for":[187],"future":[188],"research":[189],"applications.":[191]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":2}],"updated_date":"2026-02-09T09:26:11.010843","created_date":"2025-10-10T00:00:00"}
