{"id":"https://openalex.org/W4404987930","doi":"https://doi.org/10.48550/arxiv.2411.16657","title":"DreamRunner: Fine-Grained Compositional Story-to-Video Generation with Retrieval-Augmented Motion Adaptation","display_name":"DreamRunner: Fine-Grained Compositional Story-to-Video Generation with Retrieval-Augmented Motion Adaptation","publication_year":2024,"publication_date":"2024-11-25","ids":{"openalex":"https://openalex.org/W4404987930","doi":"https://doi.org/10.48550/arxiv.2411.16657"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2411.16657","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2411.16657","pdf_url":"https://arxiv.org/pdf/2411.16657","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2411.16657","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101452807","display_name":"Zun Wang","orcid":"https://orcid.org/0009-0006-1478-5963"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Wang, Zun","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5108050360","display_name":"Jialu Li","orcid":"https://orcid.org/0000-0002-6411-6876"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Jialu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101549959","display_name":"Han Lin","orcid":"https://orcid.org/0000-0002-3446-5098"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lin, Han","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5054456399","display_name":"Jaehong Yoon","orcid":"https://orcid.org/0000-0002-9653-9590"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yoon, Jaehong","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5112364306","display_name":"Mohit Bansal","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Bansal, Mohit","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5101452807"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12290","display_name":"Human Motion and Animation","score":0.9983999729156494,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12290","display_name":"Human Motion and Animation","score":0.9983999729156494,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.9955000281333923,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.983299970626831,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/storytelling","display_name":"Storytelling","score":0.8152949810028076},{"id":"https://openalex.org/keywords/adaptation","display_name":"Adaptation (eye)","score":0.7619770765304565},{"id":"https://openalex.org/keywords/motion","display_name":"Motion (physics)","score":0.6128374934196472},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6070916652679443},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.392387330532074},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3442443013191223},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.14530596137046814},{"id":"https://openalex.org/keywords/art","display_name":"Art","score":0.08141306042671204},{"id":"https://openalex.org/keywords/narrative","display_name":"Narrative","score":0.06771707534790039},{"id":"https://openalex.org/keywords/neuroscience","display_name":"Neuroscience","score":0.05410224199295044}],"concepts":[{"id":"https://openalex.org/C2776538412","wikidata":"https://www.wikidata.org/wiki/Q989963","display_name":"Storytelling","level":3,"score":0.8152949810028076},{"id":"https://openalex.org/C139807058","wikidata":"https://www.wikidata.org/wiki/Q352374","display_name":"Adaptation (eye)","level":2,"score":0.7619770765304565},{"id":"https://openalex.org/C104114177","wikidata":"https://www.wikidata.org/wiki/Q79782","display_name":"Motion (physics)","level":2,"score":0.6128374934196472},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6070916652679443},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.392387330532074},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3442443013191223},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.14530596137046814},{"id":"https://openalex.org/C142362112","wikidata":"https://www.wikidata.org/wiki/Q735","display_name":"Art","level":0,"score":0.08141306042671204},{"id":"https://openalex.org/C199033989","wikidata":"https://www.wikidata.org/wiki/Q1318295","display_name":"Narrative","level":2,"score":0.06771707534790039},{"id":"https://openalex.org/C169760540","wikidata":"https://www.wikidata.org/wiki/Q207011","display_name":"Neuroscience","level":1,"score":0.05410224199295044},{"id":"https://openalex.org/C124952713","wikidata":"https://www.wikidata.org/wiki/Q8242","display_name":"Literature","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2411.16657","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2411.16657","pdf_url":"https://arxiv.org/pdf/2411.16657","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"doi:10.48550/arxiv.2411.16657","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2411.16657","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2411.16657","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2411.16657","pdf_url":"https://arxiv.org/pdf/2411.16657","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W2772917594","https://openalex.org/W2036807459","https://openalex.org/W2058170566","https://openalex.org/W2755342338","https://openalex.org/W2166024367","https://openalex.org/W3116076068","https://openalex.org/W2229312674","https://openalex.org/W2951359407","https://openalex.org/W2079911747","https://openalex.org/W1969923398"],"abstract_inverted_index":{"Storytelling":[0],"video":[1],"generation":[2,84,138],"(SVG)":[3],"aims":[4],"to":[5,26,98,117,209],"produce":[6],"coherent":[7,61],"and":[8,38,66,71,155,164,184],"visually":[9],"rich":[10],"multi-scene":[11],"videos":[12,48,141],"that":[13],"follow":[14],"a":[15,28,81,93,149],"structured":[16],"narrative.":[17],"Existing":[18],"methods":[19],"primarily":[20],"employ":[21],"LLM":[22],"for":[23,122,160],"high-level":[24],"planning":[25,103],"decompose":[27],"story":[29],"into":[30],"scene-level":[31],"descriptions,":[32],"which":[33],"are":[34],"then":[35],"independently":[36],"generated":[37],"stitched":[39],"together.":[40],"However,":[41],"these":[42,76],"approaches":[43],"struggle":[44],"with":[45,50,142,172,213],"generating":[46],"high-quality":[47],"aligned":[49],"the":[51,89,137],"complex":[52,58,68],"single-scene":[53],"description,":[54],"as":[55,104,106],"visualizing":[56],"such":[57],"description":[59],"involves":[60],"composition":[62],"of":[63,139],"multiple":[64],"characters":[65],"events,":[67],"motion":[69,120,129],"synthesis":[70],"multi-character":[72],"customization.":[73],"To":[74],"address":[75],"challenges,":[77],"we":[78,87,147,204],"propose":[79,148],"DREAMRUNNER,":[80],"novel":[82,150],"story-to-video":[83],"method:":[85],"First,":[86],"structure":[88],"input":[90],"script":[91],"using":[92],"large":[94],"language":[95],"model":[96],"(LLM)":[97],"facilitate":[99],"both":[100],"coarse-grained":[101],"scene":[102],"well":[105],"fine-grained":[107,161,191],"object-level":[108],"layout":[109],"planning.":[110],"Next,":[111],"DREAMRUNNER":[112,171,188],"presents":[113],"retrieval-augmented":[114],"test-time":[115],"adaptation":[116],"capture":[118],"target":[119],"priors":[121],"objects":[123],"in":[124,179,194],"each":[125],"scene,":[126],"supporting":[127],"diverse":[128],"customization":[130],"based":[131],"on":[132,201],"retrieved":[133],"videos,":[134],"thus":[135],"facilitating":[136],"new":[140],"complex,":[143],"scripted":[144],"motions.":[145],"Lastly,":[146],"spatial-temporal":[151,166],"region-based":[152],"3D":[153],"attention":[154],"prior":[156],"injection":[157],"module":[158],"SR3AI":[159],"object-motion":[162],"binding":[163],"frame-by-frame":[165],"semantic":[167],"control.":[168],"We":[169],"compare":[170],"various":[173],"SVG":[174],"baselines,":[175],"demonstrating":[176],"state-of-the-art":[177],"performance":[178],"character":[180],"consistency,":[181],"text":[182],"alignment,":[183],"smooth":[185],"transitions.":[186],"Additionally,":[187],"exhibits":[189],"strong":[190],"condition-following":[192],"ability":[193,208],"compositional":[195],"text-to-video":[196],"generation,":[197],"significantly":[198],"outperforming":[199],"baselines":[200],"T2V-ComBench.":[202],"Finally,":[203],"validate":[205],"DREAMRUNNER's":[206],"robust":[207],"generate":[210],"multi-object":[211],"interactions":[212],"qualitative":[214],"examples.":[215]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2026-02-09T09:26:11.010843","created_date":"2024-12-04T00:00:00"}
