{"id":"https://openalex.org/W4417183335","doi":"https://doi.org/10.48550/arxiv.2507.01926","title":"IC-Custom: Diverse Image Customization via In-Context Learning","display_name":"IC-Custom: Diverse Image Customization via In-Context Learning","publication_year":2025,"publication_date":"2025-07-02","ids":{"openalex":"https://openalex.org/W4417183335","doi":"https://doi.org/10.48550/arxiv.2507.01926"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2507.01926","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2507.01926","pdf_url":"https://arxiv.org/pdf/2507.01926","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2507.01926","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5034135008","display_name":"Yaowei Li","orcid":"https://orcid.org/0000-0003-0725-6108"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Li, Yaowei","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100449176","display_name":"Xiaoyu Li","orcid":"https://orcid.org/0000-0003-1616-4985"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Xiaoyu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100751311","display_name":"Zhaoyang Zhang","orcid":"https://orcid.org/0000-0003-2346-6228"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Zhaoyang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5062922713","display_name":"Yuxuan Bian","orcid":"https://orcid.org/0000-0002-5846-417X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Bian, Yuxuan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Liu, Gan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Gan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100663161","display_name":"Xinyuan Li","orcid":"https://orcid.org/0000-0002-1255-4604"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Xinyuan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101925114","display_name":"Jiale Xu","orcid":"https://orcid.org/0000-0002-1806-1165"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xu, Jiale","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5001992062","display_name":"Wenbo Hu","orcid":"https://orcid.org/0000-0001-6082-4966"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hu, Wenbo","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5048086599","display_name":"Yating Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Yating","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5082484068","display_name":"Lingen Li","orcid":"https://orcid.org/0000-0002-1313-8717"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Lingen","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5015354791","display_name":"Jing Cai","orcid":"https://orcid.org/0000-0002-6792-4959"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Cai, Jing","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5002795838","display_name":"Yuexian Zou","orcid":"https://orcid.org/0000-0001-9999-6140"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zou, Yuexian","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103325403","display_name":"Yancheng He","orcid":"https://orcid.org/0009-0003-5078-0447"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"He, Yancheng","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5102004349","display_name":"Ying Shan","orcid":"https://orcid.org/0000-0001-7673-8325"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shan, Ying","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":14,"corresponding_author_ids":["https://openalex.org/A5034135008"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.733299970626831,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.733299970626831,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11448","display_name":"Face recognition and analysis","score":0.05620000138878822,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.02710000053048134,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/personalization","display_name":"Personalization","score":0.8199999928474426},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.6000000238418579},{"id":"https://openalex.org/keywords/limiting","display_name":"Limiting","score":0.4641000032424927},{"id":"https://openalex.org/keywords/identity","display_name":"Identity (music)","score":0.3693999946117401},{"id":"https://openalex.org/keywords/mechanism","display_name":"Mechanism (biology)","score":0.35440000891685486},{"id":"https://openalex.org/keywords/image-processing","display_name":"Image processing","score":0.3046000003814697}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8205000162124634},{"id":"https://openalex.org/C183003079","wikidata":"https://www.wikidata.org/wiki/Q1000371","display_name":"Personalization","level":2,"score":0.8199999928474426},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.6000000238418579},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.499099999666214},{"id":"https://openalex.org/C188198153","wikidata":"https://www.wikidata.org/wiki/Q1613840","display_name":"Limiting","level":2,"score":0.4641000032424927},{"id":"https://openalex.org/C2778355321","wikidata":"https://www.wikidata.org/wiki/Q17079427","display_name":"Identity (music)","level":2,"score":0.3693999946117401},{"id":"https://openalex.org/C89611455","wikidata":"https://www.wikidata.org/wiki/Q6804646","display_name":"Mechanism (biology)","level":2,"score":0.35440000891685486},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.3483000099658966},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.31299999356269836},{"id":"https://openalex.org/C9417928","wikidata":"https://www.wikidata.org/wiki/Q1070689","display_name":"Image processing","level":3,"score":0.3046000003814697},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3041999936103821},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.29280000925064087},{"id":"https://openalex.org/C2781249084","wikidata":"https://www.wikidata.org/wiki/Q908656","display_name":"Preference","level":2,"score":0.2888999879360199},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.27950000762939453},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.2703999876976013},{"id":"https://openalex.org/C75294576","wikidata":"https://www.wikidata.org/wiki/Q5165192","display_name":"Contextual image classification","level":3,"score":0.2587999999523163},{"id":"https://openalex.org/C2987933465","wikidata":"https://www.wikidata.org/wiki/Q141130","display_name":"Image manipulation","level":3,"score":0.257099986076355},{"id":"https://openalex.org/C2776674983","wikidata":"https://www.wikidata.org/wiki/Q545981","display_name":"Image editing","level":3,"score":0.25699999928474426},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.2502000033855438}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2507.01926","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2507.01926","pdf_url":"https://arxiv.org/pdf/2507.01926","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"doi:10.48550/arxiv.2507.01926","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2507.01926","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2507.01926","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2507.01926","pdf_url":"https://arxiv.org/pdf/2507.01926","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Image":[0],"customization,":[1,39],"a":[2,34,53,75,127],"crucial":[3],"technique":[4],"for":[5,37,82],"industrial":[6,152],"media":[7],"production,":[8],"aims":[9],"to":[10,74,104,108],"generate":[11],"content":[12],"that":[13,56,174],"is":[14],"consistent":[15],"with":[16,71,131],"reference":[17,69],"images.":[18],"However,":[19],"current":[20],"approaches":[21],"conventionally":[22],"separate":[23],"image":[24,62,156],"customization":[25,30,63],"into":[26],"position-aware":[27,59],"and":[28,32,60,100,113,134,158,168,182,197],"position-free":[29,61],"paradigms":[31],"lack":[33],"universal":[35],"framework":[36,55],"diverse":[38,111],"limiting":[40],"their":[41],"applications":[42],"across":[43,193],"various":[44,151],"scenarios.":[45],"To":[46,120],"overcome":[47],"these":[48],"limitations,":[49],"we":[50,125],"propose":[51,87],"IC-Custom,":[52],"unified":[54],"seamlessly":[57],"integrates":[58],"through":[64],"in-context":[65],"learning.":[66],"IC-Custom":[67,149,175,186],"concatenates":[68],"images":[70,73],"target":[72],"polyptych,":[76],"leveraging":[77],"DiT's":[78],"multi-modal":[79],"attention":[80],"mechanism":[81],"fine-grained":[83],"token-level":[84],"interactions.":[85],"We":[86],"the":[88,106,122,140,169,206],"In-context":[89],"Multi-Modal":[90],"Attention":[91],"(ICMA)":[92],"mechanism,":[93],"which":[94],"employs":[95],"learnable":[96],"task-oriented":[97],"register":[98],"tokens":[99],"boundary-aware":[101],"positional":[102],"embeddings":[103],"enable":[105],"model":[107,208],"effectively":[109],"handle":[110],"tasks":[112],"distinguish":[114],"between":[115],"inputs":[116],"in":[117],"polyptych":[118],"configurations.":[119],"address":[121],"data":[123],"gap,":[124],"curated":[126],"12K":[128],"identity-consistent":[129],"dataset":[130],"8K":[132],"real-world":[133],"4K":[135],"high-quality":[136],"synthetic":[137,147],"samples,":[138],"avoiding":[139],"overly":[141],"glossy,":[142],"oversaturated":[143],"look":[144],"typical":[145],"of":[146,205],"data.":[148],"supports":[150],"applications,":[153],"including":[154],"try-on,":[155],"insertion,":[157],"creative":[159],"IP":[160],"customization.":[161],"Extensive":[162],"evaluations":[163],"on":[164],"our":[165],"proposed":[166],"ProductBench":[167],"publicly":[170],"available":[171],"DreamBench":[172],"demonstrate":[173],"significantly":[176],"outperforms":[177],"community":[178],"workflows,":[179],"closed-source":[180],"models,":[181],"state-of-the-art":[183],"open-source":[184],"approaches.":[185],"achieves":[187],"about":[188],"73\\%":[189],"higher":[190],"human":[191],"preference":[192],"identity":[194],"consistency,":[195],"harmony,":[196],"text":[198],"alignment":[199],"metrics,":[200],"while":[201],"training":[202],"only":[203],"0.4\\%":[204],"original":[207],"parameters.":[209],"Project":[210],"page:":[211],"https://liyaowei-stu.github.io/project/IC_Custom":[212]},"counts_by_year":[],"updated_date":"2026-04-16T08:26:57.006410","created_date":"2025-10-10T00:00:00"}
