{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,1]],"date-time":"2026-05-01T17:15:26Z","timestamp":1777655726261,"version":"3.51.4"},"reference-count":69,"publisher":"Springer Science and Business Media LLC","issue":"11","license":[{"start":{"date-parts":[[2025,4,5]],"date-time":"2025-04-05T00:00:00Z","timestamp":1743811200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,4,5]],"date-time":"2025-04-05T00:00:00Z","timestamp":1743811200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Front. Comput. Sci."],"published-print":{"date-parts":[[2025,11]]},"DOI":"10.1007\/s11704-025-41244-0","type":"journal-article","created":{"date-parts":[[2025,4,6]],"date-time":"2025-04-06T01:21:23Z","timestamp":1743902483000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":3,"title":["CCA: collaborative competitive agents for image editing"],"prefix":"10.1007","volume":"19","author":[{"given":"Tiankai","family":"Hang","sequence":"first","affiliation":[]},{"given":"Shuyang","family":"Gu","sequence":"additional","affiliation":[]},{"given":"Dong","family":"Chen","sequence":"additional","affiliation":[]},{"given":"Xin","family":"Geng","sequence":"additional","affiliation":[]},{"given":"Baining","family":"Guo","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,4,5]]},"reference":[{"key":"41244_CR1","volume-title":"Gpt-4v(ision) system card","author":"OpenAI","year":"2023","unstructured":"OpenAI. Gpt-4v(ision) system card. 
See api.semanticscholar.org\/CorpusID:263218031 website, 2023"},{"key":"41244_CR2","unstructured":"OpenAI, Achiam J, Adler S, Agarwal S, Ahmad L, et al. Gpt-4 technical report. 2023, arXiv preprint arXiv: 2303.08774"},{"key":"41244_CR3","first-page":"2011","volume-title":"Proceedings of the 36th International Conference on Neural Information Processing Systems","author":"L Ouyang","year":"2022","unstructured":"Ouyang L, Wu J, Jiang X, Almeida D, Wainwright C L, Mishkin P, Zhang C, Agarwal S, Slama K, Ray A, Schulman J, Hilton J, Kelton F, Miller L, Simens M, Askell A, Welinder P, Christiano P, Leike J, Lowe R. Training language models to follow instructions with human feedback. In: Proceedings of the 36th International Conference on Neural Information Processing Systems. 2022, 2011"},{"key":"41244_CR4","unstructured":"Touvron H, Lavril T, Izacard G, Martinet X, Lachaux M A, Lacroix T, Rozi\u00e8re B, Goyal N, Hambro E, Azhar F, Rodriguez A, Joulin A, Grave E, Lample G. LLaMA: open and efficient foundation language models. 2023, arXiv preprint arXiv: 2302.13971"},{"key":"41244_CR5","unstructured":"Touvron H, Martin L, Stone K, Albert P, Almahairi A, et al. Llama 2: open foundation and fine-tuned chat models. 2023, arXiv preprint arXiv: 2307.09288"},{"key":"41244_CR6","first-page":"1508","volume-title":"Proceedings of the 36th International Conference on Neural Information Processing Systems","author":"S Yao","year":"2022","unstructured":"Yao S, Chen H, Yang J, Narasimhan K. WebShop: towards scalable real-world web interaction with grounded language agents. In: Proceedings of the 36th International Conference on Neural Information Processing Systems. 2022, 1508"},{"key":"41244_CR7","doi-asserted-by":"crossref","unstructured":"Qian C, Cong X, Liu W, Yang C, Chen W, Su Y, Dang Y, Li J, Xu J, Li D, Liu Z, Sun M. Communicative agents for software development. 
2023, arXiv preprint arXiv:2307.07924","DOI":"10.18653\/v1\/2024.acl-long.810"},{"key":"41244_CR8","unstructured":"Swan M, Kido T, Roland E, dos Santos R P. Math agents: computational infrastructure, mathematical embedding, and genomics. 2023, arXiv preprint arXiv: 2307.02502"},{"key":"41244_CR9","unstructured":"Kalvakurthi V, Varde A S, Jenq J. Hey Dona! Can you help me with student course registration? 2023, arXiv preprint arXiv: 2303.13548"},{"key":"41244_CR10","first-page":"2","volume-title":"Proceedings of the 36th Annual ACM Symposium on User Interface Software and Technology","author":"J S Park","year":"2023","unstructured":"Park J S, O\u2019Brien J, Cai C J, Morris M R, Liang P, Bernstein M S. Generative agents: interactive simulacra of human behavior. In: Proceedings of the 36th Annual ACM Symposium on User Interface Software and Technology. 2023, 2"},{"key":"41244_CR11","first-page":"2672","volume-title":"Proceedings of the 28th International Conference on Neural Information Processing Systems","author":"I J Goodfellow","year":"2014","unstructured":"Goodfellow I J, Pouget-Abadie J, Mirza M, Xu B, Warde-Farley D, Ozair S, Courville A, Bengio Y. Generative adversarial nets. In: Proceedings of the 28th International Conference on Neural Information Processing Systems. 2014, 2672\u20132680"},{"key":"41244_CR12","volume-title":"Proceedings of the 7th International Conference on Learning Representations","author":"A Brock","year":"2019","unstructured":"Brock A, Donahue J, Simonyan K. Large scale GAN training for high fidelity natural image synthesis. In: Proceedings of the 7th International Conference on Learning Representations. 2019"},{"key":"41244_CR13","first-page":"4401","volume-title":"Proceedings of 2019 IEEE\/CVF Conference on Computer Vision and Pattern Recognition","author":"T Karras","year":"2019","unstructured":"Karras T, Laine S, Aila T. A style-based generator architecture for generative adversarial networks. 
In: Proceedings of 2019 IEEE\/CVF Conference on Computer Vision and Pattern Recognition. 2019, 4401\u20134410"},{"key":"41244_CR14","volume-title":"Proceedings of 2020 IEEE\/CVF Conference on Computer Vision and Pattern Recognition","author":"T Karras","year":"2020","unstructured":"Karras T, Laine S, Aittala M, Hellsten J, Lehtinen J, Aila T. Analyzing and improving the image quality of StyleGAN. In: Proceedings of 2020 IEEE\/CVF Conference on Computer Vision and Pattern Recognition. 2020"},{"key":"41244_CR15","first-page":"574","volume-title":"Proceedings of the 34th International Conference on Neural Information Processing Systems","author":"J Ho","year":"2020","unstructured":"Ho J, Jain A, Abbeel P. Denoising diffusion probabilistic models. In: Proceedings of the 34th International Conference on Neural Information Processing Systems. 2020, 574"},{"key":"41244_CR16","volume-title":"Proceedings of the 9th International Conference on Learning Representations","author":"Y Song","year":"2021","unstructured":"Song Y, Sohl-Dickstein J, Kingma D P, Kumar A, Ermon S, Poole B. Score-based generative modeling through stochastic differential equations. In: Proceedings of the 9th International Conference on Learning Representations. 2021"},{"key":"41244_CR17","first-page":"672","volume-title":"Proceedings of the 35th International Conference on Neural Information Processing Systems","author":"P Dhariwal","year":"2021","unstructured":"Dhariwal P, Nichol A. Diffusion models beat GANs on image synthesis. In: Proceedings of the 35th International Conference on Neural Information Processing Systems. 2021, 672"},{"key":"41244_CR18","first-page":"1926","volume-title":"Proceedings of the 36th International Conference on Neural Information Processing Systems","author":"T Karras","year":"2022","unstructured":"Karras T, Aittala M, Aila T, Laine S. Elucidating the design space of diffusion-based generative models. 
In: Proceedings of the 36th International Conference on Neural Information Processing Systems. 2022, 1926"},{"key":"41244_CR19","volume-title":"Proceedings of the 12th International Conference on Learning Representations","author":"D Podell","year":"2024","unstructured":"Podell D, English Z, Lacey K, Blattmann A, Dockhorn T, M\u00fcller J, Penna J, Rombach R. SDXL: improving latent diffusion models for high-resolution image synthesis. In: Proceedings of the 12th International Conference on Learning Representations. 2024"},{"key":"41244_CR20","first-page":"8162","volume-title":"Proceedings of the 38th International Conference on Machine Learning","author":"A Q Nichol","year":"2021","unstructured":"Nichol A Q, Dhariwal P. Improved denoising diffusion probabilistic models. In: Proceedings of the 38th International Conference on Machine Learning. 2021, 8162\u20138171"},{"key":"41244_CR21","unstructured":"Hang T, Gu S, Geng X, Guo B. Improved noise schedule for diffusion training. 2024, arXiv preprint arXiv: 2407.03297"},{"key":"41244_CR22","volume-title":"Proceedings of the 38th Annual Conference on Neural Information Processing Systems","author":"T Wang","year":"2024","unstructured":"Wang T, Yang Q, Wang R, Sun D, Li J, Chen Y, Hu Y, Yang C, Kimura T, Kara D, Abdelzaher T F. Fine-grained control of generative data augmentation in IoT sensing. In: Proceedings of the 38th Annual Conference on Neural Information Processing Systems. 2024"},{"key":"41244_CR23","first-page":"18392","volume-title":"Proceedings of 2023 IEEE\/CVF Conference on Computer Vision and Pattern Recognition","author":"T Brooks","year":"2023","unstructured":"Brooks T, Holynski A, Efros A A. InstructPix2Pix: learning to follow image editing instructions. In: Proceedings of 2023 IEEE\/CVF Conference on Computer Vision and Pattern Recognition. 
2023, 18392\u201318402"},{"key":"41244_CR24","volume-title":"Proceedings of the 11th International Conference on Learning Representations","author":"A Hertz","year":"2023","unstructured":"Hertz A, Mokady R, Tenenbaum J, Aberman K, Pritch Y, Cohen-Or D. Prompt-to-prompt image editing with cross-attention control. In: Proceedings of the 11th International Conference on Learning Representations. 2023"},{"key":"41244_CR25","volume-title":"Proceedings of the 10th International Conference on Learning Representations","author":"C Meng","year":"2022","unstructured":"Meng C, He Y, Song Y, Song J, Wu J, Zhu J Y, Ermon S. SDEdit: guided image synthesis and editing with stochastic differential equations. In: Proceedings of the 10th International Conference on Learning Representations. 2022"},{"key":"41244_CR26","volume-title":"Reinforcement Learning: An Introduction","author":"R S Sutton","year":"2018","unstructured":"Sutton R S, Barto A G. Reinforcement Learning: An Introduction. 2nd ed. Cambridge: MIT Press, 2018","edition":"2nd ed"},{"issue":"2","key":"41244_CR27","doi-asserted-by":"publisher","first-page":"121101","DOI":"10.1007\/s11432-024-4222-0","volume":"68","author":"Z Xi","year":"2025","unstructured":"Xi Z, Chen W, Guo X, He W, Ding Y, et al. The rise and potential of large language model based agents: a survey. Science China Information Sciences, 2025, 68(2): 121101","journal-title":"Science China Information Sciences"},{"key":"41244_CR28","volume-title":"LLM powered autonomous agents","author":"L Weng","year":"2023","unstructured":"Weng L. LLM powered autonomous agents. See Lilianweng.github.io website, 2023"},{"key":"41244_CR29","volume-title":"Proceedings of the 25th International Society for Music Information Retrieval Conference","author":"Q Deng","year":"2024","unstructured":"Deng Q, Yang Q, Yuan R, Huang Y, Wang Y, Liu X, Tian Z, Pan J, Zhang G, Lin H, Li Y, Ma Y, Fu J, Lin C, Benetos E, Wang W, Xia G, Xue W, Guo Y. 
ComposerX: multi-agent symbolic music composition with LLMs. In: Proceedings of the 25th International Society for Music Information Retrieval Conference. 2024"},{"key":"41244_CR30","volume-title":"Proceedings of the 36th Annual Conference on Neural Information Processing Systems","author":"T Schick","year":"2023","unstructured":"Schick T, Dwivedi-Yu J, Dess\u00ec R, Raileanu R, Lomeli M, Hambro E, Zettlemoyer L, Cancedda N, Scialom T. Toolformer: language models can teach themselves to use tools. In: Proceedings of the 36th Annual Conference on Neural Information Processing Systems. 2023"},{"issue":"5","key":"41244_CR31","doi-asserted-by":"publisher","first-page":"185347","DOI":"10.1007\/s11704-024-40013-9","volume":"18","author":"Y Wu","year":"2024","unstructured":"Wu Y, Yang X. A glance at in-context learning. Frontiers of Computer Science, 2024, 18(5): 185347","journal-title":"Frontiers of Computer Science"},{"key":"41244_CR32","first-page":"1800","volume-title":"Proceedings of the 36th International Conference on Neural Information Processing Systems","author":"J Wei","year":"2022","unstructured":"Wei J, Wang X, Schuurmans D, Bosma M, Ichter B, Xia F, Chi E H, Le Q V, Zhou D. Chain-of-thought prompting elicits reasoning in large language models. In: Proceedings of the 36th International Conference on Neural Information Processing Systems. 2022, 1800"},{"key":"41244_CR33","volume-title":"Proceedings of the 11th International Conference on Learning Representations","author":"S Yao","year":"2023","unstructured":"Yao S, Zhao J, Yu D, Du N, Shafran I, Narasimhan K R, Cao Y. ReAct: synergizing reasoning and acting in language models. In: Proceedings of the 11th International Conference on Learning Representations. 
2023"},{"key":"41244_CR34","first-page":"2019","volume-title":"Proceedings of the 37th International Conference on Neural Information Processing Systems","author":"A Madaan","year":"2023","unstructured":"Madaan A, Tandon N, Gupta P, Hallinan S, Gao L, Wiegreffe S, Alon U, Dziri N, Prabhumoye S, Yang Y, Gupta S, Majumder B P, Hermann K, Welleck S, Yazdanbakhsh A, Clark P. SELF-REFINE: iterative refinement with self-feedback. In: Proceedings of the 37th International Conference on Neural Information Processing Systems. 2023, 2019"},{"key":"41244_CR35","volume-title":"Proceedings of the 18th European Conference on Computer Vision","author":"Z Yang","year":"2025","unstructured":"Yang Z, Wang J, Li L, Lin K, Lin C C, Liu Z, Wang L. Idea2img: iterative self-refinement with GPT-4V for automatic image design and generation. In: Proceedings of the 18th European Conference on Computer Vision. 2025"},{"key":"41244_CR36","volume-title":"Proceedings of the 37th Annual Conference on Neural Information Processing Systems","author":"Y Shen","year":"2023","unstructured":"Shen Y, Song K, Tan X, Li D, Lu W, Zhuang Y. HuggingGPT: solving AI tasks with ChatGPT and its friends in hugging face. In: Proceedings of the 37th Annual Conference on Neural Information Processing Systems. 2023"},{"key":"41244_CR37","first-page":"340","volume-title":"Proceedings of the 40th International Conference on Machine Learning","author":"D Driess","year":"2023","unstructured":"Driess D, Xia F, Sajjadi M S M, Lynch C, Chowdhery A, Ichter B, Wahid A, Tompson J, Vuong Q, Yu T, Huang W, Chebotar Y, Sermanet P, Duckworth D, Levine S, Vanhoucke V, Hausman K, Toussaint M, Greff K, Zeng A, Mordatch I, Florence P. PaLM-E: an embodied multimodal language model. In: Proceedings of the 40th International Conference on Machine Learning. 
2023, 340"},{"key":"41244_CR38","first-page":"2264","volume-title":"Proceedings of the 37th International Conference on Neural Information Processing Systems","author":"G Li","year":"2023","unstructured":"Li G, Hammoud H A A K, Itani H, Khizbullin D, Ghanem B. CAMEL: communicative agents for \u201cmind\u201d exploration of large language model society. In: Proceedings of the 37th International Conference on Neural Information Processing Systems. 2023, 2264"},{"key":"41244_CR39","unstructured":"Chen W, Su Y, Zuo J, Yang C, Yuan C, Chan C M, Qin Y, Lu Y, Hung Y H, Qian C, Qin Y, Cong X, Xie R, Liu Z, Sun M, Zhou J. AgentVerse: facilitating multi-agent collaboration and exploring emergent behaviors. 2023, arXiv preprint arXiv: 2308.10848"},{"key":"41244_CR40","volume-title":"Proceedings of the 12th International Conference on Learning Representations","author":"C M Chan","year":"2024","unstructured":"Chan C M, Chen W, Su Y, Yu J, Xue W, Zhang S, Fu J, Liu Z. ChatEval: towards better LLM-based evaluators through multi-agent debate. In: Proceedings of the 12th International Conference on Learning Representations. 2024"},{"key":"41244_CR41","volume-title":"Proceedings of 2024 IEEE\/CVF Conference on Computer Vision and Pattern Recognition","author":"Z Geng","year":"2024","unstructured":"Geng Z, Yang B, Hang T, Li C, Gu S, Zhang T, Bao J, Zhang Z, Li H, Hu H, Chen D, Guo B. InstructDiffusion: a generalist modeling interface for vision tasks. In: Proceedings of 2024 IEEE\/CVF Conference on Computer Vision and Pattern Recognition. 2024"},{"key":"41244_CR42","doi-asserted-by":"publisher","first-page":"9216","DOI":"10.1109\/TMM.2023.3248143","volume":"25","author":"T Hang","year":"2023","unstructured":"Hang T, Yang H, Liu B, Fu J, Geng X, Guo B. Language-guided face animation by recurrent styleGAN-based generator. 
IEEE Transactions on Multimedia, 2023, 25: 9216\u20139227","journal-title":"IEEE Transactions on Multimedia"},{"key":"41244_CR43","first-page":"6038","volume-title":"Proceedings of 2023 IEEE\/CVF Conference on Computer Vision and Pattern Recognition","author":"R Mokady","year":"2023","unstructured":"Mokady R, Hertz A, Aberman K, Pritch Y, Cohen-Or D. Null-text inversion for editing real images using guided diffusion models. In: Proceedings of 2023 IEEE\/CVF Conference on Computer Vision and Pattern Recognition. 2023, 6038\u20136047"},{"key":"41244_CR44","first-page":"694","volume-title":"Proceedings of the 14th European Conference on Computer Vision","author":"J Johnson","year":"2016","unstructured":"Johnson J, Alahi A, Fei-Fei L. Perceptual losses for real-time style transfer and super-resolution. In: Proceedings of the 14th European Conference on Computer Vision. 2016, 694\u2013711"},{"key":"41244_CR45","first-page":"2414","volume-title":"Proceedings of 2016 IEEE Conference on Computer Vision and Pattern Recognition","author":"L A Gatys","year":"2016","unstructured":"Gatys L A, Ecker A S, Bethge M. Image style transfer using convolutional neural networks. In: Proceedings of 2016 IEEE Conference on Computer Vision and Pattern Recognition. 2016, 2414\u20132423"},{"key":"41244_CR46","doi-asserted-by":"publisher","first-page":"8222","DOI":"10.1109\/CVPR.2018.00858","volume-title":"Proceedings of 2018 IEEE\/CVF Conference on Computer Vision and Pattern Recognition","author":"S Gu","year":"2018","unstructured":"Gu S, Chen C, Liao J, Yuan L. Arbitrary style transfer with deep feature reshuffle. In: Proceedings of 2018 IEEE\/CVF Conference on Computer Vision and Pattern Recognition. 2018, 8222\u20138231"},{"key":"41244_CR47","first-page":"593","volume-title":"Proceedings of the 5th International Conference on Computer Vision, Image and Deep Learning","author":"Z Ding","year":"2024","unstructured":"Ding Z, Li P, Yang Q, Li S, Gong Q. Regional style and color transfer. 
In: Proceedings of the 5th International Conference on Computer Vision, Image and Deep Learning. 2024, 593\u2013597"},{"key":"41244_CR48","first-page":"2223","volume-title":"Proceedings of 2017 IEEE International Conference on Computer Vision","author":"J Y Zhu","year":"2017","unstructured":"Zhu J Y, Park T, Isola P, Efros A A. Unpaired image-to-image translation using cycle-consistent adversarial networks. In: Proceedings of 2017 IEEE International Conference on Computer Vision. 2017, 2223\u20132232"},{"key":"41244_CR49","first-page":"1125","volume-title":"Proceedings of 2017 IEEE Conference on Computer Vision and Pattern Recognition","author":"P Isola","year":"2017","unstructured":"Isola P, Zhu J Y, Zhou T, Efros A A. Image-to-image translation with conditional adversarial networks. In: Proceedings of 2017 IEEE Conference on Computer Vision and Pattern Recognition. 2017, 1125\u20131134"},{"key":"41244_CR50","first-page":"417","volume-title":"Proceedings of the 27th Annual Conference on Computer Graphics and Interactive Techniques","author":"M Bertalmio","year":"2000","unstructured":"Bertalmio M, Sapiro G, Caselles V, Ballester C. Image inpainting. In: Proceedings of the 27th Annual Conference on Computer Graphics and Interactive Techniques. 2000, 417\u2013424"},{"key":"41244_CR51","volume-title":"Proceedings of 2003 IEEE Computer Society Conference on Computer Vision and Pattern Recognition","author":"A Criminisi","year":"2003","unstructured":"Criminisi A, Perez P, Toyama K. Object removal by exemplar-based inpainting. In: Proceedings of 2003 IEEE Computer Society Conference on Computer Vision and Pattern Recognition. 2003"},{"key":"41244_CR52","doi-asserted-by":"publisher","first-page":"861","DOI":"10.1145\/1186822.1073274","volume-title":"Proceedings of the ACM SIGGRAPH 2005 Papers","author":"J Sun","year":"2005","unstructured":"Sun J, Yuan L, Jia J, Shum H Y. Image completion with structure propagation. In: Proceedings of the ACM SIGGRAPH 2005 Papers. 
2005, 861\u2013868"},{"key":"41244_CR53","first-page":"18381","volume-title":"Proceedings of 2023 IEEE\/CVF Conference on Computer Vision and Pattern Recognition","author":"B Yang","year":"2023","unstructured":"Yang B, Gu S, Zhang B, Zhang T, Chen X, Sun X, Chen D, Wen F. Paint by example: exemplar-based image editing with diffusion models. In: Proceedings of 2023 IEEE\/CVF Conference on Computer Vision and Pattern Recognition. 2023, 18381\u201318391"},{"key":"41244_CR54","volume-title":"Proceedings of the 37th Annual Conference on Neural Information Processing Systems","author":"K Zhang","year":"2023","unstructured":"Zhang K, Mo L, Chen W, Sun H, Su Y. MagicBrush: a manually annotated dataset for instruction-guided image editing. In: Proceedings of the 37th Annual Conference on Neural Information Processing Systems. 2023"},{"issue":"3","key":"41244_CR55","doi-asserted-by":"publisher","first-page":"3121","DOI":"10.1109\/TPAMI.2022.3181070","volume":"45","author":"W Xia","year":"2023","unstructured":"Xia W, Zhang Y, Yang Y, Xue J H, Zhou B, Yang M H. GAN inversion: a survey. IEEE Transactions on Pattern Analysis and Machine Intelligence, 2023, 45(3): 3121\u20133138","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"41244_CR56","first-page":"9243","volume-title":"Proceedings of 2020 IEEE\/CVF Conference on Computer Vision and Pattern Recognition","author":"Y Shen","year":"2020","unstructured":"Shen Y, Gu J, Tang X, Zhou B. Interpreting the latent space of GANs for semantic face editing. In: Proceedings of 2020 IEEE\/CVF Conference on Computer Vision and Pattern Recognition. 2020, 9243\u20139252"},{"key":"41244_CR57","first-page":"592","volume-title":"Proceedings of the 16th European Conference on Computer Vision","author":"J Zhu","year":"2020","unstructured":"Zhu J, Shen Y, Zhao D, Zhou B. In-domain GAN inversion for real image editing. In: Proceedings of the 16th European Conference on Computer Vision. 
2020, 592\u2013608"},{"key":"41244_CR58","first-page":"2085","volume-title":"Proceedings of 2021 IEEE\/CVF International Conference on Computer Vision","author":"O Patashnik","year":"2021","unstructured":"Patashnik O, Wu Z, Shechtman E, Cohen-Or D, Lischinski D. StyleCLIP: text-driven manipulation of StyleGAN imagery. In: Proceedings of 2021 IEEE\/CVF International Conference on Computer Vision. 2021, 2085\u20132094"},{"key":"41244_CR59","first-page":"8748","volume-title":"Proceedings of the 38th International Conference on Machine Learning","author":"A Radford","year":"2021","unstructured":"Radford A, Kim J W, Hallacy C, Ramesh A, Goh G, Agarwal S, Sastry G, Askell A, Mishkin P, Clark J, Krueger G, Sutskever I. Learning transferable visual models from natural language supervision. In: Proceedings of the 38th International Conference on Machine Learning. 2021, 8748\u20138763"},{"key":"41244_CR60","first-page":"10696","volume-title":"Proceedings of 2022 IEEE\/CVF Conference on Computer Vision and Pattern Recognition","author":"S Gu","year":"2022","unstructured":"Gu S, Chen D, Bao J, Wen F, Zhang B, Chen D, Yuan L, Guo B. Vector quantized diffusion model for text-to-image synthesis. In: Proceedings of 2022 IEEE\/CVF Conference on Computer Vision and Pattern Recognition. 2022, 10696\u201310706"},{"key":"41244_CR61","first-page":"10684","volume-title":"Proceedings of 2022 IEEE\/CVF Conference on Computer Vision and Pattern Recognition","author":"R Rombach","year":"2022","unstructured":"Rombach R, Blattmann A, Lorenz D, Esser P, Ommer B. High-resolution image synthesis with latent diffusion models. In: Proceedings of 2022 IEEE\/CVF Conference on Computer Vision and Pattern Recognition. 2022, 10684\u201310695"},{"key":"41244_CR62","unstructured":"Ramesh A, Dhariwal P, Nichol A, Chu C, Chen M. Hierarchical text-conditional image generation with CLIP latents. 
2022, arXiv preprint arXiv: 2204.06125"},{"key":"41244_CR63","first-page":"2643","volume-title":"Proceedings of the 36th International Conference on Neural Information Processing Systems","author":"C Saharia","year":"2022","unstructured":"Saharia C, Chan W, Saxena S, Li L, Whang J, Denton E L, Ghasemipour S K S, Ayan B K, Mahdavi S S, Gontijo-Lopes R, Salimans T, Ho J, Fleet D J, Norouzi M. Photorealistic text-to-image diffusion models with deep language understanding. In: Proceedings of the 36th International Conference on Neural Information Processing Systems. 2022, 2643"},{"key":"41244_CR64","unstructured":"Balaji Y, Nah S, Huang X, Vahdat A, Song J, Zhang Q, Kreis K, Aittala M, Aila T, Laine S, Catanzaro B, Karras T, Liu M Y. eDiff-I: text-to-image diffusion models with an ensemble of expert denoisers. 2022, arXiv preprint arXiv: 2211.01324"},{"key":"41244_CR65","first-page":"22532","volume-title":"Proceedings of 2023 IEEE\/CVF Conference on Computer Vision and Pattern Recognition","author":"B Wallace","year":"2023","unstructured":"Wallace B, Gokul A, Naik N. EDICT: exact diffusion inversion via coupled transformations. In: Proceedings of 2023 IEEE\/CVF Conference on Computer Vision and Pattern Recognition. 2023, 22532\u201322541"},{"key":"41244_CR66","first-page":"22500","volume-title":"Proceedings of 2023 IEEE\/CVF Conference on Computer Vision and Pattern Recognition","author":"N Ruiz","year":"2023","unstructured":"Ruiz N, Li Y, Jampani V, Pritch Y, Rubinstein M, Aberman K. DreamBooth: fine tuning text-to-image diffusion models for subject-driven generation. In: Proceedings of 2023 IEEE\/CVF Conference on Computer Vision and Pattern Recognition. 2023, 22500\u201322510"},{"key":"41244_CR67","first-page":"14953","volume-title":"Proceedings of 2023 IEEE\/CVF Conference on Computer Vision and Pattern Recognition","author":"T Gupta","year":"2023","unstructured":"Gupta T, Kembhavi A. Visual programming: compositional visual reasoning without training. 
In: Proceedings of 2023 IEEE\/CVF Conference on Computer Vision and Pattern Recognition. 2023, 14953\u201314962"},{"key":"41244_CR68","volume-title":"Proceedings of 2024 IEEE\/CVF Conference on Computer Vision and Pattern Recognition","author":"H Liu","year":"2024","unstructured":"Liu H, Li C, Li Y, Lee Y J. Improved baselines with visual instruction tuning. In: Proceedings of 2024 IEEE\/CVF Conference on Computer Vision and Pattern Recognition. 2024"},{"key":"41244_CR69","volume-title":"Proceedings of the 18th European Conference on Computer Vision","author":"C Schuhmann","year":"2025","unstructured":"Schuhmann C. Improved aesthetic predictor, 2022. GitHub repository 70. Liu S, Zeng Z, Ren T, Li F, Zhang H, Yang J, Jiang Q, Li C, Yang J, Su H, Zhu J, Zhang L. Grounding DINO: marrying DINO with grounded pre-training for open-set object detection. In: Proceedings of the 18th European Conference on Computer Vision. 2025"}],"container-title":["Frontiers of Computer Science"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11704-025-41244-0.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11704-025-41244-0\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11704-025-41244-0.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,4,6]],"date-time":"2025-04-06T20:23:26Z","timestamp":1743971006000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11704-025-41244-0"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,4,5]]},"references-count":69,"journal-issue":{"issue":"11","published-print":{"date-parts":[[2025,11]]}},"alternative-id":["41244"],"URL":"https:\/\/doi.org\/
10.1007\/s11704-025-41244-0","relation":{},"ISSN":["2095-2228","2095-2236"],"issn-type":[{"value":"2095-2228","type":"print"},{"value":"2095-2236","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,4,5]]},"assertion":[{"value":"17 November 2024","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"13 February 2025","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"5 April 2025","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"Competing interests The authors declare that they have no competing interests or financial conflicts to disclose","order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethics"}}],"article-number":"1911367"}}