{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,6]],"date-time":"2026-05-06T15:43:16Z","timestamp":1778082196862,"version":"3.51.4"},"reference-count":77,"publisher":"IEEE","license":[{"start":{"date-parts":[[2021,6,1]],"date-time":"2021-06-01T00:00:00Z","timestamp":1622505600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2021,6,1]],"date-time":"2021-06-01T00:00:00Z","timestamp":1622505600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/100004358","name":"Samsung","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100004358","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021,6]]},"DOI":"10.1109\/cvpr46437.2021.00086","type":"proceedings-article","created":{"date-parts":[[2021,11,2]],"date-time":"2021-11-02T21:56:02Z","timestamp":1635890162000},"page":"802-812","source":"Crossref","is-referenced-by-count":118,"title":["CoSMo: Content-Style Modulation for Image Retrieval with Text Feedback"],"prefix":"10.1109","author":[{"given":"Seungmin","family":"Lee","sequence":"first","affiliation":[{"name":"Seoul National University"}]},{"given":"Dongwan","family":"Kim","sequence":"additional","affiliation":[{"name":"Seoul National University"}]},{"given":"Bohyung","family":"Han","sequence":"additional","affiliation":[{"name":"Seoul National University"}]}],"member":"263","reference":[{"key":"ref73","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00644"},{"key":"ref72","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.93"},{"key":"ref71","article-title":"Disentangled non-local neural networks","author":"yin","year":"2020","journal-title":"ECCV"},{"key":"ref70","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2020.2967597"},{"key":"ref76","article-title":"Unpaired image-to-image translation using cycleconsistent adversarial networks","author":"zhu","year":"2017","journal-title":"ICCV"},{"key":"ref77","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00515"},{"key":"ref74","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00359"},{"key":"ref39","article-title":"Stacked cross attention for image-text matching","author":"lee","year":"2018","journal-title":"ECCV"},{"key":"ref75","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.652"},{"key":"ref38","article-title":"Diverse image-to-image translation via disentangled representations","author":"lee","year":"2018","journal-title":"ECCV"},{"key":"ref33","article-title":"Perceptual losses for real-time style transfer and super-resolution","author":"johnson","year":"2016","journal-title":"ECCV"},{"key":"ref32","article-title":"Batch normalization: Accelerating deep network training by reducing internal covariate shift","author":"ioffe","year":"2015","journal-title":"ICML"},{"key":"ref31","article-title":"Arbitrary style transfer in real-time with adaptive instance normalization","author":"huang","year":"2019","journal-title":"ICCV"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00473"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00452"},{"key":"ref36","article-title":"Multimodal residual learning for visual qa","author":"kim","year":"2016","journal-title":"NeurIPS"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00453"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298932"},{"key":"ref60","article-title":"Learning to optimize domain specific normalization for domain generalization","author":"seo","year":"2020","journal-title":"ECCV"},{"key":"ref62","article-title":"Instance normalization: The missing ingredient for fast stylization","author":"ulyanov","year":"2016"},{"key":"ref61","article-title":"Retrieving similar e-commerce images using deep learning","author":"sharma","year":"2019"},{"key":"ref63","author":"der maaten","year":"2008","journal-title":"Visualizing data using t-sne"},{"key":"ref28","article-title":"Mobilenets: Efficient convolutional neural networks for mobile vision applications","author":"howard","year":"2017"},{"key":"ref64","article-title":"Attention is all you need","author":"vaswani","year":"2017","journal-title":"NeurIPS"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00365"},{"key":"ref65","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298935"},{"key":"ref66","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00660"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00356"},{"key":"ref67","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00813"},{"key":"ref68","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00931"},{"key":"ref69","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00226"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00804"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00453"},{"key":"ref20","article-title":"Dialog-based interactive image retrieval","author":"guo","year":"2018","journal-title":"NeurIPS"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1145\/3295822"},{"key":"ref21","article-title":"Fashion iq: A new dataset towards retrieving images by natural language feedback","author":"guo","year":"2019"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.163"},{"key":"ref26","article-title":"Cycada: Cycle consistent adversarial domain adaptation","author":"hoffman","year":"2018","journal-title":"ICML"},{"key":"ref25","article-title":"In defense of the triplet loss for person re-identification","author":"hermans","year":"2017"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.125"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00637"},{"key":"ref59","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00591"},{"key":"ref58","article-title":"A simple neural network module for relational reasoning","author":"santoro","year":"2017","journal-title":"NeurIPS"},{"key":"ref57","doi-asserted-by":"publisher","DOI":"10.1145\/2897824.2925954"},{"key":"ref56","article-title":"Faster r-cnn: Towards real-time object detection with region proposal networks","author":"ren","year":"2015","journal-title":"NeurIPS"},{"key":"ref55","doi-asserted-by":"crossref","DOI":"10.1609\/aaai.v32i1.11671","article-title":"Film: Visual reasoning with a general conditioning layer","author":"perez","year":"2018","journal-title":"AAAI"},{"key":"ref54","article-title":"Pytorch: An imperative style, high-performance deep learning library","author":"paszke","year":"2019","journal-title":"NeurIPS"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.11"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.374"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00307"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2005.202"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00018"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00110"},{"key":"ref13","article-title":"s-sbir: Style augmented sketch based image retrieval","author":"dutta","year":"2020","journal-title":"WACV"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.33018295"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00680"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.265"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00126"},{"key":"ref18","article-title":"Deep image retrieval: Learning global representations for image search","author":"gordo","year":"2016","journal-title":"ECCV"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00401"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2019.2944597"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00636"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D17-1151"},{"key":"ref5","article-title":"Automatic attribute discovery and characterization from noisy web data","author":"berg","year":"2010","journal-title":"ECCV"},{"key":"ref8","author":"chen","year":"2020","journal-title":"Learning joint visual semantic matching embeddings for language-guided retrieval"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00753"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i07.6845"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01035"},{"key":"ref46","article-title":"Vilbert: Pretraining task-agnostic visiolinguistic representations for vision-and-language tasks","author":"lu","year":"2019","journal-title":"NeurIPS"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.124"},{"key":"ref48","article-title":"Spatial-content image search in complex scenes","author":"ma","year":"2020","journal-title":"WACV"},{"key":"ref47","article-title":"Hierarchical question-image co-attention for visual question answering","author":"lu","year":"2016","journal-title":"NeurIPS"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00243"},{"key":"ref41","article-title":"Precomputed real-time texture synthesis with markovian generative adversarial networks","author":"li","year":"2016","journal-title":"ECCV"},{"key":"ref44","article-title":"On the variance of the adaptive learning rate and beyond","author":"liu","year":"2020","journal-title":"ICLRE"},{"key":"ref43","article-title":"Revisiting batch normalization for practical domain adaptation","author":"li","year":"2017","journal-title":"ICLRE"}],"event":{"name":"2021 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR)","location":"Nashville, TN, USA","start":{"date-parts":[[2021,6,20]]},"end":{"date-parts":[[2021,6,25]]}},"container-title":["2021 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9577055\/9577056\/09577437.pdf?arnumber=9577437","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,11,12]],"date-time":"2023-11-12T05:01:47Z","timestamp":1699765307000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9577437\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,6]]},"references-count":77,"URL":"https:\/\/doi.org\/10.1109\/cvpr46437.2021.00086","relation":{},"subject":[],"published":{"date-parts":[[2021,6]]}}}