{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,4]],"date-time":"2026-06-04T16:44:13Z","timestamp":1780591453740,"version":"3.54.1"},"reference-count":46,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2024,12,1]],"date-time":"2024-12-01T00:00:00Z","timestamp":1733011200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2024,12,1]],"date-time":"2024-12-01T00:00:00Z","timestamp":1733011200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2024,12,1]],"date-time":"2024-12-01T00:00:00Z","timestamp":1733011200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-017"},{"start":{"date-parts":[[2024,12,1]],"date-time":"2024-12-01T00:00:00Z","timestamp":1733011200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"},{"start":{"date-parts":[[2024,12,1]],"date-time":"2024-12-01T00:00:00Z","timestamp":1733011200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-012"},{"start":{"date-parts":[[2024,12,1]],"date-time":"2024-12-01T00:00:00Z","timestamp":1733011200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,12,1]],"date-time":"2024-12-01T00:00:00Z","timestamp":1733011200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-004"}],"funder":[{"DOI":"10.13039\/501100012166","name":"National Key Research and Development Program of China","doi-asserted-by":"publisher","award":["2019YFC1509604"],"award-info":[{"award-number":["2019YFC1509604"]}],"id":[{"id":"10.13039\/501100012166","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100007046","name":"Wuhan University","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100007046","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62072348"],"award-info":[{"award-number":["62072348"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100013072","name":"Major Science and Technology Project of Hainan Province","doi-asserted-by":"publisher","award":["2019AEA170"],"award-info":[{"award-number":["2019AEA170"]}],"id":[{"id":"10.13039\/501100013072","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Pattern Recognition"],"published-print":{"date-parts":[[2024,12]]},"DOI":"10.1016\/j.patcog.2024.110822","type":"journal-article","created":{"date-parts":[[2024,7,31]],"date-time":"2024-07-31T02:57:00Z","timestamp":1722394620000},"page":"110822","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":131,"special_numbering":"C","title":["ITFuse: An interactive transformer for infrared and visible image fusion"],"prefix":"10.1016","volume":"156","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-8995-705X","authenticated-orcid":false,"given":"Wei","family":"Tang","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Fazhi","family":"He","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2211-3535","authenticated-orcid":false,"given":"Yu","family":"Liu","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"78","reference":[{"key":"10.1016\/j.patcog.2024.110822_b1","doi-asserted-by":"crossref","DOI":"10.1016\/j.patcog.2022.109295","article-title":"TCCFusion: An infrared and visible image fusion method based on transformer and cross correlation","volume":"137","author":"Tang","year":"2023","journal-title":"Pattern Recognit."},{"key":"10.1016\/j.patcog.2024.110822_b2","doi-asserted-by":"crossref","DOI":"10.1016\/j.patcog.2023.109369","article-title":"Unsupervised person re-identification via multi-domain joint learning","volume":"138","author":"Chen","year":"2023","journal-title":"Pattern Recognit."},{"key":"10.1016\/j.patcog.2024.110822_b3","doi-asserted-by":"crossref","first-page":"5413","DOI":"10.1109\/TMM.2022.3192661","article-title":"YDTR: Infrared and visible image fusion via Y-shape dynamic transformer","volume":"25","author":"Tang","year":"2023","journal-title":"IEEE Trans. Multimed."},{"issue":"7","key":"10.1016\/j.patcog.2024.110822_b4","doi-asserted-by":"crossref","first-page":"3159","DOI":"10.1109\/TCSVT.2023.3234340","article-title":"DATFuse: Infrared and visible image fusion via dual attention transformer","volume":"33","author":"Tang","year":"2023","journal-title":"IEEE Trans. Circuits Syst. Video Technol."},{"key":"10.1016\/j.patcog.2024.110822_b5","doi-asserted-by":"crossref","first-page":"100","DOI":"10.1016\/j.inffus.2016.02.001","article-title":"Infrared and visible image fusion via gradient transfer and total variation minimization","volume":"31","author":"Ma","year":"2016","journal-title":"Inf. Fusion"},{"key":"10.1016\/j.patcog.2024.110822_b6","doi-asserted-by":"crossref","first-page":"11","DOI":"10.1016\/j.inffus.2018.09.004","article-title":"FusionGAN: A generative adversarial network for infrared and visible image fusion","volume":"48","author":"Ma","year":"2019","journal-title":"Inf. Fusion"},{"key":"10.1016\/j.patcog.2024.110822_b7","doi-asserted-by":"crossref","DOI":"10.1109\/TIM.2022.3216413","article-title":"SwinFuse: A residual Swin Transformer fusion network for infrared and visible images","volume":"71","author":"Wang","year":"2022","journal-title":"IEEE Trans. Instrum. Meas."},{"key":"10.1016\/j.patcog.2024.110822_b8","doi-asserted-by":"crossref","DOI":"10.1016\/j.patcog.2021.108042","article-title":"Non-linear and selective fusion of cross-modal images","volume":"119","author":"Fang","year":"2021","journal-title":"Pattern Recognit."},{"issue":"10","key":"10.1016\/j.patcog.2024.110822_b9","doi-asserted-by":"crossref","first-page":"1225","DOI":"10.1002\/jemt.23514","article-title":"A phase congruency-based green fluorescent protein and phase contrast image fusion method in nonsubsampled shearlet transform domain","volume":"83","author":"Tang","year":"2020","journal-title":"Microsc. Res. Tech."},{"key":"10.1016\/j.patcog.2024.110822_b10","doi-asserted-by":"crossref","first-page":"100","DOI":"10.1016\/j.inffus.2016.05.004","article-title":"Pixel-level image fusion: A survey of the state of the art","volume":"33","author":"Li","year":"2017","journal-title":"Inf. Fusion"},{"issue":"1","key":"10.1016\/j.patcog.2024.110822_b11","doi-asserted-by":"crossref","first-page":"502","DOI":"10.1109\/TPAMI.2020.3012548","article-title":"U2Fusion: A unified unsupervised image fusion network","volume":"44","author":"Xu","year":"2020","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"10.1016\/j.patcog.2024.110822_b12","doi-asserted-by":"crossref","first-page":"584","DOI":"10.1109\/TCI.2021.3083965","article-title":"Green fluorescent protein and phase contrast image fusion via detail preserving cross network","volume":"7","author":"Tang","year":"2021","journal-title":"IEEE Trans. Comput. Imag."},{"issue":"4","key":"10.1016\/j.patcog.2024.110822_b13","doi-asserted-by":"crossref","DOI":"10.1016\/j.ipm.2024.103687","article-title":"FATFusion: A functional\u2013anatomical transformer for medical image fusion","volume":"64","author":"Tang","year":"2024","journal-title":"Inf. Process. Manage."},{"key":"10.1016\/j.patcog.2024.110822_b14","doi-asserted-by":"crossref","DOI":"10.1155\/2019\/5450373","article-title":"Green fluorescent protein and phase-contrast image fusion via generative adversarial networks","volume":"2019","author":"Tang","year":"2019","journal-title":"Comput. Math. Methods Med."},{"issue":"3","key":"10.1016\/j.patcog.2024.110822_b15","doi-asserted-by":"crossref","DOI":"10.1142\/S0219691318500182","article-title":"Infrared and visible image fusion with convolutional neural networks","volume":"16","author":"Liu","year":"2018","journal-title":"Int. J. Wavelets Multiresolut. Inf. Process."},{"issue":"5","key":"10.1016\/j.patcog.2024.110822_b16","doi-asserted-by":"crossref","first-page":"2614","DOI":"10.1109\/TIP.2018.2887342","article-title":"DenseFuse: A fusion approach to infrared and visible images","volume":"28","author":"Li","year":"2018","journal-title":"IEEE Trans. Image Process."},{"key":"10.1016\/j.patcog.2024.110822_b17","doi-asserted-by":"crossref","first-page":"72","DOI":"10.1016\/j.inffus.2021.02.023","article-title":"RFN-Nest: An end-to-end residual fusion network for infrared and visible images","volume":"73","author":"Li","year":"2021","journal-title":"Inf. Fusion"},{"issue":"6","key":"10.1016\/j.patcog.2024.110822_b18","doi-asserted-by":"crossref","first-page":"3360","DOI":"10.1109\/TCSVT.2021.3109895","article-title":"UNFusion: A unified multi-scale densely connected network for infrared and visible image fusion","volume":"32","author":"Wang","year":"2022","journal-title":"IEEE Trans. Circuits Syst. Video Technol."},{"key":"10.1016\/j.patcog.2024.110822_b19","doi-asserted-by":"crossref","first-page":"4980","DOI":"10.1109\/TIP.2020.2977573","article-title":"DDcGAN: A dual-discriminator conditional generative adversarial network for multi-resolution image fusion","volume":"29","author":"Ma","year":"2020","journal-title":"IEEE Trans. Image Process."},{"key":"10.1016\/j.patcog.2024.110822_b20","doi-asserted-by":"crossref","unstructured":"J. Liu, X. Fan, Z. Huang, G. Wu, R. Liu, W. Zhong, Z. Luo, Target-aware Dual Adversarial Learning and a Multi-scenario Multi-Modality Benchmark to Fuse Infrared and Visible for Object Detection, in: IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 2022, pp. 5802\u20135811.","DOI":"10.1109\/CVPR52688.2022.00571"},{"key":"10.1016\/j.patcog.2024.110822_b21","unstructured":"A. Vaswani, N. Shazeer, N. Parmar, J. Uszkoreit, L. Jones, A.N. Gomez, \u0141. Kaiser, I. Polosukhin, Attention is all you need, in: Annual Conference on Neural Information Processing Systems, 2017, pp. 5998\u20136008."},{"key":"10.1016\/j.patcog.2024.110822_b22","doi-asserted-by":"crossref","unstructured":"R. Strudel, R. Garcia, I. Laptev, C. Schmid, Segmenter: Transformer for Semantic Segmentation, in: IEEE\/CVF International Conference on Computer Vision, 2021, pp. 7262\u20137272.","DOI":"10.1109\/ICCV48922.2021.00717"},{"key":"10.1016\/j.patcog.2024.110822_b23","series-title":"An image is worth 16x16 words: Transformers for image recognition at scale","first-page":"1","author":"Dosovitskiy","year":"2020"},{"key":"10.1016\/j.patcog.2024.110822_b24","doi-asserted-by":"crossref","unstructured":"C.F.R. Chen, Q. Fan, R. Panda, CrossViT: Cross-attention multi-scale vision transformer for image classification, in: IEEE\/CVF International Conference on Computer Vision, 2021, pp. 357\u2013366.","DOI":"10.1109\/ICCV48922.2021.00041"},{"key":"10.1016\/j.patcog.2024.110822_b25","doi-asserted-by":"crossref","unstructured":"N. Carion, F. Massa, G. Synnaeve, N. Usunier, A. Kirillov, S. Zagoruyko, End-to-end object detection with transformers, in: European Conference on Computer Vision, 2020, pp. 213\u2013229.","DOI":"10.1007\/978-3-030-58452-8_13"},{"key":"10.1016\/j.patcog.2024.110822_b26","series-title":"Deformable DETR: Deformable transformers for end-to-end object detection","author":"Zhu","year":"2020"},{"key":"10.1016\/j.patcog.2024.110822_b27","doi-asserted-by":"crossref","unstructured":"S. Zheng, J. Lu, H. Zhao, X. Zhu, Z. Luo, Y. Wang, Y. Fu, J. Feng, T. Xiang, P.H. Torr, L. Zhang, Rethinking semantic segmentation from a sequence-to-sequence perspective with transformers, in: IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 2021, pp. 6881\u20136890.","DOI":"10.1109\/CVPR46437.2021.00681"},{"key":"10.1016\/j.patcog.2024.110822_b28","doi-asserted-by":"crossref","first-page":"5134","DOI":"10.1109\/TIP.2022.3193288","article-title":"MATR: Multimodal medical image fusion via multiscale adaptive transformer","volume":"32","author":"Tang","year":"2022","journal-title":"IEEE Trans. Image Process."},{"key":"10.1016\/j.patcog.2024.110822_b29","doi-asserted-by":"crossref","unstructured":"V. Vs, J.M.J. Valanarasu, P. Oza, V.M. Patel, Image Fusion Transformer, in: International Conference on Image Processing, 2022, pp. 3566\u20133570.","DOI":"10.1109\/ICIP46576.2022.9897280"},{"issue":"2","key":"10.1016\/j.patcog.2024.110822_b30","doi-asserted-by":"crossref","first-page":"770","DOI":"10.1109\/TCSVT.2023.3289170","article-title":"Cross-modal transformers for infrared and visible image fusion","volume":"34","author":"Park","year":"2023","journal-title":"IEEE Trans. Circuits Syst. Video Technol."},{"key":"10.1016\/j.patcog.2024.110822_b31","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1109\/TIM.2022.3218574","article-title":"CGTF: Convolution-guided transformer for infrared and visible image fusion","volume":"71","author":"Li","year":"2022","journal-title":"IEEE Trans. Instrum. Meas."},{"key":"10.1016\/j.patcog.2024.110822_b32","article-title":"TGFuse: An infrared and visible image fusion approach based on transformer and generative adversarial network","author":"Rao","year":"2023","journal-title":"IEEE Trans. Image Process."},{"key":"10.1016\/j.patcog.2024.110822_b33","doi-asserted-by":"crossref","unstructured":"Q. Hou, D. Zhou, J. Feng, Coordinate Attention for Efficient Mobile Network Design, in: IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 2021, pp. 13713\u201313722.","DOI":"10.1109\/CVPR46437.2021.01350"},{"issue":"2","key":"10.1016\/j.patcog.2024.110822_b34","doi-asserted-by":"crossref","first-page":"1489","DOI":"10.1109\/TPAMI.2022.3164083","article-title":"Contextual transformer networks for visual recognition","volume":"45","author":"Li","year":"2022","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"issue":"3","key":"10.1016\/j.patcog.2024.110822_b35","doi-asserted-by":"crossref","first-page":"211","DOI":"10.1023\/B:VISI.0000045324.43199.43","article-title":"Lucas\/kanade meets horn\/schunck: Combining local and global optic flow methods","volume":"61","author":"Bruhn","year":"2005","journal-title":"Int. J. Comput. Vis."},{"issue":"4","key":"10.1016\/j.patcog.2024.110822_b36","doi-asserted-by":"crossref","first-page":"600","DOI":"10.1109\/TIP.2003.819861","article-title":"Image quality assessment: from error visibility to structural similarity","volume":"13","author":"Wang","year":"2004","journal-title":"IEEE Trans. Image Process."},{"key":"10.1016\/j.patcog.2024.110822_b37","series-title":"Adam: A method for stochastic optimization","author":"Kingma","year":"2014"},{"issue":"11","key":"10.1016\/j.patcog.2024.110822_b38","doi-asserted-by":"crossref","first-page":"626","DOI":"10.1049\/el:20060693","article-title":"Image fusion metric based on mutual information and tsallis entropy","volume":"42","author":"Cvejic","year":"2006","journal-title":"Electron. Lett."},{"key":"10.1016\/j.patcog.2024.110822_b39","doi-asserted-by":"crossref","first-page":"193","DOI":"10.1016\/j.inffus.2005.10.001","article-title":"A human perception inspired quality metric for image fusion based on regional information","volume":"8","author":"Chen","year":"2007","journal-title":"Inf. Fusion"},{"key":"10.1016\/j.patcog.2024.110822_b40","first-page":"55","article-title":"Edge detection operators: peak signal to noise ratio based comparison","volume":"10","author":"Poobathy","year":"2014","journal-title":"I. J. Imag. Graph. Signal Process."},{"key":"10.1016\/j.patcog.2024.110822_b41","unstructured":"Z. Wang, E.P. Simoncelli, A.C. Bovik, Multi-scale structural similarity for image quality assessment, in: Asilomar Conference on Signals, Systems and Computers, 2003, pp. 1398\u20131402."},{"key":"10.1016\/j.patcog.2024.110822_b42","first-page":"1","article-title":"GANMcC: A generative adversarial network with multiclassification constraints for infrared and visible image fusion","volume":"70","author":"Ma","year":"2021","journal-title":"IEEE Trans. Instrum. Meas."},{"issue":"3","key":"10.1016\/j.patcog.2024.110822_b43","doi-asserted-by":"crossref","first-page":"1186","DOI":"10.1109\/TCSVT.2021.3075745","article-title":"Efficient and model-based infrared and visible image fusion via algorithm unrolling","volume":"32","author":"Zhao","year":"2021","journal-title":"IEEE Trans. Circuits Syst. Video Technol."},{"key":"10.1016\/j.patcog.2024.110822_b44","doi-asserted-by":"crossref","unstructured":"S. Hwang, J. Park, N. Kim, Y. Choi, I.S. Kweon, Multispectral pedestrian detection: Benchmark dataset and baseline, in: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, 2015, pp. 1037\u20131045.","DOI":"10.1109\/CVPR.2015.7298706"},{"key":"10.1016\/j.patcog.2024.110822_b45","doi-asserted-by":"crossref","DOI":"10.1016\/j.cagd.2023.102219","article-title":"TPNet: A novel mesh analysis method via topology preservation and perception enhancement","volume":"104","author":"Li","year":"2023","journal-title":"Comput. Aided Geom. D."},{"issue":"4","key":"10.1016\/j.patcog.2024.110822_b46","doi-asserted-by":"crossref","first-page":"327","DOI":"10.3233\/ICA-230710","article-title":"3D Reconstruction based on Hierarchical Reinforcement Learning with Transferability","volume":"30","author":"Li","year":"2023","journal-title":"Integr. Comput.-Aid. E."}],"container-title":["Pattern Recognition"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0031320324005739?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0031320324005739?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2024,9,5]],"date-time":"2024-09-05T02:12:43Z","timestamp":1725502363000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S0031320324005739"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,12]]},"references-count":46,"alternative-id":["S0031320324005739"],"URL":"https:\/\/doi.org\/10.1016\/j.patcog.2024.110822","relation":{},"ISSN":["0031-3203"],"issn-type":[{"value":"0031-3203","type":"print"}],"subject":[],"published":{"date-parts":[[2024,12]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"ITFuse: An interactive transformer for infrared and visible image fusion","name":"articletitle","label":"Article Title"},{"value":"Pattern Recognition","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.patcog.2024.110822","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2024 Elsevier Ltd. All rights are reserved, including those for text and data mining, AI training, and similar technologies.","name":"copyright","label":"Copyright"}],"article-number":"110822"}}