{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,9]],"date-time":"2025-12-09T19:31:23Z","timestamp":1765308683730,"version":"3.46.0"},"publisher-location":"New York, NY, USA","reference-count":43,"publisher":"ACM","funder":[{"name":"National Natural Science Foundation of China","award":["No.62261160653,No.62441619,No.62322216,U24B20175"],"award-info":[{"award-number":["No.62261160653,No.62441619,No.62322216,U24B20175"]}]},{"name":"Ningbo Science and Technology Innovation 2025 Major Project","award":["2025Z027"],"award-info":[{"award-number":["2025Z027"]}]},{"name":"Guangdong Key Laboratory of Information Security Technology and MoE Key Laboratory of Information Technology"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,10,27]]},"DOI":"10.1145\/3746027.3755049","type":"proceedings-article","created":{"date-parts":[[2025,10,25]],"date-time":"2025-10-25T05:50:47Z","timestamp":1761371447000},"page":"11367-11375","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Detecting Synthetic Image by Cross-Modal Commonality Interaction"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0008-3146-9115","authenticated-orcid":false,"given":"Kai","family":"Li","sequence":"first","affiliation":[{"name":"Shenzhen Campus of Sun Yat-sen University, Shenzhen, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5481-653X","authenticated-orcid":false,"given":"Wenqi","family":"Ren","sequence":"additional","affiliation":[{"name":"Shenzhen Campus of Sun Yat-sen University, Shenzhen, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8676-1190","authenticated-orcid":false,"given":"Wei","family":"Wang","sequence":"additional","affiliation":[{"name":"Shenzhen Campus of Sun Yat-sen University, Shenzhen, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5016-2719","authenticated-orcid":false,"given":"Linchao","family":"Zhang","sequence":"additional","affiliation":[{"name":"Artificial Intelligence Institute of China Electronics Technology Group Corporation, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7141-708X","authenticated-orcid":false,"given":"Xiaochun","family":"Cao","sequence":"additional","affiliation":[{"name":"Shenzhen Campus of Sun Yat-sen University, Shenzhen, China"}]}],"member":"320","published-online":{"date-parts":[[2025,10,27]]},"reference":[{"unstructured":"Andrew Brock Jeff Donahue and Karen Simonyan. 2019. Large Scale GAN Training for High Fidelity Natural Image Synthesis. In ICLR.","key":"e_1_3_2_1_1_1"},{"key":"e_1_3_2_1_2_1","first-page":"103","article-title":"What Makes Fake Images Detectable? Understanding Properties that Generalize","volume":"12371","author":"Chai Lucy","year":"2020","unstructured":"Lucy Chai, David Bau, Ser-Nam Lim, and Phillip Isola. 2020. What Makes Fake Images Detectable? Understanding Properties that Generalize. In ECCV, Vol. 12371. 103--120.","journal-title":"ECCV"},{"doi-asserted-by":"crossref","unstructured":"Yunjey Choi Min-Je Choi Munyoung Kim Jung-Woo Ha Sunghun Kim and Jaegul Choo. 2018. StarGAN: Unified Generative Adversarial Networks for Multi- Domain Image-to-Image Translation. In CVPR. 8789--8797.","key":"e_1_3_2_1_3_1","DOI":"10.1109\/CVPR.2018.00916"},{"key":"e_1_3_2_1_4_1","first-page":"219","article-title":"Combining EfficientNet and Vision Transformers for Video Deepfake Detection","volume":"13233","author":"Coccomini Davide","year":"2022","unstructured":"Davide Coccomini, Nicola Messina, Claudio Gennaro, and Fabrizio Falchi. 2022. Combining EfficientNet and Vision Transformers for Video Deepfake Detection. In ICIAP, Vol. 13233. 219--229.","journal-title":"ICIAP"},{"unstructured":"Prafulla Dhariwal and Alexander Quinn Nichol. 2021. Diffusion Models Beat GANs on Image Synthesis. In NIPS. 8780--8794.","key":"e_1_3_2_1_5_1"},{"doi-asserted-by":"crossref","unstructured":"Ricard Durall Margret Keuper and Janis Keuper. 2020. Watch Your Up- Convolution: CNN Based Generative Deep Neural Networks Are Failing to Reproduce Spectral Distributions. In CVPR. 7887--7896.","key":"e_1_3_2_1_6_1","DOI":"10.1109\/CVPR42600.2020.00791"},{"key":"e_1_3_2_1_7_1","first-page":"3247","article-title":"Leveraging Frequency Analysis for Deep Fake Image Recognition","volume":"119","author":"Frank Joel","year":"2020","unstructured":"Joel Frank, Thorsten Eisenhofer, Lea Sch\u00f6nherr, Asja Fischer, Dorothea Kolossa, and Thorsten Holz. 2020. Leveraging Frequency Analysis for Deep Fake Image Recognition. In ICML, Vol. 119. 3247--3258.","journal-title":"ICML"},{"unstructured":"Ian Goodfellow Jean Pouget-Abadie Mehdi Mirza et al. 2014. Generative Adversarial Networks. In NIPS. 2672--2680.","key":"e_1_3_2_1_8_1"},{"doi-asserted-by":"crossref","unstructured":"Kaiming He Xiangyu Zhang Shaoqing Ren and Jian Sun. 2016. Deep Residual Learning for Image Recognition. In CVPR. 770--778.","key":"e_1_3_2_1_9_1","DOI":"10.1109\/CVPR.2016.90"},{"key":"e_1_3_2_1_10_1","first-page":"6840","article-title":"Denoising Diffusion Probabilistic Models","volume":"33","author":"Ho Jonathan","year":"2020","unstructured":"Jonathan Ho, Ajay Jain, and Pieter Abbeel. 2020. Denoising Diffusion Probabilistic Models. In NIPS, Vol. 33. 6840--6851.","journal-title":"NIPS"},{"doi-asserted-by":"crossref","unstructured":"Yonghyun Jeong Doyeon Kim Seungjai Min Seongho Joe Youngjune Gwon and Jongwon Choi. 2022. BiHPF: Bilateral High-Pass Filters for Robust Deepfake Detection. In WACV. 2878--2887.","key":"e_1_3_2_1_11_1","DOI":"10.1109\/WACV51458.2022.00293"},{"doi-asserted-by":"crossref","unstructured":"Yonghyun Jeong Doyeon Kim Youngmin Ro and Jongwon Choi. 2022. FrePGAN: Robust Deepfake Detection Using Frequency-Level Perturbations. In AAAI. 1060-- 1068.","key":"e_1_3_2_1_12_1","DOI":"10.1609\/aaai.v36i1.19990"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_13_1","DOI":"10.1109\/TMM.2023.3313503"},{"unstructured":"Tero Karras Timo Aila Samuli Laine and Jaakko Lehtinen. 2018. Progressive Growing of GANs for Improved Quality Stability and Variation. In ICLR.","key":"e_1_3_2_1_14_1"},{"doi-asserted-by":"crossref","unstructured":"Tero Karras Samuli Laine and Timo Aila. 2019. A Style-Based Generator Architecture for Generative Adversarial Networks. In CVPR. 4396--4405.","key":"e_1_3_2_1_15_1","DOI":"10.1109\/CVPR.2019.00453"},{"doi-asserted-by":"crossref","unstructured":"Tero Karras Samuli Laine Miika Aittala Janne Hellsten Jaakko Lehtinen and Timo Aila. 2020. Analyzing and Improving the Image Quality of StyleGAN. In CVPR. 8107--8116.","key":"e_1_3_2_1_16_1","DOI":"10.1109\/CVPR42600.2020.00813"},{"doi-asserted-by":"crossref","unstructured":"Dong-Keon Kim and Kwangsu Kim. 2022. Generalized Facial Manipulation Detection with Edge Region Feature Extraction. In WACV. 2784--2794.","key":"e_1_3_2_1_17_1","DOI":"10.1109\/WACV51458.2022.00284"},{"key":"e_1_3_2_1_18_1","volume-title":"Kingma and Jimmy Ba","author":"Diederik","year":"2015","unstructured":"Diederik P. Kingma and Jimmy Ba. 2015. Adam: A Method for Stochastic Optimization. In ICLR."},{"unstructured":"Yuchen Luo Yong Zhang Junchi Yan and Wei Liu. 2021. Generalizing Face Forgery Detection With High-Frequency Features. In CVPR. 16317--16326.","key":"e_1_3_2_1_19_1"},{"key":"e_1_3_2_1_20_1","first-page":"1","article-title":"ISSP-Net: An Interactive Spatial-Spectral Perception Network for Multimodal Classification","volume":"62","author":"Ma Wenping","year":"2024","unstructured":"Wenping Ma, Hekai Zhang, Mengru Ma, Chuang Chen, and Biao Hou. 2024. ISSP-Net: An Interactive Spatial-Spectral Perception Network for Multimodal Classification. IEEE Transactions on Geoscience and Remote Sensing 62 (2024), 1--14.","journal-title":"IEEE Transactions on Geoscience and Remote Sensing"},{"doi-asserted-by":"crossref","unstructured":"Sara Mandelli Nicol\u00f2 Bonettini Paolo Bestagini and Stefano Tubaro. 2022. Detecting Gan-Generated Images by Orthogonal Training of Multiple CNNs. In ICIP. 3091--3095.","key":"e_1_3_2_1_21_1","DOI":"10.1109\/ICIP46576.2022.9897310"},{"doi-asserted-by":"crossref","unstructured":"Francesco Marra Diego Gragnaniello Luisa Verdoliva and Giovanni Poggi. 2019. Do GANs Leave Artificial Fingerprints?. In MIPR. 506--511.","key":"e_1_3_2_1_22_1","DOI":"10.1109\/MIPR.2019.00103"},{"key":"e_1_3_2_1_23_1","first-page":"8162","article-title":"Improved Denoising Diffusion Probabilistic Models","volume":"139","author":"Nichol Alexander Quinn","year":"2021","unstructured":"Alexander Quinn Nichol and Prafulla Dhariwal. 2021. Improved Denoising Diffusion Probabilistic Models. In ICML, Vol. 139. 8162--8171.","journal-title":"ICML"},{"doi-asserted-by":"crossref","unstructured":"Utkarsh Ojha Yuheng Li and Yong Jae Lee. 2023. Towards Universal Fake Image Detectors that Generalize Across Generative Models. In CVPR. 24480--24489.","key":"e_1_3_2_1_24_1","DOI":"10.1109\/CVPR52729.2023.02345"},{"doi-asserted-by":"crossref","unstructured":"Taesung Park Ming-Yu Liu Ting-Chun Wang and Jun-Yan Zhu. 2019. Semantic Image Synthesis With Spatially-Adaptive Normalization. In CVPR. 2337--2346.","key":"e_1_3_2_1_25_1","DOI":"10.1109\/CVPR.2019.00244"},{"doi-asserted-by":"crossref","unstructured":"Yuyang Qian Guojun Yin Lu Sheng Zixuan Chen and Jing Shao. 2020. Thinking in Frequency: Face Forgery Detection by Mining Frequency-Aware Clues. In ECCV. 86--103.","key":"e_1_3_2_1_26_1","DOI":"10.1007\/978-3-030-58610-2_6"},{"key":"e_1_3_2_1_27_1","first-page":"8821","article-title":"Zero-Shot Text-to-Image Generation","volume":"139","author":"Ramesh Aditya","year":"2021","unstructured":"Aditya Ramesh, Mikhail Pavlov, Gabriel Goh, Scott Gray, Chelsea Voss, Alec Radford, Mark Chen, and Ilya Sutskever. 2021. Zero-Shot Text-to-Image Generation. In ICML, Vol. 139. 8821--8831.","journal-title":"ICML"},{"key":"e_1_3_2_1_28_1","volume-title":"AEROBLADE: Training- Free Detection of Latent Diffusion Images Using Autoencoder Reconstruction Error. In CVPR. 9130--9140.","author":"Ricker Jonas","year":"2024","unstructured":"Jonas Ricker, Denis Lukovnikov, and Asja Fischer. 2024. AEROBLADE: Training- Free Detection of Latent Diffusion Images Using Autoencoder Reconstruction Error. In CVPR. 9130--9140."},{"doi-asserted-by":"crossref","unstructured":"Robin Rombach Andreas Blattmann Dominik Lorenz Patrick Esser and Bj\u00f6rn Ommer. 2022. High-Resolution Image Synthesis with Latent Diffusion Models. In CVPR. 10674--10685.","key":"e_1_3_2_1_29_1","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"e_1_3_2_1_30_1","volume-title":"Faceforensics: Learning to detect manipulated facial images. In ICCV. 1--11.","author":"Rossler Andreas","year":"2019","unstructured":"Andreas Rossler, Davide Cozzolino, Luisa Verdoliva, Christian Riess, and Justus Thies. 2019. Faceforensics: Learning to detect manipulated facial images. In ICCV. 1--11."},{"doi-asserted-by":"crossref","unstructured":"Andreas R\u00f6ssler Davide Cozzolino Luisa Verdoliva Christian Riess Justus Thies and Matthias Nie\u00dfner. 2019. FaceForensics: Learning to Detect Manipulated Facial Images. In ICCV. 1--11.","key":"e_1_3_2_1_31_1","DOI":"10.1109\/ICCV.2019.00009"},{"doi-asserted-by":"crossref","unstructured":"Ramprasaath R. Selvaraju Michael Cogswell Abhishek Das Ramakrishna Vedantam Devi Parikh and Dhruv Batra. 2017. Grad-CAM: Visual Explanations from Deep Networks via Gradient-Based Localization. In ICCV. 618--626.","key":"e_1_3_2_1_32_1","DOI":"10.1109\/ICCV.2017.74"},{"doi-asserted-by":"crossref","unstructured":"Kaede Shiohara and Toshihiko Yamasaki. 2022. Detecting Deepfakes With Self- Blended Images. In CVPR. 18720--18729.","key":"e_1_3_2_1_33_1","DOI":"10.1109\/CVPR52688.2022.01816"},{"doi-asserted-by":"crossref","unstructured":"Sergey Sinitsa and Ohad Fried. 2024. Deep Image Fingerprint: Towards Low Budget Synthetic Image Detection and Model Lineage Analysis. In WACV. 4067-- 4076.","key":"e_1_3_2_1_34_1","DOI":"10.1109\/WACV57701.2024.00402"},{"doi-asserted-by":"crossref","unstructured":"Chuangchuang Tan Huan Liu Yao Zhao Shikui Wei Guanghua Gu Ping Liu and Yunchao Wei. 2024. Rethinking the Up-Sampling Operations in CNN-Based Generative Network for Generalizable Deepfake Detection. In CVPR. 28130-- 28139.","key":"e_1_3_2_1_35_1","DOI":"10.1109\/CVPR52733.2024.02657"},{"doi-asserted-by":"crossref","unstructured":"Chuangchuang Tan Yao Zhao ShikuiWei Guanghua Gu Ping Liu and Yunchao Wei. 2024. Frequency-Aware Deepfake Detection: Improving Generalizability through Frequency Space Domain Learning. In AAAI. 5052--5060.","key":"e_1_3_2_1_36_1","DOI":"10.1609\/aaai.v38i5.28310"},{"doi-asserted-by":"crossref","unstructured":"Chuangchuang Tan Yao Zhao ShikuiWei Guanghua Gu and YunchaoWei. 2023. Learning on Gradients: Generalized Artifacts Representation for GAN-Generated Images Detection. In CVPR. 12105--12114.","key":"e_1_3_2_1_37_1","DOI":"10.1109\/CVPR52729.2023.01165"},{"key":"e_1_3_2_1_38_1","volume-title":"Efros","author":"Wang Sheng-Yu","year":"2020","unstructured":"Sheng-Yu Wang, Oliver Wang, Richard Zhang, Andrew Owens, and Alexei A. Efros. 2020. CNN-Generated Images Are Surprisingly Easy to Spot... for Now. In CVPR. 8692--8701."},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_39_1","DOI":"10.1007\/s10489-021-03064-0"},{"unstructured":"Kaiwen Yang Tianyi Zhou Yonggang Zhang Xinmei Tian and Dacheng Tao. 2021. Class-Disentanglement and Applications in Adversarial Detection and Defense. In NeurIPS. 16051--16063.","key":"e_1_3_2_1_40_1"},{"doi-asserted-by":"crossref","unstructured":"Ning Yu Larry Davis and Mario Fritz. 2019. Attributing Fake Images to GANs: Learning and Analyzing GAN Fingerprints. In ICCV. 7555--7565.","key":"e_1_3_2_1_41_1","DOI":"10.1109\/ICCV.2019.00765"},{"key":"e_1_3_2_1_42_1","volume-title":"PatchCraft: Exploring Texture Patch for Efficient AI-generated Image Detection. CoRR","author":"Zhong Nan","year":"2023","unstructured":"Nan Zhong, Yiran Xu, Zhenxing Qian, and Xinpeng Zhang. 2023. PatchCraft: Exploring Texture Patch for Efficient AI-generated Image Detection. CoRR (2023)."},{"key":"e_1_3_2_1_43_1","volume-title":"Efros","author":"Zhu Jun-Yan","year":"2017","unstructured":"Jun-Yan Zhu, Taesung Park, Phillip Isola, and Alexei A. Efros. 2017. Unpaired Image-to-Image Translation Using Cycle-Consistent Adversarial Networks. In ICCV. 2242--2251."}],"event":{"sponsor":["SIGMM ACM Special Interest Group on Multimedia"],"acronym":"MM '25","name":"MM '25: The 33rd ACM International Conference on Multimedia","location":"Dublin Ireland"},"container-title":["Proceedings of the 33rd ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3746027.3755049","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,9]],"date-time":"2025-12-09T19:28:24Z","timestamp":1765308504000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3746027.3755049"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,27]]},"references-count":43,"alternative-id":["10.1145\/3746027.3755049","10.1145\/3746027"],"URL":"https:\/\/doi.org\/10.1145\/3746027.3755049","relation":{},"subject":[],"published":{"date-parts":[[2025,10,27]]},"assertion":[{"value":"2025-10-27","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}