{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,26]],"date-time":"2026-04-26T05:21:24Z","timestamp":1777180884866,"version":"3.51.4"},"reference-count":112,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2025,11,1]],"date-time":"2025-11-01T00:00:00Z","timestamp":1761955200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2025,11,1]],"date-time":"2025-11-01T00:00:00Z","timestamp":1761955200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2025,5,16]],"date-time":"2025-05-16T00:00:00Z","timestamp":1747353600000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"DOI":"10.13039\/501100012479","name":"General Research Fund of Shanghai Normal University","doi-asserted-by":"publisher","award":["GRF 14216022"],"award-info":[{"award-number":["GRF 14216022"]}],"id":[{"id":"10.13039\/501100012479","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100012479","name":"General Research Fund of Shanghai Normal University","doi-asserted-by":"publisher","award":["GRF 14211420"],"award-info":[{"award-number":["GRF 14211420"]}],"id":[{"id":"10.13039\/501100012479","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100012479","name":"General Research Fund of Shanghai Normal University","doi-asserted-by":"publisher","award":["GRF 14203323"],"award-info":[{"award-number":["GRF 14203323"]}],"id":[{"id":"10.13039\/501100012479","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["CUHK420\/22"],"award-info":[{"award-number":["CUHK420\/22"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001839","name":"University Grants Committee","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100001839","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100002920","name":"Research Grants Council, University Grants Committee","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100002920","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100021171","name":"Basic and Applied Basic Research Foundation of Guangdong Province","doi-asserted-by":"publisher","award":["2021B1515120035"],"award-info":[{"award-number":["2021B1515120035"]}],"id":[{"id":"10.13039\/501100021171","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Information Fusion"],"published-print":{"date-parts":[[2025,11]]},"DOI":"10.1016\/j.inffus.2025.103290","type":"journal-article","created":{"date-parts":[[2025,5,16]],"date-time":"2025-05-16T11:55:21Z","timestamp":1747396521000},"page":"103290","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":5,"special_numbering":"C","title":["Multimodal graph representation learning for robust surgical workflow recognition with adversarial feature disentanglement"],"prefix":"10.1016","volume":"123","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-9762-6821","authenticated-orcid":false,"given":"Long","family":"Bai","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0009-0001-6913-6171","authenticated-orcid":false,"given":"Boyi","family":"Ma","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0009-0005-2146-2292","authenticated-orcid":false,"given":"Ruohan","family":"Wang","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2440-4950","authenticated-orcid":false,"given":"Guankun","family":"Wang","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0009-0009-7900-8032","authenticated-orcid":false,"given":"Beilei","family":"Cui","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7461-2200","authenticated-orcid":false,"given":"Zhongliang","family":"Jiang","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7162-2822","authenticated-orcid":false,"given":"Mobarakol","family":"Islam","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8903-1561","authenticated-orcid":false,"given":"Zhe","family":"Min","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2676-7387","authenticated-orcid":false,"given":"Jiewen","family":"Lai","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6032-5611","authenticated-orcid":false,"given":"Nassir","family":"Navab","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6488-1551","authenticated-orcid":false,"given":"Hongliang","family":"Ren","sequence":"additional","affiliation":[]}],"member":"78","reference":[{"key":"10.1016\/j.inffus.2025.103290_b1","series-title":"Surgical-dino: Adapter learning of foundation model for depth estimation in endoscopic surgery","author":"Cui","year":"2024"},{"issue":"2","key":"10.1016\/j.inffus.2025.103290_b2","doi-asserted-by":"crossref","DOI":"10.1002\/rcs.2358","article-title":"A systematic review of robotic surgery: From supervised paradigms to fully autonomous robotic approaches","volume":"18","author":"Han","year":"2022","journal-title":"Int. J. Med. Robot. Comput. Assist. Surg."},{"key":"10.1016\/j.inffus.2025.103290_b3","series-title":"Copesd: A multi-level surgical motion dataset for training large vision-language models to co-pilot endoscopic submucosal dissection","author":"Wang","year":"2024"},{"issue":"5","key":"10.1016\/j.inffus.2025.103290_b4","doi-asserted-by":"crossref","first-page":"e3","DOI":"10.1016\/j.jviscsurg.2011.04.007","article-title":"History of robotic surgery: from AESOP\u00ae and ZEUS\u00ae to da Vinci\u00ae","volume":"148","author":"Pugin","year":"2011","journal-title":"J. Visc. Surg."},{"key":"10.1016\/j.inffus.2025.103290_b5","doi-asserted-by":"crossref","first-page":"0109","DOI":"10.34133\/cbsystems.0109","article-title":"Magnetic soft microrobot design for cell grasping and transportation","volume":"5","author":"Wang","year":"2024","journal-title":"Cyborg and Bionic Systems"},{"key":"10.1016\/j.inffus.2025.103290_b6","series-title":"International Conference on Medical Image Computing and Computer-Assisted Intervention","first-page":"369","article-title":"Lightdiff: surgical endoscopic image low-light enhancement with t-diffusion","author":"Chen","year":"2024"},{"key":"10.1016\/j.inffus.2025.103290_b7","series-title":"Surgical-lvlm: Learning to adapt large vision-language model for grounded visual question answering in robotic surgery","author":"Wang","year":"2024"},{"issue":"12","key":"10.1016\/j.inffus.2025.103290_b8","doi-asserted-by":"crossref","first-page":"4457","DOI":"10.1109\/TMI.2024.3426953","article-title":"Video-instrument synergistic network for referring video instrument segmentation in robotic surgery","volume":"43","author":"Wang","year":"2024","journal-title":"IEEE Trans. Med. Imaging"},{"key":"10.1016\/j.inffus.2025.103290_b9","article-title":"Transendoscopic flexible parallel continuum robotic mechanism for bimanual endoscopic submucosal dissection","author":"Gao","year":"2023","journal-title":"Int. J. Robot. Res."},{"key":"10.1016\/j.inffus.2025.103290_b10","doi-asserted-by":"crossref","first-page":"0042","DOI":"10.34133\/cbsystems.0042","article-title":"Three-dimensional collision avoidance method for robot-assisted minimally invasive surgery","volume":"4","author":"Li","year":"2023","journal-title":"Cyborg and Bionic Systems"},{"key":"10.1016\/j.inffus.2025.103290_b11","series-title":"2023 IEEE International Conference on Robotics and Automation","first-page":"6859","article-title":"Surgical-VQLA: Transformer with gated vision-language embedding for visual question localized-answering in robotic surgery","author":"Bai","year":"2023"},{"key":"10.1016\/j.inffus.2025.103290_b12","series-title":"Surgsora: Decoupled rgbd-flow diffusion model for controllable surgical video generation","author":"Chen","year":"2024"},{"issue":"10","key":"10.1016\/j.inffus.2025.103290_b13","doi-asserted-by":"crossref","first-page":"2745","DOI":"10.1007\/s11517-023-02877-0","article-title":"Domain adaptive sim-to-real segmentation of oropharyngeal organs","volume":"61","author":"Wang","year":"2023","journal-title":"Medical & Biological Engineering & Computing"},{"key":"10.1016\/j.inffus.2025.103290_b14","series-title":"International Conference on Medical Image Computing and Computer-Assisted Intervention","first-page":"397","article-title":"CAT-ViL: co-attention gated vision-language embedding for visual question localized-answering in robotic surgery","author":"Bai","year":"2023"},{"issue":"5","key":"10.1016\/j.inffus.2025.103290_b15","doi-asserted-by":"crossref","first-page":"1114","DOI":"10.1109\/TMI.2017.2787657","article-title":"Sv-rcnet: workflow recognition from surgical videos using recurrent convolutional network","volume":"37","author":"Jin","year":"2017","journal-title":"IEEE Trans. Med. Imaging"},{"issue":"7","key":"10.1016\/j.inffus.2025.103290_b16","doi-asserted-by":"crossref","first-page":"1911","DOI":"10.1109\/TMI.2021.3069471","article-title":"Temporal memory relation network for workflow recognition from surgical video","volume":"40","author":"Jin","year":"2021","journal-title":"IEEE Trans. Med. Imaging"},{"key":"10.1016\/j.inffus.2025.103290_b17","series-title":"Sar-rarp50: Segmentation of surgical instrumentation and action recognition on robot-assisted radical prostatectomy challenge","author":"Psychogyios","year":"2023"},{"key":"10.1016\/j.inffus.2025.103290_b18","series-title":"OSSAR: Towards open-set surgical activity recognition in robot-assisted surgery","author":"Bai","year":"2024"},{"key":"10.1016\/j.inffus.2025.103290_b19","doi-asserted-by":"crossref","DOI":"10.1016\/j.ijsu.2021.106151","article-title":"A systematic review on artificial intelligence in robot-assisted surgery","volume":"95","author":"Moglia","year":"2021","journal-title":"Int. J. Surg."},{"key":"10.1016\/j.inffus.2025.103290_b20","series-title":"2021 IEEE International Conference on Robotics and Automation","first-page":"13346","article-title":"Relational graph learning on visual and kinematics embeddings for accurate gesture recognition in robotic surgery","author":"Long","year":"2021"},{"issue":"1","key":"10.1016\/j.inffus.2025.103290_b21","doi-asserted-by":"crossref","first-page":"6676","DOI":"10.1038\/s41467-023-42451-8","article-title":"Intelligent surgical workflow recognition for endoscopic submucosal dissection with real-time animal study","volume":"14","author":"Cao","year":"2023","journal-title":"Nat. Commun."},{"issue":"1","key":"10.1016\/j.inffus.2025.103290_b22","doi-asserted-by":"crossref","first-page":"86","DOI":"10.1109\/TMI.2016.2593957","article-title":"Endonet: a deep architecture for recognition tasks on laparoscopic videos","volume":"36","author":"Twinanda","year":"2016","journal-title":"IEEE Trans. Med. Imaging"},{"issue":"12","key":"10.1016\/j.inffus.2025.103290_b23","doi-asserted-by":"crossref","first-page":"2193","DOI":"10.1007\/s11548-022-02743-8","article-title":"Trans-SVNet: hybrid embedding aggregation transformer for surgical workflow analysis","volume":"17","author":"Jin","year":"2022","journal-title":"Int. J. Comput. Assist. Radiol. Surg."},{"key":"10.1016\/j.inffus.2025.103290_b24","series-title":"Learning multi-modal representations by watching hundreds of surgical video lectures","author":"Yuan","year":"2023"},{"key":"10.1016\/j.inffus.2025.103290_b25","doi-asserted-by":"crossref","first-page":"3","DOI":"10.1016\/j.artmed.2018.08.002","article-title":"Surgical motion analysis using discriminative interpretable patterns","volume":"91","author":"Forestier","year":"2018","journal-title":"Artif. Intell. Med."},{"key":"10.1016\/j.inffus.2025.103290_b26","doi-asserted-by":"crossref","DOI":"10.1016\/j.cmpb.2021.106452","article-title":"Micro-surgical anastomose workflow recognition challenge report","volume":"212","author":"Huaulm\u00e9","year":"2021","journal-title":"Comput. Methods Programs Biomed."},{"key":"10.1016\/j.inffus.2025.103290_b27","article-title":"Adaptive multi-hypergraph convolutional networks for 3D object classification","author":"Nong","year":"2022","journal-title":"IEEE Trans. Multimed."},{"key":"10.1016\/j.inffus.2025.103290_b28","first-page":"1","article-title":"Multimodal visual-textual object graph attention network for propaganda detection in memes","author":"Chen","year":"2023","journal-title":"Multimedia Tools Appl."},{"key":"10.1016\/j.inffus.2025.103290_b29","article-title":"Dualgnn: Dual graph neural network for multimedia recommendation","author":"Wang","year":"2021","journal-title":"IEEE Trans. Multimed."},{"issue":"2","key":"10.1016\/j.inffus.2025.103290_b30","doi-asserted-by":"crossref","DOI":"10.1145\/3542927","article-title":"Multimodal graph for unaligned multimodal sequence analysis via graph convolution and graph pooling","volume":"19","author":"Mai","year":"2023","journal-title":"ACM Trans. Multimedia Comput. Commun. Appl."},{"key":"10.1016\/j.inffus.2025.103290_b31","doi-asserted-by":"crossref","DOI":"10.1016\/j.patcog.2021.107920","article-title":"Dyadic relational graph convolutional networks for skeleton-based human interaction recognition","volume":"115","author":"Zhu","year":"2021","journal-title":"Pattern Recognit."},{"key":"10.1016\/j.inffus.2025.103290_b32","article-title":"Graph structure enhanced pre-training language model for knowledge graph completion","author":"Zhu","year":"2024","journal-title":"IEEE Trans. Emerg. Top. Comput. Intell."},{"issue":"10","key":"10.1016\/j.inffus.2025.103290_b33","doi-asserted-by":"crossref","first-page":"2335","DOI":"10.3390\/math11102335","article-title":"Multimodal interaction and fused graph convolution network for sentiment classification of online reviews","volume":"11","author":"Zeng","year":"2023","journal-title":"Math."},{"key":"10.1016\/j.inffus.2025.103290_b34","doi-asserted-by":"crossref","first-page":"376","DOI":"10.1016\/j.inffus.2022.10.022","article-title":"Brain tumor segmentation based on the fusion of deep semantics and edge information in multimodal MRI","volume":"91","author":"Zhu","year":"2023","journal-title":"Inf. Fusion"},{"issue":"8","key":"10.1016\/j.inffus.2025.103290_b35","doi-asserted-by":"crossref","first-page":"2462","DOI":"10.1109\/TMI.2023.3253760","article-title":"Hybrid graph convolutional network with online masked autoencoder for robust multimodal cancer survival prediction","volume":"42","author":"Hou","year":"2023","journal-title":"IEEE Trans. Med. Imaging"},{"key":"10.1016\/j.inffus.2025.103290_b36","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1109\/TIM.2021.3109398","article-title":"Multimodal information fusion approach for noncontact heart rate estimation using facial videos and graph convolutional network","volume":"71","author":"Yue","year":"2021","journal-title":"IEEE Trans. Instrum. Meas."},{"issue":"12","key":"10.1016\/j.inffus.2025.103290_b37","doi-asserted-by":"crossref","first-page":"3564","DOI":"10.1109\/TBME.2021.3077875","article-title":"Ensemble manifold regularized multi-modal graph convolutional network for cognitive ability prediction","volume":"68","author":"Qu","year":"2021","journal-title":"IEEE Trans. Biomed. Eng."},{"key":"10.1016\/j.inffus.2025.103290_b38","doi-asserted-by":"crossref","DOI":"10.1016\/j.inffus.2024.102224","article-title":"Multimodal graph learning based on 3D Haar semi-tight framelet for student engagement prediction","volume":"105","author":"Li","year":"2024","journal-title":"Inf. Fusion"},{"key":"10.1016\/j.inffus.2025.103290_b39","doi-asserted-by":"crossref","first-page":"4298","DOI":"10.1109\/TASLP.2024.3434495","article-title":"Masked graph learning with recurrent alignment for multimodal emotion recognition in conversation","volume":"32","author":"Meng","year":"2024","journal-title":"IEEE\/ACM Trans. Audio Speech Lang. Process."},{"key":"10.1016\/j.inffus.2025.103290_b40","series-title":"Pixel-wise recognition for holistic surgical scene understanding","author":"Ayobi","year":"2024"},{"key":"10.1016\/j.inffus.2025.103290_b41","series-title":"Medical Image Computing and Computer Assisted Intervention\u2013MICCAI 2018: 21st International Conference, Granada, Spain, September 16-20, 2018, Proceedings, Part IV 11","first-page":"247","article-title":"Deep reinforcement learning for surgical gesture segmentation and classification","author":"Liu","year":"2018"},{"key":"10.1016\/j.inffus.2025.103290_b42","doi-asserted-by":"crossref","DOI":"10.1016\/j.media.2024.103366","article-title":"Lovit: Long video transformer for surgical phase recognition","volume":"99","author":"Liu","year":"2025","journal-title":"Med. Image Anal."},{"key":"10.1016\/j.inffus.2025.103290_b43","doi-asserted-by":"crossref","unstructured":"Y. Liu, J. Huo, J. Peng, R. Sparks, P. Dasgupta, A. Granados, S. Ourselin, Skit: a fast key information video transformer for online surgical phase recognition, in: Proceedings of the IEEE\/CVF International Conference on Computer Vision, 2023, pp. 21074\u201321084.","DOI":"10.1109\/ICCV51070.2023.01927"},{"key":"10.1016\/j.inffus.2025.103290_b44","series-title":"Medical Image Computing and Computer Assisted Intervention\u2013MICCAI 2021: 24th International Conference, Strasbourg, France, September 27\u2013October 1, 2021, Proceedings, Part IV 24","first-page":"615","article-title":"Surgical workflow anticipation using instrument interaction","author":"Yuan","year":"2021"},{"key":"10.1016\/j.inffus.2025.103290_b45","doi-asserted-by":"crossref","DOI":"10.1016\/j.media.2022.102611","article-title":"Anticipation for surgical workflow through instrument interaction and recognized signals","volume":"82","author":"Yuan","year":"2022","journal-title":"Med. Image Anal."},{"key":"10.1016\/j.inffus.2025.103290_b46","series-title":"International Conference on Medical Image Computing and Computer-Assisted Intervention","first-page":"306","article-title":"Hecvl: Hierarchical video-language pretraining for zero-shot surgical phase recognition","author":"Yuan","year":"2024"},{"key":"10.1016\/j.inffus.2025.103290_b47","doi-asserted-by":"crossref","first-page":"1081","DOI":"10.1007\/s11548-016-1371-x","article-title":"Automatic data-driven real-time segmentation and recognition of surgical workflow","volume":"11","author":"Dergachyova","year":"2016","journal-title":"Int. J. Comput. Assist. Radiol. Surg."},{"key":"10.1016\/j.inffus.2025.103290_b48","doi-asserted-by":"crossref","DOI":"10.1016\/j.media.2019.101572","article-title":"Multi-task recurrent convolutional network with correlation loss for surgical video analysis","volume":"59","author":"Jin","year":"2020","journal-title":"Med. Image Anal."},{"key":"10.1016\/j.inffus.2025.103290_b49","series-title":"Medical Image Computing and Computer Assisted Intervention\u2013MICCAI 2020: 23rd International Conference, Lima, Peru, October 4\u20138, 2020, Proceedings, Part III 23","first-page":"343","article-title":"Tecno: Surgical phase recognition with multi-stage temporal convolutional networks","author":"Czempiel","year":"2020"},{"key":"10.1016\/j.inffus.2025.103290_b50","series-title":"2023 IEEE\/RSJ International Conference on Intelligent Robots and Systems","first-page":"4633","article-title":"Visual-kinematics graph learning for procedure-agnostic instrument tip segmentation in robotic surgeries","author":"Liu","year":"2023"},{"issue":"9","key":"10.1016\/j.inffus.2025.103290_b51","doi-asserted-by":"crossref","first-page":"2025","DOI":"10.1109\/TBME.2016.2647680","article-title":"A dataset and benchmarks for segmentation and recognition of gestures in robotic surgery","volume":"64","author":"Ahmidi","year":"2017","journal-title":"IEEE Trans. Biomed. Eng."},{"key":"10.1016\/j.inffus.2025.103290_b52","series-title":"2020 IEEE International Conference on Robotics and Automation","first-page":"371","article-title":"Temporal segmentation of surgical sub-tasks through deep learning with multiple data sources","author":"Qin","year":"2020"},{"key":"10.1016\/j.inffus.2025.103290_b53","series-title":"Calibration in deep learning: A survey of the state-of-the-art","author":"Wang","year":"2023"},{"issue":"3","key":"10.1016\/j.inffus.2025.103290_b54","first-page":"61","article-title":"Probabilistic outputs for support vector machines and comparisons to regularized likelihood methods","volume":"10","author":"Platt","year":"1999","journal-title":"Adv. Large Margin Classif."},{"key":"10.1016\/j.inffus.2025.103290_b55","doi-asserted-by":"crossref","unstructured":"M.P. Naeini, G. Cooper, M. Hauskrecht, Obtaining well calibrated probabilities using bayesian binning, in: Proceedings of the AAAI Conference on Artificial Intelligence, Vol. 29, 2015.","DOI":"10.1609\/aaai.v29i1.9602"},{"key":"10.1016\/j.inffus.2025.103290_b56","doi-asserted-by":"crossref","unstructured":"B. Zadrozny, C. Elkan, Transforming classifier scores into accurate multiclass probability estimates, in: Proceedings of the Eighth ACM SIGKDD International Conference on Knowledge Discovery and Data Mining, 2002, pp. 694\u2013699.","DOI":"10.1145\/775047.775151"},{"key":"10.1016\/j.inffus.2025.103290_b57","series-title":"Post-hoc reward calibration: A case study on length bias","author":"Huang","year":"2024"},{"key":"10.1016\/j.inffus.2025.103290_b58","doi-asserted-by":"crossref","unstructured":"T. Popordanoska, A. Tiulpin, M.B. Blaschko, Beyond classification: Definition and density-based estimation of calibration in object detection, in: Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision, 2024, pp. 585\u2013594.","DOI":"10.1109\/WACV57701.2024.00064"},{"key":"10.1016\/j.inffus.2025.103290_b59","series-title":"International Conference on Machine Learning","first-page":"1321","article-title":"On calibration of modern neural networks","author":"Guo","year":"2017"},{"key":"10.1016\/j.inffus.2025.103290_b60","series-title":"Regularizing neural networks by penalizing confident output distributions","author":"Pereyra","year":"2017"},{"key":"10.1016\/j.inffus.2025.103290_b61","series-title":"Focal loss for dense object detection","author":"Lin","year":"2017"},{"key":"10.1016\/j.inffus.2025.103290_b62","series-title":"Proceedings of the Thirty-Third International Joint Conference on Artificial Intelligence, IJCAI-24","first-page":"4515","article-title":"Efficiency calibration of implicit regularization in deep networks via self-paced curriculum-driven singular value selection","author":"Li","year":"2024"},{"key":"10.1016\/j.inffus.2025.103290_b63","series-title":"International Conference on Artificial Intelligence and Statistics","first-page":"1972","article-title":"Classifier calibration with ROC-regularized isotonic regression","author":"Berta","year":"2024"},{"key":"10.1016\/j.inffus.2025.103290_b64","series-title":"Class-distribution-aware calibration for long-tailed visual recognition","author":"Islam","year":"2021"},{"key":"10.1016\/j.inffus.2025.103290_b65","series-title":"International Conference on Machine Learning","first-page":"1613","article-title":"Weight uncertainty in neural network","author":"Blundell","year":"2015"},{"key":"10.1016\/j.inffus.2025.103290_b66","series-title":"Bayesian recurrent neural networks","author":"Fortunato","year":"2017"},{"key":"10.1016\/j.inffus.2025.103290_b67","article-title":"Simple and scalable predictive uncertainty estimation using deep ensembles","volume":"30","author":"Lakshminarayanan","year":"2017","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.inffus.2025.103290_b68","series-title":"Categorical reparameterization with gumbel-softmax","author":"Jang","year":"2016"},{"key":"10.1016\/j.inffus.2025.103290_b69","doi-asserted-by":"crossref","unstructured":"J. Pei, C. Wang, G. Szarvas, Transformer uncertainty estimation with hierarchical stochastic attention, in: Proceedings of the AAAI Conference on Artificial Intelligence, Vol. 36, 2022, pp. 11147\u201311155.","DOI":"10.1609\/aaai.v36i10.21364"},{"key":"10.1016\/j.inffus.2025.103290_b70","series-title":"European Conference on Computer Vision","first-page":"232","article-title":"Uncertainty calibration with energy based instance-wise scaling in the wild dataset","author":"Kim","year":"2025"},{"key":"10.1016\/j.inffus.2025.103290_b71","series-title":"Are graph neural networks miscalibrated?","author":"Teixeira","year":"2019"},{"key":"10.1016\/j.inffus.2025.103290_b72","first-page":"13775","article-title":"What makes graph neural networks miscalibrated?","volume":"35","author":"Hsu","year":"2022","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.inffus.2025.103290_b73","doi-asserted-by":"crossref","unstructured":"M. Wang, H. Yang, Q. Cheng, Gcl: Graph calibration loss for trustworthy graph neural network, in: Proceedings of the 30th ACM International Conference on Multimedia, 2022, pp. 988\u2013996.","DOI":"10.1145\/3503161.3548423"},{"key":"10.1016\/j.inffus.2025.103290_b74","first-page":"23768","article-title":"Be confident! towards trustworthy graph neural networks via confidence calibration","volume":"34","author":"Wang","year":"2021","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.inffus.2025.103290_b75","doi-asserted-by":"crossref","unstructured":"M. Wang, H. Yang, J. Huang, Q. Cheng, Moderate Message Passing Improves Calibration: A Universal Way to Mitigate Confidence Bias in Graph Neural Networks, in: Proceedings of the AAAI Conference on Artificial Intelligence, Vol. 38, 2024, pp. 21681\u201321689.","DOI":"10.1609\/aaai.v38i19.30167"},{"key":"10.1016\/j.inffus.2025.103290_b76","doi-asserted-by":"crossref","DOI":"10.1016\/j.neucom.2024.128294","article-title":"Exploring heterophily in calibration of graph neural networks","volume":"604","author":"Xie","year":"2024","journal-title":"Neurocomputing"},{"key":"10.1016\/j.inffus.2025.103290_b77","doi-asserted-by":"crossref","unstructured":"C. Yang, C. Yang, C. Shi, Y. Li, Z. Zhang, J. Zhou, Calibrating Graph Neural Networks from a Data-centric Perspective, in: Proceedings of the ACM on Web Conference 2024, 2024, pp. 745\u2013755.","DOI":"10.1145\/3589334.3645562"},{"key":"10.1016\/j.inffus.2025.103290_b78","series-title":"2016 Eighth International Conference on Quality of Multimedia Experience","first-page":"1","article-title":"Understanding how image quality affects deep neural networks","author":"Dodge","year":"2016"},{"key":"10.1016\/j.inffus.2025.103290_b79","series-title":"Impact of regularization on calibration and robustness: from the representation space perspective","author":"Park","year":"2024"},{"key":"10.1016\/j.inffus.2025.103290_b80","series-title":"Uncertainty estimation and quantification for LLMs: A simple supervised approach","author":"Liu","year":"2024"},{"key":"10.1016\/j.inffus.2025.103290_b81","series-title":"Graph attention networks","author":"Veli\u010dkovi\u0107","year":"2017"},{"issue":"11","key":"10.1016\/j.inffus.2025.103290_b82","doi-asserted-by":"crossref","first-page":"139","DOI":"10.1145\/3422622","article-title":"Generative adversarial networks","volume":"63","author":"Goodfellow","year":"2020","journal-title":"Commun. ACM"},{"key":"10.1016\/j.inffus.2025.103290_b83","first-page":"37","article-title":"Long short-term memory","author":"Graves","year":"2012","journal-title":"Supervised Seq. Label. Recurr. Neural Netw."},{"key":"10.1016\/j.inffus.2025.103290_b84","doi-asserted-by":"crossref","unstructured":"C. Lea, M.D. Flynn, R. Vidal, A. Reiter, G.D. Hager, Temporal convolutional networks for action segmentation and detection, in: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, 2017, pp. 156\u2013165.","DOI":"10.1109\/CVPR.2017.113"},{"key":"10.1016\/j.inffus.2025.103290_b85","doi-asserted-by":"crossref","unstructured":"Q. Xu, R. Zhang, Y. Zhang, Y. Wang, Q. Tian, A fourier-based framework for domain generalization, in: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 2021, pp. 14383\u201314392.","DOI":"10.1109\/CVPR46437.2021.01415"},{"key":"10.1016\/j.inffus.2025.103290_b86","doi-asserted-by":"crossref","unstructured":"Y. Yang, S. Soatto, Fda: Fourier domain adaptation for semantic segmentation, in: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 2020, pp. 4085\u20134095.","DOI":"10.1109\/CVPR42600.2020.00414"},{"key":"10.1016\/j.inffus.2025.103290_b87","series-title":"International Conference on Medical Image Computing and Computer-Assisted Intervention","first-page":"34","article-title":"Llcaps: Learning to illuminate low-light capsule endoscopy with curved wavelet attention and reverse diffusion","author":"Bai","year":"2023"},{"key":"10.1016\/j.inffus.2025.103290_b88","doi-asserted-by":"crossref","DOI":"10.1016\/j.inffus.2024.102387","article-title":"Dbfft: Adversarial-robust dual-branch frequency domain feature fusion in vision transformers","volume":"108","author":"Zeng","year":"2024","journal-title":"Inf. Fusion"},{"key":"10.1016\/j.inffus.2025.103290_b89","doi-asserted-by":"crossref","unstructured":"K. He, X. Zhang, S. Ren, J. Sun, Deep residual learning for image recognition, in: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, 2016, pp. 770\u2013778.","DOI":"10.1109\/CVPR.2016.90"},{"issue":"2","key":"10.1016\/j.inffus.2025.103290_b90","doi-asserted-by":"crossref","first-page":"288","DOI":"10.1090\/S0273-0979-1993-00390-2","article-title":"Wavelet transforms versus Fourier transforms","volume":"28","author":"Strang","year":"1993","journal-title":"Bull. Am. Math. Soc."},{"key":"10.1016\/j.inffus.2025.103290_b91","doi-asserted-by":"crossref","unstructured":"J. Tan, S. Pei, W. Qin, B. Fu, X. Li, L. Huang, Wavelet-based Mamba with Fourier Adjustment for Low-light Image Enhancement, in: Proceedings of the Asian Conference on Computer Vision, 2024, pp. 3449\u20133464.","DOI":"10.1007\/978-981-96-0911-6_10"},{"issue":"20","key":"10.1016\/j.inffus.2025.103290_b92","first-page":"10","article-title":"Graph attention networks","volume":"1050","author":"Velickovic","year":"2017","journal-title":"Stat."},{"key":"10.1016\/j.inffus.2025.103290_b93","series-title":"Semi-supervised classification with graph convolutional networks","author":"Kipf","year":"2016"},{"key":"10.1016\/j.inffus.2025.103290_b94","series-title":"The Semantic Web: 15th International Conference, ESWC 2018, Heraklion, Crete, Greece, June 3\u20137, 2018, Proceedings 15","first-page":"593","article-title":"Modeling relational data with graph convolutional networks","author":"Schlichtkrull","year":"2018"},{"key":"10.1016\/j.inffus.2025.103290_b95","article-title":"Attention is all you need","author":"Vaswani","year":"2017","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.inffus.2025.103290_b96","doi-asserted-by":"crossref","unstructured":"S. Mai, H. Hu, S. Xing, Modality to modality translation: An adversarial representation learning and graph fusion network for multimodal fusion, in: Proceedings of the AAAI Conference on Artificial Intelligence, Vol. 34, 2020, pp. 164\u2013172.","DOI":"10.1609\/aaai.v34i01.5347"},{"key":"10.1016\/j.inffus.2025.103290_b97","doi-asserted-by":"crossref","unstructured":"Y. Shang, C. Gao, J. Chen, D. Jin, H. Ma, Y. Li, Enhancing adversarial robustness of multi-modal recommendation via modality balancing, in: Proceedings of the 31st ACM International Conference on Multimedia, 2023, pp. 6274\u20136282.","DOI":"10.1145\/3581783.3612337"},{"key":"10.1016\/j.inffus.2025.103290_b98","doi-asserted-by":"crossref","unstructured":"J. Liu, X. Fan, Z. Huang, G. Wu, R. Liu, W. Zhong, Z. Luo, Target-aware dual adversarial learning and a multi-scenario multi-modality benchmark to fuse infrared and visible for object detection, in: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 2022, pp. 5802\u20135811.","DOI":"10.1109\/CVPR52688.2022.00571"},{"key":"10.1016\/j.inffus.2025.103290_b99","doi-asserted-by":"crossref","DOI":"10.1016\/j.media.2022.102535","article-title":"Adversarial multimodal fusion with attention mechanism for skin lesion classification using clinical and dermoscopic images","volume":"81","author":"Wang","year":"2022","journal-title":"Med. Image Anal."},{"issue":"6","key":"10.1016\/j.inffus.2025.103290_b100","doi-asserted-by":"crossref","first-page":"1075","DOI":"10.1007\/s11548-024-03101-6","article-title":"Multimodal semi-supervised learning for online recognition of multi-granularity surgical workflows","volume":"19","author":"Yamada","year":"2024","journal-title":"Int. J. Comput. Assist. Radiol. Surg."},{"key":"10.1016\/j.inffus.2025.103290_b101","doi-asserted-by":"crossref","first-page":"1735","DOI":"10.1162\/neco.1997.9.8.1735","article-title":"Long short-term memory","volume":"9","author":"Hochreiter","year":"1997","journal-title":"Neural Comput."},{"key":"10.1016\/j.inffus.2025.103290_b102","series-title":"On the properties of neural machine translation: Encoder-decoder approaches","author":"Cho","year":"2014"},{"key":"10.1016\/j.inffus.2025.103290_b103","series-title":"Computer Vision\u2013ECCV 2016 Workshops: Amsterdam, The Netherlands, October 8-10 and 15-16, 2016, Proceedings, Part III 14","first-page":"47","article-title":"Temporal convolutional networks: A unified approach to action segmentation","author":"Lea","year":"2016"},{"key":"10.1016\/j.inffus.2025.103290_b104","series-title":"International Conference on Medical Image Computing and Computer-Assisted Intervention","first-page":"606","article-title":"Surgformer: Surgical transformer with hierarchical temporal attention for surgical phase recognition","author":"Yang","year":"2024"},{"key":"10.1016\/j.inffus.2025.103290_b105","doi-asserted-by":"crossref","DOI":"10.1016\/j.media.2024.103126","article-title":"On the pitfalls of batch normalization for end-to-end video learning: a study on surgical workflow analysis","volume":"94","author":"Rivoir","year":"2024","journal-title":"Med. Image Anal."},{"key":"10.1016\/j.inffus.2025.103290_b106","series-title":"2016 IEEE International Conference on Robotics and Automation","first-page":"1642","article-title":"Learning convolutional action primitives for fine-grained action recognition","author":"Lea","year":"2016"},{"key":"10.1016\/j.inffus.2025.103290_b107","series-title":"Computer Vision\u2013ECCV 2016: 14th European Conference, Amsterdam, The Netherlands, October 11-14, 2016, Proceedings, Part III 14","first-page":"36","article-title":"Segmental spatiotemporal cnns for fine-grained action segmentation","author":"Lea","year":"2016"},{"key":"10.1016\/j.inffus.2025.103290_b108","series-title":"2022 IEEE International Conference on Multimedia and Expo","first-page":"1","article-title":"Cat: Cross attention in vision transformer","author":"Lin","year":"2022"},{"key":"10.1016\/j.inffus.2025.103290_b109","article-title":"Bilinear attention networks","volume":"31","author":"Kim","year":"2018","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.inffus.2025.103290_b110","doi-asserted-by":"crossref","unstructured":"P. Anderson, X. He, C. Buehler, D. Teney, M. Johnson, S. Gould, L. Zhang, Bottom-up and top-down attention for image captioning and visual question answering, in: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, 2018, pp. 6077\u20136086.","DOI":"10.1109\/CVPR.2018.00636"},{"key":"10.1016\/j.inffus.2025.103290_b111","doi-asserted-by":"crossref","unstructured":"Y. Dai, F. Gieseke, S. Oehmcke, Y. Wu, K. Barnard, Attentional feature fusion, in: Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision, 2021, pp. 3560\u20133569.","DOI":"10.1109\/WACV48630.2021.00360"},{"key":"10.1016\/j.inffus.2025.103290_b112","series-title":"Benchmarking neural network robustness to common corruptions and perturbations","author":"Hendrycks","year":"2019"}],"container-title":["Information Fusion"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S156625352500363X?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S156625352500363X?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2025,11,7]],"date-time":"2025-11-07T21:37:09Z","timestamp":1762551429000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S156625352500363X"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,11]]},"references-count":112,"alternative-id":["S156625352500363X"],"URL":"https:\/\/doi.org\/10.1016\/j.inffus.2025.103290","relation":{},"ISSN":["1566-2535"],"issn-type":[{"value":"1566-2535","type":"print"}],"subject":[],"published":{"date-parts":[[2025,11]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"Multimodal graph representation learning for robust surgical workflow recognition with adversarial feature disentanglement","name":"articletitle","label":"Article Title"},{"value":"Information Fusion","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.inffus.2025.103290","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2025 The Authors. Published by Elsevier B.V.","name":"copyright","label":"Copyright"}],"article-number":"103290"}}