{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,2]],"date-time":"2026-05-02T16:22:49Z","timestamp":1777738969580,"version":"3.51.4"},"reference-count":57,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2025,11,1]],"date-time":"2025-11-01T00:00:00Z","timestamp":1761955200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2025,11,1]],"date-time":"2025-11-01T00:00:00Z","timestamp":1761955200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2025,11,1]],"date-time":"2025-11-01T00:00:00Z","timestamp":1761955200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-017"},{"start":{"date-parts":[[2025,11,1]],"date-time":"2025-11-01T00:00:00Z","timestamp":1761955200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"},{"start":{"date-parts":[[2025,11,1]],"date-time":"2025-11-01T00:00:00Z","timestamp":1761955200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-012"},{"start":{"date-parts":[[2025,11,1]],"date-time":"2025-11-01T00:00:00Z","timestamp":1761955200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,11,1]],"date-time":"2025-11-01T00:00:00Z","timestamp":1761955200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-004"}],"funder":[{"DOI":"10.13039\/501100014219","name":"National Science Fund for Distinguished Young Scholars","doi-asserted-by":"publisher","award":["62025602"],"award-info":[{"award-number":["62025602"]}],"id":[{"id":"10.13039\/501100014219","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["U22B2036"],"award-info":[{"award-number":["U22B2036"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100031931","name":"Shanghai Artificial Intelligence Laboratory","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100031931","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Artificial Intelligence"],"published-print":{"date-parts":[[2025,11]]},"DOI":"10.1016\/j.artint.2025.104392","type":"journal-article","created":{"date-parts":[[2025,7,15]],"date-time":"2025-07-15T07:57:36Z","timestamp":1752566256000},"page":"104392","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":1,"special_numbering":"C","title":["Provably efficient information-directed sampling algorithms for multi-agent reinforcement learning"],"prefix":"10.1016","volume":"348","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-6114-8453","authenticated-orcid":false,"given":"Qiaosheng","family":"Zhang","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8379-9385","authenticated-orcid":false,"given":"Chenjia","family":"Bai","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1908-1344","authenticated-orcid":false,"given":"Shuyue","family":"Hu","sequence":"additional","affiliation":[]},{"given":"Zhen","family":"Wang","sequence":"additional","affiliation":[]},{"given":"Xuelong","family":"Li","sequence":"additional","affiliation":[]}],"member":"78","reference":[{"key":"10.1016\/j.artint.2025.104392_br0010","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1007\/s11721-012-0075-2","article-title":"Swarm robotics: a review from the swarm engineering perspective","volume":"7","author":"Brambilla","year":"2013","journal-title":"Swarm Intell."},{"key":"10.1016\/j.artint.2025.104392_br0020","author":"Shalev-Shwartz"},{"issue":"7587","key":"10.1016\/j.artint.2025.104392_br0030","doi-asserted-by":"crossref","first-page":"484","DOI":"10.1038\/nature16961","article-title":"Mastering the game of go with deep neural networks and tree search","volume":"529","author":"Silver","year":"2016","journal-title":"Nature"},{"key":"10.1016\/j.artint.2025.104392_br0040","first-page":"2159","article-title":"Near-optimal reinforcement learning with self-play","volume":"33","author":"Bai","year":"2020","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.artint.2025.104392_br0050","article-title":"V-learning\u2014a simple, efficient, decentralized algorithm for multiagent reinforcement learning","author":"Jin","year":"2023","journal-title":"Math. Oper. Res."},{"key":"10.1016\/j.artint.2025.104392_br0060","series-title":"International Conference on Learning Representations","article-title":"Towards general function approximation in zero-sum Markov games","author":"Huang","year":"2022"},{"key":"10.1016\/j.artint.2025.104392_br0070","series-title":"International Conference on Machine Learning","first-page":"7001","article-title":"A sharp analysis of model-based reinforcement learning with self-play","author":"Liu","year":"2021"},{"key":"10.1016\/j.artint.2025.104392_br0080","series-title":"International Conference on Machine Learning","first-page":"24496","article-title":"A self-play posterior sampling algorithm for zero-sum Markov games","author":"Xiong","year":"2022"},{"key":"10.1016\/j.artint.2025.104392_br0090","series-title":"Advances in Neural Information Processing Systems","article-title":"Posterior sampling for competitive RL: function approximation and partial observation","author":"Qiu","year":"2023"},{"key":"10.1016\/j.artint.2025.104392_br0100","series-title":"Advances in Neural Information Processing Systems","first-page":"1583","article-title":"Learning to optimize via information-directed sampling","author":"Russo","year":"2014"},{"issue":"1","key":"10.1016\/j.artint.2025.104392_br0110","doi-asserted-by":"crossref","first-page":"230","DOI":"10.1287\/opre.2017.1663","article-title":"Learning to optimize via information-directed sampling","volume":"66","author":"Russo","year":"2018","journal-title":"Oper. Res."},{"key":"10.1016\/j.artint.2025.104392_br0120","doi-asserted-by":"crossref","first-page":"28575","DOI":"10.52202\/068431-2071","article-title":"Regret bounds for information-directed reinforcement learning","volume":"35","author":"Hao","year":"2022","journal-title":"Adv. Neural Inf. Process. Syst."},{"issue":"1","key":"10.1016\/j.artint.2025.104392_br0130","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1561\/2200000070","article-title":"A tutorial on Thompson sampling","volume":"11","author":"Russo","year":"2018","journal-title":"Found. Trends Mach. Learn."},{"key":"10.1016\/j.artint.2025.104392_br0140","series-title":"International Conference on Learning Representations","article-title":"Information-directed exploration for deep reinforcement learning","author":"Nikolov","year":"2019"},{"issue":"6","key":"10.1016\/j.artint.2025.104392_br0150","doi-asserted-by":"crossref","first-page":"733","DOI":"10.1561\/2200000097","article-title":"Reinforcement learning, bit by bit","volume":"16","author":"Lu","year":"2023","journal-title":"Found. Trends Mach. Learn."},{"key":"10.1016\/j.artint.2025.104392_br0160","series-title":"Conference on Learning Theory","first-page":"358","article-title":"Information directed sampling and bandits with heteroscedastic noise","author":"Kirschner","year":"2018"},{"key":"10.1016\/j.artint.2025.104392_br0170","series-title":"Advances in Neural Information Processing Systems","first-page":"16738","article-title":"Information directed sampling for sparse linear bandits","author":"Hao","year":"2021"},{"key":"10.1016\/j.artint.2025.104392_br0180","article-title":"Exploration in deep reinforcement learning: from single-agent to multiagent domain","author":"Hao","year":"2023","journal-title":"IEEE Trans. Neural Netw. Learn. Syst."},{"key":"10.1016\/j.artint.2025.104392_br0190","series-title":"Rate-Distortion Theory","author":"Berger","year":"2003"},{"key":"10.1016\/j.artint.2025.104392_br0200","series-title":"International Conference on Machine Learning","first-page":"373","article-title":"Deciding what to learn: a rate-distortion approach","author":"Arumugam","year":"2021"},{"key":"10.1016\/j.artint.2025.104392_br0210","first-page":"9816","article-title":"The value of information when deciding what to learn","volume":"34","author":"Arumugam","year":"2021","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.artint.2025.104392_br0220","doi-asserted-by":"crossref","first-page":"9024","DOI":"10.52202\/068431-0656","article-title":"Deciding what to model: value-equivalent sampling for reinforcement learning","volume":"35","author":"Arumugam","year":"2022","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.artint.2025.104392_br0230","doi-asserted-by":"crossref","DOI":"10.1016\/j.artint.2023.104048","article-title":"Pessimistic value iteration for multi-task data sharing in offline reinforcement learning","volume":"326","author":"Bai","year":"2024","journal-title":"Artif. Intell."},{"key":"10.1016\/j.artint.2025.104392_br0240","series-title":"International Conference on Machine Learning","first-page":"551","article-title":"Provable self-play algorithms for competitive reinforcement learning","author":"Bai","year":"2020"},{"key":"10.1016\/j.artint.2025.104392_br0250","series-title":"Conference on Learning Theory","first-page":"3674","article-title":"Learning zero-sum simultaneous-move Markov games using function approximation and correlated equilibrium","author":"Xie","year":"2020"},{"issue":"1","key":"10.1016\/j.artint.2025.104392_br0260","first-page":"165","article-title":"Provably efficient reinforcement learning in decentralized general-sum Markov games","volume":"13","author":"Mao","year":"2023","journal-title":"Dyn. Games Appl."},{"key":"10.1016\/j.artint.2025.104392_br0270","series-title":"International Conference on Learning Representations","article-title":"When can we learn general-sum Markov games with a large number of players sample-efficiently?","author":"Song","year":"2022"},{"key":"10.1016\/j.artint.2025.104392_br0280","series-title":"Handbook of Reinforcement Learning and Control","first-page":"321","article-title":"Multi-agent reinforcement learning: a selective overview of theories and algorithms","author":"Zhang","year":"2021"},{"key":"10.1016\/j.artint.2025.104392_br0290","series-title":"International Conference on Machine Learning","first-page":"8715","article-title":"Provably efficient fictitious play policy optimization for zero-sum Markov games with structured transitions","author":"Qiu","year":"2021"},{"key":"10.1016\/j.artint.2025.104392_br0300","series-title":"International Conference on Algorithmic Learning Theory","first-page":"227","article-title":"Almost optimal algorithms for two-player zero-sum linear mixture Markov games","author":"Chen","year":"2022"},{"key":"10.1016\/j.artint.2025.104392_br0310","series-title":"International Conference on Machine Learning","first-page":"10251","article-title":"The power of exploiter: provable multi-agent rl in large state spaces","author":"Jin","year":"2022"},{"key":"10.1016\/j.artint.2025.104392_br0320","series-title":"Advances in Neural Information Processing Systems","article-title":"Maximize to explore: one objective function fusing estimation, planning, and exploration","author":"Liu","year":"2023"},{"key":"10.1016\/j.artint.2025.104392_br0330","series-title":"The Twelfth International Conference on Learning Representations (ICLR)","article-title":"On the role of general function approximation in offline reinforcement learning","author":"Mao","year":"2024"},{"key":"10.1016\/j.artint.2025.104392_br0340","series-title":"Advances in Neural Information Processing Systems","first-page":"18530","article-title":"Sample-efficient reinforcement learning of undercomplete pomdps","author":"Jin","year":"2020"},{"key":"10.1016\/j.artint.2025.104392_br0350","article-title":"Modelling the dynamics of multiagent q-learning in repeated symmetric games: a mean field theoretic approach","volume":"32","author":"Hu","year":"2019","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.artint.2025.104392_br0360","series-title":"Proceedings of the 21st International Conference on Autonomous Agents and Multiagent Systems","doi-asserted-by":"crossref","first-page":"615","DOI":"10.65109\/MLBH9101","article-title":"The dynamics of q-learning in population games: a physics-inspired continuity equation model","author":"Hu","year":"2022"},{"key":"10.1016\/j.artint.2025.104392_br0370","first-page":"18296","article-title":"Sample-efficient reinforcement learning of partially observable Markov games","volume":"35","author":"Liu","year":"2022","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.artint.2025.104392_br0380","series-title":"The Thirty Sixth Annual Conference on Learning Theory","first-page":"2678","article-title":"On the complexity of multi-agent decision making: from learning in games to partial monitoring","author":"Foster","year":"2023"},{"key":"10.1016\/j.artint.2025.104392_br0390","doi-asserted-by":"crossref","DOI":"10.52202\/075280-3374","article-title":"The best of both worlds in network population games: reaching consensus and convergence to equilibrium","volume":"36","author":"Hu","year":"2023","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.artint.2025.104392_br0400","series-title":"Proceedings of Thirty Sixth Conference on Learning Theory","article-title":"Breaking the curse of multiagency: provably efficient decentralized multi-agent RL with function approximation","author":"Wang","year":"2023"},{"key":"10.1016\/j.artint.2025.104392_br0410","series-title":"The Thirty Sixth Annual Conference on Learning Theory","first-page":"2651","article-title":"Breaking the curse of multiagents in a large state space: RL in Markov games with independent linear function approximation","author":"Cui","year":"2023"},{"key":"10.1016\/j.artint.2025.104392_br0420","author":"Xiong"},{"key":"10.1016\/j.artint.2025.104392_br0430","series-title":"AAAI Conference on Artificial Intelligence","article-title":"Information directed sampling for stochastic bandits with graph feedback","author":"Liu","year":"2018"},{"key":"10.1016\/j.artint.2025.104392_br0440","series-title":"Conference on Learning Theory","first-page":"2777","article-title":"Asymptotically optimal information-directed sampling","author":"Kirschner","year":"2021"},{"key":"10.1016\/j.artint.2025.104392_br0450","article-title":"Information-theoretic confidence bounds for reinforcement learning","volume":"33","author":"Lu","year":"2019","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.artint.2025.104392_br0460","series-title":"Advances in Neural Information Processing Systems","article-title":"Improved Bayesian regret bounds for Thompson sampling in reinforcement learning","author":"Moradipari","year":"2023"},{"key":"10.1016\/j.artint.2025.104392_br0470","series-title":"International Conference on Machine Learning","first-page":"3949","article-title":"Steering: Stein information directed exploration for model-based reinforcement learning","author":"Chakraborty","year":"2023"},{"key":"10.1016\/j.artint.2025.104392_br0480","first-page":"5541","article-title":"The value equivalence principle for model-based reinforcement learning","volume":"33","author":"Grimm","year":"2020","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.artint.2025.104392_br0490","first-page":"7773","article-title":"Proper value equivalence","volume":"34","author":"Grimm","year":"2021","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.artint.2025.104392_br0500","author":"Foster"},{"key":"10.1016\/j.artint.2025.104392_br0510","series-title":"International Conference on Machine Learning","first-page":"38768","article-title":"Bayesian design principles for frequentist sequential learning","author":"Xu","year":"2023"},{"key":"10.1016\/j.artint.2025.104392_br0520","series-title":"Elements of Information Theory","author":"Cover","year":"1999"},{"key":"10.1016\/j.artint.2025.104392_br0530","series-title":"Competitive Markov Decision Processes","author":"Filar","year":"2012"},{"issue":"4","key":"10.1016\/j.artint.2025.104392_br0540","first-page":"382","article-title":"Bayesian model averaging: a tutorial","volume":"14","author":"Hoeting","year":"1999","journal-title":"Stat. Sci."},{"key":"10.1016\/j.artint.2025.104392_br0550","series-title":"Advances in Neural Information Processing Systems","article-title":"Epistemic neural networks","author":"Osband","year":"2023"},{"issue":"4","key":"10.1016\/j.artint.2025.104392_br0560","doi-asserted-by":"crossref","first-page":"460","DOI":"10.1109\/TIT.1972.1054855","article-title":"Computation of channel capacity and rate-distortion functions","volume":"18","author":"Blahut","year":"1972","journal-title":"IEEE Trans. Inf. Theory"},{"issue":"1","key":"10.1016\/j.artint.2025.104392_br0570","doi-asserted-by":"crossref","first-page":"14","DOI":"10.1109\/TIT.1972.1054753","article-title":"An algorithm for computing the capacity of arbitrary discrete memoryless channels","volume":"18","author":"Arimoto","year":"1972","journal-title":"IEEE Trans. Inf. Theory"}],"container-title":["Artificial Intelligence"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0004370225001110?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0004370225001110?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2026,4,29]],"date-time":"2026-04-29T11:23:22Z","timestamp":1777461802000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S0004370225001110"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,11]]},"references-count":57,"alternative-id":["S0004370225001110"],"URL":"https:\/\/doi.org\/10.1016\/j.artint.2025.104392","relation":{},"ISSN":["0004-3702"],"issn-type":[{"value":"0004-3702","type":"print"}],"subject":[],"published":{"date-parts":[[2025,11]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"Provably efficient information-directed sampling algorithms for multi-agent reinforcement learning","name":"articletitle","label":"Article Title"},{"value":"Artificial Intelligence","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.artint.2025.104392","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2025 Published by Elsevier B.V.","name":"copyright","label":"Copyright"}],"article-number":"104392"}}