{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,21]],"date-time":"2026-02-21T18:13:19Z","timestamp":1771697599661,"version":"3.50.1"},"reference-count":38,"publisher":"Springer Science and Business Media LLC","issue":"4","license":[{"start":{"date-parts":[[2021,7,9]],"date-time":"2021-07-09T00:00:00Z","timestamp":1625788800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2021,7,9]],"date-time":"2021-07-09T00:00:00Z","timestamp":1625788800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Appl Intell"],"published-print":{"date-parts":[[2022,3]]},"DOI":"10.1007\/s10489-021-02554-5","type":"journal-article","created":{"date-parts":[[2021,7,9]],"date-time":"2021-07-09T06:02:40Z","timestamp":1625810560000},"page":"3691-3704","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":16,"title":["Improving sample efficiency in Multi-Agent Actor-Critic methods"],"prefix":"10.1007","volume":"52","author":[{"given":"Zhenhui","family":"Ye","sequence":"first","affiliation":[]},{"given":"Yining","family":"Chen","sequence":"additional","affiliation":[]},{"given":"Xiaohong","family":"Jiang","sequence":"additional","affiliation":[]},{"given":"Guanghua","family":"Song","sequence":"additional","affiliation":[]},{"given":"Bowei","family":"Yang","sequence":"additional","affiliation":[]},{"given":"Sheng","family":"Fan","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2021,7,9]]},"reference":[{"issue":"7587","key":"2554_CR1","doi-asserted-by":"publisher","first-page":"484","DOI":"10.1038\/nature16961","volume":"529","author":"D Silver","year":"2016","unstructured":"Silver D, Huang A, Maddison CJ, Guez A, Sifre L, Van Den Driessche G, Schrittwieser J, Antonoglou I, Panneershelvam V, Lanctot M, et al (2016) Mastering the game of go with deep neural networks and tree search. Nature 529(7587):484","journal-title":"Nature"},{"key":"2554_CR2","doi-asserted-by":"crossref","unstructured":"Chebotar Y, Kalakrishnan M, Yahya A, Li A, Schaal S, Levine S (2017) Path integral guided policy search. In: 2017 IEEE international conference on robotics and automation (ICRA). IEEE, pp 3381\u20133388","DOI":"10.1109\/ICRA.2017.7989384"},{"issue":"1","key":"2554_CR3","first-page":"1334","volume":"17","author":"S Levine","year":"2016","unstructured":"Levine S, Finn C, Darrell T, Abbeel P (2016) End-to-end training of deep visuomotor policies. J Mach Learn Res 17(1):1334\u20131373","journal-title":"J Mach Learn Res"},{"issue":"4","key":"2554_CR4","doi-asserted-by":"publisher","first-page":"297","DOI":"10.1016\/j.trb.2004.03.005","volume":"39","author":"JL Adler","year":"2005","unstructured":"Adler JL, Satapathy G, Manikonda V, Bowles B, Blue VJ (2005) A multi-agent approach to cooperative traffic management and route guidance. Transp Res B 39(4):297\u2013318","journal-title":"Transp Res B"},{"key":"2554_CR5","doi-asserted-by":"crossref","unstructured":"Liu CH, Ma X, Gao X, Tang J (2019) Distributed energy-efficient multi-uav navigation for long-term communication coverage by deep reinforcement learning. IEEE Trans Mob Comput","DOI":"10.1109\/TMC.2019.2908171"},{"issue":"5","key":"2554_CR6","doi-asserted-by":"publisher","first-page":"10026","DOI":"10.3390\/s150510026","volume":"15","author":"D Ye","year":"2015","unstructured":"Ye D, Zhang M, Yang Y (2015) A multi-agent framework for packet routing in wireless sensor networks. Sensors 15(5):10026\u201310047","journal-title":"Sensors"},{"issue":"746-752","key":"2554_CR7","first-page":"2","volume":"1998","author":"C Claus","year":"1998","unstructured":"Claus C, Boutilier C (1998) The dynamics of reinforcement learning in cooperative multiagent systems. AAAI\/IAAI 1998(746-752):2","journal-title":"AAAI\/IAAI"},{"key":"2554_CR8","unstructured":"Lowe R, Wu YI, Tamar A, Harb J, Abbeel OP, Mordatch I (2017) Multi-agent actor-critic for mixed cooperative-competitive environments. In: Advances in neural information processing systems, pp 6379\u20136390"},{"key":"2554_CR9","unstructured":"Iqbal S, Sha F (2019) Actor-attention-critic for multi-agent reinforcement learning. In: ICML"},{"key":"2554_CR10","doi-asserted-by":"crossref","unstructured":"Liu Y, Wang W, Hu Y, Hao J, Chen X, Gao Y (2020) Multi-agent game abstraction via graph attention neural network.. In: AAAI, pp 7211\u20137218","DOI":"10.1609\/aaai.v34i05.6211"},{"issue":"12","key":"2554_CR11","doi-asserted-by":"publisher","first-page":"4195","DOI":"10.1007\/s10489-020-01755-8","volume":"50","author":"H Chen","year":"2020","unstructured":"Chen H, Liu Y, Zhou Z, Hu D, Zhang M (2020) Gama: Graph attention multi-agent reinforcement learning algorithm for cooperation. Appl Intell 50(12):4195\u20134205","journal-title":"Appl Intell"},{"key":"2554_CR12","doi-asserted-by":"crossref","unstructured":"Foerster J, Farquhar G, Afouras T, Nardelli N, Whiteson S (2017) Counterfactual multi-agent policy gradients","DOI":"10.1609\/aaai.v32i1.11794"},{"key":"2554_CR13","unstructured":"Ackermann J, Gabler V, Osa T, Sugiyama M (2019) Reducing overestimation bias in multi-agent domains using double centralized critics. arXiv:1910.01465"},{"key":"2554_CR14","unstructured":"Laskin M, Lee K, Stooke A, Pinto L, Abbeel P, Srinivas A (2020) Reinforcement learning with augmented data. arXiv:2004.14990"},{"issue":"11","key":"2554_CR15","doi-asserted-by":"publisher","first-page":"2278","DOI":"10.1109\/5.726791","volume":"86","author":"Y LeCun","year":"1998","unstructured":"LeCun Y, Bottou L, Bengio Y, Haffner P (1998) Gradient-based learning applied to document recognition. Proc IEEE 86(11):2278\u20132324","journal-title":"Proc IEEE"},{"key":"2554_CR16","doi-asserted-by":"crossref","unstructured":"Pinto L, Andrychowicz M, Welinder P, Zaremba W, Abbeel P (2017) Asymmetric actor critic for image-based robot learning. arXiv:1710.06542","DOI":"10.15607\/RSS.2018.XIV.008"},{"key":"2554_CR17","unstructured":"Cobbe K, Klimov O, Hesse C, Kim T, Schulman J (2019) Quantifying generalization in reinforcement learning. In: International Conference on Machine Learning. PMLR, pp 1282\u20131289"},{"key":"2554_CR18","unstructured":"Mnih V, Badia AP, Mirza M, Graves A, Lillicrap T, Harley T, Silver D, Kavukcuoglu K (2016) Asynchronous methods for deep reinforcement learning. In: International conference on machine learning, pp 1928\u20131937"},{"key":"2554_CR19","first-page":"1587","volume":"80","author":"S Fujimoto","year":"2018","unstructured":"Fujimoto S, van Hoof H, Meger D (2018) Addressing function approximation error in actor-critic methods. Proc Mach Learn Res 80:1587\u20131596","journal-title":"Proc Mach Learn Res"},{"key":"2554_CR20","doi-asserted-by":"crossref","unstructured":"Peters J, Schaal S (2006) Policy gradient methods for robotics. In: 2006 IEEE\/RSJ International Conference on Intelligent Robots and Systems. IEEE, pp 2219\u20132225","DOI":"10.1109\/IROS.2006.282564"},{"key":"2554_CR21","unstructured":"Haarnoja T, Zhou A, Abbeel P, Levine S (2018) Soft actor-critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor. In: ICML"},{"key":"2554_CR22","unstructured":"Vaswani A, Shazeer N, Parmar N, Uszkoreit J, Jones L, Gomez AN, Kaiser L, Polosukhin I (2017) Attention is all you need. In: Advances in neural information processing systems, pp 5998\u20136008"},{"key":"2554_CR23","unstructured":"Liu I-J, Yeh RA, Schwing AG (2019) Pic: Permutation invariant critic for multi-agent deep reinforcement learning. arXiv:1911.00025"},{"key":"2554_CR24","unstructured":"Estrach JB, Zaremba W, Szlam A, LeCun Y (2014) Spectral networks and deep locally connected networks on graphs. In: 2nd International Conference on Learning Representations, ICLR 2014"},{"key":"2554_CR25","unstructured":"Schaul T, Quan J, Antonoglou I, Silver D (2015) Prioritized experience replay. arXiv:1511.05952"},{"key":"2554_CR26","unstructured":"Horgan D, Quan J, Budden D, Barth-Maron G, Hessel M, Van Hasselt H, Silver D (2018) Distributed prioritized experience replay. arXiv:1803.00933"},{"key":"2554_CR27","doi-asserted-by":"crossref","unstructured":"Aotani T, Kobayashi T, Sugimoto K (2021) Bottom-up multi-agent reinforcement learning by reward shaping for cooperative-competitive tasks. In: Appl Intell:1\u201319","DOI":"10.1007\/s10489-020-02034-2"},{"key":"2554_CR28","unstructured":"Srinivas A, Laskin M, Abbeel P (2020) Curl: Contrastive unsupervised representations for reinforcement learning. arXiv:2004.04136"},{"key":"2554_CR29","doi-asserted-by":"crossref","unstructured":"Littman ML (1994) Markov games as a framework for multi-agent reinforcement learning. In: Machine learning proceedings 1994. Elsevier, pp 157\u2013163","DOI":"10.1016\/B978-1-55860-335-6.50027-1"},{"issue":"3-4","key":"2554_CR30","doi-asserted-by":"publisher","first-page":"279","DOI":"10.1007\/BF00992698","volume":"8","author":"CJCH Watkins","year":"1992","unstructured":"Watkins CJCH, Dayan P (1992) Q-learning. Mach Learn 8(3-4):279\u2013292","journal-title":"Mach Learn"},{"issue":"7540","key":"2554_CR31","doi-asserted-by":"publisher","first-page":"529","DOI":"10.1038\/nature14236","volume":"518","author":"V Mnih","year":"2015","unstructured":"Mnih V, Kavukcuoglu K, Silver D, Rusu AA, Veness J, Bellemare MG, Graves A, Riedmiller M, Fidjeland AK, Ostrovski G, et al (2015) Human-level control through deep reinforcement learning. Nature 518(7540):529\u2013533","journal-title":"Nature"},{"issue":"3-4","key":"2554_CR32","doi-asserted-by":"publisher","first-page":"293","DOI":"10.1007\/BF00992699","volume":"8","author":"L-J Lin","year":"1992","unstructured":"Lin L-J (1992) Self-improving reactive agents based on reinforcement learning, planning and teaching. Mach Learn 8(3-4):293\u2013321","journal-title":"Mach Learn"},{"key":"2554_CR33","unstructured":"Lillicrap T, Hunt J, Pritzel A, Heess N, Erez T, Tassa Y, Silver D, Wierstra D (2015) Continuous control with deep reinforcement learning. CoRR"},{"key":"2554_CR34","doi-asserted-by":"crossref","unstructured":"Mordatch I, Abbeel P (2018) Emergence of grounded compositional language in multi-agent populations. In: Thirty-Second AAAI Conference on Artificial Intelligenc","DOI":"10.1609\/aaai.v32i1.11492"},{"issue":"5","key":"2554_CR35","doi-asserted-by":"publisher","first-page":"662","DOI":"10.1109\/TRA.2002.804040","volume":"18","author":"R Vidal","year":"2002","unstructured":"Vidal R, Shakernia O, Kim HJ, Shim DH, Sastry S (2002) Probabilistic pursuit-evasion games: theory, implementation, and experimental evaluation. IEEE Trans Robot Autom 18(5):662\u2013669","journal-title":"IEEE Trans Robot Autom"},{"key":"2554_CR36","doi-asserted-by":"crossref","unstructured":"Gupta JK, Egorov M, Kochenderfer M (2017) Cooperative multi-agent control using deep reinforcement learning. In: International Conference on Autonomous Agents and Multiagent Systems. Springer, pp 66\u201383","DOI":"10.1007\/978-3-319-71682-4_5"},{"issue":"3731","key":"2554_CR37","doi-asserted-by":"publisher","first-page":"34","DOI":"10.1126\/science.153.3731.34","volume":"153","author":"R Bellman","year":"1966","unstructured":"Bellman R (1966) Dynamic programming. Science 153(3731):34\u201337","journal-title":"Science"},{"key":"2554_CR38","unstructured":"Kingma DP, Ba JL Adam: Amethod for stochastic optimization"}],"container-title":["Applied Intelligence"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10489-021-02554-5.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10489-021-02554-5\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10489-021-02554-5.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,1,3]],"date-time":"2023-01-03T12:36:16Z","timestamp":1672749376000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10489-021-02554-5"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,7,9]]},"references-count":38,"journal-issue":{"issue":"4","published-print":{"date-parts":[[2022,3]]}},"alternative-id":["2554"],"URL":"https:\/\/doi.org\/10.1007\/s10489-021-02554-5","relation":{},"ISSN":["0924-669X","1573-7497"],"issn-type":[{"value":"0924-669X","type":"print"},{"value":"1573-7497","type":"electronic"}],"subject":[],"published":{"date-parts":[[2021,7,9]]},"assertion":[{"value":"21 May 2021","order":1,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"9 July 2021","order":2,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}