{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,1,25]],"date-time":"2025-01-25T05:05:55Z","timestamp":1737781555302,"version":"3.33.0"},"reference-count":38,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2025,1,24]],"date-time":"2025-01-24T00:00:00Z","timestamp":1737676800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,1,24]],"date-time":"2025-01-24T00:00:00Z","timestamp":1737676800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Mach. Intell. Res."],"published-print":{"date-parts":[[2025,2]]},"DOI":"10.1007\/s11633-023-1467-z","type":"journal-article","created":{"date-parts":[[2025,1,24]],"date-time":"2025-01-24T04:21:25Z","timestamp":1737692485000},"page":"160-175","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Prioritization Hindsight Experience Based on Spatial Position Attention for 
Robots"],"prefix":"10.1007","volume":"22","author":[{"ORCID":"https:\/\/orcid.org\/0009-0007-0025-8051","authenticated-orcid":false,"given":"Ye","family":"Yuan","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4521-2077","authenticated-orcid":false,"given":"Yu","family":"Sha","sequence":"additional","affiliation":[]},{"given":"Feixiang","family":"Sun","sequence":"additional","affiliation":[]},{"given":"Haofan","family":"Lu","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2619-6481","authenticated-orcid":false,"given":"Shuiping","family":"Gou","sequence":"additional","affiliation":[]},{"given":"Jie","family":"Luo","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,1,24]]},"reference":[{"key":"1467_CR1","first-page":"839","volume-title":"Proceedings of the 2nd Conference on Robot Learning","author":"R Kaushik","year":"2018","unstructured":"R. Kaushik, K. Chatzilygeroudis, J. B. Mouret. Multi-objective model-based policy search for data-efficient learning with sparse rewards. In Proceedings of the 2nd Conference on Robot Learning, Z\u00fcrich, Switzerland, vol.87, pp.839\u2013855, [Online], Available: https:\/\/arxiv.org\/abs\/1806.09351, 2018."},{"key":"1467_CR2","doi-asserted-by":"publisher","first-page":"6292","DOI":"10.1109\/ICRA.2018.8463162","volume-title":"Proceedings of IEEE International Conference on Robotics and Automation","author":"A Nair","year":"2018","unstructured":"A. Nair, B. McGrew, M. Andrychowicz, W. Zaremba, P. Abbeel. Overcoming exploration in reinforcement learning with demonstrations. In Proceedings of IEEE International Conference on Robotics and Automation, Brisbane, Australia, pp. 6292\u20136299, 2018. DOI: https:\/\/doi.org\/10.1109\/ICRA.2018.8463162."},{"issue":"7540","key":"1467_CR3","doi-asserted-by":"publisher","first-page":"529","DOI":"10.1038\/nature14236","volume":"518","author":"V Mnih","year":"2015","unstructured":"V. 
Mnih, K. Kavukcuoglu, D. Silver, A. A. Rusu, J. Veness, M. G. Bellemare, A. Graves, M. Riedmiller, A. K. Fidjeland, G. Ostrovski, S. Petersen, C. Beattie, A. Sadik, I. Antonoglou, H. King, D. Kumaran, D. Wierstra, S. Legg, D. Hassabis. Human-level control through deep reinforcement learning. Nature, vol.518, no. 7540, pp.529\u2013533, 2015. DOI: https:\/\/doi.org\/10.1038\/nature14236.","journal-title":"Nature"},{"key":"1467_CR4","first-page":"2778","volume-title":"Proceedings of the 34th International Conference on Machine Learning","author":"D Pathak","year":"2017","unstructured":"D. Pathak, P. Agrawal, A. A. Efros, T. Darrell. Curiosity-driven exploration by self-supervised prediction. In Proceedings of the 34th International Conference on Machine Learning, Sydney, Australia, pp. 2778\u20132787, 2017."},{"key":"1467_CR5","first-page":"7553","volume-title":"Proceedings of the 36th International Conference on Machine Learning","author":"R Zhao","year":"2019","unstructured":"R. Zhao, X. D. Sun, V. Tresp. Maximum entropy-regularized multi-goal reinforcement learning. In Proceedings of the 36th International Conference on Machine Learning, Long Beach, USA, pp. 7553\u20137562, 2019."},{"key":"1467_CR6","first-page":"278","volume-title":"Proceedings of the 16th International Conference on Machine Learning","author":"A Y Ng","year":"1999","unstructured":"A. Y. Ng, D. Harada, S. J. Russell. Policy invariance under reward transformations: Theory and application to reward shaping. In Proceedings of the 16th International Conference on Machine Learning, Bled, Slovenia, pp. 278\u2013287, 1999."},{"key":"1467_CR7","volume-title":"Proceedings of the 34th International Conference on Neural Information Processing Systems","author":"Y J Hu","year":"2020","unstructured":"Y. J. Hu, W. X. Wang, H. T. Jia, Y. X. Wang, Y. F. Chen, J. Y. Hao, F. Wu, C. J. Fan. Learning to utilize shaping rewards: A new approach of reward shaping. 
In Proceedings of the 34th International Conference on Neural Information Processing Systems, Vancouver, Canada, Article number 1336, 2020."},{"key":"1467_CR8","first-page":"5055","volume-title":"Proceedings of the 31st International Conference on Neural Information Processing Systems","author":"M Andrychowicz","year":"2017","unstructured":"M. Andrychowicz, F. Wolski, A. Ray, J. Schneider, R. Fong, P. Welinder, B. McGrew, J. Tobin, P. Abbeel, W. Zaremba. Hindsight experience replay. In Proceedings of the 31st International Conference on Neural Information Processing Systems, Long Beach, USA, pp. 5055\u20135065, 2017."},{"key":"1467_CR9","doi-asserted-by":"publisher","first-page":"237","DOI":"10.1613\/jair.301","volume":"4","author":"L P Kaelbling","year":"1996","unstructured":"L. P. Kaelbling, M. L. Littman, A. W. Moore. Reinforcement learning: A survey. Journal of Artificial Intelligence Research, vol.4, pp. 237\u2013285, 1996. DOI: https:\/\/doi.org\/10.1613\/jair.301.","journal-title":"Journal of Artificial Intelligence Research"},{"issue":"8","key":"1467_CR10","doi-asserted-by":"publisher","first-page":"3796","DOI":"10.1109\/TNNLS.2021.3124466","volume":"34","author":"L C Garaffa","year":"2023","unstructured":"L. C. Garaffa, M. Basso, A. A. Konzen, E. P. de Freitas. Reinforcement learning for mobile robotics exploration: A survey. IEEE Transactions on Neural Networks and Learning Systems, vol.34, no.8, pp. 3796\u20133810, 2023. DOI: https:\/\/doi.org\/10.1109\/TNNLS.2021.3124466.","journal-title":"IEEE Transactions on Neural Networks and Learning Systems"},{"key":"1467_CR11","volume-title":"Incentivizing exploration in reinforcement learning with deep predictive models","author":"B C Stadie","year":"2015","unstructured":"B. C. Stadie, S. Levine, P. Abbeel. 
Incentivizing exploration in reinforcement learning with deep predictive models, [Online], Available: https:\/\/arxiv.org\/abs\/1507.00814, 2015."},{"key":"1467_CR12","first-page":"17","volume":"3","author":"R McFarlane","year":"2018","unstructured":"R. McFarlane. A survey of exploration strategies in reinforcement learning. McGill University, vol.3, pp. 17\u201318, 2018.","journal-title":"McGill University"},{"key":"1467_CR13","series-title":"Ph. D. dissertation","volume-title":"Theory and Application of Reward Shaping in Reinforcement Learning","author":"A D Laud","year":"2004","unstructured":"A. D. Laud. Theory and Application of Reward Shaping in Reinforcement Learning, Ph. D. dissertation, University of Illinois at Urbana-Champaign, USA, 2004."},{"issue":"4","key":"1467_CR14","doi-asserted-by":"publisher","first-page":"541","DOI":"10.1016\/j.neunet.2010.01.001","volume":"23","author":"M Grzes","year":"2010","unstructured":"M. Grzes, D. Kudenko. Online learning of shaping rewards in reinforcement learning. Neural networks, vol.23, no.4, pp. 541\u2013550, 2010. DOI: https:\/\/doi.org\/10.1016\/j.neunet.2010.01.001.","journal-title":"Neural networks"},{"issue":"5","key":"1467_CR15","doi-asserted-by":"publisher","first-page":"674","DOI":"10.26599\/TST.2021.9010012","volume":"26","author":"K Zhu","year":"2021","unstructured":"K. Zhu, T. Zhang. Deep reinforcement learning based mobile robot navigation: A review. Tsinghua Science and Technology, vol.26, no. 5, pp. 674\u2013691, 2021. DOI: https:\/\/doi.org\/10.26599\/TST.2021.9010012.","journal-title":"Tsinghua Science and Technology"},{"key":"1467_CR16","volume-title":"Multi-goal reinforcement learning: Challenging robotics environments and request for research","author":"M Plappert","year":"2018","unstructured":"M. Plappert, M. Andrychowicz, A. Ray, B. McGrew, B. Baker, G. Powell, J. Schneider, J. Tobin, M. Chociej, P. Welinder, V. Kumar, W. Zaremba. 
Multi-goal reinforcement learning: Challenging robotics environments and request for research, [Online], Available: https:\/\/arxiv.org\/abs\/1802.09464, 2018."},{"key":"1467_CR17","volume-title":"ARCHER: Aggressive rewards to counter bias in hindsight experience replay","author":"S Lanka","year":"2018","unstructured":"S. Lanka, T. F. Wu. ARCHER: Aggressive rewards to counter bias in hindsight experience replay, [Online], Available: https:\/\/arxiv.org\/abs\/1809.02070, 2018."},{"key":"1467_CR18","volume-title":"Proceedings of the 7th International Conference on Learning Representations","author":"P E Rauber","year":"2019","unstructured":"P. E. Rauber, A. Ummadisingu, F. Mutz, J. Schmidhuber. Hindsight policy gradients. In Proceedings of the 7th International Conference on Learning Representations, New Orleans, USA, 2019."},{"key":"1467_CR19","volume-title":"Proceedings of the 7th International Conference on Learning Representations","author":"M Fang","year":"2019","unstructured":"M. Fang, C. Zhou, B. Shi, B. Q. Gong, J. Xu, T. Zhang. DHER: Hindsight experience replay for dynamic goals. In Proceedings of the 7th International Conference on Learning Representations, New Orleans, USA, 2019."},{"key":"1467_CR20","volume-title":"Proceedings of the 7th International Conference on Learning Representations","author":"H Liu","year":"2019","unstructured":"H. Liu, A. Trott, R. Socher, C. M. Xiong. Competitive experience replay. In Proceedings of the 7th International Conference on Learning Representations, New Orleans, USA, 2019."},{"key":"1467_CR21","volume-title":"Curiosity-driven experience prioritization via density estimation","author":"R Zhao","year":"2019","unstructured":"R. Zhao, V. Tresp. 
Curiosity-driven experience prioritization via density estimation, [Online], Available: https:\/\/arxiv.org\/abs\/1902.08039, 2019."},{"key":"1467_CR22","volume-title":"Proceedings of the 33rd International Conference on Neural Information Processing Systems","author":"M Fang","year":"2019","unstructured":"M. Fang, T. Y. Zhou, Y. L. Du, L. Han, Z. Y. Zhang. Curriculum-guided hindsight experience replay. In Proceedings of the 33rd International Conference on Neural Information Processing Systems, Vancouver, Canada, Article number 1131, 2019."},{"key":"1467_CR23","first-page":"113","volume-title":"Proceedings of the 2nd Annual Conference on Robot Learning","author":"R Zhao","year":"2018","unstructured":"R. Zhao, V. Tresp. Energy-based hindsight experience prioritization. In Proceedings of the 2nd Annual Conference on Robot Learning, Z\u00fcrich, Switzerland, pp. 113\u2013122, 2018."},{"key":"1467_CR24","first-page":"1008","volume-title":"Proceedings of the 12th International Conference on Neural Information Processing Systems","author":"V R Konda","year":"2000","unstructured":"V. R. Konda, J. N. Tsitsiklis. Actor-critic algorithms. In Proceedings of the 12th International Conference on Neural Information Processing Systems, Denver, USA, pp.1008\u20131014, 2000."},{"issue":"4","key":"1467_CR25","doi-asserted-by":"publisher","first-page":"1143","DOI":"10.1137\/S0363012901385691","volume":"42","author":"V R Konda","year":"2003","unstructured":"V. R. Konda, J. N. Tsitsiklis. On actor-critic algorithms. SIAM Journal on Control and Optimization, vol.42, no.4, pp. 1143\u20131166, 2003. DOI: https:\/\/doi.org\/10.1137\/S0363012901385691.","journal-title":"SIAM Journal on Control and Optimization"},{"issue":"6","key":"1467_CR26","doi-asserted-by":"publisher","first-page":"26","DOI":"10.1109\/MSP.2017.2743240","volume":"34","author":"K Arulkumaran","year":"2017","unstructured":"K. Arulkumaran, M. P. Deisenroth, M. Brundage, A. A. Bharath. 
Deep reinforcement learning: A brief survey. IEEE Signal Processing Magazine, vol. 34, no. 6, pp. 26\u201338, 2017. DOI: https:\/\/doi.org\/10.1109\/MSP.2017.2743240.","journal-title":"IEEE Signal Processing Magazine"},{"key":"1467_CR27","volume-title":"Proceedings of the 4th International Conference on Learning Representations","author":"T P Lillicrap","year":"2016","unstructured":"T. P. Lillicrap, J. J. Hunt, A. Pritzel, N. Heess, T. Erez, Y. Tassa, D. Silver, D. Wierstra. Continuous control with deep reinforcement learning. In Proceedings of the 4th International Conference on Learning Representations, San Juan, Puerto Rico, 2016."},{"key":"1467_CR28","first-page":"195","volume-title":"Proceedings of the 1st Annual Conference on Robot Learning","author":"G Kalweit","year":"2017","unstructured":"G. Kalweit, J. Boedecker. Uncertainty-driven imagination for continuous deep reinforcement learning. In Proceedings of the 1st Annual Conference on Robot Learning, Mountain View, USA, pp. 195\u2013206, 2017."},{"issue":"2","key":"1467_CR29","doi-asserted-by":"publisher","first-page":"604","DOI":"10.1109\/TCYB.2019.2939174","volume":"51","author":"Z Z Zhang","year":"2021","unstructured":"Z. Z. Zhang, J. L. Chen, Z. B. Chen, W. P. Li. Asynchronous episodic deep deterministic policy gradient: Toward continuous control in computationally complex environments. IEEE Transactions on Cybernetics, vol.51, no. 2, pp. 604\u2013613, 2021. DOI: https:\/\/doi.org\/10.1109\/TCYB.2019.2939174.","journal-title":"IEEE Transactions on Cybernetics"},{"key":"1467_CR30","volume-title":"Proceedings of the 6th International Conference on Learning Representations","author":"G Barth-Maron","year":"2018","unstructured":"G. Barth-Maron, M. W. Hoffman, D. Budden, W. Dabney, D. Horgan, T. B. Dhruva, A. Muldal, N. Heess, T. P. Lillicrap. Distributed distributional deterministic policy gradients. 
In Proceedings of the 6th International Conference on Learning Representations, Vancouver, Canada, 2018."},{"key":"1467_CR31","doi-asserted-by":"publisher","first-page":"4213","DOI":"10.1609\/aaai.v33i01.33014213","volume-title":"Proceedings of the 33rd AAAI Conference on Artificial Intelligence","author":"S H Li","year":"2019","unstructured":"S. H. Li, Y. Wu, X. Y. Cui, H. H. Dong, F. Fang, S. Russell. Robust multi-agent reinforcement learning via minimax deep deterministic policy gradient. In Proceedings of the 33rd AAAI Conference on Artificial Intelligence, Honolulu, USA, pp.4213\u20134220, 2019. DOI: https:\/\/doi.org\/10.1609\/aaai.v33i01.33014213."},{"key":"1467_CR32","volume-title":"Proceedings of the 10th International Conference on Agents and Artificial Intelligence","author":"G Leuenberger","year":"2018","unstructured":"G. Leuenberger, M. Wiering. Actor-critic reinforcement learning with neural networks in continuous games. In Proceedings of the 10th International Conference on Agents and Artificial Intelligence, Funchal, Portugal, 2018."},{"issue":"3","key":"1467_CR33","doi-asserted-by":"publisher","first-page":"423","DOI":"10.1017\/S0140525X00079577","volume":"13","author":"J K Tsotsos","year":"1990","unstructured":"J. K. Tsotsos. Analyzing vision at the complexity level. Behavioral and brain sciences, vol.13, no. 3, pp. 423\u2013445, 1990. DOI: https:\/\/doi.org\/10.1017\/S0140525X00079577.","journal-title":"Behavioral and brain sciences"},{"key":"1467_CR34","first-page":"2048","volume-title":"Proceedings of the 32nd International Conference on Machine Learning","author":"K Xu","year":"2015","unstructured":"K. Xu, J. Ba, R. Kiros, K. Cho, A. C. Courville, R. Salakhutdinov, R. S. Zemel, Y. Bengio. Show, attend and tell: Neural image caption generation with visual attention. In Proceedings of the 32nd International Conference on Machine Learning, Lille, France, pp. 
2048\u20132057, 2015."},{"key":"1467_CR35","doi-asserted-by":"publisher","first-page":"1412","DOI":"10.18653\/v1\/D15-1166","volume-title":"Proceedings of Conference on Empirical Methods in Natural Language Processing","author":"M T Luong","year":"2015","unstructured":"M. T. Luong, H. Pham, C. D. Manning. Effective approaches to attention-based neural machine translation. In Proceedings of Conference on Empirical Methods in Natural Language Processing, Lisbon, Portugal, pp.1412\u20131421, 2015. DOI: https:\/\/doi.org\/10.18653\/v1\/D15-1166."},{"issue":"1","key":"1467_CR36","doi-asserted-by":"publisher","first-page":"131","DOI":"10.1109\/72.80212","volume":"1","author":"T M Martinetz","year":"1990","unstructured":"T. M. Martinetz, H. J. Ritter, K. J. Schulten. Three-dimensional neural net for learning visuomotor coordination of a robot arm. IEEE Transactions on Neural Networks, vol.1, no. 1, pp. 131\u2013136, 1990. DOI: https:\/\/doi.org\/10.1109\/72.80212.","journal-title":"IEEE Transactions on Neural Networks"},{"issue":"2","key":"1467_CR37","doi-asserted-by":"publisher","first-page":"333","DOI":"10.1016\/j.automatica.2008.08.021","volume":"45","author":"T Wojtara","year":"2009","unstructured":"T. Wojtara, M. Uchihara, H. Murayama, S. Shimoda, S. Sakai, H. Fujimoto, H. Kimura. Human-robot collaboration in precise positioning of a three-dimensional object. Automatica, vol.45, no. 2, pp. 333\u2013342, 2009. DOI: https:\/\/doi.org\/10.1016\/j.automatica.2008.08.021.","journal-title":"Automatica"},{"key":"1467_CR38","volume-title":"Proceedings of the 4th International Conference on Learning Representations","author":"T Schaul","year":"2016","unstructured":"T. Schaul, J. Quan, I. Antonoglou, D. Silver. Prioritized experience replay. 
In Proceedings of the 4th International Conference on Learning Representations, San Juan, Puerto Rico, 2016."}],"container-title":["Machine Intelligence Research"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11633-023-1467-z.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11633-023-1467-z\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11633-023-1467-z.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,1,24]],"date-time":"2025-01-24T04:21:38Z","timestamp":1737692498000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11633-023-1467-z"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,1,24]]},"references-count":38,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2025,2]]}},"alternative-id":["1467"],"URL":"https:\/\/doi.org\/10.1007\/s11633-023-1467-z","relation":{},"ISSN":["2731-538X","2731-5398"],"issn-type":[{"value":"2731-538X","type":"print"},{"value":"2731-5398","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,1,24]]},"assertion":[{"value":"11 April 2023","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"16 August 2023","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"24 January 2025","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"The authors declared that they have no conflicts of interest to this work.","order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations of conflict of 
interest"}}]}}