{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,21]],"date-time":"2026-03-21T21:05:54Z","timestamp":1774127154038,"version":"3.50.1"},"reference-count":116,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2023,12,1]],"date-time":"2023-12-01T00:00:00Z","timestamp":1701388800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2023,12,1]],"date-time":"2023-12-01T00:00:00Z","timestamp":1701388800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2023,12,1]],"date-time":"2023-12-01T00:00:00Z","timestamp":1701388800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-017"},{"start":{"date-parts":[[2023,12,1]],"date-time":"2023-12-01T00:00:00Z","timestamp":1701388800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"},{"start":{"date-parts":[[2023,12,1]],"date-time":"2023-12-01T00:00:00Z","timestamp":1701388800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-012"},{"start":{"date-parts":[[2023,12,1]],"date-time":"2023-12-01T00:00:00Z","timestamp":1701388800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2023,12,1]],"date-time":"2023-12-01T00:00:00Z","timestamp":1701388800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-004"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["72001212"],"award-info":[{"award-number":["72001212"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["72201273"],"award-info":[{"award-number":["72201273"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100020758","name":"Science and Technology Innovation Team of Shanxi Province","doi-asserted-by":"publisher","award":["2023-CX-TD-07"],"award-info":[{"award-number":["2023-CX-TD-07"]}],"id":[{"id":"10.13039\/501100020758","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Applied Soft Computing"],"published-print":{"date-parts":[[2023,12]]},"DOI":"10.1016\/j.asoc.2023.110975","type":"journal-article","created":{"date-parts":[[2023,10,27]],"date-time":"2023-10-27T22:24:48Z","timestamp":1698445488000},"page":"110975","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":51,"special_numbering":"PA","title":["Ensemble reinforcement learning: A survey"],"prefix":"10.1016","volume":"149","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-4313-8312","authenticated-orcid":false,"given":"Yanjie","family":"Song","sequence":"first","affiliation":[]},{"given":"Ponnuthurai Nagaratnam","family":"Suganthan","sequence":"additional","affiliation":[]},{"given":"Witold","family":"Pedrycz","sequence":"additional","affiliation":[]},{"given":"Junwei","family":"Ou","sequence":"additional","affiliation":[]},{"given":"Yongming","family":"He","sequence":"additional","affiliation":[]},{"given":"Yingwu","family":"Chen","sequence":"additional","affiliation":[]},{"given":"Yutong","family":"Wu","sequence":"additional","affiliation":[]}],"member":"78","reference":[{"issue":"7540","key":"10.1016\/j.asoc.2023.110975_b1","doi-asserted-by":"crossref","first-page":"529","DOI":"10.1038\/nature14236","article-title":"Human-level control through deep reinforcement learning","volume":"518","author":"Mnih","year":"2015","journal-title":"Nature"},{"issue":"7587","key":"10.1016\/j.asoc.2023.110975_b2","doi-asserted-by":"crossref","first-page":"484","DOI":"10.1038\/nature16961","article-title":"Mastering the game of go with deep neural networks and tree search","volume":"529","author":"Silver","year":"2016","journal-title":"Nature"},{"issue":"7782","key":"10.1016\/j.asoc.2023.110975_b3","doi-asserted-by":"crossref","first-page":"350","DOI":"10.1038\/s41586-019-1724-z","article-title":"Grandmaster level in StarCraft II using multi-agent reinforcement learning","volume":"575","author":"Vinyals","year":"2019","journal-title":"Nature"},{"key":"10.1016\/j.asoc.2023.110975_b4","series-title":"International Conference on Learning Representations","article-title":"Model based reinforcement learning for atari","author":"Kaiser","year":"2020"},{"issue":"1","key":"10.1016\/j.asoc.2023.110975_b5","doi-asserted-by":"crossref","first-page":"22","DOI":"10.3390\/robotics10010022","article-title":"Deep reinforcement learning for the control of robotic manipulation: a focussed mini-review","volume":"10","author":"Liu","year":"2021","journal-title":"Robotics"},{"key":"10.1016\/j.asoc.2023.110975_b6","series-title":"International Conference on Machine Learning","first-page":"1587","article-title":"Addressing function approximation error in actor-critic methods","author":"Fujimoto","year":"2018"},{"key":"10.1016\/j.asoc.2023.110975_b7","article-title":"Stabilizing off-policy q-learning via bootstrapping error reduction","volume":"32","author":"Kumar","year":"2019","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.asoc.2023.110975_b8","series-title":"Ucb exploration via q-ensembles","author":"Chen","year":"2017"},{"key":"10.1016\/j.asoc.2023.110975_b9","series-title":"Essay on the Application of Analysis to the Probability of Majority Decisions","author":"Condorcet","year":"1785"},{"key":"10.1016\/j.asoc.2023.110975_b10","series-title":"Advances in Neural Information Processing Systems, Vol. 7","first-page":"231","article-title":"Neural network ensembles, cross validation, and active learning","author":"Krogh","year":"1995"},{"key":"10.1016\/j.asoc.2023.110975_b11","doi-asserted-by":"crossref","first-page":"5","DOI":"10.1023\/A:1010933404324","article-title":"Random forests","volume":"45","author":"Breiman","year":"2001","journal-title":"Mach. Learn."},{"issue":"1","key":"10.1016\/j.asoc.2023.110975_b12","doi-asserted-by":"crossref","first-page":"5","DOI":"10.1016\/j.inffus.2004.04.004","article-title":"Diversity creation methods: a survey and categorisation","volume":"6","author":"Brown","year":"2005","journal-title":"Inf. Fusion"},{"key":"10.1016\/j.asoc.2023.110975_b13","series-title":"Multiple Classifier Systems: First International Workshop, MCS 2000 Cagliari, Italy, June 21\u201323, 2000 Proceedings 1","first-page":"1","article-title":"Ensemble methods in machine learning","author":"Dietterich","year":"2000"},{"key":"10.1016\/j.asoc.2023.110975_b14","doi-asserted-by":"crossref","unstructured":"H. Yang, X.-Y. Liu, S. Zhong, A. Walid, Deep reinforcement learning for automated stock trading: An ensemble strategy, in: Proceedings of the First ACM International Conference on AI in Finance, 2020, pp. 1\u20138.","DOI":"10.1145\/3383455.3422540"},{"key":"10.1016\/j.asoc.2023.110975_b15","series-title":"International Conference on Learning Representations","article-title":"Maximizing ensemble diversity in deep reinforcement learning","author":"Sheikh","year":"2022"},{"key":"10.1016\/j.asoc.2023.110975_b16","series-title":"International Conference on Learning Representations","article-title":"Randomized ensembled double Q-learning: Learning fast without a model","author":"Chen","year":"2021"},{"key":"10.1016\/j.asoc.2023.110975_b17","series-title":"Multiple Classifier Systems: 10th International Workshop, MCS 2011, Naples, Italy, June 15-17, 2011. Proceedings 10","first-page":"56","article-title":"Ensemble methods for reinforcement learning with function approximation","author":"Fau\u00dfer","year":"2011"},{"key":"10.1016\/j.asoc.2023.110975_b18","series-title":"International Conference on Machine Learning","first-page":"176","article-title":"Averaged-dqn: Variance reduction and stabilization for deep reinforcement learning","author":"Anschel","year":"2017"},{"issue":"9","key":"10.1016\/j.asoc.2023.110975_b19","doi-asserted-by":"crossref","first-page":"6597","DOI":"10.1109\/JIOT.2021.3113872","article-title":"Distributed resource scheduling for large-scale MEC systems: A multiagent ensemble deep reinforcement learning with imitation acceleration","volume":"9","author":"Jiang","year":"2021","journal-title":"IEEE Internet Things J."},{"key":"10.1016\/j.asoc.2023.110975_b20","series-title":"Reinforcement Learning: An Introduction","author":"Sutton","year":"2018"},{"key":"10.1016\/j.asoc.2023.110975_b21","first-page":"279","article-title":"Q-learning","volume":"8","author":"Watkins","year":"1992","journal-title":"Mach. Learn."},{"issue":"6","key":"10.1016\/j.asoc.2023.110975_b22","doi-asserted-by":"crossref","first-page":"26","DOI":"10.1109\/MSP.2017.2743240","article-title":"Deep reinforcement learning: A brief survey","volume":"34","author":"Arulkumaran","year":"2017","journal-title":"IEEE Signal Process. Mag."},{"key":"10.1016\/j.asoc.2023.110975_b23","doi-asserted-by":"crossref","first-page":"123","DOI":"10.1023\/A:1018054314350","article-title":"Bagging predictors","volume":"24","author":"Breiman","year":"1996","journal-title":"Mach. Learn."},{"key":"10.1016\/j.asoc.2023.110975_b24","series-title":"Nonlinear Estimation and Classification","first-page":"149","article-title":"The boosting approach to machine learning: An overview","author":"Schapire","year":"2003"},{"issue":"2","key":"10.1016\/j.asoc.2023.110975_b25","doi-asserted-by":"crossref","first-page":"241","DOI":"10.1016\/S0893-6080(05)80023-1","article-title":"Stacked generalization","volume":"5","author":"Wolpert","year":"1992","journal-title":"Neural Netw."},{"issue":"9","key":"10.1016\/j.asoc.2023.110975_b26","article-title":"Managing diversity in regression ensembles","volume":"6","author":"Brown","year":"2005","journal-title":"J. Mach. Learn. Res."},{"issue":"1","key":"10.1016\/j.asoc.2023.110975_b27","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1162\/neco.1992.4.1.1","article-title":"Neural networks and the bias\/variance dilemma","volume":"4","author":"Geman","year":"1992","journal-title":"Neural Comput."},{"issue":"20","key":"10.1016\/j.asoc.2023.110975_b28","doi-asserted-by":"crossref","first-page":"4133","DOI":"10.3390\/rs13204133","article-title":"Deep ensembles for hyperspectral image data classification and unmixing","volume":"13","author":"Nalepa","year":"2021","journal-title":"Remote Sens."},{"key":"10.1016\/j.asoc.2023.110975_b29","doi-asserted-by":"crossref","DOI":"10.1016\/j.engappai.2022.105151","article-title":"Ensemble deep learning: A review","volume":"115","author":"Ganaie","year":"2022","journal-title":"Eng. Appl. Artif. Intell."},{"key":"10.1016\/j.asoc.2023.110975_b30","series-title":"Robust and Reliable Autonomy in the Wild Workshop At the 30th International Joint Conference of Artificial Intelligence","article-title":"PEBL: Pessimistic ensembles for offline deep reinforcement learning","author":"Smit","year":"2021"},{"key":"10.1016\/j.asoc.2023.110975_b31","doi-asserted-by":"crossref","DOI":"10.1016\/j.energy.2022.126034","article-title":"A multi-factor driven spatiotemporal wind power prediction model based on ensemble deep graph attention reinforcement learning networks","volume":"263","author":"Chengqing","year":"2023","journal-title":"Energy"},{"key":"10.1016\/j.asoc.2023.110975_b32","doi-asserted-by":"crossref","DOI":"10.1016\/j.dsp.2022.103419","article-title":"A new ensemble deep graph reinforcement learning network for spatio-temporal traffic volume forecasting in a freeway network","volume":"123","author":"Shang","year":"2022","journal-title":"Digit. Signal Process."},{"key":"10.1016\/j.asoc.2023.110975_b33","doi-asserted-by":"crossref","DOI":"10.1016\/j.chaos.2022.112405","article-title":"A new ensemble spatio-temporal PM2.5 prediction method based on graph attention recursive networks and reinforcement learning","volume":"162","author":"Tan","year":"2022","journal-title":"Chaos Solitons Fractals"},{"key":"10.1016\/j.asoc.2023.110975_b34","series-title":"Advances in Artificial Intelligence: 4th Helenic Conference on AI, SETN 2006, Heraklion, Crete, Greece, May 18-20, 2006. Proceedings 4","first-page":"301","article-title":"Ensemble pruning using reinforcement learning","author":"Partalas","year":"2006"},{"key":"10.1016\/j.asoc.2023.110975_b35","series-title":"2020 International Joint Conference on Neural Networks (IJCNN)","first-page":"1","article-title":"Instance-based ensemble selection using deep reinforcement learning","author":"Liu","year":"2020"},{"key":"10.1016\/j.asoc.2023.110975_b36","series-title":"2010 Ninth International Conference on Machine Learning and Applications","first-page":"401","article-title":"Ensembles of neural networks for robust reinforcement learning","author":"Hans","year":"2010"},{"key":"10.1016\/j.asoc.2023.110975_b37","series-title":"Mepg: A minimalist ensemble policy gradient framework for deep reinforcement learning","author":"He","year":"2021"},{"key":"10.1016\/j.asoc.2023.110975_b38","series-title":"Advances in Neural Information Processing Systems, Vol. 34","first-page":"7436","article-title":"Uncertainty-based offline reinforcement learning with diversified q-ensemble","author":"An","year":"2021"},{"key":"10.1016\/j.asoc.2023.110975_b39","series-title":"Maxmin q-learning: Controlling the estimation bias of q-learning","author":"Lan","year":"2020"},{"key":"10.1016\/j.asoc.2023.110975_b40","doi-asserted-by":"crossref","unstructured":"S. Ghosh, S. Laguna, S.H. Lim, L. Wynter, H. Poonawala, A deep ensemble method for multi-agent reinforcement learning: A case study on air traffic control, in: Proceedings of the International Conference on Automated Planning and Scheduling, Vol. 31, 2021, pp. 468\u2013476.","DOI":"10.1609\/icaps.v31i1.15993"},{"issue":"2\u20133","key":"10.1016\/j.asoc.2023.110975_b41","doi-asserted-by":"crossref","first-page":"161","DOI":"10.1023\/A:1017928328829","article-title":"Kernel-based reinforcement learning","volume":"49","author":"Ormoneit","year":"2002","journal-title":"Mach. Learn."},{"key":"10.1016\/j.asoc.2023.110975_b42","series-title":"2021 IEEE International Conference on Robotics and Automation (ICRA)","first-page":"4202","article-title":"Sample efficient reinforcement learning via model-ensemble exploration and exploitation","author":"Yao","year":"2021"},{"key":"10.1016\/j.asoc.2023.110975_b43","doi-asserted-by":"crossref","first-page":"518","DOI":"10.1016\/j.ins.2019.09.066","article-title":"An ensemble method for inverse reinforcement learning","volume":"512","author":"Lin","year":"2020","journal-title":"Inform. Sci."},{"key":"10.1016\/j.asoc.2023.110975_b44","series-title":"Random ensemble reinforcement learning for traffic signal control","author":"Qi","year":"2022"},{"key":"10.1016\/j.asoc.2023.110975_b45","doi-asserted-by":"crossref","first-page":"95","DOI":"10.1007\/s40815-018-0535-y","article-title":"An ensemble fuzzy approach for inverse reinforcement learning","volume":"21","author":"Pan","year":"2019","journal-title":"Int. J. Fuzzy Syst."},{"key":"10.1016\/j.asoc.2023.110975_b46","series-title":"Conference on Robot Learning","first-page":"1702","article-title":"Offline-to-online reinforcement learning via balanced replay and pessimistic q-ensemble","author":"Lee","year":"2022"},{"key":"10.1016\/j.asoc.2023.110975_b47","series-title":"Towards applicable reinforcement learning: Improving the generalization and sample efficiency with policy ensemble","author":"Yang","year":"2022"},{"key":"10.1016\/j.asoc.2023.110975_b48","series-title":"Reinforcement learning with competitive ensembles of information-constrained primitives","author":"Goyal","year":"2019"},{"key":"10.1016\/j.asoc.2023.110975_b49","series-title":"DEFT: Diverse ensembles for fast transfer in reinforcement learning","author":"Adebola","year":"2022"},{"key":"10.1016\/j.asoc.2023.110975_b50","series-title":"Workshop on Reinforcement Learning in Games","first-page":"1","article-title":"Ensemble policy distillation in deep reinforcement learning","author":"Sun","year":"2020"},{"key":"10.1016\/j.asoc.2023.110975_b51","series-title":"2021 Workshop on Algorithm and Big Data","first-page":"55","article-title":"A novel ensemble reinforcement learning gated recursive network for traffic speed forecasting","author":"Dong","year":"2021"},{"key":"10.1016\/j.asoc.2023.110975_b52","series-title":"Reinforcement learning based dynamic weighing of ensemble models for time series forecasting","author":"Perepu","year":"2020"},{"key":"10.1016\/j.asoc.2023.110975_b53","doi-asserted-by":"crossref","DOI":"10.1016\/j.eswa.2020.113820","article-title":"Multi-DQN: An ensemble of deep Q-learning agents for stock market forecasting","volume":"164","author":"Carta","year":"2021","journal-title":"Expert Syst. Appl."},{"issue":"10","key":"10.1016\/j.asoc.2023.110975_b54","doi-asserted-by":"crossref","DOI":"10.1016\/j.apr.2021.101197","article-title":"A new multi-data-driven spatiotemporal PM2.5 forecasting model based on an ensemble graph reinforcement learning convolutional network","volume":"12","author":"Liu","year":"2021","journal-title":"Atmospheric Pollut. Res."},{"key":"10.1016\/j.asoc.2023.110975_b55","article-title":"The wisdom of the crowd: Reliable deep reinforcement learning through ensembles of q-functions","author":"Elliott","year":"2021","journal-title":"IEEE Trans. Neural Netw. Learn. Syst."},{"key":"10.1016\/j.asoc.2023.110975_b56","first-page":"1","article-title":"A novel carbon price forecasting method based on model matching, adaptive decomposition, and reinforcement learning ensemble strategy","author":"Cao","year":"2022","journal-title":"Environ. Sci. Pollut. Res."},{"key":"10.1016\/j.asoc.2023.110975_b57","series-title":"2021 IEEE 8th International Conference on Data Science and Advanced Analytics (DSAA)","first-page":"1","article-title":"Online ensemble aggregation using deep reinforcement learning for time series forecasting","author":"Saadallah","year":"2021"},{"key":"10.1016\/j.asoc.2023.110975_b58","doi-asserted-by":"crossref","DOI":"10.1016\/j.is.2021.101772","article-title":"Deep reinforcement learning based ensemble model for rumor tracking","volume":"103","author":"Li","year":"2022","journal-title":"Inf. Syst."},{"key":"10.1016\/j.asoc.2023.110975_b59","doi-asserted-by":"crossref","DOI":"10.1016\/j.asoc.2022.109113","article-title":"DeepEvap: Deep reinforcement learning based ensemble approach for estimating reference evapotranspiration","volume":"125","author":"Sharma","year":"2022","journal-title":"Appl. Soft Comput."},{"issue":"1","key":"10.1016\/j.asoc.2023.110975_b60","doi-asserted-by":"crossref","first-page":"15","DOI":"10.1109\/TIA.2021.3126272","article-title":"New hybrid deep neural architectural search-based ensemble reinforcement learning strategy for wind power forecasting","volume":"58","author":"Jalali","year":"2021","journal-title":"IEEE Trans. Ind. Appl."},{"key":"10.1016\/j.asoc.2023.110975_b61","doi-asserted-by":"crossref","DOI":"10.1016\/j.energy.2020.117794","article-title":"A new hybrid ensemble deep reinforcement learning model for wind speed short term forecasting","volume":"202","author":"Liu","year":"2020","journal-title":"Energy"},{"key":"10.1016\/j.asoc.2023.110975_b62","doi-asserted-by":"crossref","first-page":"45266","DOI":"10.1109\/ACCESS.2022.3170905","article-title":"A new multipredictor ensemble decision framework based on deep reinforcement learning for regional GDP prediction","volume":"10","author":"Li","year":"2022","journal-title":"IEEE Access"},{"key":"10.1016\/j.asoc.2023.110975_b63","doi-asserted-by":"crossref","first-page":"133","DOI":"10.1016\/j.inffus.2021.07.011","article-title":"Cost-effective ensemble models selection using deep reinforcement learning","volume":"77","author":"Birman","year":"2022","journal-title":"Inf. Fusion"},{"key":"10.1016\/j.asoc.2023.110975_b64","doi-asserted-by":"crossref","DOI":"10.1016\/j.energy.2022.123857","article-title":"Wind power prediction based on outlier correction, ensemble reinforcement learning, and residual correction","volume":"250","author":"Yin","year":"2022","journal-title":"Energy"},{"key":"10.1016\/j.asoc.2023.110975_b65","series-title":"POLTER: Policy trajectory ensemble regularization for unsupervised reinforcement learning","author":"Schubert","year":"2022"},{"key":"10.1016\/j.asoc.2023.110975_b66","series-title":"Robust opponent modeling via adversarial ensemble reinforcement learning in asymmetric imperfect-information games","author":"Shen","year":"2019"},{"key":"10.1016\/j.asoc.2023.110975_b67","series-title":"2022 International Joint Conference on Neural Networks (IJCNN)","first-page":"1","article-title":"HRL2E: Hierarchical reinforcement learning with low-level ensemble","author":"Qin","year":"2022"},{"issue":"5","key":"10.1016\/j.asoc.2023.110975_b68","doi-asserted-by":"crossref","first-page":"1030","DOI":"10.1109\/JSAC.2018.2832820","article-title":"Multiobjective reinforcement learning for cognitive satellite communications using deep neural network ensembles","volume":"36","author":"Ferreira","year":"2018","journal-title":"IEEE J. Sel. Areas Commun."},{"issue":"2","key":"10.1016\/j.asoc.2023.110975_b69","doi-asserted-by":"crossref","first-page":"245","DOI":"10.1109\/TEVC.2017.2704781","article-title":"Quality and diversity optimization: A unifying modular framework","volume":"22","author":"Cully","year":"2017","journal-title":"IEEE Trans. Evol. Comput."},{"issue":"4","key":"10.1016\/j.asoc.2023.110975_b70","doi-asserted-by":"crossref","first-page":"930","DOI":"10.1109\/TSMCB.2008.920231","article-title":"Ensemble algorithms in reinforcement learning","volume":"38","author":"Wiering","year":"2008","journal-title":"IEEE Trans. Syst. Man Cybern. B"},{"key":"10.1016\/j.asoc.2023.110975_b71","article-title":"Ensemble network architecture for deep reinforcement learning","volume":"2018","author":"Chen","year":"2018","journal-title":"Math. Probl. Eng."},{"key":"10.1016\/j.asoc.2023.110975_b72","series-title":"Seerl: Sample efficient ensemble reinforcement learning","author":"Saphal","year":"2020"},{"key":"10.1016\/j.asoc.2023.110975_b73","series-title":"Uncertainty in Artificial Intelligence","first-page":"631","article-title":"SENTINEL: taming uncertainty with ensemble based distributional reinforcement learning","author":"Eriksson","year":"2022"},{"key":"10.1016\/j.asoc.2023.110975_b74","doi-asserted-by":"crossref","unstructured":"M. N\u00e9meth, G. Sz\u0171cs, Split Feature Space Ensemble Method using Deep Reinforcement Learning for Algorithmic Trading, in: Proceedings of the 2022 8th International Conference on Computer Technology Applications, 2022, pp. 188\u2013194.","DOI":"10.1145\/3543712.3543722"},{"key":"10.1016\/j.asoc.2023.110975_b75","series-title":"International Conference on Learning Representations","article-title":"Evolutionary diversity optimization with clustering-based selection for reinforcement learning","author":"Wang","year":"2021"},{"key":"10.1016\/j.asoc.2023.110975_b76","series-title":"International Conference on Artificial Intelligence and Statistics","first-page":"4015","article-title":"On the importance of hyperparameter optimization for model-based reinforcement learning","author":"Zhang","year":"2021"},{"key":"10.1016\/j.asoc.2023.110975_b77","doi-asserted-by":"crossref","first-page":"55","DOI":"10.1007\/s11063-013-9334-5","article-title":"Neural network ensembles in reinforcement learning","volume":"41","author":"Fau\u00dfer","year":"2015","journal-title":"Neural Process. Lett."},{"key":"10.1016\/j.asoc.2023.110975_b78","doi-asserted-by":"crossref","DOI":"10.1016\/j.asoc.2021.107675","article-title":"An oppositional-Cauchy based GSK evolutionary algorithm with a novel deep ensemble reinforcement learning strategy for COVID-19 diagnosis","volume":"111","author":"Jalali","year":"2021","journal-title":"Appl. Soft Comput."},{"key":"10.1016\/j.asoc.2023.110975_b79","first-page":"1","article-title":"Deep ensemble reinforcement learning with multiple deep deterministic policy gradient algorithm","volume":"2020","author":"Wu","year":"2020","journal-title":"Math. Probl. Eng."},{"key":"10.1016\/j.asoc.2023.110975_b80","series-title":"International Conference on Machine Learning","first-page":"19731","article-title":"DNS: Determinantal point process based neural network sampler for ensemble reinforcement learning","author":"Sheikh","year":"2022"},{"key":"10.1016\/j.asoc.2023.110975_b81","series-title":"Advances in Neural Information Processing Systems, Vol. 31","article-title":"Sample-efficient reinforcement learning with stochastic ensemble value expansion","author":"Buckman","year":"2018"},{"key":"10.1016\/j.asoc.2023.110975_b82","series-title":"Effective exploration for deep reinforcement learning via bootstrapped q-ensembles under tsallis entropy regularization","author":"Chen","year":"2018"},{"key":"10.1016\/j.asoc.2023.110975_b83","series-title":"International Conference on Machine Learning","first-page":"8454","article-title":"Ensemble bootstrapping for Q-Learning","author":"Peer","year":"2021"},{"key":"10.1016\/j.asoc.2023.110975_b84","series-title":"Interpretable reinforcement learning with ensemble methods","author":"Brown","year":"2018"},{"key":"10.1016\/j.asoc.2023.110975_b85","doi-asserted-by":"crossref","DOI":"10.1016\/j.scitotenv.2019.07.367","article-title":"Deep learning-based PM2. 5 prediction considering the spatiotemporal correlations: A case study of Beijing, China","volume":"699","author":"Pak","year":"2020","journal-title":"Sci. Total Environ."},{"issue":"1","key":"10.1016\/j.asoc.2023.110975_b86","doi-asserted-by":"crossref","first-page":"128","DOI":"10.4209\/aaqr.2019.08.0408","article-title":"Application of the XGBoost machine learning method in PM2. 5 prediction: A case study of Shanghai","volume":"20","author":"Ma","year":"2020","journal-title":"Aerosol Air Qual. Res."},{"key":"10.1016\/j.asoc.2023.110975_b87","series-title":"2021 International Conference on Smart Energy Systems and Technologies (SEST)","first-page":"1","article-title":"A new ensemble reinforcement learning strategy for solar irradiance forecasting using deep optimized convolutional neural network models","author":"Jalali","year":"2021"},{"key":"10.1016\/j.asoc.2023.110975_b88","doi-asserted-by":"crossref","first-page":"443","DOI":"10.1007\/s11869-020-00948-x","article-title":"A novel ensemble reinforcement learning gated unit model for daily PM2.5 forecasting","volume":"14","author":"Li","year":"2021","journal-title":"Air Qual., Atmosphere Health"},{"key":"10.1016\/j.asoc.2023.110975_b89","doi-asserted-by":"crossref","DOI":"10.1016\/j.aei.2021.101290","article-title":"Dynamic ensemble wind speed prediction model based on hybrid deep reinforcement learning","volume":"48","author":"Chen","year":"2021","journal-title":"Adv. Eng. Inform."},{"key":"10.1016\/j.asoc.2023.110975_b90","series-title":"An optimization method-assisted ensemble deep reinforcement learning algorithm to solve unit commitment problems","author":"Qin","year":"2022"},{"issue":"4","key":"10.1016\/j.asoc.2023.110975_b91","doi-asserted-by":"crossref","DOI":"10.1063\/5.0097344","article-title":"Attention and masking embedded ensemble reinforcement learning for smart energy optimization and risk evaluation under uncertainties","volume":"14","author":"Sogabe","year":"2022","journal-title":"J. Renew. Sustain. Energy"},{"key":"10.1016\/j.asoc.2023.110975_b92","doi-asserted-by":"crossref","DOI":"10.1016\/j.ijepes.2022.108406","article-title":"Ensemble-based Deep Reinforcement Learning for robust cooperative wind farm control","volume":"143","author":"He","year":"2022","journal-title":"Int. J. Electr. Power Energy Syst."},{"key":"10.1016\/j.asoc.2023.110975_b93","article-title":"Solar irradiance forecasting using a novel hybrid deep ensemble reinforcement learning algorithm","volume":"32","author":"Jalali","year":"2022","journal-title":"Sustain. Energy, Grids Netw."},{"key":"10.1016\/j.asoc.2023.110975_b94","series-title":"Advances in Natural Computation, Fuzzy Systems and Knowledge Discovery","first-page":"152","article-title":"Heterogeneous-aware online cloud task scheduler based on clustering and deep reinforcement learning ensemble","author":"Gu","year":"2021"},{"key":"10.1016\/j.asoc.2023.110975_b95","series-title":"2021 55th Asilomar Conference on Signals, Systems, and Computers","first-page":"1018","article-title":"On-policy reinforcement learning via ensemble Gaussian processes with application to resource allocation","author":"Polyzos","year":"2021"},{"issue":"1","key":"10.1016\/j.asoc.2023.110975_b96","doi-asserted-by":"crossref","first-page":"180","DOI":"10.1109\/JSTSP.2017.2787979","article-title":"Optimal and scalable caching for 5G using reinforcement learning of space-time popularities","volume":"12","author":"Sadeghi","year":"2017","journal-title":"IEEE J. Sel. Top. Sign. Proces."},{"key":"10.1016\/j.asoc.2023.110975_b97","doi-asserted-by":"crossref","first-page":"97045","DOI":"10.1109\/ACCESS.2020.2992853","article-title":"Energy-efficient IoT sensor calibration with deep reinforcement learning","volume":"8","author":"Ashiquzzaman","year":"2020","journal-title":"IEEE Access"},{"key":"10.1016\/j.asoc.2023.110975_b98","series-title":"Elegantrl-podracer: Scalable and elastic library for cloud-native deep reinforcement learning","author":"Liu","year":"2021"},{"key":"10.1016\/j.asoc.2023.110975_b99","series-title":"2022 IEEE World AI IoT Congress (AIIoT)","first-page":"032","article-title":"Ensemble reinforcement learning framework for sum rate optimization in NOMA-UAV network","author":"Mahmud","year":"2022"},{"issue":"2","key":"10.1016\/j.asoc.2023.110975_b100","doi-asserted-by":"crossref","first-page":"717","DOI":"10.1109\/TTE.2020.2991079","article-title":"Ensemble reinforcement learning-based supervisory control of hybrid electric vehicle for fuel economy improvement","volume":"6","author":"Xu","year":"2020","journal-title":"IEEE Trans. Transp. Electr."},{"key":"10.1016\/j.asoc.2023.110975_b101","series-title":"NIPS Workshop on Deep Reinforcement Learning","article-title":"Inquire and diagnose: Neural symptom checking ensemble using deep reinforcement learning","author":"Tang","year":"2016"},{"key":"10.1016\/j.asoc.2023.110975_b102","doi-asserted-by":"crossref","DOI":"10.1016\/j.optcom.2022.129186","article-title":"Ensemble consensus representation deep reinforcement learning for hybrid FSO\/RF communication systems","volume":"530","author":"Henna","year":"2023","journal-title":"Opt. Commun."},{"key":"10.1016\/j.asoc.2023.110975_b103","doi-asserted-by":"crossref","first-page":"118","DOI":"10.1016\/j.neucom.2019.08.007","article-title":"Ensemble-based deep reinforcement learning for chatbots","volume":"366","author":"Cuay\u00e1huitl","year":"2019","journal-title":"Neurocomputing"},{"key":"10.1016\/j.asoc.2023.110975_b104","series-title":"Openai gym","author":"Brockman","year":"2016"},{"key":"10.1016\/j.asoc.2023.110975_b105","series-title":"Advances in Neural Information Processing Systems, Vol. 29","article-title":"Deep exploration via bootstrapped DQN","author":"Osband","year":"2016"},{"issue":"7\u20139","key":"10.1016\/j.asoc.2023.110975_b106","doi-asserted-by":"crossref","first-page":"1900","DOI":"10.1016\/j.neucom.2008.06.007","article-title":"Pruning an ensemble of classifiers via reinforcement learning","volume":"72","author":"Partalas","year":"2009","journal-title":"Neurocomputing"},{"key":"10.1016\/j.asoc.2023.110975_b107","series-title":"Bayesian inference with anchored ensembles of neural networks, and application to exploration in reinforcement learning","author":"Pearce","year":"2018"},{"key":"10.1016\/j.asoc.2023.110975_b108","doi-asserted-by":"crossref","unstructured":"M. Shen, J.P. How, Robust opponent modeling via adversarial ensemble reinforcement learning, in: Proceedings of the International Conference on Automated Planning and Scheduling, Vol. 31, 2021, pp. 578\u2013587.","DOI":"10.1609\/icaps.v31i1.16006"},{"key":"10.1016\/j.asoc.2023.110975_b109","series-title":"Federated ensemble model-based reinforcement learning in edge computing","author":"Wang","year":"2021"},{"key":"10.1016\/j.asoc.2023.110975_b110","doi-asserted-by":"crossref","unstructured":"R. Caruana, A. Niculescu-Mizil, G. Crew, A. Ksikes, Ensemble selection from libraries of models, in: Proceedings of the Twenty-First International Conference on Machine Learning, 2004, p. 18.","DOI":"10.1145\/1015330.1015432"},{"issue":"6","key":"10.1016\/j.asoc.2023.110975_b111","doi-asserted-by":"crossref","first-page":"511","DOI":"10.3233\/IDA-2005-9602","article-title":"Selective fusion of heterogeneous classifiers","volume":"9","author":"Tsoumakas","year":"2005","journal-title":"Intell. Data Anal."},{"key":"10.1016\/j.asoc.2023.110975_b112","series-title":"Prioritized experience replay","author":"Schaul","year":"2015"},{"key":"10.1016\/j.asoc.2023.110975_b113","doi-asserted-by":"crossref","unstructured":"D. Isele, A. Cosgun, Selective experience replay for lifelong learning, in: Proceedings of the AAAI Conference on Artificial Intelligence, Vol. 32, 2018.","DOI":"10.1609\/aaai.v32i1.11595"},{"issue":"2","key":"10.1016\/j.asoc.2023.110975_b114","doi-asserted-by":"crossref","first-page":"163","DOI":"10.1016\/0925-2312(94)90053-1","article-title":"Learning and generalization characteristics of the random vector functional-link net","volume":"6","author":"Pao","year":"1994","journal-title":"Neurocomputing"},{"key":"10.1016\/j.asoc.2023.110975_b115","doi-asserted-by":"crossref","DOI":"10.1016\/j.patcog.2021.107978","article-title":"Random vector functional link neural network based ensemble deep learning","volume":"117","author":"Shi","year":"2021","journal-title":"Pattern Recognit."},{"key":"10.1016\/j.asoc.2023.110975_b116","doi-asserted-by":"crossref","unstructured":"B. Han, J. Sim, H. Adam, Branchout: Regularization for online ensemble tracking with convolutional neural networks, in: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, 2017, pp. 3356\u20133365.","DOI":"10.1109\/CVPR.2017.63"}],"container-title":["Applied Soft Computing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S1568494623009936?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S1568494623009936?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2025,10,27]],"date-time":"2025-10-27T20:26:21Z","timestamp":1761596781000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S1568494623009936"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,12]]},"references-count":116,"alternative-id":["S1568494623009936"],"URL":"https:\/\/doi.org\/10.1016\/j.asoc.2023.110975","relation":{},"ISSN":["1568-4946"],"issn-type":[{"value":"1568-4946","type":"print"}],"subject":[],"published":{"date-parts":[[2023,12]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"Ensemble reinforcement learning: A survey","name":"articletitle","label":"Article Title"},{"value":"Applied Soft Computing","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.asoc.2023.110975","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2023 Elsevier B.V. All rights reserved.","name":"copyright","label":"Copyright"}],"article-number":"110975"}}