{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,17]],"date-time":"2026-02-17T23:31:38Z","timestamp":1771371098923,"version":"3.50.1"},"reference-count":47,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2022,9,14]],"date-time":"2022-09-14T00:00:00Z","timestamp":1663113600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2022,9,14]],"date-time":"2022-09-14T00:00:00Z","timestamp":1663113600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Neural Comput &amp; Applic"],"published-print":{"date-parts":[[2023,1]]},"DOI":"10.1007\/s00521-022-07779-0","type":"journal-article","created":{"date-parts":[[2022,9,14]],"date-time":"2022-09-14T20:07:27Z","timestamp":1663186047000},"page":"449-467","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":22,"title":["Policy-based optimization: single-step policy gradient method seen as an evolution strategy"],"prefix":"10.1007","volume":"35","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-6026-9250","authenticated-orcid":false,"given":"J.","family":"Viquerat","sequence":"first","affiliation":[]},{"given":"R.","family":"Duvigneau","sequence":"additional","affiliation":[]},{"given":"P.","family":"Meliga","sequence":"additional","affiliation":[]},{"given":"A.","family":"Kuhnle","sequence":"additional","affiliation":[]},{"given":"E.","family":"Hachem","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2022,9,14]]},"reference":[{"key":"7779_CR1","doi-asserted-by":"publisher","first-page":"2352","DOI":"10.1162\/neco_a_00990","volume":"29","author":"W Rawat","year":"2017","unstructured":"Rawat W, Wang Z (2017) Deep convolutional neural networks for image classification: a comprehensive review. Neural Comput 29:2352\u20132449","journal-title":"Neural Comput"},{"key":"7779_CR2","doi-asserted-by":"crossref","unstructured":"Khan A, Sohail A, Zahoora U, Qureshi AS (2020) A survey of the recent architectures of deep convolutional neural networks. Artif Intell Revi , pp 2352\u20132449","DOI":"10.1007\/s10462-020-09825-6"},{"key":"7779_CR3","doi-asserted-by":"publisher","first-page":"19143","DOI":"10.1109\/ACCESS.2019.2896880","volume":"7","author":"AB Nassif","year":"2019","unstructured":"Nassif AB, Shahin I, Attili I, Azzeh M, Shaalan K (2019) Speech recognition using deep neural networks: a systematic review. IEEE Access 7:19143\u201319165","journal-title":"IEEE Access"},{"key":"7779_CR4","unstructured":"Gui J, Sun Z, Wen Y, Tao D, Ye J (2020) A review on generative adversarial networks: algorithms, theory, and applications. http:\/\/arxiv.org\/abs\/2001.06937,"},{"key":"7779_CR5","unstructured":"Mnih V, Kavukcuoglu K, Silver D, Graves A, Antonoglou I, Wierstra D, Riedmiller M (2013) Playing Atari with deep reinforcement learning. http:\/\/arxiv.org\/abs\/1312.5602,"},{"key":"7779_CR6","doi-asserted-by":"crossref","unstructured":"Silver D, Schrittwieser J, Simonyan K, Antonoglou I, Huang A, Guez A, Hubert T, Baker L, Lai M, Bolton A, Chen Y, Lillicrap T, Hui F, Sifre L, van\u00a0den Driessche G, Graepel T, Hassabis D (2017) Mastering the game of Go without human knowledge. Nature, 550","DOI":"10.1038\/nature24270"},{"key":"7779_CR7","unstructured":"OpenAI. OpenAI Five. https:\/\/blog.openai.com\/openai-five\/, (2018)"},{"key":"7779_CR8","doi-asserted-by":"crossref","unstructured":"Pinto L, Andrychowicz M, Welinder P, Zaremba W, Abbeel P (2017) Asymmetric actor critic for image-based robot learning. http:\/\/arxiv.org\/abs\/1710.06542,","DOI":"10.15607\/RSS.2018.XIV.008"},{"key":"7779_CR9","unstructured":"Bahdanau D, Brakel P, Xu K, Goyal A, Lowe R, Pineau J, Courville A, Bengio Y (2016) An actor-critic algorithm for sequence prediction. http:\/\/arxiv.org\/abs\/1607.07086,"},{"key":"7779_CR10","doi-asserted-by":"crossref","unstructured":"Kendall A, Hawke J, Janz D, Mazur P, Reda D, Allen J.-M, Lam V.-D, Bewley A, Shah A (2018) Learning to drive in a day. http:\/\/arxiv.org\/abs\/1807.00412,","DOI":"10.1109\/ICRA.2019.8793742"},{"key":"7779_CR11","doi-asserted-by":"crossref","unstructured":"Bewley A, Rigley J, Liu Y, Hawke J, Shen R, Lam V.-D, Kendall A (2018) Learning to drive from simulation without real world labels. http:\/\/arxiv.org\/abs\/1812.03823,","DOI":"10.1109\/ICRA.2019.8793668"},{"key":"7779_CR12","unstructured":"Knight W (2018) Google just gave control over data center cooling to an AI. http:\/\/www.technologyreview.com\/s\/611902\/google-just-gave-control-over-data-center-cooling-to-an-ai\/,"},{"key":"7779_CR13","doi-asserted-by":"publisher","first-page":"10","DOI":"10.1016\/j.neucom.2017.04.075","volume":"272","author":"G Villarrubia","year":"2018","unstructured":"Villarrubia G, De Paz JF, Chamoso P, De la Prieta F (2018) Artificial neural networks used in optimization problems. Neurocomputing 272:10\u201316","journal-title":"Neurocomputing"},{"key":"7779_CR14","doi-asserted-by":"publisher","first-page":"925","DOI":"10.1007\/s10957-018-1396-0","volume":"180","author":"AM Schweidtmann","year":"2019","unstructured":"Schweidtmann AM, Mitsos A (2019) Deterministic global optimization with artificial neural networks embedded. J Opt Theory Appl 180:925\u2013948","journal-title":"J Opt Theory Appl"},{"key":"7779_CR15","unstructured":"Andrychowicz M, Denil M, Gomez S, Hoffman M.\u00a0W, Pfau D, Schaul T, Shillingford B, de\u00a0Freitas N (2016) Learning to learn by gradient descent by gradient descent. http:\/\/arxiv.org\/abs\/1606.04474,"},{"key":"7779_CR16","doi-asserted-by":"publisher","first-page":"826","DOI":"10.1016\/j.ast.2019.02.003","volume":"86","author":"X Yan","year":"2019","unstructured":"Yan X, Zhu J, Kuang M, Wang X (2019) Aerodynamic shape optimization using a novel optimizer based on machine learning techniques. Aerospace Sci Technol 86:826\u2013835","journal-title":"Aerospace Sci Technol"},{"key":"7779_CR17","unstructured":"Li R, Zhang Y, Chen H (2020) Learning the aerodynamic design of supercritical airfoils through deep reinforcement learning. https:\/\/arxiv.org\/abs\/2010.03651,"},{"key":"7779_CR18","doi-asserted-by":"publisher","first-page":"110080","DOI":"10.1016\/j.jcp.2020.110080","volume":"428","author":"J Viquerat","year":"2021","unstructured":"Viquerat J, Rabault J, Kuhnle A, Ghraieb H, Larcher A, Hachem E (2021) Direct shape optimization through deep reinforcement learning. J Comput Phys 428:110080","journal-title":"J Comput Phys"},{"key":"7779_CR19","unstructured":"Ghraieb H, Viquerat J, Larcher A, Meliga P, Hachem E (2020) Optimization and passive flow control using single-step deep reinforcement learning. http:\/\/arxiv.org\/abs\/2006.02979,"},{"key":"7779_CR20","doi-asserted-by":"crossref","unstructured":"Hachem E, Ghraieb H, Viquerat J, Larcher A, Meliga P (2020) Deep reinforcement learning for the control of conjugate heat transfer with application to workpiece cooling. https:\/\/arxiv.org\/abs\/2011.15035,","DOI":"10.1016\/j.jcp.2021.110317"},{"key":"7779_CR21","unstructured":"H\u00e4m\u00e4l\u00e4inen P, Babadi A, Ma X, Lehtinen J (2018) Ppo-cma: Proximal policy optimization with covariance matrix adaptation. http:\/\/arxiv.org\/abs\/1810.02541,"},{"key":"7779_CR22","unstructured":"Viquerat J (2021) PBO git repository. https:\/\/github.com\/jviquerat\/pbo,"},{"issue":"5","key":"7779_CR23","doi-asserted-by":"publisher","first-page":"359","DOI":"10.1016\/0893-6080(89)90020-8","volume":"2","author":"K Hornik","year":"1989","unstructured":"Hornik K, Stinchcombe M, White H (1989) Multilayer feedforward networks are universal approximators. Neural Netw 2(5):359\u2013366","journal-title":"Neural Netw"},{"key":"7779_CR24","unstructured":"Goodfellow I, Bengio Y, Courville A (2017) The deep learning book. MIT Press, London"},{"key":"7779_CR25","unstructured":"Sutton R, Mcallester D, Singh S, Mansour Y (2000) Policy gradient methods for reinforcement learning with function approximation. Adv Neural Inf Process Syst, 12"},{"key":"7779_CR26","doi-asserted-by":"publisher","first-page":"533","DOI":"10.1038\/323533a0","volume":"323","author":"DE Rumelhart","year":"1986","unstructured":"Rumelhart DE, Hinton GE, Williams RJ (1986) Learning representations by back-propagating errors. Nature 323:533\u2013536","journal-title":"Nature"},{"key":"7779_CR27","unstructured":"Konda, V.\u00a0R, Tsitsiklis J.\u00a0N (2000) Actor-critic algorithms. In: Adv Neural Inf Process Syst, pp 1008\u20131014"},{"key":"7779_CR28","unstructured":"Schulman J, Moritz P, Levine S, Jordan M, Abbeel P (2015) High-dimensional continuous control using generalized advantage estimation. https:\/\/arxiv.org\/abs\/1506.02438,"},{"key":"7779_CR29","unstructured":"Schulman J, Wolski F, Dhariwal P, Radford A, Klimov O (2017) Proximal policy optimization algorithms. http:\/\/arxiv.org\/abs\/1707.06347,"},{"key":"7779_CR30","unstructured":"Sutton R, Barto, A.\u00a0G (2018) Reinforcement learning: an introduction. MIT press, Cambridge"},{"issue":"1","key":"7779_CR31","doi-asserted-by":"publisher","first-page":"3","DOI":"10.1023\/A:1015059928466","volume":"1","author":"H-G Beyer","year":"2002","unstructured":"Beyer H-G, Schwefel H-P (2002) Evolution strategies\u2014a comprehensive introduction. Natural Comput 1(1):3\u201352","journal-title":"Natural Comput"},{"key":"7779_CR32","doi-asserted-by":"crossref","unstructured":"Eiben A.\u00a0E, Smith J.\u00a0E (2015) Introduction to Evolutionary Computing, 2nd edn. Springer, Berlin","DOI":"10.1007\/978-3-662-44874-8"},{"key":"7779_CR33","unstructured":"Hansen N (2016) The cma evolution strategy: a tutorial. http:\/\/arxiv.org\/abs\/1604.00772,"},{"key":"7779_CR34","unstructured":"Kingma D.\u00a0P, Ba J (2014) Adam: a method for stochastic optimization. http:\/\/arxiv.org\/abs\/1412.6980,"},{"key":"7779_CR35","unstructured":"Degris T, White M, Sutton R.\u00a0S (2013) Off-policy actor-critic. https:\/\/arxiv.org\/abs\/1205.4839,"},{"key":"7779_CR36","doi-asserted-by":"crossref","unstructured":"Rebonato R, J\u00e4ckel, P (2011) The most general methodology to create a valid correlation matrix for risk management and option pricing purposes. Available at SSRN 1969689,","DOI":"10.2139\/ssrn.1969689"},{"issue":"1","key":"7779_CR37","doi-asserted-by":"publisher","first-page":"55","DOI":"10.1093\/imaman\/dpl010","volume":"18","author":"F Rapisarda","year":"2007","unstructured":"Rapisarda F, Brigo D, Mercurio F (2007) Parameterizing correlations: a geometric interpretation. IMA J Manage Math 18(1):55\u201373","journal-title":"IMA J Manage Math"},{"key":"7779_CR38","doi-asserted-by":"crossref","unstructured":"Numpacharoen K, Atsawarungruangkit A (2012) Generating correlation matrices based on the boundaries of their coefficients. PLOS One, 7(11)","DOI":"10.1371\/journal.pone.0048902"},{"key":"7779_CR39","unstructured":"Maree S (2012) Correcting non positive definite correlation matrices. BSc Thesis Applied Mathematics, TU Delft"},{"issue":"4","key":"7779_CR40","doi-asserted-by":"publisher","first-page":"329","DOI":"10.1175\/1520-0469(1962)019<0329:FAFCAA>2.0.CO;2","volume":"19","author":"B Saltzman","year":"1962","unstructured":"Saltzman B (1962) Finite amplitude free convection as an initial value problem. J Atmos Sci 19(4):329\u2013341","journal-title":"J Atmos Sci"},{"issue":"2","key":"7779_CR41","doi-asserted-by":"publisher","first-page":"130","DOI":"10.1175\/1520-0469(1963)020<0130:DNF>2.0.CO;2","volume":"20","author":"EN Lorenz","year":"1963","unstructured":"Lorenz EN (1963) Deterministic nonperiodic flow. J Atmos Sci 20(2):130\u2013141","journal-title":"J Atmos Sci"},{"issue":"9\u201310","key":"7779_CR42","doi-asserted-by":"publisher","first-page":"585","DOI":"10.1080\/14685248.2020.1797059","volume":"21","author":"G Beintema","year":"2020","unstructured":"Beintema G, Corbetta A, Biferale L, Toschi F (2020) Controlling rayleigh-b\u00e9nard convection via reinforcement learning. J Turbulen 21(9\u201310):585\u2013605","journal-title":"J Turbulen"},{"key":"7779_CR43","doi-asserted-by":"crossref","unstructured":"Virtanen P, GommersR, Oliphant TE, Haberland M, Reddy T, Cournapeau D, Burovski E, Peterson P, Weckesser W, Bright J, van der Walt SJ, Brett M, Wilson J, Millman KJ, Mayorov N, Nelson ARJ, Jones E, Kern R, Larson E, Carey CJ, Polat I, Feng Y, Moore EW, VanderPlas J, Laxalde D, Perktold J, Cimrman R, Henriksen I, Quintero EA, Harris CR, Archibald AM, Ribeiro AH, Pedregosa F, van Mulbregt P (2020) SciPy 1.0: Fundamental algorithms for scientific computing in python. Nature Methods 17:261\u2013272","DOI":"10.1038\/s41592-020-0772-5"},{"key":"7779_CR44","doi-asserted-by":"crossref","unstructured":"Barto AG, Sutton RS, Anderson CW (1983) Neuron-like adaptive elements that can solve difficult learning control problems. IEEE Trans Syst Man Cybern, SMC-13(5):834\u2013846","DOI":"10.1109\/TSMC.1983.6313077"},{"key":"7779_CR45","unstructured":"Brockman G, Cheung V, Pettersson L, Schneider J, Schulman J, Tang J, Zaremba W (2016) Openai gym. https:\/\/arxiv.org\/abs\/1606.01540,"},{"key":"7779_CR46","unstructured":"Lillicrap T.\u00a0P, Hunt J.\u00a0J, Pritzel A, Heess N, Erez T, Tassa Y, Silver D, Wierstra D (2019) Continuous control with deep reinforcement learning. https:\/\/arxiv.org\/abs\/1509.02971v6,"},{"key":"7779_CR47","unstructured":"Wang Z, Bapst V, Heess N, Mnih V, Munos R, Kavukcuoglu K, de\u00a0Freitas N (2017) Sample efficient actor-critic with experience replay. https:\/\/arxiv.org\/abs\/1611.01224,"}],"container-title":["Neural Computing and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00521-022-07779-0.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00521-022-07779-0\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00521-022-07779-0.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,1,7]],"date-time":"2023-01-07T06:10:19Z","timestamp":1673071819000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00521-022-07779-0"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,9,14]]},"references-count":47,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2023,1]]}},"alternative-id":["7779"],"URL":"https:\/\/doi.org\/10.1007\/s00521-022-07779-0","relation":{},"ISSN":["0941-0643","1433-3058"],"issn-type":[{"value":"0941-0643","type":"print"},{"value":"1433-3058","type":"electronic"}],"subject":[],"published":{"date-parts":[[2022,9,14]]},"assertion":[{"value":"26 November 2021","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"30 August 2022","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"14 September 2022","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors have no conflict of interest to declare.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}]}}