{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,26]],"date-time":"2025-11-26T19:05:38Z","timestamp":1764183938168,"version":"3.46.0"},"reference-count":19,"publisher":"IEEE","license":[{"start":{"date-parts":[[2019,9,1]],"date-time":"2019-09-01T00:00:00Z","timestamp":1567296000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2019,9,1]],"date-time":"2019-09-01T00:00:00Z","timestamp":1567296000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2019,9]]},"DOI":"10.1109\/cbd.2019.00051","type":"proceedings-article","created":{"date-parts":[[2019,11,29]],"date-time":"2019-11-29T07:16:05Z","timestamp":1575011765000},"page":"240-245","source":"Crossref","is-referenced-by-count":2,"title":["An Intelligent Bidding Strategy Based on Model-Free Reinforcement Learning for Real-Time Bidding in Display Advertising"],"prefix":"10.1109","author":[{"given":"Mengjuan","family":"Liu","sequence":"first","affiliation":[{"name":"University of Electronic Science and Technology of China"}]},{"given":"Jiaxing","family":"Li","sequence":"additional","affiliation":[{"name":"University of Electronic Science and Technology of China"}]},{"given":"Wei","family":"Yue","sequence":"additional","affiliation":[{"name":"University of Electronic Science and Technology of China"}]},{"given":"Lizhou","family":"Qiu","sequence":"additional","affiliation":[{"name":"University of Electronic Science and Technology of China"}]},{"given":"Jinyu","family":"Liu","sequence":"additional","affiliation":[{"name":"University of Electronic Science and Technology of China"}]},{"given":"Zhiguang","family":"Qin","sequence":"additional","affiliation":[{"name":"University of Electronic Science and Technology of China"}]}],"member":"263","reference":[{"key":"ref10","first-page":"261","author":"gordon","year":"1995","journal-title":"Stable function approximation in dynamic programming"},{"key":"ref11","first-page":"1057","article-title":"Policy gradient methods for reinforcement learning with function approximation","author":"sutton","year":"2000","journal-title":"Advances in neural information processing systems"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/BigData.2015.7364112"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/TNN.1998.712192"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1145\/2648584.2648590"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1007\/BF00992698"},{"key":"ref16","article-title":"Playing Atari with Deep Reinforcement Learning","author":"mnih","year":"2013","journal-title":"Computer Science"},{"key":"ref17","article-title":"On-line Q-learning Using Connectionist Systems","author":"rummery","year":"1994","journal-title":"Technical Report"},{"key":"ref18","first-page":"437","article-title":"Multi-armed bandit algorithms and empirical evaluation","author":"mohri","year":"0","journal-title":"Proceedings of the 10th European Conference on Machine Learning (ECML)"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1145\/3124749.3124761"},{"journal-title":"Auction Theory","year":"2009","author":"krishna","key":"ref4"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1561\/1500000049"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1145\/2339530.2339655"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1145\/3018661.3018702"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1145\/3269206.3271748"},{"key":"ref7","doi-asserted-by":"crossref","first-page":"1077","DOI":"10.1145\/2623330.2623633","article-title":"Optimal Real-time bidding for display advertising","author":"zhang","year":"2014","journal-title":"KDD &#x2018;12"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1145\/2684822.2697041"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1145\/2501040.2501980"},{"key":"ref9","first-page":"187a","article-title":"Continuous control with deep reinforcement learning","volume":"8","author":"lillicrap","year":"2015","journal-title":"Computer Science"}],"event":{"name":"2019 Seventh International Conference on Advanced Cloud and Big Data (CBD)","start":{"date-parts":[[2019,9,21]]},"location":"Suzhou, China","end":{"date-parts":[[2019,9,22]]}},"container-title":["2019 Seventh International Conference on Advanced Cloud and Big Data (CBD)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/8909284\/8916217\/08916257.pdf?arnumber=8916257","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,11,26]],"date-time":"2025-11-26T19:02:06Z","timestamp":1764183726000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/8916257\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019,9]]},"references-count":19,"URL":"https:\/\/doi.org\/10.1109\/cbd.2019.00051","relation":{},"subject":[],"published":{"date-parts":[[2019,9]]}}}