{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,29]],"date-time":"2026-04-29T22:31:42Z","timestamp":1777501902890,"version":"3.51.4"},"reference-count":43,"publisher":"IEEE","license":[{"start":{"date-parts":[[2022,5,23]],"date-time":"2022-05-23T00:00:00Z","timestamp":1653264000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2022,5,23]],"date-time":"2022-05-23T00:00:00Z","timestamp":1653264000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/100000006","name":"Office of Naval Research","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100000006","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100006785","name":"Google","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100006785","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000001","name":"NSF","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2022,5,23]]},"DOI":"10.1109\/icra46639.2022.9812166","type":"proceedings-article","created":{"date-parts":[[2022,7,12]],"date-time":"2022-07-12T19:36:40Z","timestamp":1657654600000},"page":"1593-1599","source":"Crossref","is-referenced-by-count":80,"title":["Legged Robots that Keep on Learning: Fine-Tuning Locomotion Policies in the Real World"],"prefix":"10.1109","author":[{"given":"Laura","family":"Smith","sequence":"first","affiliation":[{"name":"Berkeley AI Research, UC Berkeley"}]},{"given":"J. Chase","family":"Kew","sequence":"additional","affiliation":[{"name":"Google Research"}]},{"given":"Xue","family":"Bin Peng","sequence":"additional","affiliation":[{"name":"Berkeley AI Research, UC Berkeley"}]},{"given":"Sehoon","family":"Ha","sequence":"additional","affiliation":[{"name":"Google Research"}]},{"given":"Jie","family":"Tan","sequence":"additional","affiliation":[{"name":"Google Research"}]},{"given":"Sergey","family":"Levine","sequence":"additional","affiliation":[{"name":"Berkeley AI Research, UC Berkeley"}]}],"member":"263","reference":[{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1145\/3197517.3201366"},{"key":"ref38","author":"coumans","year":"2016","journal-title":"Pybullet a python module for physics simulation for games robotics and machine learning"},{"key":"ref33","article-title":"Learning to walk in the real world with minimal human effort","volume":"abs 2002 8550","author":"ha","year":"2020","journal-title":"ArXiv"},{"key":"ref32","article-title":"Efficient adaptation for end-to-end vision-based robotic manipulation","volume":"abs 2004 10190","author":"julian","year":"2020","journal-title":"ArXiv"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2020.2974685"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/IROS40897.2019.8968053"},{"key":"ref37","article-title":"Soft actor-critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor","author":"haarnoja","year":"0","journal-title":"ICML"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2021.XVII.061"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2019.8794207"},{"key":"ref34","article-title":"Data efficient reinforcement learning for legged robots","volume":"abs 1907 3613","author":"yang","year":"0","journal-title":"Conference on Robot Learning (CoRL)"},{"key":"ref10","article-title":"Emergence of locomotion behaviours in rich environments","volume":"abs 1707 2286","author":"heess","year":"2017","journal-title":"ArXiv"},{"key":"ref40","author":"kingma","year":"2017","journal-title":"Adam A method for stochastic optimization"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2018.XIV.010"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2018.8593722"},{"key":"ref13","first-page":"1","article-title":"Learning basketball dribbling skills using trajectory optimization and deep reinforcement learning","volume":"37","author":"liu","year":"2018","journal-title":"ACM Transactions on Graphics (TOG)"},{"key":"ref14","doi-asserted-by":"crossref","first-page":"143:1","DOI":"10.1145\/3197517.3201311","article-title":"Deepmimic: Example-guided deep reinforcement learning of physics-based character skills","volume":"37","author":"peng","year":"2018","journal-title":"ACM Trans Graph"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1145\/3306346.3322972"},{"key":"ref16","article-title":"Learning to walk via deep reinforcement learning","author":"haarnoja","year":"2020","journal-title":"Robotics Science and Systems (RSS)"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1126\/scirobotics.aau5872"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2021.XVII.011"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2020.XVI.064"},{"key":"ref28","article-title":"Cad2rl: Real single-image flight without a single real image","author":"sadeghi","year":"2016","journal-title":"ArXiv Preprint"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/MRA.2015.2505910"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2018.8460528"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2018.8593885"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/ROBOT.2004.1307456"},{"key":"ref29","article-title":"Zero-shot skill composition and simulation-to-real transfer by learning task representations","volume":"abs 1810 2422","author":"he","year":"2018","journal-title":"ArXiv"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2016.7758092"},{"key":"ref8","author":"tedrake","year":"2005","journal-title":"Learning to walk in 20 minutes"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2004.1389841"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1126\/scirobotics.abc5986"},{"key":"ref9","article-title":"Learning cpg sensory feedback with policy gradient for biped loco-motion for a full-body humanoid","author":"endo","year":"0","journal-title":"AAAI"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2020.2972872"},{"key":"ref20","article-title":"Randomized ensembled double q-learning: Learning fast without a model","volume":"abs 2101 5982","author":"chen","year":"2021","journal-title":"ArXiv"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1177\/0278364917694244"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2019.8793865"},{"key":"ref42","article-title":"Robust recovery controller for a quadrupedal robot using deep reinforcement learning","volume":"abs 1901 7517","author":"lee","year":"2019","journal-title":"ArXiv"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2016.7759424"},{"key":"ref41","article-title":"TensorFlow: Large-scale machine learning on heterogeneous systems","author":"abadi","year":"2015","journal-title":"Software"},{"key":"ref23","article-title":"Learning locomotion skills for cassie: Iterative design and sim-to-real","author":"xie","year":"0","journal-title":"Conference on Robot Learning (CoRL)"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA48506.2021.9562091"},{"key":"ref43","article-title":"Advantage-weighted regression: Simple and scalable off-policy reinforcement learning","author":"peng","year":"2019","journal-title":"ArXiv Preprint"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2019.8793789"}],"event":{"name":"2022 IEEE International Conference on Robotics and Automation (ICRA)","location":"Philadelphia, PA, USA","start":{"date-parts":[[2022,5,23]]},"end":{"date-parts":[[2022,5,27]]}},"container-title":["2022 International Conference on Robotics and Automation (ICRA)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9811522\/9811357\/09812166.pdf?arnumber=9812166","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,11,3]],"date-time":"2022-11-03T23:07:50Z","timestamp":1667516870000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9812166\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,5,23]]},"references-count":43,"URL":"https:\/\/doi.org\/10.1109\/icra46639.2022.9812166","relation":{},"subject":[],"published":{"date-parts":[[2022,5,23]]}}}