{"id":"https://openalex.org/W4404314527","doi":"https://doi.org/10.1613/jair.1.16457","title":"Towards Robust Offline-to-Online Reinforcement Learning via Uncertainty and Smoothness","display_name":"Towards Robust Offline-to-Online Reinforcement Learning via Uncertainty and Smoothness","publication_year":2024,"publication_date":"2024-11-13","ids":{"openalex":"https://openalex.org/W4404314527","doi":"https://doi.org/10.1613/jair.1.16457"},"language":"en","primary_location":{"id":"doi:10.1613/jair.1.16457","is_oa":true,"landing_page_url":"https://doi.org/10.1613/jair.1.16457","pdf_url":"https://www.jair.org/index.php/jair/article/download/16457/27095","source":{"id":"https://openalex.org/S139930977","display_name":"Journal of Artificial Intelligence Research","issn_l":"1076-9757","issn":["1076-9757","1943-5037"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310315760","host_organization_name":"AI Access Foundation","host_organization_lineage":["https://openalex.org/P4310315760"],"host_organization_lineage_names":["AI Access Foundation"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Artificial Intelligence Research","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://www.jair.org/index.php/jair/article/download/16457/27095","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5034722896","display_name":"Xiaoyu Wen","orcid":null},"institutions":[{"id":"https://openalex.org/I17145004","display_name":"Northwestern Polytechnical University","ror":"https://ror.org/01y0j0j86","country_code":"CN","type":"education","lineage":["https://openalex.org/I17145004"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Xiaoyu Wen","raw_affiliation_strings":["Northwestern Polytechnical University"],"affiliations":[{"raw_affiliation_string":"Northwestern Polytechnical University","institution_ids":["https://openalex.org/I17145004"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101412837","display_name":"Xudong Yu","orcid":"https://orcid.org/0000-0001-5720-1215"},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xudong Yu","raw_affiliation_strings":["Harbin Institute of Technology"],"affiliations":[{"raw_affiliation_string":"Harbin Institute of Technology","institution_ids":["https://openalex.org/I204983213"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100958780","display_name":"Rui Yang","orcid":"https://orcid.org/0000-0003-3738-1612"},"institutions":[{"id":"https://openalex.org/I200769079","display_name":"Hong Kong University of Science and Technology","ror":"https://ror.org/00q4vv597","country_code":"HK","type":"education","lineage":["https://openalex.org/I200769079"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Rui Yang","raw_affiliation_strings":["The Hong Kong University of Science and Technology"],"affiliations":[{"raw_affiliation_string":"The Hong Kong University of Science and Technology","institution_ids":["https://openalex.org/I200769079"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101882098","display_name":"Haoyuan Chen","orcid":"https://orcid.org/0000-0001-9442-1874"},"institutions":[{"id":"https://openalex.org/I17145004","display_name":"Northwestern Polytechnical University","ror":"https://ror.org/01y0j0j86","country_code":"CN","type":"education","lineage":["https://openalex.org/I17145004"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Haoyuan Chen","raw_affiliation_strings":["Northwestern Polytechnical University"],"affiliations":[{"raw_affiliation_string":"Northwestern Polytechnical University","institution_ids":["https://openalex.org/I17145004"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5044788927","display_name":"Chenjia Bai","orcid":"https://orcid.org/0000-0002-8379-9385"},"institutions":[{"id":"https://openalex.org/I17145004","display_name":"Northwestern Polytechnical University","ror":"https://ror.org/01y0j0j86","country_code":"CN","type":"education","lineage":["https://openalex.org/I17145004"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chenjia Bai","raw_affiliation_strings":["Shanghai Artificial Intelligence Laboratory and Shenzhen Research Institute of Northwestern Polytechnical University"],"affiliations":[{"raw_affiliation_string":"Shanghai Artificial Intelligence Laboratory and Shenzhen Research Institute of Northwestern Polytechnical University","institution_ids":["https://openalex.org/I17145004"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100422319","display_name":"Zhen Wang","orcid":"https://orcid.org/0000-0002-3399-5281"},"institutions":[{"id":"https://openalex.org/I17145004","display_name":"Northwestern Polytechnical University","ror":"https://ror.org/01y0j0j86","country_code":"CN","type":"education","lineage":["https://openalex.org/I17145004"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhen Wang","raw_affiliation_strings":["Northwestern Polytechnical University"],"affiliations":[{"raw_affiliation_string":"Northwestern Polytechnical University","institution_ids":["https://openalex.org/I17145004"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5034722896"],"corresponding_institution_ids":["https://openalex.org/I17145004"],"apc_list":null,"apc_paid":null,"fwci":1.3379,"has_fulltext":true,"cited_by_count":4,"citation_normalized_percentile":{"value":0.8455464,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":95,"max":99},"biblio":{"volume":"81","issue":null,"first_page":"481","last_page":"509"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9937000274658203,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9937000274658203,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10603","display_name":"Smart Grid Energy Management","score":0.9930999875068665,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12101","display_name":"Advanced Bandit Algorithms Research","score":0.9902999997138977,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.6969872713088989},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.647148847579956},{"id":"https://openalex.org/keywords/smoothness","display_name":"Smoothness","score":0.622400164604187},{"id":"https://openalex.org/keywords/reinforcement","display_name":"Reinforcement","score":0.5232250094413757},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5135862827301025},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.5058512687683105},{"id":"https://openalex.org/keywords/online-learning","display_name":"Online learning","score":0.42124444246292114},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.19522598385810852},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.14715281128883362},{"id":"https://openalex.org/keywords/multimedia","display_name":"Multimedia","score":0.08792588114738464},{"id":"https://openalex.org/keywords/social-psychology","display_name":"Social psychology","score":0.08519589900970459}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.6969872713088989},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.647148847579956},{"id":"https://openalex.org/C102634674","wikidata":"https://www.wikidata.org/wiki/Q868473","display_name":"Smoothness","level":2,"score":0.622400164604187},{"id":"https://openalex.org/C67203356","wikidata":"https://www.wikidata.org/wiki/Q1321905","display_name":"Reinforcement","level":2,"score":0.5232250094413757},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5135862827301025},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5058512687683105},{"id":"https://openalex.org/C2986087404","wikidata":"https://www.wikidata.org/wiki/Q15946010","display_name":"Online learning","level":2,"score":0.42124444246292114},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.19522598385810852},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.14715281128883362},{"id":"https://openalex.org/C49774154","wikidata":"https://www.wikidata.org/wiki/Q131765","display_name":"Multimedia","level":1,"score":0.08792588114738464},{"id":"https://openalex.org/C77805123","wikidata":"https://www.wikidata.org/wiki/Q161272","display_name":"Social psychology","level":1,"score":0.08519589900970459},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1613/jair.1.16457","is_oa":true,"landing_page_url":"https://doi.org/10.1613/jair.1.16457","pdf_url":"https://www.jair.org/index.php/jair/article/download/16457/27095","source":{"id":"https://openalex.org/S139930977","display_name":"Journal of Artificial Intelligence Research","issn_l":"1076-9757","issn":["1076-9757","1943-5037"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310315760","host_organization_name":"AI Access Foundation","host_organization_lineage":["https://openalex.org/P4310315760"],"host_organization_lineage_names":["AI Access Foundation"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Artificial Intelligence Research","raw_type":"journal-article"},{"id":"pmh:oai:repository.hkust.edu.hk:1783.1-146810","is_oa":false,"landing_page_url":"http://repository.hkust.edu.hk/ir/Record/1783.1-146810","pdf_url":null,"source":{"id":"https://openalex.org/S4306401796","display_name":"Rare & Special e-Zone (The Hong Kong University of Science and Technology)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I200769079","host_organization_name":"Hong Kong University of Science and Technology","host_organization_lineage":["https://openalex.org/I200769079"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"}],"best_oa_location":{"id":"doi:10.1613/jair.1.16457","is_oa":true,"landing_page_url":"https://doi.org/10.1613/jair.1.16457","pdf_url":"https://www.jair.org/index.php/jair/article/download/16457/27095","source":{"id":"https://openalex.org/S139930977","display_name":"Journal of Artificial Intelligence Research","issn_l":"1076-9757","issn":["1076-9757","1943-5037"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310315760","host_organization_name":"AI Access Foundation","host_organization_lineage":["https://openalex.org/P4310315760"],"host_organization_lineage_names":["AI Access Foundation"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Artificial Intelligence Research","raw_type":"journal-article"},"sustainable_development_goals":[{"score":0.6000000238418579,"id":"https://metadata.un.org/sdg/16","display_name":"Peace, Justice and strong institutions"}],"awards":[{"id":"https://openalex.org/G192882225","display_name":null,"funder_award_id":"U22B2036","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G4668225214","display_name":null,"funder_award_id":"62025602","funder_id":"https://openalex.org/F4320336125","funder_display_name":"National Science Fund for Distinguished Young Scholars"},{"id":"https://openalex.org/G5167091242","display_name":null,"funder_award_id":"No. 1","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G5249178904","display_name":null,"funder_award_id":"Grant No. 6","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G6129628694","display_name":null,"funder_award_id":"11931915","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G7108700631","display_name":null,"funder_award_id":"62306242","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G714647208","display_name":null,"funder_award_id":"62025602","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G8833332184","display_name":null,"funder_award_id":"6230624","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320316083","display_name":"Tencent","ror":"https://ror.org/00hhjss72"},{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320336125","display_name":"National Science Fund for Distinguished Young Scholars","ror":null}],"has_content":{"pdf":true,"grobid_xml":false},"content_urls":{"pdf":"https://content.openalex.org/works/W4404314527.pdf"},"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4310083477","https://openalex.org/W2328553770","https://openalex.org/W2920061524","https://openalex.org/W1977959518","https://openalex.org/W2038908348","https://openalex.org/W2107890255","https://openalex.org/W2106552856","https://openalex.org/W2145821588","https://openalex.org/W2086122291","https://openalex.org/W1987513656"],"abstract_inverted_index":{"To":[0,86],"obtain":[1],"a":[2,12,44,135],"near-optimal":[3],"policy":[4,81,126],"with":[5,38,187],"fewer":[6],"interactions":[7],"in":[8,83,112,139,152,166,178],"Reinforcement":[9],"Learning":[10],"(RL),":[11],"promising":[13],"approach":[14],"involves":[15],"the":[16,39,58,92,109,147,157,174],"combination":[17],"of":[18,176],"offline":[19,27,66,69,100],"RL,":[20,31],"which":[21,32,130],"enhances":[22],"sample":[23],"efficiency":[24],"by":[25,36],"leveraging":[26],"datasets,":[28],"and":[29,65,76,104,106,122,127,159,183],"online":[30,53,63,113,140,189],"explores":[33],"informative":[34],"transitions":[35],"interacting":[37],"environment.":[40],"Offline-to-Online":[41],"RL":[42,70],"provides":[43],"paradigm":[45],"for":[46,119,125],"improving":[47],"an":[48],"offline-trained":[49],"agent":[50],"within":[51],"limited":[52,188],"interactions.":[54,190],"However,":[55],"due":[56],"to":[57,78,98,107,133,146,162],"significant":[59,185],"distribution":[60,169],"shift":[61],"between":[62],"experiences":[64],"data,":[67],"most":[68],"algorithms":[71],"suffer":[72],"from":[73],"performance":[74,110],"drops":[75],"fail":[77],"achieve":[79],"stable":[80,180],"improvement":[82,186],"offline-to-online":[84,167,181],"adaptation.":[85,114],"address":[87],"this":[88],"problem,":[89],"we":[90],"propose":[91],"Robust":[93],"Offlineto-Online":[94],"(RO2O)":[95],"algorithm,":[96],"designed":[97],"enhance":[99],"policies":[101],"through":[102],"uncertainty":[103,120,158],"smoothness,":[105,129],"mitigate":[108],"drop":[111],"Specifically,":[115],"RO2O":[116,132,177],"incorporates":[117],"Q-ensemble":[118],"penalty":[121],"adversarial":[123],"samples":[124],"value":[128],"enable":[131],"maintain":[134],"consistent":[136],"learning":[137,148,182],"procedure":[138],"adaptation":[141],"without":[142],"requiring":[143],"special":[144],"changes":[145],"objective.":[149],"Theoretical":[150],"analyses":[151],"linear":[153],"MDPs":[154],"demonstrate":[155],"that":[156],"smoothness":[160],"lead":[161],"tighter":[163],"optimality":[164],"bound":[165],"against":[168],"shift.":[170],"Experimental":[171],"results":[172],"illustrate":[173],"superiority":[175],"facilitating":[179],"achieving":[184]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":2}],"updated_date":"2026-04-21T08:09:41.155169","created_date":"2024-11-14T00:00:00"}
