{"id":"https://openalex.org/W4380137071","doi":"https://doi.org/10.48550/arxiv.2306.05412","title":"Decoupled Prioritized Resampling for Offline RL","display_name":"Decoupled Prioritized Resampling for Offline RL","publication_year":2023,"publication_date":"2023-06-08","ids":{"openalex":"https://openalex.org/W4380137071","doi":"https://doi.org/10.48550/arxiv.2306.05412"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2306.05412","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2306.05412","pdf_url":"https://arxiv.org/pdf/2306.05412","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2306.05412","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100369313","display_name":"Yue Yang","orcid":"https://orcid.org/0009-0003-5641-2899"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Yue, Yang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5032599820","display_name":"Bingyi Kang","orcid":"https://orcid.org/0000-0003-2637-4695"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kang, Bingyi","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100321036","display_name":"Xiao Ma","orcid":"https://orcid.org/0009-0001-1088-5776"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ma, Xiao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Yang, Qisen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yang, Qisen","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5013240918","display_name":"Gao Huang","orcid":"https://orcid.org/0000-0002-7251-0988"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Huang, Gao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101868179","display_name":"Shiji Song","orcid":"https://orcid.org/0000-0003-0858-1770"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Song, Shiji","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5100381753","display_name":"Shuicheng Yan","orcid":"https://orcid.org/0000-0001-8906-3777"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yan, Shuicheng","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5100369313"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":true,"cited_by_count":3,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T13283","display_name":"Mental Health Research Topics","score":0.9803000092506409,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T13283","display_name":"Mental Health Research Topics","score":0.9803000092506409,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10603","display_name":"Smart Grid Energy Management","score":0.9689000248908997,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8008207082748413},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6908923983573914},{"id":"https://openalex.org/keywords/baseline","display_name":"Baseline (sea)","score":0.5928447842597961},{"id":"https://openalex.org/keywords/class","display_name":"Class (philosophy)","score":0.5178540349006653},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.502469539642334},{"id":"https://openalex.org/keywords/component","display_name":"Component (thermodynamics)","score":0.4897429943084717},{"id":"https://openalex.org/keywords/computation","display_name":"Computation","score":0.4757803976535797},{"id":"https://openalex.org/keywords/focus","display_name":"Focus (optics)","score":0.46496325731277466},{"id":"https://openalex.org/keywords/online-and-offline","display_name":"Online and offline","score":0.4293597340583801},{"id":"https://openalex.org/keywords/trajectory","display_name":"Trajectory","score":0.42589038610458374},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.23069578409194946},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.20454943180084229},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.15033552050590515}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8008207082748413},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6908923983573914},{"id":"https://openalex.org/C12725497","wikidata":"https://www.wikidata.org/wiki/Q810247","display_name":"Baseline (sea)","level":2,"score":0.5928447842597961},{"id":"https://openalex.org/C2777212361","wikidata":"https://www.wikidata.org/wiki/Q5127848","display_name":"Class (philosophy)","level":2,"score":0.5178540349006653},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.502469539642334},{"id":"https://openalex.org/C168167062","wikidata":"https://www.wikidata.org/wiki/Q1117970","display_name":"Component (thermodynamics)","level":2,"score":0.4897429943084717},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.4757803976535797},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.46496325731277466},{"id":"https://openalex.org/C2780102126","wikidata":"https://www.wikidata.org/wiki/Q10928179","display_name":"Online and offline","level":2,"score":0.4293597340583801},{"id":"https://openalex.org/C13662910","wikidata":"https://www.wikidata.org/wiki/Q193139","display_name":"Trajectory","level":2,"score":0.42589038610458374},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.23069578409194946},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.20454943180084229},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.15033552050590515},{"id":"https://openalex.org/C111368507","wikidata":"https://www.wikidata.org/wiki/Q43518","display_name":"Oceanography","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C97355855","wikidata":"https://www.wikidata.org/wiki/Q11473","display_name":"Thermodynamics","level":1,"score":0.0},{"id":"https://openalex.org/C1276947","wikidata":"https://www.wikidata.org/wiki/Q333","display_name":"Astronomy","level":1,"score":0.0},{"id":"https://openalex.org/C120665830","wikidata":"https://www.wikidata.org/wiki/Q14620","display_name":"Optics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C127313418","wikidata":"https://www.wikidata.org/wiki/Q1069","display_name":"Geology","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2306.05412","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2306.05412","pdf_url":"https://arxiv.org/pdf/2306.05412","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},{"id":"doi:10.48550/arxiv.2306.05412","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2306.05412","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2306.05412","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2306.05412","pdf_url":"https://arxiv.org/pdf/2306.05412","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4380137071.pdf","grobid_xml":"https://content.openalex.org/works/W4380137071.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W2383111961","https://openalex.org/W2365952365","https://openalex.org/W2352448290","https://openalex.org/W2380820513","https://openalex.org/W2913146933","https://openalex.org/W2372385138","https://openalex.org/W4296359239","https://openalex.org/W2101155126","https://openalex.org/W2043093291","https://openalex.org/W4225619808"],"abstract_inverted_index":{"Offline":[0,59],"reinforcement":[1],"learning":[2],"(RL)":[3],"is":[4,110,145],"challenged":[5],"by":[6,126],"the":[7,25,29,50,181],"distributional":[8],"shift":[9],"problem.":[10],"To":[11,53],"address":[12],"this":[13,55,88,102],"problem,":[14],"existing":[15],"works":[16],"mainly":[17],"focus":[18],"on":[19,130,159],"designing":[20],"sophisticated":[21],"policy":[22,27],"constraints":[23,34],"between":[24],"learned":[26,51],"and":[28,40,98,169,177,188],"behavior":[30,96],"policy.":[31,52],"However,":[32],"these":[33],"are":[35,191],"applied":[36],"equally":[37],"to":[38,71,101,112,122],"well-performing":[39],"inferior":[41],"actions":[42],"through":[43],"uniform":[44],"sampling,":[45],"which":[46],"might":[47],"negatively":[48],"affect":[49],"alleviate":[54],"issue,":[56],"we":[57,85,156],"propose":[58],"Prioritized":[60],"Experience":[61],"Replay":[62],"(OPER),":[63],"featuring":[64],"a":[65,105,114,131,146],"class":[66,89],"of":[67,90],"priority":[68,91,124,189],"functions":[69,92],"designed":[70],"prioritize":[72],"highly-rewarding":[73],"transitions,":[74],"making":[75],"them":[76],"more":[77],"frequently":[78],"visited":[79],"during":[80],"training.":[81],"Through":[82],"theoretical":[83],"analysis,":[84],"show":[86],"that":[87,174],"induce":[93],"an":[94],"improved":[95,103],"policy,":[97,104],"when":[99],"constrained":[100],"policy-constrained":[106],"offline":[107,150],"RL":[108,151],"algorithm":[109],"likely":[111],"yield":[113],"better":[115],"solution.":[116],"We":[117],"develop":[118],"two":[119],"practical":[120],"strategies":[121],"obtain":[123],"weights":[125,190],"estimating":[127],"advantages":[128],"based":[129],"fitted":[132],"value":[133],"network":[134],"(OPER-A)":[135],"or":[136],"utilizing":[137],"trajectory":[138],"returns":[139],"(OPER-R)":[140],"for":[141,149,183],"quick":[142],"computation.":[143],"OPER":[144,158],"plug-and-play":[147],"component":[148],"algorithms.":[152],"As":[153],"case":[154],"studies,":[155],"evaluate":[157],"five":[160],"different":[161],"algorithms,":[162],"including":[163],"BC,":[164],"TD3+BC,":[165],"Onestep":[166],"RL,":[167],"CQL,":[168],"IQL.":[170],"Extensive":[171],"experiments":[172],"demonstrate":[173],"both":[175],"OPER-A":[176],"OPER-R":[178],"significantly":[179],"improve":[180],"performance":[182],"all":[184],"baseline":[185],"methods.":[186],"Codes":[187],"availiable":[192],"at":[193],"https://github.com/sail-sg/OPER.":[194]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":2}],"updated_date":"2026-03-25T23:56:10.502304","created_date":"2023-06-10T00:00:00"}
