{"id":"https://openalex.org/W4319985853","doi":"https://doi.org/10.48550/arxiv.2206.12542","title":"Value-Consistent Representation Learning for Data-Efficient Reinforcement Learning","display_name":"Value-Consistent Representation Learning for Data-Efficient Reinforcement Learning","publication_year":2022,"publication_date":"2022-06-25","ids":{"openalex":"https://openalex.org/W4319985853","doi":"https://doi.org/10.48550/arxiv.2206.12542"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2206.12542","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2206.12542","pdf_url":"https://arxiv.org/pdf/2206.12542","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2206.12542","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100369313","display_name":"Yue Yang","orcid":"https://orcid.org/0009-0003-5641-2899"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Yue, Yang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5032599820","display_name":"Bingyi Kang","orcid":"https://orcid.org/0000-0003-2637-4695"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kang, Bingyi","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5062979221","display_name":"Zhongwen Xu","orcid":"https://orcid.org/0000-0002-9348-7262"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xu, Zhongwen","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5013240918","display_name":"Gao Huang","orcid":"https://orcid.org/0000-0002-7251-0988"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Huang, Gao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5100381753","display_name":"Shuicheng Yan","orcid":"https://orcid.org/0000-0001-8906-3777"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yan, Shuicheng","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5100369313"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9739000201225281,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9739000201225281,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8352948427200317},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7169884443283081},{"id":"https://openalex.org/keywords/boosting","display_name":"Boosting (machine learning)","score":0.684153139591217},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6633536219596863},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.6173826456069946},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.5562129020690918},{"id":"https://openalex.org/keywords/feature-learning","display_name":"Feature learning","score":0.5108249187469482}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8352948427200317},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7169884443283081},{"id":"https://openalex.org/C46686674","wikidata":"https://www.wikidata.org/wiki/Q466303","display_name":"Boosting (machine learning)","level":2,"score":0.684153139591217},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6633536219596863},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.6173826456069946},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5562129020690918},{"id":"https://openalex.org/C59404180","wikidata":"https://www.wikidata.org/wiki/Q17013334","display_name":"Feature learning","level":2,"score":0.5108249187469482},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0},{"id":"https://openalex.org/C94625758","wikidata":"https://www.wikidata.org/wiki/Q7163","display_name":"Politics","level":2,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2206.12542","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2206.12542","pdf_url":"https://arxiv.org/pdf/2206.12542","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},{"id":"doi:10.48550/arxiv.2206.12542","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2206.12542","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2206.12542","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2206.12542","pdf_url":"https://arxiv.org/pdf/2206.12542","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"sustainable_development_goals":[{"display_name":"Peace, Justice and strong institutions","score":0.75,"id":"https://metadata.un.org/sdg/16"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W2125652721","https://openalex.org/W1540371141","https://openalex.org/W4231274751","https://openalex.org/W1549363203","https://openalex.org/W2154063878","https://openalex.org/W2556012038","https://openalex.org/W1489772951","https://openalex.org/W1538046993","https://openalex.org/W3082059448","https://openalex.org/W4313640622"],"abstract_inverted_index":{"Deep":[0],"reinforcement":[1],"learning":[2,23,44,106],"(RL)":[3],"algorithms":[4],"suffer":[5],"severe":[6],"performance":[7,244],"degradation":[8],"when":[9],"the":[10,61,70,91,125,132,137,157,183,196,204,208],"interaction":[11],"data":[12,46],"is":[13,57,63,177],"scarce,":[14],"which":[15,56],"limits":[16],"their":[17,228],"real-world":[18],"application.":[19],"Recently,":[20],"visual":[21,75],"representation":[22,72,105],"has":[24,235],"been":[25,236],"shown":[26],"to":[27,48,108,115,123,130,181,186,226],"be":[28,78],"effective":[29],"and":[30,45,89,140,167,179,210,221],"promising":[31],"for":[32,53,80,85,207,230,245],"boosting":[33],"sample":[34,232],"efficiency":[35],"in":[36,65],"RL.":[37],"These":[38],"methods":[39,76,240],"usually":[40],"rely":[41],"on":[42,136,164,218],"contrastive":[43],"augmentation":[47],"train":[49],"a":[50,100,121,141,152,161,175,188],"transition":[51],"model":[52,62,122],"state":[54,87,127,150,154,185],"prediction,":[55],"different":[58],"from":[59],"how":[60],"used":[64],"RL--performing":[66],"value-based":[67],"planning.":[68],"Accordingly,":[69],"learned":[71],"by":[73,156,195],"these":[74],"may":[77],"good":[79],"recognition":[81],"but":[82],"not":[83],"optimal":[84],"estimating":[86],"value":[88,191],"solving":[90],"decision":[92],"problem.":[93],"To":[94],"address":[95],"this":[96,148],"issue,":[97],"we":[98],"propose":[99],"novel":[101],"method,":[102],"called":[103],"value-consistent":[104],"(VCR),":[107],"learn":[109],"representations":[110],"that":[111,194,238],"are":[112],"directly":[113],"related":[114],"decision-making.":[116],"More":[117],"specifically,":[118],"VCR":[119,159],"trains":[120],"predict":[124],"future":[126],"(also":[128],"referred":[129],"as":[131,193],"''imagined":[133],"state'')":[134],"based":[135],"current":[138],"one":[139],"sequence":[142],"of":[143,146,171,203],"actions.":[144],"Instead":[145],"aligning":[147],"imagined":[149,184],"with":[151],"real":[153,197],"returned":[155],"environment,":[158],"applies":[160],"$Q$-value":[162],"head":[163],"both":[165],"states":[166],"obtains":[168],"two":[169,201],"distributions":[170],"action":[172,190,212],"values.":[173],"Then":[174],"distance":[176],"computed":[178],"minimized":[180],"force":[182],"produce":[187],"similar":[189],"prediction":[192],"state.":[198],"We":[199,215],"develop":[200],"implementations":[202],"above":[205],"idea":[206],"discrete":[209],"continuous":[211],"spaces":[213],"respectively.":[214],"conduct":[216],"experiments":[217],"Atari":[219],"100K":[220],"DeepMind":[222],"Control":[223],"Suite":[224],"benchmarks":[225],"validate":[227],"effectiveness":[229],"improving":[231],"efficiency.":[233],"It":[234],"demonstrated":[237],"our":[239],"achieve":[241],"new":[242],"state-of-the-art":[243],"search-free":[246],"RL":[247],"algorithms.":[248]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2026-02-09T09:26:11.010843","created_date":"2025-10-10T00:00:00"}
