{"id":"https://openalex.org/W4390091466","doi":"https://doi.org/10.48550/arxiv.2312.12145","title":"OVD-Explorer: Optimism Should Not Be the Sole Pursuit of Exploration in Noisy Environments","display_name":"OVD-Explorer: Optimism Should Not Be the Sole Pursuit of Exploration in Noisy Environments","publication_year":2023,"publication_date":"2023-12-19","ids":{"openalex":"https://openalex.org/W4390091466","doi":"https://doi.org/10.48550/arxiv.2312.12145"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2312.12145","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2312.12145","pdf_url":"https://arxiv.org/pdf/2312.12145","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":null},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2312.12145","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100766553","display_name":"Jinyi Liu","orcid":"https://orcid.org/0000-0002-4537-348X"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Liu, Jinyi","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100376364","display_name":"Zhi Wang","orcid":"https://orcid.org/0000-0001-6083-9068"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Zhi","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5109988926","display_name":"Yan Zheng","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zheng, Yan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5047509839","display_name":"Jianye Hao","orcid":"https://orcid.org/0000-0002-0422-8235"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hao, Jianye","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5044788927","display_name":"Chenjia Bai","orcid":"https://orcid.org/0000-0002-8379-9385"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Bai, Chenjia","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100777718","display_name":"Junjie Ye","orcid":"https://orcid.org/0009-0004-0921-6323"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ye, Junjie","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100422329","display_name":"Zhen Wang","orcid":"https://orcid.org/0000-0002-4318-9996"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Zhen","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5004540162","display_name":"Haiyin Piao","orcid":"https://orcid.org/0000-0002-8519-4750"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Piao, Haiyin","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5100604420","display_name":"Yang Sun","orcid":"https://orcid.org/0000-0002-7423-4411"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sun, Yang","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":9,"corresponding_author_ids":["https://openalex.org/A5100766553"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12101","display_name":"Advanced Bandit Algorithms Research","score":0.995199978351593,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T12101","display_name":"Advanced Bandit Algorithms Research","score":0.995199978351593,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9940999746322632,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9846000075340271,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/noise","display_name":"Noise (video)","score":0.7284989953041077},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6991075873374939},{"id":"https://openalex.org/keywords/optimism","display_name":"Optimism","score":0.6274849772453308},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.6142697334289551},{"id":"https://openalex.org/keywords/face","display_name":"Face (sociological concept)","score":0.4630921185016632},{"id":"https://openalex.org/keywords/mainstream","display_name":"Mainstream","score":0.4255199134349823},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.39458775520324707},{"id":"https://openalex.org/keywords/data-science","display_name":"Data science","score":0.3862297832965851},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.12901201844215393},{"id":"https://openalex.org/keywords/political-science","display_name":"Political science","score":0.061525434255599976}],"concepts":[{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.7284989953041077},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6991075873374939},{"id":"https://openalex.org/C204017024","wikidata":"https://www.wikidata.org/wiki/Q485446","display_name":"Optimism","level":2,"score":0.6274849772453308},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.6142697334289551},{"id":"https://openalex.org/C2779304628","wikidata":"https://www.wikidata.org/wiki/Q3503480","display_name":"Face (sociological concept)","level":2,"score":0.4630921185016632},{"id":"https://openalex.org/C2777617010","wikidata":"https://www.wikidata.org/wiki/Q18957","display_name":"Mainstream","level":2,"score":0.4255199134349823},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.39458775520324707},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.3862297832965851},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.12901201844215393},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.061525434255599976},{"id":"https://openalex.org/C77805123","wikidata":"https://www.wikidata.org/wiki/Q161272","display_name":"Social psychology","level":1,"score":0.0},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.0},{"id":"https://openalex.org/C36289849","wikidata":"https://www.wikidata.org/wiki/Q34749","display_name":"Social science","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2312.12145","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2312.12145","pdf_url":"https://arxiv.org/pdf/2312.12145","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":null},{"id":"doi:10.48550/arxiv.2312.12145","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2312.12145","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2312.12145","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2312.12145","pdf_url":"https://arxiv.org/pdf/2312.12145","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":null},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G325098684","display_name":null,"funder_award_id":"92370132","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G8734799983","display_name":null,"funder_award_id":"62106172","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W1583826057","https://openalex.org/W2377237701","https://openalex.org/W2360099860","https://openalex.org/W4323893170","https://openalex.org/W2352463596","https://openalex.org/W2380850119","https://openalex.org/W2101450440","https://openalex.org/W2383675217","https://openalex.org/W2376151201","https://openalex.org/W2393898889"],"abstract_inverted_index":{"In":[0,72],"reinforcement":[1],"learning,":[2],"the":[3,6,28,97,127,133],"optimism":[4],"in":[5,27,50,67,103,137],"face":[7],"of":[8,30,42,96,135],"uncertainty":[9],"(OFU)":[10],"is":[11],"a":[12,59,84,93],"mainstream":[13],"principle":[14],"for":[15,88],"directing":[16],"exploration":[17,36,47,56,87,99],"towards":[18],"less":[19],"explored":[20],"areas,":[21,44],"characterized":[22],"by":[23],"higher":[24],"uncertainty.":[25],"However,":[26],"presence":[29],"environmental":[31],"stochasticity":[32],"(noise),":[33],"purely":[34],"optimistic":[35,86,104,140],"may":[37],"lead":[38],"to":[39,63,82,110],"excessive":[40],"probing":[41],"high-noise":[43,68],"consequently":[45],"impeding":[46],"efficiency.":[48],"Hence,":[49],"exploring":[51],"noisy":[52],"environments,":[53],"while":[54],"optimism-driven":[55],"serves":[57],"as":[58],"foundation,":[60],"prudent":[61],"attention":[62],"alleviating":[64],"unnecessary":[65],"over-exploration":[66],"areas":[69],"becomes":[70],"beneficial.":[71],"this":[73],"work,":[74],"we":[75],"propose":[76],"Optimistic":[77],"Value":[78],"Distribution":[79],"Explorer":[80],"(OVD-Explorer)":[81],"achieve":[83],"noise-aware":[85,139],"continuous":[89,120],"control.":[90],"OVD-Explorer":[91,114,136],"proposes":[92],"new":[94],"measurement":[95],"policy's":[98],"ability":[100],"considering":[101],"noise":[102],"perspectives,":[105],"and":[106,129],"leverages":[107],"gradient":[108],"ascent":[109],"drive":[111],"exploration.":[112,141],"Practically,":[113],"can":[115],"be":[116],"easily":[117],"integrated":[118],"with":[119],"control":[121],"RL":[122],"algorithms.":[123],"Extensive":[124],"evaluations":[125],"on":[126],"MuJoCo":[128],"GridChaos":[130],"tasks":[131],"demonstrate":[132],"superiority":[134],"achieving":[138]},"counts_by_year":[],"updated_date":"2026-04-21T08:09:41.155169","created_date":"2025-10-10T00:00:00"}
