{"id":"https://openalex.org/W4386555428","doi":"https://doi.org/10.48550/arxiv.2309.03839","title":"Bootstrapping Adaptive Human-Machine Interfaces with Offline Reinforcement Learning","display_name":"Bootstrapping Adaptive Human-Machine Interfaces with Offline Reinforcement Learning","publication_year":2023,"publication_date":"2023-09-07","ids":{"openalex":"https://openalex.org/W4386555428","doi":"https://doi.org/10.48550/arxiv.2309.03839"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2309.03839","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2309.03839","pdf_url":"https://arxiv.org/pdf/2309.03839","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2309.03839","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5016538750","display_name":"Jensen Gao","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Gao, Jensen","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5109695558","display_name":"Siddharth Reddy","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Reddy, Siddharth","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5045351810","display_name":"Glen Berseth","orcid":"https://orcid.org/0000-0001-7351-8028"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Berseth, Glen","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5005997281","display_name":"Anca D. Dragan","orcid":"https://orcid.org/0000-0001-6312-5466"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Dragan, Anca D.","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5026322200","display_name":"Sergey Levine","orcid":"https://orcid.org/0000-0001-6764-2743"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Levine, Sergey","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5016538750"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10429","display_name":"EEG and Brain-Computer Interfaces","score":0.9987000226974487,"subfield":{"id":"https://openalex.org/subfields/2805","display_name":"Cognitive Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T10429","display_name":"EEG and Brain-Computer Interfaces","score":0.9987000226974487,"subfield":{"id":"https://openalex.org/subfields/2805","display_name":"Cognitive Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T11707","display_name":"Gaze Tracking and Assistive Technology","score":0.9889000058174133,"subfield":{"id":"https://openalex.org/subfields/1709","display_name":"Human-Computer Interaction"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10525","display_name":"Human-Automation Interaction and Safety","score":0.9847000241279602,"subfield":{"id":"https://openalex.org/subfields/3207","display_name":"Social Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7821094393730164},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.6485532522201538},{"id":"https://openalex.org/keywords/teleoperation","display_name":"Teleoperation","score":0.6468324065208435},{"id":"https://openalex.org/keywords/human\u2013computer-interaction","display_name":"Human\u2013computer interaction","score":0.6375827789306641},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.6180847883224487},{"id":"https://openalex.org/keywords/interface","display_name":"Interface (matter)","score":0.5849388837814331},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5300402641296387},{"id":"https://openalex.org/keywords/baseline","display_name":"Baseline (sea)","score":0.5133782625198364},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.4463358521461487},{"id":"https://openalex.org/keywords/brain\u2013computer-interface","display_name":"Brain\u2013computer interface","score":0.4434848427772522},{"id":"https://openalex.org/keywords/user-interface","display_name":"User interface","score":0.42236387729644775},{"id":"https://openalex.org/keywords/robot","display_name":"Robot","score":0.39591920375823975}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7821094393730164},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.6485532522201538},{"id":"https://openalex.org/C161759796","wikidata":"https://www.wikidata.org/wiki/Q3982902","display_name":"Teleoperation","level":3,"score":0.6468324065208435},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.6375827789306641},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.6180847883224487},{"id":"https://openalex.org/C113843644","wikidata":"https://www.wikidata.org/wiki/Q901882","display_name":"Interface (matter)","level":4,"score":0.5849388837814331},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5300402641296387},{"id":"https://openalex.org/C12725497","wikidata":"https://www.wikidata.org/wiki/Q810247","display_name":"Baseline (sea)","level":2,"score":0.5133782625198364},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4463358521461487},{"id":"https://openalex.org/C173201364","wikidata":"https://www.wikidata.org/wiki/Q897410","display_name":"Brain\u2013computer interface","level":3,"score":0.4434848427772522},{"id":"https://openalex.org/C89505385","wikidata":"https://www.wikidata.org/wiki/Q47146","display_name":"User interface","level":2,"score":0.42236387729644775},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.39591920375823975},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.0},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.0},{"id":"https://openalex.org/C118552586","wikidata":"https://www.wikidata.org/wiki/Q7867","display_name":"Psychiatry","level":1,"score":0.0},{"id":"https://openalex.org/C111368507","wikidata":"https://www.wikidata.org/wiki/Q43518","display_name":"Oceanography","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C157915830","wikidata":"https://www.wikidata.org/wiki/Q2928001","display_name":"Bubble","level":2,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C129307140","wikidata":"https://www.wikidata.org/wiki/Q6795880","display_name":"Maximum bubble pressure method","level":3,"score":0.0},{"id":"https://openalex.org/C127313418","wikidata":"https://www.wikidata.org/wiki/Q1069","display_name":"Geology","level":0,"score":0.0},{"id":"https://openalex.org/C522805319","wikidata":"https://www.wikidata.org/wiki/Q179965","display_name":"Electroencephalography","level":2,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2309.03839","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2309.03839","pdf_url":"https://arxiv.org/pdf/2309.03839","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},{"id":"doi:10.48550/arxiv.2309.03839","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2309.03839","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2309.03839","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2309.03839","pdf_url":"https://arxiv.org/pdf/2309.03839","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"sustainable_development_goals":[{"score":0.800000011920929,"display_name":"Peace, Justice and strong institutions","id":"https://metadata.un.org/sdg/16"}],"awards":[{"id":"https://openalex.org/G1432373144","display_name":null,"funder_award_id":"W911NF-21-1-","funder_id":"https://openalex.org/F4320338281","funder_display_name":"Army Research Office"},{"id":"https://openalex.org/G6661146833","display_name":null,"funder_award_id":"W911NF-21-1-0097","funder_id":"https://openalex.org/F4320338281","funder_display_name":"Army Research Office"},{"id":"https://openalex.org/G7452299184","display_name":null,"funder_award_id":"W911NF","funder_id":"https://openalex.org/F4320338281","funder_display_name":"Army Research Office"},{"id":"https://openalex.org/G8895310522","display_name":null,"funder_award_id":"W911NF-17-2-0181","funder_id":"https://openalex.org/F4320338281","funder_display_name":"Army Research Office"},{"id":"https://openalex.org/G8998121839","display_name":null,"funder_award_id":"911NF","funder_id":"https://openalex.org/F4320338281","funder_display_name":"Army Research Office"}],"funders":[{"id":"https://openalex.org/F4320306087","display_name":"Semiconductor Research Corporation","ror":"https://ror.org/047z4n946"},{"id":"https://openalex.org/F4320309949","display_name":"Canadian Institute for Advanced Research","ror":"https://ror.org/01sdtdd95"},{"id":"https://openalex.org/F4320338281","display_name":"Army Research Office","ror":"https://ror.org/05epdh915"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4386555428.pdf","grobid_xml":"https://content.openalex.org/works/W4386555428.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W1923069992","https://openalex.org/W2160982102","https://openalex.org/W2048862765","https://openalex.org/W2770942502","https://openalex.org/W4281943815","https://openalex.org/W2364738582","https://openalex.org/W2160932924","https://openalex.org/W2082997813","https://openalex.org/W2296377172","https://openalex.org/W2946737911"],"abstract_inverted_index":{"Adaptive":[0],"interfaces":[1,221],"can":[2,49,127],"help":[3],"users":[4,53,125],"perform":[5],"sequential":[6],"decision-making":[7],"tasks":[8],"like":[9],"robotic":[10],"teleoperation":[11],"given":[12,115],"noisy,":[13,131],"high-dimensional":[14,132],"command":[15,75,95,157],"signals":[16,76,96,185],"(e.g.,":[17],"from":[18,51,159],"a":[19,61,80,102,114,136,144,155,175,195],"brain-computer":[20],"interface).":[21],"Recent":[22],"advances":[23],"in":[24,54,139,222],"human-in-the-loop":[25],"machine":[26],"learning":[27,63,180],"enable":[28],"such":[29],"systems":[30],"to":[31,39,65,72,77,123,153,181],"improve":[32],"by":[33,42,68,93,148,179],"interacting":[34],"with":[35,200,209,230],"users,":[36],"but":[37],"tend":[38],"be":[40],"limited":[41],"the":[43,90,109,205],"amount":[44],"of":[45,82,238],"data":[46],"that":[47,165,215],"they":[48],"collect":[50],"individual":[52],"practice.":[55],"In":[56],"this":[57,67],"paper,":[58],"we":[59,100],"propose":[60],"reinforcement":[62],"algorithm":[64],"address":[66,89],"training":[69],"an":[70],"interface":[71],"map":[73],"raw":[74],"actions":[78],"using":[79,149],"combination":[81],"offline":[83],"pre-training":[84],"and":[85,97,107,186,204,213],"online":[86],"fine-tuning.":[87],"To":[88],"challenges":[91],"posed":[92],"noisy":[94],"sparse":[98],"rewards,":[99],"develop":[101],"novel":[103],"method":[104,167,217],"for":[105,113],"representing":[106],"inferring":[108],"user's":[110],"long-term":[111],"intent":[112],"trajectory.":[116],"We":[117,191],"primarily":[118],"evaluate":[119,193],"our":[120,166,216,239],"method's":[121],"ability":[122],"assist":[124],"who":[126],"only":[128],"communicate":[129],"through":[130,135],"input":[133],"channels":[134],"user":[137,183,211,232],"study":[138],"which":[140],"12":[141],"participants":[142],"performed":[143],"simulated":[145,196,210,231],"navigation":[146,171],"task":[147,199],"their":[150,160],"eye":[151,201],"gaze":[152,202],"modulate":[154],"128-dimensional":[156],"signal":[158],"webcam.":[161],"The":[162],"results":[163],"show":[164],"enables":[168],"successful":[169],"goal":[170],"more":[172],"often":[173],"than":[174],"baseline":[176,220],"directional":[177],"interface,":[178],"denoise":[182],"commands":[184,233],"provide":[187],"shared":[188],"autonomy":[189],"assistance.":[190],"further":[192],"on":[194],"Sawyer":[197],"pushing":[198],"control,":[203],"Lunar":[206],"Lander":[207],"game":[208],"commands,":[212],"find":[214],"improves":[218],"over":[219],"these":[223],"domains":[224],"as":[225],"well.":[226],"Extensive":[227],"ablation":[228],"experiments":[229],"empirically":[234],"motivate":[235],"each":[236],"component":[237],"method.":[240]},"counts_by_year":[],"updated_date":"2026-04-10T15:06:20.359241","created_date":"2025-10-10T00:00:00"}
