{"id":"https://openalex.org/W4404312229","doi":"https://doi.org/10.48550/arxiv.2410.19609","title":"OpenWebVoyager: Building Multimodal Web Agents via Iterative Real-World Exploration, Feedback and Optimization","display_name":"OpenWebVoyager: Building Multimodal Web Agents via Iterative Real-World Exploration, Feedback and Optimization","publication_year":2024,"publication_date":"2024-10-25","ids":{"openalex":"https://openalex.org/W4404312229","doi":"https://doi.org/10.48550/arxiv.2410.19609"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2410.19609","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2410.19609","pdf_url":"https://arxiv.org/pdf/2410.19609","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2410.19609","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101434712","display_name":"Hongliang He","orcid":"https://orcid.org/0000-0002-6099-4356"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"He, Hongliang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5024873469","display_name":"Wenlin Yao","orcid":"https://orcid.org/0000-0002-4502-0350"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yao, Wenlin","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5038055180","display_name":"Kaixin Ma","orcid":"https://orcid.org/0000-0001-7414-5673"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ma, Kaixin","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5114860703","display_name":"Wenhao Yu","orcid":"https://orcid.org/0000-0002-9671-8652"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yu, Wenhao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100445026","display_name":"Hongming Zhang","orcid":"https://orcid.org/0000-0001-6133-693X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Hongming","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5046409172","display_name":"Tianqing Fang","orcid":"https://orcid.org/0000-0002-0186-8253"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Fang, Tianqing","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103239171","display_name":"Zhenzhong Lan","orcid":"https://orcid.org/0000-0003-4763-6148"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lan, Zhenzhong","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5034476404","display_name":"Dong Yu","orcid":"https://orcid.org/0000-0003-0520-6844"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yu, Dong","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5101434712"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9900000095367432,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9900000095367432,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10456","display_name":"Multi-Agent Systems and Negotiation","score":0.9821000099182129,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10215","display_name":"Semantic Web and Ontologies","score":0.9789000153541565,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5301279425621033},{"id":"https://openalex.org/keywords/human\u2013computer-interaction","display_name":"Human\u2013computer interaction","score":0.45080819725990295}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5301279425621033},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.45080819725990295}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2410.19609","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2410.19609","pdf_url":"https://arxiv.org/pdf/2410.19609","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},{"id":"doi:10.48550/arxiv.2410.19609","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2410.19609","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2410.19609","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2410.19609","pdf_url":"https://arxiv.org/pdf/2410.19609","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":false},"content_urls":{"pdf":"https://content.openalex.org/works/W4404312229.pdf"},"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W4396696052"],"abstract_inverted_index":{"The":[0],"rapid":[1],"development":[2,96],"of":[3,25,97],"large":[4],"language":[5],"and":[6,47,80,107,133],"multimodal":[7,77,98],"models":[8],"has":[9],"sparked":[10],"significant":[11],"interest":[12],"in":[13,57],"using":[14],"proprietary":[15],"models,":[16],"such":[17],"as":[18],"GPT-4o,":[19],"to":[20,38,44,70,72,93,119],"develop":[21],"autonomous":[22],"agents":[23,40,56,68],"capable":[24],"handling":[26],"real-world":[27,105],"scenarios":[28],"like":[29],"web":[30,99,132,169],"navigation.":[31],"Although":[32],"recent":[33],"open-source":[34,90],"efforts":[35],"have":[36],"tried":[37],"equip":[39],"with":[41,116],"the":[42,61,95,113,121,127,130],"ability":[43],"explore":[45,129],"environments":[46,59],"continuously":[48],"improve":[49,108],"over":[50],"time,":[51],"they":[52],"are":[53,64],"building":[54],"text-only":[55],"synthetic":[58],"where":[60],"reward":[62],"signals":[63],"clearly":[65],"defined.":[66],"Such":[67],"struggle":[69],"generalize":[71],"realistic":[73],"settings":[74],"that":[75,101,167],"require":[76],"perception":[78],"abilities":[79],"lack":[81],"ground-truth":[82],"signals.":[83],"In":[84],"this":[85],"paper,":[86],"we":[87],"introduce":[88],"an":[89],"framework":[91],"designed":[92],"facilitate":[94],"agent":[100,128,170],"can":[102,159],"autonomously":[103],"conduct":[104],"exploration":[106],"itself.":[109],"We":[110,124],"first":[111],"train":[112],"base":[114],"model":[115],"imitation":[117],"learning":[118,147],"gain":[120],"basic":[122],"abilities.":[123],"then":[125],"let":[126],"open":[131],"collect":[134],"feedback":[135],"on":[136],"its":[137,144],"trajectories.":[138],"After":[139],"that,":[140],"it":[141],"further":[142],"improves":[143,172],"policy":[145],"by":[146,152],"from":[148],"well-performing":[149],"trajectories":[150],"judged":[151],"another":[153],"general-purpose":[154],"model.":[155],"This":[156],"exploration-feedback-optimization":[157],"cycle":[158],"continue":[160],"for":[161],"several":[162],"iterations.":[163],"Experimental":[164],"results":[165],"show":[166],"our":[168],"successfully":[171],"itself":[173],"after":[174],"each":[175],"iteration,":[176],"demonstrating":[177],"strong":[178],"performance":[179],"across":[180],"multiple":[181],"test":[182],"sets.":[183]},"counts_by_year":[],"updated_date":"2026-03-13T16:22:10.518609","created_date":"2024-11-14T00:00:00"}
