{"id":"https://openalex.org/W4391272423","doi":"https://doi.org/10.48550/arxiv.2401.13919","title":"WebVoyager: Building an End-to-End Web Agent with Large Multimodal Models","display_name":"WebVoyager: Building an End-to-End Web Agent with Large Multimodal Models","publication_year":2024,"publication_date":"2024-01-25","ids":{"openalex":"https://openalex.org/W4391272423","doi":"https://doi.org/10.48550/arxiv.2401.13919"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2401.13919","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2401.13919","pdf_url":"https://arxiv.org/pdf/2401.13919","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2401.13919","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101807708","display_name":"Hongliang He","orcid":"https://orcid.org/0009-0004-7575-2570"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"He, Hongliang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5024873469","display_name":"Wenlin Yao","orcid":"https://orcid.org/0000-0002-4502-0350"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yao, Wenlin","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5038055180","display_name":"Kaixin Ma","orcid":"https://orcid.org/0000-0001-7414-5673"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ma, Kaixin","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5114860703","display_name":"Wenhao Yu","orcid":"https://orcid.org/0000-0002-9671-8652"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yu, Wenhao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102982899","display_name":"Yong Dai","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Dai, Yong","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100445026","display_name":"Hongming Zhang","orcid":"https://orcid.org/0000-0001-6133-693X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Hongming","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103239171","display_name":"Zhenzhong Lan","orcid":"https://orcid.org/0000-0003-4763-6148"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lan, Zhenzhong","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5034476404","display_name":"Dong Yu","orcid":"https://orcid.org/0000-0003-0520-6844"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yu, Dong","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5101807708"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":true,"cited_by_count":2,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9939000010490417,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9854999780654907,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7621674537658691},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.5717610120773315},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.5461708903312683},{"id":"https://openalex.org/keywords/protocol","display_name":"Protocol (science)","score":0.5338571667671204},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.5329948663711548},{"id":"https://openalex.org/keywords/web-application","display_name":"Web application","score":0.5091413855552673},{"id":"https://openalex.org/keywords/web-accessibility-initiative","display_name":"Web Accessibility Initiative","score":0.5074040293693542},{"id":"https://openalex.org/keywords/limiting","display_name":"Limiting","score":0.48136812448501587},{"id":"https://openalex.org/keywords/end-user","display_name":"End user","score":0.47245028614997864},{"id":"https://openalex.org/keywords/end-to-end-principle","display_name":"End-to-end principle","score":0.43772563338279724},{"id":"https://openalex.org/keywords/web-modeling","display_name":"Web modeling","score":0.4369502663612366},{"id":"https://openalex.org/keywords/web-navigation","display_name":"Web navigation","score":0.43252119421958923},{"id":"https://openalex.org/keywords/human\u2013computer-interaction","display_name":"Human\u2013computer interaction","score":0.3267357647418976},{"id":"https://openalex.org/keywords/the-internet","display_name":"The Internet","score":0.32321906089782715},{"id":"https://openalex.org/keywords/web-intelligence","display_name":"Web intelligence","score":0.2936176657676697},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.24366459250450134},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.10617420077323914},{"id":"https://openalex.org/keywords/systems-engineering","display_name":"Systems engineering","score":0.08008354902267456}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7621674537658691},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.5717610120773315},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.5461708903312683},{"id":"https://openalex.org/C2780385302","wikidata":"https://www.wikidata.org/wiki/Q367158","display_name":"Protocol (science)","level":3,"score":0.5338571667671204},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.5329948663711548},{"id":"https://openalex.org/C118643609","wikidata":"https://www.wikidata.org/wiki/Q189210","display_name":"Web application","level":2,"score":0.5091413855552673},{"id":"https://openalex.org/C197973564","wikidata":"https://www.wikidata.org/wiki/Q636020","display_name":"Web Accessibility Initiative","level":5,"score":0.5074040293693542},{"id":"https://openalex.org/C188198153","wikidata":"https://www.wikidata.org/wiki/Q1613840","display_name":"Limiting","level":2,"score":0.48136812448501587},{"id":"https://openalex.org/C91262260","wikidata":"https://www.wikidata.org/wiki/Q528074","display_name":"End user","level":2,"score":0.47245028614997864},{"id":"https://openalex.org/C74296488","wikidata":"https://www.wikidata.org/wiki/Q2527392","display_name":"End-to-end principle","level":2,"score":0.43772563338279724},{"id":"https://openalex.org/C130436687","wikidata":"https://www.wikidata.org/wiki/Q7978591","display_name":"Web modeling","level":3,"score":0.4369502663612366},{"id":"https://openalex.org/C61096286","wikidata":"https://www.wikidata.org/wiki/Q7978592","display_name":"Web navigation","level":3,"score":0.43252119421958923},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.3267357647418976},{"id":"https://openalex.org/C110875604","wikidata":"https://www.wikidata.org/wiki/Q75","display_name":"The Internet","level":2,"score":0.32321906089782715},{"id":"https://openalex.org/C544335954","wikidata":"https://www.wikidata.org/wiki/Q2553348","display_name":"Web intelligence","level":4,"score":0.2936176657676697},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.24366459250450134},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.10617420077323914},{"id":"https://openalex.org/C201995342","wikidata":"https://www.wikidata.org/wiki/Q682496","display_name":"Systems engineering","level":1,"score":0.08008354902267456},{"id":"https://openalex.org/C204787440","wikidata":"https://www.wikidata.org/wiki/Q188504","display_name":"Alternative medicine","level":2,"score":0.0},{"id":"https://openalex.org/C71924100","wikidata":"https://www.wikidata.org/wiki/Q11190","display_name":"Medicine","level":0,"score":0.0},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0},{"id":"https://openalex.org/C78519656","wikidata":"https://www.wikidata.org/wiki/Q101333","display_name":"Mechanical engineering","level":1,"score":0.0},{"id":"https://openalex.org/C142724271","wikidata":"https://www.wikidata.org/wiki/Q7208","display_name":"Pathology","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2401.13919","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2401.13919","pdf_url":"https://arxiv.org/pdf/2401.13919","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},{"id":"doi:10.48550/arxiv.2401.13919","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2401.13919","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2401.13919","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2401.13919","pdf_url":"https://arxiv.org/pdf/2401.13919","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"sustainable_development_goals":[{"score":0.6499999761581421,"id":"https://metadata.un.org/sdg/9","display_name":"Industry, innovation and infrastructure"}],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4391272423.pdf","grobid_xml":"https://content.openalex.org/works/W4391272423.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4299590256","https://openalex.org/W3163634122","https://openalex.org/W3119482857","https://openalex.org/W2919182614","https://openalex.org/W2166381389","https://openalex.org/W2501053608","https://openalex.org/W2073468858","https://openalex.org/W2108203100","https://openalex.org/W2321239069","https://openalex.org/W68102627"],"abstract_inverted_index":{"The":[0,151],"rapid":[1],"advancement":[2],"of":[3,18,111,135,149,171],"large":[4],"language":[5],"models":[6],"(LLMs)":[7],"has":[8],"led":[9],"to":[10,113],"a":[11,90,123],"new":[12,91],"era":[13],"marked":[14],"by":[15,82,93],"the":[16,133,141,146],"development":[17],"autonomous":[19],"applications":[20],"in":[21,27,45,57,165],"real-world":[22,58,85,95],"scenarios,":[23],"which":[24],"drives":[25],"innovation":[26],"creating":[28],"advanced":[29],"web":[30,33,47,51,74,116,172],"agents.":[31,117,173],"Existing":[32],"agents":[34],"typically":[35],"only":[36,44],"handle":[37],"one":[38],"input":[39],"modality":[40],"and":[41,101,140,168],"are":[42],"evaluated":[43],"simplified":[46],"simulators":[48],"or":[49],"static":[50],"snapshots,":[52],"greatly":[53],"limiting":[54],"their":[55],"applicability":[56],"scenarios.":[59],"To":[60],"bridge":[61],"this":[62],"gap,":[63],"we":[64,88],"introduce":[65,102],"WebVoyager,":[66],"an":[67,103],"innovative":[68],"Large":[69],"Multimodal":[70],"Model":[71],"(LMM)":[72],"powered":[73],"agent":[75],"that":[76,120],"can":[77],"complete":[78],"user":[79],"instructions":[80],"end-to-end":[81],"interacting":[83],"with":[84,159],"websites.":[86],"Moreover,":[87],"establish":[89],"benchmark":[92],"compiling":[94],"tasks":[96],"from":[97],"15":[98],"popular":[99],"websites":[100],"automatic":[104,153],"evaluation":[105,154],"protocol":[106],"leveraging":[107],"multimodal":[108],"understanding":[109],"abilities":[110],"GPT-4V":[112],"evaluate":[114],"open-ended":[115],"We":[118],"show":[119],"WebVoyager":[121,142],"achieves":[122,156],"59.1%":[124],"task":[125],"success":[126],"rate":[127],"on":[128],"our":[129],"benchmark,":[130],"significantly":[131],"surpassing":[132],"performance":[134],"both":[136],"GPT-4":[137],"(All":[138],"Tools)":[139],"(text-only)":[143],"setups,":[144],"underscoring":[145],"exceptional":[147],"capability":[148],"WebVoyager.":[150],"proposed":[152],"metric":[155],"85.3%":[157],"agreement":[158],"human":[160],"judgment,":[161],"indicating":[162],"its":[163],"effectiveness":[164],"providing":[166],"reliable":[167],"accurate":[169],"assessments":[170]},"counts_by_year":[{"year":2025,"cited_by_count":2}],"updated_date":"2026-03-10T16:38:18.471706","created_date":"2025-10-10T00:00:00"}
