{"id":"https://openalex.org/W4415318378","doi":"https://doi.org/10.48550/arxiv.2510.07505","title":"PEAR: Planner-Executor Agent Robustness Benchmark","display_name":"PEAR: Planner-Executor Agent Robustness Benchmark","publication_year":2025,"publication_date":"2025-10-08","ids":{"openalex":"https://openalex.org/W4415318378","doi":"https://doi.org/10.48550/arxiv.2510.07505"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2510.07505","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2510.07505","pdf_url":"https://arxiv.org/pdf/2510.07505","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2510.07505","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Dong, Shen","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Dong, Shen","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5075218726","display_name":"Mingxuan Zhang","orcid":"https://orcid.org/0000-0001-9371-793X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Mingxuan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5011376249","display_name":"Pengfei He","orcid":"https://orcid.org/0000-0003-0183-9302"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"He, Pengfei","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5110379271","display_name":"Li Ma","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ma, Li","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5072193842","display_name":"Bhavani Thuraisingham","orcid":"https://orcid.org/0000-0003-4653-2080"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Thuraisingham, Bhavani","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100658253","display_name":"Hui Liu","orcid":"https://orcid.org/0000-0002-8051-2932"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Hui","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":null,"display_name":"Xing, Yue","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xing, Yue","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":7,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10906","display_name":"AI-based Problem Solving and Planning","score":0.8695999979972839,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10906","display_name":"AI-based Problem Solving and Planning","score":0.8695999979972839,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10456","display_name":"Multi-Agent Systems and Negotiation","score":0.8385000228881836,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11195","display_name":"Simulation Techniques and Applications","score":0.7918000221252441,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/executor","display_name":"Executor","score":0.7929999828338623},{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.7141000032424927},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.6177999973297119},{"id":"https://openalex.org/keywords/planner","display_name":"Planner","score":0.593500018119812},{"id":"https://openalex.org/keywords/adversarial-system","display_name":"Adversarial system","score":0.546999990940094},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.5375000238418579}],"concepts":[{"id":"https://openalex.org/C180591056","wikidata":"https://www.wikidata.org/wiki/Q654437","display_name":"Executor","level":2,"score":0.7929999828338623},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7329000234603882},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.7141000032424927},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.6177999973297119},{"id":"https://openalex.org/C2776999362","wikidata":"https://www.wikidata.org/wiki/Q2349274","display_name":"Planner","level":2,"score":0.593500018119812},{"id":"https://openalex.org/C37736160","wikidata":"https://www.wikidata.org/wiki/Q1801315","display_name":"Adversarial system","level":2,"score":0.546999990940094},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.5375000238418579},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4417000114917755},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4059999883174896},{"id":"https://openalex.org/C95713431","wikidata":"https://www.wikidata.org/wiki/Q631425","display_name":"Vulnerability (computing)","level":2,"score":0.3709999918937683},{"id":"https://openalex.org/C46686674","wikidata":"https://www.wikidata.org/wiki/Q466303","display_name":"Boosting (machine learning)","level":2,"score":0.325300008058548},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.3075000047683716},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.25699999928474426}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2510.07505","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2510.07505","pdf_url":"https://arxiv.org/pdf/2510.07505","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"doi:10.48550/arxiv.2510.07505","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2510.07505","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2510.07505","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2510.07505","pdf_url":"https://arxiv.org/pdf/2510.07505","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Large":[0],"Language":[1],"Model":[2],"(LLM)-based":[3],"Multi-Agent":[4],"Systems":[5],"(MAS)":[6],"have":[7],"emerged":[8],"as":[9],"a":[10,43,58,86,99,110,115,124,140],"powerful":[11],"paradigm":[12],"for":[13,60,120,127,165,175],"tackling":[14],"complex,":[15],"multi-step":[16],"tasks":[17],"across":[18],"diverse":[19],"domains.":[20],"However,":[21],"despite":[22],"their":[23],"impressive":[24],"capabilities,":[25],"MAS":[26,49,75,170],"remain":[27],"susceptible":[28],"to":[29],"adversarial":[30],"manipulation.":[31],"Existing":[32],"studies":[33],"typically":[34],"examine":[35],"isolated":[36],"attack":[37],"surfaces":[38],"or":[39],"specific":[40],"scenarios,":[41],"leaving":[42],"lack":[44],"of":[45,48,68,169],"holistic":[46],"understanding":[47],"vulnerabilities.":[50],"To":[51],"bridge":[52],"this":[53],"gap,":[54],"we":[55,95],"introduce":[56],"PEAR,":[57],"benchmark":[59,78],"systematically":[61],"evaluating":[62],"both":[63],"the":[64,81,121,128,133,151,158,167,173],"utility":[65],"and":[66,88,145,147,171],"vulnerability":[67],"planner-executor":[69,82],"MAS.":[70],"While":[71],"compatible":[72],"with":[73],"various":[74],"architectures,":[76],"our":[77],"focuses":[79],"on":[80],"structure,":[83],"which":[84],"is":[85,118],"practical":[87],"widely":[89],"adopted":[90],"design.":[91],"Through":[92],"extensive":[93],"experiments,":[94],"find":[96],"that":[97],"(1)":[98],"weak":[100,111],"planner":[101,152],"degrades":[102],"overall":[103],"clean":[104,134],"task":[105,135,143],"performance":[106,144],"more":[107],"severely":[108],"than":[109],"executor;":[112],"(2)":[113],"while":[114],"memory":[116,125],"module":[117,126],"essential":[119],"planner,":[122],"having":[123],"executor":[129],"does":[130],"not":[131],"impact":[132],"performance;":[136],"(3)":[137],"there":[138],"exists":[139],"trade-off":[141],"between":[142],"robustness;":[146],"(4)":[148],"attacks":[149],"targeting":[150],"are":[153],"particularly":[154],"effective":[155],"at":[156],"misleading":[157],"system.":[159],"These":[160],"findings":[161],"offer":[162],"actionable":[163],"insights":[164],"enhancing":[166],"robustness":[168],"lay":[172],"groundwork":[174],"principled":[176],"defenses":[177],"in":[178],"multi-agent":[179],"settings.":[180]},"counts_by_year":[],"updated_date":"2026-04-21T08:09:41.155169","created_date":"2025-10-18T00:00:00"}
