{"id":"https://openalex.org/W4390962075","doi":"https://doi.org/10.48550/arxiv.2401.07263","title":"BET: Explaining Deep Reinforcement Learning through The Error-Prone Decisions","display_name":"BET: Explaining Deep Reinforcement Learning through The Error-Prone Decisions","publication_year":2024,"publication_date":"2024-01-14","ids":{"openalex":"https://openalex.org/W4390962075","doi":"https://doi.org/10.48550/arxiv.2401.07263"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2401.07263","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2401.07263","pdf_url":"https://arxiv.org/pdf/2401.07263","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2401.07263","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100441116","display_name":"Xiao Liu","orcid":"https://orcid.org/0000-0003-4510-2847"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Liu, Xiao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101700948","display_name":"Jie Zhao","orcid":"https://orcid.org/0000-0002-5571-8015"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhao, Jie","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5026361347","display_name":"Wubing Chen","orcid":"https://orcid.org/0009-0003-6527-968X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Wubing","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5067534245","display_name":"Mao Tan","orcid":"https://orcid.org/0000-0002-2246-440X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tan, Mao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5102605929","display_name":"Yongxing Su","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Su, Yongxing","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5100441116"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":true,"cited_by_count":1,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12026","display_name":"Explainable Artificial Intelligence (XAI)","score":0.9973000288009644,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12026","display_name":"Explainable Artificial Intelligence (XAI)","score":0.9973000288009644,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9491999745368958,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.9067000150680542,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8430278301239014},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.692041277885437},{"id":"https://openalex.org/keywords/fidelity","display_name":"Fidelity","score":0.5405641198158264},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5261021256446838},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.5076278448104858},{"id":"https://openalex.org/keywords/focus","display_name":"Focus (optics)","score":0.49727943539619446},{"id":"https://openalex.org/keywords/decision-tree","display_name":"Decision tree","score":0.48911982774734497},{"id":"https://openalex.org/keywords/black-box","display_name":"Black box","score":0.47789299488067627},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.4697597026824951},{"id":"https://openalex.org/keywords/software-deployment","display_name":"Software deployment","score":0.44087615609169006},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.4040919542312622}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8430278301239014},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.692041277885437},{"id":"https://openalex.org/C2776459999","wikidata":"https://www.wikidata.org/wiki/Q2119376","display_name":"Fidelity","level":2,"score":0.5405641198158264},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5261021256446838},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.5076278448104858},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.49727943539619446},{"id":"https://openalex.org/C84525736","wikidata":"https://www.wikidata.org/wiki/Q831366","display_name":"Decision tree","level":2,"score":0.48911982774734497},{"id":"https://openalex.org/C94966114","wikidata":"https://www.wikidata.org/wiki/Q29256","display_name":"Black box","level":2,"score":0.47789299488067627},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.4697597026824951},{"id":"https://openalex.org/C105339364","wikidata":"https://www.wikidata.org/wiki/Q2297740","display_name":"Software deployment","level":2,"score":0.44087615609169006},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4040919542312622},{"id":"https://openalex.org/C120665830","wikidata":"https://www.wikidata.org/wiki/Q14620","display_name":"Optics","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2401.07263","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2401.07263","pdf_url":"https://arxiv.org/pdf/2401.07263","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},{"id":"doi:10.48550/arxiv.2401.07263","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2401.07263","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2401.07263","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2401.07263","pdf_url":"https://arxiv.org/pdf/2401.07263","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"sustainable_development_goals":[{"score":0.7900000214576721,"display_name":"Peace, Justice and strong institutions","id":"https://metadata.un.org/sdg/16"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4390962075.pdf"},"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W2770234245","https://openalex.org/W96612179","https://openalex.org/W4229499248","https://openalex.org/W2566006169","https://openalex.org/W2987774938","https://openalex.org/W4256492088","https://openalex.org/W632915154","https://openalex.org/W2055733372","https://openalex.org/W3022067003","https://openalex.org/W4378874356"],"abstract_inverted_index":{"Despite":[0],"the":[1,32,36,43,64,69,82,161,172,179],"impressive":[2],"capabilities":[3],"of":[4,35,111,148,174],"Deep":[5],"Reinforcement":[6],"Learning":[7],"(DRL)":[8],"agents":[9,162],"in":[10,22,80,133,146,163],"many":[11],"challenging":[12],"scenarios,":[13],"their":[14,20],"black-box":[15],"decision-making":[16],"process":[17],"significantly":[18],"limits":[19],"deployment":[21],"safety-sensitive":[23],"domains.":[24],"Several":[25],"previous":[26],"self-interpretable":[27,54,144],"works":[28],"focus":[29],"on":[30],"revealing":[31],"critical":[33],"states":[34,79,102,115],"agent's":[37,65],"decision.":[38],"However,":[39],"they":[40],"cannot":[41],"pinpoint":[42],"error-prone":[44,70],"states.":[45,71,113],"To":[46,94,171],"address":[47],"this":[48,97],"issue,":[49],"we":[50,152,177],"propose":[51],"a":[52,73,89,108,118,154,166,184,188],"novel":[53],"structure,":[55],"named":[56],"Backbone":[57],"Extract":[58],"Tree":[59],"(BET),":[60],"to":[61,128,181],"better":[62],"explain":[63,182],"behavior":[66],"by":[67,107],"identify":[68],"At":[72],"high":[74],"level,":[75],"BET":[76,99,132],"hypothesizes":[77],"that":[78],"which":[81],"agent":[83],"consistently":[84],"executes":[85],"uniform":[86],"decisions":[87],"exhibit":[88],"reduced":[90],"propensity":[91],"for":[92,157,160],"errors.":[93],"effectively":[95],"model":[96],"phenomenon,":[98],"expresses":[100],"these":[101,122],"within":[103],"neighborhoods,":[104],"each":[105],"defined":[106],"curated":[109],"set":[110],"representative":[112,123],"Therefore,":[114],"positioned":[116],"at":[117],"greater":[119],"distance":[120],"from":[121],"benchmarks":[124],"are":[125,178],"more":[126],"prone":[127],"error.":[129],"We":[130],"evaluate":[131],"various":[134],"popular":[135],"RL":[136],"environments":[137],"and":[138],"show":[139],"its":[140],"superiority":[141],"over":[142],"existing":[143],"models":[145],"terms":[147],"explanation":[149],"fidelity.":[150],"Furthermore,":[151],"demonstrate":[153],"use":[155],"case":[156],"providing":[158],"explanations":[159],"StarCraft":[164],"II,":[165],"sophisticated":[167],"multi-agent":[168],"cooperative":[169],"game.":[170],"best":[173],"our":[175],"knowledge,":[176],"first":[180],"such":[183],"complex":[185],"scenarios":[186],"using":[187],"fully":[189],"transparent":[190],"structure.":[191]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2026-03-11T14:59:36.786465","created_date":"2024-01-18T00:00:00"}
