{"id":"https://openalex.org/W4402952841","doi":"https://doi.org/10.48550/arxiv.2408.16286","title":"Near-Optimal Policy Identification in Robust Constrained Markov Decision Processes via Epigraph Form","display_name":"Near-Optimal Policy Identification in Robust Constrained Markov Decision Processes via Epigraph Form","publication_year":2024,"publication_date":"2024-08-29","ids":{"openalex":"https://openalex.org/W4402952841","doi":"https://doi.org/10.48550/arxiv.2408.16286"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2408.16286","is_oa":true,"landing_page_url":"https://arxiv.org/abs/2408.16286","pdf_url":"https://arxiv.org/pdf/2408.16286","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2408.16286","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5031877106","display_name":"Toshinori Kitamura","orcid":"https://orcid.org/0000-0002-2326-3140"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Kitamura, Toshinori","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5070075141","display_name":"Tadashi Kozuno","orcid":"https://orcid.org/0000-0002-8820-1362"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kozuno, Tadashi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5003820265","display_name":"Wataru Kumagai","orcid":"https://orcid.org/0000-0002-3081-5951"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kumagai, Wataru","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5075966877","display_name":"K. Hoshino","orcid":"https://orcid.org/0000-0003-0562-2733"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hoshino, Kenta","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5009320679","display_name":"Yohei Hosoe","orcid":"https://orcid.org/0000-0002-5659-1060"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hosoe, Yohei","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5083407612","display_name":"Kazumi Kasaura","orcid":"https://orcid.org/0000-0002-3219-9961"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kasaura, Kazumi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5034718334","display_name":"Masashi Hamaya","orcid":"https://orcid.org/0000-0003-4189-8219"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hamaya, Masashi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5047914843","display_name":"Paavo Parmas","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Parmas, Paavo","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5074059447","display_name":"Yutaka Matsuo","orcid":"https://orcid.org/0000-0002-2070-4393"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Matsuo, Yutaka","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":9,"corresponding_author_ids":["https://openalex.org/A5031877106"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.19140000641345978,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.19140000641345978,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10136","display_name":"Statistical Methods and Inference","score":0.17239999771118164,"subfield":{"id":"https://openalex.org/subfields/2613","display_name":"Statistics and Probability"},"field":{"id":"https://openalex.org/fields/26","display_name":"Mathematics"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12056","display_name":"Markov Chains and Monte Carlo Methods","score":0.16740000247955322,"subfield":{"id":"https://openalex.org/subfields/2613","display_name":"Statistics and Probability"},"field":{"id":"https://openalex.org/fields/26","display_name":"Mathematics"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/epigraph","display_name":"Epigraph","score":0.9705146551132202},{"id":"https://openalex.org/keywords/markov-decision-process","display_name":"Markov decision process","score":0.6346087455749512},{"id":"https://openalex.org/keywords/identification","display_name":"Identification (biology)","score":0.5964210033416748},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5064839124679565},{"id":"https://openalex.org/keywords/markov-chain","display_name":"Markov chain","score":0.49679043889045715},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.47096091508865356},{"id":"https://openalex.org/keywords/markov-process","display_name":"Markov process","score":0.39866819977760315},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.3864402770996094},{"id":"https://openalex.org/keywords/mathematical-economics","display_name":"Mathematical economics","score":0.3518759310245514},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.34503868222236633},{"id":"https://openalex.org/keywords/econometrics","display_name":"Econometrics","score":0.32890045642852783},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.21893557906150818},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.15752160549163818}],"concepts":[{"id":"https://openalex.org/C17192189","wikidata":"https://www.wikidata.org/wiki/Q1347059","display_name":"Epigraph","level":2,"score":0.9705146551132202},{"id":"https://openalex.org/C106189395","wikidata":"https://www.wikidata.org/wiki/Q176789","display_name":"Markov decision process","level":3,"score":0.6346087455749512},{"id":"https://openalex.org/C116834253","wikidata":"https://www.wikidata.org/wiki/Q2039217","display_name":"Identification (biology)","level":2,"score":0.5964210033416748},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5064839124679565},{"id":"https://openalex.org/C98763669","wikidata":"https://www.wikidata.org/wiki/Q176645","display_name":"Markov chain","level":2,"score":0.49679043889045715},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.47096091508865356},{"id":"https://openalex.org/C159886148","wikidata":"https://www.wikidata.org/wiki/Q176645","display_name":"Markov process","level":2,"score":0.39866819977760315},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.3864402770996094},{"id":"https://openalex.org/C144237770","wikidata":"https://www.wikidata.org/wiki/Q747534","display_name":"Mathematical economics","level":1,"score":0.3518759310245514},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.34503868222236633},{"id":"https://openalex.org/C149782125","wikidata":"https://www.wikidata.org/wiki/Q160039","display_name":"Econometrics","level":1,"score":0.32890045642852783},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.21893557906150818},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.15752160549163818},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C59822182","wikidata":"https://www.wikidata.org/wiki/Q441","display_name":"Botany","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2408.16286","is_oa":true,"landing_page_url":"https://arxiv.org/abs/2408.16286","pdf_url":"https://arxiv.org/pdf/2408.16286","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"doi:10.48550/arxiv.2408.16286","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2408.16286","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2408.16286","is_oa":true,"landing_page_url":"https://arxiv.org/abs/2408.16286","pdf_url":"https://arxiv.org/pdf/2408.16286","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G8738369156","display_name":null,"funder_award_id":"JPMJMS2236","funder_id":"https://openalex.org/F4320338247","funder_display_name":"Moonshot Research and Development Program"}],"funders":[{"id":"https://openalex.org/F4320338247","display_name":"Moonshot Research and Development Program","ror":null}],"has_content":{"pdf":true,"grobid_xml":false},"content_urls":{"pdf":"https://content.openalex.org/works/W4402952841.pdf"},"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W2355558294","https://openalex.org/W2392366684","https://openalex.org/W2379651310","https://openalex.org/W2113019827","https://openalex.org/W1541249122","https://openalex.org/W2413828414","https://openalex.org/W2367222340","https://openalex.org/W187740018","https://openalex.org/W2162286586","https://openalex.org/W4255368532"],"abstract_inverted_index":{"Designing":[0],"a":[1,35,39,59,90,119,136,141],"safe":[2],"policy":[3,37,47,69,142,152,159],"for":[4],"uncertain":[5],"environments":[6],"is":[7],"crucial":[8],"in":[9,38,54,80,153],"real-world":[10],"control":[11],"systems.":[12],"However,":[13],"this":[14],"challenge":[15],"remains":[16],"inadequately":[17],"addressed":[18],"within":[19],"the":[20,29,55,67,73,96,106,110,115,124,127,131],"Markov":[21],"decision":[22],"process":[23],"(MDP)":[24],"framework.":[25],"This":[26,83],"paper":[27],"presents":[28],"first":[30,64],"algorithm":[31,139],"guaranteed":[32],"to":[33,72],"identify":[34],"near-optimal":[36],"robust":[40,158],"constrained":[41],"MDP":[42],"(RCMDP),":[43],"where":[44],"an":[45,150,154],"optimal":[46],"minimizes":[48],"cumulative":[49],"cost":[50],"while":[51],"satisfying":[52],"constraints":[53],"worst-case":[56],"scenario":[57],"across":[58],"set":[60],"of":[61,92,109],"environments.":[62],"We":[63],"prove":[65,146],"that":[66,147],"conventional":[68],"gradient":[70,121,143],"approach":[71],"Lagrangian":[74],"max-min":[75],"formulation":[76],"can":[77],"become":[78],"trapped":[79],"suboptimal":[81],"solutions.":[82],"occurs":[84],"when":[85],"its":[86],"inner":[87],"minimization":[88],"encounters":[89],"sum":[91],"conflicting":[93],"gradients":[94],"from":[95,122],"objective":[97,125],"and":[98,145],"constraint":[99],"functions.":[100],"To":[101],"address":[102],"this,":[103],"we":[104,134],"leverage":[105],"epigraph":[107,132],"form":[108],"RCMDP":[111,155],"problem,":[112],"which":[113],"resolves":[114],"conflict":[116],"by":[117],"selecting":[118],"single":[120],"either":[123],"or":[126],"constraints.":[128],"Building":[129],"on":[130],"form,":[133],"propose":[135],"bisection":[137],"search":[138],"with":[140,156],"subroutine":[144],"it":[148],"identifies":[149],"$\\varepsilon$-optimal":[151],"$\\tilde{\\mathcal{O}}(\\varepsilon^{-4})$":[157],"evaluations.":[160]},"counts_by_year":[],"updated_date":"2026-04-29T09:16:38.111599","created_date":"2025-10-10T00:00:00"}
