{"id":"https://openalex.org/W4390099682","doi":"https://doi.org/10.1109/robio58561.2023.10354940","title":"Learning Stall Recovery Policies using a Soft Actor-Critic Algorithm with Smooth Reward Functions","display_name":"Learning Stall Recovery Policies using a Soft Actor-Critic Algorithm with Smooth Reward Functions","publication_year":2023,"publication_date":"2023-12-04","ids":{"openalex":"https://openalex.org/W4390099682","doi":"https://doi.org/10.1109/robio58561.2023.10354940"},"language":"en","primary_location":{"id":"doi:10.1109/robio58561.2023.10354940","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/robio58561.2023.10354940","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 IEEE International Conference on Robotics and Biomimetics (ROBIO)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5114985917","display_name":"Junqiu Wang","orcid":"https://orcid.org/0000-0002-7138-7359"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Junqiu Wang","raw_affiliation_strings":["China Aerospace Establishment"],"affiliations":[{"raw_affiliation_string":"China Aerospace Establishment","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5026863388","display_name":"Jianmei Tan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jianmei Tan","raw_affiliation_strings":["China Aerospace Establishment"],"affiliations":[{"raw_affiliation_string":"China Aerospace Establishment","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5058598684","display_name":"Peng Lin","orcid":"https://orcid.org/0000-0002-3464-5574"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Peng Lin","raw_affiliation_strings":["China Aerospace Establishment"],"affiliations":[{"raw_affiliation_string":"China Aerospace Establishment","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5006425522","display_name":"Chenguang Xing","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chenguang Xing","raw_affiliation_strings":["China Aerospace Establishment"],"affiliations":[{"raw_affiliation_string":"China Aerospace Establishment","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5108053321","display_name":"B. Liu","orcid":"https://orcid.org/0000-0003-2823-9307"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Bo Liu","raw_affiliation_strings":["China Aerospace Establishment"],"affiliations":[{"raw_affiliation_string":"China Aerospace Establishment","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5114985917"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.1751,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.59045142,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":94},"biblio":{"volume":"1707.06347","issue":null,"first_page":"1","last_page":"6"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9983999729156494,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9983999729156494,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12794","display_name":"Adaptive Dynamic Programming Control","score":0.995199978351593,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12125","display_name":"Aerospace and Aviation Technology","score":0.9947999715805054,"subfield":{"id":"https://openalex.org/subfields/2202","display_name":"Aerospace Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.7766318321228027},{"id":"https://openalex.org/keywords/stall","display_name":"Stall (fluid mechanics)","score":0.7109507918357849},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6868526935577393},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.46502161026000977},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.33264291286468506},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.20152580738067627}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.7766318321228027},{"id":"https://openalex.org/C5804382","wikidata":"https://www.wikidata.org/wiki/Q752034","display_name":"Stall (fluid mechanics)","level":2,"score":0.7109507918357849},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6868526935577393},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.46502161026000977},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.33264291286468506},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.20152580738067627},{"id":"https://openalex.org/C146978453","wikidata":"https://www.wikidata.org/wiki/Q3798668","display_name":"Aerospace engineering","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/robio58561.2023.10354940","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/robio58561.2023.10354940","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 IEEE International Conference on Robotics and Biomimetics (ROBIO)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":37,"referenced_works":["https://openalex.org/W1583837637","https://openalex.org/W2145339207","https://openalex.org/W2169080882","https://openalex.org/W2169209873","https://openalex.org/W2257979135","https://openalex.org/W2329878586","https://openalex.org/W2560674852","https://openalex.org/W2569387431","https://openalex.org/W2618530766","https://openalex.org/W2736601468","https://openalex.org/W2747329762","https://openalex.org/W2785534585","https://openalex.org/W2904246096","https://openalex.org/W2963403593","https://openalex.org/W2970065535","https://openalex.org/W3006176312","https://openalex.org/W3092511456","https://openalex.org/W3164631379","https://openalex.org/W3208231292","https://openalex.org/W3210472963","https://openalex.org/W3212333881","https://openalex.org/W4213011582","https://openalex.org/W4214717370","https://openalex.org/W4244745777","https://openalex.org/W6627932998","https://openalex.org/W6638018090","https://openalex.org/W6684205842","https://openalex.org/W6684921986","https://openalex.org/W6685130742","https://openalex.org/W6738741286","https://openalex.org/W6745873365","https://openalex.org/W6747473740","https://openalex.org/W6748839928","https://openalex.org/W6757469721","https://openalex.org/W6803713571","https://openalex.org/W6804220809","https://openalex.org/W6810533150"],"related_works":["https://openalex.org/W2961085424","https://openalex.org/W4306674287","https://openalex.org/W3046775127","https://openalex.org/W3107602296","https://openalex.org/W4394896187","https://openalex.org/W3170094116","https://openalex.org/W4386462264","https://openalex.org/W4364306694","https://openalex.org/W4312192474","https://openalex.org/W4283697347"],"abstract_inverted_index":{"We":[0,101,122],"propose":[1],"an":[2,78],"effective":[3],"stall":[4,52,69,91],"recovery":[5,38,53,70,92],"learning":[6,57,93,109],"approach":[7,94],"based":[8],"on":[9],"a":[10],"soft":[11,132],"actor-critic":[12,133],"algorithm":[13,134],"with":[14,135],"smooth":[15,104],"reward":[16,105,112,128],"functions.":[17],"Stalling":[18],"is":[19,59,72,83],"extremely":[20],"dangerous":[21],"for":[22,116],"aircraft":[23,79],"and":[24,80],"unmanned":[25],"aerial":[26],"vehicles":[27],"(UAVs)":[28],"because":[29,61,111],"altitude":[30],"decreases":[31],"can":[32,64,144],"result":[33],"in":[34],"fatal":[35],"accidents.":[36],"Stall":[37],"policies":[39,54,63],"perform":[40],"appropriate":[41],"control":[42],"sequences":[43],"to":[44,107,130],"save":[45],"aircrafts":[46],"from":[47],"such":[48,62],"lethal":[49],"situations.":[50],"Learning":[51],"using":[55,148],"reinforcement":[56],"methods":[58],"desirable":[60],"be":[65,145],"learned":[66,150],"automatically.":[67],"However,":[68],"training":[71],"challenging":[73],"since":[74],"the":[75,89,108,117,131,149],"interplay":[76],"between":[77],"its":[81],"environment":[82],"very":[84],"complicated.":[85],"In":[86],"this":[87],"work,":[88],"proposed":[90],"yields":[95],"better":[96,160],"performance":[97,125],"than":[98,162],"other":[99],"methods.":[100],"successfully":[102,146],"apply":[103],"functions":[106,113],"process":[110],"are":[114],"critical":[115],"convergence":[118],"of":[119],"policy":[120],"learning.":[121,138],"achieve":[123],"good":[124],"by":[126],"applying":[127],"scaling":[129],"automatic":[136],"entropy":[137],"Experimental":[139],"results":[140,154,161],"demonstrate":[141],"that":[142,156],"stalls":[143],"recovered":[147],"policies.":[151],"The":[152],"comparison":[153],"show":[155],"our":[157],"method":[158],"provides":[159],"previous":[163],"algorithms.":[164]},"counts_by_year":[{"year":2024,"cited_by_count":1}],"updated_date":"2025-12-25T23:11:45.687758","created_date":"2025-10-10T00:00:00"}