{"id":"https://openalex.org/W4416877088","doi":"https://doi.org/10.48550/arxiv.2507.00417","title":"ASTRO: Teaching Language Models to Reason by Reflecting and Backtracking In-Context","display_name":"ASTRO: Teaching Language Models to Reason by Reflecting and Backtracking In-Context","publication_year":2025,"publication_date":"2025-07-01","ids":{"openalex":"https://openalex.org/W4416877088","doi":"https://doi.org/10.48550/arxiv.2507.00417"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2507.00417","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2507.00417","pdf_url":"https://arxiv.org/pdf/2507.00417","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-nc-sa","license_id":"https://openalex.org/licenses/cc-by-nc-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2507.00417","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5050832932","display_name":"Joongwon Kim","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Kim, Joongwon","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5016769717","display_name":"Anirudh Goyal","orcid":"https://orcid.org/0000-0002-4080-1940"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Goyal, Anirudh","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5075258788","display_name":"Liang See Tan","orcid":"https://orcid.org/0000-0001-7468-7683"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tan, Liang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5082305994","display_name":"Hannaneh Hajishirzi","orcid":"https://orcid.org/0000-0002-1055-6657"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hajishirzi, Hannaneh","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5088004913","display_name":"Srinivasan Iyer","orcid":"https://orcid.org/0000-0002-6186-2603"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Iyer, Srinivasan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5026227210","display_name":"Tianlu Wang","orcid":"https://orcid.org/0000-0002-7480-9705"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Tianlu","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5050832932"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.5235999822616577,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.5235999822616577,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.10480000078678131,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.04670000076293945,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/backtracking","display_name":"Backtracking","score":0.6520000100135803},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.545199990272522},{"id":"https://openalex.org/keywords/verifiable-secret-sharing","display_name":"Verifiable secret sharing","score":0.5400999784469604},{"id":"https://openalex.org/keywords/natural-language-understanding","display_name":"Natural language understanding","score":0.46549999713897705},{"id":"https://openalex.org/keywords/natural-language","display_name":"Natural language","score":0.45320001244544983},{"id":"https://openalex.org/keywords/tree","display_name":"Tree (set theory)","score":0.430400013923645},{"id":"https://openalex.org/keywords/monte-carlo-tree-search","display_name":"Monte Carlo tree search","score":0.4016000032424927},{"id":"https://openalex.org/keywords/qualitative-reasoning","display_name":"Qualitative reasoning","score":0.33809998631477356}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7609999775886536},{"id":"https://openalex.org/C156884757","wikidata":"https://www.wikidata.org/wiki/Q798554","display_name":"Backtracking","level":2,"score":0.6520000100135803},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5993000268936157},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.545199990272522},{"id":"https://openalex.org/C85847156","wikidata":"https://www.wikidata.org/wiki/Q59015987","display_name":"Verifiable secret sharing","level":3,"score":0.5400999784469604},{"id":"https://openalex.org/C2779439875","wikidata":"https://www.wikidata.org/wiki/Q1078276","display_name":"Natural language understanding","level":3,"score":0.46549999713897705},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.45320001244544983},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4318999946117401},{"id":"https://openalex.org/C113174947","wikidata":"https://www.wikidata.org/wiki/Q2859736","display_name":"Tree (set theory)","level":2,"score":0.430400013923645},{"id":"https://openalex.org/C46149586","wikidata":"https://www.wikidata.org/wiki/Q11785332","display_name":"Monte Carlo tree search","level":3,"score":0.4016000032424927},{"id":"https://openalex.org/C83725634","wikidata":"https://www.wikidata.org/wiki/Q7268699","display_name":"Qualitative reasoning","level":2,"score":0.33809998631477356},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.3370000123977661},{"id":"https://openalex.org/C195344581","wikidata":"https://www.wikidata.org/wiki/Q2555318","display_name":"Automated reasoning","level":2,"score":0.3149000108242035},{"id":"https://openalex.org/C37335422","wikidata":"https://www.wikidata.org/wiki/Q6888134","display_name":"Model-based reasoning","level":3,"score":0.3003999888896942},{"id":"https://openalex.org/C44291984","wikidata":"https://www.wikidata.org/wiki/Q1074173","display_name":"Question answering","level":2,"score":0.2904999852180481},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.2865000069141388},{"id":"https://openalex.org/C2983448237","wikidata":"https://www.wikidata.org/wiki/Q1078276","display_name":"Language understanding","level":2,"score":0.2824000120162964},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.27619999647140503},{"id":"https://openalex.org/C125583679","wikidata":"https://www.wikidata.org/wiki/Q755673","display_name":"Search algorithm","level":2,"score":0.27489998936653137},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.2583000063896179},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.2567000091075897},{"id":"https://openalex.org/C94922259","wikidata":"https://www.wikidata.org/wiki/Q33215","display_name":"Constructed language","level":2,"score":0.2526000142097473}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2507.00417","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2507.00417","pdf_url":"https://arxiv.org/pdf/2507.00417","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-nc-sa","license_id":"https://openalex.org/licenses/cc-by-nc-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"doi:10.48550/arxiv.2507.00417","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2507.00417","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2507.00417","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2507.00417","pdf_url":"https://arxiv.org/pdf/2507.00417","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-nc-sa","license_id":"https://openalex.org/licenses/cc-by-nc-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4416877088.pdf","grobid_xml":"https://content.openalex.org/works/W4416877088.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"We":[0,145,162],"introduce":[1],"ASTRO,":[2],"the":[3,40,84,166],"\"Autoregressive":[4],"Search-Taught":[5],"Reasoner\",":[6],"a":[7,75,104,138,206],"framework":[8],"for":[9,141],"training":[10,28,204],"language":[11,30,124],"models":[12,31,44,59,90,97,136,148,171],"to":[13,39,82,98,165,209],"reason":[14],"like":[15],"search":[16,68,101,120],"algorithms,":[17],"explicitly":[18],"leveraging":[19],"self-reflection,":[20],"backtracking,":[21],"and":[22,130,153,172,185],"exploration":[23,142],"in":[24],"their":[25],"outputs.":[26],"Recently,":[27],"large":[29],"(LLMs)":[32],"via":[33,157],"reinforcement":[34],"learning":[35],"(RL)":[36],"has":[37],"led":[38],"advent":[41],"of":[42,52,87,170,177],"reasoning":[43,48,53,64,85,212],"with":[45,67,137,159],"greatly":[46],"enhanced":[47],"capabilities.":[49],"Open-source":[50],"replications":[51],"models,":[54],"while":[55],"successful,":[56],"build":[57],"upon":[58,192],"that":[60,126,195,202],"already":[61],"exhibit":[62],"strong":[63],"capabilities":[65,86,213],"along":[66],"behavior":[69,102],"observed":[70],"even":[71],"before":[72],"RL.":[73,144],"As":[74],"result,":[76],"it":[77],"is":[78],"yet":[79],"unclear":[80],"how":[81],"boost":[83],"other":[88],"non-reasoner":[89],"including":[91],"Llama":[92,167],"3.":[93],"ASTRO":[94,134,164],"teaches":[95],"such":[96],"internalize":[99],"structured":[100],"through":[103],"synthetic":[105],"dataset":[106],"derived":[107],"from":[108,132],"Monte":[109],"Carlo":[110],"Tree":[111],"Search":[112],"(MCTS)":[113],"over":[114],"mathematical":[115],"problem-solving":[116],"trajectories.":[117],"By":[118],"converting":[119],"traces":[121,152],"into":[122,214],"natural":[123],"chain-of-thoughts":[125],"capture":[127],"both":[128],"successes":[129],"recoveries":[131],"failure,":[133],"bootstraps":[135],"rich":[139],"prior":[140],"during":[143],"finetune":[146],"our":[147],"on":[149,179,182,187],"these":[150],"search-derived":[151],"further":[154],"improve":[155],"performance":[156,175],"RL":[158],"verifiable":[160],"rewards.":[161],"apply":[163],"3":[168],"family":[169],"achieve":[173],"absolute":[174],"gains":[176],"16.0%":[178],"MATH-500,":[180],"26.9%":[181],"AMC":[183],"2023,":[184],"20.0%":[186],"AIME":[188],"2024,":[189],"especially":[190],"improving":[191],"challenging":[193],"problems":[194],"require":[196],"iterative":[197],"correction.":[198],"Our":[199],"results":[200],"demonstrate":[201],"search-inspired":[203],"offers":[205],"principled":[207],"way":[208],"instill":[210],"robust":[211],"open":[215],"LLMs.":[216]},"counts_by_year":[],"updated_date":"2026-03-10T16:38:18.471706","created_date":"2025-10-10T00:00:00"}
