{"id":"https://openalex.org/W4414855528","doi":"https://doi.org/10.48550/arxiv.2505.23884","title":"Test-Time Training Done Right","display_name":"Test-Time Training Done Right","publication_year":2025,"publication_date":"2025-05-29","ids":{"openalex":"https://openalex.org/W4414855528","doi":"https://doi.org/10.48550/arxiv.2505.23884"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2505.23884","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2505.23884","pdf_url":"https://arxiv.org/pdf/2505.23884","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2505.23884","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100769910","display_name":"Tianyuan Zhang","orcid":"https://orcid.org/0000-0001-9104-0830"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Zhang, Tianyuan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5099137440","display_name":"Sai Bi","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Bi, Sai","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5036105537","display_name":"Yicong Hong","orcid":"https://orcid.org/0000-0002-5068-1508"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hong, Yicong","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100323912","display_name":"Kai Zhang","orcid":"https://orcid.org/0000-0001-9054-288X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Kai","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5061806060","display_name":"Fujun Luan","orcid":"https://orcid.org/0000-0001-5926-6266"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Luan, Fujun","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100548266","display_name":"Songlin Yang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yang, Songlin","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5032004510","display_name":"Kalyan Sunkavalli","orcid":"https://orcid.org/0000-0002-6030-2348"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sunkavalli, Kalyan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5074429265","display_name":"William T. Freeman","orcid":"https://orcid.org/0000-0002-2231-7995"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Freeman, William T.","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5102849436","display_name":"H. Tan","orcid":"https://orcid.org/0000-0002-4475-5937"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tan, Hao","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":9,"corresponding_author_ids":["https://openalex.org/A5100769910"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11994","display_name":"Human Resource Development and Performance Evaluation","score":0.17509999871253967,"subfield":{"id":"https://openalex.org/subfields/3202","display_name":"Applied Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11994","display_name":"Human Resource Development and Performance Evaluation","score":0.17509999871253967,"subfield":{"id":"https://openalex.org/subfields/3202","display_name":"Applied Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.6761999726295471},{"id":"https://openalex.org/keywords/sequence","display_name":"Sequence (biology)","score":0.4713999927043915},{"id":"https://openalex.org/keywords/state","display_name":"State (computer science)","score":0.46700000762939453},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.4652000069618225},{"id":"https://openalex.org/keywords/training","display_name":"Training (meteorology)","score":0.4481000006198883},{"id":"https://openalex.org/keywords/inefficiency","display_name":"Inefficiency","score":0.43070000410079956},{"id":"https://openalex.org/keywords/kernel","display_name":"Kernel (algebra)","score":0.4122999906539917},{"id":"https://openalex.org/keywords/flops","display_name":"FLOPS","score":0.3856000006198883},{"id":"https://openalex.org/keywords/scale","display_name":"Scale (ratio)","score":0.37880000472068787}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.817799985408783},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.6761999726295471},{"id":"https://openalex.org/C2778112365","wikidata":"https://www.wikidata.org/wiki/Q3511065","display_name":"Sequence (biology)","level":2,"score":0.4713999927043915},{"id":"https://openalex.org/C48103436","wikidata":"https://www.wikidata.org/wiki/Q599031","display_name":"State (computer science)","level":2,"score":0.46700000762939453},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.4652000069618225},{"id":"https://openalex.org/C2777211547","wikidata":"https://www.wikidata.org/wiki/Q17141490","display_name":"Training (meteorology)","level":2,"score":0.4481000006198883},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.43959999084472656},{"id":"https://openalex.org/C2778869765","wikidata":"https://www.wikidata.org/wiki/Q6028363","display_name":"Inefficiency","level":2,"score":0.43070000410079956},{"id":"https://openalex.org/C74193536","wikidata":"https://www.wikidata.org/wiki/Q574844","display_name":"Kernel (algebra)","level":2,"score":0.4122999906539917},{"id":"https://openalex.org/C3826847","wikidata":"https://www.wikidata.org/wiki/Q188768","display_name":"FLOPS","level":2,"score":0.3856000006198883},{"id":"https://openalex.org/C2778755073","wikidata":"https://www.wikidata.org/wiki/Q10858537","display_name":"Scale (ratio)","level":2,"score":0.37880000472068787},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3682999908924103},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.367900013923645},{"id":"https://openalex.org/C99844830","wikidata":"https://www.wikidata.org/wiki/Q102441924","display_name":"Scaling","level":2,"score":0.35370001196861267},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.3472000062465668},{"id":"https://openalex.org/C49774154","wikidata":"https://www.wikidata.org/wiki/Q131765","display_name":"Multimedia","level":1,"score":0.3278000056743622},{"id":"https://openalex.org/C2779903281","wikidata":"https://www.wikidata.org/wiki/Q6888026","display_name":"Modalities","level":2,"score":0.3174000084400177},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.30649998784065247},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.30390000343322754},{"id":"https://openalex.org/C115051666","wikidata":"https://www.wikidata.org/wiki/Q6522493","display_name":"Ranging","level":2,"score":0.29319998621940613},{"id":"https://openalex.org/C2776502983","wikidata":"https://www.wikidata.org/wiki/Q690182","display_name":"Contrast (vision)","level":2,"score":0.28700000047683716},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.2856000065803528},{"id":"https://openalex.org/C2780922921","wikidata":"https://www.wikidata.org/wiki/Q255189","display_name":"Paraphrase","level":2,"score":0.28049999475479126},{"id":"https://openalex.org/C2777303404","wikidata":"https://www.wikidata.org/wiki/Q759757","display_name":"Convergence (economics)","level":2,"score":0.26840001344680786},{"id":"https://openalex.org/C106131492","wikidata":"https://www.wikidata.org/wiki/Q3072260","display_name":"Filter (signal processing)","level":2,"score":0.26759999990463257},{"id":"https://openalex.org/C158622935","wikidata":"https://www.wikidata.org/wiki/Q660848","display_name":"Nonlinear system","level":2,"score":0.2637999951839447},{"id":"https://openalex.org/C126042441","wikidata":"https://www.wikidata.org/wiki/Q1324888","display_name":"Frame (networking)","level":2,"score":0.26249998807907104},{"id":"https://openalex.org/C113775141","wikidata":"https://www.wikidata.org/wiki/Q428691","display_name":"Computer engineering","level":1,"score":0.2554999887943268},{"id":"https://openalex.org/C160633673","wikidata":"https://www.wikidata.org/wiki/Q355198","display_name":"Pixel","level":2,"score":0.25459998846054077},{"id":"https://openalex.org/C188087704","wikidata":"https://www.wikidata.org/wiki/Q369577","display_name":"Standardization","level":2,"score":0.2529999911785126}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2505.23884","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2505.23884","pdf_url":"https://arxiv.org/pdf/2505.23884","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"doi:10.48550/arxiv.2505.23884","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2505.23884","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2505.23884","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2505.23884","pdf_url":"https://arxiv.org/pdf/2505.23884","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Test-Time":[0,151],"Training":[1,152],"(TTT)":[2],"models":[3],"context":[4,255],"dependencies":[5,98],"by":[6,126,158],"adapting":[7],"part":[8],"of":[9,32,62,141,160,167,174,195,270],"the":[10,36,100,123,268],"model's":[11],"weights":[12,84],"(referred":[13],"to":[14,24,43,51,136,147,172,230,239],"as":[15,115,148],"fast":[16,21,83],"weights)":[17],"during":[18],"inference.":[19],"This":[20],"weight,":[22],"akin":[23],"recurrent":[25],"states":[26],"in":[27,35,46,60,99,267],"RNNs,":[28],"stores":[29],"temporary":[30],"memories":[31],"past":[33],"tokens":[34,138],"current":[37],"sequence.":[38],"Existing":[39],"TTT":[40,58],"methods":[41],"struggled":[42],"show":[44],"effectiveness":[45],"handling":[47],"long-context":[48,271],"data,":[49,101],"due":[50],"their":[52],"inefficiency":[53],"on":[54,236],"modern":[55],"GPUs.":[56],"The":[57],"layers":[59],"many":[61],"these":[63],"approaches":[64],"operate":[65],"with":[66,216,252],"extremely":[67,129],"low":[68],"FLOPs":[69],"utilization":[70,157],"(often":[71],"&lt;5%)":[72],"because":[73],"they":[74],"deliberately":[75],"apply":[76],"small":[77,92],"online":[78,201],"minibatch":[79,93],"sizes":[80],"(e.g.,":[81],"updating":[82],"every":[85],"16":[86],"or":[87,111,117],"64":[88],"tokens).":[89],"Moreover,":[90],"a":[91],"implies":[94],"fine-grained":[95],"block-wise":[96],"causal":[97],"unsuitable":[102],"for":[103,200],"data":[104],"beyond":[105],"1D":[106],"ordered":[107],"sequences,":[108],"like":[109],"sets":[110],"N-dimensional":[112],"grids":[113],"such":[114],"images":[116],"videos.":[118],"In":[119,242],"contrast,":[120],"we":[121,145,247],"pursue":[122],"opposite":[124],"direction":[125],"using":[127],"an":[128],"large":[130],"chunk":[131],"update,":[132],"ranging":[133],"from":[134],"2K":[135],"1M":[137],"across":[139,207],"tasks":[140],"varying":[142],"modalities,":[143],"which":[144],"refer":[146],"Large":[149],"Chunk":[150],"(LaCT).":[153],"It":[154,190],"improves":[155],"hardware":[156],"orders":[159],"magnitude,":[161],"and":[162,186,210,221,263,273],"more":[163],"importantly,":[164],"facilitates":[165],"scaling":[166],"nonlinear":[168],"state":[169,180],"size":[170],"(up":[171],"40%":[173],"model":[175,235],"parameters),":[176],"hence":[177],"substantially":[178],"improving":[179],"capacity,":[181],"all":[182],"without":[183],"requiring":[184],"cumbersome":[185],"error-prone":[187],"kernel":[188],"implementations.":[189],"also":[191],"allows":[192],"easy":[193],"integration":[194],"sophisticated":[196],"optimizers,":[197],"e.g.":[198],"Muon":[199],"updates.":[202],"We":[203,257],"validate":[204],"our":[205,243],"approach":[206,226],"diverse":[208],"modalities":[209],"tasks,":[211],"including":[212],"novel":[213,249],"view":[214,250],"synthesis":[215,251],"image":[217],"set,":[218],"language":[219],"models,":[220],"auto-regressive":[222],"video":[223,233],"diffusion.":[224],"Our":[225],"can":[227],"scale":[228],"up":[229,238],"14B-parameter":[231],"AR":[232],"diffusion":[234],"sequences":[237],"56K":[240],"tokens.":[241],"longest":[244],"sequence":[245],"experiment,":[246],"perform":[248],"1":[253],"million":[254],"length.":[256],"hope":[258],"this":[259],"work":[260],"will":[261],"inspire":[262],"accelerate":[264],"new":[265],"research":[266],"field":[269],"modeling":[272],"test-time":[274],"training.":[275],"Website:":[276],"https://tianyuanzhang.com/projects/ttt-done-right":[277]},"counts_by_year":[],"updated_date":"2026-03-07T16:01:11.037858","created_date":"2025-10-10T00:00:00"}
