{"id":"https://openalex.org/W4399353800","doi":"https://doi.org/10.48550/arxiv.2406.01375","title":"D-CPT Law: Domain-specific Continual Pre-Training Scaling Law for Large Language Models","display_name":"D-CPT Law: Domain-specific Continual Pre-Training Scaling Law for Large Language Models","publication_year":2024,"publication_date":"2024-06-03","ids":{"openalex":"https://openalex.org/W4399353800","doi":"https://doi.org/10.48550/arxiv.2406.01375"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2406.01375","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2406.01375","pdf_url":"https://arxiv.org/pdf/2406.01375","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"public-domain","license_id":"https://openalex.org/licenses/public-domain","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2406.01375","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5092998429","display_name":"Haoran Que","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Que, Haoran","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5032858379","display_name":"Jiaheng Liu","orcid":"https://orcid.org/0000-0002-5183-8538"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Jiaheng","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100326077","display_name":"Ge Zhang","orcid":"https://orcid.org/0000-0003-0704-0657"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Ge","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100408850","display_name":"Chenchen Zhang","orcid":"https://orcid.org/0000-0002-7304-8721"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Chenchen","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101308310","display_name":"Xingwei Qu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Qu, Xingwei","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5111279796","display_name":"Yinghao Ma","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ma, Yinghao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5054747523","display_name":"Feiyu Duan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Duan, Feiyu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5065092030","display_name":"Zhiqi Bai","orcid":"https://orcid.org/0000-0003-2857-2556"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Bai, Zhiqi","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5031116553","display_name":"Jiakai Wang","orcid":"https://orcid.org/0000-0001-5884-3412"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Jiakai","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5020022791","display_name":"Yuanxing Zhang","orcid":"https://orcid.org/0000-0003-1460-8124"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Yuanxing","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101323614","display_name":"Xu Tan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tan, Xu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100666922","display_name":"Jie Fu","orcid":"https://orcid.org/0000-0001-5622-4888"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Fu, Jie","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5113392255","display_name":"Wenbo Su","orcid":"https://orcid.org/0009-0009-3800-7543"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Su, Wenbo","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5025403631","display_name":"Jiamang Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Jiamang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100763937","display_name":"Qu Lin","orcid":"https://orcid.org/0000-0002-4251-5577"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Qu, Lin","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5054263974","display_name":"Bo Zheng","orcid":"https://orcid.org/0000-0002-9654-1500"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zheng, Bo","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":16,"corresponding_author_ids":["https://openalex.org/A5092998429"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":true,"cited_by_count":3,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9969000220298767,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9969000220298767,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9836999773979187,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/scaling-law","display_name":"Scaling law","score":0.5946625471115112},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.5192596912384033},{"id":"https://openalex.org/keywords/law","display_name":"Law","score":0.48793694376945496},{"id":"https://openalex.org/keywords/scaling","display_name":"Scaling","score":0.45758405327796936},{"id":"https://openalex.org/keywords/training","display_name":"Training (meteorology)","score":0.42205142974853516},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.3905916213989258},{"id":"https://openalex.org/keywords/political-science","display_name":"Political science","score":0.39003992080688477},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.18641969561576843},{"id":"https://openalex.org/keywords/physics","display_name":"Physics","score":0.11160498857498169},{"id":"https://openalex.org/keywords/mathematical-analysis","display_name":"Mathematical analysis","score":0.05426591634750366}],"concepts":[{"id":"https://openalex.org/C2988430800","wikidata":"https://www.wikidata.org/wiki/Q428971","display_name":"Scaling law","level":3,"score":0.5946625471115112},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.5192596912384033},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.48793694376945496},{"id":"https://openalex.org/C99844830","wikidata":"https://www.wikidata.org/wiki/Q102441924","display_name":"Scaling","level":2,"score":0.45758405327796936},{"id":"https://openalex.org/C2777211547","wikidata":"https://www.wikidata.org/wiki/Q17141490","display_name":"Training (meteorology)","level":2,"score":0.42205142974853516},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.3905916213989258},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.39003992080688477},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.18641969561576843},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.11160498857498169},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.05426591634750366},{"id":"https://openalex.org/C153294291","wikidata":"https://www.wikidata.org/wiki/Q25261","display_name":"Meteorology","level":1,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2406.01375","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2406.01375","pdf_url":"https://arxiv.org/pdf/2406.01375","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"public-domain","license_id":"https://openalex.org/licenses/public-domain","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},{"id":"doi:10.48550/arxiv.2406.01375","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2406.01375","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"public-domain","license_id":"https://openalex.org/licenses/public-domain","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2406.01375","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2406.01375","pdf_url":"https://arxiv.org/pdf/2406.01375","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"public-domain","license_id":"https://openalex.org/licenses/public-domain","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":false},"content_urls":{"pdf":"https://content.openalex.org/works/W4399353800.pdf"},"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W2117748264","https://openalex.org/W4298190863","https://openalex.org/W2518229301","https://openalex.org/W2597205018","https://openalex.org/W2554072523","https://openalex.org/W1671671741","https://openalex.org/W2952002823","https://openalex.org/W2013329450","https://openalex.org/W3104909087","https://openalex.org/W2512421151"],"abstract_inverted_index":{"Continual":[0,113],"Pre-Training":[1,114],"(CPT)":[2],"on":[3,29,62,160,171,209],"Large":[4],"Language":[5],"Models":[6],"(LLMs)":[7],"has":[8],"been":[9],"widely":[10],"used":[11],"to":[12,37,105,117,180],"expand":[13],"the":[14,27,39,44,50,79,85,90,97,107,111,119,135,142,176,182,196,203,214],"model's":[15],"fundamental":[16],"understanding":[17],"of":[18,65,92,110,129,147,185,195,218],"specific":[19,86],"downstream":[20,51,145,211],"domains":[21,212],"(e.g.,":[22,46],"math":[23],"and":[24,49,144,153,174,216,223],"code).":[25],"For":[26],"CPT":[28],"domain-specific":[30],"LLMs,":[31],"one":[32],"important":[33],"question":[34],"is":[35,82],"how":[36],"choose":[38],"optimal":[40,83,120],"mixture":[41,66,121,149],"ratio":[42,81,122],"between":[43],"general-corpus":[45],"Dolma,":[47],"Slim-pajama)":[48],"domain-corpus.":[52],"Existing":[53],"methods":[54],"usually":[55],"adopt":[56],"laborious":[57],"human":[58],"efforts":[59],"by":[60,96,133],"grid-searching":[61],"a":[63],"set":[64],"ratios,":[67,150],"which":[68],"require":[69],"high":[70],"GPU":[71],"training":[72,125,158,191,198],"consumption":[73],"costs.":[74],"Besides,":[75],"we":[76,103,138,164],"cannot":[77],"guarantee":[78],"selected":[80],"for":[84,100,127,202],"domain.":[87],"To":[88],"address":[89],"limitations":[91],"existing":[93],"methods,":[94],"inspired":[95],"Scaling":[98,108],"Law":[99,109,170,179,222],"performance":[101,146],"prediction,":[102],"propose":[104,175],"investigate":[106],"Domain-specific":[112],"(D-CPT":[115],"Law)":[116],"decide":[118],"with":[123],"acceptable":[124],"costs":[126,159,192],"LLMs":[128],"different":[130],"sizes.":[131],"Specifically,":[132],"fitting":[134],"D-CPT":[136,169,178,183,221,225],"Law,":[137],"can":[139],"easily":[140],"predict":[141,181],"general":[143],"arbitrary":[148],"model":[151],"sizes,":[152],"dataset":[154],"sizes":[155],"using":[156],"small-scale":[157],"limited":[161],"experiments.":[162],"Moreover,":[163],"also":[165],"extend":[166],"our":[167,219],"standard":[168],"cross-domain":[172],"settings":[173],"Cross-Domain":[177,224],"law":[184],"target":[186,204],"domains,":[187],"where":[188],"very":[189],"small":[190],"(about":[193],"1%":[194],"normal":[197],"costs)":[199],"are":[200],"needed":[201],"domains.":[205],"Comprehensive":[206],"experimental":[207],"results":[208],"six":[210],"demonstrate":[213],"effectiveness":[215],"generalizability":[217],"proposed":[220],"Law.":[226]},"counts_by_year":[{"year":2025,"cited_by_count":3}],"updated_date":"2026-03-12T08:34:05.389933","created_date":"2025-10-10T00:00:00"}
