{"id":"https://openalex.org/W4393969316","doi":"https://doi.org/10.48550/arxiv.2404.02852","title":"Toward Inference-optimal Mixture-of-Expert Large Language Models","display_name":"Toward Inference-optimal Mixture-of-Expert Large Language Models","publication_year":2024,"publication_date":"2024-04-03","ids":{"openalex":"https://openalex.org/W4393969316","doi":"https://doi.org/10.48550/arxiv.2404.02852"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2404.02852","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2404.02852","pdf_url":"https://arxiv.org/pdf/2404.02852","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2404.02852","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5090539039","display_name":"Longfei Yun","orcid":"https://orcid.org/0000-0001-7628-9179"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Yun, Longfei","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5076407338","display_name":"Yonghao Zhuang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhuang, Yonghao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101403218","display_name":"Yao Fu","orcid":"https://orcid.org/0000-0002-2491-3854"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Fu, Yao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5009547049","display_name":"Eric P. Xing","orcid":"https://orcid.org/0009-0005-9158-4201"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xing, Eric P","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5100396885","display_name":"Hao Zhang","orcid":"https://orcid.org/0000-0002-2928-2692"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Hao","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5090539039"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9854999780654907,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9854999780654907,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13702","display_name":"Machine Learning in Healthcare","score":0.910099983215332,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.7634307146072388},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5171741843223572},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.48325711488723755},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.412127822637558},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.3206160366535187},{"id":"https://openalex.org/keywords/philosophy","display_name":"Philosophy","score":0.13482138514518738}],"concepts":[{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.7634307146072388},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5171741843223572},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.48325711488723755},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.412127822637558},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.3206160366535187},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.13482138514518738}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2404.02852","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2404.02852","pdf_url":"https://arxiv.org/pdf/2404.02852","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},{"id":"doi:10.48550/arxiv.2404.02852","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2404.02852","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2404.02852","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2404.02852","pdf_url":"https://arxiv.org/pdf/2404.02852","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":false},"content_urls":{"pdf":"https://content.openalex.org/works/W4393969316.pdf"},"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W2358668433","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W2382290278","https://openalex.org/W2478288626","https://openalex.org/W4391913857","https://openalex.org/W2350741829","https://openalex.org/W3204019825"],"abstract_inverted_index":{"Mixture-of-Expert":[0],"(MoE)":[1],"based":[2],"large":[3],"language":[4],"models":[5],"(LLMs),":[6],"such":[7],"as":[8,117,143],"the":[9,25,41,50,54,63,70,73,81,94,99,111,118,134,147,160,166,176,188],"recent":[10],"Mixtral":[11],"and":[12,57,80],"DeepSeek-MoE,":[13],"have":[14],"shown":[15],"great":[16],"promise":[17],"in":[18,89,173],"scaling":[19,64,135],"model":[20,55,74,76],"size":[21,56],"without":[22],"suffering":[23],"from":[24],"quadratic":[26],"growth":[27],"of":[28,31,59,66,97,101,113,137],"training":[29,37,46,119,179,195,203],"cost":[30,120],"dense":[32,35],"transformers.":[33],"Like":[34],"models,":[36],"MoEs":[38,153],"requires":[39],"answering":[40],"same":[42,167],"question:":[43],"given":[44],"a":[45,155,180,193,198,202],"budget,":[47],"what":[48],"is":[49,125,197],"optimal":[51],"allocation":[52],"on":[53],"number":[58,100,112],"tokens?":[60],"We":[61,130,150],"study":[62],"law":[65,136],"MoE-based":[67],"LLMs":[68],"regarding":[69],"relations":[71],"between":[72],"performance,":[75,168],"size,":[77,79],"dataset":[78,196],"expert":[82,182],"degree.":[83],"Echoing":[84],"previous":[85],"research":[86],"studying":[87],"MoE":[88,138,183],"different":[90],"contexts,":[91],"we":[92,108],"observe":[93],"diminishing":[95],"return":[96],"increasing":[98],"experts,":[102],"but":[103,169,191],"this":[104],"seems":[105],"to":[106,132],"suggest":[107],"should":[109],"scale":[110],"experts":[114,158],"until":[115],"saturation,":[116],"would":[121],"remain":[122],"constant,":[123],"which":[124],"problematic":[126],"during":[127],"inference":[128,141],"time.":[129],"propose":[131],"amend":[133],"by":[139],"introducing":[140],"efficiency":[142],"another":[144],"metric":[145],"besides":[146],"validation":[148],"loss.":[149],"find":[151],"that":[152],"with":[154,192],"few":[156],"(4/8)":[157],"are":[159],"most":[161],"serving":[162],"efficient":[163],"solution":[164],"under":[165,201],"costs":[170],"2.5-3.5x":[171],"more":[172],"training.":[174],"On":[175],"other":[177],"hand,":[178],"(16/32)":[181],"much":[184],"smaller":[185],"(70-85%)":[186],"than":[187],"loss-optimal":[189],"solution,":[190],"larger":[194],"promising":[199],"setup":[200],"budget.":[204]},"counts_by_year":[],"updated_date":"2026-03-11T14:59:36.786465","created_date":"2025-10-10T00:00:00"}
