{"id":"https://openalex.org/W7093094125","doi":"https://doi.org/10.48550/arxiv.2510.16448","title":"Input Domain Aware MoE: Decoupling Routing Decisions from Task Optimization in Mixture of Experts","display_name":"Input Domain Aware MoE: Decoupling Routing Decisions from Task Optimization in Mixture of Experts","publication_year":2025,"publication_date":"2025-10-18","ids":{"openalex":"https://openalex.org/W7093094125","doi":"https://doi.org/10.48550/arxiv.2510.16448"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2510.16448","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2510.16448","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2510.16448","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Hua, Yongxiang","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Hua, Yongxiang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Cao, Haoyu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Cao, Haoyu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Tao, Zhou","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tao, Zhou","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Li, Bocheng","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Bocheng","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Wu, Zihao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wu, Zihao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Liu, Chaohu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Chaohu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":null,"display_name":"Xu, Linli","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xu, Linli","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":7,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":true,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.6740000247955322,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.6740000247955322,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.11140000075101852,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11704","display_name":"Mobile Crowdsensing and Crowdsourcing","score":0.05609999969601631,"subfield":{"id":"https://openalex.org/subfields/1706","display_name":"Computer Science Applications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.6682999730110168},{"id":"https://openalex.org/keywords/probabilistic-logic","display_name":"Probabilistic logic","score":0.5867999792098999},{"id":"https://openalex.org/keywords/partition","display_name":"Partition (number theory)","score":0.5199000239372253},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.5177000164985657},{"id":"https://openalex.org/keywords/routing","display_name":"Routing (electronic design automation)","score":0.503600001335144},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.478300005197525}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7724000215530396},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.6682999730110168},{"id":"https://openalex.org/C49937458","wikidata":"https://www.wikidata.org/wiki/Q2599292","display_name":"Probabilistic logic","level":2,"score":0.5867999792098999},{"id":"https://openalex.org/C42812","wikidata":"https://www.wikidata.org/wiki/Q1082910","display_name":"Partition (number theory)","level":2,"score":0.5199000239372253},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.5177000164985657},{"id":"https://openalex.org/C74172769","wikidata":"https://www.wikidata.org/wiki/Q1446839","display_name":"Routing (electronic design automation)","level":2,"score":0.503600001335144},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.478300005197525},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4235000014305115},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.40299999713897705},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.39899998903274536},{"id":"https://openalex.org/C205606062","wikidata":"https://www.wikidata.org/wiki/Q5249645","display_name":"Decoupling (probability)","level":2,"score":0.361299991607666},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.3467000126838684},{"id":"https://openalex.org/C120936955","wikidata":"https://www.wikidata.org/wiki/Q2155640","display_name":"Empirical research","level":2,"score":0.34389999508857727},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.30869999527931213},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.2971999943256378},{"id":"https://openalex.org/C196083921","wikidata":"https://www.wikidata.org/wiki/Q7915758","display_name":"Variance (accounting)","level":2,"score":0.27160000801086426},{"id":"https://openalex.org/C207685749","wikidata":"https://www.wikidata.org/wiki/Q2088941","display_name":"Domain knowledge","level":2,"score":0.2669999897480011}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2510.16448","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2510.16448","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2510.16448","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2510.16448","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Sparse":[0],"Mixture":[1],"of":[2,26,91,115],"Experts":[3],"(sMoE)":[4],"has":[5],"become":[6],"a":[7,49,68,74,89],"pivotal":[8],"approach":[9],"for":[10,119],"scaling":[11],"large":[12],"vision-language":[13,129],"models,":[14],"offering":[15],"substantial":[16],"capacity":[17],"while":[18,102],"maintaining":[19],"computational":[20],"efficiency":[21],"through":[22],"dynamic,":[23],"sparse":[24],"activation":[25],"experts.":[27],"However,":[28],"existing":[29,137],"routing":[30,70,86,110],"mechanisms,":[31],"typically":[32],"based":[33],"on":[34,128],"similarity":[35],"scoring,":[36],"struggle":[37],"to":[38,48,78,97],"effectively":[39],"capture":[40],"the":[41,81],"underlying":[42],"input":[43,82],"structure.":[44],"This":[45],"limitation":[46],"leads":[47],"trade-off":[50],"between":[51],"expert":[52,124,146],"specialization":[53,100],"and":[54,60,122,144],"balanced":[55,104],"computation,":[56],"hindering":[57],"both":[58],"scalability":[59],"performance.":[61],"We":[62],"propose":[63],"Input":[64],"Domain":[65],"Aware":[66],"MoE,":[67],"novel":[69],"framework":[71],"that":[72,132],"leverages":[73],"probabilistic":[75],"mixture":[76,90],"model":[77],"better":[79],"partition":[80],"space.":[83],"By":[84],"modeling":[85],"probabilities":[87],"as":[88],"distributions,":[92],"our":[93,109,133],"method":[94,134],"enables":[95],"experts":[96],"develop":[98],"clear":[99],"boundaries":[101],"achieving":[103,140],"utilization.":[105],"Unlike":[106],"conventional":[107],"approaches,":[108,139],"mechanism":[111],"is":[112],"trained":[113],"independently":[114],"task-specific":[116],"objectives,":[117],"allowing":[118],"stable":[120],"optimization":[121],"decisive":[123],"assignments.":[125],"Empirical":[126],"results":[127],"tasks":[130],"demonstrate":[131],"consistently":[135],"outperforms":[136],"sMoE":[138],"higher":[141],"task":[142],"performance":[143],"improved":[145],"utilization":[147],"balance.":[148]},"counts_by_year":[],"updated_date":"2025-11-06T06:51:31.235846","created_date":"2025-10-22T00:00:00"}
