{"id":"https://openalex.org/W4404648872","doi":"https://doi.org/10.48550/arxiv.2411.12992","title":"MemoryFormer: Minimize Transformer Computation by Removing Fully-Connected Layers","display_name":"MemoryFormer: Minimize Transformer Computation by Removing Fully-Connected Layers","publication_year":2024,"publication_date":"2024-11-20","ids":{"openalex":"https://openalex.org/W4404648872","doi":"https://doi.org/10.48550/arxiv.2411.12992"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2411.12992","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2411.12992","pdf_url":"https://arxiv.org/pdf/2411.12992","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":null},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2411.12992","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101616384","display_name":"Ning Ding","orcid":"https://orcid.org/0000-0001-6067-0434"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Ding, Ning","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5038232518","display_name":"Yehui Tang","orcid":"https://orcid.org/0000-0002-0322-4283"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tang, Yehui","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5030652014","display_name":"Haochen Qin","orcid":"https://orcid.org/0009-0008-7712-8703"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Qin, Haochen","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Zhou, Zhenli","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhou, Zhenli","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101853030","display_name":"C. F. Xu","orcid":"https://orcid.org/0000-0002-1290-7860"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xu, Chao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101942977","display_name":"Lin Li","orcid":"https://orcid.org/0000-0002-4626-3100"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Lin","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101629927","display_name":"K. L. Han","orcid":"https://orcid.org/0000-0001-8960-3358"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Han, Kai","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5110026325","display_name":"Heng Liao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liao, Heng","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5100727358","display_name":"Yunhe Wang","orcid":"https://orcid.org/0000-0002-2709-4946"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Yunhe","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":9,"corresponding_author_ids":["https://openalex.org/A5101616384"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":true,"cited_by_count":1,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10502","display_name":"Advanced Memory and Neural Computing","score":0.9758999943733215,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10502","display_name":"Advanced Memory and Neural Computing","score":0.9758999943733215,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12808","display_name":"Ferroelectric and Negative Capacitance Devices","score":0.9732000231742859,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10299","display_name":"Photonic and Optical Devices","score":0.9286999702453613,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computation","display_name":"Computation","score":0.7277247905731201},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.6634630560874939},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.4922608435153961},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.335868239402771},{"id":"https://openalex.org/keywords/electrical-engineering","display_name":"Electrical engineering","score":0.2941752076148987},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.15168249607086182},{"id":"https://openalex.org/keywords/voltage","display_name":"Voltage","score":0.08877965807914734}],"concepts":[{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.7277247905731201},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.6634630560874939},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.4922608435153961},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.335868239402771},{"id":"https://openalex.org/C119599485","wikidata":"https://www.wikidata.org/wiki/Q43035","display_name":"Electrical engineering","level":1,"score":0.2941752076148987},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.15168249607086182},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.08877965807914734}],"mesh":[],"locations_count":3,"locations":[{"id":"pmh:oai:arXiv.org:2411.12992","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2411.12992","pdf_url":"https://arxiv.org/pdf/2411.12992","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":null},{"id":"pmh:doi:10.48550/arxiv.2411.12992","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"publisher-specific-oa","license_id":"https://openalex.org/licenses/publisher-specific-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},{"id":"doi:10.48550/arxiv.2411.12992","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2411.12992","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2411.12992","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2411.12992","pdf_url":"https://arxiv.org/pdf/2411.12992","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":null},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G4415642530","display_name":null,"funder_award_id":"62276007","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G5249178904","display_name":null,"funder_award_id":"Grant No. 6","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2051487156","https://openalex.org/W2073681303","https://openalex.org/W2390279801","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109"],"abstract_inverted_index":{"In":[0,47],"order":[1],"to":[2,16,17,101,127,142,181,211],"reduce":[3],"the":[4,19,31,60,72,75,80,85,103,129,152,162,170,213,216],"computational":[5,36,61],"complexity":[6,37,62],"of":[7,21,44,74,106,115,124,147,169,172,215],"large":[8,122],"language":[9],"models,":[10],"great":[11],"efforts":[12],"have":[13],"been":[14],"made":[15,91],"improve":[18],"efficiency":[20],"transformer":[22,55,76],"models":[23],"such":[24],"as":[25],"linear":[26,104,134],"attention":[27,87],"and":[28,34,204],"flash-attention.":[29],"However,":[30],"model":[32,77],"size":[33],"corresponding":[35],"are":[38],"constantly":[39],"scaled":[40],"up":[41],"in":[42,133,176],"pursuit":[43],"higher":[45],"performance.":[46],"this":[48],"work,":[49],"we":[50,110],"present":[51],"MemoryFormer,":[52],"a":[53,65,113,121,139,144,177,191],"novel":[54],"architecture":[56],"which":[57,165,195],"significantly":[58],"reduces":[59],"(FLOPs)":[63],"from":[64,188,202],"new":[66],"perspective.":[67],"We":[68,136,199],"eliminate":[69],"nearly":[70],"all":[71],"computations":[73],"except":[78],"for":[79,98],"necessary":[81],"computation":[82],"required":[83],"by":[84,93],"multi-head":[86],"operation.":[88],"This":[89],"is":[90,190],"possible":[92],"utilizing":[94],"an":[95,167],"alternative":[96],"method":[97],"feature":[99],"transformation":[100],"replace":[102,128],"projection":[105],"fully-connected":[107,178],"layers.":[108],"Specifically,":[109],"first":[111],"construct":[112],"group":[114],"in-memory":[116],"lookup":[117],"tables":[118],"that":[119],"store":[120],"amount":[123],"discrete":[125],"vectors":[126,148,157],"weight":[130],"matrix":[131,173,183],"used":[132],"projection.":[135],"then":[137],"use":[138],"hash":[140],"algorithm":[141],"retrieve":[143],"correlated":[145],"subset":[146],"dynamically":[149],"based":[150],"on":[151,208],"input":[153],"embedding.":[154],"The":[155],"retrieved":[156],"combined":[158],"together":[159],"will":[160],"form":[161],"output":[163],"embedding,":[164],"provides":[166],"estimation":[168],"result":[171],"multiplication":[174],"operation":[175,194],"layer.":[179],"Compared":[180],"conducting":[182],"multiplication,":[184],"retrieving":[185],"data":[186],"blocks":[187],"memory":[189],"much":[192],"cheaper":[193],"requires":[196],"little":[197],"computations.":[198],"train":[200],"MemoryFormer":[201],"scratch":[203],"conduct":[205],"extensive":[206],"experiments":[207],"various":[209],"benchmarks":[210],"demonstrate":[212],"effectiveness":[214],"proposed":[217],"model.":[218]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2026-04-21T08:09:41.155169","created_date":"2024-11-24T00:00:00"}
