{"id":"https://openalex.org/W4296596641","doi":"https://doi.org/10.48550/arxiv.2209.09570","title":"Adaptable Butterfly Accelerator for Attention-based NNs via Hardware and Algorithm Co-design","display_name":"Adaptable Butterfly Accelerator for Attention-based NNs via Hardware and Algorithm Co-design","publication_year":2022,"publication_date":"2022-09-20","ids":{"openalex":"https://openalex.org/W4296596641","doi":"https://doi.org/10.48550/arxiv.2209.09570"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2209.09570","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2209.09570","pdf_url":"https://arxiv.org/pdf/2209.09570","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2209.09570","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5057043409","display_name":"Hongxiang Fan","orcid":"https://orcid.org/0000-0003-2387-5611"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Fan, Hongxiang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5110651248","display_name":"Thomas Chau","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chau, Thomas","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5033442931","display_name":"Stylianos I. Venieris","orcid":"https://orcid.org/0000-0001-5181-6251"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Venieris, Stylianos I.","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5026267258","display_name":"Royson Lee","orcid":"https://orcid.org/0000-0002-6716-7994"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lee, Royson","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102929943","display_name":"Alexandros Kouris","orcid":"https://orcid.org/0000-0002-2900-430X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kouris, Alexandros","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5057940557","display_name":"Wayne Luk","orcid":"https://orcid.org/0000-0002-6750-927X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Luk, Wayne","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5045638679","display_name":"Nicholas D. Lane","orcid":"https://orcid.org/0000-0002-2728-8273"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lane, Nicholas D.","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5000815783","display_name":"Mohamed S. Abdelfattah","orcid":"https://orcid.org/0000-0002-6416-9011"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Abdelfattah, Mohamed S.","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5057043409"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.992900013923645,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.992900013923645,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11019","display_name":"Image Enhancement Techniques","score":0.989799976348877,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11992","display_name":"CCD and CMOS Imaging Sensors","score":0.9832000136375427,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8220162391662598},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.6646138429641724},{"id":"https://openalex.org/keywords/speedup","display_name":"Speedup","score":0.6633470058441162},{"id":"https://openalex.org/keywords/field-programmable-gate-array","display_name":"Field-programmable gate array","score":0.6412799954414368},{"id":"https://openalex.org/keywords/hardware-acceleration","display_name":"Hardware acceleration","score":0.5671051740646362},{"id":"https://openalex.org/keywords/computer-hardware","display_name":"Computer hardware","score":0.4850420355796814},{"id":"https://openalex.org/keywords/computation","display_name":"Computation","score":0.45995911955833435},{"id":"https://openalex.org/keywords/focus","display_name":"Focus (optics)","score":0.45466771721839905},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.3704019784927368},{"id":"https://openalex.org/keywords/computer-engineering","display_name":"Computer engineering","score":0.34972575306892395},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.31536588072776794}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8220162391662598},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.6646138429641724},{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.6633470058441162},{"id":"https://openalex.org/C42935608","wikidata":"https://www.wikidata.org/wiki/Q190411","display_name":"Field-programmable gate array","level":2,"score":0.6412799954414368},{"id":"https://openalex.org/C13164978","wikidata":"https://www.wikidata.org/wiki/Q600158","display_name":"Hardware acceleration","level":3,"score":0.5671051740646362},{"id":"https://openalex.org/C9390403","wikidata":"https://www.wikidata.org/wiki/Q3966","display_name":"Computer hardware","level":1,"score":0.4850420355796814},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.45995911955833435},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.45466771721839905},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.3704019784927368},{"id":"https://openalex.org/C113775141","wikidata":"https://www.wikidata.org/wiki/Q428691","display_name":"Computer engineering","level":1,"score":0.34972575306892395},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.31536588072776794},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C120665830","wikidata":"https://www.wikidata.org/wiki/Q14620","display_name":"Optics","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2209.09570","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2209.09570","pdf_url":"https://arxiv.org/pdf/2209.09570","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},{"id":"doi:10.48550/arxiv.2209.09570","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2209.09570","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2209.09570","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2209.09570","pdf_url":"https://arxiv.org/pdf/2209.09570","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/9","display_name":"Industry, innovation and infrastructure","score":0.4300000071525574}],"awards":[{"id":"https://openalex.org/G3496335909","display_name":"Centre for Spatial Computational Learning","funder_award_id":"EP/S030069/1","funder_id":"https://openalex.org/F4320334627","funder_display_name":"Engineering and Physical Sciences Research Council"},{"id":"https://openalex.org/G4587427570","display_name":null,"funder_award_id":"EP/S030069/1","funder_id":"https://openalex.org/F4320334627","funder_display_name":"Engineering and Physical Sciences Research Council"},{"id":"https://openalex.org/G514660964","display_name":"DART: Design Accelerators by Regulating Transformations","funder_award_id":"EP/V028251/1","funder_id":"https://openalex.org/F4320334627","funder_display_name":"Engineering and Physical Sciences Research Council"},{"id":"https://openalex.org/G5201431165","display_name":"EPSRC Centre for Doctoral Training in High Performance Embedded and Distributed Systems","funder_award_id":"EP/L016796/1","funder_id":"https://openalex.org/F4320334627","funder_display_name":"Engineering and Physical Sciences Research Council"},{"id":"https://openalex.org/G5248561189","display_name":"Event-based parallel computing - partially ordered event-triggered systems (POETS)","funder_award_id":"EP/N031768/1","funder_id":"https://openalex.org/F4320334627","funder_display_name":"Engineering and Physical Sciences Research Council"},{"id":"https://openalex.org/G6653872853","display_name":null,"funder_award_id":"EP/L016","funder_id":"https://openalex.org/F4320334627","funder_display_name":"Engineering and Physical Sciences Research Council"},{"id":"https://openalex.org/G8051675292","display_name":null,"funder_award_id":"EP/V028251/1","funder_id":"https://openalex.org/F4320334627","funder_display_name":"Engineering and Physical Sciences Research Council"}],"funders":[{"id":"https://openalex.org/F4320334627","display_name":"Engineering and Physical Sciences Research Council","ror":"https://ror.org/0439y7842"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4296596641.pdf","grobid_xml":"https://content.openalex.org/works/W4296596641.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W3138386522","https://openalex.org/W2499279132","https://openalex.org/W2532502681","https://openalex.org/W2518118925","https://openalex.org/W3159273459","https://openalex.org/W4319952061","https://openalex.org/W4280636456","https://openalex.org/W4388913998","https://openalex.org/W4310584535","https://openalex.org/W3154092384"],"abstract_inverted_index":{"Attention-based":[0],"neural":[1],"networks":[2],"have":[3,41],"become":[4],"pervasive":[5],"in":[6,113],"many":[7],"AI":[8],"tasks.":[9],"Despite":[10],"their":[11,34,66],"excellent":[12],"algorithmic":[13,122],"performance,":[14],"the":[15,18,50,56,62,79,83,94,110,121,140,144,147,179,184,188,193,203,214,233,262],"use":[16],"of":[17,53,64,93,195,205],"attention":[19,54,80,141],"mechanism":[20,81,142],"and":[21,28,143,202,216,241,248,257],"feed-forward":[22],"network":[23],"(FFN)":[24],"demands":[25],"excessive":[26],"computational":[27,235],"memory":[29],"resources,":[30],"which":[31],"often":[32],"compromises":[33],"hardware":[35,70,118,148,165,176],"performance.":[36],"Although":[37],"various":[38],"sparse":[39],"variants":[40,115],"been":[42],"introduced,":[43],"most":[44,73,92],"approaches":[45],"only":[46,75],"focus":[47,76],"on":[48,55,68,77,244],"mitigating":[49],"quadratic":[51],"scaling":[52],"algorithm":[57,215],"level,":[58,123,149],"without":[59,86],"explicitly":[60],"considering":[61],"efficiency":[63],"mapping":[65],"methods":[67],"real":[69],"designs.":[71],"Furthermore,":[72],"efforts":[74],"either":[78],"or":[82],"FFNs":[84],"but":[85],"jointly":[87,212],"optimizing":[88,213],"both":[89,139],"parts,":[90],"causing":[91],"current":[95],"designs":[96,243],"to":[97,137,167,199,208,224,232,255],"lack":[98],"scalability":[99],"when":[100],"dealing":[101],"with":[102,238],"different":[103,114,169],"input":[104],"lengths.":[105],"This":[106],"paper":[107],"systematically":[108],"considers":[109],"sparsity":[111,135],"patterns":[112],"from":[116],"a":[117,127,132,150,173],"perspective.":[119],"On":[120,146,178],"we":[124],"propose":[125],"FABNet,":[126],"hardware-friendly":[128],"variant":[129],"that":[130,157],"adopts":[131],"unified":[133,175],"butterfly":[134,153,170,220],"pattern":[136],"approximate":[138],"FFNs.":[145],"novel":[151],"adaptable":[152],"accelerator":[154,221],"is":[155,253],"proposed":[156],"can":[158],"be":[159],"configured":[160],"at":[161],"runtime":[162],"via":[163],"dedicated":[164],"control":[166],"accelerate":[168],"layers":[171],"using":[172],"single":[174],"engine.":[177],"Long-Range-Arena":[180],"dataset,":[181],"FABNet":[182],"achieves":[183,222],"same":[185,234,263],"accuracy":[186],"as":[187],"vanilla":[189],"Transformer":[190],"while":[191],"reducing":[192],"amount":[194],"computation":[196],"by":[197],"10":[198],"66":[200],"times":[201,226,259],"number":[204],"parameters":[206],"2":[207],"22":[209],"times.":[210],"By":[211],"hardware,":[217],"our":[218,251],"FPGA-based":[219],"14.2":[223],"23.2":[225],"speedup":[227],"over":[228],"state-of-the-art":[229],"accelerators":[230],"normalized":[231],"budget.":[236,265],"Compared":[237],"optimized":[239],"CPU":[240],"GPU":[242],"Raspberry":[245],"Pi":[246],"4":[247],"Jetson":[249],"Nano,":[250],"system":[252],"up":[254],"273.8":[256],"15.1":[258],"faster":[260],"under":[261],"power":[264]},"counts_by_year":[],"updated_date":"2026-04-10T15:06:20.359241","created_date":"2022-09-22T00:00:00"}
