{"id":"https://openalex.org/W2171473263","doi":"https://doi.org/10.1109/ipdps.2008.4536163","title":"An efficient, model-based CPU-GPU heterogeneous FFT library","display_name":"An efficient, model-based CPU-GPU heterogeneous FFT library","publication_year":2008,"publication_date":"2008-04-01","ids":{"openalex":"https://openalex.org/W2171473263","doi":"https://doi.org/10.1109/ipdps.2008.4536163","mag":"2171473263"},"language":"en","primary_location":{"id":"doi:10.1109/ipdps.2008.4536163","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ipdps.2008.4536163","pdf_url":null,"source":{"id":"https://openalex.org/S4210174069","display_name":"Proceedings - IEEE International Parallel and Distributed Processing Symposium","issn_l":"1530-2075","issn":["1530-2075"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2008 IEEE International Symposium on Parallel and Distributed Processing","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5110277008","display_name":"Yasuhito Ogata","orcid":null},"institutions":[{"id":"https://openalex.org/I114531698","display_name":"Tokyo Institute of Technology","ror":"https://ror.org/0112mx960","country_code":"JP","type":"education","lineage":["https://openalex.org/I114531698"]},{"id":"https://openalex.org/I4210086780","display_name":"Japan Science and Technology Agency","ror":"https://ror.org/00097mb19","country_code":"JP","type":"government","lineage":["https://openalex.org/I4210086780"]}],"countries":["JP"],"is_corresponding":true,"raw_author_name":"Yasuhito Ogata","raw_affiliation_strings":["JST-CREST, Japan","Tokyo Institute of Technology, Japan"],"affiliations":[{"raw_affiliation_string":"JST-CREST, Japan","institution_ids":["https://openalex.org/I4210086780"]},{"raw_affiliation_string":"Tokyo Institute of Technology, Japan","institution_ids":["https://openalex.org/I114531698"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5013739106","display_name":"Toshio Endo","orcid":null},"institutions":[{"id":"https://openalex.org/I114531698","display_name":"Tokyo Institute of Technology","ror":"https://ror.org/0112mx960","country_code":"JP","type":"education","lineage":["https://openalex.org/I114531698"]},{"id":"https://openalex.org/I4210086780","display_name":"Japan Science and Technology Agency","ror":"https://ror.org/00097mb19","country_code":"JP","type":"government","lineage":["https://openalex.org/I4210086780"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Toshio Endo","raw_affiliation_strings":["JST-CREST, Japan","Tokyo Institute of Technology, Japan"],"affiliations":[{"raw_affiliation_string":"JST-CREST, Japan","institution_ids":["https://openalex.org/I4210086780"]},{"raw_affiliation_string":"Tokyo Institute of Technology, Japan","institution_ids":["https://openalex.org/I114531698"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5035025604","display_name":"Naoya Maruyama","orcid":null},"institutions":[{"id":"https://openalex.org/I4210086780","display_name":"Japan Science and Technology Agency","ror":"https://ror.org/00097mb19","country_code":"JP","type":"government","lineage":["https://openalex.org/I4210086780"]},{"id":"https://openalex.org/I114531698","display_name":"Tokyo Institute of Technology","ror":"https://ror.org/0112mx960","country_code":"JP","type":"education","lineage":["https://openalex.org/I114531698"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Naoya Maruyama","raw_affiliation_strings":["JST-CREST, Japan","Tokyo Institute of Technology, Japan"],"affiliations":[{"raw_affiliation_string":"JST-CREST, Japan","institution_ids":["https://openalex.org/I4210086780"]},{"raw_affiliation_string":"Tokyo Institute of Technology, Japan","institution_ids":["https://openalex.org/I114531698"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100634486","display_name":"Satoshi Matsuoka","orcid":"https://orcid.org/0000-0003-1910-8532"},"institutions":[{"id":"https://openalex.org/I114531698","display_name":"Tokyo Institute of Technology","ror":"https://ror.org/0112mx960","country_code":"JP","type":"education","lineage":["https://openalex.org/I114531698"]},{"id":"https://openalex.org/I4210086780","display_name":"Japan Science and Technology Agency","ror":"https://ror.org/00097mb19","country_code":"JP","type":"government","lineage":["https://openalex.org/I4210086780"]},{"id":"https://openalex.org/I184597095","display_name":"National Institute of Informatics","ror":"https://ror.org/04ksd4g47","country_code":"JP","type":"facility","lineage":["https://openalex.org/I1319490839","https://openalex.org/I184597095","https://openalex.org/I4210158934"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Satoshi Matsuoka","raw_affiliation_strings":["JST-CREST, Japan","National Institute of Information, Japan","Tokyo Institute of Technology, Japan"],"affiliations":[{"raw_affiliation_string":"JST-CREST, Japan","institution_ids":["https://openalex.org/I4210086780"]},{"raw_affiliation_string":"National Institute of Information, Japan","institution_ids":["https://openalex.org/I184597095"]},{"raw_affiliation_string":"Tokyo Institute of Technology, Japan","institution_ids":["https://openalex.org/I114531698"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5110277008"],"corresponding_institution_ids":["https://openalex.org/I114531698","https://openalex.org/I4210086780"],"apc_list":null,"apc_paid":null,"fwci":4.4897,"has_fulltext":false,"cited_by_count":78,"citation_normalized_percentile":{"value":0.95127295,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":94,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"10"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10715","display_name":"Distributed and Parallel Computing Systems","score":0.9908000230789185,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8799928426742554},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.7426618337631226},{"id":"https://openalex.org/keywords/fast-fourier-transform","display_name":"Fast Fourier transform","score":0.682709276676178},{"id":"https://openalex.org/keywords/profiling","display_name":"Profiling (computer programming)","score":0.6639807224273682},{"id":"https://openalex.org/keywords/gpu-cluster","display_name":"GPU cluster","score":0.6270194053649902},{"id":"https://openalex.org/keywords/central-processing-unit","display_name":"Central processing unit","score":0.6026650667190552},{"id":"https://openalex.org/keywords/supercomputer","display_name":"Supercomputer","score":0.5966989398002625},{"id":"https://openalex.org/keywords/general-purpose-computing-on-graphics-processing-units","display_name":"General-purpose computing on graphics processing units","score":0.5648510456085205},{"id":"https://openalex.org/keywords/graphics","display_name":"Graphics","score":0.5005967617034912},{"id":"https://openalex.org/keywords/graphics-processing-unit","display_name":"Graphics processing unit","score":0.5001859664916992},{"id":"https://openalex.org/keywords/cuda","display_name":"CUDA","score":0.4815390706062317},{"id":"https://openalex.org/keywords/multi-core-processor","display_name":"Multi-core processor","score":0.47130951285362244},{"id":"https://openalex.org/keywords/memory-bandwidth","display_name":"Memory bandwidth","score":0.44999611377716064},{"id":"https://openalex.org/keywords/performance-improvement","display_name":"Performance improvement","score":0.4406086504459381},{"id":"https://openalex.org/keywords/computation","display_name":"Computation","score":0.43061378598213196},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.22924846410751343},{"id":"https://openalex.org/keywords/computer-hardware","display_name":"Computer hardware","score":0.17917796969413757},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.1196635365486145}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8799928426742554},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.7426618337631226},{"id":"https://openalex.org/C75172450","wikidata":"https://www.wikidata.org/wiki/Q623950","display_name":"Fast Fourier transform","level":2,"score":0.682709276676178},{"id":"https://openalex.org/C187191949","wikidata":"https://www.wikidata.org/wiki/Q1138496","display_name":"Profiling (computer programming)","level":2,"score":0.6639807224273682},{"id":"https://openalex.org/C2781335571","wikidata":"https://www.wikidata.org/wiki/Q2633544","display_name":"GPU cluster","level":3,"score":0.6270194053649902},{"id":"https://openalex.org/C49154492","wikidata":"https://www.wikidata.org/wiki/Q5300","display_name":"Central processing unit","level":2,"score":0.6026650667190552},{"id":"https://openalex.org/C83283714","wikidata":"https://www.wikidata.org/wiki/Q121117","display_name":"Supercomputer","level":2,"score":0.5966989398002625},{"id":"https://openalex.org/C50630238","wikidata":"https://www.wikidata.org/wiki/Q971505","display_name":"General-purpose computing on graphics processing units","level":3,"score":0.5648510456085205},{"id":"https://openalex.org/C21442007","wikidata":"https://www.wikidata.org/wiki/Q1027879","display_name":"Graphics","level":2,"score":0.5005967617034912},{"id":"https://openalex.org/C2779851693","wikidata":"https://www.wikidata.org/wiki/Q183484","display_name":"Graphics processing unit","level":2,"score":0.5001859664916992},{"id":"https://openalex.org/C2778119891","wikidata":"https://www.wikidata.org/wiki/Q477690","display_name":"CUDA","level":2,"score":0.4815390706062317},{"id":"https://openalex.org/C78766204","wikidata":"https://www.wikidata.org/wiki/Q555032","display_name":"Multi-core processor","level":2,"score":0.47130951285362244},{"id":"https://openalex.org/C188045654","wikidata":"https://www.wikidata.org/wiki/Q17148339","display_name":"Memory bandwidth","level":2,"score":0.44999611377716064},{"id":"https://openalex.org/C2778915421","wikidata":"https://www.wikidata.org/wiki/Q3643177","display_name":"Performance improvement","level":2,"score":0.4406086504459381},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.43061378598213196},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.22924846410751343},{"id":"https://openalex.org/C9390403","wikidata":"https://www.wikidata.org/wiki/Q3966","display_name":"Computer hardware","level":1,"score":0.17917796969413757},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.1196635365486145},{"id":"https://openalex.org/C21547014","wikidata":"https://www.wikidata.org/wiki/Q1423657","display_name":"Operations management","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/ipdps.2008.4536163","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ipdps.2008.4536163","pdf_url":null,"source":{"id":"https://openalex.org/S4210174069","display_name":"Proceedings - IEEE International Parallel and Distributed Processing Symposium","issn_l":"1530-2075","issn":["1530-2075"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2008 IEEE International Symposium on Parallel and Distributed Processing","raw_type":"proceedings-article"},{"id":"pmh:oai:t2r2.star.titech.ac.jp:50062400","is_oa":false,"landing_page_url":"http://t2r2.star.titech.ac.jp/cgi-bin/publicationinfo.cgi?q_publication_content_number=CTT100575649","pdf_url":null,"source":{"id":"https://openalex.org/S4377196385","display_name":"Tokyo Tech Research Repository (Tokyo Institute of Technology)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I114531698","host_organization_name":"Tokyo Institute of Technology","host_organization_lineage":["https://openalex.org/I114531698"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Conference Paper"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/7","score":0.5099999904632568,"display_name":"Affordable and clean energy"}],"awards":[],"funders":[{"id":"https://openalex.org/F4320322832","display_name":"University of Tokyo","ror":"https://ror.org/057zh3y96"},{"id":"https://openalex.org/F4320338075","display_name":"Core Research for Evolutional Science and Technology","ror":"https://ror.org/00097mb19"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":13,"referenced_works":["https://openalex.org/W1816932979","https://openalex.org/W2016677154","https://openalex.org/W2032309817","https://openalex.org/W2079524266","https://openalex.org/W2102182691","https://openalex.org/W2108792719","https://openalex.org/W2136834900","https://openalex.org/W2139578306","https://openalex.org/W2150606860","https://openalex.org/W2162380856","https://openalex.org/W3150025736","https://openalex.org/W6638416746","https://openalex.org/W6654554885"],"related_works":["https://openalex.org/W2598944200","https://openalex.org/W2559348759","https://openalex.org/W1936382095","https://openalex.org/W2163816448","https://openalex.org/W4317790246","https://openalex.org/W2056717482","https://openalex.org/W2992303576","https://openalex.org/W2346971659","https://openalex.org/W1482681439","https://openalex.org/W4211066045"],"abstract_inverted_index":{"General-Purpose":[0],"computing":[1,34,109],"on":[2,60],"Graphics":[3],"Processing":[4],"Units":[5],"(GPGPU)":[6],"is":[7,39,65],"becoming":[8],"popular":[9],"in":[10,19,32],"HPC":[11],"because":[12,66],"of":[13,21,45,58,131,141,167,188],"its":[14,36],"high":[15,234],"peak":[16],"performance.":[17],"However,":[18],"spite":[20],"the":[22,46,56,67,128,137,154,164,180,185,198,208,222],"potential":[23],"performance":[24,38,69,104,124,151,181,224],"improvements":[25],"as":[26,28,77,195,197,233,235],"well":[27],"recent":[29,52],"promising":[30],"results":[31],"scientific":[33],"applications,":[35],"real":[37],"not":[40],"necessarily":[41],"higher":[42],"than":[43,203,216],"that":[44,100,126,179,191,207,221],"current":[47],"high-performance":[48],"CPUs,":[49],"especially":[50],"with":[51,175,201],"trends":[53],"towards":[54],"increasing":[55],"number":[57],"cores":[59],"a":[61,93,123,241,245],"single":[62],"die.":[63],"This":[64],"GPU":[68],"can":[70,183,231],"be":[71,232],"severely":[72],"limited":[73],"by":[74],"such":[75],"restrictions":[76],"memory":[78],"size":[79],"and":[80,82,119,135,147,162,206,229],"bandwidth":[81],"programming":[83],"using":[84,105,170,226,239],"graphics-specific":[85],"APIs.":[86],"To":[87,111],"overcome":[88],"this":[89],"problem,":[90],"we":[91,121],"propose":[92],"model-based,":[94],"adaptive":[95],"library":[96],"for":[97,143],"2D":[98],"FFT":[99,155],"automatically":[101],"achieves":[102],"optimal":[103,113,210],"available":[106],"heterogeneous":[107],"CPU-GPU":[108],"resources.":[110],"find":[112],"load":[114,148,211],"distribution":[115,212],"ratios":[116,213],"between":[117],"CPUs":[118,228],"GPUs,":[120],"construct":[122],"model":[125,152,182],"captures":[127],"respective":[129],"contributions":[130],"CPU":[132,242],"vs.":[133],"GPU,":[134],"predicts":[136,163],"total":[138],"execution":[139,165,186],"time":[140,166,187],"2D-FFT":[142],"arbitrary":[144],"problem":[145,189],"sizes":[146,190],"distribution.":[149],"The":[150],"divides":[153],"computation":[156],"into":[157],"several":[158],"small":[159],"sub":[160],"steps,":[161],"each":[168],"step":[169],"profiling":[171],"results.":[172],"Preliminary":[173],"evaluation":[174],"our":[176],"prototype":[177],"shows":[178],"predict":[184],"are":[192],"16":[193],"times":[194],"large":[196],"profile":[199],"runs":[200],"less":[202,215],"20%":[204],"error,":[205],"predicted":[209],"have":[214],"1%":[217],"error.":[218],"We":[219],"show":[220],"resulting":[223],"improvement":[225],"both":[227],"GPUs":[230],"50%":[236],"compared":[237],"to":[238],"either":[240],"core":[243],"or":[244],"GPU.":[246]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":4},{"year":2023,"cited_by_count":5},{"year":2022,"cited_by_count":2},{"year":2021,"cited_by_count":3},{"year":2020,"cited_by_count":4},{"year":2019,"cited_by_count":4},{"year":2018,"cited_by_count":6},{"year":2017,"cited_by_count":3},{"year":2016,"cited_by_count":5},{"year":2015,"cited_by_count":6},{"year":2014,"cited_by_count":10},{"year":2013,"cited_by_count":4},{"year":2012,"cited_by_count":7}],"updated_date":"2026-04-05T17:49:38.594831","created_date":"2025-10-10T00:00:00"}
