{"id":"https://openalex.org/W2898890919","doi":"https://doi.org/10.1145/3274654","title":"SCP","display_name":"SCP","publication_year":2018,"publication_date":"2018-10-10","ids":{"openalex":"https://openalex.org/W2898890919","doi":"https://doi.org/10.1145/3274654","mag":"2898890919"},"language":"en","primary_location":{"id":"doi:10.1145/3274654","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3274654","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3274654","source":{"id":"https://openalex.org/S26056741","display_name":"ACM Transactions on Architecture and Code Optimization","issn_l":"1544-3566","issn":["1544-3566","1544-3973"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Architecture and Code Optimization","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"bronze","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3274654","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101673342","display_name":"Xing Su","orcid":"https://orcid.org/0000-0002-7514-1495"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Xing Su","raw_affiliation_strings":["National Laboratory for Parallel and Distributed Processing, National University of Defense Technology, China"],"raw_orcid":"https://orcid.org/0000-0002-7514-1495","affiliations":[{"raw_affiliation_string":"National Laboratory for Parallel and Distributed Processing, National University of Defense Technology, China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5000688713","display_name":"Xiangke Liao","orcid":"https://orcid.org/0000-0002-6125-3330"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiangke Liao","raw_affiliation_strings":["National Laboratory for Parallel and Distributed Processing, National University of Defense Technology, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"National Laboratory for Parallel and Distributed Processing, National University of Defense Technology, China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100712105","display_name":"Hao Jiang","orcid":"https://orcid.org/0000-0002-6769-0785"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hao Jiang","raw_affiliation_strings":["National Laboratory for Parallel and Distributed Processing, National University of Defense Technology, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"National Laboratory for Parallel and Distributed Processing, National University of Defense Technology, China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101044975","display_name":"Canqun Yang","orcid":null},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Canqun Yang","raw_affiliation_strings":["National Laboratory for Parallel and Distributed Processing, National University of Defense Technology, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"National Laboratory for Parallel and Distributed Processing, National University of Defense Technology, China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5024664385","display_name":"Jingling Xue","orcid":"https://orcid.org/0000-0003-0380-3506"},"institutions":[{"id":"https://openalex.org/I31746571","display_name":"UNSW Sydney","ror":"https://ror.org/03r8z3t63","country_code":"AU","type":"education","lineage":["https://openalex.org/I31746571"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Jingling Xue","raw_affiliation_strings":["UNSW Sydney, Australia"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"UNSW Sydney, Australia","institution_ids":["https://openalex.org/I31746571"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5101673342"],"corresponding_institution_ids":["https://openalex.org/I170215575"],"apc_list":null,"apc_paid":null,"fwci":1.1872,"has_fulltext":true,"cited_by_count":8,"citation_normalized_percentile":{"value":0.8010157,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":98},"biblio":{"volume":"15","issue":"4","first_page":"1","last_page":"21"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10829","display_name":"Interconnection Networks and Systems","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.895573616027832},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.7090891599655151},{"id":"https://openalex.org/keywords/cache","display_name":"Cache","score":0.6102141737937927},{"id":"https://openalex.org/keywords/cache-algorithms","display_name":"Cache algorithms","score":0.5366072058677673},{"id":"https://openalex.org/keywords/cache-pollution","display_name":"Cache pollution","score":0.5008704662322998},{"id":"https://openalex.org/keywords/exploit","display_name":"Exploit","score":0.4960728585720062},{"id":"https://openalex.org/keywords/bus-sniffing","display_name":"Bus sniffing","score":0.4795650839805603},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.4597734212875366},{"id":"https://openalex.org/keywords/cache-coherence","display_name":"Cache coherence","score":0.45512670278549194},{"id":"https://openalex.org/keywords/cache-coloring","display_name":"Cache coloring","score":0.42242738604545593},{"id":"https://openalex.org/keywords/shared-memory","display_name":"Shared memory","score":0.41117575764656067},{"id":"https://openalex.org/keywords/cpu-cache","display_name":"CPU cache","score":0.3898264169692993}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.895573616027832},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.7090891599655151},{"id":"https://openalex.org/C115537543","wikidata":"https://www.wikidata.org/wiki/Q165596","display_name":"Cache","level":2,"score":0.6102141737937927},{"id":"https://openalex.org/C38556500","wikidata":"https://www.wikidata.org/wiki/Q13404475","display_name":"Cache algorithms","level":4,"score":0.5366072058677673},{"id":"https://openalex.org/C113166858","wikidata":"https://www.wikidata.org/wiki/Q5015981","display_name":"Cache pollution","level":5,"score":0.5008704662322998},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.4960728585720062},{"id":"https://openalex.org/C51185590","wikidata":"https://www.wikidata.org/wiki/Q1017228","display_name":"Bus sniffing","level":5,"score":0.4795650839805603},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.4597734212875366},{"id":"https://openalex.org/C141917322","wikidata":"https://www.wikidata.org/wiki/Q1025017","display_name":"Cache coherence","level":5,"score":0.45512670278549194},{"id":"https://openalex.org/C201148951","wikidata":"https://www.wikidata.org/wiki/Q5015976","display_name":"Cache coloring","level":4,"score":0.42242738604545593},{"id":"https://openalex.org/C133875982","wikidata":"https://www.wikidata.org/wiki/Q764810","display_name":"Shared memory","level":2,"score":0.41117575764656067},{"id":"https://openalex.org/C189783530","wikidata":"https://www.wikidata.org/wiki/Q352090","display_name":"CPU cache","level":3,"score":0.3898264169692993},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3274654","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3274654","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3274654","source":{"id":"https://openalex.org/S26056741","display_name":"ACM Transactions on Architecture and Code Optimization","issn_l":"1544-3566","issn":["1544-3566","1544-3973"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Architecture and Code Optimization","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1145/3274654","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3274654","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3274654","source":{"id":"https://openalex.org/S26056741","display_name":"ACM Transactions on Architecture and Code Optimization","issn_l":"1544-3566","issn":["1544-3566","1544-3973"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Architecture and Code Optimization","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G2288415015","display_name":null,"funder_award_id":"DP170103956 and DP180104069","funder_id":"https://openalex.org/F4320334704","funder_display_name":"Australian Research Council"},{"id":"https://openalex.org/G5369909036","display_name":null,"funder_award_id":"2017YFB0202003","funder_id":"https://openalex.org/F4320335777","funder_display_name":"National Key Research and Development Program of China"},{"id":"https://openalex.org/G7347214431","display_name":null,"funder_award_id":"NO.2017YFB0202003","funder_id":"https://openalex.org/F4320335777","funder_display_name":"National Key Research and Development Program of China"}],"funders":[{"id":"https://openalex.org/F4320334704","display_name":"Australian Research Council","ror":"https://ror.org/05mmh0f86"},{"id":"https://openalex.org/F4320335777","display_name":"National Key Research and Development Program of China","ror":null}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2898890919.pdf","grobid_xml":"https://content.openalex.org/works/W2898890919.grobid-xml"},"referenced_works_count":41,"referenced_works":["https://openalex.org/W110174833","https://openalex.org/W1503700136","https://openalex.org/W1656664476","https://openalex.org/W1983157164","https://openalex.org/W1988098298","https://openalex.org/W1988425770","https://openalex.org/W1991009705","https://openalex.org/W2034761517","https://openalex.org/W2039227678","https://openalex.org/W2043275593","https://openalex.org/W2059094285","https://openalex.org/W2063186542","https://openalex.org/W2064744742","https://openalex.org/W2064872546","https://openalex.org/W2067575922","https://openalex.org/W2073061372","https://openalex.org/W2079658918","https://openalex.org/W2084379367","https://openalex.org/W2098220211","https://openalex.org/W2099059741","https://openalex.org/W2099404643","https://openalex.org/W2111394443","https://openalex.org/W2112502633","https://openalex.org/W2124480634","https://openalex.org/W2125277034","https://openalex.org/W2135653967","https://openalex.org/W2143773524","https://openalex.org/W2156168043","https://openalex.org/W2160609361","https://openalex.org/W2160996172","https://openalex.org/W2252007067","https://openalex.org/W2288740281","https://openalex.org/W2293241800","https://openalex.org/W2333659671","https://openalex.org/W2412152731","https://openalex.org/W2427072102","https://openalex.org/W2490366892","https://openalex.org/W2516525699","https://openalex.org/W2592969254","https://openalex.org/W4233358870","https://openalex.org/W4250470790"],"related_works":["https://openalex.org/W4312759433","https://openalex.org/W2161101294","https://openalex.org/W2026179701","https://openalex.org/W2407815036","https://openalex.org/W2108638805","https://openalex.org/W2123859627","https://openalex.org/W2148571123","https://openalex.org/W2793052975","https://openalex.org/W2290179447","https://openalex.org/W4304166325"],"abstract_inverted_index":{"GEneral":[0],"Matrix":[1],"Multiply":[2],"(GEMM)":[3],"is":[4],"the":[5,12,45,68,96,137,161,195],"most":[6],"fundamental":[7],"computational":[8],"kernel":[9],"routine":[10],"in":[11,57,62,76,80,101,136,160,198,204,211],"BLAS":[13],"library.":[14],"To":[15],"achieve":[16],"high":[17],"performance,":[18],"in-memory":[19],"data":[20,37,111],"must":[21],"be":[22],"prefetched":[23],"into":[24,112,145],"fast":[25],"on-chip":[26,48],"caches":[27,86,178,183,203],"before":[28],"they":[29],"are":[30,55],"used.":[31],"Two":[32],"techniques,":[33],"software":[34],"prefetching":[35],"and":[36,65,149,164,179,201],"packing,":[38],"have":[39,157],"been":[40],"used":[41,51,61],"to":[42,95,109,131,153,219],"effectively":[43,193],"exploit":[44],"capability":[46],"of":[47,98,214],"least":[49],"recent":[50],"(LRU)":[52],"caches,":[53],"which":[54],"popular":[56],"traditional":[58],"high-performance":[59,81,99],"processors":[60,82],"high-end":[63],"servers":[64],"supercomputers.":[66],"However,":[67],"market":[69],"has":[70,192],"recently":[71],"witnessed":[72],"a":[73,93,102,113,125,142,170,205],"new":[74],"diversity":[75],"processor":[77,173],"design,":[78],"resulting":[79,210],"equipped":[83],"with":[84,87,174],"shared":[85,114,143,180],"non-LRU":[88],"replacement":[89],"policies.":[90],"This":[91],"poses":[92],"challenge":[94],"development":[97],"GEMM":[100,138,208],"multithreaded":[103],"context.":[104],"As":[105],"several":[106],"threads":[107],"try":[108],"load":[110],"cache":[115,118,134,144],"simultaneously,":[116],"interthread":[117,133],"conflicts":[119,135],"will":[120],"increase":[121],"significantly.":[122],"We":[123,156],"present":[124],"Shared":[126],"Cache":[127],"Partitioning":[128],"(SCP)":[129],"method":[130],"eliminate":[132],"routines,":[139],"by":[140,217],"partitioning":[141],"physically":[146],"disjoint":[147],"sets":[148,152],"assigning":[150],"different":[151,154],"threads.":[155],"implemented":[158],"SCP":[159,191],"OpenBLAS":[162],"library":[163],"evaluated":[165],"it":[166],"on":[167],"Phytium":[168],"2000+,":[169],"64-core":[171],"AArch64":[172],"private":[175],"LRU":[176],"L1":[177,200],"pseudo-random":[181],"L2":[182,202],"(per":[184],"four-core":[185],"cluster).":[186],"Our":[187],"evaluation":[188],"shows":[189],"that":[190],"reduced":[194],"conflict":[196],"misses":[197],"both":[199],"highly":[206],"optimized":[207],"implementation,":[209],"an":[212],"improvement":[213],"its":[215],"performance":[216],"2.75%":[218],"6.91%.":[220]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":1},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":3},{"year":2020,"cited_by_count":1},{"year":2019,"cited_by_count":1}],"updated_date":"2026-05-21T06:26:12.895304","created_date":"2018-11-09T00:00:00"}
