{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,24]],"date-time":"2026-02-24T16:53:04Z","timestamp":1771951984575,"version":"3.50.1"},"reference-count":42,"publisher":"IEEE","license":[{"start":{"date-parts":[[2021,9,1]],"date-time":"2021-09-01T00:00:00Z","timestamp":1630454400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2021,9,1]],"date-time":"2021-09-01T00:00:00Z","timestamp":1630454400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2021,9,1]],"date-time":"2021-09-01T00:00:00Z","timestamp":1630454400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021,9]]},"DOI":"10.1109\/pact52795.2021.00008","type":"proceedings-article","created":{"date-parts":[[2021,10,20]],"date-time":"2021-10-20T01:21:19Z","timestamp":1634692879000},"page":"1-16","source":"Crossref","is-referenced-by-count":12,"title":["A Flexible Approach to Autotuning Multi-Pass Machine Learning Compilers"],"prefix":"10.1109","author":[{"given":"Phitchaya Mangpo","family":"Phothilimthana","sequence":"first","affiliation":[]},{"given":"Amit","family":"Sabne","sequence":"additional","affiliation":[]},{"given":"Nikhil","family":"Sarda","sequence":"additional","affiliation":[]},{"given":"Karthik Srinivasa","family":"Murthy","sequence":"additional","affiliation":[]},{"given":"Yanqi","family":"Zhou","sequence":"additional","affiliation":[]},{"given":"Christof","family":"Angermueller","sequence":"additional","affiliation":[]},{"given":"Mike","family":"Burrows","sequence":"additional","affiliation":[]},{"given":"Sudip","family":"Roy","sequence":"additional","affiliation":[]},{"given":"Ketan","family":"Mandke","sequence":"additional","affiliation":[]},{"given":"Rezsa","family":"Farahani","sequence":"additional","affiliation":[]},{"given":"Yu Emma","family":"Wang","sequence":"additional","affiliation":[]},{"given":"Berkin","family":"Ilbeyi","sequence":"additional","affiliation":[]},{"given":"Blake","family":"Hechtman","sequence":"additional","affiliation":[]},{"given":"Bjarke","family":"Roune","sequence":"additional","affiliation":[]},{"given":"Shen","family":"Wang","sequence":"additional","affiliation":[]},{"given":"Yuanzhong","family":"Xu","sequence":"additional","affiliation":[]},{"given":"Samuel J.","family":"Kaufman","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref39","first-page":"1025","article-title":"Optimizing CNN Model Inference on CPUs","author":"liu","year":"2019","journal-title":"2019 USENIX Annual Technical Conference"},{"key":"ref38","article-title":"FusionStitching: Boosting Memory Intensive Computations for Deep Learning Workloads","author":"zheng","year":"2020","journal-title":"ArXiv Preprint"},{"key":"ref33","first-page":"1233","article-title":"AKG: Automatic Kernel Generation for Neural Processing Units Using Polyhedral Transformations","author":"zhao","year":"2021","journal-title":"Proceedings of the ACM SIGPLAN Conference on Programming Language Design and Implementation"},{"key":"ref32","first-page":"943","article-title":"Mind Mappings: Enabling Efficient Algorithm-Accelerator Mapping Space Search","author":"hegde","year":"2021","journal-title":"Proc Int Conf Architectural Support for Programming Languages and Operating Systems"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1145\/2897824.2925952"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1145\/3197517.3201383"},{"key":"ref37","year":"2017","journal-title":"Nvidia tensorrt - programmable inference accelerator"},{"key":"ref36","first-page":"881","article-title":"Rammer: Enabling Holistic Deep Learning Compiler Optimizations with rTasks","author":"ma","year":"2020","journal-title":"USENIX Symp on Operating Systems Design & Implementation"},{"key":"ref35","article-title":"Optimizing DNN Computation with Relaxed Graph Substitutions","author":"jia","year":"0","journal-title":"Proceedings of MLSys Conference"},{"key":"ref34","first-page":"190","article-title":"DeepCuts: A Deep Learning Optimization Framework for Versatile GPU Workloads","author":"jung","year":"2021","journal-title":"Proceedings of the ACM SIGPLAN Conference on Programming Language Design and Implementation"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1145\/3341301.3359630"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1145\/1242531.1242553"},{"key":"ref11","year":"0","journal-title":"XLA Optimizing Compiler for TensorFlow"},{"key":"ref12","article-title":"Glow: Graph Lowering Compiler Techniques for Neural Networks","author":"rotem","year":"2019","journal-title":"ArXiv Preprint"},{"key":"ref13","article-title":"Automatic cross-replica sharding of weight update in data-parallel training","volume":"abs 2004 13336","author":"xu","year":"2020","journal-title":"CoRR"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1145\/3079856.3080246"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1145\/3360307"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.33014780"},{"key":"ref17","article-title":"Population-Based Black-Box Optimization for Biological Sequence Design","author":"angermueller","year":"2020","journal-title":"ArXiv Preprint"},{"key":"ref18","article-title":"Model-based reinforcement learning for biological sequence design","author":"angermueller","year":"0","journal-title":"International Conference on Learning Representations"},{"key":"ref19","article-title":"Transferable Graph Optimizers for ML Compilers","author":"zhou","year":"0","journal-title":"Proceedings of International Conference on Neural Information Processing Systems"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1145\/2628071.2628092"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1145\/3373376.3378508"},{"key":"ref27","article-title":"Portable Performance on Heterogeneous Architectures","author":"phothilimthana","year":"2013","journal-title":"Proceedings of the eighth international conference on Architectural support for programming languages and operating systems - AS"},{"key":"ref3","first-page":"863","article-title":"Ansor: Generating High-Performance Tensor Programs for Deep Learning","author":"zheng","year":"2020","journal-title":"USENIX Symp on Operating Systems Design & Implementation"},{"key":"ref6","article-title":"Chameleon: Adaptive Code Optimization for Expedited Deep Neural Network Compilation","author":"ahn","year":"0","journal-title":"International Conference on Learning Representations"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1145\/2491956.2462176"},{"key":"ref5","doi-asserted-by":"crossref","DOI":"10.1145\/3306346.3322967","article-title":"Learning to Optimize Halide with Tree Search and Random Programs","volume":"38","author":"adams","year":"2019","journal-title":"ACM Trans Graph"},{"key":"ref8","article-title":"Tensor Comprehensions: Framework-Agnostic High-Performance Machine Learning Abstractions","author":"vasilache","year":"2018","journal-title":"ArXiv Preprint"},{"key":"ref7","article-title":"AdaTune: Adaptive Tensor Program Compilation Made Efficient","author":"li","year":"0","journal-title":"Neural Information Processing Systems Conference"},{"key":"ref2","first-page":"3393","article-title":"Learning to Optimize Tensor Programs","author":"chen","year":"2018","journal-title":"Proceedings of the 32Nd International Conference on Neural Information Processing Systems"},{"key":"ref9","article-title":"Value Learning for Throughput Optimization of Deep Learning Workloads","author":"steiner","year":"0","journal-title":"Proceedings of MLSys Conference"},{"key":"ref1","first-page":"579","article-title":"TVM: An Automated End-to-End Optimizing Compiler for Deep Learning","author":"chen","year":"2018","journal-title":"Proceedings of the 13th USENIX Conference on Operating Systems Design and Implementation"},{"key":"ref20","article-title":"Proxi-mal Policy Optimization Algorithms","volume":"abs 1707 6347","author":"schulman","year":"2017","journal-title":"CoRR"},{"key":"ref22","article-title":"A Learned Performance Model for Tensor Processing Units","author":"kaufman","year":"0","journal-title":"Proceedings of Machine Learning for Systems"},{"key":"ref21","article-title":"Relay: A High-Level Compiler for Deep Learning","author":"roesch","year":"2019","journal-title":"ArXiv Preprint"},{"key":"ref42","article-title":"A Deep Learning Based Cost Model for Automatic Code Optimization","author":"baghdadi","year":"0","journal-title":"Proceedings of MLSys"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.1998.681704"},{"key":"ref41","first-page":"187","article-title":"Improving the Accuracy, Scalability, and Performance of Graph Neural Networks with Roc","volume":"2","author":"jia","year":"0","journal-title":"Proceedings of MLSys Conference"},{"key":"ref23","first-page":"1","article-title":"Automatically tuned linear algebra software","author":"whaley","year":"1998","journal-title":"Proc 1998 ACM\/IEEE Supercomputing 98 Conf"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1145\/1542476.1542481"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1007\/s10766-010-0161-2"}],"event":{"name":"2021 30th International Conference on Parallel Architectures and Compilation Techniques (PACT)","location":"Atlanta, GA, USA","start":{"date-parts":[[2021,9,26]]},"end":{"date-parts":[[2021,9,29]]}},"container-title":["2021 30th International Conference on Parallel Architectures and Compilation Techniques (PACT)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9563009\/9563010\/09563030.pdf?arnumber=9563030","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,5,10]],"date-time":"2022-05-10T15:47:27Z","timestamp":1652197647000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9563030\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,9]]},"references-count":42,"URL":"https:\/\/doi.org\/10.1109\/pact52795.2021.00008","relation":{},"subject":[],"published":{"date-parts":[[2021,9]]}}}