{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,24]],"date-time":"2026-03-24T02:41:38Z","timestamp":1774320098576,"version":"3.50.1"},"reference-count":32,"publisher":"IEEE","license":[{"start":{"date-parts":[[2019,11,1]],"date-time":"2019-11-01T00:00:00Z","timestamp":1572566400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2019,11,1]],"date-time":"2019-11-01T00:00:00Z","timestamp":1572566400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2019,11,1]],"date-time":"2019-11-01T00:00:00Z","timestamp":1572566400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2019,11]]},"DOI":"10.1109\/pmbs49563.2019.00017","type":"proceedings-article","created":{"date-parts":[[2020,4,10]],"date-time":"2020-04-10T03:18:07Z","timestamp":1586488687000},"page":"103-113","source":"Crossref","is-referenced-by-count":22,"title":["Performance Analysis of Deep Learning Workloads on Leading-edge Systems"],"prefix":"10.1109","author":[{"given":"Yihui","family":"Ren","sequence":"first","affiliation":[]},{"given":"Shinjae","family":"Yoo","sequence":"additional","affiliation":[]},{"given":"Adolfy","family":"Hoisie","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref32","year":"2017","journal-title":"Nvidia tesla v100 gpu architecture"},{"key":"ref31","article-title":"Mixed precision training","author":"micikevicius","year":"2018","journal-title":"International Conference on Learning Representations"},{"key":"ref30","author":"tierney","year":"2019","journal-title":"NCCL DGX1v DGX2 (Personal Communication)"},{"key":"ref10","first-page":"arxiv:1410.0759","article-title":"cuDNN: Efficient Primitives for Deep Learning","author":"chetlur","year":"2014","journal-title":"ArXiv e-prints"},{"key":"ref11","article-title":"BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding","author":"devlin","year":"2018","journal-title":"ArXiv e-prints"},{"key":"ref12","year":"2017","journal-title":"NVIDIA Collective Communications Library (NCCL)"},{"key":"ref13","article-title":"NVIDIA NVSwitch: The World&#x2019;s Highest-Bandwidth On-Node Switch","year":"2018"},{"key":"ref14","first-page":"74","author":"caldeira","year":"2018","journal-title":"IBM Power System AC922 Introduction and Technical Overview"},{"key":"ref15","article-title":"NVIDIA Turing Architecture","year":"2018"},{"key":"ref16","author":"furmanek","year":"2019","journal-title":"PowerAI 1 6 0 Introduction A Full Transition to Conda"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/5.726791"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-015-0816-y"},{"key":"ref19","first-page":"1097","article-title":"ImageNet Classification with Deep Convolutional Neural Networks","author":"krizhevsky","year":"2012","journal-title":"Advances in Neural Information Processing Systems 25"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D16-1264"},{"key":"ref4","doi-asserted-by":"crossref","first-page":"6645","DOI":"10.1109\/ICASSP.2013.6638947","article-title":"Speech recognition with deep recurrent neural networks","author":"graves","year":"2013","journal-title":"2013 IEEE International Conference on Acoustics Speech and Signal Processing"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D18-1009"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2012.6248110"},{"key":"ref6","year":"2017","journal-title":"Nvidia dgx-1 with tesla v100 system architecture"},{"key":"ref29","article-title":"Automatic differentiation in PyTorch","author":"paszke","year":"2017","journal-title":"NIPS-W"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/ICPP.2011.71"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/IISWC.2018.8573483"},{"key":"ref7","doi-asserted-by":"crossref","first-page":"3","DOI":"10.1007\/978-3-319-72971-8_1","article-title":"Evaluating On-Node GPU Interconnects for Deep Learning Workloads","volume":"10724","author":"tallent","year":"2017","journal-title":"PMBS SC ser Lecture Notes in Computer Science"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/IISWC.2016.7581275"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1145\/1553374.1553486"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/MM.2016.25"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"ref22","first-page":"3111","article-title":"Distributed Representations of Words and Phrases and their Compositionality","author":"mikolov","year":"2013","journal-title":"Advances in Neural Information Processing Systems 26"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1561\/2000000004"},{"key":"ref24","first-page":"5998","article-title":"Attention is All you Need","author":"vaswani","year":"2017","journal-title":"Advances in Neural IInformation Processing Systems"},{"key":"ref23","article-title":"DARPA TIMIT acoustic-phonetic continous speech corpus CD-ROM. NIST speech disc 1-1.1","volume":"93","author":"garofolo","year":"1993","journal-title":"NASA STI\/Recon Technical Report N"},{"key":"ref26","first-page":"12","author":"radford","year":"2018","journal-title":"Improving language understanding by generative pre-training"},{"key":"ref25","first-page":"3079","article-title":"Semi-supervised Sequence Learning","author":"dai","year":"2015","journal-title":"Advances in Neural IInformation Processing Systems"}],"event":{"name":"2019 IEEE\/ACM Performance Modeling, Benchmarking and Simulation of High Performance Computer Systems (PMBS)","location":"Denver, CO, USA","start":{"date-parts":[[2019,11,18]]},"end":{"date-parts":[[2019,11,18]]}},"container-title":["2019 IEEE\/ACM Performance Modeling, Benchmarking and Simulation of High Performance Computer Systems (PMBS)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9048048\/9059254\/09059262.pdf?arnumber=9059262","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,7,17]],"date-time":"2022-07-17T21:54:59Z","timestamp":1658094899000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9059262\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019,11]]},"references-count":32,"URL":"https:\/\/doi.org\/10.1109\/pmbs49563.2019.00017","relation":{},"subject":[],"published":{"date-parts":[[2019,11]]}}}