{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,2]],"date-time":"2026-04-02T09:41:40Z","timestamp":1775122900483,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":36,"publisher":"ACM","license":[{"start":{"date-parts":[[2017,4,18]],"date-time":"2017-04-18T00:00:00Z","timestamp":1492473600000},"content-version":"vor","delay-in-days":365,"URL":"http:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"PDL Consortium"},{"name":"Intel Science and Technology Center for Cloud Computing"},{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","award":["CNS-1042537 and CNS-1042543"],"award-info":[{"award-number":["CNS-1042537 and CNS-1042543"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2016,4,18]]},"DOI":"10.1145\/2901318.2901323","type":"proceedings-article","created":{"date-parts":[[2016,4,12]],"date-time":"2016-04-12T08:23:12Z","timestamp":1460449392000},"page":"1-16","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":197,"title":["GeePS"],"prefix":"10.1145","author":[{"given":"Henggang","family":"Cui","sequence":"first","affiliation":[{"name":"Carnegie Mellon University"}]},{"given":"Hao","family":"Zhang","sequence":"additional","affiliation":[{"name":"Carnegie Mellon University"}]},{"given":"Gregory R.","family":"Ganger","sequence":"additional","affiliation":[{"name":"Carnegie Mellon University"}]},{"given":"Phillip B.","family":"Gibbons","sequence":"additional","affiliation":[{"name":"Carnegie Mellon University"}]},{"given":"Eric P.","family":"Xing","sequence":"additional","affiliation":[{"name":"Carnegie Mellon University"}]}],"member":"320","published-online":{"date-parts":[[2016,4,18]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"NVIDIA cuBLAS https:\/\/developer.nvidia.com\/cublas."},{"key":"e_1_3_2_1_2_1","unstructured":"NVIDIA cuDNN https:\/\/developer.nvidia.com\/cudnn."},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1145\/2124295.2124312"},{"key":"e_1_3_2_1_4_1","volume-title":"ICML","author":"Ahn S.","year":"2014","unstructured":"S. Ahn, B. Shahbaba, and M. Welling. Distributed stochastic gradient MCMC. In ICML, 2014."},{"key":"e_1_3_2_1_5_1","volume-title":"Scaling learning algorithms towards AI. Large-scale kernel machines, 34(5)","author":"Bengio Y.","year":"2007","unstructured":"Y. Bengio, Y. LeCun, et al. Scaling learning algorithms towards AI. Large-scale kernel machines, 34(5), 2007."},{"key":"e_1_3_2_1_6_1","volume-title":"MXNet: A flexible and efficient machine learning library for heterogeneous distributed systems. arXiv preprint arXiv:1512.01274","author":"Chen T.","year":"2015","unstructured":"T. Chen, M. Li, Y. Li, M. Lin, N. Wang, M. Wang, T. Xiao, B. Xu, C. Zhang, and Z. Zhang. MXNet: A flexible and efficient machine learning library for heterogeneous distributed systems. arXiv preprint arXiv:1512.01274, 2015."},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.5555\/2685048.2685094"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.5555\/2354409.2354694"},{"key":"e_1_3_2_1_9_1","volume-title":"ICML","author":"Coates A.","year":"2013","unstructured":"A. Coates, B. Huval, T. Wang, D. Wu, B. Catanzaro, and N. Andrew. Deep learning with COTS HPC systems. In ICML, 2013."},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.5555\/2643634.2643639"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1145\/2670979.2670984"},{"key":"e_1_3_2_1_12_1","unstructured":"H. Cui G. R. Ganger and P. B. Gibbons. Scalable deep learning on distributed GPUs with a GPU-specialized parameter server. CMU PDL Technical Report (CMU-PDL-15-107) 2015."},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/TASL.2011.2134090"},{"key":"e_1_3_2_1_14_1","volume-title":"NIPS","author":"Dean J.","year":"2012","unstructured":"J. Dean, G. Corrado, R. Monga, K. Chen, M. Devin, M. Mao, A. Senior, P. Tucker, K. Yang, Q. V. Le, et al. Large scale distributed deep networks. In NIPS, 2012."},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.21236\/ADA623249"},{"key":"e_1_3_2_1_17_1","volume-title":"PRObE: A thousand-node experimental cluster for computer systems research. USENIX","author":"Gibson G.","year":"2013","unstructured":"G. Gibson, G. Grider, A. Jacobson, and W. Lloyd. PRObE: A thousand-node experimental cluster for computer systems research. USENIX; login:, 2013."},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/MSP.2012.2205597"},{"key":"e_1_3_2_1_19_1","volume-title":"NIPS","author":"Ho Q.","year":"2013","unstructured":"Q. Ho, J. Cipar, H. Cui, S. Lee, J. K. Kim, P. B. Gibbons, G. A. Gibson, G. R. Ganger, and E. P. Xing. More effective distributed ML via a Stale Synchronous Parallel parameter server. In NIPS, 2013."},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1997.9.8.1735"},{"key":"e_1_3_2_1_21_1","volume-title":"Caffe: Convolutional architecture for fast feature embedding. arXiv preprint arXiv:1408.5093","author":"Jia Y.","year":"2014","unstructured":"Y. Jia, E. Shelhamer, J. Donahue, S. Karayev, J. Long, R. Girshick, S. Guadarrama, and T. Darrell. Caffe: Convolutional architecture for fast feature embedding. arXiv preprint arXiv:1408.5093, 2014."},{"key":"e_1_3_2_1_22_1","volume-title":"arXiv preprint arXiv:1404.5997","author":"Krizhevsky A.","year":"2014","unstructured":"A. Krizhevsky. One weird trick for parallelizing convolutional neural networks. arXiv preprint arXiv:1404.5997, 2014."},{"key":"e_1_3_2_1_23_1","volume-title":"NIPS","author":"Krizhevsky A.","year":"2012","unstructured":"A. Krizhevsky, I. Sutskever, and G. E. Hinton. ImageNet classification with deep convolutional neural networks. In NIPS, 2012."},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1145\/2640087.2644155"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.5555\/1924943.1924964"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-015-0816-y"},{"key":"e_1_3_2_1_27_1","volume-title":"Ucf101: A dataset of 101 human actions classes from videos in the wild. arXiv preprint arXiv:1212.0402","author":"Soomro K.","year":"2012","unstructured":"K. Soomro, A. R. Zamir, and M. Shah. Ucf101: A dataset of 101 human actions classes from videos in the wild. arXiv preprint arXiv:1212.0402, 2012."},{"key":"e_1_3_2_1_28_1","volume-title":"Going deeper with convolutions. arXiv preprint arXiv:1409.4842","author":"Szegedy C.","year":"2014","unstructured":"C. Szegedy, W. Liu, Y. Jia, P. Sermanet, S. Reed, D. Anguelov, D. Erhan, V. Vanhoucke, and A. Rabinovich. Going deeper with convolutions. arXiv preprint arXiv:1409.4842, 2014."},{"key":"e_1_3_2_1_29_1","volume-title":"Show and tell: A neural image caption generator. arXiv preprint arXiv:1411.4555","author":"Vinyals O.","year":"2014","unstructured":"O. Vinyals, A. Toshev, S. Bengio, and D. Erhan. Show and tell: A neural image caption generator. arXiv preprint arXiv:1411.4555, 2014."},{"key":"e_1_3_2_1_30_1","volume-title":"NIPS 2014 Workshop of Distributed Matrix Computations","author":"Wang M.","year":"2014","unstructured":"M. Wang, T. Xiao, J. Li, J. Zhang, C. Hong, and Z. Zhang. Minerva: A scalable and highly efficient training platform for deep learning. NIPS 2014 Workshop of Distributed Matrix Computations, 2014."},{"key":"e_1_3_2_1_31_1","volume-title":"Towards topic modeling for big data. arXiv preprint arXiv:1405.4402","author":"Wang Y.","year":"2014","unstructured":"Y. Wang, X. Zhao, Z. Sun, H. Yan, L. Wang, Z. Jin, L. Wang, Y. Gao, J. Zeng, Q. Yang, et al. Towards topic modeling for big data. arXiv preprint arXiv:1405.4402, 2014."},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1145\/2806777.2806778"},{"key":"e_1_3_2_1_33_1","volume-title":"Deep image: Scaling up image recognition. arXiv preprint arXiv:1501.02876","author":"Wu R.","year":"2015","unstructured":"R. Wu, S. Yan, Y. Shan, Q. Dang, and G. Sun. Deep image: Scaling up image recognition. arXiv preprint arXiv:1501.02876, 2015."},{"key":"e_1_3_2_1_34_1","volume-title":"CVPR","author":"Ng J. Yue-Hei","year":"2015","unstructured":"J. Yue-Hei Ng, M. Hausknecht, S. Vijayanarasimhan, O. Vinyals, R. Monga, and G. Toderici. Beyond short snippets: Deep networks for video classification. In CVPR, 2015."},{"key":"e_1_3_2_1_35_1","volume-title":"Poseidon: A system architecture for efficient GPU-based deep learning on multiple machines. arXiv preprint arXiv:1512.06216","author":"Zhang H.","year":"2015","unstructured":"H. Zhang, Z. Hu, J. Wei, P. Xie, G. Kim, Q. Ho, and E. Xing. Poseidon: A system architecture for efficient GPU-based deep learning on multiple machines. arXiv preprint arXiv:1512.06216, 2015."},{"key":"e_1_3_2_1_36_1","volume-title":"ICML","author":"Zhang R.","year":"2014","unstructured":"R. Zhang and J. Kwok. Asynchronous distributed ADMM algorithm for global variable consensus optimization. In ICML, 2014."}],"event":{"name":"EuroSys '16: Eleventh EuroSys Conference 2016","location":"London United Kingdom","acronym":"EuroSys '16"},"container-title":["Proceedings of the Eleventh European Conference on Computer Systems"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/2901318.2901323","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/2901318.2901323","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/2901318.2901323","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,11,18]],"date-time":"2025-11-18T09:38:25Z","timestamp":1763458705000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/2901318.2901323"}},"subtitle":["scalable deep learning on distributed GPUs with a GPU-specialized parameter server"],"short-title":[],"issued":{"date-parts":[[2016,4,18]]},"references-count":36,"alternative-id":["10.1145\/2901318.2901323","10.1145\/2901318"],"URL":"https:\/\/doi.org\/10.1145\/2901318.2901323","relation":{},"subject":[],"published":{"date-parts":[[2016,4,18]]},"assertion":[{"value":"2016-04-18","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}