{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,10]],"date-time":"2026-04-10T20:56:23Z","timestamp":1775854583867,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":44,"publisher":"ACM","funder":[{"DOI":"10.13039\/100000015","name":"U.S. Department of Energy","doi-asserted-by":"publisher","award":["82037"],"award-info":[{"award-number":["82037"]}],"id":[{"id":"10.13039\/100000015","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,7,20]]},"DOI":"10.1145\/3731545.3731594","type":"proceedings-article","created":{"date-parts":[[2025,9,9]],"date-time":"2025-09-09T12:46:16Z","timestamp":1757421976000},"page":"1-13","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":1,"title":["Optimizing Data Distribution and Kernel Performance for Efficient Training of Chemistry Foundation Models: A Case Study with MACE"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-8174-2545","authenticated-orcid":false,"given":"Jesun Sahariar","family":"Firoz","sequence":"first","affiliation":[{"name":"Pacific Northwest National Laboratory, Seattle, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9084-8480","authenticated-orcid":false,"given":"Franco","family":"Pellegrini","sequence":"additional","affiliation":[{"name":"SISSA, Trieste, Italy"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5433-0900","authenticated-orcid":false,"given":"Mario","family":"Geiger","sequence":"additional","affiliation":[{"name":"NVIDIA, Santa Clara, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0353-7626","authenticated-orcid":false,"given":"Darren","family":"Hsu","sequence":"additional","affiliation":[{"name":"NVIDIA, Santa Clara, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4199-7244","authenticated-orcid":false,"given":"Jenna A.","family":"Bilbrey","sequence":"additional","affiliation":[{"name":"Pacific Northwest National Laboratory, Richland, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9738-6461","authenticated-orcid":false,"given":"Han-Yi","family":"Chou","sequence":"additional","affiliation":[{"name":"NVIDIA, Santa Clara, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-6822-6047","authenticated-orcid":false,"given":"Maximilian","family":"Stadler","sequence":"additional","affiliation":[{"name":"NVIDIA, Santa Clara, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8440-0751","authenticated-orcid":false,"given":"Markus","family":"H\u00f6hnerbach","sequence":"additional","affiliation":[{"name":"NVIDIA, Santa Clara, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2520-0511","authenticated-orcid":false,"given":"Tingyu","family":"Wang","sequence":"additional","affiliation":[{"name":"NVIDIA, Santa Clara, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5618-7175","authenticated-orcid":false,"given":"Dejun","family":"Lin","sequence":"additional","affiliation":[{"name":"NVIDIA, Santa Clara, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0588-7750","authenticated-orcid":false,"given":"Emine","family":"Kucukbenli","sequence":"additional","affiliation":[{"name":"NVIDIA, Santa Clara, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7981-4175","authenticated-orcid":false,"given":"Henry W.","family":"Sprueill","sequence":"additional","affiliation":[{"name":"Pacific Northwest National Laboratory, Richland, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6915-9851","authenticated-orcid":false,"given":"Ilyes","family":"Batatia","sequence":"additional","affiliation":[{"name":"University of Cambridge, Cambridge, United Kingdom"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6303-1037","authenticated-orcid":false,"given":"Sotiris S.","family":"Xantheas","sequence":"additional","affiliation":[{"name":"Pacific Northwest National Laboratory, Richland, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6851-177X","authenticated-orcid":false,"given":"Mal-Soon","family":"Lee","sequence":"additional","affiliation":[{"name":"Pacific Northwest National Laboratory, Richland, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1378-5241","authenticated-orcid":false,"given":"Christopher","family":"Mundy","sequence":"additional","affiliation":[{"name":"Pacific Northwest National Laboratory, Richland, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8180-2034","authenticated-orcid":false,"given":"Gabor","family":"Csanyi","sequence":"additional","affiliation":[{"name":"University of Cambridge, Cambridge, United Kingdom"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7314-7896","authenticated-orcid":false,"given":"Justin S.","family":"Smith","sequence":"additional","affiliation":[{"name":"NVIDIA, Santa Clara, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4737-2034","authenticated-orcid":false,"given":"Ponnuswamy","family":"Sadayappan","sequence":"additional","affiliation":[{"name":"University of Utah, Salt Lake City, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7352-2035","authenticated-orcid":false,"given":"Sutanay","family":"Choudhury","sequence":"additional","affiliation":[{"name":"Pacific Northwest National Laboratory, Richland, USA"}]}],"member":"320","published-online":{"date-parts":[[2025,9,9]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"[n.d.]. Bin packing as a scheduling Problem. https:\/\/www.csa.iisc.ac.in\/~arindamkhan\/seminar\/binpacking21\/"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1038\/s41524-024-01339-x"},{"key":"e_1_3_2_1_3_1","volume-title":"Truong Son Hy, and Risi Kondor","author":"Anderson Brandon","year":"2019","unstructured":"Brandon Anderson, Truong Son Hy, and Risi Kondor. 2019. Cormorant: Covariant molecular neural networks. Advances in neural information processing systems 32 (2019)."},{"key":"e_1_3_2_1_4_1","volume-title":"Albert Musaelian, Gregor NC Simm, Ralf Drautz, Christoph Ortner, Boris Kozinsky, and G\u00e1bor Cs\u00e1nyi.","author":"Batatia Ilyes","year":"2025","unstructured":"Ilyes Batatia, Simon Batzner, D\u00e1vid P\u00e9ter Kov\u00e1cs, Albert Musaelian, Gregor NC Simm, Ralf Drautz, Christoph Ortner, Boris Kozinsky, and G\u00e1bor Cs\u00e1nyi. 2025. The design space of E (3)-equivariant atom-centred interatomic potentials. Nature Machine Intelligence (2025), 1\u201312."},{"key":"e_1_3_2_1_5_1","unstructured":"Ilyes Batatia Philipp Benner Yuan Chiang Alin M Elena D\u00e1vid P Kov\u00e1cs Janosh Riebesell Xavier R Advincula Mark Asta William J Baldwin Noam Bernstein et al. 2023. A foundation model for atomistic materials chemistry. arXiv preprint arXiv:2401.00096 (2023)."},{"key":"e_1_3_2_1_6_1","first-page":"11423","article-title":"MACE: Higher order equivariant message passing neural networks for fast and accurate force fields","volume":"35","author":"Batatia Ilyes","year":"2022","unstructured":"Ilyes Batatia, David P Kovacs, Gregor Simm, Christoph Ortner, and G\u00e1bor Cs\u00e1nyi. 2022. MACE: Higher order equivariant message passing neural networks for fast and accurate force fields. Advances in Neural Information Processing Systems 35 (2022), 11423\u201311436.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_7_1","volume-title":"Nature communications 13, 1","author":"Batzner Simon","year":"2022","unstructured":"Simon Batzner, Albert Musaelian, Lixin Sun, Mario Geiger, Jonathan P Mailoa, Mordechai Kornbluth, Nicola Molinari, Tess E Smidt, and Boris Kozinsky. 2022. E (3)-equivariant graph neural networks for data-efficient and accurate interatomic potentials. Nature communications 13, 1 (2022), 2453."},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1063\/5.0009933"},{"key":"e_1_3_2_1_9_1","unstructured":"Rishi Bommasani Drew A Hudson Ehsan Adeli Russ Altman Simran Arora Sydney von Arx Michael S Bernstein Jeannette Bohg Antoine Bosselut Emma Brunskill et al. 2021. On the opportunities and risks of foundation models. arXiv preprint arXiv:2108.07258 (2021)."},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01240-3_32"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1038\/s42256-023-00716-3"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1103\/PhysRevB.99.014104"},{"key":"e_1_3_2_1_13_1","volume-title":"Fast Graph Representation Learning with PyTorch Geometric. In ICLR Workshop on Representation Learning on Graphs and Manifolds.","author":"Fey Matthias","unstructured":"Matthias Fey and Jan E. Lenssen. 2019. Fast Graph Representation Learning with PyTorch Geometric. In ICLR Workshop on Representation Learning on Graphs and Manifolds."},{"key":"e_1_3_2_1_14_1","volume-title":"Johnson","author":"Garey Michael R.","year":"1979","unstructured":"Michael R. Garey and David S. Johnson. 1979. Computers and intractability: A guide to the theory of NP-completeness. W.H. Freeman and Company."},{"key":"e_1_3_2_1_15_1","first-page":"6790","article-title":"Gemnet: Universal directional graph neural networks for molecules","volume":"34","author":"Gasteiger Johannes","year":"2021","unstructured":"Johannes Gasteiger, Florian Becker, and Stephan G\u00fcnnemann. 2021. Gemnet: Universal directional graph neural networks for molecules. Advances in Neural Information Processing Systems 34 (2021), 6790\u20136802.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_16_1","volume-title":"Directional Message Passing for Molecular Graphs. In International Conference on Learning Representations.","author":"Gasteiger Johannes","unstructured":"Johannes Gasteiger, Janek Gro\u00df, and Stephan G\u00fcnnemann. [n. d.]. Directional Message Passing for Molecular Graphs. In International Conference on Learning Representations."},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","unstructured":"Mario Geiger Tess Smidt Alby M. Benjamin Kurt Miller Wouter Boomsma Bradley Dice Kostiantyn Lapchevskyi Maurice Weiler Micha\u0142 Tyszkiewicz Simon Batzner Dylan Madisetti Martin Uhrin Jes Frellsen Nuri Jung Sophia Sanborn Mingjian Wen Josh Rackers Marcel R\u00f8d and Michael Bailey. 2022. Euclidean neural networks: e3nn. 10.5281\/zenodo.6459381","DOI":"10.5281\/zenodo.6459381"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1145\/3625549.3658655"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1021\/acs.jcim.3c01312"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1039\/D2DD00102K"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1038\/s43588-023-00561-9"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-24488-9"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1063\/5.0155322"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1021\/acs.jpclett.1c01357"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1145\/3650200.3661896"},{"key":"e_1_3_2_1_26_1","volume-title":"The Eleventh International Conference on Learning Representations.","author":"Liao Yi-Lun","unstructured":"Yi-Lun Liao and Tess Smidt. [n. d.]. Equiformer: Equivariant Graph Attention Transformer for 3D Atomistic Graphs. In The Eleventh International Conference on Learning Representations."},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1103\/PhysRevMaterials.7.045802"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1038\/s41597-023-02103-4"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1038\/s41467-023-36329-y"},{"key":"e_1_3_2_1_30_1","volume-title":"Transferability of datasets between Machine-Learning Interaction Potentials. arXiv preprint arXiv:2409.05590","author":"Niblett Samuel P","year":"2024","unstructured":"Samuel P Niblett, Panagiotis Kourtis, Ioan-Bogdan Magd\u0103u, Clare P Grey, and G\u00e1bor Cs\u00e1nyi. 2024. Transferability of datasets between Machine-Learning Interaction Potentials. arXiv preprint arXiv:2409.05590 (2024)."},{"key":"e_1_3_2_1_31_1","volume-title":"On first-order meta-learning algorithms. arXiv preprint arXiv:1803.02999","author":"Nichol Alex","year":"2018","unstructured":"Alex Nichol, Joshua Achiam, and John Schulman. 2018. On first-order meta-learning algorithms. arXiv preprint arXiv:1803.02999 (2018)."},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1038\/s43588-021-00034-x"},{"key":"e_1_3_2_1_33_1","volume-title":"Kshitij Mehta, Pei Zhang, David Rogers, Jonghyun Bae, Khaled Z Ibrahim, Ashwin M Aji, Karl W Schulz, Jorda Polo, et al.","author":"Pasini Massimiliano Lupo","year":"2024","unstructured":"Massimiliano Lupo Pasini, Jong Youl Choi, Kshitij Mehta, Pei Zhang, David Rogers, Jonghyun Bae, Khaled Z Ibrahim, Ashwin M Aji, Karl W Schulz, Jorda Polo, et al. 2024. Scalable Training of Trustworthy and Energy-Efficient Predictive Graph Foundation Models for Atomistic Materials Modeling: A Case Study with HydraGNN. arXiv preprint arXiv:2406.12909 (2024)."},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCIE.2010.5668374"},{"key":"e_1_3_2_1_35_1","volume-title":"International conference on machine learning. PMLR, 9323\u20139332","author":"Satorras V\u0131ctor Garcia","year":"2021","unstructured":"V\u0131ctor Garcia Satorras, Emiel Hoogeboom, and Max Welling. 2021. E (n) equivariant graph neural networks. In International conference on machine learning. PMLR, 9323\u20139332."},{"key":"e_1_3_2_1_36_1","volume-title":"Stefan Chmiela, Alexandre Tkatchenko, and Klaus-Robert M\u00fcller.","author":"Sch\u00fctt Kristof","year":"2017","unstructured":"Kristof Sch\u00fctt, Pieter-Jan Kindermans, Huziel Enoc Sauceda Felix, Stefan Chmiela, Alexandre Tkatchenko, and Klaus-Robert M\u00fcller. 2017. Schnet: A continuous-filter convolutional neural network for modeling quantum interactions. Advances in neural information processing systems 30 (2017)."},{"key":"e_1_3_2_1_37_1","volume-title":"The Twelfth International Conference on Learning Representations.","author":"Shoghi Nima","year":"2024","unstructured":"Nima Shoghi, Adeesh Kolluru, John R Kitchin, Zachary Ward Ulissi, C Lawrence Zitnick, and Brandon M Wood. 2024. From Molecules to Materials: Pre-training Large Generalizable Models for Atomic Property Prediction. In The Twelfth International Conference on Learning Representations."},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1063\/5.0133023"},{"key":"e_1_3_2_1_39_1","volume-title":"Tensor field networks: Rotation-and translation-equivariant neural networks for 3d point clouds. arXiv preprint arXiv:1802.08219","author":"Thomas Nathaniel","year":"2018","unstructured":"Nathaniel Thomas, Tess Smidt, Steven Kearnes, Lusann Yang, Li Li, Kai Kohlhoff, and Patrick Riley. 2018. Tensor field networks: Rotation-and translation-equivariant neural networks for 3d point clouds. arXiv preprint arXiv:1802.08219 (2018)."},{"key":"e_1_3_2_1_40_1","volume-title":"Machine learning interatomic potential: Bridge the gap between small-scale models and realistic device-scale simulations. Iscience 27, 5","author":"Wang Guanjie","year":"2024","unstructured":"Guanjie Wang, Changrui Wang, Xuanguang Zhang, Zefeng Li, Jian Zhou, and Zhimei Sun. 2024. Machine learning interatomic potential: Bridge the gap between small-scale models and realistic device-scale simulations. Iscience 27, 5 (2024)."},{"key":"e_1_3_2_1_41_1","volume-title":"Scaling New Heights: Transformative Cross-GPU Sampling for Training Billion-Edge Graphs. In SC24: International Conference for High Performance Computing, Networking, Storage and Analysis. IEEE, 1\u201315","author":"Xia Yaqi","year":"2024","unstructured":"Yaqi Xia, Donglin Yang, Xiaobo Zhou, and Dazhao Cheng. 2024. Scaling New Heights: Transformative Cross-GPU Sampling for Training Billion-Edge Graphs. In SC24: International Conference for High Performance Computing, Networking, Storage and Analysis. IEEE, 1\u201315."},{"key":"e_1_3_2_1_42_1","volume-title":"Group theory in a nutshell for physicists","author":"Zee Anthony","unstructured":"Anthony Zee. 2016. Group theory in a nutshell for physicists. Vol. 17. Princeton University Press."},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1109\/IA351965.2020.00011"},{"key":"e_1_3_2_1_44_1","volume-title":"Teaching a neural network to attach and detach electrons from molecules. Nature communications 12, 1","author":"Zubatyuk Roman","year":"2021","unstructured":"Roman Zubatyuk, Justin S Smith, Benjamin T Nebgen, Sergei Tretiak, and Olexandr Isayev. 2021. Teaching a neural network to attach and detach electrons from molecules. Nature communications 12, 1 (2021), 4870."}],"event":{"name":"HPDC '25: 34th International Symposium on High-Performance Parallel and Distributed Computing","location":"University of Notre Dame Conference Facilities Notre Dame IN USA","acronym":"HPDC '25","sponsor":["SIGHPC ACM Special Interest Group on High Performance Computing, Special Interest Group on High Performance Computing","SIGARCH ACM Special Interest Group on Computer Architecture"]},"container-title":["Proceedings of the 34th International Symposium on High-Performance Parallel and Distributed Computing"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3731545.3731594","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,9]],"date-time":"2025-09-09T12:46:58Z","timestamp":1757422018000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3731545.3731594"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,7,20]]},"references-count":44,"alternative-id":["10.1145\/3731545.3731594","10.1145\/3731545"],"URL":"https:\/\/doi.org\/10.1145\/3731545.3731594","relation":{},"subject":[],"published":{"date-parts":[[2025,7,20]]},"assertion":[{"value":"2025-09-09","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}