{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,4]],"date-time":"2026-05-04T09:33:26Z","timestamp":1777887206609,"version":"3.51.4"},"reference-count":37,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2026,5,1]],"date-time":"2026-05-01T00:00:00Z","timestamp":1777593600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2026,5,1]],"date-time":"2026-05-01T00:00:00Z","timestamp":1777593600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2026,5,1]],"date-time":"2026-05-01T00:00:00Z","timestamp":1777593600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-017"},{"start":{"date-parts":[[2026,5,1]],"date-time":"2026-05-01T00:00:00Z","timestamp":1777593600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"},{"start":{"date-parts":[[2026,5,1]],"date-time":"2026-05-01T00:00:00Z","timestamp":1777593600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-012"},{"start":{"date-parts":[[2026,5,1]],"date-time":"2026-05-01T00:00:00Z","timestamp":1777593600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,5,1]],"date-time":"2026-05-01T00:00:00Z","timestamp":1777593600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-004"}],"funder":[{"DOI":"10.13039\/501100007129","name":"Shandong Province Natural Science Foundation","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100007129","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Journal of Network and Computer Applications"],"published-print":{"date-parts":[[2026,5]]},"DOI":"10.1016\/j.jnca.2026.104457","type":"journal-article","created":{"date-parts":[[2026,3,2]],"date-time":"2026-03-02T20:37:21Z","timestamp":1772483841000},"page":"104457","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":0,"special_numbering":"C","title":["Learning reward functions via GNNs for multi-agent task placement in edge\u2013cloud LLM services"],"prefix":"10.1016","volume":"249","author":[{"given":"Hao","family":"Yang","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6157-3740","authenticated-orcid":false,"given":"Li","family":"Pan","sequence":"additional","affiliation":[]},{"given":"Shijun","family":"Liu","sequence":"additional","affiliation":[]}],"member":"78","reference":[{"key":"10.1016\/j.jnca.2026.104457_b1","doi-asserted-by":"crossref","first-page":"241","DOI":"10.32604\/jai.2024.056259","article-title":"Hybrid task scheduling algorithm for makespan optimisation in cloud computing: A performance evaluation","volume":"6","author":"Abdulghani","year":"2024","journal-title":"J. Artif. Intell."},{"key":"10.1016\/j.jnca.2026.104457_b2","doi-asserted-by":"crossref","DOI":"10.3390\/electronics14153044","article-title":"Network-aware gaussian mixture models for multi-objective sd-wan controller placement","volume":"14","author":"Abdulghani","year":"2025","journal-title":"Electronics"},{"key":"10.1016\/j.jnca.2026.104457_b3","doi-asserted-by":"crossref","first-page":"17349","DOI":"10.1109\/JIOT.2025.3538657","article-title":"A graph-assisted digital-twin-driven multiagent shared offloading for internet of vehicles","volume":"12","author":"Alam","year":"2025","journal-title":"IEEE Internet Things J."},{"key":"10.1016\/j.jnca.2026.104457_b4","series-title":"Constitutional ai: Harmlessness from ai feedback","author":"Bai","year":"2022"},{"key":"10.1016\/j.jnca.2026.104457_b5","doi-asserted-by":"crossref","first-page":"868","DOI":"10.1007\/s10439-023-03172-7","article-title":"Role of chat gpt in public health","volume":"51","author":"Biswas","year":"2023","journal-title":"Ann. Biomed. Eng."},{"key":"10.1016\/j.jnca.2026.104457_b6","doi-asserted-by":"crossref","first-page":"7783","DOI":"10.1109\/TVT.2023.3238771","article-title":"Joint multi-task offloading and resource allocation for mobile edge computing systems in satellite iot","volume":"72","author":"Chai","year":"2023","journal-title":"IEEE Trans. Veh. Technol."},{"key":"10.1016\/j.jnca.2026.104457_b7","first-page":"3571","article-title":"Offloading in mobile edge computing: Task allocation and computational frequency scaling","volume":"65","author":"Dinh","year":"2017","journal-title":"IEEE Trans. Commun."},{"key":"10.1016\/j.jnca.2026.104457_b8","series-title":"GLOBECOM 2024 - 2024 IEEE Global Communications Conference","first-page":"3334","article-title":"Dta-rl: Dynamic topology adaptive reinforcement learning approach for task offloading in mobile edge computing","author":"Fu","year":"2024"},{"key":"10.1016\/j.jnca.2026.104457_b9","doi-asserted-by":"crossref","first-page":"168","DOI":"10.1016\/j.future.2021.09.007","article-title":"A q-learning approach for the autoscaling of scientific workflows in the cloud","volume":"127","author":"Gar\u00ed","year":"2022","journal-title":"Future Gener. Comput. Syst."},{"key":"10.1016\/j.jnca.2026.104457_b10","doi-asserted-by":"crossref","DOI":"10.1109\/JIOT.2025.3617349","article-title":"Explainable multi-agent deep reinforcement learning for joint task offloading and resource allocation in distance and channel-aware noma vehicular edge networks","author":"Hu","year":"2025","journal-title":"IEEE Internet Things J."},{"key":"10.1016\/j.jnca.2026.104457_b11","first-page":"15931","article-title":"Learning to utilize shaping rewards: A new approach of reward shaping","volume":"33","author":"Hu","year":"2020","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.jnca.2026.104457_b12","doi-asserted-by":"crossref","first-page":"1029","DOI":"10.1109\/JSAC.2024.3365899","article-title":"Joint offloading and resource allocation for hybrid cloud and edge computing in sagins: A decision assisted hybrid action space deep reinforcement learning approach","volume":"42","author":"Huang","year":"2024","journal-title":"IEEE J. Sel. Areas Commun."},{"key":"10.1016\/j.jnca.2026.104457_b13","unstructured":"Jeon, M., Venkataraman, S., Phanishayee, A., Qian, J., Xiao, W., Yang, F., 2019. Analysis of large-scale multi-tenant gpu clusters for dnn training workloads. In: 2019 USENIX Annual Technical Conference (USENIX ATC 19). pp. 947\u2013960."},{"key":"10.1016\/j.jnca.2026.104457_b14","series-title":"Advances in Neural Information Processing Systems 38: Annual Conference on Neural Information Processing Systems 2024, NeurIPS 2024, Vancouver, BC, Canada, December 10\u201315, 2024","first-page":"1725","article-title":"D-llm: A token adaptive computing resource allocation strategy for large language models","volume":"vol. 37","author":"Jiang","year":"2024"},{"key":"10.1016\/j.jnca.2026.104457_b15","series-title":"International Conference on Machine Learning","first-page":"4870","article-title":"Reward-free exploration for reinforcement learning","author":"Jin","year":"2020"},{"key":"10.1016\/j.jnca.2026.104457_b16","series-title":"Scaling laws for neural language models","author":"Kaplan","year":"2020"},{"key":"10.1016\/j.jnca.2026.104457_b17","series-title":"A survey of reinforcement learning from human feedback","author":"Kaufmann","year":"2023"},{"key":"10.1016\/j.jnca.2026.104457_b18","doi-asserted-by":"crossref","first-page":"3073","DOI":"10.1109\/TPDS.2023.3313779","article-title":"Task placement and resource allocation for edge machine learning: A gnn-based multi-agent reinforcement learning paradigm","volume":"34","author":"Li","year":"2023","journal-title":"IEEE Trans. Parallel Distrib. Syst."},{"key":"10.1016\/j.jnca.2026.104457_b19","series-title":"Deepseek-v3 technical report","author":"Liu","year":"2024"},{"key":"10.1016\/j.jnca.2026.104457_b20","series-title":"Proceedings of the Twenty-Fifth International Symposium on Theory, Algorithmic Foundations, and Protocol Design for Mobile Networks and Mobile Computing, MobiHoc \u201924","first-page":"81","article-title":"Resource allocation for stable llm training in mobile edge computing","author":"Liu","year":"2024"},{"key":"10.1016\/j.jnca.2026.104457_b21","doi-asserted-by":"crossref","first-page":"6160","DOI":"10.3390\/s25196160","article-title":"Multi-agent deep reinforcement learning for joint task offloading and resource allocation in iiot with dynamic priorities","volume":"25","author":"Ma","year":"2025","journal-title":"Sensors"},{"key":"10.1016\/j.jnca.2026.104457_b22","doi-asserted-by":"crossref","first-page":"3590","DOI":"10.1109\/JSAC.2016.2611964","article-title":"Dynamic computation offloading for mobile-edge computing with energy harvesting devices","volume":"34","author":"Mao","year":"2016","journal-title":"IEEE J. Sel. Areas Commun."},{"key":"10.1016\/j.jnca.2026.104457_b23","series-title":"Proceedings of the 29th ACM International Conference on Architectural Support for Programming Languages and Operating Systems, Volume 2, ASPLOS \u201924","first-page":"369","article-title":"Exegpt: Constraint-aware resource scheduling for llm inference","author":"Oh","year":"2024"},{"key":"10.1016\/j.jnca.2026.104457_b24","doi-asserted-by":"crossref","first-page":"945","DOI":"10.1007\/s10462-021-09997-9","article-title":"Reinforcement learning in robotic applications: a comprehensive survey","volume":"55","author":"Singh","year":"2022","journal-title":"Artif. Intell. Rev."},{"key":"10.1016\/j.jnca.2026.104457_b25","series-title":"Proceedings of the 30th ACM International Conference on Architectural Support for Programming Languages and Operating Systems, Volume 2, ASPLOS \u201925","first-page":"1266","article-title":"Tapas: Thermal- and power-aware scheduling for llm inference in cloud platforms","author":"Stojkovic","year":"2025"},{"key":"10.1016\/j.jnca.2026.104457_b26","series-title":"A minimaximalist approach to reinforcement learning from human feedback","author":"Swamy","year":"2024"},{"key":"10.1016\/j.jnca.2026.104457_b27","series-title":"Llama 2: Open foundation and fine-tuned chat models","author":"Touvron","year":"2023"},{"key":"10.1016\/j.jnca.2026.104457_b28","doi-asserted-by":"crossref","unstructured":"Wang, Y., Chen, Y., Li, Z., Kang, X., Fang, Y., Zhou, Y., Zheng, Y., Tang, Z., He, X., Guo, R., et al., 2025. Burstgpt: A real-world workload dataset to optimize llm serving systems. In: Proceedings of the 31st ACM SIGKDD Conference on Knowledge Discovery and Data Mining V. 2. pp. 5831\u20135841.","DOI":"10.1145\/3711896.3737413"},{"key":"10.1016\/j.jnca.2026.104457_b29","first-page":"17816","article-title":"On reward-free reinforcement learning with linear function approximation","volume":"33","author":"Wang","year":"2020","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.jnca.2026.104457_b30","article-title":"Deep reinforcement learning based resource allocation strategy in cloud-edge computing system","volume":"10","author":"Xu","year":"2022","journal-title":"Front. Bioeng. Biotechnol."},{"key":"10.1016\/j.jnca.2026.104457_b31","series-title":"A survey of resource-efficient llm and multimodal foundation models","author":"Xu","year":"2024"},{"key":"10.1016\/j.jnca.2026.104457_b32","doi-asserted-by":"crossref","DOI":"10.1145\/3649506","article-title":"Harnessing the power of llms in practice: A survey on chatgpt and beyond","volume":"18","author":"Yang","year":"2024","journal-title":"ACM Trans. Knowl. Discov. Data"},{"key":"10.1016\/j.jnca.2026.104457_b33","doi-asserted-by":"crossref","unstructured":"Yu, T., Yao, Y., Zhang, H., He, T., Han, Y., Cui, G., Hu, J., Liu, Z., Zheng, H.-T., Sun, M., et al., 2024. Rlhf-v: Towards trustworthy mllms via behavior alignment from fine-grained correctional human feedback. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. pp. 13807\u201313816.","DOI":"10.1109\/CVPR52733.2024.01310"},{"key":"10.1016\/j.jnca.2026.104457_b34","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1007\/s00607-025-01472-5","article-title":"Task offloading and resource allocation for multi-uav asset edge computing with multi-agent deep reinforcement learning","volume":"107","author":"Zakaryia","year":"2025","journal-title":"Computing"},{"key":"10.1016\/j.jnca.2026.104457_b35","doi-asserted-by":"crossref","DOI":"10.1016\/j.comnet.2025.111222","article-title":"Joint task offloading and computing resource allocation with dqn for task-dependency in multi-access edge computing","volume":"263","author":"Zhai","year":"2025","journal-title":"Comput. Netw."},{"key":"10.1016\/j.jnca.2026.104457_b36","doi-asserted-by":"crossref","first-page":"883","DOI":"10.1109\/TC.2020.2969148","article-title":"A deep reinforcement learning based offloading game in edge computing","volume":"69","author":"Zhan","year":"2020","journal-title":"IEEE Trans. Comput."},{"key":"10.1016\/j.jnca.2026.104457_b37","doi-asserted-by":"crossref","first-page":"9763","DOI":"10.1109\/JIOT.2020.3040768","article-title":"Multiagent deep reinforcement learning for vehicular computation offloading in iot","volume":"8","author":"Zhu","year":"2021","journal-title":"IEEE Internet Things J."}],"container-title":["Journal of Network and Computer Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S1084804526000329?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S1084804526000329?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2026,5,1]],"date-time":"2026-05-01T04:04:31Z","timestamp":1777608271000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S1084804526000329"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,5]]},"references-count":37,"alternative-id":["S1084804526000329"],"URL":"https:\/\/doi.org\/10.1016\/j.jnca.2026.104457","relation":{},"ISSN":["1084-8045"],"issn-type":[{"value":"1084-8045","type":"print"}],"subject":[],"published":{"date-parts":[[2026,5]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"Learning reward functions via GNNs for multi-agent task placement in edge\u2013cloud LLM services","name":"articletitle","label":"Article Title"},{"value":"Journal of Network and Computer Applications","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.jnca.2026.104457","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2026 Published by Elsevier Ltd.","name":"copyright","label":"Copyright"}],"article-number":"104457"}}