{"id":"https://openalex.org/W4384891727","doi":"https://doi.org/10.1145/3539618.3591648","title":"Causal Decision Transformer for Recommender Systems via Offline Reinforcement Learning","display_name":"Causal Decision Transformer for Recommender Systems via Offline Reinforcement Learning","publication_year":2023,"publication_date":"2023-07-18","ids":{"openalex":"https://openalex.org/W4384891727","doi":"https://doi.org/10.1145/3539618.3591648"},"language":"en","primary_location":{"id":"doi:10.1145/3539618.3591648","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3539618.3591648","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 46th International ACM SIGIR Conference on Research and Development in Information Retrieval","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100358930","display_name":"Siyu Wang","orcid":"https://orcid.org/0009-0008-8726-5277"},"institutions":[{"id":"https://openalex.org/I31746571","display_name":"UNSW Sydney","ror":"https://ror.org/03r8z3t63","country_code":"AU","type":"education","lineage":["https://openalex.org/I31746571"]}],"countries":["AU"],"is_corresponding":true,"raw_author_name":"Siyu Wang","raw_affiliation_strings":["The University of New South Wales, Sydney, NSW, Australia"],"raw_orcid":"https://orcid.org/0009-0008-8726-5277","affiliations":[{"raw_affiliation_string":"The University of New South Wales, Sydney, NSW, Australia","institution_ids":["https://openalex.org/I31746571"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5057487706","display_name":"Xiaocong Chen","orcid":null},"institutions":[{"id":"https://openalex.org/I31746571","display_name":"UNSW Sydney","ror":"https://ror.org/03r8z3t63","country_code":"AU","type":"education","lineage":["https://openalex.org/I31746571"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Xiaocong Chen","raw_affiliation_strings":["The University of New South Wales, Sydney, NSW, Australia"],"raw_orcid":"https://orcid.org/0000-0002-8849-4943","affiliations":[{"raw_affiliation_string":"The University of New South Wales, Sydney, NSW, Australia","institution_ids":["https://openalex.org/I31746571"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5005545405","display_name":"Dietmar Jannach","orcid":"https://orcid.org/0000-0002-4698-8507"},"institutions":[{"id":"https://openalex.org/I4210166741","display_name":"University of Klagenfurt","ror":"https://ror.org/05q9m0937","country_code":"AT","type":"education","lineage":["https://openalex.org/I4210166741"]}],"countries":["AT"],"is_corresponding":false,"raw_author_name":"Dietmar Jannach","raw_affiliation_strings":["University of Klagenfurt, Klagenfurt, Austria"],"raw_orcid":"https://orcid.org/0000-0002-4698-8507","affiliations":[{"raw_affiliation_string":"University of Klagenfurt, Klagenfurt, Austria","institution_ids":["https://openalex.org/I4210166741"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5052731721","display_name":"Lina Yao","orcid":"https://orcid.org/0000-0002-4149-839X"},"institutions":[{"id":"https://openalex.org/I1292875679","display_name":"Commonwealth Scientific and Industrial Research Organisation","ror":"https://ror.org/03qn8fb07","country_code":"AU","type":"government","lineage":["https://openalex.org/I1292875679","https://openalex.org/I2801453606","https://openalex.org/I4387156119"]},{"id":"https://openalex.org/I31746571","display_name":"UNSW Sydney","ror":"https://ror.org/03r8z3t63","country_code":"AU","type":"education","lineage":["https://openalex.org/I31746571"]},{"id":"https://openalex.org/I42894916","display_name":"Data61","ror":"https://ror.org/03q397159","country_code":"AU","type":"other","lineage":["https://openalex.org/I1292875679","https://openalex.org/I2801453606","https://openalex.org/I42894916","https://openalex.org/I4387156119"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Lina Yao","raw_affiliation_strings":["Data61, CSIRO &amp; The University of New South Wales, Eveleighm NSW, Australia"],"raw_orcid":"https://orcid.org/0000-0002-4149-839X","affiliations":[{"raw_affiliation_string":"Data61, CSIRO &amp; The University of New South Wales, Eveleighm NSW, Australia","institution_ids":["https://openalex.org/I42894916","https://openalex.org/I31746571","https://openalex.org/I1292875679"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5100358930"],"corresponding_institution_ids":["https://openalex.org/I31746571"],"apc_list":null,"apc_paid":null,"fwci":13.1495,"has_fulltext":false,"cited_by_count":30,"citation_normalized_percentile":{"value":0.98733639,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":98,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"1599","last_page":"1608"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10203","display_name":"Recommender Systems and Techniques","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10203","display_name":"Recommender Systems and Techniques","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12101","display_name":"Advanced Bandit Algorithms Research","score":0.9973999857902527,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10603","display_name":"Smart Grid Energy Management","score":0.9807999730110168,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8688185214996338},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8221942782402039},{"id":"https://openalex.org/keywords/recommender-system","display_name":"Recommender system","score":0.7525612115859985},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.6481739282608032},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6039870381355286},{"id":"https://openalex.org/keywords/popularity","display_name":"Popularity","score":0.4957357347011566},{"id":"https://openalex.org/keywords/inefficiency","display_name":"Inefficiency","score":0.49163132905960083},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.46771636605262756},{"id":"https://openalex.org/keywords/offline-learning","display_name":"Offline learning","score":0.4564199447631836},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.32093167304992676},{"id":"https://openalex.org/keywords/online-learning","display_name":"Online learning","score":0.23013702034950256},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.09393134713172913}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8688185214996338},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8221942782402039},{"id":"https://openalex.org/C557471498","wikidata":"https://www.wikidata.org/wiki/Q554950","display_name":"Recommender system","level":2,"score":0.7525612115859985},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.6481739282608032},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6039870381355286},{"id":"https://openalex.org/C2780586970","wikidata":"https://www.wikidata.org/wiki/Q1357284","display_name":"Popularity","level":2,"score":0.4957357347011566},{"id":"https://openalex.org/C2778869765","wikidata":"https://www.wikidata.org/wiki/Q6028363","display_name":"Inefficiency","level":2,"score":0.49163132905960083},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.46771636605262756},{"id":"https://openalex.org/C2780490138","wikidata":"https://www.wikidata.org/wiki/Q7079636","display_name":"Offline learning","level":3,"score":0.4564199447631836},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.32093167304992676},{"id":"https://openalex.org/C2986087404","wikidata":"https://www.wikidata.org/wiki/Q15946010","display_name":"Online learning","level":2,"score":0.23013702034950256},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.09393134713172913},{"id":"https://openalex.org/C77805123","wikidata":"https://www.wikidata.org/wiki/Q161272","display_name":"Social psychology","level":1,"score":0.0},{"id":"https://openalex.org/C119599485","wikidata":"https://www.wikidata.org/wiki/Q43035","display_name":"Electrical engineering","level":1,"score":0.0},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C175444787","wikidata":"https://www.wikidata.org/wiki/Q39072","display_name":"Microeconomics","level":1,"score":0.0},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.0},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3539618.3591648","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3539618.3591648","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 46th International ACM SIGIR Conference on Research and Development in Information Retrieval","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.7200000286102295,"display_name":"Peace, Justice and strong institutions","id":"https://metadata.un.org/sdg/16"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":25,"referenced_works":["https://openalex.org/W2155912844","https://openalex.org/W2612690371","https://openalex.org/W2788295351","https://openalex.org/W2799544270","https://openalex.org/W2893359107","https://openalex.org/W2950275995","https://openalex.org/W2963367478","https://openalex.org/W2963619374","https://openalex.org/W2963654596","https://openalex.org/W2984100107","https://openalex.org/W2994850640","https://openalex.org/W3005071803","https://openalex.org/W3017289192","https://openalex.org/W3033324992","https://openalex.org/W3065542300","https://openalex.org/W3100260481","https://openalex.org/W3102778384","https://openalex.org/W3102899483","https://openalex.org/W3132607382","https://openalex.org/W3159587374","https://openalex.org/W3208231292","https://openalex.org/W4212774754","https://openalex.org/W4214717370","https://openalex.org/W4318159260","https://openalex.org/W6600007113"],"related_works":["https://openalex.org/W2368605798","https://openalex.org/W4225619808","https://openalex.org/W4388926065","https://openalex.org/W4386160446","https://openalex.org/W4387545330","https://openalex.org/W4226042081","https://openalex.org/W4284974072","https://openalex.org/W4376223516","https://openalex.org/W4283712691","https://openalex.org/W4308935744"],"abstract_inverted_index":{"Reinforcement":[0],"learning-based":[1],"recommender":[2,156],"systems":[3,157],"have":[4,128,222],"recently":[5],"gained":[6],"popularity.":[7],"However,":[8],"the":[9,12,17,30,37,40,45,49,57,69,84,93,101,151,181,200,204,214],"design":[10],"of":[11,39,52,60,68,100,103,187,218],"reward":[13,41],"function,":[14],"on":[15,225],"which":[16,184],"agent":[18,46],"relies":[19],"to":[20,47,56,82,115,136,202],"optimize":[21],"its":[22],"recommendation":[23],"policy,":[24],"is":[25,160,185],"often":[26],"not":[27,127],"straightforward.":[28],"Exploring":[29],"causality":[31],"underlying":[32],"users'":[33],"behavior":[34],"can":[35,168],"take":[36],"place":[38],"function":[42],"in":[43,75],"guiding":[44],"capture":[48],"dynamic":[50],"interests":[51],"users.":[53],"Moreover,":[54,178],"due":[55],"typical":[58],"limitations":[59],"simulation":[61],"environments":[62],"(e.g.,":[63],"data":[64,90,114,201],"ineffi-":[65],"ciency),":[66],"most":[67],"work":[70],"cannot":[71,111],"be":[72],"broadly":[73],"applied":[74],"large-scale":[76],"situations.":[77],"Although":[78],"some":[79],"works":[80],"attempt":[81],"convert":[83],"offline":[85,139,162,190,228],"dataset":[86,172],"into":[87],"a":[88,118,129,147,171],"simulator,":[89],"inefficiency":[91],"makes":[92],"learning":[94,105,107,124,134,165],"pro-":[95],"cess":[96],"even":[97],"slower.":[98],"Because":[99],"nature":[102],"reinforcement":[104,123],"(i.e.,":[106],"by":[108],"interaction),":[109],"it":[110],"collect":[112],"enough":[113],"train":[116],"during":[117],"single":[119],"interaction.":[120,177],"Furthermore,":[121],"traditional":[122],"algorithms":[125],"do":[126],"solid":[130],"capability":[131],"like":[132],"supervised":[133],"methods":[135],"learn":[137,169],"from":[138,170,175],"datasets":[140,191,229],"directly.":[141],"In":[142],"this":[143],"paper,":[144],"we":[145,221],"propose":[146],"new":[148],"model":[149],"named":[150],"causal":[152,205],"decision":[153],"transformer":[154,182],"for":[155],"(CDT4Rec).":[158],"CDT4Rec":[159,179],"an":[161],"reinforce-":[163],"ment":[164],"system":[166],"that":[167],"rather":[173],"than":[174],"online":[176,232],"employs":[180],"architecture,":[183],"capable":[186],"processing":[188],"large":[189],"and":[192,196,210,216,230],"capturing":[193],"both":[194],"short-term":[195],"long-term":[197],"dependencies":[198],"within":[199],"estimate":[203],"relationship":[206],"between":[207],"action,":[208],"state,":[209],"reward.":[211],"To":[212],"demonstrate":[213],"feasibility":[215],"superiority":[217],"our":[219],"model,":[220],"conducted":[223],"experiments":[224],"six":[226],"real-world":[227],"one":[231],"simulator.":[233]},"counts_by_year":[{"year":2026,"cited_by_count":3},{"year":2025,"cited_by_count":11},{"year":2024,"cited_by_count":11},{"year":2023,"cited_by_count":5}],"updated_date":"2026-05-21T09:19:25.381259","created_date":"2025-10-10T00:00:00"}
