{"id":"https://openalex.org/W4413155144","doi":"https://doi.org/10.1109/cvpr52734.2025.02285","title":"Learning 4D Panoptic Scene Graph Generation from Rich 2D Visual Scene","display_name":"Learning 4D Panoptic Scene Graph Generation from Rich 2D Visual Scene","publication_year":2025,"publication_date":"2025-06-10","ids":{"openalex":"https://openalex.org/W4413155144","doi":"https://doi.org/10.1109/cvpr52734.2025.02285"},"language":"en","primary_location":{"id":"doi:10.1109/cvpr52734.2025.02285","is_oa":false,"landing_page_url":"https://doi.org/10.1109/cvpr52734.2025.02285","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5089517967","display_name":"Shengqiong Wu","orcid":"https://orcid.org/0000-0001-6192-1194"},"institutions":[{"id":"https://openalex.org/I165932596","display_name":"National University of Singapore","ror":"https://ror.org/01tgyzw49","country_code":"SG","type":"education","lineage":["https://openalex.org/I165932596"]}],"countries":["SG"],"is_corresponding":true,"raw_author_name":"Shengqiong Wu","raw_affiliation_strings":["National University of Singapore"],"affiliations":[{"raw_affiliation_string":"National University of Singapore","institution_ids":["https://openalex.org/I165932596"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Hao Fei","orcid":null},"institutions":[{"id":"https://openalex.org/I165932596","display_name":"National University of Singapore","ror":"https://ror.org/01tgyzw49","country_code":"SG","type":"education","lineage":["https://openalex.org/I165932596"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Hao Fei","raw_affiliation_strings":["National University of Singapore"],"affiliations":[{"raw_affiliation_string":"National University of Singapore","institution_ids":["https://openalex.org/I165932596"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5075948339","display_name":"Jingkang Yang","orcid":"https://orcid.org/0000-0002-9424-254X"},"institutions":[{"id":"https://openalex.org/I172675005","display_name":"Nanyang Technological University","ror":"https://ror.org/02e7b5302","country_code":"SG","type":"education","lineage":["https://openalex.org/I172675005"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Jingkang Yang","raw_affiliation_strings":["Nanyang Technological University"],"affiliations":[{"raw_affiliation_string":"Nanyang Technological University","institution_ids":["https://openalex.org/I172675005"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5029645676","display_name":"Xiangtai Li","orcid":null},"institutions":[{"id":"https://openalex.org/I172675005","display_name":"Nanyang Technological University","ror":"https://ror.org/02e7b5302","country_code":"SG","type":"education","lineage":["https://openalex.org/I172675005"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Xiangtai Li","raw_affiliation_strings":["Nanyang Technological University"],"affiliations":[{"raw_affiliation_string":"Nanyang Technological University","institution_ids":["https://openalex.org/I172675005"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100648014","display_name":"Juncheng Li","orcid":"https://orcid.org/0000-0001-7314-6754"},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Juncheng Li","raw_affiliation_strings":["Zhejiang University"],"affiliations":[{"raw_affiliation_string":"Zhejiang University","institution_ids":["https://openalex.org/I76130692"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5042324027","display_name":"Hanwang Zhang","orcid":"https://orcid.org/0000-0001-7374-8739"},"institutions":[{"id":"https://openalex.org/I172675005","display_name":"Nanyang Technological University","ror":"https://ror.org/02e7b5302","country_code":"SG","type":"education","lineage":["https://openalex.org/I172675005"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Hanwang Zhang","raw_affiliation_strings":["Nanyang Technological University"],"affiliations":[{"raw_affiliation_string":"Nanyang Technological University","institution_ids":["https://openalex.org/I172675005"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5089404640","display_name":"Tat\u2010Seng Chua","orcid":"https://orcid.org/0000-0001-6097-7807"},"institutions":[{"id":"https://openalex.org/I165932596","display_name":"National University of Singapore","ror":"https://ror.org/01tgyzw49","country_code":"SG","type":"education","lineage":["https://openalex.org/I165932596"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Tat-Seng Chua","raw_affiliation_strings":["National University of Singapore"],"affiliations":[{"raw_affiliation_string":"National University of Singapore","institution_ids":["https://openalex.org/I165932596"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5089517967"],"corresponding_institution_ids":["https://openalex.org/I165932596"],"apc_list":null,"apc_paid":null,"fwci":7.0966,"has_fulltext":false,"cited_by_count":6,"citation_normalized_percentile":{"value":0.97175593,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"24539","last_page":"24549"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9674000144004822,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9674000144004822,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12290","display_name":"Human Motion and Animation","score":0.96670001745224,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12698","display_name":"3D Modeling in Geospatial Applications","score":0.9577999711036682,"subfield":{"id":"https://openalex.org/subfields/2215","display_name":"Building and Construction"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7396665215492249},{"id":"https://openalex.org/keywords/panopticon","display_name":"Panopticon","score":0.7261766791343689},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.6160796880722046},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5676714181900024},{"id":"https://openalex.org/keywords/scene-graph","display_name":"Scene graph","score":0.5528044700622559},{"id":"https://openalex.org/keywords/graph","display_name":"Graph","score":0.46232226490974426},{"id":"https://openalex.org/keywords/computer-graphics","display_name":"Computer graphics (images)","score":0.3823249936103821},{"id":"https://openalex.org/keywords/theoretical-computer-science","display_name":"Theoretical computer science","score":0.14338204264640808},{"id":"https://openalex.org/keywords/rendering","display_name":"Rendering (computer graphics)","score":0.06736722588539124}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7396665215492249},{"id":"https://openalex.org/C138569888","wikidata":"https://www.wikidata.org/wiki/Q828310","display_name":"Panopticon","level":3,"score":0.7261766791343689},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.6160796880722046},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5676714181900024},{"id":"https://openalex.org/C179372163","wikidata":"https://www.wikidata.org/wiki/Q1406181","display_name":"Scene graph","level":3,"score":0.5528044700622559},{"id":"https://openalex.org/C132525143","wikidata":"https://www.wikidata.org/wiki/Q141488","display_name":"Graph","level":2,"score":0.46232226490974426},{"id":"https://openalex.org/C121684516","wikidata":"https://www.wikidata.org/wiki/Q7600677","display_name":"Computer graphics (images)","level":1,"score":0.3823249936103821},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.14338204264640808},{"id":"https://openalex.org/C205711294","wikidata":"https://www.wikidata.org/wiki/Q176953","display_name":"Rendering (computer graphics)","level":2,"score":0.06736722588539124},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C94625758","wikidata":"https://www.wikidata.org/wiki/Q7163","display_name":"Politics","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/cvpr52734.2025.02285","is_oa":false,"landing_page_url":"https://doi.org/10.1109/cvpr52734.2025.02285","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":49,"referenced_works":["https://openalex.org/W2077069816","https://openalex.org/W2250378130","https://openalex.org/W2277195237","https://openalex.org/W2469807079","https://openalex.org/W2765137706","https://openalex.org/W2951323451","https://openalex.org/W2963351448","https://openalex.org/W2963518342","https://openalex.org/W2963536419","https://openalex.org/W2963938081","https://openalex.org/W2969679616","https://openalex.org/W3034679267","https://openalex.org/W3035154952","https://openalex.org/W3108601100","https://openalex.org/W3184370138","https://openalex.org/W3201890160","https://openalex.org/W4214520160","https://openalex.org/W4312471667","https://openalex.org/W4312563197","https://openalex.org/W4312680532","https://openalex.org/W4312936847","https://openalex.org/W4366352717","https://openalex.org/W4385569940","https://openalex.org/W4385570514","https://openalex.org/W4385572347","https://openalex.org/W4386066401","https://openalex.org/W4386071707","https://openalex.org/W4386071767","https://openalex.org/W4386083046","https://openalex.org/W4387969604","https://openalex.org/W4394625817","https://openalex.org/W4401043375","https://openalex.org/W4401070841","https://openalex.org/W4401537510","https://openalex.org/W4401634890","https://openalex.org/W4402703022","https://openalex.org/W4402704531","https://openalex.org/W4402715983","https://openalex.org/W4402726938","https://openalex.org/W4402727514","https://openalex.org/W4402727824","https://openalex.org/W4402733565","https://openalex.org/W4402753599","https://openalex.org/W4402753940","https://openalex.org/W4402754220","https://openalex.org/W4402754270","https://openalex.org/W4402915938","https://openalex.org/W4403791241","https://openalex.org/W4404792503"],"related_works":["https://openalex.org/W2921107741","https://openalex.org/W2197002326","https://openalex.org/W2494728058","https://openalex.org/W3204968380","https://openalex.org/W1710116222","https://openalex.org/W2800383628","https://openalex.org/W2345320341","https://openalex.org/W2366518132","https://openalex.org/W4392007279","https://openalex.org/W4387129494"],"abstract_inverted_index":{"The":[0,182],"latest":[1],"emerged":[2],"4D":[3,17,77,84,150],"Panoptic":[4],"Scene":[5],"Graph":[6],"(4D-PSG)":[7],"provides":[8],"an":[9],"advanced-ever":[10],"representation":[11],"for":[12,65,95,154],"comprehensively":[13],"modeling":[14],"the":[15,37,42,46,162,177],"dynamic":[16],"visual":[18,72,129],"real":[19],"world.":[20],"Unfortunately,":[21],"current":[22],"pioneering":[23],"4D-PSG":[24,66],"research":[25],"can":[26,50],"primarily":[27],"suffer":[28],"from":[29,144],"data":[30,155,164],"scarcity":[31,156],"issues":[32],"severely,":[33],"as":[34,36],"well":[35],"resulting":[38],"out-of-vocabulary":[39],"problems;":[40],"also,":[41],"pipeline":[43],"nature":[44],"of":[45,98,179],"benchmark":[47,163],"generation":[48,67,97],"method":[49],"lead":[51],"to":[52,75,108,113,149],"suboptimal":[53],"performance.":[54],"To":[55],"address":[56],"these":[57],"challenges,":[58],"this":[59],"paper":[60],"investigates":[61],"a":[62,83,91,127,135,173],"novel":[63],"framework":[64],"that":[68,166],"leverages":[69],"rich":[70],"2D":[71,146],"scene":[73,78,130,137],"annotations":[74,148],"enhance":[76],"learning.":[79],"First,":[80],"we":[81,125,167],"introduce":[82],"Large":[85],"Language":[86],"Model":[87],"(4D-LLM)":[88],"integrated":[89],"with":[90],"3D":[92],"mask":[93],"decoder":[94],"end-to-end":[96],"4D-PSG.":[99,158],"A":[100],"chained":[101],"SG":[102,147],"inference":[103],"mechanism":[104],"is":[105,185],"further":[106],"designed":[107],"exploit":[109],"LLMs\u2019":[110],"open-vocabulary":[111],"capabilities":[112],"infer":[114],"accurate":[115],"and":[116,119],"comprehensive":[117],"object":[118],"relation":[120],"labels":[121],"iteratively.":[122],"Most":[123],"importantly,":[124],"propose":[126],"2D-to-4D":[128],"transfer":[131],"learning":[132],"framework,":[133],"where":[134],"spatial-temporal":[136],"transcending":[138],"strategy":[139],"effectively":[140,152],"transfers":[141],"dimension-invariant":[142],"features":[143],"abundant":[145],"scenes,":[151],"compensating":[153],"in":[157],"Extensive":[159],"experiments":[160],"on":[161],"demonstrate":[165],"strikingly":[168],"outperform":[169],"baseline":[170],"models":[171],"by":[172],"large":[174],"margin,":[175],"highlighting":[176],"effectiveness":[178],"our":[180],"method.":[181],"project":[183],"page":[184],"https://sqwu.top/PSG-4D-LLM/.":[186]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":5}],"updated_date":"2026-04-16T08:26:57.006410","created_date":"2025-10-10T00:00:00"}
