{"id":"https://openalex.org/W4390873628","doi":"https://doi.org/10.1109/iccv51070.2023.01971","title":"Visually-Prompted Language Model for Fine-Grained Scene Graph Generation in an Open World","display_name":"Visually-Prompted Language Model for Fine-Grained Scene Graph Generation in an Open World","publication_year":2023,"publication_date":"2023-10-01","ids":{"openalex":"https://openalex.org/W4390873628","doi":"https://doi.org/10.1109/iccv51070.2023.01971"},"language":"en","primary_location":{"id":"doi:10.1109/iccv51070.2023.01971","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iccv51070.2023.01971","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 IEEE/CVF International Conference on Computer Vision (ICCV)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5103128533","display_name":"Qifan Yu","orcid":"https://orcid.org/0000-0003-0029-5622"},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Qifan Yu","raw_affiliation_strings":["Zhejiang University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Zhejiang University","institution_ids":["https://openalex.org/I76130692"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100648014","display_name":"Juncheng Li","orcid":"https://orcid.org/0000-0001-7314-6754"},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Juncheng Li","raw_affiliation_strings":["Zhejiang University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Zhejiang University","institution_ids":["https://openalex.org/I76130692"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100324098","display_name":"Yu Wu","orcid":"https://orcid.org/0000-0002-1680-8253"},"institutions":[{"id":"https://openalex.org/I37461747","display_name":"Wuhan University","ror":"https://ror.org/033vjfk17","country_code":"CN","type":"education","lineage":["https://openalex.org/I37461747"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yu Wu","raw_affiliation_strings":["Wuhan University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Wuhan University","institution_ids":["https://openalex.org/I37461747"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5063062444","display_name":"Siliang Tang","orcid":"https://orcid.org/0000-0002-7356-9711"},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Siliang Tang","raw_affiliation_strings":["Zhejiang University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Zhejiang University","institution_ids":["https://openalex.org/I76130692"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5056318151","display_name":"Wei Ji","orcid":"https://orcid.org/0000-0003-4059-5902"},"institutions":[{"id":"https://openalex.org/I165932596","display_name":"National University of Singapore","ror":"https://ror.org/01tgyzw49","country_code":"SG","type":"education","lineage":["https://openalex.org/I165932596"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Wei Ji","raw_affiliation_strings":["National University of Singapore"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"National University of Singapore","institution_ids":["https://openalex.org/I165932596"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5008666077","display_name":"Yueting Zhuang","orcid":"https://orcid.org/0000-0001-9017-2508"},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yueting Zhuang","raw_affiliation_strings":["Zhejiang University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Zhejiang University","institution_ids":["https://openalex.org/I76130692"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5103128533"],"corresponding_institution_ids":["https://openalex.org/I76130692"],"apc_list":null,"apc_paid":null,"fwci":2.7839,"has_fulltext":false,"cited_by_count":24,"citation_normalized_percentile":{"value":0.92538228,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":94,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"21503","last_page":"21514"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9959999918937683,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9958000183105469,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8129769563674927},{"id":"https://openalex.org/keywords/predicate","display_name":"Predicate (mathematical logic)","score":0.7358989715576172},{"id":"https://openalex.org/keywords/modal","display_name":"Modal","score":0.6023361682891846},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.5460385084152222},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.541240930557251},{"id":"https://openalex.org/keywords/scene-graph","display_name":"Scene graph","score":0.5129963755607605},{"id":"https://openalex.org/keywords/graph","display_name":"Graph","score":0.4636150002479553},{"id":"https://openalex.org/keywords/epic","display_name":"EPIC","score":0.4364731013774872},{"id":"https://openalex.org/keywords/theoretical-computer-science","display_name":"Theoretical computer science","score":0.3702598810195923},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.35394051671028137},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.3349464535713196},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.20329618453979492},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.11610078811645508}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8129769563674927},{"id":"https://openalex.org/C140146324","wikidata":"https://www.wikidata.org/wiki/Q1144319","display_name":"Predicate (mathematical logic)","level":2,"score":0.7358989715576172},{"id":"https://openalex.org/C71139939","wikidata":"https://www.wikidata.org/wiki/Q910194","display_name":"Modal","level":2,"score":0.6023361682891846},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.5460385084152222},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.541240930557251},{"id":"https://openalex.org/C179372163","wikidata":"https://www.wikidata.org/wiki/Q1406181","display_name":"Scene graph","level":3,"score":0.5129963755607605},{"id":"https://openalex.org/C132525143","wikidata":"https://www.wikidata.org/wiki/Q141488","display_name":"Graph","level":2,"score":0.4636150002479553},{"id":"https://openalex.org/C115519274","wikidata":"https://www.wikidata.org/wiki/Q267903","display_name":"EPIC","level":2,"score":0.4364731013774872},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.3702598810195923},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.35394051671028137},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3349464535713196},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.20329618453979492},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.11610078811645508},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C142362112","wikidata":"https://www.wikidata.org/wiki/Q735","display_name":"Art","level":0,"score":0.0},{"id":"https://openalex.org/C124952713","wikidata":"https://www.wikidata.org/wiki/Q8242","display_name":"Literature","level":1,"score":0.0},{"id":"https://openalex.org/C188027245","wikidata":"https://www.wikidata.org/wiki/Q750446","display_name":"Polymer chemistry","level":1,"score":0.0},{"id":"https://openalex.org/C205711294","wikidata":"https://www.wikidata.org/wiki/Q176953","display_name":"Rendering (computer graphics)","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/iccv51070.2023.01971","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iccv51070.2023.01971","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 IEEE/CVF International Conference on Computer Vision (ICCV)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.4300000071525574,"id":"https://metadata.un.org/sdg/8","display_name":"Decent work and economic growth"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":74,"referenced_works":["https://openalex.org/W639708223","https://openalex.org/W2194775991","https://openalex.org/W2209100941","https://openalex.org/W2277195237","https://openalex.org/W2479423890","https://openalex.org/W2579549467","https://openalex.org/W2842511635","https://openalex.org/W2896457183","https://openalex.org/W2908510526","https://openalex.org/W2962785943","https://openalex.org/W2963518342","https://openalex.org/W2963536419","https://openalex.org/W2963936013","https://openalex.org/W2963938081","https://openalex.org/W2985775525","https://openalex.org/W3034910302","https://openalex.org/W3035017890","https://openalex.org/W3035497460","https://openalex.org/W3081642947","https://openalex.org/W3094502228","https://openalex.org/W3098267758","https://openalex.org/W3105580002","https://openalex.org/W3106328333","https://openalex.org/W3155644662","https://openalex.org/W3166596953","https://openalex.org/W3172642864","https://openalex.org/W3173271937","https://openalex.org/W3174770825","https://openalex.org/W3174966920","https://openalex.org/W3181556077","https://openalex.org/W3182635745","https://openalex.org/W3183719433","https://openalex.org/W3191092153","https://openalex.org/W3198377975","https://openalex.org/W3201890160","https://openalex.org/W3202514640","https://openalex.org/W3202712981","https://openalex.org/W3202778561","https://openalex.org/W3205051885","https://openalex.org/W3215287157","https://openalex.org/W4200498145","https://openalex.org/W4214693531","https://openalex.org/W4214942454","https://openalex.org/W4225868495","https://openalex.org/W4285200267","https://openalex.org/W4285247752","https://openalex.org/W4285300583","https://openalex.org/W4286889413","https://openalex.org/W4287113019","https://openalex.org/W4290058858","https://openalex.org/W4297808394","https://openalex.org/W4304080274","https://openalex.org/W4312245888","https://openalex.org/W4312429751","https://openalex.org/W4312578903","https://openalex.org/W4312682661","https://openalex.org/W4312873085","https://openalex.org/W4313037583","https://openalex.org/W4318718936","https://openalex.org/W4375928968","https://openalex.org/W4377864736","https://openalex.org/W4390873802","https://openalex.org/W6620707391","https://openalex.org/W6755207826","https://openalex.org/W6757817989","https://openalex.org/W6784333009","https://openalex.org/W6789753369","https://openalex.org/W6790019176","https://openalex.org/W6790634876","https://openalex.org/W6791353385","https://openalex.org/W6797716411","https://openalex.org/W6802960777","https://openalex.org/W6841299521","https://openalex.org/W6849177959"],"related_works":["https://openalex.org/W2151979312","https://openalex.org/W1480231236","https://openalex.org/W1488337530","https://openalex.org/W2169393985","https://openalex.org/W2606632758","https://openalex.org/W3094193311","https://openalex.org/W4200333915","https://openalex.org/W4300961093","https://openalex.org/W1505520868","https://openalex.org/W2966204614"],"abstract_inverted_index":{"Scene":[0],"Graph":[1],"Generation":[2],"(SGG)":[3],"aims":[4],"to":[5,36,40,43,51,58,68,98,122,150],"extract":[6],"<subject,":[7],"predicate,":[8],"object>":[9],"relationships":[10],"in":[11,103,113,153,176],"images":[12],"for":[13,75,139,194],"vision":[14],"understanding.":[15],"Although":[16],"recent":[17],"works":[18],"have":[19],"made":[20],"steady":[21],"progress":[22],"on":[23,128,159,186],"SGG,":[24],"they":[25],"still":[26,66],"suffer":[27],"long-tail":[28],"distribution":[29],"issues":[30],"that":[31,164],"tail-predicates":[32],"are":[33,65,72,197],"more":[34],"costly":[35],"train":[37],"and":[38,78,117,192],"hard":[39],"distinguish":[41],"due":[42],"a":[44,85,92,104,114,133,154,177],"small":[45],"amount":[46],"of":[47,170],"annotated":[48],"data":[49,191],"compared":[50],"frequent":[52],"predicates.":[53],"Existing":[54],"re-balancing":[55],"strategies":[56],"try":[57],"handle":[59],"it":[60],"via":[61],"prior":[62],"rules":[63],"but":[64],"confined":[67],"pre-defined":[69],"conditions,":[70],"which":[71],"not":[73],"scalable":[74],"various":[76],"models":[77,147,175],"datasets.":[79],"In":[80],"this":[81,195],"paper,":[82],"we":[83,130],"propose":[84],"Cross-modal":[86],"prediCate":[87],"boosting":[88],"(CaCao)":[89],"framework,":[90],"where":[91,146],"visually-prompted":[93],"language":[94],"model":[95],"is":[96],"learned":[97],"generate":[99],"diverse":[100],"fine-grained":[101],"predicates":[102,152],"low-resource":[105],"way.":[106,179],"The":[107,190],"proposed":[108],"CaCao":[109,165],"can":[110,148],"be":[111],"applied":[112],"plug-and-play":[115],"fashion":[116],"automatically":[118],"strengthen":[119],"existing":[120],"SGG":[121],"tackle":[123],"the":[124,168],"long-tailed":[125],"problem.":[126],"Based":[127],"that,":[129],"further":[131],"introduce":[132],"novel":[134],"Entangled":[135],"cross-modal":[136],"prompt":[137],"approach":[138],"open-world":[140,187],"predicate":[141,188],"scene":[142,172],"graph":[143,173],"generation":[144,174],"(Epic),":[145],"generalize":[149],"unseen":[151],"zero-shot":[155],"manner.":[156],"Comprehensive":[157],"experiments":[158],"three":[160],"benchmark":[161],"datasets":[162],"show":[163],"consistently":[166],"boosts":[167],"performance":[169,185],"multiple":[171],"model-agnostic":[178],"Moreover,":[180],"our":[181],"Epic":[182],"achieves":[183],"competitive":[184],"prediction.":[189],"code":[193],"paper":[196],"publicly":[198],"available.":[199],"<sup":[200],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[201],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">1</sup>":[202]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":12},{"year":2024,"cited_by_count":8},{"year":2023,"cited_by_count":2}],"updated_date":"2026-05-05T08:41:31.759640","created_date":"2025-10-10T00:00:00"}
