{"id":"https://openalex.org/W4387934928","doi":"https://doi.org/10.1109/tpami.2023.3327677","title":"Learning Hierarchical Modular Networks for Video Captioning","display_name":"Learning Hierarchical Modular Networks for Video Captioning","publication_year":2023,"publication_date":"2023-10-25","ids":{"openalex":"https://openalex.org/W4387934928","doi":"https://doi.org/10.1109/tpami.2023.3327677","pmid":"https://pubmed.ncbi.nlm.nih.gov/37878438"},"language":"en","primary_location":{"id":"doi:10.1109/tpami.2023.3327677","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tpami.2023.3327677","pdf_url":null,"source":{"id":"https://openalex.org/S199944782","display_name":"IEEE Transactions on Pattern Analysis and Machine Intelligence","issn_l":"0162-8828","issn":["0162-8828","1939-3539","2160-9292"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320439","host_organization_name":"IEEE Computer Society","host_organization_lineage":["https://openalex.org/P4310320439","https://openalex.org/P4310319808"],"host_organization_lineage_names":["IEEE Computer Society","Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Pattern Analysis and Machine Intelligence","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100775216","display_name":"Guorong Li","orcid":"https://orcid.org/0000-0003-3954-2387"},"institutions":[{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Guorong Li","raw_affiliation_strings":["School of Computer Science and Technology, Key Lab of Big Data Mining and Knowledge Management, University of Chinese Academy of Sciences, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0003-3954-2387","affiliations":[{"raw_affiliation_string":"School of Computer Science and Technology, Key Lab of Big Data Mining and Knowledge Management, University of Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210165038"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5045051320","display_name":"Hanhua Ye","orcid":"https://orcid.org/0009-0006-0824-6306"},"institutions":[{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hanhua Ye","raw_affiliation_strings":["School of Computer Science and Technology, Key Lab of Big Data Mining and Knowledge Management, University of Chinese Academy of Sciences, Beijing, China"],"raw_orcid":"https://orcid.org/0009-0006-0824-6306","affiliations":[{"raw_affiliation_string":"School of Computer Science and Technology, Key Lab of Big Data Mining and Knowledge Management, University of Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210165038"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5070842891","display_name":"Yuankai Qi","orcid":"https://orcid.org/0000-0003-4312-5682"},"institutions":[{"id":"https://openalex.org/I4210127558","display_name":"Australian Centre for Robotic Vision","ror":"https://ror.org/02zv9xv82","country_code":"AU","type":"facility","lineage":["https://openalex.org/I4210127558"]},{"id":"https://openalex.org/I5681781","display_name":"The University of Adelaide","ror":"https://ror.org/00892tw58","country_code":"AU","type":"education","lineage":["https://openalex.org/I5681781"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Yuankai Qi","raw_affiliation_strings":["Australian Institute for Machine Learning, The University of Adelaide, Adelaide, SA, Australia"],"raw_orcid":"https://orcid.org/0000-0003-4312-5682","affiliations":[{"raw_affiliation_string":"Australian Institute for Machine Learning, The University of Adelaide, Adelaide, SA, Australia","institution_ids":["https://openalex.org/I4210127558","https://openalex.org/I5681781"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100669242","display_name":"Shuhui Wang","orcid":"https://orcid.org/0000-0002-5931-0527"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210090176","display_name":"Institute of Computing Technology","ror":"https://ror.org/0090r4d87","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210090176"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shuhui Wang","raw_affiliation_strings":["Key Laboratory of Intelligent Information Processing, Institute of Computer Technology, Chinese Academy of Sciences, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0002-5931-0527","affiliations":[{"raw_affiliation_string":"Key Laboratory of Intelligent Information Processing, Institute of Computer Technology, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210090176","https://openalex.org/I19820366"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5010557705","display_name":"Laiyun Qing","orcid":"https://orcid.org/0000-0001-9923-5034"},"institutions":[{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Laiyun Qing","raw_affiliation_strings":["School of Computer Science and Technology, Key Lab of Big Data Mining and Knowledge Management, University of Chinese Academy of Sciences, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0001-9923-5034","affiliations":[{"raw_affiliation_string":"School of Computer Science and Technology, Key Lab of Big Data Mining and Knowledge Management, University of Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210165038"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5028597017","display_name":"Qingming Huang","orcid":"https://orcid.org/0000-0001-7542-296X"},"institutions":[{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qingming Huang","raw_affiliation_strings":["School of Computer Science and Technology, Key Lab of Big Data Mining and Knowledge Management, University of Chinese Academy of Sciences, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0001-7542-296X","affiliations":[{"raw_affiliation_string":"School of Computer Science and Technology, Key Lab of Big Data Mining and Knowledge Management, University of Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210165038"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100418319","display_name":"Ming\u2013Hsuan Yang","orcid":"https://orcid.org/0000-0003-4848-2304"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]},{"id":"https://openalex.org/I156087764","display_name":"University of California, Merced","ror":"https://ror.org/00d9ah105","country_code":"US","type":"education","lineage":["https://openalex.org/I156087764"]},{"id":"https://openalex.org/I193775966","display_name":"Yonsei University","ror":"https://ror.org/01wjejq96","country_code":"KR","type":"education","lineage":["https://openalex.org/I193775966"]}],"countries":["KR","US"],"is_corresponding":false,"raw_author_name":"Ming-Hsuan Yang","raw_affiliation_strings":["University of California at Merced, Merced, CA, USA","Google","University of California at Merced, Yonsei University, South Korea"],"raw_orcid":"https://orcid.org/0000-0003-4848-2304","affiliations":[{"raw_affiliation_string":"University of California at Merced, Merced, CA, USA","institution_ids":["https://openalex.org/I156087764"]},{"raw_affiliation_string":"Google","institution_ids":["https://openalex.org/I1291425158"]},{"raw_affiliation_string":"University of California at Merced, Yonsei University, South Korea","institution_ids":["https://openalex.org/I193775966"]}]}],"institutions":[],"countries_distinct_count":4,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5100775216"],"corresponding_institution_ids":["https://openalex.org/I4210165038"],"apc_list":null,"apc_paid":null,"fwci":3.7118,"has_fulltext":false,"cited_by_count":32,"citation_normalized_percentile":{"value":0.94829946,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":98,"max":100},"biblio":{"volume":"46","issue":"2","first_page":"1049","last_page":"1064"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9987000226974487,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.994700014591217,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/closed-captioning","display_name":"Closed captioning","score":0.9348200559616089},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7814736366271973},{"id":"https://openalex.org/keywords/modular-design","display_name":"Modular design","score":0.744754433631897},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6171364188194275},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.36406809091567993},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3425721824169159},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.2107195258140564},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.10311582684516907}],"concepts":[{"id":"https://openalex.org/C157657479","wikidata":"https://www.wikidata.org/wiki/Q2367247","display_name":"Closed captioning","level":3,"score":0.9348200559616089},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7814736366271973},{"id":"https://openalex.org/C101468663","wikidata":"https://www.wikidata.org/wiki/Q1620158","display_name":"Modular design","level":2,"score":0.744754433631897},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6171364188194275},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.36406809091567993},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3425721824169159},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.2107195258140564},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.10311582684516907}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/tpami.2023.3327677","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tpami.2023.3327677","pdf_url":null,"source":{"id":"https://openalex.org/S199944782","display_name":"IEEE Transactions on Pattern Analysis and Machine Intelligence","issn_l":"0162-8828","issn":["0162-8828","1939-3539","2160-9292"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320439","host_organization_name":"IEEE Computer Society","host_organization_lineage":["https://openalex.org/P4310320439","https://openalex.org/P4310319808"],"host_organization_lineage_names":["IEEE Computer Society","Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Pattern Analysis and Machine Intelligence","raw_type":"journal-article"},{"id":"pmid:37878438","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/37878438","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE transactions on pattern analysis and machine intelligence","raw_type":null}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education","score":0.6899999976158142}],"awards":[{"id":"https://openalex.org/G4199693610","display_name":null,"funder_award_id":"62236008","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G5169889009","display_name":null,"funder_award_id":"KGFZD-145-23-18","funder_id":"https://openalex.org/F4320321133","funder_display_name":"Chinese Academy of Sciences"},{"id":"https://openalex.org/G5327851226","display_name":null,"funder_award_id":"62022083","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G5967673014","display_name":null,"funder_award_id":"61976069","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G6212863906","display_name":null,"funder_award_id":"U21B2038","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G7528510259","display_name":null,"funder_award_id":"61836002","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G7801426869","display_name":null,"funder_award_id":"61931008","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G8752815154","display_name":null,"funder_award_id":"62272438","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320321133","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":91,"referenced_works":["https://openalex.org/W1573040851","https://openalex.org/W1586939924","https://openalex.org/W1601567445","https://openalex.org/W1956340063","https://openalex.org/W2049870103","https://openalex.org/W2064675550","https://openalex.org/W2101105183","https://openalex.org/W2117539524","https://openalex.org/W2139501017","https://openalex.org/W2152984213","https://openalex.org/W2222512263","https://openalex.org/W2277195237","https://openalex.org/W2425121537","https://openalex.org/W2549139847","https://openalex.org/W2554906389","https://openalex.org/W2584992898","https://openalex.org/W2607119937","https://openalex.org/W2619947201","https://openalex.org/W2621571501","https://openalex.org/W2766375149","https://openalex.org/W2887585070","https://openalex.org/W2896457183","https://openalex.org/W2905145027","https://openalex.org/W2948358897","https://openalex.org/W2949376505","https://openalex.org/W2950579554","https://openalex.org/W2950635152","https://openalex.org/W2951390634","https://openalex.org/W2962681491","https://openalex.org/W2962934715","https://openalex.org/W2962990649","https://openalex.org/W2963084599","https://openalex.org/W2963177403","https://openalex.org/W2963843052","https://openalex.org/W2963971014","https://openalex.org/W2964241990","https://openalex.org/W2965373594","https://openalex.org/W2970641574","https://openalex.org/W2974212192","https://openalex.org/W2984862483","https://openalex.org/W2989322838","https://openalex.org/W3009192917","https://openalex.org/W3034221024","https://openalex.org/W3035365026","https://openalex.org/W3035392611","https://openalex.org/W3035588244","https://openalex.org/W3093309253","https://openalex.org/W3096609285","https://openalex.org/W3103022576","https://openalex.org/W3105136412","https://openalex.org/W3107069568","https://openalex.org/W3107848485","https://openalex.org/W3152079230","https://openalex.org/W3176425931","https://openalex.org/W3193767255","https://openalex.org/W3205021045","https://openalex.org/W3205276578","https://openalex.org/W3206064582","https://openalex.org/W3209229003","https://openalex.org/W3216659302","https://openalex.org/W4211103897","https://openalex.org/W4214617019","https://openalex.org/W4214692497","https://openalex.org/W4220790454","https://openalex.org/W4304084115","https://openalex.org/W4312372711","https://openalex.org/W4312938887","https://openalex.org/W4313065316","https://openalex.org/W4383604614","https://openalex.org/W4385245566","https://openalex.org/W4386066385","https://openalex.org/W4386083024","https://openalex.org/W4394659899","https://openalex.org/W6620707391","https://openalex.org/W6631190155","https://openalex.org/W6678262379","https://openalex.org/W6682631176","https://openalex.org/W6684090549","https://openalex.org/W6685322675","https://openalex.org/W6726873649","https://openalex.org/W6729805386","https://openalex.org/W6766673545","https://openalex.org/W6791353385","https://openalex.org/W6802680840","https://openalex.org/W6803537622","https://openalex.org/W6810738896","https://openalex.org/W6811013733","https://openalex.org/W6838329711","https://openalex.org/W6855094854","https://openalex.org/W6864544085","https://openalex.org/W6955071965"],"related_works":["https://openalex.org/W2961085424","https://openalex.org/W4306674287","https://openalex.org/W3046775127","https://openalex.org/W3107602296","https://openalex.org/W4394896187","https://openalex.org/W3170094116","https://openalex.org/W4386462264","https://openalex.org/W4364306694","https://openalex.org/W4312192474","https://openalex.org/W4283697347"],"abstract_inverted_index":{"Video":[0],"captioning":[1],"aims":[2],"to":[3,63,88,109,141],"generate":[4],"natural":[5],"language":[6],"descriptions":[7],"for":[8],"a":[9,59,98],"given":[10],"video":[11,65,93,136],"clip.":[12],"Existing":[13],"methods":[14],"mainly":[15],"focus":[16],"on":[17,103,128],"end-to-end":[18],"representation":[19],"learning":[20,100],"via":[21],"word-by-word":[22],"comparison":[23],"between":[24,42],"predicted":[25],"captions":[26,108],"and":[27,44,67,79,144],"ground-truth":[28],"texts.":[29],"Although":[30],"significant":[31],"progress":[32],"has":[33],"been":[34],"made,":[35],"such":[36],"supervised":[37],"approaches":[38],"neglect":[39],"semantic":[40],"alignment":[41],"visual":[43],"linguistic":[45,68],"entities,":[46],"which":[47],"may":[48],"negatively":[49],"affect":[50],"the":[51,104,119,125],"generated":[52],"captions.":[53],"In":[54],"this":[55],"work,":[56],"we":[57,96],"propose":[58],"hierarchical":[60],"modular":[61],"network":[62],"bridge":[64],"representations":[66],"semantics":[69,91],"at":[70],"four":[71],"granularities":[72],"before":[73],"generating":[74],"captions:":[75],"entity,":[76],"verb,":[77],"predicate,":[78],"sentence.":[80],"Each":[81],"level":[82],"is":[83],"implemented":[84],"by":[85],"one":[86],"module":[87,101],"embed":[89],"corresponding":[90],"into":[92],"representations.":[94],"Additionally,":[95],"present":[97],"reinforcement":[99],"based":[102],"scene":[105],"graph":[106],"of":[107],"better":[110],"measure":[111],"sentence":[112],"similarity.":[113],"Extensive":[114],"experimental":[115],"results":[116],"show":[117],"that":[118],"proposed":[120],"method":[121],"performs":[122],"favorably":[123],"against":[124],"state-of-the-art":[126],"models":[127],"three":[129],"widely-used":[130],"benchmark":[131],"datasets,":[132],"including":[133],"microsoft":[134],"research":[135],"description":[137],"corpus":[138],"(MSVD),":[139],"MSR-video":[140],"text":[142],"(MSR-VTT),":[143],"video-and-TEXt":[145],"(VATEX).":[146]},"counts_by_year":[{"year":2026,"cited_by_count":7},{"year":2025,"cited_by_count":19},{"year":2024,"cited_by_count":6}],"updated_date":"2026-05-05T08:41:31.759640","created_date":"2025-10-10T00:00:00"}
