{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,29]],"date-time":"2026-04-29T15:49:40Z","timestamp":1777477780423,"version":"3.51.4"},"reference-count":33,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2017,5,1]],"date-time":"2017-05-01T00:00:00Z","timestamp":1493596800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"}],"funder":[{"name":"National Hi-Tech Research and Development Program of China","award":["2014AA015102"],"award-info":[{"award-number":["2014AA015102"]}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"crossref","award":["61371128"],"award-info":[{"award-number":["61371128"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"crossref"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"crossref","award":["61532005"],"award-info":[{"award-number":["61532005"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"crossref"}]}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Neurocomputing"],"published-print":{"date-parts":[[2017,5]]},"DOI":"10.1016\/j.neucom.2016.07.067","type":"journal-article","created":{"date-parts":[[2016,11,18]],"date-time":"2016-11-18T07:01:19Z","timestamp":1479452479000},"page":"123-133","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":2,"special_numbering":"C","title":["Cross-media retrieval by exploiting fine-grained correlation at entity level"],"prefix":"10.1016","volume":"236","author":[{"given":"Lei","family":"Huang","sequence":"first","affiliation":[]},{"given":"Yuxin","family":"Peng","sequence":"additional","affiliation":[]}],"member":"78","reference":[{"issue":"5","key":"10.1016\/j.neucom.2016.07.067_bib1","doi-asserted-by":"crossref","first-page":"513","DOI":"10.1016\/0306-4573(88)90021-0","article-title":"Term-weighting approaches in automatic text retrieval","volume":"24","author":"Salton","year":"1988","journal-title":"Inf. Process. Manag."},{"key":"10.1016\/j.neucom.2016.07.067_bib2","series-title":"Text-Based Intelligent Systems: Current Research and Practice in Information Extraction and Retrieval","author":"Jacobs","year":"2014"},{"issue":"12","key":"10.1016\/j.neucom.2016.07.067_bib3","doi-asserted-by":"crossref","first-page":"1349","DOI":"10.1109\/34.895972","article-title":"Content-based image retrieval at the end of the early years","volume":"22","author":"Smeulders","year":"2000","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"10.1016\/j.neucom.2016.07.067_bib4","doi-asserted-by":"crossref","unstructured":"J. Deng, W. Dong, R. Socher, L. Li, K. Li, L. Fei-Fei, Imagenet: a large-scale hierarchical image database, in: Proceedings of IEEE Conference on Computer Vision and Pattern Recognition (CVPR), 2009, pp. 248\u2013255. http:\/\/dx.doi.org\/10.1109\/CVPR.2009.5206848","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"10.1016\/j.neucom.2016.07.067_bib5","doi-asserted-by":"crossref","unstructured":"Y. Jiang, C. Ngo, J. Yang, Towards optimal bag-of-features for object categorization and semantic video retrieval, in:\u00a0Proceedings of the 6th ACM International Conference on Image and Video Retrieval, 2007, pp. 494\u2013501. http:\/\/dx.doi.org\/10.1145\/1282280.1282352","DOI":"10.1145\/1282280.1282352"},{"issue":"2","key":"10.1016\/j.neucom.2016.07.067_bib6","doi-asserted-by":"crossref","first-page":"14","DOI":"10.1145\/2457450.2457456","article-title":"Effective transfer tagging from image to video","volume":"9","author":"Yang","year":"2013","journal-title":"ACM Trans. Multimed. Comput. Commun. Appl."},{"issue":"6","key":"10.1016\/j.neucom.2016.07.067_bib7","doi-asserted-by":"crossref","first-page":"345","DOI":"10.1007\/s00530-010-0182-0","article-title":"Multimodal fusion for multimedia analysis:\u00a0a survey","volume":"16","author":"Atrey","year":"2010","journal-title":"Multimed. Syst."},{"key":"10.1016\/j.neucom.2016.07.067_bib8","doi-asserted-by":"crossref","unstructured":"M. Guillaumin, T. Mensink, J. Verbeek, C. Schmid, Tagprop: Discriminative metric learning in nearest neighbor models for image auto-annotation, in: Proceedings of IEEE Conference on Computer Vision and Pattern Recognition (CVPR), 2009, pp. 309\u2013316. http:\/\/dx.doi.org\/10.1109\/ICCV.2009.5459266","DOI":"10.1109\/ICCV.2009.5459266"},{"issue":"6","key":"10.1016\/j.neucom.2016.07.067_bib9","doi-asserted-by":"crossref","first-page":"1677","DOI":"10.1109\/TMM.2014.2323014","article-title":"Exploiting web images for semantic video indexing via robust sample-specific loss","volume":"16","author":"Yang","year":"2014","journal-title":"IEEE Trans. Multimed."},{"key":"10.1016\/j.neucom.2016.07.067_bib10","doi-asserted-by":"crossref","first-page":"321","DOI":"10.1093\/biomet\/28.3-4.321","article-title":"Relations between two sets of variates","volume":"28","author":"Hotelling","year":"1936","journal-title":"Biometrika"},{"key":"10.1016\/j.neucom.2016.07.067_bib11","doi-asserted-by":"crossref","unstructured":"D. Li, N. Dimitrova, M. Li, I.K. Sethi, Multimedia content processing through cross-modal association, in:\u00a0Proceedings of the 11th ACM International Conference on Multimedia (ACM-MM), 2003, pp. 604\u2013611. http:\/\/dx.doi.org\/10.1145\/957013.957143","DOI":"10.1145\/957013.957143"},{"issue":"3","key":"10.1016\/j.neucom.2016.07.067_bib12","doi-asserted-by":"crossref","first-page":"521","DOI":"10.1109\/TPAMI.2013.142","article-title":"On the role of correlation and abstraction in cross-modal multimedia retrieval","volume":"36","author":"Pereira","year":"2014","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"10.1016\/j.neucom.2016.07.067_bib13","doi-asserted-by":"crossref","unstructured":"A. Sharma, A. Kumar, H. Daume III, D.W. Jacobs, Generalized multiview analysis: a discriminative latent space, in: Proceedings of IEEE Conference on Computer Vision and Pattern Recognition (CVPR), 2012, pp. 2160\u20132167. http:\/\/dx.doi.org\/10.1109\/CVPR.2012.6247923","DOI":"10.1109\/CVPR.2012.6247923"},{"key":"10.1016\/j.neucom.2016.07.067_bib14","doi-asserted-by":"crossref","unstructured":"Y. Zhuang, Y.F. Wang, F. Wu, Y. Zhang, W.M. Lu, Supervised coupled dictionary learning with group structures for multi-modal retrieval, in:\u00a0Proceedings of the 27th AAAI Conference on Artificial Intelligence (AAAI), 2013, pp. 1070\u20131076.","DOI":"10.1609\/aaai.v27i1.8603"},{"key":"10.1016\/j.neucom.2016.07.067_bib15","doi-asserted-by":"crossref","first-page":"10","DOI":"10.1016\/j.neucom.2012.03.033","article-title":"Fusing inherent and external knowledge with nonlinear learning for cross-media retrieval","volume":"119","author":"Zhang","year":"2013","journal-title":"Neurocomputing"},{"issue":"3","key":"10.1016\/j.neucom.2016.07.067_bib16","doi-asserted-by":"crossref","first-page":"583","DOI":"10.1109\/TCSVT.2015.2400779","article-title":"Semi-supervised cross-media feature learning with unified patch graph regularization","volume":"26","author":"Peng","year":"2016","journal-title":"IEEE Trans. Circuits Syst. Video Technol."},{"key":"10.1016\/j.neucom.2016.07.067_bib17","doi-asserted-by":"crossref","first-page":"250","DOI":"10.1016\/j.neucom.2016.02.016","article-title":"Semantic consistency hashing for cross-modal retrieval","volume":"193","author":"Yao","year":"2016","journal-title":"Neurocomputing"},{"issue":"2","key":"10.1016\/j.neucom.2016.07.067_bib18","doi-asserted-by":"crossref","first-page":"221","DOI":"10.1109\/TMM.2007.911822","article-title":"Mining semantic correlation of heterogeneous multimedia data for cross-media retrieval","volume":"10","author":"Zhuang","year":"2008","journal-title":"IEEE Trans. Multimed."},{"issue":"3","key":"10.1016\/j.neucom.2016.07.067_bib19","doi-asserted-by":"crossref","first-page":"437","DOI":"10.1109\/TMM.2008.917359","article-title":"Harmonizing hierarchical manifolds for multimedia document semantics understanding and cross-media retrieval","volume":"10","author":"Yang","year":"2008","journal-title":"IEEE Trans. Multimed."},{"key":"10.1016\/j.neucom.2016.07.067_bib20","unstructured":"V. Mahadevan, C.W. Wong, J.C. Pereira, T. Liu, N. Vasconcelos, L. K. Saul, Maximum covariance unfolding: Manifold learning for bimodal data, in: Advances in Neural Information Processing Systems (NIPS), 2011, pp. 918\u2013926."},{"key":"10.1016\/j.neucom.2016.07.067_bib21","doi-asserted-by":"crossref","unstructured":"N. Rasiwasia, J. Costa Pereira, E. Coviello, G. Doyle, G.R. Lanckriet, R. Levy, N. Vasconcelos, A new approach to cross-modal multimedia retrieval, in:\u00a0Proceedings of the 18th ACM International Conference on Multimedia (ACM-MM), 2010, pp. 251\u2013260. http:\/\/dx.doi.org\/10.1145\/1873951.1873987","DOI":"10.1145\/1873951.1873987"},{"key":"10.1016\/j.neucom.2016.07.067_bib22","doi-asserted-by":"crossref","unstructured":"A. Farhadi, M. Hejrati, M.A. Sadeghi, P. Young, C. Rashtchian, J. Hockenmaier, D. Forsyth, Every picture tells a story: Generating sentences from images, in: Proceedings of European Conference on Computer Vision (ECCV), Springer, 2010, pp. 15\u201329. http:\/\/dx.doi.org\/10.1007\/978-3-642-15561-1_2","DOI":"10.1007\/978-3-642-15561-1_2"},{"key":"10.1016\/j.neucom.2016.07.067_bib23","doi-asserted-by":"crossref","unstructured":"L. Huang, Y. Peng, Cross-media retrieval via semantic entity projection, in: MultiMedia Modeling, 2016, pp. 276\u2013288. http:\/\/dx.doi.org\/10.1007\/978-3-319-27671-7_23","DOI":"10.1007\/978-3-319-27671-7_23"},{"key":"10.1016\/j.neucom.2016.07.067_bib24","doi-asserted-by":"crossref","unstructured":"V. Ranjan, N. Rasiwasia, C.V. Jawahar, Multi-label cross-modal retrieval, in: Proceedings of IEEE International Conference on Computer Vision (ICCV), 2015, pp. 4094\u20134102.","DOI":"10.1109\/ICCV.2015.466"},{"key":"10.1016\/j.neucom.2016.07.067_bib25","doi-asserted-by":"crossref","unstructured":"Y. Wang, F. Wu, J. Song, X. Li, Y. Zhuang, Multi-modal mutual topic reinforce modeling for cross-media retrieval, in:\u00a0Proceedings of the 22nd ACM international conference on Multimedia, ACM, 2014, pp. 307\u2013316. http:\/\/dx.doi.org\/10.1145\/2647868.2654901","DOI":"10.1145\/2647868.2654901"},{"key":"10.1016\/j.neucom.2016.07.067_bib26","doi-asserted-by":"crossref","unstructured":"L. Song, M. Luo, J. Liu, L. Zhang, B. Qian, M.H. Li, Q. Zheng, Sparse multi-modal topical coding for image annotation, Neurocomputing http:\/\/dx.doi.org\/10.1016\/j.neucom.2016.06.005","DOI":"10.1016\/j.neucom.2016.06.005"},{"key":"10.1016\/j.neucom.2016.07.067_bib27","doi-asserted-by":"crossref","unstructured":"F. Feng, X. Wang, R. Li, Cross-modal retrieval with correspondence autoencoder, in:\u00a0Proceedings of the 22nd ACM International Conference on Multimedia (ACM-MM), 2014, pp. 7\u201316. http:\/\/dx.doi.org\/10.1145\/2647868.2654902","DOI":"10.1145\/2647868.2654902"},{"key":"10.1016\/j.neucom.2016.07.067_bib28","doi-asserted-by":"crossref","unstructured":"X. Jiang, F. Wu, X. Li, Z. Zhao, W. Lu, S. Tang, Y. Zhuang, Deep compositional cross-modal learning to rank via local-global alignment, in:\u00a0Proceedings of the 23rd ACM International Conference on Multimedia (ACM-MM), ACM, 2015, pp. 69\u201378. http:\/\/dx.doi.org\/10.1145\/2733373.2806240","DOI":"10.1145\/2733373.2806240"},{"key":"10.1016\/j.neucom.2016.07.067_bib29","doi-asserted-by":"crossref","unstructured":"X. Cheng, D. Roth, Relational inference for wikification, in: Proceedings of Conference on Empirical Methods in Natural Language Processing (EMNLP), 2013, pp. 1787\u20131796.","DOI":"10.18653\/v1\/D13-1184"},{"issue":"1","key":"10.1016\/j.neucom.2016.07.067_bib30","first-page":"22","article-title":"Word association norms, mutual information, and lexicography","volume":"16","author":"Church","year":"1990","journal-title":"Comput. Linguist."},{"key":"10.1016\/j.neucom.2016.07.067_bib31","first-page":"1871","article-title":"Liblinear:\u00a0a library for large linear classification","volume":"9","author":"Fan","year":"2008","journal-title":"J. Mach. Learn. Res."},{"key":"10.1016\/j.neucom.2016.07.067_bib32","first-page":"993","article-title":"Latent dirichlet allocation","volume":"3","author":"Blei","year":"2003","journal-title":"J. Mach. Learn. Res."},{"key":"10.1016\/j.neucom.2016.07.067_bib33","unstructured":"A. Vedaldi, B. Fulkerson, VLFeat: An Open and Portable Library of Computer Vision Algorithms, 2008. URL http:\/\/www.vlfeat.org\/."}],"container-title":["Neurocomputing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0925231216313819?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0925231216313819?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2025,6,12]],"date-time":"2025-06-12T18:38:07Z","timestamp":1749753487000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S0925231216313819"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2017,5]]},"references-count":33,"alternative-id":["S0925231216313819"],"URL":"https:\/\/doi.org\/10.1016\/j.neucom.2016.07.067","relation":{},"ISSN":["0925-2312"],"issn-type":[{"value":"0925-2312","type":"print"}],"subject":[],"published":{"date-parts":[[2017,5]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"Cross-media retrieval by exploiting fine-grained correlation at entity level","name":"articletitle","label":"Article Title"},{"value":"Neurocomputing","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.neucom.2016.07.067","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2016 Elsevier B.V. All rights reserved.","name":"copyright","label":"Copyright"}]}}