{"id":"https://openalex.org/W2109038798","doi":"https://doi.org/10.1109/tmm.2015.2428996","title":"Partial-Duplicate Clustering and Visual Pattern Discovery on Web Scale Image Database","display_name":"Partial-Duplicate Clustering and Visual Pattern Discovery on Web Scale Image Database","publication_year":2015,"publication_date":"2015-05-01","ids":{"openalex":"https://openalex.org/W2109038798","doi":"https://doi.org/10.1109/tmm.2015.2428996","mag":"2109038798"},"language":"en","primary_location":{"id":"doi:10.1109/tmm.2015.2428996","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tmm.2015.2428996","pdf_url":null,"source":{"id":"https://openalex.org/S137030581","display_name":"IEEE Transactions on Multimedia","issn_l":"1520-9210","issn":["1520-9210","1941-0077"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Multimedia","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5047526883","display_name":"Wei Li","orcid":"https://orcid.org/0000-0003-0879-9200"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Wei Li","raw_affiliation_strings":["Department of Computer Science and Technology, Tsinghua University, Beijing, China","State Key Laboratory of Intelligent Technology and Systems, Tsinghua National Laboratory for Information Science and Technology Department of Computer Science and Technology, Tsinghua University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Technology, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]},{"raw_affiliation_string":"State Key Laboratory of Intelligent Technology and Systems, Tsinghua National Laboratory for Information Science and Technology Department of Computer Science and Technology, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5055085354","display_name":"Changhu Wang","orcid":"https://orcid.org/0000-0001-8373-2597"},"institutions":[{"id":"https://openalex.org/I1290206253","display_name":"Microsoft (United States)","ror":"https://ror.org/00d0nc645","country_code":"US","type":"company","lineage":["https://openalex.org/I1290206253"]},{"id":"https://openalex.org/I4210113369","display_name":"Microsoft Research Asia (China)","ror":"https://ror.org/0300m5276","country_code":"CN","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210113369"]}],"countries":["CN","US"],"is_corresponding":false,"raw_author_name":"Changhu Wang","raw_affiliation_strings":["Microsoft Research Asia, Haidian District, Beijing, China","Microsoft Research Asia, Haidian District, China"],"affiliations":[{"raw_affiliation_string":"Microsoft Research Asia, Haidian District, Beijing, China","institution_ids":["https://openalex.org/I4210113369"]},{"raw_affiliation_string":"Microsoft Research Asia, Haidian District, China","institution_ids":["https://openalex.org/I1290206253"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100603174","display_name":"Lei Zhang","orcid":"https://orcid.org/0009-0005-9977-283X"},"institutions":[{"id":"https://openalex.org/I1290206253","display_name":"Microsoft (United States)","ror":"https://ror.org/00d0nc645","country_code":"US","type":"company","lineage":["https://openalex.org/I1290206253"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Lei Zhang","raw_affiliation_strings":["Microsoft Corporation, Redmond, WA, USA","Microsoft Corporation Redmond,WA,USA"],"affiliations":[{"raw_affiliation_string":"Microsoft Corporation, Redmond, WA, USA","institution_ids":["https://openalex.org/I1290206253"]},{"raw_affiliation_string":"Microsoft Corporation Redmond,WA,USA","institution_ids":["https://openalex.org/I1290206253"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100728762","display_name":"Yong Rui","orcid":"https://orcid.org/0000-0002-9142-5914"},"institutions":[{"id":"https://openalex.org/I1290206253","display_name":"Microsoft (United States)","ror":"https://ror.org/00d0nc645","country_code":"US","type":"company","lineage":["https://openalex.org/I1290206253"]},{"id":"https://openalex.org/I4210113369","display_name":"Microsoft Research Asia (China)","ror":"https://ror.org/0300m5276","country_code":"CN","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210113369"]}],"countries":["CN","US"],"is_corresponding":false,"raw_author_name":"Yong Rui","raw_affiliation_strings":["Microsoft Research Asia, Haidian District, Beijing, China","Microsoft Research Asia, Haidian District, China"],"affiliations":[{"raw_affiliation_string":"Microsoft Research Asia, Haidian District, Beijing, China","institution_ids":["https://openalex.org/I4210113369"]},{"raw_affiliation_string":"Microsoft Research Asia, Haidian District, China","institution_ids":["https://openalex.org/I1290206253"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100335187","display_name":"Bo Zhang","orcid":"https://orcid.org/0000-0002-9958-6181"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Bo Zhang","raw_affiliation_strings":["Department of Computer Science and Technology, Tsinghua University, Beijing, China","State Key Laboratory of Intelligent Technology and Systems, Tsinghua National Laboratory for Information Science and Technology Department of Computer Science and Technology, Tsinghua University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Technology, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]},{"raw_affiliation_string":"State Key Laboratory of Intelligent Technology and Systems, Tsinghua National Laboratory for Information Science and Technology Department of Computer Science and Technology, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5047526883"],"corresponding_institution_ids":["https://openalex.org/I99065089"],"apc_list":null,"apc_paid":null,"fwci":0.5523,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":{"value":0.74618516,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":96},"biblio":{"volume":"17","issue":"7","first_page":"967","last_page":"980"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10824","display_name":"Image Retrieval and Classification Techniques","score":0.9987999796867371,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10689","display_name":"Remote-Sensing Image Classification","score":0.9936000108718872,"subfield":{"id":"https://openalex.org/subfields/2214","display_name":"Media Technology"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7609794735908508},{"id":"https://openalex.org/keywords/cluster-analysis","display_name":"Cluster analysis","score":0.6817846894264221},{"id":"https://openalex.org/keywords/pairwise-comparison","display_name":"Pairwise comparison","score":0.49489182233810425},{"id":"https://openalex.org/keywords/property","display_name":"Property (philosophy)","score":0.49351775646209717},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.46301624178886414},{"id":"https://openalex.org/keywords/parallelizable-manifold","display_name":"Parallelizable manifold","score":0.4491458237171173},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.43520307540893555},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.4200670123100281},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.418654203414917},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.16989007592201233}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7609794735908508},{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.6817846894264221},{"id":"https://openalex.org/C184898388","wikidata":"https://www.wikidata.org/wiki/Q1435712","display_name":"Pairwise comparison","level":2,"score":0.49489182233810425},{"id":"https://openalex.org/C189950617","wikidata":"https://www.wikidata.org/wiki/Q937228","display_name":"Property (philosophy)","level":2,"score":0.49351775646209717},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.46301624178886414},{"id":"https://openalex.org/C148047603","wikidata":"https://www.wikidata.org/wiki/Q1014612","display_name":"Parallelizable manifold","level":2,"score":0.4491458237171173},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.43520307540893555},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.4200670123100281},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.418654203414917},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.16989007592201233},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tmm.2015.2428996","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tmm.2015.2428996","pdf_url":null,"source":{"id":"https://openalex.org/S137030581","display_name":"IEEE Transactions on Multimedia","issn_l":"1520-9210","issn":["1520-9210","1941-0077"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Multimedia","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G179975616","display_name":null,"funder_award_id":"91120011","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G3686428102","display_name":null,"funder_award_id":"2012CB316301","funder_id":"https://openalex.org/F4320335777","funder_display_name":"National Key Research and Development Program of China"},{"id":"https://openalex.org/G5531878825","display_name":null,"funder_award_id":"61273023","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G7373123958","display_name":null,"funder_award_id":"2013CB329403","funder_id":"https://openalex.org/F4320335777","funder_display_name":"National Key Research and Development Program of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320335777","display_name":"National Key Research and Development Program of China","ror":null}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":42,"referenced_works":["https://openalex.org/W163569376","https://openalex.org/W1579524835","https://openalex.org/W1924999439","https://openalex.org/W1980972548","https://openalex.org/W1990253676","https://openalex.org/W1999817731","https://openalex.org/W2026066288","https://openalex.org/W2032448673","https://openalex.org/W2032475142","https://openalex.org/W2040335539","https://openalex.org/W2061571889","https://openalex.org/W2065296697","https://openalex.org/W2067766814","https://openalex.org/W2077528174","https://openalex.org/W2081332440","https://openalex.org/W2095027197","https://openalex.org/W2103442564","https://openalex.org/W2117602761","https://openalex.org/W2128017662","https://openalex.org/W2131846894","https://openalex.org/W2133671054","https://openalex.org/W2139882995","https://openalex.org/W2141362318","https://openalex.org/W2145607950","https://openalex.org/W2150307973","https://openalex.org/W2151103935","https://openalex.org/W2154952031","https://openalex.org/W2156854610","https://openalex.org/W2162530156","https://openalex.org/W2167135506","https://openalex.org/W2170146448","https://openalex.org/W2171448996","https://openalex.org/W3070706509","https://openalex.org/W4247915583","https://openalex.org/W6606697986","https://openalex.org/W6634582620","https://openalex.org/W6658424634","https://openalex.org/W6666665630","https://openalex.org/W6677567300","https://openalex.org/W6680676027","https://openalex.org/W6682717316","https://openalex.org/W6684741110"],"related_works":["https://openalex.org/W4285069850","https://openalex.org/W2891970004","https://openalex.org/W4240963716","https://openalex.org/W1974634278","https://openalex.org/W1996916724","https://openalex.org/W1596637634","https://openalex.org/W2321918740","https://openalex.org/W2900794160","https://openalex.org/W2154273227","https://openalex.org/W1776186222"],"abstract_inverted_index":{"In":[0],"this":[1,70,85],"paper,":[2],"we":[3,88],"study":[4],"the":[5,28,113,121,171,185,195,201],"problem":[6],"of":[7,36,62,116,177,188,200],"discovering":[8],"visual":[9,18,58,101,158,186,208],"patterns":[10,102,187],"and":[11,21,79,92,100,119,132,164,174,198,207,218,225],"partial-duplicate":[12,55,98,172,205],"images,":[13],"which":[14,144,230],"is":[15,30,153,231],"fundamental":[16],"to":[17,69,96,155,169,183,221,235],"concept":[19],"representation":[20,129],"image":[22],"parsing,":[23],"but":[24],"very":[25],"challenging":[26],"when":[27],"database":[29],"extremely":[31],"large,":[32],"such":[33],"as":[34,126],"billions":[35],"images":[37,99,228],"indexed":[38],"by":[39,141],"a":[40,90,104,127],"commercial":[41],"search":[42],"engine.":[43],"Although":[44],"extensive":[45],"research":[46],"with":[47],"sophisticated":[48],"algorithms":[49,166,202],"has":[50],"been":[51],"conducted":[52],"for":[53,130],"either":[54],"clustering":[56,75,206],"or":[57],"pattern":[59,209],"discovery,":[60],"most":[61],"them":[63],"can":[64],"not":[65],"be":[66],"easily":[67],"extended":[68],"scale,":[71],"since":[72],"both":[73,204],"are":[74,138,167,181],"problems":[76],"in":[77,103,107],"nature":[78],"require":[80],"pairwise":[81],"comparisons.":[82],"To":[83],"tackle":[84],"computational":[86],"challenge,":[87],"introduce":[89],"novel":[91],"highly":[93],"parallelizable":[94,196],"framework":[95],"discover":[97],"unified":[105],"way":[106],"distributed":[108],"computing":[109],"systems.":[110],"We":[111],"emphasize":[112],"nested":[114,123],"property":[115,197],"local":[117,133],"features,":[118],"propose":[120],"generalized":[122],"feature":[124],"(GNF)":[125],"mid-level":[128],"regions":[131],"patterns.":[134,159],"Initial":[135],"coarse":[136],"clusters":[137],"then":[139],"discovered":[140],"GNFs,":[142],"upon":[143],"<formula":[145],"formulatype=\"inline\"":[146],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[147],"xmlns:xlink=\"http://www.w3.org/1999/xlink\"><tex":[148],"Notation=\"TeX\">$n$</tex>":[149],"</formula>":[150],"-gram":[151],"GNF":[152,179],"defined":[154],"represent":[156,184],"co-occurrent":[157],"After":[160],"that,":[161],"efficient":[162,233],"merging":[163],"refining":[165],"used":[168],"get":[170],"clusters,":[173],"logical":[175],"combinations":[176],"probabilistic":[178],"models":[180],"leveraged":[182],"partially":[189],"duplicate":[190],"images.":[191],"Extensive":[192],"experiments":[193],"show":[194],"effectiveness":[199],"on":[203],"discovery.":[210],"With":[211],"2000":[212],"machines,":[213],"it":[214],"costs":[215],"about":[216],"eight":[217],"400":[219],"minutes":[220],"process":[222],"one":[223],"million":[224,227],"40":[226],"respectively,":[229],"quite":[232],"compared":[234],"previous":[236],"methods.":[237]},"counts_by_year":[{"year":2020,"cited_by_count":1},{"year":2017,"cited_by_count":2},{"year":2016,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
