{"id":"https://openalex.org/W4415537806","doi":"https://doi.org/10.1145/3746027.3754879","title":"Serial Over Parallel: Learning Continual Unification for Multi-Modal Visual Object Tracking and Benchmarking","display_name":"Serial Over Parallel: Learning Continual Unification for Multi-Modal Visual Object Tracking and Benchmarking","publication_year":2025,"publication_date":"2025-10-25","ids":{"openalex":"https://openalex.org/W4415537806","doi":"https://doi.org/10.1145/3746027.3754879"},"language":null,"primary_location":{"id":"doi:10.1145/3746027.3754879","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3746027.3754879","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 33rd ACM International Conference on Multimedia","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5074731752","display_name":"Zhangyong Tang","orcid":"https://orcid.org/0000-0001-8187-9384"},"institutions":[{"id":"https://openalex.org/I111599522","display_name":"Jiangnan University","ror":"https://ror.org/04mkzax54","country_code":"CN","type":"education","lineage":["https://openalex.org/I111599522"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Zhangyong Tang","raw_affiliation_strings":["Jiangnan University, Wuxi, China"],"raw_orcid":"https://orcid.org/0000-0001-8187-9384","affiliations":[{"raw_affiliation_string":"Jiangnan University, Wuxi, China","institution_ids":["https://openalex.org/I111599522"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5049469328","display_name":"Tianyang Xu","orcid":"https://orcid.org/0000-0002-9015-3128"},"institutions":[{"id":"https://openalex.org/I111599522","display_name":"Jiangnan University","ror":"https://ror.org/04mkzax54","country_code":"CN","type":"education","lineage":["https://openalex.org/I111599522"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Tianyang Xu","raw_affiliation_strings":["Jiangnan University, Wuxi, China"],"raw_orcid":"https://orcid.org/0000-0002-9015-3128","affiliations":[{"raw_affiliation_string":"Jiangnan University, Wuxi, China","institution_ids":["https://openalex.org/I111599522"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5013792562","display_name":"Xuefeng Zhu","orcid":"https://orcid.org/0000-0003-0262-5891"},"institutions":[{"id":"https://openalex.org/I111599522","display_name":"Jiangnan University","ror":"https://ror.org/04mkzax54","country_code":"CN","type":"education","lineage":["https://openalex.org/I111599522"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xue-Feng Zhu","raw_affiliation_strings":["Jiangnan University, Wuxi, China"],"raw_orcid":"https://orcid.org/0000-0003-0262-5891","affiliations":[{"raw_affiliation_string":"Jiangnan University, Wuxi, China","institution_ids":["https://openalex.org/I111599522"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5038294507","display_name":"Chunyang Cheng","orcid":"https://orcid.org/0000-0003-4603-3505"},"institutions":[{"id":"https://openalex.org/I111599522","display_name":"Jiangnan University","ror":"https://ror.org/04mkzax54","country_code":"CN","type":"education","lineage":["https://openalex.org/I111599522"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chunyang Cheng","raw_affiliation_strings":["Jiangnan University, Wuxi, China"],"raw_orcid":"https://orcid.org/0000-0003-4603-3505","affiliations":[{"raw_affiliation_string":"Jiangnan University, Wuxi, China","institution_ids":["https://openalex.org/I111599522"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101817616","display_name":"Tao Zhou","orcid":"https://orcid.org/0000-0002-3733-7286"},"institutions":[{"id":"https://openalex.org/I36399199","display_name":"Nanjing University of Science and Technology","ror":"https://ror.org/00xp9wg62","country_code":"CN","type":"education","lineage":["https://openalex.org/I36399199"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Tao Zhou","raw_affiliation_strings":["Nanjing University of Science and Technology, Nanjing, China"],"raw_orcid":"https://orcid.org/0000-0002-3733-7286","affiliations":[{"raw_affiliation_string":"Nanjing University of Science and Technology, Nanjing, China","institution_ids":["https://openalex.org/I36399199"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5112052641","display_name":"Xiaojun Wu","orcid":"https://orcid.org/0009-0000-0199-5001"},"institutions":[{"id":"https://openalex.org/I111599522","display_name":"Jiangnan University","ror":"https://ror.org/04mkzax54","country_code":"CN","type":"education","lineage":["https://openalex.org/I111599522"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiaojun Wu","raw_affiliation_strings":["Jiangnan University, Wuxi, China"],"raw_orcid":"https://orcid.org/0009-0000-0199-5001","affiliations":[{"raw_affiliation_string":"Jiangnan University, Wuxi, China","institution_ids":["https://openalex.org/I111599522"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5028209738","display_name":"Josef Kittler","orcid":"https://orcid.org/0000-0002-8110-9205"},"institutions":[{"id":"https://openalex.org/I28290843","display_name":"University of Surrey","ror":"https://ror.org/00ks66431","country_code":"GB","type":"education","lineage":["https://openalex.org/I28290843"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Josef Kittler","raw_affiliation_strings":["University of Surrey, Guildford, United Kingdom"],"raw_orcid":"https://orcid.org/0000-0002-8110-9205","affiliations":[{"raw_affiliation_string":"University of Surrey, Guildford, United Kingdom","institution_ids":["https://openalex.org/I28290843"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5074731752"],"corresponding_institution_ids":["https://openalex.org/I111599522"],"apc_list":null,"apc_paid":null,"fwci":1.1332,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.83513527,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":96,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"1229","last_page":"1238"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10331","display_name":"Video Surveillance and Tracking Methods","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10331","display_name":"Video Surveillance and Tracking Methods","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12597","display_name":"Fire Detection and Safety Systems","score":0.9911999702453613,"subfield":{"id":"https://openalex.org/subfields/2213","display_name":"Safety, Risk, Reliability and Quality"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11019","display_name":"Image Enhancement Techniques","score":0.9753000140190125,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/unification","display_name":"Unification","score":0.7035999894142151},{"id":"https://openalex.org/keywords/benchmarking","display_name":"Benchmarking","score":0.6341000199317932},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.6101999878883362},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.5662000179290771},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.5171999931335449},{"id":"https://openalex.org/keywords/object","display_name":"Object (grammar)","score":0.5144000053405762},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.43050000071525574},{"id":"https://openalex.org/keywords/forgetting","display_name":"Forgetting","score":0.388700008392334},{"id":"https://openalex.org/keywords/visual-reasoning","display_name":"Visual reasoning","score":0.37610000371932983}],"concepts":[{"id":"https://openalex.org/C96146094","wikidata":"https://www.wikidata.org/wiki/Q609057","display_name":"Unification","level":2,"score":0.7035999894142151},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6931999921798706},{"id":"https://openalex.org/C86251818","wikidata":"https://www.wikidata.org/wiki/Q816754","display_name":"Benchmarking","level":2,"score":0.6341000199317932},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.6101999878883362},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.5662000179290771},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5356000065803528},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.5171999931335449},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.5144000053405762},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.49540001153945923},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.43050000071525574},{"id":"https://openalex.org/C7149132","wikidata":"https://www.wikidata.org/wiki/Q1377840","display_name":"Forgetting","level":2,"score":0.388700008392334},{"id":"https://openalex.org/C2777508537","wikidata":"https://www.wikidata.org/wiki/Q7936620","display_name":"Visual reasoning","level":2,"score":0.37610000371932983},{"id":"https://openalex.org/C2775936607","wikidata":"https://www.wikidata.org/wiki/Q466845","display_name":"Tracking (education)","level":2,"score":0.34779998660087585},{"id":"https://openalex.org/C2780226545","wikidata":"https://www.wikidata.org/wiki/Q6888030","display_name":"Modality (human\u2013computer interaction)","level":2,"score":0.32739999890327454},{"id":"https://openalex.org/C202474056","wikidata":"https://www.wikidata.org/wiki/Q1931635","display_name":"Video tracking","level":3,"score":0.321399986743927},{"id":"https://openalex.org/C100776233","wikidata":"https://www.wikidata.org/wiki/Q2532492","display_name":"Bridge (graph theory)","level":2,"score":0.3197999894618988},{"id":"https://openalex.org/C2779903281","wikidata":"https://www.wikidata.org/wiki/Q6888026","display_name":"Modalities","level":2,"score":0.3197999894618988},{"id":"https://openalex.org/C22367795","wikidata":"https://www.wikidata.org/wiki/Q7625208","display_name":"Structured prediction","level":2,"score":0.3188000023365021},{"id":"https://openalex.org/C2775945657","wikidata":"https://www.wikidata.org/wiki/Q381442","display_name":"Structuring","level":2,"score":0.31040000915527344},{"id":"https://openalex.org/C126042441","wikidata":"https://www.wikidata.org/wiki/Q1324888","display_name":"Frame (networking)","level":2,"score":0.3052999973297119},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.3050000071525574},{"id":"https://openalex.org/C171268870","wikidata":"https://www.wikidata.org/wiki/Q1486676","display_name":"GRASP","level":2,"score":0.3041999936103821},{"id":"https://openalex.org/C19768560","wikidata":"https://www.wikidata.org/wiki/Q320727","display_name":"Dependency (UML)","level":2,"score":0.29409998655319214},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.2799000144004822},{"id":"https://openalex.org/C45493050","wikidata":"https://www.wikidata.org/wiki/Q7884934","display_name":"Unified Model","level":2,"score":0.27889999747276306},{"id":"https://openalex.org/C64876066","wikidata":"https://www.wikidata.org/wiki/Q5141226","display_name":"Cognitive neuroscience of visual object recognition","level":3,"score":0.27149999141693115},{"id":"https://openalex.org/C81363708","wikidata":"https://www.wikidata.org/wiki/Q17084460","display_name":"Convolutional neural network","level":2,"score":0.27129998803138733},{"id":"https://openalex.org/C56461940","wikidata":"https://www.wikidata.org/wiki/Q970687","display_name":"Eye tracking","level":2,"score":0.2630999982357025},{"id":"https://openalex.org/C12713177","wikidata":"https://www.wikidata.org/wiki/Q1900281","display_name":"Perspective (graphical)","level":2,"score":0.2572000026702881}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3746027.3754879","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3746027.3754879","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 33rd ACM International Conference on Multimedia","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G2886878341","display_name":null,"funder_award_id":"62020106012, 62332008, 62106089, 62336004","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G5814230653","display_name":null,"funder_award_id":"2023YFE0116300, 2023YFF1105102, 2023YFF1105105","funder_id":"https://openalex.org/F4320335777","funder_display_name":"National Key Research and Development Program of China"},{"id":"https://openalex.org/G6781096127","display_name":null,"funder_award_id":"JUSRP202504007","funder_id":"https://openalex.org/F4320335787","funder_display_name":"Fundamental Research Funds for the Central Universities"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320335777","display_name":"National Key Research and Development Program of China","ror":null},{"id":"https://openalex.org/F4320335787","display_name":"Fundamental Research Funds for the Central Universities","ror":null}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":22,"referenced_works":["https://openalex.org/W2527415613","https://openalex.org/W2794744029","https://openalex.org/W2963905288","https://openalex.org/W2994777800","https://openalex.org/W3158472981","https://openalex.org/W3202534481","https://openalex.org/W4206291722","https://openalex.org/W4214737929","https://openalex.org/W4304098539","https://openalex.org/W4312751983","https://openalex.org/W4380884766","https://openalex.org/W4385569741","https://openalex.org/W4386075603","https://openalex.org/W4393147957","https://openalex.org/W4393154017","https://openalex.org/W4393154934","https://openalex.org/W4396598146","https://openalex.org/W4396951406","https://openalex.org/W4401262386","https://openalex.org/W4402754150","https://openalex.org/W4404770784","https://openalex.org/W4414499313"],"related_works":[],"abstract_inverted_index":{"Unifying":[0],"multiple":[1,111],"multi-modal":[2,239],"visual":[3],"object":[4],"tracking":[5,22],"(MMVOT)":[6],"tasks":[7,226],"draws":[8],"increasing":[9],"attention":[10],"due":[11],"to":[12,84,105,119,210,221],"the":[13,41,51,55,59,107,143,159,172,185,190,205,245],"complementary":[14],"nature":[15],"of":[16,54,61,68,152,161,168,187,192],"different":[17],"modalities":[18],"in":[19,31,94,132,194,232],"building":[20],"robust":[21],"systems.":[23],"Existing":[24],"practices":[25],"mix":[26],"all":[27,66],"data":[28,69],"sensor":[29],"types":[30,67],"a":[32,37,47,62,133,196],"single":[33],"training":[34,79],"procedure,":[35],"structuring":[36],"parallel":[38],"paradigm":[39],"from":[40,117],"data-centric":[42],"perspective":[43],"and":[44,80,121,181,189,244],"aiming":[45],"for":[46,237],"global":[48],"optimum":[49],"on":[50,73,178],"joint":[52],"distribution":[53],"involved":[56],"tasks.":[57,139],"However,":[58],"absence":[60],"unified":[63,98],"benchmark":[64,247],"where":[65],"coexist":[70],"forces":[71],"evaluations":[72],"separated":[74],"benchmarks,":[75],"causing":[76],"inconsistency":[77,108],"between":[78],"testing,":[81],"thus":[82],"leading":[83],"performance":[85,144,206],"degradation.":[86],"To":[87],"address":[88],"these":[89],"issues,":[90],"this":[91,141],"work":[92],"advances":[93],"two":[95,179],"aspects:":[96],"A":[97],"benchmark,":[99],"coined":[100],"as":[101,149],"UniBench300,":[102],"is":[103,130,208,248],"introduced":[104],"bridge":[106],"by":[109,125],"incorporating":[110],"task":[112],"data,":[113],"reducing":[114],"inference":[115],"passes":[116],"three":[118],"one":[120],"cutting":[122],"time":[123],"consumption":[124],"27%.":[126],"The":[127],"unification":[128,173,198],"process":[129],"reformulated":[131],"serial":[134],"format,":[135],"progressively":[136],"integrating":[137],"new":[138],"In":[140],"way,":[142],"degradation":[145,207,223],"can":[146],"be":[147,211],"specified":[148],"knowledge":[150],"forgetting":[151],"previous":[153],"tasks,":[154],"which":[155],"naturally":[156],"aligns":[157],"with":[158,214],"philosophy":[160],"continual":[162],"learning":[163],"(CL),":[164],"motivating":[165],"further":[166],"exploration":[167],"injecting":[169],"CL":[170,193],"into":[171],"process.":[174,199],"Extensive":[175],"experiments":[176],"conducted":[177],"baselines":[180],"four":[182],"benchmarks":[183],"demonstrate":[184],"significance":[186],"UniBench300":[188],"superiority":[191],"supporting":[195],"stable":[197],"Moreover,":[200],"while":[201],"conducting":[202],"dedicated":[203],"analyses,":[204],"found":[209],"negatively":[212],"correlated":[213],"network":[215],"capacity.":[216],"Additionally,":[217],"modality":[218],"discrepancies":[219],"contribute":[220],"varying":[222],"levels":[224],"across":[225],"(RGBT":[227],">":[228,230],"RGBD":[229],"RGBE":[231],"MMVOT),":[233],"offering":[234],"valuable":[235],"insights":[236],"future":[238],"vision":[240],"research.":[241],"Source":[242],"codes":[243],"proposed":[246],"available":[249],"at":[250],"https://github.com/Zhangyong-Tang/UniBench300.":[251]},"counts_by_year":[{"year":2026,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-25T00:00:00"}
