{"id":"https://openalex.org/W7108222254","doi":"https://doi.org/10.48550/arxiv.2511.22404","title":"UAV-MM3D: A Large-Scale Synthetic Benchmark for 3D Perception of Unmanned Aerial Vehicles with Multi-Modal Data","display_name":"UAV-MM3D: A Large-Scale Synthetic Benchmark for 3D Perception of Unmanned Aerial Vehicles with Multi-Modal Data","publication_year":2025,"publication_date":"2025-11-27","ids":{"openalex":"https://openalex.org/W7108222254","doi":"https://doi.org/10.48550/arxiv.2511.22404"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2511.22404","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2511.22404","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2511.22404","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Zou, Longkun","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Zou, Longkun","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Wang, Jiale","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Jiale","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Liang, Rongqin","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liang, Rongqin","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Wu, Hai","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wu, Hai","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Chen, Ke","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Ke","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":null,"display_name":"Wang, Yaowei","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Yaowei","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":6,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11133","display_name":"UAV Applications and Optimization","score":0.5008000135421753,"subfield":{"id":"https://openalex.org/subfields/2202","display_name":"Aerospace Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11133","display_name":"UAV Applications and Optimization","score":0.5008000135421753,"subfield":{"id":"https://openalex.org/subfields/2202","display_name":"Aerospace Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11489","display_name":"Air Traffic Management and Optimization","score":0.1599999964237213,"subfield":{"id":"https://openalex.org/subfields/2202","display_name":"Aerospace Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10191","display_name":"Robotics and Sensor-Based Localization","score":0.1251000016927719,"subfield":{"id":"https://openalex.org/subfields/2202","display_name":"Aerospace Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.6518999934196472},{"id":"https://openalex.org/keywords/trajectory","display_name":"Trajectory","score":0.6065000295639038},{"id":"https://openalex.org/keywords/perception","display_name":"Perception","score":0.49320000410079956},{"id":"https://openalex.org/keywords/modalities","display_name":"Modalities","score":0.46939998865127563},{"id":"https://openalex.org/keywords/baseline","display_name":"Baseline (sea)","score":0.4693000018596649},{"id":"https://openalex.org/keywords/frame","display_name":"Frame (networking)","score":0.4560999870300293},{"id":"https://openalex.org/keywords/sensor-fusion","display_name":"Sensor fusion","score":0.43070000410079956}],"concepts":[{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.6518999934196472},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6449999809265137},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6237999796867371},{"id":"https://openalex.org/C13662910","wikidata":"https://www.wikidata.org/wiki/Q193139","display_name":"Trajectory","level":2,"score":0.6065000295639038},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.5853999853134155},{"id":"https://openalex.org/C26760741","wikidata":"https://www.wikidata.org/wiki/Q160402","display_name":"Perception","level":2,"score":0.49320000410079956},{"id":"https://openalex.org/C2779903281","wikidata":"https://www.wikidata.org/wiki/Q6888026","display_name":"Modalities","level":2,"score":0.46939998865127563},{"id":"https://openalex.org/C12725497","wikidata":"https://www.wikidata.org/wiki/Q810247","display_name":"Baseline (sea)","level":2,"score":0.4693000018596649},{"id":"https://openalex.org/C126042441","wikidata":"https://www.wikidata.org/wiki/Q1324888","display_name":"Frame (networking)","level":2,"score":0.4560999870300293},{"id":"https://openalex.org/C33954974","wikidata":"https://www.wikidata.org/wiki/Q486494","display_name":"Sensor fusion","level":2,"score":0.43070000410079956},{"id":"https://openalex.org/C63584917","wikidata":"https://www.wikidata.org/wiki/Q333286","display_name":"Bounding overwatch","level":2,"score":0.3993000090122223},{"id":"https://openalex.org/C160920958","wikidata":"https://www.wikidata.org/wiki/Q7662746","display_name":"Synthetic data","level":2,"score":0.3531999886035919},{"id":"https://openalex.org/C133462117","wikidata":"https://www.wikidata.org/wiki/Q4929239","display_name":"Data collection","level":2,"score":0.3222000002861023},{"id":"https://openalex.org/C2780226545","wikidata":"https://www.wikidata.org/wiki/Q6888030","display_name":"Modality (human\u2013computer interaction)","level":2,"score":0.3158000111579895},{"id":"https://openalex.org/C79403827","wikidata":"https://www.wikidata.org/wiki/Q3988","display_name":"Real-time computing","level":1,"score":0.30219998955726624},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.29580000042915344},{"id":"https://openalex.org/C147037132","wikidata":"https://www.wikidata.org/wiki/Q6865426","display_name":"Minimum bounding box","level":3,"score":0.27570000290870667},{"id":"https://openalex.org/C51399673","wikidata":"https://www.wikidata.org/wiki/Q504027","display_name":"Lidar","level":2,"score":0.2685000002384186},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.2606000006198883},{"id":"https://openalex.org/C2776321320","wikidata":"https://www.wikidata.org/wiki/Q857525","display_name":"Annotation","level":2,"score":0.25209999084472656}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2511.22404","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2511.22404","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2511.22404","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2511.22404","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/11","score":0.5168027877807617,"display_name":"Sustainable cities and communities"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Accurate":[0],"perception":[1,72,184],"of":[2,47,185],"UAVs":[3,133],"in":[4],"complex":[5],"low-altitude":[6,70],"environments":[7],"is":[8,53],"critical":[9],"for":[10,69,181],"airspace":[11,37],"security":[12],"and":[13,24,41,50,55,73,90,104,112,125,142,155,173],"related":[14,131],"intelligent":[15],"systems.":[16],"Developing":[17],"reliable":[18],"solutions":[19],"requires":[20],"large-scale,":[21],"accurately":[22],"annotated,":[23],"multimodal":[25,66,152],"data.":[26],"However,":[27],"real-world":[28],"UAV":[29,71,99,158],"data":[30],"collection":[31],"faces":[32],"inherent":[33],"constraints":[34],"due":[35],"to":[36,132,162],"regulations,":[38],"privacy":[39],"concerns,":[40],"environmental":[42],"variability,":[43],"while":[44],"manual":[45],"annotation":[46],"3D":[48,136,183],"poses":[49],"cross-modal":[51],"correspondences":[52],"time-consuming":[54],"costly.":[56],"To":[57],"overcome":[58],"these":[59],"challenges,":[60],"we":[61],"introduce":[62],"UAV-MM3D,":[63],"a":[64,150,156,178],"high-fidelity":[65],"synthetic":[67],"dataset":[68],"motion":[74],"understanding.":[75],"It":[76],"comprises":[77],"400K":[78],"synchronized":[79],"frames":[80],"across":[81],"diverse":[82],"scenes":[83],"(urban":[84],"areas,":[85],"suburbs,":[86],"forests,":[87],"coastal":[88],"regions)":[89],"weather":[91],"conditions":[92],"(clear,":[93],"cloudy,":[94],"rainy,":[95],"foggy),":[96],"featuring":[97],"multiple":[98],"models":[100],"(micro,":[101],"small,":[102],"medium-sized)":[103],"five":[105],"modalities":[106],"-":[107],"RGB,":[108],"IR,":[109],"LiDAR,":[110],"Radar,":[111],"DVS":[113],"(Dynamic":[114],"Vision":[115],"Sensor).":[116],"Each":[117],"frame":[118],"provides":[119],"2D/3D":[120],"bounding":[121],"boxes,":[122],"6-DoF":[123],"poses,":[124],"instance-level":[126],"annotations,":[127,175],"enabling":[128],"core":[129],"tasks":[130],"such":[134],"as":[135],"detection,":[137],"pose":[138],"estimation,":[139],"target":[140],"tracking,":[141],"short-term":[143],"trajectory":[144,159],"forecasting.":[145],"We":[146],"further":[147],"propose":[148],"LGFusionNet,":[149],"LiDAR-guided":[151],"fusion":[153],"baseline,":[154],"dedicated":[157],"prediction":[160],"baseline":[161],"facilitate":[163],"benchmarking.":[164],"With":[165],"its":[166],"controllable":[167],"simulation":[168],"environment,":[169],"comprehensive":[170],"scenario":[171],"coverage,":[172],"rich":[174],"UAV3D":[176],"offers":[177],"public":[179],"benchmark":[180],"advancing":[182],"UAVs.":[186]},"counts_by_year":[],"updated_date":"2025-12-03T00:07:38.036990","created_date":"2025-12-03T00:00:00"}
