{"id":"https://openalex.org/W6929377562","doi":"https://doi.org/10.48550/arxiv.2504.01901","title":"Ross3D: Reconstructive Visual Instruction Tuning with 3D-Awareness","display_name":"Ross3D: Reconstructive Visual Instruction Tuning with 3D-Awareness","publication_year":2025,"publication_date":"2025-04-02","ids":{"openalex":"https://openalex.org/W6929377562","doi":"https://doi.org/10.48550/arxiv.2504.01901"},"language":"en","primary_location":{"id":"doi:10.48550/arxiv.2504.01901","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2504.01901","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2504.01901","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Wang, Haochen","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Wang, Haochen","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Zhao, Yucheng","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhao, Yucheng","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Wang, Tiancai","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Tiancai","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Fan, Haoqiang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Fan, Haoqiang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Zhang, Xiangyu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Xiangyu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":null,"display_name":"Zhang, Zhaoxiang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Zhaoxiang","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":6,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":true,"primary_topic":{"id":"https://openalex.org/T14072","display_name":"Immunotoxicology and immune responses","score":0.2750999927520752,"subfield":{"id":"https://openalex.org/subfields/2403","display_name":"Immunology"},"field":{"id":"https://openalex.org/fields/24","display_name":"Immunology and Microbiology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T14072","display_name":"Immunotoxicology and immune responses","score":0.2750999927520752,"subfield":{"id":"https://openalex.org/subfields/2403","display_name":"Immunology"},"field":{"id":"https://openalex.org/fields/24","display_name":"Immunology and Microbiology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T12281","display_name":"Animal testing and alternatives","score":0.05570000037550926,"subfield":{"id":"https://openalex.org/subfields/3404","display_name":"Small Animals"},"field":{"id":"https://openalex.org/fields/34","display_name":"Veterinary"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},{"id":"https://openalex.org/T10613","display_name":"Virus-based gene therapy research","score":0.03830000013113022,"subfield":{"id":"https://openalex.org/subfields/1311","display_name":"Genetics"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/focus","display_name":"Focus (optics)","score":0.6414999961853027},{"id":"https://openalex.org/keywords/visualization","display_name":"Visualization","score":0.447299987077713},{"id":"https://openalex.org/keywords/aggregate","display_name":"Aggregate (composite)","score":0.3865000009536743},{"id":"https://openalex.org/keywords/3d-model","display_name":"3d model","score":0.3370000123977661},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.30660000443458557},{"id":"https://openalex.org/keywords/work","display_name":"Work (physics)","score":0.3009999990463257}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.722100019454956},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.6414999961853027},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5375999808311462},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.4810999929904938},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.447299987077713},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.4415999948978424},{"id":"https://openalex.org/C4679612","wikidata":"https://www.wikidata.org/wiki/Q866298","display_name":"Aggregate (composite)","level":2,"score":0.3865000009536743},{"id":"https://openalex.org/C3019007443","wikidata":"https://www.wikidata.org/wiki/Q568742","display_name":"3d model","level":2,"score":0.3370000123977661},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.30660000443458557},{"id":"https://openalex.org/C18762648","wikidata":"https://www.wikidata.org/wiki/Q42213","display_name":"Work (physics)","level":2,"score":0.3009999990463257},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.272599995136261},{"id":"https://openalex.org/C109950114","wikidata":"https://www.wikidata.org/wiki/Q4464732","display_name":"3D reconstruction","level":2,"score":0.27239999175071716},{"id":"https://openalex.org/C172367668","wikidata":"https://www.wikidata.org/wiki/Q6504956","display_name":"Data visualization","level":3,"score":0.2669999897480011},{"id":"https://openalex.org/C2777211547","wikidata":"https://www.wikidata.org/wiki/Q17141490","display_name":"Training (meteorology)","level":2,"score":0.26019999384880066},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.25600001215934753}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2504.01901","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2504.01901","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2504.01901","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2504.01901","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"The":[0,88,101],"rapid":[1],"development":[2],"of":[3,27,120,147],"Large":[4],"Multimodal":[5],"Models":[6],"(LMMs)":[7],"for":[8,20],"2D":[9,49],"images":[10],"and":[11,85],"videos":[12],"has":[13,32],"spurred":[14],"efforts":[15],"to":[16,104,111,116],"adapt":[17],"these":[18],"models":[19],"interpreting":[21],"3D":[22,29,46,53,131,149],"scenes.":[23],"However,":[24],"the":[25,78,121],"absence":[26],"large-scale":[28],"vision-language":[30],"datasets":[31],"posed":[33],"a":[34,60,117],"significant":[35,141],"obstacle.":[36],"To":[37],"address":[38],"this":[39],"issue,":[40],"typical":[41],"approaches":[42],"focus":[43],"on":[44],"injecting":[45],"awareness":[47],"into":[48,77],"LMMs":[50],"by":[51,94],"designing":[52],"input-level":[54],"scene":[55,132],"representations.":[56],"This":[57],"work":[58],"provides":[59],"new":[61],"perspective.":[62],"We":[63],"introduce":[64],"reconstructive":[65],"visual":[66,75],"instruction":[67],"tuning":[68],"with":[69],"3D-awareness":[70],"(Ross3D),":[71],"which":[72],"integrates":[73],"3D-aware":[74],"supervision":[76],"training":[79],"procedure.":[80],"Specifically,":[81],"it":[82],"incorporates":[83],"cross-view":[84],"global-view":[86],"reconstruction.":[87],"former":[89],"requires":[90],"reconstructing":[91],"masked":[92],"views":[93,110],"aggregating":[95],"overlapping":[96],"information":[97,106],"from":[98,107],"other":[99],"views.":[100],"latter":[102],"aims":[103],"aggregate":[105],"all":[108],"available":[109],"recover":[112],"Bird's-Eye-View":[113],"images,":[114],"contributing":[115],"comprehensive":[118],"overview":[119],"entire":[122],"scene.":[123],"Empirically,":[124],"Ross3D":[125],"achieves":[126],"state-of-the-art":[127],"performance":[128],"across":[129],"various":[130],"understanding":[133],"benchmarks.":[134],"More":[135],"importantly,":[136],"our":[137],"semi-supervised":[138],"experiments":[139],"demonstrate":[140],"potential":[142],"in":[143],"leveraging":[144],"large":[145],"amounts":[146],"unlabeled":[148],"vision-only":[150],"data.":[151]},"counts_by_year":[],"updated_date":"2025-11-06T06:51:31.235846","created_date":"2025-10-10T00:00:00"}
