{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,1]],"date-time":"2026-05-01T16:40:45Z","timestamp":1777653645095,"version":"3.51.4"},"reference-count":108,"publisher":"IEEE","license":[{"start":{"date-parts":[[2023,6,1]],"date-time":"2023-06-01T00:00:00Z","timestamp":1685577600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2023,6,1]],"date-time":"2023-06-01T00:00:00Z","timestamp":1685577600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023,6]]},"DOI":"10.1109\/cvprw59228.2023.00308","type":"proceedings-article","created":{"date-parts":[[2023,8,14]],"date-time":"2023-08-14T17:40:56Z","timestamp":1692034856000},"page":"3064-3076","source":"Crossref","is-referenced-by-count":10,"title":["The Second Monocular Depth Estimation Challenge"],"prefix":"10.1109","author":[{"given":"Jaime","family":"Spencer","sequence":"first","affiliation":[{"name":"University of Surrey"}]},{"given":"C. Stella","family":"Qian","sequence":"additional","affiliation":[{"name":"Aston University"}]},{"given":"Michaela","family":"Trescakova","sequence":"additional","affiliation":[{"name":"University of Southampton"}]},{"given":"Chris","family":"Russell","sequence":"additional","affiliation":[{"name":"Amazon"}]},{"given":"Simon","family":"Hadfield","sequence":"additional","affiliation":[{"name":"University of Surrey"}]},{"given":"Erich W.","family":"Graf","sequence":"additional","affiliation":[{"name":"University of Southampton"}]},{"given":"Wendy J.","family":"Adams","sequence":"additional","affiliation":[{"name":"University of Southampton"}]},{"given":"Andrew J.","family":"Schofield","sequence":"additional","affiliation":[{"name":"Aston University"}]},{"given":"James","family":"Elder","sequence":"additional","affiliation":[{"name":"York University"}]},{"given":"Richard","family":"Bowden","sequence":"additional","affiliation":[{"name":"University of Surrey"}]},{"given":"Ali","family":"Anwar","sequence":"additional","affiliation":[{"name":"University of Antwerp"}]},{"given":"Hao","family":"Chen","sequence":"additional","affiliation":[{"name":"Zhejiang University"}]},{"given":"Xiaozhi","family":"Chen","sequence":"additional","affiliation":[{"name":"DJI Technology"}]},{"given":"Kai","family":"Cheng","sequence":"additional","affiliation":[{"name":"University of Science and Technology of China"}]},{"given":"Yuchao","family":"Dai","sequence":"additional","affiliation":[{"name":"Northwestern Polytechnical University"}]},{"given":"Huynh Thai","family":"Hoa","sequence":"additional","affiliation":[{"name":"DeltaX"}]},{"given":"Sadat","family":"Hossain","sequence":"additional","affiliation":[{"name":"DeltaX"}]},{"given":"Jianmian","family":"Huang","sequence":"additional","affiliation":[{"name":"Independent"}]},{"given":"Mohan","family":"Jing","sequence":"additional","affiliation":[{"name":"University of Science and Technology of China"}]},{"given":"Bo","family":"Li","sequence":"additional","affiliation":[{"name":"Northwestern Polytechnical University"}]},{"given":"Chao","family":"Li","sequence":"additional","affiliation":[{"name":"VIVO"}]},{"given":"Baojun","family":"Li","sequence":"additional","affiliation":[{"name":"Independent"}]},{"given":"Zhiwen","family":"Liu","sequence":"additional","affiliation":[{"name":"VIVO"}]},{"given":"Stefano","family":"Mattoccia","sequence":"additional","affiliation":[{"name":"University of Bologna"}]},{"given":"Siegfried","family":"Mercelis","sequence":"additional","affiliation":[{"name":"University of Antwerp"}]},{"given":"Myungwoo","family":"Nam","sequence":"additional","affiliation":[{"name":"DeltaX"}]},{"given":"Matteo","family":"Poggi","sequence":"additional","affiliation":[{"name":"University of Bologna"}]},{"given":"Xiaohua","family":"Qi","sequence":"additional","affiliation":[{"name":"University of Science and Technology of China"}]},{"given":"Jiahui","family":"Ren","sequence":"additional","affiliation":[{"name":"Northwestern Polytechnical University"}]},{"given":"Yang","family":"Tang","sequence":"additional","affiliation":[{"name":"East China University of Science and Technology"}]},{"given":"Fabio","family":"Tosi","sequence":"additional","affiliation":[{"name":"University of Bologna"}]},{"given":"Linh","family":"Trinh","sequence":"additional","affiliation":[{"name":"University of Antwerp"}]},{"given":"S. M. Nadim","family":"Uddin","sequence":"additional","affiliation":[{"name":"DeltaX"}]},{"given":"Khan Muhammad","family":"Umair","sequence":"additional","affiliation":[{"name":"DeltaX"}]},{"given":"Kaixuan","family":"Wang","sequence":"additional","affiliation":[{"name":"DJI Technology"}]},{"given":"Yufei","family":"Wang","sequence":"additional","affiliation":[{"name":"Northwestern Polytechnical University"}]},{"given":"Yixing","family":"Wang","sequence":"additional","affiliation":[{"name":"VIVO"}]},{"given":"Mochu","family":"Xiang","sequence":"additional","affiliation":[{"name":"Northwestern Polytechnical University"}]},{"given":"Guangkai","family":"Xu","sequence":"additional","affiliation":[{"name":"University of Science and Technology of China"}]},{"given":"Wei","family":"Yin","sequence":"additional","affiliation":[{"name":"DJI Technology"}]},{"given":"Jun","family":"Yu","sequence":"additional","affiliation":[{"name":"University of Science and Technology of China"}]},{"given":"Qi","family":"Zhang","sequence":"additional","affiliation":[{"name":"VIVO"}]},{"given":"Chaoqiang","family":"Zhao","sequence":"additional","affiliation":[{"name":"East China University of Science and Technology"}]}],"member":"263","reference":[{"key":"ref57","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2019.2930258"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01167"},{"key":"ref59","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.438"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i3.16329"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7299152"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1109\/3DV53792.2021.00032"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01170"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00218"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2020.2974454"},{"key":"ref46","first-page":"331","article-title":"Evaluation of CNN-Based Single-Image Depth Estimation Methods","author":"koch","year":"2018","journal-title":"European Conf on Comp Vision Workshop"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01249-6_43"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00714"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1109\/3DV.2016.32"},{"key":"ref42","article-title":"Spatial Transformer Networks","volume":"28","author":"jaderberg","year":"2015","journal-title":"Advances in neural information processing systems"},{"key":"ref41","article-title":"Cutdepth: Edge-aware data augmentation in depth estimation","author":"ishii","year":"2021"},{"key":"ref44","article-title":"What Uncertainties Do We Need in Bayesian Deep Learning for Computer Vision?","volume":"30","author":"kendall","year":"2017","journal-title":"Advances in neural information processing systems"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00481"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-20870-7_41"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19769-7_28"},{"key":"ref7","first-page":"4009","article-title":"Adabins: Depth estimation using adaptive bins","author":"bhat","year":"2021","journal-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition"},{"key":"ref9","article-title":"Zoedepth: Zero-shot transfer by combining relative and metric depth","author":"bhat","year":"2023"},{"key":"ref4","article-title":"BEit: BERT pre-training of image transformers","author":"bao","year":"2022","journal-title":"International Conference on Learning Representations"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58536-5_35"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1038\/s41597-019-0168-5"},{"key":"ref5","article-title":"Arkitscenes&#x2013;a diverse real-world dataset for 3d indoor scene understanding using mobile rgb-d data","author":"baruch","year":"2021"},{"key":"ref100","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00389"},{"key":"ref101","doi-asserted-by":"publisher","DOI":"10.1109\/IROS51168.2021.9636616"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW53098.2021.00288"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2016.7487797"},{"key":"ref37","article-title":"Holopix50k: A large-scale in-the-wild stereo image dataset","author":"hua","year":"2020"},{"key":"ref36","first-page":"409","article-title":"One thousand and one hours: Self-driving motion prediction dataset","author":"houston","year":"2021","journal-title":"Conference on Robot Learning"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00907"},{"key":"ref30","first-page":"6847","article-title":"PLADE-Net: Towards Pixel-Level Accuracy for Self-Supervised Single-View Depth Estimation with Neural Positional Encoding and Distilled Matting Loss","author":"gonzalez bello","year":"2021","journal-title":"Conference on Computer Vision and Pattern Recognition"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/TITS.2021.3117059"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00256"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1109\/IVS.2019.8813809"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2019.2926463"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00214"},{"key":"ref23","first-page":"11826","article-title":"Camconvs: Camera-aware multi-scale convolutions for single-view depth","author":"facil","year":"2019","journal-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2021.3068942"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46484-8_45"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW50498.2020.00510"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.304"},{"key":"ref21","doi-asserted-by":"crossref","first-page":"248","DOI":"10.1109\/CVPR.2009.5206848","article-title":"Imagenet: A large-scale hierarchical image database","author":"deng","year":"2009","journal-title":"2009 IEEE Conference on Computer Vision and Pattern Recognition"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.699"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1177\/0278364913491297"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00393"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00895"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.33018001"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/JSEN.2021.3120753"},{"key":"ref14","article-title":"Single-image depth perception in the wild","volume":"29","author":"chen","year":"2016","journal-title":"Advances in neural information processing systems"},{"key":"ref97","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00027"},{"key":"ref96","article-title":"Diversedepth: Affine-invariant depth prediction using diverse data","author":"yin","year":"2020"},{"key":"ref11","article-title":"Virtual kitti 2","author":"cabon","year":"2020"},{"key":"ref99","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00271"},{"key":"ref10","first-page":"611","article-title":"A naturalistic open source movie for optical flow evaluation","author":"butler","year":"2012","journal-title":"Computer Vision&#x2013;ECCV 2012 12th European Conference on Computer Vision Florence Italy October 7-13 2012 Proceedings Part VI 12"},{"key":"ref98","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00212"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.350"},{"key":"ref16","article-title":"Diml\/cvl rgb-d dataset: 2m rgb-d images of natural indoor and outdoor scenes","author":"cho","year":"2021"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.89"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.261"},{"key":"ref93","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00136"},{"key":"ref92","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00099"},{"key":"ref95","doi-asserted-by":"crossref","first-page":"7282","DOI":"10.1109\/TPAMI.2021.3097396","article-title":"Virtual normal: Enforcing geometric constraints for accurate and robust depth prediction","volume":"44","author":"yin","year":"2021","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"ref94","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00186"},{"key":"ref91","doi-asserted-by":"publisher","DOI":"10.1109\/3DV53792.2021.00056"},{"key":"ref90","doi-asserted-by":"publisher","DOI":"10.1109\/ITSC48978.2021.9565009"},{"key":"ref89","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00069"},{"key":"ref86","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00225"},{"key":"ref85","doi-asserted-by":"publisher","DOI":"10.1109\/IROS45743.2020.9341801"},{"key":"ref88","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00040"},{"key":"ref87","article-title":"Convnext v2: Co-designing and scaling convnets with masked autoencoders","author":"woo","year":"2023"},{"key":"ref82","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.596"},{"key":"ref81","first-page":"11","article-title":"Sparsity Invariant CNNs","author":"uhrig","year":"2018","journal-title":"International Conference on 3D Vision"},{"key":"ref84","doi-asserted-by":"publisher","DOI":"10.1109\/3DV.2019.00046"},{"key":"ref83","article-title":"Diode: A dense indoor and outdoor depth dataset","author":"vasiljevic","year":"2019"},{"key":"ref80","first-page":"6105","article-title":"EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks","author":"tan","year":"2019","journal-title":"International Conference on Machine Learning"},{"key":"ref79","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2012.6385773"},{"key":"ref108","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.700"},{"key":"ref78","article-title":"Deconstructing self-supervised monocular reconstruction: The design decisions that matter","author":"spencer","year":"2022","journal-title":"Transactions on Machine Learning Research"},{"key":"ref106","doi-asserted-by":"publisher","DOI":"10.1109\/3DV57658.2022.00077"},{"key":"ref107","article-title":"Self-Supervised Monocular Depth Estimation with Internal Feature Fusion","author":"zhou","year":"2021","journal-title":"British Machine Vision Conference"},{"key":"ref75","doi-asserted-by":"crossref","first-page":"105524","DOI":"10.1016\/j.asoc.2019.105524","article-title":"Investigating the impact of data normalization on classification performance","volume":"97","author":"singh","year":"2020","journal-title":"Applied Soft Computing"},{"key":"ref104","author":"zendel","year":"0","journal-title":"Robust Vision Challenge 2022 &#x2014; robustvi-sion net"},{"key":"ref74","first-page":"572","article-title":"Feature-Metric Loss for Self-supervised Learning of Depth and Egomotion","author":"shu","year":"2020","journal-title":"European Conference on Computer Vision"},{"key":"ref105","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00043"},{"key":"ref77","doi-asserted-by":"publisher","DOI":"10.1109\/WACVW58289.2023.00069"},{"key":"ref102","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW59228.2023.00143"},{"key":"ref76","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01441"},{"key":"ref103","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00391"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/WACV56688.2023.00581"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1038\/srep35805"},{"key":"ref71","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01073"},{"key":"ref70","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.243"},{"key":"ref73","first-page":"835","article-title":"Deep Virtual Stereo Odometry: Leveraging Deep Depth Prediction for Monocular Direct Sparse Odometry","author":"rui","year":"2018","journal-title":"European Conference on Computer Vision"},{"key":"ref72","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.594"},{"key":"ref68","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.01252"},{"key":"ref67","article-title":"Towards robust monocular depth estimation: Mixing datasets for zero-shot cross-dataset transfer","author":"ranftl","year":"2020","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"ref69","article-title":"Deep robust single image depth estimation neural network using scene understanding","volume":"2","author":"ren","year":"2019","journal-title":"CVPR Workshops"},{"key":"ref64","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2019.8793621"},{"key":"ref63","article-title":"Codalab competitions: An open source platform to organize scientific challenges","author":"pavao","year":"2022","journal-title":"Technical Report"},{"key":"ref66","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01196"},{"key":"ref65","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00329"},{"key":"ref60","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00956"},{"key":"ref62","article-title":"From 2D to 3D: Re-thinking Benchmarking of Monocular Depth Prediction","author":"\u00f6rnek","year":"2022"},{"key":"ref61","article-title":"Indoor segmentation and support inference from rgbd images","author":"nathan silberman","year":"2012","journal-title":"ECCV"}],"event":{"name":"2023 IEEE\/CVF Conference on Computer Vision and Pattern Recognition Workshops (CVPRW)","location":"Vancouver, BC, Canada","start":{"date-parts":[[2023,6,17]]},"end":{"date-parts":[[2023,6,24]]}},"container-title":["2023 IEEE\/CVF Conference on Computer Vision and Pattern Recognition Workshops (CVPRW)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/10208270\/10208119\/10208890.pdf?arnumber=10208890","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,9,4]],"date-time":"2023-09-04T17:43:12Z","timestamp":1693849392000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10208890\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,6]]},"references-count":108,"URL":"https:\/\/doi.org\/10.1109\/cvprw59228.2023.00308","relation":{},"subject":[],"published":{"date-parts":[[2023,6]]}}}