forked from PaddlePaddle/PaddleGAN
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add MidDaSv2 in ppgan.apps (PaddlePaddle#118)
* Add MidDaSv2 in ppgan.apps * remove ppgan/apps/midas/run.py
- Loading branch information
1 parent
2cc72be
commit 77b8bac
Showing
11 changed files
with
771 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
## Monocular Depth Estimation | ||
|
||
|
||
The implementation of MiDaSv2 refers to https://github.com/intel-isl/MiDaS. | ||
|
||
|
||
@article{Ranftl2020, | ||
author = {Ren\'{e} Ranftl and Katrin Lasinger and David Hafner and Konrad Schindler and Vladlen Koltun}, | ||
title = {Towards Robust Monocular Depth Estimation: Mixing Datasets for Zero-shot Cross-dataset Transfer}, | ||
journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence (TPAMI)}, | ||
year = {2020}, | ||
} |
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,164 @@ | ||
# Refer https://github.com/intel-isl/MiDaS | ||
|
||
import paddle | ||
import paddle.nn as nn | ||
|
||
|
||
def _make_encoder(backbone, | ||
features, | ||
use_pretrained, | ||
groups=1, | ||
expand=False, | ||
exportable=True): | ||
if backbone == "resnext101_wsl": | ||
# resnext101_wsl | ||
pretrained = _make_pretrained_resnext101_wsl(use_pretrained) | ||
scratch = _make_scratch([256, 512, 1024, 2048], | ||
features, | ||
groups=groups, | ||
expand=expand) | ||
else: | ||
print(f"Backbone '{backbone}' not implemented") | ||
assert False | ||
return pretrained, scratch | ||
|
||
|
||
def _make_scratch(in_shape, out_shape, groups=1, expand=False):
    """Create the four 3x3 projection convolutions applied to the encoder stages.

    Args:
        in_shape (sequence of int): Input channel counts; the first four
            entries are used, one per stage.
        out_shape (int): Base number of output channels.
        groups (int, optional): ``groups`` argument of every convolution.
            Defaults to 1.
        expand (bool, optional): If true, stage ``i`` outputs
            ``out_shape * (1, 2, 4, 8)[i]`` channels instead of ``out_shape``.
            Defaults to False.

    Returns:
        nn.Layer: Container holding ``layer1_rn`` .. ``layer4_rn``.
    """
    scratch = nn.Layer()

    # Per-stage channel multipliers; without ``expand`` every stage projects
    # to the same width.
    multipliers = (1, 2, 4, 8) if expand else (1, 1, 1, 1)

    for index, multiplier in enumerate(multipliers):
        # Attribute names and assignment order match layer1_rn .. layer4_rn so
        # the registered sublayers keep their names and ordering.
        setattr(
            scratch, f"layer{index + 1}_rn",
            nn.Conv2D(in_shape[index],
                      out_shape * multiplier,
                      kernel_size=3,
                      stride=1,
                      padding=1,
                      bias_attr=False,
                      groups=groups))

    return scratch
|
||
|
||
def _make_resnet_backbone(resnet):
    """Regroup a ResNet-style network into four stage attributes.

    Args:
        resnet: Network exposing ``conv1``, ``bn1``, ``relu``, ``maxpool`` and
            ``layer1`` .. ``layer4``.

    Returns:
        nn.Layer: Container with ``layer1`` .. ``layer4``, where ``layer1``
        chains the stem (conv, bn, relu, maxpool) with the first stage.
    """
    backbone = nn.Layer()

    # The stem is folded into the first stage so callers see four stages only.
    stem_and_first_stage = (resnet.conv1, resnet.bn1, resnet.relu,
                            resnet.maxpool, resnet.layer1)
    backbone.layer1 = nn.Sequential(*stem_and_first_stage)

    # Remaining stages are reused as they are.
    for stage_name in ("layer2", "layer3", "layer4"):
        setattr(backbone, stage_name, getattr(resnet, stage_name))

    return backbone
|
||
|
||
def _make_pretrained_resnext101_wsl(use_pretrained):
    """Instantiate ``resnext101_32x8d_wsl`` and wrap it as a four-stage backbone.

    Args:
        use_pretrained: Accepted for interface compatibility; this function
            does not consult it.

    Returns:
        The result of ``_make_resnet_backbone`` applied to the new network.
    """
    # Imported lazily, inside the function, as in the original code.
    from .resnext import resnext101_32x8d_wsl

    return _make_resnet_backbone(resnext101_32x8d_wsl())
|
||
|
||
class ResidualConvUnit(nn.Layer):
    """Residual unit: ReLU -> conv -> ReLU -> conv, plus a skip connection.
    """
    def __init__(self, features):
        """Create the two convolutions and the shared activation.

        Args:
            features (int): Number of input and output channels of both
                convolutions.
        """
        super().__init__()

        # Both convolutions share the same 3x3, stride-1, padded configuration.
        conv_options = dict(kernel_size=3, stride=1, padding=1, bias_attr=True)
        self.conv1 = nn.Conv2D(features, features, **conv_options)
        self.conv2 = nn.Conv2D(features, features, **conv_options)

        self.relu = nn.ReLU()

    def forward(self, x):
        """Apply the residual unit.

        Args:
            x (tensor): Input.

        Returns:
            tensor: Convolution branch output added to the activated input.
        """
        # The skip connection carries the activated input, not the raw one.
        activated = self.relu(x)
        branch = self.conv2(self.relu(self.conv1(activated)))

        return branch + activated
|
||
|
||
class FeatureFusionBlock(nn.Layer):
    """Fuse one or two feature maps, refine them and upsample by a factor of 2.
    """
    def __init__(self, features):
        """Create the two residual units used by the block.

        Args:
            features (int): Number of channels handled by both residual units.
        """
        super().__init__()

        self.resConfUnit1 = ResidualConvUnit(features)
        self.resConfUnit2 = ResidualConvUnit(features)

    def forward(self, *xs):
        """Fuse the inputs and return the upsampled result.

        Args:
            *xs: One or two tensors. When exactly two are given, the second is
                passed through ``resConfUnit1`` and added to the first.

        Returns:
            tensor: Output of ``resConfUnit2`` bilinearly upsampled by 2.
        """
        fused = xs[0]

        if len(xs) == 2:
            # NOTE(review): augmented assignment is kept on purpose; whether it
            # updates ``xs[0]`` in place depends on the tensor type - confirm
            # before changing.
            fused += self.resConfUnit1(xs[1])

        refined = self.resConfUnit2(fused)

        return nn.functional.interpolate(refined,
                                         scale_factor=2,
                                         mode="bilinear",
                                         align_corners=True)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,92 @@ | ||
# Refer https://github.com/intel-isl/MiDaS | ||
"""MidasNet: Network for monocular depth estimation trained by mixing several datasets. | ||
""" | ||
import numpy as np | ||
import paddle | ||
import paddle.nn as nn | ||
|
||
from .blocks import FeatureFusionBlock, _make_encoder | ||
|
||
|
||
class BaseModel(paddle.nn.Layer):
    """Base layer that can restore its parameters from a saved file."""
    def load(self, path):
        """Read a saved state with ``paddle.load`` and apply it to this layer.

        Args:
            path (str): Location of the saved parameters.
        """
        self.set_dict(paddle.load(path))
|
||
|
||
class MidasNet(BaseModel):
    """Network for monocular depth estimation.
    """
    def __init__(self, path=None, features=256, non_negative=True):
        """Init.

        Args:
            path (str, optional): Path to saved model. When given, the weights
                are loaded after the network is built. Defaults to None.
            features (int, optional): Number of features. Defaults to 256.
            non_negative (bool, optional): If True, the output head ends with
                a ReLU; otherwise with an identity layer. Defaults to True.
        """
        print("Loading weights: ", path)

        super(MidasNet, self).__init__()

        use_pretrained = path is not None

        # The encoder backbone is fixed to "resnext101_wsl".
        self.pretrained, self.scratch = _make_encoder(
            backbone="resnext101_wsl",
            features=features,
            use_pretrained=use_pretrained)

        self.scratch.refinenet4 = FeatureFusionBlock(features)
        self.scratch.refinenet3 = FeatureFusionBlock(features)
        self.scratch.refinenet2 = FeatureFusionBlock(features)
        self.scratch.refinenet1 = FeatureFusionBlock(features)

        # A single trailing activation is enough: ReLU is idempotent, so the
        # second ReLU that used to be appended when ``non_negative`` was set
        # changed nothing. It holds no parameters, so dropping it leaves the
        # indices of the convolutions (0, 2, 4) unchanged.
        self.scratch.output_conv = nn.Sequential(
            nn.Conv2D(features, 128, kernel_size=3, stride=1, padding=1),
            nn.Upsample(scale_factor=2, mode="bilinear"),
            nn.Conv2D(128, 32, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.Conv2D(32, 1, kernel_size=1, stride=1, padding=0),
            nn.ReLU() if non_negative else nn.Identity(),
        )

        if path:
            self.load(path)

    def forward(self, x):
        """Forward pass.

        Args:
            x (tensor): input data (image)

        Returns:
            tensor: depth, i.e. the head output with axis 1 squeezed out
        """
        # Encoder: each stage consumes the previous stage's output.
        layer_1 = self.pretrained.layer1(x)
        layer_2 = self.pretrained.layer2(layer_1)
        layer_3 = self.pretrained.layer3(layer_2)
        layer_4 = self.pretrained.layer4(layer_3)

        # Project every encoder stage with its own scratch convolution.
        layer_1_rn = self.scratch.layer1_rn(layer_1)
        layer_2_rn = self.scratch.layer2_rn(layer_2)
        layer_3_rn = self.scratch.layer3_rn(layer_3)
        layer_4_rn = self.scratch.layer4_rn(layer_4)

        # Decoder: start from the deepest stage and fuse in shallower ones.
        path_4 = self.scratch.refinenet4(layer_4_rn)
        path_3 = self.scratch.refinenet3(path_4, layer_3_rn)
        path_2 = self.scratch.refinenet2(path_3, layer_2_rn)
        path_1 = self.scratch.refinenet1(path_2, layer_1_rn)

        out = self.scratch.output_conv(path_1)

        return paddle.squeeze(out, axis=1)
Oops, something went wrong.