
Commit f6af2c9

VitorGuizilini authored and RaresAmbrus committed
packnet-sfm
1 parent 32c2a48 commit f6af2c9


43 files changed, +2267 -2 lines changed

Diff for: LICENSE.md

+21
@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2019 Toyota Research Institute (TRI)

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

Diff for: Makefile

+43
@@ -0,0 +1,43 @@
# Copyright 2020 Toyota Research Institute. All rights reserved.

DEPTH_TYPE ?= None
CROP ?= None
SAVE_OUTPUT ?= None

PYTHON ?= python
DOCKER_IMAGE ?= packnet-sfm:master-latest
DOCKER_OPTS := --name packnet-sfm --rm -it \
	-e DISPLAY=${DISPLAY} \
	-e XAUTHORITY \
	-e NVIDIA_DRIVER_CAPABILITIES=all \
	-v ~/.cache:/root/.cache \
	-v /data:/data \
	-v ${PWD}:/workspace/self-supervised-learning \
	-v /tmp/.X11-unix/X0:/tmp/.X11-unix/X0 \
	-v /dev/null:/dev/raw1394 \
	-w /workspace/self-supervised-learning \
	--shm-size=444G \
	--privileged \
	--network=host

.PHONY: all clean docker-build

all: clean

clean:
	find . -name "*.pyc" | xargs rm -f && \
	find . -name "__pycache__" | xargs rm -rf

docker-build:
	docker build \
	-t ${DOCKER_IMAGE} . -f docker/Dockerfile

docker-start-interactive: docker-build
	nvidia-docker run ${DOCKER_OPTS} ${DOCKER_IMAGE} \
	bash

docker-evaluate-depth: docker-build
	nvidia-docker run ${DOCKER_OPTS} ${DOCKER_IMAGE} \
	bash -c "bash scripts/evaluate_depth.sh ${MODEL} ${INPUT_PATH} ${DEPTH_TYPE} ${CROP} ${SAVE_OUTPUT}"

Diff for: README.md

+169 -2
@@ -1,2 +1,169 @@
-# packnet-sfm
-Code for "PackNet-SfM: 3D Packing for Self-Supervised Monocular Depth Estimation"

[<img src="/media/figs/tri-logo.png" width="30%">](https://www.tri.global/)

This repository contains code for the following papers:

## 3D Packing for Self-Supervised Monocular Depth Estimation
*Vitor Guizilini, Rares Ambrus, Sudeep Pillai, Allan Raventos and Adrien Gaidon*

[**[Full paper]**](https://arxiv.org/abs/1905.02693)
[**[YouTube]**](https://www.youtube.com/watch?v=b62iDkLgGSI)

## Robust Semi-Supervised Monocular Depth Estimation with Reprojected Distances
*Vitor Guizilini, Jie Li, Rares Ambrus, Sudeep Pillai and Adrien Gaidon*

[**[Full paper]**](https://arxiv.org/abs/1910.01765)
[**[YouTube]**](https://www.youtube.com/watch?v=cSwuF-XA4sg)

## Two Stream Networks for Self-Supervised Ego-Motion Estimation
*Rares Ambrus, Vitor Guizilini, Jie Li, Sudeep Pillai and Adrien Gaidon*

[**[Full paper]**](https://arxiv.org/abs/1910.01764)

## Semantically-Guided Representation Learning for Self-Supervised Monocular Depth
*Vitor Guizilini, Rui Hou, Jie Li, Rares Ambrus and Adrien Gaidon*

[**[Full paper]**](https://arxiv.org/abs/2002.12319)

## SuperDepth: Self-Supervised, Super-Resolved Monocular Depth Estimation
*Sudeep Pillai, Rares Ambrus and Adrien Gaidon*

[**[Full paper]**](https://arxiv.org/abs/1810.01849)
[**[YouTube]**](https://www.youtube.com/watch?v=jKNgBeBMx0I&t=33s)

## Contributions

- **PackNet**: A new convolutional network architecture for high-resolution self-supervised monocular depth estimation. We propose new packing and unpacking blocks that jointly leverage 3D convolutions to learn representations that maximally propagate dense appearance and geometric information while still being able to run in real time. A minimal sketch of the packing idea is shown after this list.

- **Weak Velocity Supervision**: A novel optional loss that can leverage the camera’s velocity when available (e.g. from cars, robots, mobile phones) to solve the inherent scale ambiguity in monocular vision. A sketch of this loss is also shown after this list.

- **Dense Depth for Automated Driving (DDAD)**: A new dataset that leverages diverse logs from a fleet of well-calibrated self-driving cars equipped with cameras and high-accuracy long-range LiDARs. Compared to existing benchmarks, DDAD enables much more accurate depth evaluation at range, which is key for high-resolution monocular depth estimation methods.
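
The packing idea can be illustrated with a minimal PyTorch sketch. This is not the repository's implementation: the Space2Depth folding, the single 3D convolution, and all layer sizes below are assumptions chosen only to show how spatial detail is folded into channels and mixed with a 3D convolution before projecting to the desired feature width.

```python
import torch
import torch.nn as nn


def space_to_depth(x, r=2):
    """Fold each r x r spatial neighborhood into the channel dimension."""
    b, c, h, w = x.shape
    x = x.reshape(b, c, h // r, r, w // r, r)
    x = x.permute(0, 1, 3, 5, 2, 4).contiguous()
    return x.reshape(b, c * r * r, h // r, w // r)


class PackingBlock(nn.Module):
    """Sketch of a packing block: Space2Depth, a 3D convolution across the
    packed channels, then a 2D convolution to the desired output width."""

    def __init__(self, in_channels, out_channels, r=2, d=8):
        super().__init__()
        self.r = r
        self.conv3d = nn.Conv3d(1, d, kernel_size=3, padding=1)
        self.conv2d = nn.Conv2d(in_channels * r * r * d, out_channels,
                                kernel_size=3, padding=1)

    def forward(self, x):
        x = space_to_depth(x, self.r)        # (B, C*r*r, H/r, W/r)
        b, c, h, w = x.shape
        x = self.conv3d(x.unsqueeze(1))      # (B, d, C*r*r, H/r, W/r)
        x = x.reshape(b, c * self.conv3d.out_channels, h, w)
        return self.conv2d(x)                # (B, out_channels, H/r, W/r)


features = PackingBlock(64, 128)(torch.randn(1, 64, 192, 640))
print(features.shape)  # torch.Size([1, 128, 96, 320])
```

Similarly, the weak velocity supervision can be sketched as a penalty on the gap between the norm of the predicted inter-frame translation and the distance implied by the measured speed. Again, this is a hedged sketch rather than the loss as implemented in this repository, and the argument shapes are assumptions:

```python
import torch


def velocity_supervision_loss(pred_translation, speed, dt):
    """pred_translation: (B, 3) translation from the pose network between two
    frames; speed: (B,) measured speed in m/s; dt: (B,) time between frames."""
    pred_distance = pred_translation.norm(dim=-1)     # predicted metres travelled
    measured_distance = speed.abs() * dt              # metres implied by the speed
    return (pred_distance - measured_distance).abs().mean()


loss = velocity_supervision_loss(torch.randn(4, 3) * 0.1,
                                 torch.full((4,), 10.0),  # 10 m/s
                                 torch.full((4,), 0.1))   # 10 Hz camera
```
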
## Qualitative Results

### Self-Supervised - KITTI

<img src="/media/figs/teaser27.png" width="49%"> <img src="/media/figs/teaser51.png" width="49%">
<img src="/media/figs/teaser305.png" width="49%"> <img src="/media/figs/teaser291.png" width="49%">

### Self-Supervised - DDAD

<img src="/media/figs/ddad1.png" width="49%"> <img src="/media/figs/ddad2.png" width="49%">
<img src="/media/figs/ddad3.png" width="49%"> <img src="/media/figs/ddad4.png" width="49%">

### Semi-Supervised - KITTI

<img src="/media/figs/beams_full.jpg" width="32%" height="170cm"> <img src="/media/figs/beams_64.jpg" width="32%" height="170cm"> <img src="/media/figs/beams_32.jpg" width="32%" height="170cm">
<img src="/media/figs/beams_16.jpg" width="32%" height="170cm"> <img src="/media/figs/beams_8.jpg" width="32%" height="170cm"> <img src="/media/figs/beams_4.jpg" width="32%" height="170cm">

### Semantically-Guided Self-Supervised Depth - KITTI

<img src="/media/figs/semguided.png" width="98%">

### Solving the Infinite Depth Problem

<img src="/media/figs/infinite_depth.png" width="98%">

## How to Use

### Step 1: Clone this repository

```
git clone https://github.com/vguizilini/packnet-sfm.git
```

### Step 2: Create symbolic link to data folder

```
sudo ln -s path/to/data /data
```

### Step 3: Download datasets into /data/datasets

#### [KITTI_raw](http://www.cvlibs.net/datasets/kitti/raw_data.php)
- For convenience, we also provide the pre-computed depth maps used in our papers (unzip into the same root folder)
```
wget https://tri-ml-public.s3.amazonaws.com/github/packnet-sfm/depth_maps/KITTI_raw_velodyne.tar.gz
wget https://tri-ml-public.s3.amazonaws.com/github/packnet-sfm/depth_maps/KITTI_raw_groundtruth.tar.gz
```

### Step 4: Download pre-trained models into /data/models

#### KITTI
- Self-Supervised (192x640, K)
```
wget https://tri-ml-public.s3.amazonaws.com/github/packnet-sfm/models/PackNet_MR_selfsup_K.pth.tar
```
- Self-Supervised (192x640, CS)
```
wget https://tri-ml-public.s3.amazonaws.com/github/packnet-sfm/models/PackNet_MR_selfsup_CS.pth.tar
```
- Self-Supervised Scale-Aware (192x640, CS &rightarrow; K)
```
wget https://tri-ml-public.s3.amazonaws.com/github/packnet-sfm/models/PackNet_MR_velsup_CStoK.pth.tar
```
- Semi-Supervised (Annotated depth maps) (192x640, CS &rightarrow; K)
```
wget https://tri-ml-public.s3.amazonaws.com/github/packnet-sfm/models/PackNet_MR_semisup_CStoK.pth.tar
```

### Step 5: Inference
```
bash evaluate_kitti.sh
```
### License

The source code is released under the [MIT license](LICENSE.md).

### Citations
Depending on the application, please use the following citations when referencing our work:

```
@misc{packnet-sfm-selfsup,
  author = {Vitor Guizilini and Rares Ambrus and Sudeep Pillai and Allan Raventos and Adrien Gaidon},
  title = {3D Packing for Self-Supervised Monocular Depth Estimation},
  archivePrefix = {arXiv},
  eprint = {1905.02693},
  primaryClass = {cs.CV},
  year = {2019},
}
```

```
@inproceedings{packnet-sfm-semisup,
  author = {Vitor Guizilini and Jie Li and Rares Ambrus and Sudeep Pillai and Adrien Gaidon},
  title = {Robust Semi-Supervised Monocular Depth Estimation with Reprojected Distances},
  booktitle = {Proceedings of the 3rd Annual Conference on Robot Learning (CoRL)},
  month = {October},
  year = {2019},
}
```

```
@inproceedings{packnet-sfm-twostream,
  author = {Rares Ambrus and Vitor Guizilini and Jie Li and Sudeep Pillai and Adrien Gaidon},
  title = {{Two Stream Networks for Self-Supervised Ego-Motion Estimation}},
  booktitle = {Proceedings of the 3rd Annual Conference on Robot Learning (CoRL)},
  month = {October},
  year = {2019},
}
```

```
@inproceedings{packnet-sfm-semguided,
  author = {Vitor Guizilini and Rui Hou and Jie Li and Rares Ambrus and Adrien Gaidon},
  title = {Semantically-Guided Representation Learning for Self-Supervised Monocular Depth},
  booktitle = {Proceedings of the 8th International Conference on Learning Representations (ICLR)},
  month = {April},
  year = {2020},
}
```

```
@inproceedings{superdepth,
  author = {Sudeep Pillai and Rares Ambrus and Adrien Gaidon},
  title = {SuperDepth: Self-Supervised, Super-Resolved Monocular Depth Estimation},
  booktitle = {Proceedings of the IEEE International Conference on Robotics and Automation (ICRA)},
  month = {May},
  year = {2019},
}
```

Diff for: docker/Dockerfile

+77
@@ -0,0 +1,77 @@
# Copyright 2020 Toyota Research Institute. All rights reserved.

FROM nvidia/cuda:10.0-devel-ubuntu18.04

ENV PYTORCH_VERSION=1.1.0
ENV TORCHVISION_VERSION=0.3.0
ENV CUDNN_VERSION=7.6.0.64-1+cuda10.0
ENV NCCL_VERSION=2.4.7-1+cuda10.0

# Python 2.7 or 3.6 is supported by Ubuntu Bionic out of the box
ARG python=3.6
ENV PYTHON_VERSION=${python}
ENV DEBIAN_FRONTEND=noninteractive

# Set default shell to /bin/bash
SHELL ["/bin/bash", "-cu"]

RUN apt-get update && apt-get install -y --allow-downgrades --allow-change-held-packages --no-install-recommends \
    build-essential \
    cmake \
    g++-4.8 \
    git \
    curl \
    docker.io \
    vim \
    wget \
    ca-certificates \
    libcudnn7=${CUDNN_VERSION} \
    libnccl2=${NCCL_VERSION} \
    libnccl-dev=${NCCL_VERSION} \
    libjpeg-dev \
    libpng-dev \
    python${PYTHON_VERSION} \
    python${PYTHON_VERSION}-dev \
    python3-tk \
    librdmacm1 \
    libibverbs1 \
    ibverbs-providers \
    libgtk2.0-dev \
    unzip \
    bzip2 \
    htop \
    gnuplot \
    ffmpeg

# Install Python and pip
RUN if [[ "${PYTHON_VERSION}" == "3.6" ]]; then \
    apt-get install -y python${PYTHON_VERSION}-distutils; \
    fi

RUN ln -sf /usr/bin/python${PYTHON_VERSION} /usr/bin/python

RUN curl -O https://bootstrap.pypa.io/get-pip.py && \
    python get-pip.py && \
    rm get-pip.py

# Install PyTorch
RUN pip install future typing numpy awscli
RUN pip install https://download.pytorch.org/whl/cu100/torch-${PYTORCH_VERSION}-cp36-cp36m-linux_x86_64.whl
RUN pip install https://download.pytorch.org/whl/cu100/torchvision-${TORCHVISION_VERSION}-cp36-cp36m-linux_x86_64.whl
RUN pip install numpy h5py

# Configure environment variables - default working directory is "/workspace"
WORKDIR /workspace
ENV PYTHONPATH="/workspace"

RUN pip install awscli tqdm numpy-quaternion termcolor path.py pillow==6.1 opencv-python-headless matplotlib

# self-supervised-learning copy
RUN mkdir -p /workspace/experiments
RUN mkdir -p /workspace/self-supervised-learning
WORKDIR /workspace/self-supervised-learning

# Copy self-supervised learning source
COPY . /workspace/self-supervised-learning
ENV PYTHONPATH="/workspace/self-supervised-learning:$PYTHONPATH"
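
Once the image is built (see the Makefile targets above), a quick sanity check one might run inside the container is a short Python snippet confirming the pinned PyTorch build and GPU visibility. This is illustrative only and not part of the repository:

```python
# Hypothetical sanity check for the container environment (not in the repo).
import torch
import torchvision

print(torch.__version__)          # expected: 1.1.0 (pinned in the Dockerfile)
print(torchvision.__version__)    # expected: 0.3.0
print(torch.cuda.is_available())  # True when launched via nvidia-docker with a GPU
if torch.cuda.is_available():
    print(torch.cuda.get_device_name(0))
```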

Diff for: evaluate_kitti.sh

+12
@@ -0,0 +1,12 @@
# Copyright 2020 Toyota Research Institute. All rights reserved.

# Example of evaluation script for KITTI

make docker-evaluate-depth \
    MODEL=/data/models/packnet/PackNet_MR_selfsup_K.pth.tar \
    INPUT_PATH=/data/datasets/KITTI_raw/data_splits/eigen_test_files.txt \
    DEPTH_TYPE=velodyne \
    CROP=garg \
    SAVE_OUTPUT=output

Diff for: media/figs/beams_16.jpg (396 KB, binary image added)

Diff for: media/figs/beams_32.jpg (520 KB, binary image added)

Diff for: media/figs/beams_4.jpg (418 KB, binary image added)

Diff for: media/figs/beams_64.jpg (585 KB, binary image added)

Diff for: media/figs/beams_8.jpg (448 KB, binary image added)

Diff for: media/figs/beams_full.jpg (663 KB, binary image added)

Diff for: media/figs/ddad1.png (1.24 MB, binary image added)

Diff for: media/figs/ddad2.png (1.27 MB, binary image added)

Diff for: media/figs/ddad3.png (1.15 MB, binary image added)

Diff for: media/figs/ddad4.png (1.09 MB, binary image added)

Diff for: media/figs/infinite_depth.png (402 KB, binary image added)

Diff for: media/figs/semguided.png (1.72 MB, binary image added)

Diff for: media/figs/sparse_beams.png (1.5 MB, binary image added)

Diff for: media/figs/teaser27.png (3.56 MB, binary image added)

Diff for: media/figs/teaser291.png (391 KB, binary image added)

Diff for: media/figs/teaser305.png (3.56 MB, binary image added)

Diff for: media/figs/teaser51.png (3.56 MB, binary image added)

Diff for: media/figs/tri-logo.png (9.04 KB, binary image added)

Diff for: monodepth/__init__.py

+1
@@ -0,0 +1 @@
# Copyright 2020 Toyota Research Institute. All rights reserved.

Diff for: monodepth/datasets/__init__.py

+1
@@ -0,0 +1 @@
# Copyright 2020 Toyota Research Institute. All rights reserved.

Diff for: monodepth/datasets/data_augmentation.py

+62
@@ -0,0 +1,62 @@
# Copyright 2020 Toyota Research Institute. All rights reserved.

"""
Data augmentation functions
"""

import numpy as np
import torchvision.transforms as transforms
from PIL import Image


def filter_dict(dict, keywords):
    """
    Returns only keywords that are present in a dictionary
    """
    return [key for key in keywords if key in dict]


def resize_sample_image_and_intrinsics(sample, image_shape, image_interpolation=Image.ANTIALIAS):
    """
    Takes a sample and resizes the input images ['left_image'] and ['right_image'].
    It also rescales the corresponding camera intrinsics ['left_intrinsics'] and ['right_intrinsics'].
    """
    # Resize image and corresponding intrinsics
    image_transform = transforms.Resize(image_shape, interpolation=image_interpolation)
    original_shape = sample['left_image'].size
    (orig_w, orig_h) = original_shape
    (out_h, out_w) = image_shape

    for key in filter_dict(sample, [
        'left_intrinsics', 'right_intrinsics'
    ]):
        # Note this is swapped here because PIL.Image.size -> (w,h)
        # but we specify image_shape -> (h,w) for rescaling
        y_scale = out_h / orig_h
        x_scale = out_w / orig_w
        # Scale fx and fy appropriately
        intrinsics = np.copy(sample[key])
        intrinsics[0] *= x_scale
        intrinsics[1] *= y_scale
        sample[key] = intrinsics

    # Scale image (default is antialias)
    for key in filter_dict(sample, [
        'left_image', 'right_image',
    ]):
        sample[key] = image_transform(sample[key])

    return sample


def to_tensor_sample(sample, tensor_type='torch.FloatTensor'):
    """
    Converts all fields from a sample to tensors.
    """
    transform = transforms.ToTensor()
    for key in filter_dict(sample, [
        'left_image', 'right_image',
        'left_depth', 'right_depth',
    ]):
        sample[key] = transform(sample[key]).type(tensor_type)
    return sample
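
As a quick usage illustration of the two helpers above, a sample dictionary can be resized and converted to tensors as follows. The image size and intrinsics values are invented for the example; only the dictionary keys and function names come from the code above.

```python
# Illustrative only: the resolution and intrinsics below are made up.
import numpy as np
from PIL import Image

from monodepth.datasets.data_augmentation import (
    resize_sample_image_and_intrinsics, to_tensor_sample)

sample = {
    'left_image': Image.new('RGB', (1242, 375)),   # PIL size is (w, h)
    'left_intrinsics': np.array([[721.5,   0.0, 609.6],
                                 [  0.0, 721.5, 172.8],
                                 [  0.0,   0.0,   1.0]]),
}

# Resize to (h, w) = (192, 640): row 0 (fx, cx) is scaled by 640/1242,
# row 1 (fy, cy) by 192/375, and the image is resized to match.
sample = resize_sample_image_and_intrinsics(sample, image_shape=(192, 640))
sample = to_tensor_sample(sample)

print(sample['left_image'].shape)       # torch.Size([3, 192, 640])
print(sample['left_intrinsics'][0, 0])  # fx after rescaling, about 371.8
```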
