Skip to content

Commit f41342d

Browse files
First release of training code (TRI-ML#11)
1 parent fec6d0b commit f41342d

File tree

350 files changed

+64112
-1808
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

350 files changed

+64112
-1808
lines changed

Diff for: Makefile

+57-18
Original file line numberDiff line numberDiff line change
@@ -1,43 +1,82 @@
1-
# Copyright 2020 Toyota Research Institute. All rights reserved.
1+
# Handy commands:
2+
# - `make docker-build`: builds DOCKER_IMAGE (default: `packnet-sfm:latest`)
3+
PROJECT ?= packnet-sfm
4+
WORKSPACE ?= /workspace/$(PROJECT)
5+
DOCKER_IMAGE ?= ${PROJECT}:latest
26

3-
DEPTH_TYPE ?= None
4-
CROP ?= None
5-
SAVE_OUTPUT ?= None
6-
7-
PYTHON ?= python
8-
DOCKER_IMAGE ?= packnet-sfm:master-latest
9-
DOCKER_OPTS := --name packnet-sfm --rm -it \
7+
SHMSIZE ?= 444G
8+
WANDB_MODE ?= run
9+
DOCKER_OPTS := \
10+
--name ${PROJECT} \
11+
--rm -it \
12+
--shm-size=${SHMSIZE} \
13+
-e AWS_DEFAULT_REGION \
14+
-e AWS_ACCESS_KEY_ID \
15+
-e AWS_SECRET_ACCESS_KEY \
16+
-e WANDB_API_KEY \
17+
-e WANDB_ENTITY \
18+
-e WANDB_MODE \
19+
-e HOST_HOSTNAME= \
20+
-e OMP_NUM_THREADS=1 -e KMP_AFFINITY="granularity=fine,compact,1,0" \
21+
-e OMPI_ALLOW_RUN_AS_ROOT=1 \
22+
-e OMPI_ALLOW_RUN_AS_ROOT_CONFIRM=1 \
23+
-e NCCL_DEBUG=VERSION \
1024
-e DISPLAY=${DISPLAY} \
1125
-e XAUTHORITY \
1226
-e NVIDIA_DRIVER_CAPABILITIES=all \
27+
-v ~/.aws:/root/.aws \
28+
-v /root/.ssh:/root/.ssh \
1329
-v ~/.cache:/root/.cache \
1430
-v /data:/data \
15-
-v ${PWD}:/workspace/self-supervised-learning \
16-
-v /tmp/.X11-unix/X0:/tmp/.X11-unix/X0 \
31+
-v /mnt/fsx/:/mnt/fsx \
1732
-v /dev/null:/dev/raw1394 \
18-
-w /workspace/self-supervised-learning \
19-
--shm-size=444G \
33+
-v /tmp:/tmp \
34+
-v /tmp/.X11-unix/X0:/tmp/.X11-unix/X0 \
35+
-v /var/run/docker.sock:/var/run/docker.sock \
36+
-v ${PWD}:${WORKSPACE} \
37+
-w ${WORKSPACE} \
2038
--privileged \
39+
--ipc=host \
2140
--network=host
2241

23-
.PHONY: all clean docker-build
42+
NGPUS=$(shell nvidia-smi -L | wc -l)
43+
MPI_CMD=mpirun \
44+
-allow-run-as-root \
45+
-np ${NGPUS} \
46+
-H localhost:${NGPUS} \
47+
-x MASTER_ADDR=127.0.0.1 \
48+
-x MASTER_PORT=23457 \
49+
-x HOROVOD_TIMELINE \
50+
-x OMP_NUM_THREADS=1 \
51+
-x KMP_AFFINITY='granularity=fine,compact,1,0' \
52+
-bind-to none -map-by slot -x NCCL_DEBUG=INFO -x NCCL_MIN_NRINGS=4 \
53+
--report-bindings
54+
55+
56+
# Command-style targets: none of these produce a file of the same name, so mark
# them phony to keep a stray file (e.g. one called `docker-run`) from masking them.
.PHONY: all clean docker-build docker-start-interactive docker-start-jupyter docker-run docker-run-mpi
2457

2558
all: clean
2659

2760
clean:
2861
find . -name "*.pyc" | xargs rm -f && \
2962
find . -name "__pycache__" | xargs rm -rf
3063

31-
3264
docker-build:
3365
docker build \
34-
-t ${DOCKER_IMAGE} . -f docker/Dockerfile
66+
-f docker/Dockerfile \
67+
-t ${DOCKER_IMAGE} .
3568

3669
docker-start-interactive: docker-build
70+
nvidia-docker run ${DOCKER_OPTS} ${DOCKER_IMAGE} bash
71+
72+
docker-start-jupyter: docker-build
3773
nvidia-docker run ${DOCKER_OPTS} ${DOCKER_IMAGE} \
38-
bash
74+
bash -c "jupyter notebook --port=8888 --ip=0.0.0.0 --allow-root --no-browser"
3975

40-
docker-evaluate-depth: docker-build
76+
docker-run: docker-build
4177
nvidia-docker run ${DOCKER_OPTS} ${DOCKER_IMAGE} \
42-
bash -c "bash scripts/evaluate_depth.sh ${MODEL} ${INPUT_PATH} ${DEPTH_TYPE} ${CROP} ${SAVE_OUTPUT}"
78+
bash -c "${COMMAND}"
4379

80+
docker-run-mpi: docker-build
81+
nvidia-docker run ${DOCKER_OPTS} ${DOCKER_IMAGE} \
82+
bash -c "${MPI_CMD} ${COMMAND}"

Diff for: README.md

+182-69
Large diffs are not rendered by default.

Diff for: configs/default_config.py

+184
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,184 @@
1+
"""Default packnet_sfm configuration parameters (overridable in configs/*.yaml)
2+
"""
3+
4+
import os
5+
from yacs.config import CfgNode as CN
6+
7+
########################################################################################################################
8+
cfg = CN()
9+
cfg.name = '' # Run name
10+
cfg.debug = False # Debugging flag
11+
########################################################################################################################
12+
### ARCH
13+
########################################################################################################################
14+
cfg.arch = CN()
15+
cfg.arch.seed = 42 # Random seed for Pytorch/Numpy initialization
16+
cfg.arch.min_epochs = 1 # Minimum number of epochs
17+
cfg.arch.max_epochs = 50 # Maximum number of epochs
18+
########################################################################################################################
19+
### CHECKPOINT
20+
########################################################################################################################
21+
cfg.checkpoint = CN()
22+
cfg.checkpoint.filepath = '' # Checkpoint filepath to save data
23+
cfg.checkpoint.save_top_k = 5 # Number of best models to save
24+
cfg.checkpoint.monitor = 'loss' # Metric to monitor for logging
25+
cfg.checkpoint.monitor_index = 0 # Dataset index for the metric to monitor
26+
cfg.checkpoint.mode = 'auto' # Automatically determine direction of improvement (increase or decrease)
27+
cfg.checkpoint.s3_path = '' # s3 path for AWS model syncing
28+
cfg.checkpoint.s3_frequency = 1 # How often to s3 sync
29+
########################################################################################################################
30+
### SAVE
31+
########################################################################################################################
32+
cfg.save = CN()
33+
cfg.save.folder = '' # Folder where data will be saved
34+
cfg.save.viz = True # Flag for saving inverse depth map visualization
35+
cfg.save.npz = True # Flag for saving numpy depth maps
36+
########################################################################################################################
37+
### WANDB
38+
########################################################################################################################
39+
cfg.wandb = CN()
40+
cfg.wandb.dry_run = True # Wandb dry-run (not logging)
41+
cfg.wandb.name = '' # Wandb run name
42+
cfg.wandb.project = os.environ.get("WANDB_PROJECT", "") # Wandb project
43+
cfg.wandb.entity = os.environ.get("WANDB_ENTITY", "") # Wandb entity
44+
cfg.wandb.tags = [] # Wandb tags
45+
cfg.wandb.dir = '' # Wandb save folder
46+
########################################################################################################################
47+
### MODEL
48+
########################################################################################################################
49+
cfg.model = CN()
50+
cfg.model.name = '' # Training model
51+
cfg.model.checkpoint_path = '' # Checkpoint path for model saving
52+
########################################################################################################################
53+
### MODEL.OPTIMIZER
54+
########################################################################################################################
55+
cfg.model.optimizer = CN()
56+
cfg.model.optimizer.name = 'Adam' # Optimizer name
57+
cfg.model.optimizer.depth = CN()
58+
cfg.model.optimizer.depth.lr = 0.0002 # Depth learning rate
59+
cfg.model.optimizer.depth.weight_decay = 0.0 # Depth weight decay
60+
cfg.model.optimizer.pose = CN()
61+
cfg.model.optimizer.pose.lr = 0.0002 # Pose learning rate
62+
cfg.model.optimizer.pose.weight_decay = 0.0 # Pose weight decay
63+
########################################################################################################################
64+
### MODEL.SCHEDULER
65+
########################################################################################################################
66+
cfg.model.scheduler = CN()
67+
cfg.model.scheduler.name = 'StepLR' # Scheduler name
68+
cfg.model.scheduler.step_size = 10 # Scheduler step size
69+
cfg.model.scheduler.gamma = 0.5 # Scheduler gamma value
70+
cfg.model.scheduler.T_max = 20 # Scheduler maximum number of iterations
71+
########################################################################################################################
72+
### MODEL.PARAMS
73+
########################################################################################################################
74+
cfg.model.params = CN()
75+
cfg.model.params.crop = '' # Which crop should be used during evaluation
76+
cfg.model.params.min_depth = 0.0 # Minimum depth value to evaluate
77+
cfg.model.params.max_depth = 80.0 # Maximum depth value to evaluate
78+
########################################################################################################################
79+
### MODEL.LOSS
80+
########################################################################################################################
81+
cfg.model.loss = CN()
82+
#
83+
cfg.model.loss.num_scales = 4 # Number of inverse depth scales to use
84+
cfg.model.loss.progressive_scaling = 0.0 # Training percentage to decay number of scales
85+
cfg.model.loss.flip_lr_prob = 0.5 # Probability of horizontal flipping
86+
cfg.model.loss.rotation_mode = 'euler' # Rotation mode
87+
cfg.model.loss.upsample_depth_maps = True # Resize depth maps to highest resolution
88+
#
89+
cfg.model.loss.ssim_loss_weight = 0.85 # SSIM loss weight
90+
cfg.model.loss.occ_reg_weight = 0.1 # Occlusion regularizer loss weight
91+
cfg.model.loss.smooth_loss_weight = 0.001 # Smoothness loss weight
92+
cfg.model.loss.C1 = 1e-4 # SSIM parameter
93+
cfg.model.loss.C2 = 9e-4 # SSIM parameter
94+
cfg.model.loss.photometric_reduce_op = 'min' # Method for photometric loss reducing
95+
cfg.model.loss.disp_norm = True # Inverse depth normalization
96+
cfg.model.loss.clip_loss = 0.0 # Clip loss threshold variance
97+
cfg.model.loss.padding_mode = 'zeros' # Photometric loss padding mode
98+
cfg.model.loss.automask_loss = True # Automasking to remove static pixels
99+
#
100+
cfg.model.loss.supervised_method = 'sparse-l1' # Method for depth supervision
101+
cfg.model.loss.supervised_num_scales = 4 # Number of scales for supervised learning
102+
cfg.model.loss.supervised_loss_weight = 0.9 # Supervised loss weight
103+
########################################################################################################################
104+
### MODEL.DEPTH_NET
105+
########################################################################################################################
106+
cfg.model.depth_net = CN()
107+
cfg.model.depth_net.name = '' # Depth network name
108+
cfg.model.depth_net.checkpoint_path = '' # Depth checkpoint filepath
109+
cfg.model.depth_net.version = '' # Depth network version
110+
cfg.model.depth_net.dropout = 0.0 # Depth network dropout
111+
########################################################################################################################
112+
### MODEL.POSE_NET
113+
########################################################################################################################
114+
cfg.model.pose_net = CN()
115+
cfg.model.pose_net.name = '' # Pose network name
116+
cfg.model.pose_net.checkpoint_path = '' # Pose checkpoint filepath
117+
cfg.model.pose_net.version = '' # Pose network version
118+
cfg.model.pose_net.dropout = 0.0 # Pose network dropout
119+
########################################################################################################################
120+
### DATASETS
121+
########################################################################################################################
122+
cfg.datasets = CN()
123+
########################################################################################################################
124+
### DATASETS.AUGMENTATION
125+
########################################################################################################################
126+
cfg.datasets.augmentation = CN()
127+
cfg.datasets.augmentation.image_shape = (192, 640) # Image shape
128+
cfg.datasets.augmentation.jittering = (0.2, 0.2, 0.2, 0.05) # Color jittering values
129+
########################################################################################################################
130+
### DATASETS.TRAIN
131+
########################################################################################################################
132+
cfg.datasets.train = CN()
133+
cfg.datasets.train.batch_size = 8 # Training batch size
134+
cfg.datasets.train.num_workers = 16 # Training number of workers
135+
cfg.datasets.train.back_context = 1 # Training backward context
136+
cfg.datasets.train.forward_context = 1 # Training forward context
137+
cfg.datasets.train.dataset = [] # Training dataset
138+
cfg.datasets.train.path = [] # Training data path
139+
cfg.datasets.train.split = [] # Training split
140+
cfg.datasets.train.depth_type = [''] # Training depth type
141+
cfg.datasets.train.cameras = [] # Training cameras
142+
cfg.datasets.train.repeat = [1] # Number of times training dataset is repeated per epoch
143+
cfg.datasets.train.num_logs = 5 # Number of training images to log
144+
########################################################################################################################
145+
### DATASETS.VALIDATION
146+
########################################################################################################################
147+
cfg.datasets.validation = CN()
148+
cfg.datasets.validation.batch_size = 1 # Validation batch size
149+
cfg.datasets.validation.num_workers = 8 # Validation number of workers
150+
cfg.datasets.validation.back_context = 0 # Validation backward context
151+
cfg.datasets.validation.forward_context = 0 # Validation forward context
152+
cfg.datasets.validation.dataset = [] # Validation dataset
153+
cfg.datasets.validation.path = [] # Validation data path
154+
cfg.datasets.validation.split = [] # Validation split
155+
cfg.datasets.validation.depth_type = [''] # Validation depth type
156+
cfg.datasets.validation.cameras = [] # Validation cameras
157+
cfg.datasets.validation.num_logs = 5 # Number of validation images to log
158+
########################################################################################################################
159+
### DATASETS.TEST
160+
########################################################################################################################
161+
cfg.datasets.test = CN()
162+
cfg.datasets.test.batch_size = 1 # Test batch size
163+
cfg.datasets.test.num_workers = 8 # Test number of workers
164+
cfg.datasets.test.back_context = 0 # Test backward context
165+
cfg.datasets.test.forward_context = 0 # Test forward context
166+
cfg.datasets.test.dataset = [] # Test dataset
167+
cfg.datasets.test.path = [] # Test data path
168+
cfg.datasets.test.split = [] # Test split
169+
cfg.datasets.test.depth_type = [''] # Test depth type
170+
cfg.datasets.test.cameras = [] # Test cameras
171+
cfg.datasets.test.num_logs = 5 # Number of test images to log
172+
########################################################################################################################
173+
### THESE SHOULD NOT BE CHANGED
174+
########################################################################################################################
175+
cfg.config = '' # Run configuration file
176+
cfg.default = '' # Run default configuration file
177+
cfg.wandb.url = '' # Wandb URL
178+
cfg.checkpoint.s3_url = '' # s3 URL
179+
cfg.save.pretrained = '' # Pretrained checkpoint
180+
cfg.prepared = False # Prepared flag
181+
########################################################################################################################
182+
183+
def get_cfg_defaults():
184+
return cfg.clone()

Diff for: configs/eval_ddad.yaml

+25
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
model:
2+
name: 'SelfSupModel'
3+
depth_net:
4+
name: 'PackNet01'
5+
version: '1A'
6+
pose_net:
7+
name: 'PoseNet'
8+
version: ''
9+
params:
10+
crop: ''
11+
min_depth: 0.0
12+
max_depth: 200.0
13+
datasets:
14+
augmentation:
15+
image_shape: (384, 640)
16+
test:
17+
dataset: ['DGP']
18+
path: ['/data/datasets/DDAD/ddad.json']
19+
split: ['val']
20+
depth_type: ['lidar']
21+
cameras: ['camera_01']
22+
save:
23+
folder: '/data/save'
24+
viz: True
25+
npz: True

Diff for: configs/eval_image.yaml

+19
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
model:
2+
name: 'SelfSupModel'
3+
depth_net:
4+
name: 'PackNet01'
5+
version: '1A'
6+
pose_net:
7+
name: 'PoseNet'
8+
version: ''
9+
datasets:
10+
augmentation:
11+
image_shape: (384, 640)
12+
test:
13+
dataset: ['Image']
14+
path: ['images']
15+
split: ['{:010d}']
16+
save:
17+
folder: '/data/save'
18+
viz: True
19+
npz: True  # key must match cfg.save.npz declared in configs/default_config.py

Diff for: configs/eval_kitti.yaml

+24
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
model:
2+
name: 'SelfSupModel'
3+
depth_net:
4+
name: 'PackNet01'
5+
version: '1A'
6+
pose_net:
7+
name: 'PoseNet'
8+
version: ''
9+
params:
10+
crop: 'garg'
11+
min_depth: 0.0
12+
max_depth: 80.0
13+
datasets:
14+
augmentation:
15+
image_shape: (192, 640)
16+
test:
17+
dataset: ['KITTI']
18+
path: ['/data/datasets/KITTI_raw']
19+
split: ['data_splits/eigen_test_files.txt']
20+
depth_type: ['velodyne']
21+
save:
22+
folder: '/data/save'
23+
viz: True
24+
npz: True

0 commit comments

Comments
 (0)