chrischoy
diff --git a/‎README.md
+4-4 b/‎README.md
+4-4
diff --git a/‎demo.py
+2-2 b/‎demo.py
+2-2
diff --git a/‎lib/_init_paths.py
+1 b/‎lib/_init_paths.py
+1
diff --git a/‎lib/config.py
+5-5 b/‎lib/config.py
+5-5
diff --git a/‎lib/data_augmentation.py
+10-11 b/‎lib/data_augmentation.py
+10-11
diff --git a/‎lib/data_io.py
+6-80 b/‎lib/data_io.py
+6-80
diff --git a/‎lib/data_process.py
+2-7 b/‎lib/data_process.py
+2-7
@@ -1,6 +1,6 @@
 # 3D-R<sup>2</sup>N<sup>2</sup>: 3D Recurrent Reconstruction Neural Network
 
-This repository contains the source codes for the paper [Choy et al., 3D-R2N2: A Unified Approach for Single and Multi-view 3D Object Reconstruction, ECCV 2016](http://arxiv.org/abs/1604.00449). Given one or multiple views of an object, the network generates voxelized (voxel is 3D equivalent of pixel) reconstruction of the object in 3D.
+This repository contains the source codes for the paper [Choy et al., 3D-R2N2: A Unified Approach for Single and Multi-view 3D Object Reconstruction, ECCV 2016](http://arxiv.org/abs/1604.00449). Given one or multiple views of an object, the network generates voxelized (voxel is 3D equivalent of pixel) reconstruction of the object in 3D. This is the first work that generates a voxelized 3D reconstruction given 2D image observations using a neural network.
 
 ## Citing this work
 
@@ -22,9 +22,9 @@ If you find this work useful in your research, please consider citing:
 
 Traditionally, single view reconstruction and multi view reconstruction are disjoint problems that have been dealt with using different approaches. In this work, we first propose a unified framework for both single and multi view reconstruction using a `3D Recurrent Reconstruction Neural Network` (3D-R2N2).
 
-| Schematic of `3D-Convolutional LSTM` | Inputs (red cells + feature) for each cell (purple)|
-|:------------------------------------:|:--------------------------------------------------:|
-| ![3D-LSTM](imgs/lstm.png)            | ![3D-LSTM](imgs/lstm_time.png)                     |
+| Schematic of the `3D-Convolutional LSTM` | Inputs (red cells + feature) for each cell (purple) |
+|:----------------------------------------:|:---------------------------------------------------:|
+| ![3D-LSTM](imgs/lstm.png)                | ![3D-LSTM](imgs/lstm_time.png)                      |
 
 We can feed in images in a random order since the network is trained to be invariant to the order. The critical component that enables the network to be invariant to the order is the `3D-Convolutional LSTM` which we first proposed in this work. The `3D-Convolutional LSTM` selectively updates parts that are visible and keeps the parts that are self-occluded (please refer to [http://cvgl.stanford.edu/3d-r2n2/](http://cvgl.stanford.edu/3d-r2n2/) for the supplementary material for analysis).
 
 
@@ -18,8 +18,8 @@ def download_model(fn):
     if not os.path.isfile(fn):
         # Download the file if it doesn't exist
         print('Downloading a pretrained model')
-        call(['curl',  'ftp://cs.stanford.edu/cs/cvgl/ResidualGRUNet.npy',
-              '--create-dirs', '-o', fn])
+        call(['curl', 'ftp://cs.stanford.edu/cs/cvgl/ResidualGRUNet.npy', '--create-dirs', '-o',
+              fn])
 
 
 def load_demo_images():
 
@@ -8,6 +8,7 @@ def add_path(path):
     if path not in sys.path:
         sys.path.insert(0, path)
 
+
 this_dir = osp.dirname(__file__)
 
 # Add lib to PYTHONPATH
 
@@ -74,15 +74,14 @@
 __C.TRAIN.DEFAULT_LEARNING_RATE = 1e-4
 __C.TRAIN.POLICY = 'adam'  # def: sgd, adam
 # The EasyDict can't use dict with integers as keys
-__C.TRAIN.LEARNING_RATES = {'20000': 1e-5,
-                            '60000': 1e-6}
+__C.TRAIN.LEARNING_RATES = {'20000': 1e-5, '60000': 1e-6}
 __C.TRAIN.MOMENTUM = 0.90
 # weight decay or regularization constant. If not set, the loss can diverge
 # after the training almost converged since weight can increase indefinitely
 # (for cross entropy loss). Too high regularization will also hinder training.
 __C.TRAIN.WEIGHT_DECAY = 0.00005
 __C.TRAIN.LOSS_LIMIT = 2  # stop training if the loss exceeds the limit
-__C.TRAIN.SAVE_FREQ = 10000   # weights will be overwritten every save_freq
+__C.TRAIN.SAVE_FREQ = 10000  # weights will be overwritten every save_freq
 __C.TRAIN.PRINT_FREQ = 40
 
 #
@@ -117,8 +116,7 @@ def _merge_a_into_b(a, b):
         # the types must match, too
         if type(b[k]) is not type(v):
             raise ValueError(('Type mismatch ({} vs. {}) '
-                              'for config key: {}').format(type(b[k]),
-                                                           type(v), k))
+                              'for config key: {}').format(type(b[k]), type(v), k))
 
         # recursively merge dicts
         if type(v) is edict:
@@ -130,6 +128,7 @@ def _merge_a_into_b(a, b):
         else:
             b[k] = v
 
+
 def cfg_from_file(filename):
     """Load a config file and merge it into the default options."""
     import yaml
@@ -138,6 +137,7 @@ def cfg_from_file(filename):
 
     _merge_a_into_b(yaml_cfg, __C)
 
+
 def cfg_from_list(cfg_list):
     """Set config keys via list (e.g., from command line)."""
     from ast import literal_eval
 
@@ -10,15 +10,14 @@ def image_transform(img, crop_x, crop_y, crop_loc=None, color_tint=None):
 
     # Slight translation
     if cfg.TRAIN.RANDOM_CROP and not crop_loc:
-        crop_loc = [np.random.randint(0, crop_y),
-                    np.random.randint(0, crop_x)]
+        crop_loc = [np.random.randint(0, crop_y), np.random.randint(0, crop_x)]
 
     if crop_loc:
         cr, cc = crop_loc
         height, width, _ = img.shape
         img_h = height - crop_y
         img_w = width - crop_x
-        img = img[cr:cr+img_h, cc:cc+img_w]
+        img = img[cr:cr + img_h, cc:cc + img_w]
         # depth = depth[cr:cr+img_h, cc:cc+img_w]
 
     if cfg.TRAIN.FLIP and np.random.rand() > 0.5:
@@ -28,12 +27,12 @@ def image_transform(img, crop_x, crop_y, crop_loc=None, color_tint=None):
 
 
 def crop_center(im, new_height, new_width):
-    height = im.shape[0]   # Get dimensions
+    height = im.shape[0]  # Get dimensions
     width = im.shape[1]
-    left = (width - new_width)/2
-    top = (height - new_height)/2
-    right = (width + new_width)/2
-    bottom = (height + new_height)/2
+    left = (width - new_width) / 2
+    top = (height - new_height) / 2
+    right = (width + new_width) / 2
+    bottom = (height + new_height) / 2
     return im[top:bottom, left:right]
 
 
@@ -48,15 +47,15 @@ def add_random_color_background(im, color_range):
         alpha = (np.expand_dims(im[:, :, 3], axis=2) == 0).astype(np.float)
         im = im[:, :, :3]
         bg_color = np.array([[[r, g, b]]])
-        im =  alpha * bg_color + (1 - alpha) * im
+        im = alpha * bg_color + (1 - alpha) * im
 
     return im
 
 
 def preprocess_img(im, train=True):
     # add random background
-    im = add_random_color_background(im, cfg.TRAIN.NO_BG_COLOR_RANGE
-                                     if train else cfg.TEST.NO_BG_COLOR_RANGE)
+    im = add_random_color_background(im, cfg.TRAIN.NO_BG_COLOR_RANGE if train else
+                                     cfg.TEST.NO_BG_COLOR_RANGE)
 
     # If the image has alpha channel, remove it.
     im_rgb = np.array(im)[:, :, :3].astype(np.float32)
 
@@ -1,6 +1,5 @@
 import os
 import json
-import scipy.io
 from collections import OrderedDict
 
 from lib.config import cfg
@@ -12,44 +11,15 @@ def id_to_name(id, category_list):
             return (k, id - v[0])
 
 
-def return_aligned_models(model_path, model_ids, num_models):
-    """ Load metadata"""
-
-    def orientation_mapping(x):
-        """Used for Seeing 3D Chair dataset json metadata loading"""
-        return {
-            'left': 0,
-            'right': 180,
-            'up': 90,
-            'up_left': 45,
-            'up_right': 135,
-            'down': -90,
-            'down_left': -45,
-            'down_right': -135
-        }[x]
-
-    aligned_models = []
-    for model_id in model_ids:
-        metadata_file = open(os.path.join(model_path, model_id,
-                                          "metadata.json"))
-        model_metadata = json.load(metadata_file)
-        metadata_file.close()
-
-        azimuth_offset = orientation_mapping(model_metadata['orientation'])
-        if azimuth_offset == 0:
-            aligned_models.append(model_id)
-            if len(aligned_models) == num_models:
-                return aligned_models
-
-
 def category_model_id_pair(dataset_portion=[]):
     '''
     Load category, model names from a shapenet dataset.
     '''
+
     def model_names(model_path):
         """ Return model names"""
-        model_names =  [name for name in os.listdir(model_path)
-                        if os.path.isdir(os.path.join(model_path, name))]
+        model_names = [name for name in os.listdir(model_path)
+                       if os.path.isdir(os.path.join(model_path, name))]
         return model_names
 
     category_name_pair = []  # full path of the objs files
@@ -63,10 +33,8 @@ def model_names(model_path):
         models = model_names(model_path)
         num_models = len(models)
 
-        portioned_models = models[
-            int(num_models * dataset_portion[0]):
-            int(num_models * dataset_portion[1])
-        ]
+        portioned_models = models[int(num_models * dataset_portion[0]):int(num_models *
+                                                                           dataset_portion[1])]
 
         category_name_pair.extend([(cat['id'], model_id) for model_id in portioned_models])
 
@@ -84,46 +52,4 @@ def get_voxel_file(category, model_id):
 
 
 def get_rendering_file(category, model_id, rendering_id):
-    return os.path.join(cfg.DIR.RENDERING_PATH % (category, model_id),
-                        '%02d.png' % rendering_id)
-
-
-def get_voc2012_imglist():
-    """Retrieves list of PASCAL image that can be used for random background."""
-    whitelist_img = set()  # Set of class-safe images to return.
-    blacklist_img = set()
-    classes_path = os.path.join(cfg.PASCAL.VOC2012_DIR, cfg.PASCAL.CLASSES_DIR)
-    # Parse all class definition files for each class.
-    for c in cfg.PASCAL.BLACKLIST_CLASSES:
-        for file_name in cfg.PASCAL.CLASSES_FILES:
-            class_file = os.path.join(classes_path, c + file_name)
-            with open(class_file) as f:
-                for line in f.readlines():
-                    image_file, class_exists = line.rstrip().split()
-                    # Add image_file to whitelist if it doesn't have any of the
-                    # blacklisted class in it.
-                    if class_exists == '-1' and image_file not in blacklist_img:
-                        whitelist_img.add(image_file)
-                    else:
-                        whitelist_img.discard(image_file)
-                        blacklist_img.add(image_file)
-    # Return full path of whitelisted image files.
-    return [os.path.join(cfg.PASCAL.VOC2012_DIR, cfg.PASCAL.IMGS_DIR,
-                         img + '.jpg') for img in whitelist_img]
-
-
-def get_voc2012_eval_metadata(is_train=True):
-    """Retrieves PASCAL dataset evaluation metadata.
-
-    Returns tuple of ('data', 'label').
-    'label' is an integer vector indexing 'classes'.
-    'data' is a struct array with following perperties:
-    ['imsize', 'voc_image_id', 'voc_rec_id', 'pascal_bbox', 'view', 'kps',
-     'part_names', 'bbox', 'poly_x', 'poly_y', 'class', 'flip', 'rotP3d',
-     'euler', 'subtype', 'objectIndP3d']
-    """
-    metadata = scipy.io.loadmat(cfg.PASCAL3D.EVAL_METADATA, squeeze_me=True)
-    if is_train:
-        return (metadata['train_data'], metadata['train_label'])
-    else:
-        return (metadata['test_data'], metadata['test_label'])
+    return os.path.join(cfg.DIR.RENDERING_PATH % (category, model_id), '%02d.png' % rendering_id)
@@ -4,7 +4,6 @@
 import _init_paths
 
 import sys
-import os
 import time
 import theano
 import numpy as np
@@ -15,7 +14,7 @@
 
 from lib.config import cfg
 from lib.data_augmentation import preprocess_img
-from lib.data_io import get_model_file, get_voxel_file, get_rendering_file
+from lib.data_io import get_voxel_file, get_rendering_file
 
 import tools.binvox_rw as binvox_rw
 
@@ -105,11 +104,7 @@ def load_label(self, path):
 
 class ReconstructionDataProcess(DataProcess):
 
-    def __init__(self,
-                 data_queue,
-                 category_model_pair,
-                 background_imgs=[],
-                 repeat=True,
+    def __init__(self, data_queue, category_model_pair, background_imgs=[], repeat=True,
                  train=True):
         self.repeat = repeat
         self.train = train