From a37c73fb07be61e7daba3aff643cbd7a7399d1a0 Mon Sep 17 00:00:00 2001
From: Pablo Vela <pablovela5620@gmail.com>
Date: Sun, 12 Feb 2023 12:31:31 -0600
Subject: [PATCH] sdfstudio dataparser/dataset (#1381)

* add downloads for sdfstudio datasets

* add sdfstudio parser to work with sdfstudio data

* fix licensing and doc strings

* fix linter errors

* move depths/normals to metadata, create sdfdataset

* remove extra scenebox parameters

* more linting errors fix

* fix wrong annotation

* add in missing depth/normal files

* minor fixes

---------
---
 .../data/datamanagers/base_datamanager.py     |   2 +
 .../data/dataparsers/sdfstudio_dataparser.py  | 158 ++++++++++++++++++
 nerfstudio/data/datasets/sdf_dataset.py       |  97 +++++++++++
 scripts/downloads/download_data.py            |  70 ++++++++
 4 files changed, 327 insertions(+)
 create mode 100644 nerfstudio/data/dataparsers/sdfstudio_dataparser.py
 create mode 100644 nerfstudio/data/datasets/sdf_dataset.py

diff --git a/nerfstudio/data/datamanagers/base_datamanager.py b/nerfstudio/data/datamanagers/base_datamanager.py
index d032c4d483..1c8c3e08d7 100644
--- a/nerfstudio/data/datamanagers/base_datamanager.py
+++ b/nerfstudio/data/datamanagers/base_datamanager.py
@@ -49,6 +49,7 @@
 from nerfstudio.data.dataparsers.phototourism_dataparser import (
     PhototourismDataParserConfig,
 )
+from nerfstudio.data.dataparsers.sdfstudio_dataparser import SDFStudioDataParserConfig
 from nerfstudio.data.datasets.base_dataset import InputDataset
 from nerfstudio.data.pixel_samplers import EquirectangularPixelSampler, PixelSampler
 from nerfstudio.data.utils.dataloaders import (
@@ -75,6 +76,7 @@
             "dnerf-data": DNeRFDataParserConfig(),
             "phototourism-data": PhototourismDataParserConfig(),
             "dycheck-data": DycheckDataParserConfig(),
+            "sdfstudio-data": SDFStudioDataParserConfig(),
         },
         prefix_names=False,  # Omit prefixes in subcommands themselves.
     )
diff --git a/nerfstudio/data/dataparsers/sdfstudio_dataparser.py b/nerfstudio/data/dataparsers/sdfstudio_dataparser.py
new file mode 100644
index 0000000000..f25c8f3e7c
--- /dev/null
+++ b/nerfstudio/data/dataparsers/sdfstudio_dataparser.py
@@ -0,0 +1,158 @@
+# Copyright 2022 The Nerfstudio Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Dataparser for sdfstudio formatted data"""
+
+from __future__ import annotations
+
+from dataclasses import dataclass, field
+from pathlib import Path
+from typing import Type
+
+import torch
+from rich.console import Console
+
+from nerfstudio.cameras import camera_utils
+from nerfstudio.cameras.cameras import Cameras, CameraType
+from nerfstudio.data.dataparsers.base_dataparser import (
+    DataParser,
+    DataParserConfig,
+    DataparserOutputs,
+)
+from nerfstudio.data.scene_box import SceneBox
+from nerfstudio.utils.io import load_from_json
+
+CONSOLE = Console()
+
+
+@dataclass
+class SDFStudioDataParserConfig(DataParserConfig):
+    """SDFStudio dataset parser config"""
+
+    _target: Type = field(default_factory=lambda: SDFStudio)
+    """target class to instantiate"""
+    data: Path = Path("data/DTU/scan65")
+    """Directory specifying location of data."""
+    include_mono_prior: bool = False
+    """whether or not to load monocular depth and normal"""
+    include_foreground_mask: bool = False
+    """whether or not to load foreground mask"""
+    downscale_factor: int = 1
+    """How much to downscale images. Not applied yet, see the downsample TODO in the parser."""
+    scene_scale: float = 2.0
+    """Intended edge length of the bounding cube (longest dimension of the axis-aligned bbox).
+    The parser currently reads the scene box from meta_data.json instead of applying this value."""
+    skip_every_for_val_split: int = 1
+    """sub sampling validation images"""
+    auto_orient: bool = False
+    """whether or not to re-orient the poses with camera_utils.auto_orient_and_center_poses"""
+
+
+@dataclass
+class SDFStudio(DataParser):
+    """SDFStudio Dataset"""
+
+    config: SDFStudioDataParserConfig
+
+    def _generate_dataparser_outputs(self, split="train"):  # pylint: disable=unused-argument,too-many-statements
+        """Build cameras, file lists and the scene box described by ``meta_data.json``.
+
+        Args:
+            split: any split other than "train" is sub-sampled with ``skip_every_for_val_split``.
+
+        Returns:
+            DataparserOutputs; the mono prior file lists in its metadata are None when priors are not loaded.
+        """
+        # load meta data
+        meta = load_from_json(self.config.data / "meta_data.json")
+
+        # Priors are only required when requested, so data that ships them still loads without them.
+        include_mono_prior = self.config.include_mono_prior
+        assert not include_mono_prior or meta.get("has_mono_prior", False), f"no mono prior in {self.config.data}"
+
+        frames = meta["frames"]
+        # subsample to avoid out-of-memory for validation set
+        if split != "train" and self.config.skip_every_for_val_split >= 1:
+            frames = frames[:: self.config.skip_every_for_val_split]
+
+        image_filenames = []
+        depth_filenames = []
+        normal_filenames = []
+        transform = None
+        fx = []
+        fy = []
+        cx = []
+        cy = []
+        camera_to_worlds = []
+        for frame in frames:
+            image_filenames.append(self.config.data / frame["rgb_path"])
+            if include_mono_prior:
+                # prior paths are only looked up on request; frames without them still parse
+                depth_filenames.append(self.config.data / frame["mono_depth_path"])
+                normal_filenames.append(self.config.data / frame["mono_normal_path"])
+
+            intrinsics = torch.tensor(frame["intrinsics"])
+            fx.append(intrinsics[0, 0])
+            fy.append(intrinsics[1, 1])
+            cx.append(intrinsics[0, 2])
+            cy.append(intrinsics[1, 2])
+            camera_to_worlds.append(torch.tensor(frame["camtoworld"]))
+
+        fx = torch.stack(fx)
+        fy = torch.stack(fy)
+        cx = torch.stack(cx)
+        cy = torch.stack(cy)
+        camera_to_worlds = torch.stack(camera_to_worlds)
+
+        # Convert from COLMAP's/OPENCV's camera coordinate system to nerfstudio
+        camera_to_worlds[:, 0:3, 1:3] *= -1
+
+        if self.config.auto_orient:
+            camera_to_worlds, transform = camera_utils.auto_orient_and_center_poses(
+                camera_to_worlds,
+                method="up",
+                center_poses=False,
+            )
+
+        # scene box from meta data
+        aabb = torch.tensor(meta["scene_box"]["aabb"], dtype=torch.float32)
+        scene_box = SceneBox(aabb=aabb)
+
+        height, width = meta["height"], meta["width"]
+        cameras = Cameras(
+            fx=fx,
+            fy=fy,
+            cx=cx,
+            cy=cy,
+            height=height,
+            width=width,
+            camera_to_worlds=camera_to_worlds[:, :3, :4],
+            camera_type=CameraType.PERSPECTIVE,
+        )
+
+        # TODO supports downsample
+        # cameras.rescale_output_resolution(scaling_factor=1.0 / self.config.downscale_factor)
+
+        dataparser_outputs = DataparserOutputs(
+            image_filenames=image_filenames,
+            cameras=cameras,
+            scene_box=scene_box,
+            metadata={
+                "depth_filenames": depth_filenames if len(depth_filenames) > 0 else None,
+                "normal_filenames": normal_filenames if len(normal_filenames) > 0 else None,
+                "transform": transform,
+                "camera_to_worlds": camera_to_worlds if len(camera_to_worlds) > 0 else None,
+                "include_mono_prior": self.config.include_mono_prior,
+            },
+        )
+        return dataparser_outputs
diff --git a/nerfstudio/data/datasets/sdf_dataset.py b/nerfstudio/data/datasets/sdf_dataset.py
new file mode 100644
index 0000000000..48e0cff6d0
--- /dev/null
+++ b/nerfstudio/data/datasets/sdf_dataset.py
@@ -0,0 +1,97 @@
+# Copyright 2022 The Nerfstudio Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+SDFStudio dataset.
+"""
+
+from pathlib import Path
+from typing import Dict
+
+import numpy as np
+import torch
+
+from nerfstudio.data.dataparsers.base_dataparser import DataparserOutputs
+from nerfstudio.data.datasets.base_dataset import InputDataset
+
+
+class SDFDataset(InputDataset):
+    """Dataset that returns images plus, when enabled, monocular depth and normal priors.
+
+    Args:
+        dataparser_outputs: description of where and how to read input images.
+        scale_factor: The scaling factor for the dataparser outputs.
+    """
+
+    def __init__(self, dataparser_outputs: DataparserOutputs, scale_factor: float = 1.0):
+        super().__init__(dataparser_outputs, scale_factor)
+
+        # can be none if monoprior not included
+        self.depth_filenames = self.metadata["depth_filenames"]
+        self.normal_filenames = self.metadata["normal_filenames"]
+        self.camera_to_worlds = self.metadata["camera_to_worlds"]
+        # can be none if auto orient not enabled in dataparser
+        self.transform = self.metadata["transform"]
+        self.include_mono_prior = self.metadata["include_mono_prior"]
+
+    def get_metadata(self, data: Dict) -> Dict:
+        """Return the mono depth/normal images for ``data["image_idx"]``, or an empty dict without priors."""
+        # TODO supports foreground_masks
+        metadata = {}
+        if self.include_mono_prior:
+            depth_filepath = self.depth_filenames[data["image_idx"]]
+            normal_filepath = self.normal_filenames[data["image_idx"]]
+            camtoworld = self.camera_to_worlds[data["image_idx"]]
+
+            # load the priors; normals come back rotated into the world frame
+            depth_image, normal_image = self.get_depths_and_normals(
+                depth_filepath=depth_filepath, normal_filename=normal_filepath, camtoworld=camtoworld
+            )
+            metadata["depth_image"] = depth_image
+            metadata["normal_image"] = normal_image
+
+        return metadata
+
+    def get_depths_and_normals(self, depth_filepath: Path, normal_filename: Path, camtoworld: torch.Tensor):
+        """function to process additional depths and normal information
+        Args:
+            depth_filepath: path to depth file
+            normal_filename: path to normal file
+            camtoworld: camera to world transformation matrix
+
+        Returns:
+            Tuple of the depth tensor and the normal map rotated into world coordinates.
+        """
+        # load mono depth
+        depth = torch.from_numpy(np.load(depth_filepath)).float()
+
+        # load mono normal
+        normal = np.load(normal_filename)
+        normal = normal * 2.0 - 1.0  # omnidata output is normalized so we convert it back to normal here
+        normal = torch.from_numpy(normal).float()
+
+        # transform normal to world coordinate system
+        rot = camtoworld[:3, :3]
+        normal_map = normal.reshape(3, -1)
+        normal_map = torch.nn.functional.normalize(normal_map, p=2, dim=0)
+        normal_map = rot @ normal_map
+        normal = normal_map.permute(1, 0).reshape(*normal.shape[1:], 3)
+
+        if self.transform is not None:
+            # apply the rotation part of the dataparser's auto-orient transform as well
+            h, w, _ = normal.shape
+            normal = self.transform[:3, :3] @ normal.reshape(-1, 3).permute(1, 0)
+            normal = normal.permute(1, 0).reshape(h, w, 3)
+
+        return depth, normal
diff --git a/scripts/downloads/download_data.py b/scripts/downloads/download_data.py
index 89c77391f5..b6775a3392 100755
--- a/scripts/downloads/download_data.py
+++ b/scripts/downloads/download_data.py
@@ -287,6 +287,75 @@ def download(self, save_dir: Path):
         os.remove(download_path)
 
 
+# SDFStudio dataset archives keyed by capture name; credit to https://autonomousvision.github.io/sdfstudio/
+# pylint: disable=line-too-long
+sdfstudio_downloads = {
+    "sdfstudio-demo-data": "https://s3.eu-central-1.amazonaws.com/avg-projects/monosdf/data/sdfstudio-demo-data.tar",
+    "dtu": "https://s3.eu-central-1.amazonaws.com/avg-projects/monosdf/data/DTU.tar",
+    "replica": "https://s3.eu-central-1.amazonaws.com/avg-projects/monosdf/data/Replica.tar",
+    "scannet": "https://s3.eu-central-1.amazonaws.com/avg-projects/monosdf/data/scannet.tar",
+    "tanks-and-temple": "https://s3.eu-central-1.amazonaws.com/avg-projects/monosdf/data/tnt_advanced.tar",
+    "tanks-and-temple-highres": "https://s3.eu-central-1.amazonaws.com/avg-projects/monosdf/data/highresTNT.tar",
+    "heritage": "https://s3.eu-central-1.amazonaws.com/avg-projects/monosdf/data/Heritage-Recon.tar",
+    "neural-rgbd-data": "http://kaldir.vc.in.tum.de/neural_rgbd/neural_rgbd_data.zip",
+    "all": None,  # no URL: SDFstudioDemoDownload.download fetches every other entry for this key
+}
+
+SDFstudioCaptureName = tyro.extras.literal_type_from_choices(sdfstudio_downloads.keys())  # type of the valid names
+
+
+@dataclass
+class SDFstudioDemoDownload(DatasetDownload):
+    """Download the sdfstudio dataset."""
+
+    dataset_name: SDFstudioCaptureName = "sdfstudio-demo-data"
+
+    def download(self, save_dir: Path):
+        """Download and extract the selected capture (every capture for "all") under ``save_dir``.
+
+        The temporary extraction folder and the downloaded archive are removed afterwards.
+        """
+        # TODO: give this code the same structure as download_nerfstudio
+        import subprocess  # pylint: disable=import-outside-toplevel
+
+        if self.dataset_name == "all":
+            for dataset_name in sdfstudio_downloads:
+                if dataset_name != "all":
+                    SDFstudioDemoDownload(dataset_name=dataset_name).download(save_dir)
+            return
+
+        assert (
+            self.dataset_name in sdfstudio_downloads
+        ), f"Capture name {self.dataset_name} not found in {sdfstudio_downloads.keys()}"
+
+        url = sdfstudio_downloads[self.dataset_name]
+        file_format = url[-4:]
+        if file_format not in (".tar", ".zip"):
+            raise NotImplementedError
+
+        target_path = str(save_dir / self.dataset_name)
+        os.makedirs(target_path, exist_ok=True)
+        download_path = Path(f"{target_path}{file_format}")
+        tmp_path = str(save_dir / ".temp")
+        shutil.rmtree(tmp_path, ignore_errors=True)
+        os.makedirs(tmp_path, exist_ok=True)
+
+        # Argument list instead of a shell string: paths with spaces work and a failed curl raises.
+        subprocess.run(["curl", "-L", url, "-o", str(download_path)], check=True)
+        if file_format == ".zip":
+            with zipfile.ZipFile(download_path, "r") as zip_ref:
+                zip_ref.extractall(str(target_path))
+        else:
+            with tarfile.open(download_path, "r") as tar_ref:
+                tar_ref.extractall(str(tmp_path))
+            inner_folders = os.listdir(tmp_path)
+            assert len(inner_folders) == 1, "There is more than one folder inside this tar file."
+            shutil.rmtree(target_path)
+            shutil.move(os.path.join(tmp_path, inner_folders[0]), target_path)
+        shutil.rmtree(tmp_path)
+        os.remove(download_path)
+
+
 Commands = Union[
     Annotated[BlenderDownload, tyro.conf.subcommand(name="blender")],
     Annotated[FriendsDownload, tyro.conf.subcommand(name="friends")],
@@ -294,6 +363,7 @@ def download(self, save_dir: Path):
     Annotated[Record3dDownload, tyro.conf.subcommand(name="record3d")],
     Annotated[DNerfDownload, tyro.conf.subcommand(name="dnerf")],
     Annotated[PhototourismDownload, tyro.conf.subcommand(name="phototourism")],
+    Annotated[SDFstudioDemoDownload, tyro.conf.subcommand(name="sdfstudio")],
 ]