From 07d98bb410554dfa5f48244b98f38db04db2d097 Mon Sep 17 00:00:00 2001 From: Matt Tancik Date: Sun, 11 Dec 2022 12:36:03 -0800 Subject: [PATCH 1/2] Save out dataparser transforms --- nerfstudio/cameras/camera_utils.py | 12 +- .../data/datamanagers/base_datamanager.py | 7 +- .../data/dataparsers/base_dataparser.py | 19 +++ .../data/dataparsers/blender_dataparser.py | 1 + .../data/dataparsers/dnerf_dataparser.py | 6 +- .../data/dataparsers/friends_dataparser.py | 1 + .../dataparsers/instant_ngp_dataparser.py | 1 + .../data/dataparsers/nerfstudio_dataparser.py | 9 +- .../dataparsers/phototourism_dataparser.py | 9 +- .../data/dataparsers/record3d_dataparser.py | 160 ------------------ nerfstudio/engine/trainer.py | 4 + 11 files changed, 54 insertions(+), 175 deletions(-) delete mode 100644 nerfstudio/data/dataparsers/record3d_dataparser.py diff --git a/nerfstudio/cameras/camera_utils.py b/nerfstudio/cameras/camera_utils.py index 97599d239e..d3b0ca619e 100644 --- a/nerfstudio/cameras/camera_utils.py +++ b/nerfstudio/cameras/camera_utils.py @@ -409,7 +409,7 @@ def rotation_matrix(a: TensorType[3], b: TensorType[3]) -> TensorType[3, 3]: def auto_orient_and_center_poses( poses: TensorType["num_poses":..., 4, 4], method: Literal["pca", "up", "none"] = "up", center_poses: bool = True -) -> TensorType["num_poses":..., 3, 4]: +) -> Tuple[TensorType["num_poses":..., 3, 4], TensorType[4, 4]]: """Orients and centers the poses. We provide two methods for orientation: pca and up. pca: Orient the poses so that the principal component of the points is aligned with the axes. @@ -424,7 +424,7 @@ def auto_orient_and_center_poses( center_poses: If True, the poses are centered around the origin. Returns: - The oriented poses. + Tuple of the oriented poses and the transform matrix. """ translation = poses[..., :3, 3] @@ -457,7 +457,9 @@ def auto_orient_and_center_poses( transform = torch.cat([rotation, rotation @ -translation[..., None]], dim=-1) oriented_poses = transform @ poses elif method == "none": - oriented_poses = poses[:, :3] - oriented_poses[..., 3] -= translation + transform = torch.eye(4) + transform[:3, 3] = -translation + transform = transform[:3, :] + oriented_poses = transform @ poses - return oriented_poses + return oriented_poses, transform diff --git a/nerfstudio/data/datamanagers/base_datamanager.py b/nerfstudio/data/datamanagers/base_datamanager.py index 7adf8cd053..7f1282c87c 100644 --- a/nerfstudio/data/datamanagers/base_datamanager.py +++ b/nerfstudio/data/datamanagers/base_datamanager.py @@ -35,6 +35,7 @@ from nerfstudio.cameras.cameras import CameraType from nerfstudio.cameras.rays import RayBundle from nerfstudio.configs.base_config import InstantiateConfig +from nerfstudio.data.dataparsers.base_dataparser import DataparserOutputs from nerfstudio.data.dataparsers.blender_dataparser import BlenderDataParserConfig from nerfstudio.data.dataparsers.dnerf_dataparser import DNeRFDataParserConfig from nerfstudio.data.dataparsers.friends_dataparser import FriendsDataParserConfig @@ -46,7 +47,6 @@ from nerfstudio.data.dataparsers.phototourism_dataparser import ( PhototourismDataParserConfig, ) -from nerfstudio.data.dataparsers.record3d_dataparser import Record3DDataParserConfig from nerfstudio.data.datasets.base_dataset import InputDataset from nerfstudio.data.pixel_samplers import EquirectangularPixelSampler, PixelSampler from nerfstudio.data.utils.dataloaders import ( @@ -69,7 +69,6 @@ "friends-data": FriendsDataParserConfig(), "instant-ngp-data": InstantNGPDataParserConfig(), "nuscenes-data": NuScenesDataParserConfig(), - "record3d-data": Record3DDataParserConfig(), "dnerf-data": DNeRFDataParserConfig(), "phototourism-data": PhototourismDataParserConfig(), }, @@ -296,6 +295,7 @@ class VanillaDataManager(DataManager): # pylint: disable=abstract-method config: VanillaDataManagerConfig train_dataset: InputDataset eval_dataset: InputDataset + train_dataparser_outputs: DataparserOutputs def __init__( self, @@ -321,8 +321,9 @@ def __init__( def create_train_dataset(self) -> InputDataset: """Sets up the data loaders for training""" + self.train_dataparser_outputs = self.dataparser.get_dataparser_outputs(split="train") return InputDataset( - dataparser_outputs=self.dataparser.get_dataparser_outputs(split="train"), + dataparser_outputs=self.train_dataparser_outputs, scale_factor=self.config.camera_res_scale_factor, ) diff --git a/nerfstudio/data/dataparsers/base_dataparser.py b/nerfstudio/data/dataparsers/base_dataparser.py index 3b99f274e5..358193857a 100644 --- a/nerfstudio/data/dataparsers/base_dataparser.py +++ b/nerfstudio/data/dataparsers/base_dataparser.py @@ -16,6 +16,7 @@ from __future__ import annotations +import json from abc import abstractmethod from dataclasses import dataclass, field from pathlib import Path @@ -63,11 +64,29 @@ class DataparserOutputs: """Dictionary of any metadata that be required for the given experiment. Will be processed by the InputDataset to create any additional tensors that may be required. """ + dataparser_transform: TensorType[3, 4] = torch.eye(4)[:3, :] + """Transform applied by the dataparser.""" + dataparser_scale: float = 1.0 + """Scale applied by the dataparser.""" def as_dict(self) -> dict: """Returns the dataclass as a dictionary.""" return vars(self) + def save_dataparser_transform(self, path: Path): + """Save dataparser transform to json file. Some dataparsers will apply a transform to the poses, + this method allows the transform to be saved so that it can be used in other applications. + + Args: + path: path to save transform to + """ + data = { + "transform": self.dataparser_transform.tolist(), + "scale": self.dataparser_scale, + } + with open(path, "w", encoding="UTF-8") as file: + json.dump(data, file, indent=4) + @dataclass class DataParserConfig(cfg.InstantiateConfig): diff --git a/nerfstudio/data/dataparsers/blender_dataparser.py b/nerfstudio/data/dataparsers/blender_dataparser.py index 0f31acff1e..f1044e8265 100644 --- a/nerfstudio/data/dataparsers/blender_dataparser.py +++ b/nerfstudio/data/dataparsers/blender_dataparser.py @@ -104,6 +104,7 @@ def _generate_dataparser_outputs(self, split="train"): cameras=cameras, alpha_color=alpha_color_tensor, scene_box=scene_box, + dataparser_scale=self.scale_factor, ) return dataparser_outputs diff --git a/nerfstudio/data/dataparsers/dnerf_dataparser.py b/nerfstudio/data/dataparsers/dnerf_dataparser.py index 56a9f232b0..c26621e924 100644 --- a/nerfstudio/data/dataparsers/dnerf_dataparser.py +++ b/nerfstudio/data/dataparsers/dnerf_dataparser.py @@ -102,7 +102,11 @@ def _generate_dataparser_outputs(self, split="train"): ) dataparser_outputs = DataparserOutputs( - image_filenames=image_filenames, cameras=cameras, alpha_color=alpha_color_tensor, scene_box=scene_box + image_filenames=image_filenames, + cameras=cameras, + alpha_color=alpha_color_tensor, + scene_box=scene_box, + dataparser_scale=self.scale_factor, ) return dataparser_outputs diff --git a/nerfstudio/data/dataparsers/friends_dataparser.py b/nerfstudio/data/dataparsers/friends_dataparser.py index 06fceef3c1..604dbd7c02 100644 --- a/nerfstudio/data/dataparsers/friends_dataparser.py +++ b/nerfstudio/data/dataparsers/friends_dataparser.py @@ -146,5 +146,6 @@ def _generate_dataparser_outputs(self, split="train"): # pylint: disable=unused cameras=cameras, scene_box=scene_box, metadata={"semantics": semantics} if self.config.include_semantics else {}, + dataparser_scale=scale, ) return dataparser_outputs diff --git a/nerfstudio/data/dataparsers/instant_ngp_dataparser.py b/nerfstudio/data/dataparsers/instant_ngp_dataparser.py index 1e5a69b996..289e23d558 100644 --- a/nerfstudio/data/dataparsers/instant_ngp_dataparser.py +++ b/nerfstudio/data/dataparsers/instant_ngp_dataparser.py @@ -115,6 +115,7 @@ def _generate_dataparser_outputs(self, split="train"): image_filenames=image_filenames, cameras=cameras, scene_box=scene_box, + dataparser_scale=self.config.scene_scale, ) return dataparser_outputs diff --git a/nerfstudio/data/dataparsers/nerfstudio_dataparser.py b/nerfstudio/data/dataparsers/nerfstudio_dataparser.py index 347a292a57..d2c8c2d2e0 100644 --- a/nerfstudio/data/dataparsers/nerfstudio_dataparser.py +++ b/nerfstudio/data/dataparsers/nerfstudio_dataparser.py @@ -181,7 +181,7 @@ def _generate_dataparser_outputs(self, split="train"): orientation_method = self.config.orientation_method poses = torch.from_numpy(np.array(poses).astype(np.float32)) - poses = camera_utils.auto_orient_and_center_poses( + poses, transform_matrix = camera_utils.auto_orient_and_center_poses( poses, method=orientation_method, center_poses=self.config.center_poses, @@ -190,9 +190,10 @@ def _generate_dataparser_outputs(self, split="train"): # Scale poses scale_factor = 1.0 if self.config.auto_scale_poses: - scale_factor /= torch.max(torch.abs(poses[:, :3, 3])) + scale_factor /= float(torch.max(torch.abs(poses[:, :3, 3]))) + scale_factor *= self.config.scale_factor - poses[:, :3, 3] *= scale_factor * self.config.scale_factor + poses[:, :3, 3] *= scale_factor # Choose image_filenames and poses based on split, but after auto orient and scaling the poses. image_filenames = [image_filenames[i] for i in indices] @@ -252,6 +253,8 @@ def _generate_dataparser_outputs(self, split="train"): cameras=cameras, scene_box=scene_box, mask_filenames=mask_filenames if len(mask_filenames) > 0 else None, + dataparser_scale=scale_factor, + dataparser_transform=transform_matrix, ) return dataparser_outputs diff --git a/nerfstudio/data/dataparsers/phototourism_dataparser.py b/nerfstudio/data/dataparsers/phototourism_dataparser.py index f0f6987e05..bb401d9dc6 100644 --- a/nerfstudio/data/dataparsers/phototourism_dataparser.py +++ b/nerfstudio/data/dataparsers/phototourism_dataparser.py @@ -137,16 +137,17 @@ def _generate_dataparser_outputs(self, split="train"): else: raise ValueError(f"Unknown dataparser split {split}") - poses = camera_utils.auto_orient_and_center_poses( + poses, transform_matrix = camera_utils.auto_orient_and_center_poses( poses, method=self.config.orientation_method, center_poses=self.config.center_poses ) # Scale poses scale_factor = 1.0 if self.config.auto_scale_poses: - scale_factor /= torch.max(torch.abs(poses[:, :3, 3])) + scale_factor /= float(torch.max(torch.abs(poses[:, :3, 3]))) + scale_factor *= self.config.scale_factor - poses[:, :3, 3] *= scale_factor * self.config.scale_factor + poses[:, :3, 3] *= scale_factor # in x,y,z order # assumes that the scene is centered at the origin @@ -175,6 +176,8 @@ def _generate_dataparser_outputs(self, split="train"): image_filenames=image_filenames, cameras=cameras, scene_box=scene_box, + dataparser_scale=scale_factor, + dataparser_transform=transform_matrix, ) return dataparser_outputs diff --git a/nerfstudio/data/dataparsers/record3d_dataparser.py b/nerfstudio/data/dataparsers/record3d_dataparser.py deleted file mode 100644 index 099470d015..0000000000 --- a/nerfstudio/data/dataparsers/record3d_dataparser.py +++ /dev/null @@ -1,160 +0,0 @@ -# Copyright 2022 The Nerfstudio Team. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Data parser for record3d dataset""" -from __future__ import annotations - -from dataclasses import dataclass, field -from pathlib import Path -from typing import Type - -import numpy as np -import torch -from rich.console import Console -from scipy.spatial.transform import Rotation -from typing_extensions import Literal - -from nerfstudio.cameras import camera_utils -from nerfstudio.cameras.cameras import Cameras, CameraType -from nerfstudio.data.dataparsers.base_dataparser import ( - DataParser, - DataParserConfig, - DataparserOutputs, -) -from nerfstudio.data.scene_box import SceneBox -from nerfstudio.utils import poses as pose_utils -from nerfstudio.utils.io import load_from_json - -CONSOLE = Console(width=120) - - -@dataclass -class Record3DDataParserConfig(DataParserConfig): - """Record3D dataset config""" - - _target: Type = field(default_factory=lambda: Record3D) - """target class to instantiate""" - data: Path = Path("data/record3d/bear") - """Location of data""" - val_skip: int = 8 - """1/val_skip images to use for validation.""" - aabb_scale: float = 4.0 - """Scene scale.""" - orientation_method: Literal["pca", "up"] = "up" - """The method to use for orientation""" - max_dataset_size: int = 300 - """Max number of images to train on. If the dataset has more, images will be sampled approximately evenly. If -1, - use all images.""" - - -@dataclass -class Record3D(DataParser): - """Record3D Dataset""" - - config: Record3DDataParserConfig - - def _generate_dataparser_outputs(self, split: str = "train") -> DataparserOutputs: - - CONSOLE.print( - "[bold red]DEPRECATION WARNING: The Record3D dataparser will be deprecated in future versions. " - "Use `ns-data-process record3d` to convert the data into the nerfstudio format instead." - ) - - image_dir = self.config.data / "rgb" - - if not image_dir.exists(): - raise ValueError(f"Image directory {image_dir} doesn't exist") - - image_filenames = [] - for f in image_dir.iterdir(): - if f.stem.isdigit(): # removes possible duplicate images (for example, 123(3).jpg) - image_filenames.append(f) - - image_filenames = sorted(image_filenames, key=lambda fn: int(fn.stem)) - image_filenames = np.array(image_filenames) - num_images = len(image_filenames) - - metadata_path = self.config.data / "metadata.json" - metadata_dict = load_from_json(metadata_path) - - poses_data = np.array(metadata_dict["poses"]) - # (N, 3, 4) - poses = np.concatenate( - [Rotation.from_quat(poses_data[:, :4]).as_matrix(), poses_data[:, 4:, None]], - axis=-1, - ).astype(np.float32) - - if self.config.max_dataset_size != -1 and num_images > self.config.max_dataset_size: - # Evenly select max_dataset_size images from dataset, including first - # and last indices. - idx = np.round(np.linspace(0, num_images - 1, self.config.max_dataset_size)).astype(int) - poses = poses[idx] - image_filenames = image_filenames[idx] - num_images = len(image_filenames) - - idx_test = np.arange(num_images)[:: self.config.val_skip] - idx_train = np.array([i for i in np.arange(num_images) if i not in idx_test]) - idx = idx_train if split == "train" else idx_test - if num_images != poses.shape[0]: - raise RuntimeError(f"Different number of images ({num_images}), and poses ({poses.shape[0]})") - - image_filenames = image_filenames[idx] - poses = poses[idx] - - # convert to Tensors - poses = torch.from_numpy(poses[:, :3, :4]) - - poses = camera_utils.auto_orient_and_center_poses( - pose_utils.to4x4(poses), method=self.config.orientation_method - )[:, :3, :4] - - # Centering poses - poses[:, :3, 3] = poses[:, :3, 3] - torch.mean(poses[:, :3, 3], dim=0) - poses = pose_utils.normalize(poses) - - # Camera intrinsics - K = np.array(metadata_dict["K"]).reshape((3, 3)).T - focal_length = K[0, 0] - - H = metadata_dict["h"] - W = metadata_dict["w"] - - # TODO(akristoffersen): The metadata dict comes with principle points, - # but caused errors in image coord indexing. Should update once that is fixed. - cx, cy = W / 2, H / 2 - - num_cameras = len(image_filenames) - num_intrinsics_params = 3 - intrinsics = torch.ones((num_cameras, num_intrinsics_params), dtype=torch.float32) - intrinsics *= torch.tensor([cx, cy, focal_length]) - - aabb = torch.tensor([[-1, -1, -1], [1, 1, 1]], dtype=torch.float32) * self.config.aabb_scale - scene_box = SceneBox(aabb=aabb) - - cameras = Cameras( - fx=focal_length, - fy=focal_length, - cx=cx, - cy=cy, - camera_to_worlds=poses, - camera_type=CameraType.PERSPECTIVE, - ) - - dataparser_outputs = DataparserOutputs( - image_filenames=image_filenames, - cameras=cameras, - scene_box=scene_box, - ) - - return dataparser_outputs diff --git a/nerfstudio/engine/trainer.py b/nerfstudio/engine/trainer.py index ae9da1ec2c..445788fbff 100644 --- a/nerfstudio/engine/trainer.py +++ b/nerfstudio/engine/trainer.py @@ -132,6 +132,10 @@ def train(self) -> None: """Train the model.""" assert self.pipeline.datamanager.train_dataset is not None, "Missing DatsetInputs" + self.pipeline.datamanager.train_dataparser_outputs.save_dataparser_transform( + self.base_dir / "dataparser_transforms.json" + ) + self._init_viewer_state() with TimeWriter(writer, EventName.TOTAL_TRAIN_TIME): num_iterations = self.config.trainer.max_num_iterations From df042b904d8f196726d788349ece291d6811bffd Mon Sep 17 00:00:00 2001 From: Matt Tancik Date: Sun, 11 Dec 2022 12:51:00 -0800 Subject: [PATCH 2/2] Make missing folder --- nerfstudio/data/dataparsers/base_dataparser.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/nerfstudio/data/dataparsers/base_dataparser.py b/nerfstudio/data/dataparsers/base_dataparser.py index 358193857a..0caf24cea3 100644 --- a/nerfstudio/data/dataparsers/base_dataparser.py +++ b/nerfstudio/data/dataparsers/base_dataparser.py @@ -84,6 +84,8 @@ def save_dataparser_transform(self, path: Path): "transform": self.dataparser_transform.tolist(), "scale": self.dataparser_scale, } + if not path.parent.exists(): + path.parent.mkdir(parents=True) with open(path, "w", encoding="UTF-8") as file: json.dump(data, file, indent=4)