Make colmapDataParser compatible with 360_v2 dataset format (nerfstudio-project#2860)

Jing1Ling · Jing · web-flow · commit 26804f812490 · 2024-04-08T12:07:21.000-04:00
* added an option to colmapdataparser to round up the image size when downscaling

* add round mode and update ffmpeg command

* [fix] wrong variable order

* update format

---------

Co-authored-by: Jing <jing1ling@intel.com>
diff --git a/nerfstudio/cameras/cameras.py b/nerfstudio/cameras/cameras.py
@@ -984,12 +984,15 @@ def get_intrinsics_matrices(self) -> Float[Tensor, "*num_cameras 3 3"]:
         return K
 
     def rescale_output_resolution(
-        self, scaling_factor: Union[Shaped[Tensor, "*num_cameras"], Shaped[Tensor, "*num_cameras 1"], float, int]
+        self,
+        scaling_factor: Union[Shaped[Tensor, "*num_cameras"], Shaped[Tensor, "*num_cameras 1"], float, int],
+        scale_rounding_mode: str = "floor",
     ) -> None:
         """Rescale the output resolution of the cameras.
 
         Args:
             scaling_factor: Scaling factor to apply to the output resolution.
+            scale_rounding_mode: round down or round up when calculating the scaled image height and width
         """
         if isinstance(scaling_factor, (float, int)):
             scaling_factor = torch.tensor([scaling_factor]).to(self.device).broadcast_to((self.cx.shape))
@@ -1006,5 +1009,14 @@ def rescale_output_resolution(
         self.fy = self.fy * scaling_factor
         self.cx = self.cx * scaling_factor
         self.cy = self.cy * scaling_factor
-        self.height = (self.height * scaling_factor).to(torch.int64)
-        self.width = (self.width * scaling_factor).to(torch.int64)
+        if scale_rounding_mode == "floor":
+            self.height = (self.height * scaling_factor).to(torch.int64)
+            self.width = (self.width * scaling_factor).to(torch.int64)
+        elif scale_rounding_mode == "round":
+            self.height = torch.floor(0.5 + (self.height * scaling_factor)).to(torch.int64)
+            self.width = torch.floor(0.5 + (self.width * scaling_factor)).to(torch.int64)
+        elif scale_rounding_mode == "ceil":
+            self.height = torch.ceil(self.height * scaling_factor).to(torch.int64)
+            self.width = torch.ceil(self.width * scaling_factor).to(torch.int64)
+        else:
+            raise ValueError("Scale rounding mode must be 'floor', 'round' or 'ceil'.")
diff --git a/nerfstudio/data/dataparsers/colmap_dataparser.py b/nerfstudio/data/dataparsers/colmap_dataparser.py
@@ -15,6 +15,7 @@
 
 from __future__ import annotations
 
+import math
 import sys
 from dataclasses import dataclass, field
 from functools import partial
@@ -56,6 +57,8 @@ class ColmapDataParserConfig(DataParserConfig):
     """How much to scale the camera origins by."""
     downscale_factor: Optional[int] = None
     """How much to downscale images. If not set, images are chosen such that the max dimension is <1600px."""
+    downscale_rounding_mode: Literal["floor", "round", "ceil"] = "floor"
+    """How to round downscale image height and Image width."""
     scene_scale: float = 1.0
     """How much to scale the region of interest by."""
     orientation_method: Literal["pca", "up", "vertical", "none"] = "up"
@@ -355,7 +358,9 @@ def _generate_dataparser_outputs(self, split: str = "train", **kwargs):
             camera_type=camera_type,
         )
 
-        cameras.rescale_output_resolution(scaling_factor=1.0 / downscale_factor)
+        cameras.rescale_output_resolution(
+            scaling_factor=1.0 / downscale_factor, scale_rounding_mode=self.config.downscale_rounding_mode
+        )
 
         if "applied_transform" in meta:
             applied_transform = torch.tensor(meta["applied_transform"], dtype=transform_matrix.dtype)
@@ -452,18 +457,39 @@ def _load_3D_points(self, colmap_path: Path, transform_matrix: torch.Tensor, sca
             out["points3D_points2D_xy"] = torch.stack(points3D_image_xy, dim=0)
         return out
 
-    def _downscale_images(self, paths, get_fname, downscale_factor: int, nearest_neighbor: bool = False):
+    def _downscale_images(
+        self,
+        paths,
+        get_fname,
+        downscale_factor: int,
+        downscale_rounding_mode: str = "floor",
+        nearest_neighbor: bool = False,
+    ):
+        def calculate_scaled_size(original_width, original_height, downscale_factor, mode="floor"):
+            if mode == "floor":
+                return math.floor(original_width / downscale_factor), math.floor(original_height / downscale_factor)
+            elif mode == "round":
+                return round(original_width / downscale_factor), round(original_height / downscale_factor)
+            elif mode == "ceil":
+                return math.ceil(original_width / downscale_factor), math.ceil(original_height / downscale_factor)
+            else:
+                raise ValueError("Invalid mode. Choose from 'floor', 'round', or 'ceil'.")
+
         with status(msg="[bold yellow]Downscaling images...", spinner="growVertical"):
             assert downscale_factor > 1
             assert isinstance(downscale_factor, int)
+            filepath = next(iter(paths))
+            img = Image.open(filepath)
+            w, h = img.size
+            w_scaled, h_scaled = calculate_scaled_size(w, h, downscale_factor, downscale_rounding_mode)
             # Using %05d ffmpeg commands appears to be unreliable (skips images).
             for path in paths:
                 nn_flag = "" if not nearest_neighbor else ":flags=neighbor"
                 path_out = get_fname(path)
                 path_out.parent.mkdir(parents=True, exist_ok=True)
                 ffmpeg_cmd = [
                     f'ffmpeg -y -noautorotate -i "{path}" ',
-                    f"-q:v 2 -vf scale=iw/{downscale_factor}:ih/{downscale_factor}{nn_flag} ",
+                    f"-q:v 2 -vf scale={w_scaled}:{h_scaled}{nn_flag} ",
                     f'"{path_out}"',
                 ]
                 ffmpeg_cmd = " ".join(ffmpeg_cmd)
@@ -488,7 +514,7 @@ def get_fname(parent: Path, filepath: Path) -> Path:
         if self._downscale_factor is None:
             if self.config.downscale_factor is None:
                 test_img = Image.open(filepath)
-                h, w = test_img.size
+                w, h = test_img.size
                 max_res = max(h, w)
                 df = 0
                 while True:
@@ -508,12 +534,17 @@ def get_fname(parent: Path, filepath: Path) -> Path:
                 CONSOLE.print(
                     f"[bold red]Downscaled images do not exist for factor of {self._downscale_factor}.[/bold red]"
                 )
-                if Confirm.ask("\nWould you like to downscale the images now?", default=False, console=CONSOLE):
+                if Confirm.ask(
+                    f"\nWould you like to downscale the images using '{self.config.downscale_rounding_mode}' rounding mode now?",
+                    default=False,
+                    console=CONSOLE,
+                ):
                     # Install the method
                     self._downscale_images(
                         image_filenames,
                         partial(get_fname, self.config.data / self.config.images_path),
                         self._downscale_factor,
+                        self.config.downscale_rounding_mode,
                         nearest_neighbor=False,
                     )
                     if len(mask_filenames) > 0:
@@ -522,6 +553,7 @@ def get_fname(parent: Path, filepath: Path) -> Path:
                             mask_filenames,
                             partial(get_fname, self.config.data / self.config.masks_path),
                             self._downscale_factor,
+                            self.config.downscale_rounding_mode,
                             nearest_neighbor=True,
                         )
                     if len(depth_filenames) > 0:
@@ -530,6 +562,7 @@ def get_fname(parent: Path, filepath: Path) -> Path:
                             depth_filenames,
                             partial(get_fname, self.config.data / self.config.depths_path),
                             self._downscale_factor,
+                            self.config.downscale_rounding_mode,
                             nearest_neighbor=True,
                         )
                 else: