Selection of the best colmap sparse model

When using the "exhaustive" method to perform matching, COLMAP generates N sparse models, always trying to improve upon the previous result. By default, the original code always selects the best first model ("colmap/sparse/0"), which is not always the best one. The modification proposes a scan of the "colmap/sparse" directory to locate all models, analyze the number of frames found in each one, and then save the "transforms.json" file based on the best model.
nerfstudio-project · Oct 25, 2024 · a9f1cf3 · a9f1cf3
1 parent cef9589
commit a9f1cf3
Show file tree

Hide file tree

Showing 3 changed files with 204 additions and 46 deletions.
diff --git a/.idea/workspace.xml b/.idea/workspace.xml
diff --git a/nerfstudio/process_data/colmap_converter_to_nerfstudio_dataset.py b/nerfstudio/process_data/colmap_converter_to_nerfstudio_dataset.py
@@ -14,12 +14,15 @@
 
 """Base class to processes a video or image sequence to a nerfstudio compatible dataset."""
 
+import os
 from dataclasses import dataclass
 from pathlib import Path
 from typing import Dict, List, Literal, Optional, Tuple
 
-from nerfstudio.process_data import colmap_utils, hloc_utils, process_data_utils
-from nerfstudio.process_data.base_converter_to_nerfstudio_dataset import BaseConverterToNerfstudioDataset
+from nerfstudio.process_data import (colmap_utils, hloc_utils,
+                                     process_data_utils)
+from nerfstudio.process_data.base_converter_to_nerfstudio_dataset import \
+    BaseConverterToNerfstudioDataset
 from nerfstudio.process_data.process_data_utils import CAMERA_MODELS
 from nerfstudio.utils import install_checks
 from nerfstudio.utils.rich_utils import CONSOLE
@@ -29,7 +32,9 @@
 class ColmapConverterToNerfstudioDataset(BaseConverterToNerfstudioDataset):
     """Base class to process images or video into a nerfstudio dataset using colmap"""
 
-    camera_type: Literal["perspective", "fisheye", "equirectangular", "pinhole", "simple_pinhole"] = "perspective"
+    camera_type: Literal[
+        "perspective", "fisheye", "equirectangular", "pinhole", "simple_pinhole"
+    ] = "perspective"
     """Camera model to use."""
     matching_method: Literal["exhaustive", "sequential", "vocab_tree"] = "vocab_tree"
     """Feature matching method to use. Vocab tree is recommended for a balance of speed
@@ -130,23 +135,49 @@ def _save_transforms(
             image_id_to_depth_path: When including sfm-based depth, embed these depth file paths in the exported json
             image_rename_map: Use these image names instead of the names embedded in the COLMAP db
         """
+
         summary_log = []
-        if (self.absolute_colmap_model_path / "cameras.bin").exists():
-            with CONSOLE.status("[bold yellow]Saving results to transforms.json", spinner="balloon"):
+        models_path = self.absolute_colmap_path.joinpath("sparse")
+        if os.path.exists(models_path):
+            matched_frame_counts = []
+            models = os.listdir(str(self.absolute_colmap_path.joinpath("sparse")))
+            for model in models:
+                if (models_path / model / "cameras.bin").exists():
+                    model_path = models_path / model
+                    num_matched_frames = colmap_utils.colmap_to_json(
+                        recon_dir=model_path,
+                        output_dir=self.output_dir,
+                        image_id_to_depth_path=image_id_to_depth_path,
+                        camera_mask_path=camera_mask_path,
+                        image_rename_map=image_rename_map,
+                        use_single_camera_mode=self.use_single_camera_mode,
+                        save_output=False,
+                    )
+                    matched_frame_counts.append(num_matched_frames)
+                else:
+                    matched_frame_counts.append(-1)
+
+            best_model_index = matched_frame_counts.index(max(matched_frame_counts))
+            best_model_path = models_path / models[best_model_index]
+            with CONSOLE.status(
+                "[bold yellow]Saving results to transforms.json", spinner="balloon"
+            ):
                 num_matched_frames = colmap_utils.colmap_to_json(
-                    recon_dir=self.absolute_colmap_model_path,
+                    recon_dir=best_model_path,
                     output_dir=self.output_dir,
                     image_id_to_depth_path=image_id_to_depth_path,
                     camera_mask_path=camera_mask_path,
                     image_rename_map=image_rename_map,
                     use_single_camera_mode=self.use_single_camera_mode,
                 )
                 summary_log.append(f"Colmap matched {num_matched_frames} images")
-            summary_log.append(colmap_utils.get_matching_summary(num_frames, num_matched_frames))
-
+            summary_log.append(
+                colmap_utils.get_matching_summary(num_frames, num_matched_frames)
+            )
         else:
             CONSOLE.log(
-                "[bold yellow]Warning: Could not find existing COLMAP results. " "Not generating transforms.json"
+                "[bold yellow]Warning: Could not find existing COLMAP results. "
+                "Not generating transforms.json"
             )
         return summary_log
 
@@ -202,7 +233,9 @@ def _run_colmap(self, mask_path: Optional[Path] = None):
 
         # check that sfm_tool is hloc if using use_single_camera_mode
         if not self.use_single_camera_mode:
-            assert sfm_tool == "hloc", "not_use_single_camera_mode only works with sfm_tool hloc"
+            assert (
+                sfm_tool == "hloc"
+            ), "not_use_single_camera_mode only works with sfm_tool hloc"
 
         # set the image_dir if didn't copy
         if self.skip_image_processing:
@@ -224,7 +257,10 @@ def _run_colmap(self, mask_path: Optional[Path] = None):
             )
         elif sfm_tool == "hloc":
             if mask_path is not None:
-                raise RuntimeError("Cannot use a mask with hloc. Please remove the cropping options " "and try again.")
+                raise RuntimeError(
+                    "Cannot use a mask with hloc. Please remove the cropping options "
+                    "and try again."
+                )
 
             assert feature_type is not None
             assert matcher_type is not None
@@ -241,7 +277,10 @@ def _run_colmap(self, mask_path: Optional[Path] = None):
                 use_single_camera_mode=self.use_single_camera_mode,
             )
         else:
-            raise RuntimeError("Invalid combination of sfm_tool, feature_type, and matcher_type, " "exiting")
+            raise RuntimeError(
+                "Invalid combination of sfm_tool, feature_type, and matcher_type, "
+                "exiting"
+            )
 
     def __post_init__(self) -> None:
         super().__post_init__()