37 changes: 34 additions & 3 deletions nerfstudio/process_data/colmap_converter_to_nerfstudio_dataset.py
@@ -31,10 +31,10 @@ class ColmapConverterToNerfstudioDataset(BaseConverterToNerfstudioDataset):

camera_type: Literal["perspective", "fisheye", "equirectangular", "pinhole", "simple_pinhole"] = "perspective"
"""Camera model to use."""
matching_method: Literal["exhaustive", "sequential", "vocab_tree"] = "vocab_tree"
matching_method: Literal["exhaustive", "sequential", "vocab_tree", "spatial"] = "vocab_tree"
"""Feature matching method to use. Vocab tree is recommended for a balance of speed
and accuracy. Exhaustive is slower but more accurate. Sequential is faster but
should only be used for videos."""
should only be used for videos. Spatial matching can leverage EXIF GPS priors for image pairing."""
sfm_tool: Literal["any", "colmap", "hloc"] = "any"
"""Structure from motion tool to use. Colmap will use sift features, hloc can use
many modern methods such as superpoint features and superglue matcher"""
@@ -104,6 +104,26 @@ class ColmapConverterToNerfstudioDataset(BaseConverterToNerfstudioDataset):
use_single_camera_mode: bool = True
"""Whether to assume all images taken with the same camera characteristics, set to False for multiple cameras in colmap (only works with hloc sfm_tool).
"""
# New options for pose priors and alignment
use_pose_prior: bool = False
"""If True, use EXIF pose priors by running pose_prior_mapper and optionally align to priors."""
prior_position_std: float = 2.0
"""Standard deviation (meters) for x/y/z prior used by pose_prior_mapper."""
overwrite_priors_covariance: bool = True
"""Whether to overwrite priors covariance in database when running pose_prior_mapper."""
align_model_to_priors: bool = False
"""If True, run model_aligner to align the reconstruction to GPS priors (writes back into sparse/0)."""
alignment_max_error: Optional[float] = None
"""Max alignment error for model_aligner. Defaults to prior_position_std if not set."""
# Normalization options
normalize_model: bool = False
"""If True, apply model_transformer to center and scale the reconstructed model for numeric stability."""
normalization_center: Literal["bbox", "mean"] = "bbox"
"""How to compute the model center for normalization (bbox center or mean point)."""
normalization_target_diagonal: float = 4.0
"""Target diagonal length (meters) for the normalized model if no explicit scale is provided."""
normalization_scale: Optional[float] = None
"""Explicit normalization scale; if set, overrides normalization_target_diagonal."""

@staticmethod
def default_colmap_path() -> Path:
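The new fields above are plain config-dataclass attributes, so they flow straight through to run_colmap (see the _run_colmap hunk below). A minimal usage sketch, not part of this PR, assuming the ImagesToNerfstudioDataset subclass and its data/output_dir fields are unchanged and that it forwards these new options as shown in the diff:

from pathlib import Path

from nerfstudio.process_data.images_to_nerfstudio_dataset import ImagesToNerfstudioDataset

converter = ImagesToNerfstudioDataset(
    data=Path("data/my_scene"),              # hypothetical input image folder
    output_dir=Path("outputs/my_scene"),     # hypothetical output folder
    matching_method="spatial",               # pair images using EXIF GPS priors
    use_pose_prior=True,                     # run pose_prior_mapper instead of mapper
    prior_position_std=2.0,                  # meters, applied symmetrically to x/y/z
    align_model_to_priors=True,              # model_aligner writes back into sparse/0
    normalize_model=True,                    # recenter and rescale via model_transformer
    normalization_target_diagonal=4.0,       # target bounding-box diagonal in meters
)
converter.main()  # runs the full conversion, including the COLMAP steps below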
@@ -219,6 +239,15 @@ def _run_colmap(self, mask_path: Optional[Path] = None):
matching_method=self.matching_method,
refine_intrinsics=self.refine_intrinsics,
colmap_cmd=self.colmap_cmd,
use_pose_prior=self.use_pose_prior,
prior_position_std=self.prior_position_std,
overwrite_priors_covariance=self.overwrite_priors_covariance,
align_model_to_priors=self.align_model_to_priors,
alignment_max_error=self.alignment_max_error,
normalize_model=self.normalize_model,
normalization_center=self.normalization_center,
normalization_target_diagonal=self.normalization_target_diagonal,
normalization_scale=self.normalization_scale,
)
elif sfm_tool == "hloc":
if mask_path is not None:
@@ -227,12 +256,14 @@ def _run_colmap(self, mask_path: Optional[Path] = None):
assert feature_type is not None
assert matcher_type is not None
assert matcher_type != "NN" # Only used for colmap.
# hloc does not support 'spatial' matching_method; map it to 'vocab_tree' for compatibility
hloc_matching_method = self.matching_method if self.matching_method != "spatial" else "vocab_tree"
hloc_utils.run_hloc(
image_dir=image_dir,
colmap_dir=self.absolute_colmap_path,
camera_model=CAMERA_MODELS[self.camera_type],
verbose=self.verbose,
matching_method=self.matching_method,
matching_method=hloc_matching_method,
feature_type=feature_type,
matcher_type=matcher_type,
refine_pixsfm=self.refine_pixsfm,
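Note that the fallback above is silent: a user who requests spatial matching together with the hloc sfm_tool gets vocab_tree without any notice. A small, hypothetical alternative (not in this PR) that logs the substitution, assuming the CONSOLE object from nerfstudio.utils.rich_utils:

from nerfstudio.utils.rich_utils import CONSOLE

def resolve_hloc_matching_method(matching_method: str) -> str:
    """Return a matching method hloc supports, warning when 'spatial' is downgraded."""
    if matching_method == "spatial":
        CONSOLE.print("[bold yellow]hloc has no spatial matcher; falling back to vocab_tree.")
        return "vocab_tree"
    return matching_method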
107 changes: 98 additions & 9 deletions nerfstudio/process_data/colmap_utils.py
@@ -96,9 +96,18 @@ def run_colmap(
camera_mask_path: Optional[Path] = None,
gpu: bool = True,
verbose: bool = False,
matching_method: Literal["vocab_tree", "exhaustive", "sequential"] = "vocab_tree",
matching_method: Literal["vocab_tree", "exhaustive", "sequential", "spatial"] = "vocab_tree",
refine_intrinsics: bool = True,
colmap_cmd: str = "colmap",
use_pose_prior: bool = False,
prior_position_std: float = 2.0,
overwrite_priors_covariance: bool = True,
align_model_to_priors: bool = False,
alignment_max_error: Optional[float] = None,
normalize_model: bool = False,
normalization_center: Literal["bbox", "mean"] = "bbox",
normalization_target_diagonal: float = 4.0,
normalization_scale: Optional[float] = None,
) -> None:
"""Runs COLMAP on the images.

@@ -112,6 +121,15 @@
matching_method: Matching method to use.
refine_intrinsics: If True, refine intrinsics.
colmap_cmd: Path to the COLMAP executable.
use_pose_prior: If True, use pose_prior_mapper to incorporate EXIF pose priors.
prior_position_std: Prior position standard deviation in meters for x/y/z.
overwrite_priors_covariance: If True, overwrite the priors covariance in the database when mapping.
align_model_to_priors: If True, run model_aligner to align the reconstruction to GPS priors.
alignment_max_error: Max alignment error (falls back to prior_position_std if None).
normalize_model: If True, apply a similarity transform to center and scale the model with model_transformer.
normalization_center: How to compute center (bbox center or mean point).
normalization_target_diagonal: Target diagonal length (meters) to scale the model to (if normalization_scale not given).
normalization_scale: Explicit scale factor. If provided, overrides normalization_target_diagonal.
"""

colmap_version = get_colmap_version(colmap_cmd)
@@ -126,7 +144,7 @@
f"--image_path {image_dir}",
"--ImageReader.single_camera 1",
f"--ImageReader.camera_model {camera_model.value}",
f"--SiftExtraction.use_gpu {int(gpu)}",
# f"--SiftExtraction.use_gpu={bool(gpu)}",
]
if camera_mask_path is not None:
feature_extractor_cmd.append(f"--ImageReader.camera_mask_path {camera_mask_path}")
@@ -140,7 +158,7 @@
feature_matcher_cmd = [
f"{colmap_cmd} {matching_method}_matcher",
f"--database_path {colmap_dir / 'database.db'}",
f"--SiftMatching.use_gpu {int(gpu)}",
# f"--SiftMatching.use_gpu={bool(gpu)}",
]
if matching_method == "vocab_tree":
vocab_tree_filename = get_vocab_tree()
@@ -150,19 +168,32 @@
run_command(feature_matcher_cmd, verbose=verbose)
CONSOLE.log("[bold green]:tada: Done matching COLMAP features.")

# Bundle adjustment
# Mapping / bundle adjustment
sparse_dir = colmap_dir / "sparse"
sparse_dir.mkdir(parents=True, exist_ok=True)
mapper_cmd = [
f"{colmap_cmd} mapper",

# Choose mapper variant
mapper_command_name = "pose_prior_mapper" if use_pose_prior else "mapper"

mapper_cmd_parts = [
f"{colmap_cmd} {mapper_command_name}",
f"--database_path {colmap_dir / 'database.db'}",
f"--image_path {image_dir}",
f"--output_path {sparse_dir}",
]
if colmap_version >= Version("3.7"):
mapper_cmd.append("--Mapper.ba_global_function_tolerance=1e-6")

mapper_cmd = " ".join(mapper_cmd)
if not use_pose_prior and colmap_version >= Version("3.7"):
mapper_cmd_parts.append("--Mapper.ba_global_function_tolerance=1e-6")

if use_pose_prior:
# Set a symmetric prior position std for x/y/z and optionally overwrite the priors covariance
mapper_cmd_parts.append(f"--prior_position_std_x {prior_position_std}")
mapper_cmd_parts.append(f"--prior_position_std_y {prior_position_std}")
mapper_cmd_parts.append(f"--prior_position_std_z {prior_position_std}")
if overwrite_priors_covariance:
mapper_cmd_parts.append("--overwrite_priors_covariance 1")

mapper_cmd = " ".join(mapper_cmd_parts)

with status(
msg="[bold yellow]Running COLMAP bundle adjustment... (This may take a while)",
@@ -172,6 +203,64 @@
run_command(mapper_cmd, verbose=verbose)
CONSOLE.log("[bold green]:tada: Done COLMAP bundle adjustment.")

# Optional alignment to GPS priors; write back into sparse/0 to keep downstream unchanged
if align_model_to_priors:
align_cmd_parts = [
f"{colmap_cmd} model_aligner",
f"--input_path {sparse_dir}/0",
f"--output_path {sparse_dir}/0",
f"--database_path {colmap_dir / 'database.db'}",
]
max_err = alignment_max_error if alignment_max_error is not None else prior_position_std
align_cmd_parts.append(f"--alignment_max_error {max_err}")
align_cmd = " ".join(align_cmd_parts)
with status(msg="[bold yellow]Aligning model to pose priors...", spinner="dots", verbose=verbose):
run_command(align_cmd, verbose=verbose)
CONSOLE.log("[bold green]:tada: Done aligning model to pose priors.")

# Optional normalization: recenter the model and rescale it to a convenient metric extent using model_transformer
if normalize_model:
recon_dir = sparse_dir / "0"
try:
ptid_to_info = read_points3D_binary(recon_dir / "points3D.bin")
except Exception as e:
CONSOLE.print(f"[bold yellow]Warning: Could not read points3D for normalization: {e}")
ptid_to_info = {}
if len(ptid_to_info) == 0:
CONSOLE.print("[bold yellow]Warning: No 3D points to estimate normalization. Skipping normalization.")
else:
import numpy as np # local import to avoid overhead unless needed

pts = np.array([p.xyz for p in ptid_to_info.values()], dtype=np.float64)
if normalization_center == "mean":
Cx, Cy, Cz = pts.mean(axis=0).tolist()
else:
mins = pts.min(axis=0)
maxs = pts.max(axis=0)
Cx, Cy, Cz = ((mins + maxs) * 0.5).tolist()
diag = float(np.linalg.norm(pts.max(axis=0) - pts.min(axis=0)))
if normalization_scale is not None:
s = float(normalization_scale)
else:
eps = 1e-9
s = float(normalization_target_diagonal) / max(diag, eps)
# Forward transform desired: x' = s * (x - C) = s*x + t, with t = -s*C
tx, ty, tz = (-s * Cx, -s * Cy, -s * Cz)
# Write transform in format: scale qw qx qy qz tx ty tz (identity rotation)
transform_path = recon_dir / "normalization_transform.txt"
with open(transform_path, "w", encoding="utf-8") as f:
f.write(f"{s:.12g} 1 0 0 0 {tx:.12g} {ty:.12g} {tz:.12g}\n")
transform_cmd_parts = [
f"{colmap_cmd} model_transformer",
f"--input_path {recon_dir}",
f"--output_path {recon_dir}",
f"--transform_path {transform_path}",
]
transform_cmd = " ".join(transform_cmd_parts)
with status(msg="[bold yellow]Normalizing model scale and center...", spinner="dots", verbose=verbose):
run_command(transform_cmd, verbose=verbose)
CONSOLE.log("[bold green]:tada: Done normalizing model (model_transformer).")

if refine_intrinsics:
with status(msg="[bold yellow]Refine intrinsics...", spinner="dqpb", verbose=verbose):
bundle_adjuster_cmd = [
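The normalization block above writes a similarity transform with identity rotation in the text format the PR uses (scale qw qx qy qz tx ty tz), chosen so that x' = s * (x - C) recenters the point cloud and rescales its bounding-box diagonal to normalization_target_diagonal. A self-contained NumPy sketch of the same arithmetic (synthetic points, not the PR's code path) that can be used to sanity-check the scale and translation:

import numpy as np

def normalization_transform(pts: np.ndarray, target_diagonal: float = 4.0, center: str = "bbox"):
    """Return (scale, translation) such that x' = scale * x + translation recenters and rescales pts."""
    mins, maxs = pts.min(axis=0), pts.max(axis=0)
    c = pts.mean(axis=0) if center == "mean" else (mins + maxs) * 0.5
    diag = float(np.linalg.norm(maxs - mins))
    scale = target_diagonal / max(diag, 1e-9)
    return scale, -scale * c  # forward transform: x' = scale * (x - c)

# Synthetic check: points spread over ~100 m end up centered with a ~4 m bounding-box diagonal.
rng = np.random.default_rng(0)
pts = rng.uniform(-50.0, 50.0, size=(1000, 3))
s, t = normalization_transform(pts)
pts_t = s * pts + t
print(np.linalg.norm(pts_t.max(axis=0) - pts_t.min(axis=0)))   # ~4.0
print((pts_t.max(axis=0) + pts_t.min(axis=0)) * 0.5)           # ~[0, 0, 0] (bbox center)

Because model_transformer is run with input_path equal to output_path, the normalized model replaces sparse/0 in place and downstream code keeps reading the same directory.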
72 changes: 64 additions & 8 deletions nerfstudio/process_data/process_data_utils.py
@@ -294,19 +294,30 @@ def copy_images_list(
pass
copied_image_paths.append(copied_image_path)

# Early return: if there is no transformation/downscale requested, avoid re-encoding to preserve EXIF
no_transform_requested = (
num_downscales == 0
and crop_border_pixels is None
and (crop_factor == (0.0, 0.0, 0.0, 0.0))
and upscale_factor is None
and same_dimensions
)
if no_transform_requested:
if len(image_paths) == 0:
CONSOLE.log("[bold red]:skull: No usable images in the data folder.")
else:
CONSOLE.log(f"[bold green]:tada: Done copying images with prefix '{image_prefix}'.")
return copied_image_paths

nn_flag = "" if not nearest_neighbor else ":flags=neighbor"
# Build downscale filter-graph labels. Whether to emit [out0] (the base resolution) is decided later, depending on whether the base image needs a transform.
downscale_chains = [f"[t{i}]scale=iw/{2**i}:ih/{2**i}{nn_flag}[out{i}]" for i in range(num_downscales + 1)]
downscale_dirs = [Path(str(image_dir) + (f"_{2**i}" if i > 0 else "")) for i in range(num_downscales + 1)]

for dir in downscale_dirs:
dir.mkdir(parents=True, exist_ok=True)

downscale_chain = (
f"split={num_downscales + 1}"
+ "".join([f"[t{i}]" for i in range(num_downscales + 1)])
+ ";"
+ ";".join(downscale_chains)
)
# We will construct the split size and outputs later per-frame based on whether [out0] is needed

num_frames = len(image_paths)
# ffmpeg batch commands assume all images are the same dimensions.
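To make the label scheme above concrete, this is what downscale_chains and downscale_dirs evaluate to for num_downscales=2 with nearest-neighbor scaling disabled (a throwaway snippet; image_dir is a placeholder):

from pathlib import Path

num_downscales, nn_flag, image_dir = 2, "", Path("images")
downscale_chains = [f"[t{i}]scale=iw/{2**i}:ih/{2**i}{nn_flag}[out{i}]" for i in range(num_downscales + 1)]
downscale_dirs = [Path(str(image_dir) + (f"_{2**i}" if i > 0 else "")) for i in range(num_downscales + 1)]
print(downscale_chains)  # ['[t0]scale=iw/1:ih/1[out0]', '[t1]scale=iw/2:ih/2[out1]', '[t2]scale=iw/4:ih/4[out2]']
print(downscale_dirs)    # [PosixPath('images'), PosixPath('images_2'), PosixPath('images_4')]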
@@ -330,13 +341,58 @@
if upscale_factor is not None:
select_cmd = f"[0:v]scale=iw*{upscale_factor}:ih*{upscale_factor}:flags=neighbor[upscaled];[upscaled]"

downscale_cmd = f' -filter_complex "{select_cmd}{crop_cmd}{downscale_chain}"' + "".join(
downscale_cmd = f' -filter_complex "{select_cmd}{crop_cmd}"' + "".join(
[
f' -map "[out{i}]" -q:v 2 "{downscale_dirs[i] / f"{framename}{copied_image_paths[0].suffix}"}"'
f' -map "[out{i}]" "{downscale_dirs[i] / f"{framename}{copied_image_paths[0].suffix}"}"'
for i in range(num_downscales + 1)
]
)

# Decide whether to overwrite base images ([out0])
need_transform_base = (
crop_border_pixels is not None
or (crop_factor != (0.0, 0.0, 0.0, 0.0))
or upscale_factor is not None
or not same_dimensions
)

# Build filter graph: if base not needed, split only into downscaled outputs [out1..outN]; otherwise include [out0]
if num_downscales > 0:
if need_transform_base:
split_targets = [f"[t{i}]" for i in range(num_downscales + 1)] # include base
chains = ";".join(downscale_chains) # [out0..outN]
downscale_graph = f"split={num_downscales + 1}" + "".join(split_targets) + ";" + chains
downscale_cmd = f' -filter_complex "{select_cmd}{crop_cmd}{downscale_graph}"'
mapping_entries = [
f' -map "[out0]" -map_metadata 0 -q:v 2 "{downscale_dirs[0] / f"{framename}{copied_image_paths[0].suffix}"}"'
]
for i in range(1, num_downscales + 1):
mapping_entries.append(
f' -map "[out{i}]" -map_metadata 0 -q:v 2 "{downscale_dirs[i] / f"{framename}{copied_image_paths[0].suffix}"}"'
)
downscale_cmd += "".join(mapping_entries)
else:
# Only emit downscaled outputs; reindex to start from out0 to avoid gaps and empty maps
# Build chains for i=1..N, then relabel [out{i}] -> [out{i-1}] via mapping labels
split_targets = [f"[t{i}]" for i in range(1, num_downscales + 1)]
chains = ";".join(
[f"[t{i}]scale=iw/{2**i}:ih/{2**i}{nn_flag}[out{i - 1}]" for i in range(1, num_downscales + 1)]
)
downscale_graph = f"split={num_downscales}" + "".join(split_targets) + ";" + chains
downscale_cmd = f' -filter_complex "{select_cmd}{crop_cmd}{downscale_graph}"'
mapping_entries = []
for i in range(num_downscales):
# map out{i} to images_{2**(i+1)}
out_dir = downscale_dirs[i + 1]
mapping_entries.append(
f' -map "[out{i}]" -map_metadata 0 -q:v 2 "{out_dir / f"{framename}{copied_image_paths[0].suffix}"}"'
)
downscale_cmd += "".join(mapping_entries)
else:
# No downscales requested but we got here due to other transforms; keep single output
downscale_graph = ""
downscale_cmd = ""

ffmpeg_cmd += downscale_cmd
if verbose:
CONSOLE.log(f"... {ffmpeg_cmd}")