diff --git a/nerfstudio/process_data/colmap_converter_to_nerfstudio_dataset.py b/nerfstudio/process_data/colmap_converter_to_nerfstudio_dataset.py
index 55f657a3a4..918c755032 100644
--- a/nerfstudio/process_data/colmap_converter_to_nerfstudio_dataset.py
+++ b/nerfstudio/process_data/colmap_converter_to_nerfstudio_dataset.py
@@ -31,10 +31,10 @@ class ColmapConverterToNerfstudioDataset(BaseConverterToNerfstudioDataset):
     camera_type: Literal["perspective", "fisheye", "equirectangular", "pinhole", "simple_pinhole"] = "perspective"
     """Camera model to use."""
-    matching_method: Literal["exhaustive", "sequential", "vocab_tree"] = "vocab_tree"
+    matching_method: Literal["exhaustive", "sequential", "vocab_tree", "spatial"] = "vocab_tree"
     """Feature matching method to use. Vocab tree is recommended for a balance of speed
     and accuracy. Exhaustive is slower but more accurate. Sequential is faster but
-    should only be used for videos."""
+    should only be used for videos. Spatial can leverage EXIF GPS priors for pairing."""
     sfm_tool: Literal["any", "colmap", "hloc"] = "any"
     """Structure from motion tool to use. Colmap will use sift features, hloc can use
     many modern methods such as superpoint features and superglue matcher"""
@@ -104,6 +104,26 @@ class ColmapConverterToNerfstudioDataset(BaseConverterToNerfstudioDataset):
     use_single_camera_mode: bool = True
     """Whether to assume all images taken with the same camera characteristics, set to False for multiple cameras in colmap (only works with hloc sfm_tool). """
+    # New options for pose priors and alignment
+    use_pose_prior: bool = False
+    """If True, use EXIF pose priors by running pose_prior_mapper and optionally align to priors."""
+    prior_position_std: float = 2.0
+    """Standard deviation (meters) for x/y/z prior used by pose_prior_mapper."""
+    overwrite_priors_covariance: bool = True
+    """Whether to overwrite priors covariance in database when running pose_prior_mapper."""
+    align_model_to_priors: bool = False
+    """If True, run model_aligner to align the reconstruction to GPS priors (writes back into sparse/0)."""
+    alignment_max_error: Optional[float] = None
+    """Max alignment error for model_aligner. Defaults to prior_position_std if not set."""
+    # Normalization options
+    normalize_model: bool = False
+    """If True, apply model_transformer to center and scale the reconstructed model for numeric stability."""
+    normalization_center: Literal["bbox", "mean"] = "bbox"
+    """How to compute the model center for normalization (bbox center or mean point)."""
+    normalization_target_diagonal: float = 4.0
+    """Target diagonal length (meters) for the normalized model if no explicit scale is provided."""
+    normalization_scale: Optional[float] = None
+    """Explicit normalization scale; if set, overrides normalization_target_diagonal."""
 
     @staticmethod
     def default_colmap_path() -> Path:
@@ -219,6 +239,15 @@ def _run_colmap(self, mask_path: Optional[Path] = None):
                 matching_method=self.matching_method,
                 refine_intrinsics=self.refine_intrinsics,
                 colmap_cmd=self.colmap_cmd,
+                use_pose_prior=self.use_pose_prior,
+                prior_position_std=self.prior_position_std,
+                overwrite_priors_covariance=self.overwrite_priors_covariance,
+                align_model_to_priors=self.align_model_to_priors,
+                alignment_max_error=self.alignment_max_error,
+                normalize_model=self.normalize_model,
+                normalization_center=self.normalization_center,
+                normalization_target_diagonal=self.normalization_target_diagonal,
+                normalization_scale=self.normalization_scale,
             )
         elif sfm_tool == "hloc":
             if mask_path is not None:
@@ -227,12 +256,14 @@ def _run_colmap(self, mask_path: Optional[Path] = None):
             assert feature_type is not None
             assert matcher_type is not None
             assert matcher_type != "NN"  # Only used for colmap.
+            # hloc does not support 'spatial' matching_method; map it to 'vocab_tree' for compatibility
+            hloc_matching_method = self.matching_method if self.matching_method != "spatial" else "vocab_tree"
             hloc_utils.run_hloc(
                 image_dir=image_dir,
                 colmap_dir=self.absolute_colmap_path,
                 camera_model=CAMERA_MODELS[self.camera_type],
                 verbose=self.verbose,
-                matching_method=self.matching_method,
+                matching_method=hloc_matching_method,
                 feature_type=feature_type,
                 matcher_type=matcher_type,
                 refine_pixsfm=self.refine_pixsfm,
diff --git a/nerfstudio/process_data/colmap_utils.py b/nerfstudio/process_data/colmap_utils.py
index 1d9405c81a..0fe05a6662 100644
--- a/nerfstudio/process_data/colmap_utils.py
+++ b/nerfstudio/process_data/colmap_utils.py
@@ -96,9 +96,18 @@ def run_colmap(
     camera_mask_path: Optional[Path] = None,
     gpu: bool = True,
     verbose: bool = False,
-    matching_method: Literal["vocab_tree", "exhaustive", "sequential"] = "vocab_tree",
+    matching_method: Literal["vocab_tree", "exhaustive", "sequential", "spatial"] = "vocab_tree",
     refine_intrinsics: bool = True,
     colmap_cmd: str = "colmap",
+    use_pose_prior: bool = False,
+    prior_position_std: float = 2.0,
+    overwrite_priors_covariance: bool = True,
+    align_model_to_priors: bool = False,
+    alignment_max_error: Optional[float] = None,
+    normalize_model: bool = False,
+    normalization_center: Literal["bbox", "mean"] = "bbox",
+    normalization_target_diagonal: float = 4.0,
+    normalization_scale: Optional[float] = None,
 ) -> None:
     """Runs COLMAP on the images.
 
@@ -112,6 +121,15 @@ def run_colmap(
         matching_method: Matching method to use.
         refine_intrinsics: If True, refine intrinsics.
         colmap_cmd: Path to the COLMAP executable.
+        use_pose_prior: If True, use pose_prior_mapper to incorporate EXIF pose priors.
+        prior_position_std: Prior position standard deviation in meters for x/y/z.
+        overwrite_priors_covariance: If True, overwrite priors covariance in database when mapping.
+        align_model_to_priors: If True, run model_aligner to align the reconstruction to GPS priors.
+        alignment_max_error: Max alignment error (falls back to prior_position_std if None).
+        normalize_model: If True, apply a similarity transform to center and scale the model with model_transformer.
+        normalization_center: How to compute center (bbox center or mean point).
+        normalization_target_diagonal: Target diagonal length (meters) to scale the model to (if normalization_scale not given).
+        normalization_scale: Explicit scale factor. If provided, overrides normalization_target_diagonal.
     """
     colmap_version = get_colmap_version(colmap_cmd)
 
@@ -126,7 +144,7 @@ def run_colmap(
         f"--image_path {image_dir}",
         "--ImageReader.single_camera 1",
         f"--ImageReader.camera_model {camera_model.value}",
-        f"--SiftExtraction.use_gpu {int(gpu)}",
+        # f"--SiftExtraction.use_gpu={bool(gpu)}",
     ]
     if camera_mask_path is not None:
         feature_extractor_cmd.append(f"--ImageReader.camera_mask_path {camera_mask_path}")
@@ -140,7 +158,7 @@ def run_colmap(
     feature_matcher_cmd = [
         f"{colmap_cmd} {matching_method}_matcher",
         f"--database_path {colmap_dir / 'database.db'}",
-        f"--SiftMatching.use_gpu {int(gpu)}",
+        # f"--SiftMatching.use_gpu={bool(gpu)}",
     ]
     if matching_method == "vocab_tree":
         vocab_tree_filename = get_vocab_tree()
@@ -150,19 +168,32 @@ def run_colmap(
         run_command(feature_matcher_cmd, verbose=verbose)
     CONSOLE.log("[bold green]:tada: Done matching COLMAP features.")
 
-    # Bundle adjustment
+    # Mapping / bundle adjustment
     sparse_dir = colmap_dir / "sparse"
     sparse_dir.mkdir(parents=True, exist_ok=True)
-    mapper_cmd = [
-        f"{colmap_cmd} mapper",
+
+    # Choose mapper variant
+    mapper_command_name = "pose_prior_mapper" if use_pose_prior else "mapper"
+
+    mapper_cmd_parts = [
+        f"{colmap_cmd} {mapper_command_name}",
         f"--database_path {colmap_dir / 'database.db'}",
         f"--image_path {image_dir}",
         f"--output_path {sparse_dir}",
     ]
-    if colmap_version >= Version("3.7"):
-        mapper_cmd.append("--Mapper.ba_global_function_tolerance=1e-6")
-    mapper_cmd = " ".join(mapper_cmd)
+    if not use_pose_prior and colmap_version >= Version("3.7"):
+        mapper_cmd_parts.append("--Mapper.ba_global_function_tolerance=1e-6")
+
+    if use_pose_prior:
+        # Set symmetric priors std for x/y/z and optionally overwrite covariance
+        mapper_cmd_parts.append(f"--prior_position_std_x {prior_position_std}")
+        mapper_cmd_parts.append(f"--prior_position_std_y {prior_position_std}")
+        mapper_cmd_parts.append(f"--prior_position_std_z {prior_position_std}")
+        if overwrite_priors_covariance:
+            mapper_cmd_parts.append("--overwrite_priors_covariance 1")
+
+    mapper_cmd = " ".join(mapper_cmd_parts)
 
     with status(
         msg="[bold yellow]Running COLMAP bundle adjustment... (This may take a while)",
@@ -172,6 +203,64 @@ def run_colmap(
         run_command(mapper_cmd, verbose=verbose)
     CONSOLE.log("[bold green]:tada: Done COLMAP bundle adjustment.")
 
+    # Optional alignment to GPS priors; write back into sparse/0 to keep downstream unchanged
+    if align_model_to_priors:
+        align_cmd_parts = [
+            f"{colmap_cmd} model_aligner",
+            f"--input_path {sparse_dir}/0",
+            f"--output_path {sparse_dir}/0",
+            f"--database_path {colmap_dir / 'database.db'}",
+        ]
+        max_err = alignment_max_error if alignment_max_error is not None else prior_position_std
+        align_cmd_parts.append(f"--alignment_max_error {max_err}")
+        align_cmd = " ".join(align_cmd_parts)
+        with status(msg="[bold yellow]Aligning model to pose priors...", spinner="dots", verbose=verbose):
+            run_command(align_cmd, verbose=verbose)
+        CONSOLE.log("[bold green]:tada: Done aligning model to pose priors.")
+
+    # Optional normalization to human scale and centered coordinates using model_transformer
+    if normalize_model:
+        recon_dir = sparse_dir / "0"
+        try:
+            ptid_to_info = read_points3D_binary(recon_dir / "points3D.bin")
+        except Exception as e:
+            CONSOLE.print(f"[bold yellow]Warning: Could not read points3D for normalization: {e}")
+            ptid_to_info = {}
+        if len(ptid_to_info) == 0:
+            CONSOLE.print("[bold yellow]Warning: No 3D points to estimate normalization. Skipping normalization.")
+        else:
+            import numpy as np  # local import to avoid overhead unless needed
+
+            pts = np.array([p.xyz for p in ptid_to_info.values()], dtype=np.float64)
+            if normalization_center == "mean":
+                Cx, Cy, Cz = pts.mean(axis=0).tolist()
+            else:
+                mins = pts.min(axis=0)
+                maxs = pts.max(axis=0)
+                Cx, Cy, Cz = ((mins + maxs) * 0.5).tolist()
+            diag = float(np.linalg.norm(pts.max(axis=0) - pts.min(axis=0)))
+            if normalization_scale is not None:
+                s = float(normalization_scale)
+            else:
+                eps = 1e-9
+                s = float(normalization_target_diagonal) / max(diag, eps)
+            # Forward transform desired: x' = s * (x - C) = s*x + t, with t = -s*C
+            tx, ty, tz = (-s * Cx, -s * Cy, -s * Cz)
+            # Write transform in format: scale qw qx qy qz tx ty tz (identity rotation)
+            transform_path = recon_dir / "normalization_transform.txt"
+            with open(transform_path, "w", encoding="utf-8") as f:
+                f.write(f"{s:.12g} 1 0 0 0 {tx:.12g} {ty:.12g} {tz:.12g}\n")
+            transform_cmd_parts = [
+                f"{colmap_cmd} model_transformer",
+                f"--input_path {recon_dir}",
+                f"--output_path {recon_dir}",
+                f"--transform_path {transform_path}",
+            ]
+            transform_cmd = " ".join(transform_cmd_parts)
+            with status(msg="[bold yellow]Normalizing model scale and center...", spinner="dots", verbose=verbose):
+                run_command(transform_cmd, verbose=verbose)
+            CONSOLE.log("[bold green]:tada: Done normalizing model (model_transformer).")
+
     if refine_intrinsics:
         with status(msg="[bold yellow]Refine intrinsics...", spinner="dqpb", verbose=verbose):
             bundle_adjuster_cmd = [
diff --git a/nerfstudio/process_data/process_data_utils.py b/nerfstudio/process_data/process_data_utils.py
index 75878d6a5f..8ad7f02192 100644
--- a/nerfstudio/process_data/process_data_utils.py
+++ b/nerfstudio/process_data/process_data_utils.py
@@ -294,19 +294,30 @@ def copy_images_list(
             pass
         copied_image_paths.append(copied_image_path)
 
+    # Early return: if there is no transformation/downscale requested, avoid re-encoding to preserve EXIF
+    no_transform_requested = (
+        num_downscales == 0
+        and crop_border_pixels is None
+        and (crop_factor == (0.0, 0.0, 0.0, 0.0))
+        and upscale_factor is None
+        and same_dimensions
+    )
+    if no_transform_requested:
+        if len(image_paths) == 0:
+            CONSOLE.log("[bold red]:skull: No usable images in the data folder.")
+        else:
+            CONSOLE.log(f"[bold green]:tada: Done copying images with prefix '{image_prefix}'.")
+        return copied_image_paths
+
     nn_flag = "" if not nearest_neighbor else ":flags=neighbor"
+    # Build downscale graph labels. We will decide later whether to emit [out0] (base) depending on whether base transform is needed.
     downscale_chains = [f"[t{i}]scale=iw/{2**i}:ih/{2**i}{nn_flag}[out{i}]" for i in range(num_downscales + 1)]
     downscale_dirs = [Path(str(image_dir) + (f"_{2**i}" if i > 0 else "")) for i in range(num_downscales + 1)]
     for dir in downscale_dirs:
         dir.mkdir(parents=True, exist_ok=True)
 
-    downscale_chain = (
-        f"split={num_downscales + 1}"
-        + "".join([f"[t{i}]" for i in range(num_downscales + 1)])
-        + ";"
-        + ";".join(downscale_chains)
-    )
+    # We will construct the split size and outputs later per-frame based on whether [out0] is needed
     num_frames = len(image_paths)
 
     # ffmpeg batch commands assume all images are the same dimensions.
@@ -330,13 +341,58 @@ def copy_images_list(
         if upscale_factor is not None:
             select_cmd = f"[0:v]scale=iw*{upscale_factor}:ih*{upscale_factor}:flags=neighbor[upscaled];[upscaled]"
 
-        downscale_cmd = f' -filter_complex "{select_cmd}{crop_cmd}{downscale_chain}"' + "".join(
+        downscale_cmd = f' -filter_complex "{select_cmd}{crop_cmd}"' + "".join(
             [
-                f' -map "[out{i}]" -q:v 2 "{downscale_dirs[i] / f"{framename}{copied_image_paths[0].suffix}"}"'
+                f' -map "[out{i}]" "{downscale_dirs[i] / f"{framename}{copied_image_paths[0].suffix}"}"'
                 for i in range(num_downscales + 1)
             ]
         )
+
+        # Decide whether to overwrite base images ([out0])
+        need_transform_base = (
+            crop_border_pixels is not None
+            or (crop_factor != (0.0, 0.0, 0.0, 0.0))
+            or upscale_factor is not None
+            or not same_dimensions
+        )
+
+        # Build filter graph: if base not needed, split only into downscaled outputs [out1..outN]; otherwise include [out0]
+        if num_downscales > 0:
+            if need_transform_base:
+                split_targets = [f"[t{i}]" for i in range(num_downscales + 1)]  # include base
+                chains = ";".join(downscale_chains)  # [out0..outN]
+                downscale_graph = f"split={num_downscales + 1}" + "".join(split_targets) + ";" + chains
+                downscale_cmd = f' -filter_complex "{select_cmd}{crop_cmd}{downscale_graph}"'
+                mapping_entries = [
+                    f' -map "[out0]" -map_metadata 0 -q:v 2 "{downscale_dirs[0] / f"{framename}{copied_image_paths[0].suffix}"}"'
+                ]
+                for i in range(1, num_downscales + 1):
+                    mapping_entries.append(
+                        f' -map "[out{i}]" -map_metadata 0 -q:v 2 "{downscale_dirs[i] / f"{framename}{copied_image_paths[0].suffix}"}"'
+                    )
+                downscale_cmd += "".join(mapping_entries)
+            else:
+                # Only emit downscaled outputs; reindex to start from out0 to avoid gaps and empty maps
+                # Build chains for i=1..N, then relabel [out{i}] -> [out{i-1}] via mapping labels
+                split_targets = [f"[t{i}]" for i in range(1, num_downscales + 1)]
+                chains = ";".join(
+                    [f"[t{i}]scale=iw/{2**i}:ih/{2**i}{nn_flag}[out{i - 1}]" for i in range(1, num_downscales + 1)]
+                )
+                downscale_graph = f"split={num_downscales}" + "".join(split_targets) + ";" + chains
+                downscale_cmd = f' -filter_complex "{select_cmd}{crop_cmd}{downscale_graph}"'
+                mapping_entries = []
+                for i in range(num_downscales):
+                    # map out{i} to images_{2**(i+1)}
+                    out_dir = downscale_dirs[i + 1]
+                    mapping_entries.append(
+                        f' -map "[out{i}]" -map_metadata 0 -q:v 2 "{out_dir / f"{framename}{copied_image_paths[0].suffix}"}"'
+                    )
+                downscale_cmd += "".join(mapping_entries)
+        else:
+            # No downscales requested but we got here due to other transforms; keep single output
+            downscale_graph = ""
+            downscale_cmd = ""
+
         ffmpeg_cmd += downscale_cmd
         if verbose:
             CONSOLE.log(f"... {ffmpeg_cmd}")
diff --git a/nerfstudio/scripts/exporter.py b/nerfstudio/scripts/exporter.py
index c26797a5c8..9fea81857e 100644
--- a/nerfstudio/scripts/exporter.py
+++ b/nerfstudio/scripts/exporter.py
@@ -558,7 +558,7 @@ def main(self) -> None:
         if not self.output_dir.exists():
             self.output_dir.mkdir(parents=True)
 
-        _, pipeline, _, _ = eval_setup(self.load_config, test_mode="inference")
+        config, pipeline, _, _ = eval_setup(self.load_config, test_mode="inference")
 
         assert isinstance(pipeline.model, SplatfactoModel)
@@ -648,6 +648,88 @@ def main(self) -> None:
 
         ExportGaussianSplat.write_ply(str(filename), count, map_to_tensors)
 
+        # Write geo_transforms.json containing inverse dataparser and normalization transforms
+        try:
+            run_dir = Path(self.load_config).parent  # where config.yml resides
+            dp_path = run_dir / "dataparser_transforms.json"
+            geo = {}
+            if dp_path.exists():
+                with open(dp_path, "r", encoding="utf-8") as f:
+                    dp = json.load(f)
+                T_dp = np.array(dp["transform"], dtype=np.float64)  # 3x4
+                R_dp = T_dp[:, :3]
+                t_dp = T_dp[:, 3]
+                s_dp = float(dp["scale"]) if "scale" in dp else 1.0
+                R_dp_inv = (R_dp.T) / s_dp
+                t_dp_inv = -R_dp.T @ t_dp
+                M_dp_inv = np.eye(4, dtype=np.float64)
+                M_dp_inv[:3, :3] = R_dp_inv
+                M_dp_inv[:3, 3] = t_dp_inv
+                geo["dataparser_inverse"] = {"matrix": M_dp_inv.tolist(), "scale": s_dp}
+            # Normalization transform saved by processing: /colmap/sparse/0/normalization_transform.txt
+            # Find dataset root from config
+            dataset_root = None
+            try:
+                dataset_root = Path(config.pipeline.datamanager.dataparser.data)  # type: ignore[attr-defined]
+            except Exception:
+                dataset_root = None
+            norm_txt = None
+            if dataset_root is not None:
+                candidate = dataset_root / "colmap" / "sparse" / "0" / "normalization_transform.txt"
+                if candidate.exists():
+                    norm_txt = candidate
+            if norm_txt is not None and norm_txt.exists():
+                vals = np.loadtxt(norm_txt).astype(np.float64).flatten().tolist()
+                if len(vals) >= 8:
+                    s0, qw, qx, qy, qz, tx, ty, tz = vals[:8]
+                    # Build R from quaternion (qw, qx, qy, qz)
+                    q = np.array([qw, qx, qy, qz], dtype=np.float64)
+                    # Normalize quaternion
+                    q = q / (np.linalg.norm(q) + 1e-12)
+                    w, x, y, z = q
+                    R = np.array(
+                        [
+                            [1 - 2 * (y * y + z * z), 2 * (x * y - z * w), 2 * (x * z + y * w)],
+                            [2 * (x * y + z * w), 1 - 2 * (x * x + z * z), 2 * (y * z - x * w)],
+                            [2 * (x * z - y * w), 2 * (y * z + x * w), 1 - 2 * (x * x + y * y)],
+                        ],
+                        dtype=np.float64,
+                    )
+                    t = np.array([tx, ty, tz], dtype=np.float64)
+                    R_norm_inv = (R.T) / s0
+                    t_norm_inv = -(R.T @ t) / s0
+                    M_norm_inv = np.eye(4, dtype=np.float64)
+                    M_norm_inv[:3, :3] = R_norm_inv
+                    M_norm_inv[:3, 3] = t_norm_inv
+                    geo["normalization_inverse"] = {
+                        "matrix": M_norm_inv.tolist(),
+                        "scale": s0,
+                        "quaternion": [qw, qx, qy, qz],
+                        "translation": [tx, ty, tz],
+                    }
+            # Composite: train -> ecef = M_norm_inv @ M_dp_inv
+            if "dataparser_inverse" in geo and "normalization_inverse" in geo:
+                M_dp_inv = np.array(geo["dataparser_inverse"]["matrix"], dtype=np.float64)
+                M_norm_inv = np.array(geo["normalization_inverse"]["matrix"], dtype=np.float64)
+                M_comp = M_norm_inv @ M_dp_inv
+                geo["composite_train_to_ecef"] = {"matrix": M_comp.tolist()}
+                # Also provide dataparser-style (transform 3x4 + scale)
+                A = M_comp[:3, :3]
+                b = M_comp[:3, 3]
+                # robust uniform scale estimation
+                row_norms = np.array([np.linalg.norm(A[i, :]) for i in range(3)])
+                s_comp = float(np.maximum(row_norms.mean(), 1e-12))
+                R_out = (A / s_comp).tolist()
+                t_out = (b / s_comp).tolist()
+                transform_3x4 = [R_out[0] + [t_out[0]], R_out[1] + [t_out[1]], R_out[2] + [t_out[2]]]
+                geo["composite_train_to_ecef_dataparser"] = {"transform": transform_3x4, "scale": s_comp}
+            out_geo = self.output_dir / "geo_transforms.json"
+            with open(out_geo, "w", encoding="utf-8") as f:
+                json.dump(geo, f, indent=2)
+            CONSOLE.print(f"[bold green]:white_check_mark: Saved geo transforms to {out_geo}")
+        except Exception as e:
+            CONSOLE.print(f"[bold yellow]Warning: Failed to write geo_transforms.json: {e}")
+
 
 Commands = tyro.conf.FlagConversionOff[
     Union[
diff --git a/nerfstudio/utils/scripts.py b/nerfstudio/utils/scripts.py
index cba07e53f9..f887dd704a 100644
--- a/nerfstudio/utils/scripts.py
+++ b/nerfstudio/utils/scripts.py
@@ -34,8 +34,17 @@ def run_command(cmd: str, verbose=False) -> Optional[str]:
         CONSOLE.rule("[bold red] :skull: :skull: :skull: ERROR :skull: :skull: :skull: ", style="red")
         CONSOLE.print(f"[bold red]Error running command: {cmd}")
         CONSOLE.rule(style="red")
-        CONSOLE.print(out.stderr.decode("utf-8"))
+        if out.stderr is not None:
+            try:
+                CONSOLE.print(out.stderr.decode("utf-8"))
+            except Exception:
+                CONSOLE.print(str(out.stderr))
+        else:
+            CONSOLE.print("Process returned non-zero exit code. See above logs for details.")
         sys.exit(1)
     if out.stdout is not None:
-        return out.stdout.decode("utf-8")
-    return out
+        try:
+            return out.stdout.decode("utf-8")
+        except Exception:
+            return str(out.stdout)
+    return None
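
Editor's illustration (not part of the patch): the normalization that run_colmap writes to normalization_transform.txt is the similarity transform x' = s * (x - C) with identity rotation, and the exporter reconstructs its inverse. The sketch below mirrors that math with local names only; it does not touch COLMAP or nerfstudio.

# Illustrative sketch, assuming the bbox-center / target-diagonal defaults of the patch.
import numpy as np

rng = np.random.default_rng(0)
pts = rng.normal(size=(100, 3))  # stand-in for the sparse points3D positions

# Center and scale as in run_colmap's normalize_model branch (bbox center, target diagonal 4.0).
mins, maxs = pts.min(axis=0), pts.max(axis=0)
center = (mins + maxs) * 0.5
diag = float(np.linalg.norm(maxs - mins))
s = 4.0 / max(diag, 1e-9)
t = -s * center

normalized = s * pts + t          # forward transform handed to model_transformer
recovered = (normalized - t) / s  # inverse that the exporter encodes in geo_transforms.json
assert np.allclose(recovered, pts)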
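A second illustrative sketch, also not part of the patch: how a downstream consumer might apply the "composite_train_to_ecef" matrix that the exporter stores in geo_transforms.json to map points from the training/export frame back into the pose-prior frame. It assumes the 4x4 matrix acts on column vectors, matching the construction above; the file path in the usage comment is hypothetical.

import json

import numpy as np


def train_to_prior_frame(points_xyz: np.ndarray, geo_transforms_path: str) -> np.ndarray:
    """Map Nx3 points from the training/export frame into the pose-prior frame."""
    with open(geo_transforms_path, "r", encoding="utf-8") as f:
        geo = json.load(f)
    m = np.asarray(geo["composite_train_to_ecef"]["matrix"], dtype=np.float64)  # 4x4
    homog = np.concatenate([points_xyz, np.ones((points_xyz.shape[0], 1))], axis=1)  # Nx4
    return (homog @ m.T)[:, :3]


# Hypothetical usage:
# print(train_to_prior_frame(np.zeros((1, 3)), "exports/splat/geo_transforms.json"))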