Add support for RealityCapture #1055

Merged
merged 6 commits on Jan 20, 2023
23 changes: 23 additions & 0 deletions nerfstudio/process_data/process_data_utils.py
@@ -45,6 +45,29 @@ class CameraModel(Enum):
}


def get_image_filenames(directory: Path, max_num_images: int = -1) -> Tuple[List[Path], int]:
    """Returns a list of image filenames in a directory.

    Args:
        directory: Path to the directory.
        max_num_images: The maximum number of images to return. -1 means no limit.

    Returns:
        A tuple of (list of image filenames, number of images found in the directory).
    """
    allowed_exts = [".jpg", ".jpeg", ".png", ".tif", ".tiff"]
    image_paths = sorted([p for p in directory.glob("[!.]*") if p.suffix.lower() in allowed_exts])
    num_orig_images = len(image_paths)

    if max_num_images != -1 and num_orig_images > max_num_images:
        idx = np.round(np.linspace(0, num_orig_images - 1, max_num_images)).astype(int)
    else:
        idx = np.arange(num_orig_images)

    image_filenames = list(np.array(image_paths)[idx])

    return image_filenames, num_orig_images
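

# A usage sketch for get_image_filenames (hypothetical filenames, never called here):
# the helper only globs names, so empty placeholder files are enough to show the
# approximately even subsampling.
def _example_even_subsample() -> None:
    import tempfile

    with tempfile.TemporaryDirectory() as tmp:
        for i in range(5):
            (Path(tmp) / f"frame_{i:03d}.jpg").touch()
        # np.round(np.linspace(0, 4, 3)) -> indices [0, 2, 4]
        filenames, num_orig = get_image_filenames(Path(tmp), max_num_images=3)
        assert num_orig == 5
        assert [p.name for p in filenames] == ["frame_000.jpg", "frame_002.jpg", "frame_004.jpg"]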


def get_num_frames_in_video(video: Path) -> int:
    """Returns the number of frames in a video.

119 changes: 119 additions & 0 deletions nerfstudio/process_data/realitycapture_utils.py
@@ -0,0 +1,119 @@
# Copyright 2022 The Nerfstudio Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Helper utils for processing polycam data into the nerfstudio format."""

import csv
import json
from pathlib import Path
from typing import Dict, List

import numpy as np
from PIL import Image
from rich.console import Console

from nerfstudio.process_data.process_data_utils import CAMERA_MODELS

CONSOLE = Console(width=120)


def realitycapture_to_json(
    image_filename_map: Dict[str, Path],
    csv_filename: Path,
    output_dir: Path,
) -> List[str]:
    """Convert RealityCapture data into a nerfstudio dataset.

    Args:
        image_filename_map: Mapping from the original image name (without extension) to the copied image path.
        csv_filename: Path to the csv file containing the camera poses.
        output_dir: Path to the output directory.

    Returns:
        Summary of the conversion.
    """
    data = {}
    data["camera_model"] = CAMERA_MODELS["perspective"].value
    # Needs to be a string for camera_utils.auto_orient_and_center_poses
    data["orientation_override"] = "none"

    frames = []

    with open(csv_filename, encoding="UTF-8") as file:
        reader = csv.DictReader(file)
        cameras = {}
        for row in reader:
            for column, value in row.items():
                cameras.setdefault(column, []).append(value)

    img = np.array(Image.open(output_dir / image_filename_map[cameras["#name"][0].split(".")[0]]))
    height, width, _ = img.shape

    data["h"] = int(height)
    data["w"] = int(width)

    for i, name in enumerate(cameras["#name"]):
        frame = {}
        frame["file_path"] = image_filename_map[name.split(".")[0]].as_posix()
        frame["fl_x"] = float(cameras["f"][i]) * max(width, height) / 36
        frame["fl_y"] = float(cameras["f"][i]) * max(width, height) / 36
        # TODO: Unclear how to get the principal point from RealityCapture, here a guess...
        frame["cx"] = float(cameras["px"][i]) / 36.0 + width / 2.0
        frame["cy"] = float(cameras["py"][i]) / 36.0 + height / 2.0
        # TODO: Not sure if RealityCapture uses this distortion model
        frame["k1"] = float(cameras["k1"][i])
        frame["k2"] = float(cameras["k2"][i])
        frame["k3"] = float(cameras["k3"][i])
        frame["k4"] = float(cameras["k4"][i])
        frame["p1"] = float(cameras["t1"][i])
        frame["p2"] = float(cameras["t2"][i])

        # Transform matrix to nerfstudio format. Please refer to the documentation for coordinate system conventions.
        rot = _get_rotation_matrix(-float(cameras["heading"][i]), float(cameras["pitch"][i]), float(cameras["roll"][i]))

        transform = np.eye(4)
        transform[:3, :3] = rot
        transform[:3, 3] = np.array([float(cameras["x"][i]), float(cameras["y"][i]), float(cameras["alt"][i])])

        frame["transform_matrix"] = transform.tolist()
        frames.append(frame)
    data["frames"] = frames

    with open(output_dir / "transforms.json", "w", encoding="utf-8") as f:
        json.dump(data, f, indent=4)

    summary = []
    if len(frames) < len(image_filename_map):
        summary.append(f"Missing camera data for {len(image_filename_map) - len(frames)} frames.")
    summary.append(f"Final dataset is {len(frames)} frames.")

    return summary


def _get_rotation_matrix(yaw, pitch, roll):
    """Returns a rotation matrix given Euler angles in degrees."""

    s_yaw = np.sin(np.deg2rad(yaw))
    c_yaw = np.cos(np.deg2rad(yaw))
    s_pitch = np.sin(np.deg2rad(pitch))
    c_pitch = np.cos(np.deg2rad(pitch))
    s_roll = np.sin(np.deg2rad(roll))
    c_roll = np.cos(np.deg2rad(roll))

    rot_x = np.array([[1, 0, 0], [0, c_pitch, -s_pitch], [0, s_pitch, c_pitch]])
    rot_y = np.array([[c_roll, 0, s_roll], [0, 1, 0], [-s_roll, 0, c_roll]])
    rot_z = np.array([[c_yaw, -s_yaw, 0], [s_yaw, c_yaw, 0], [0, 0, 1]])
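    # Heading/yaw rotates about the Z axis, pitch about X, and roll about Y; the
    # composition below applies roll first, then pitch, then yaw: R = Rz @ Rx @ Ry.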

    return rot_z @ rot_x @ rot_y
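

# A minimal usage sketch of the converter above. The synthetic CSV only contains the
# columns this function actually reads (#name, x, y, alt, heading, pitch, roll, f,
# px, py, k1-k4, t1, t2); a real RealityCapture export may contain more columns, and
# all filenames and values here are placeholders.
def _example_conversion() -> None:
    import tempfile

    with tempfile.TemporaryDirectory() as tmp:
        output_dir = Path(tmp)
        (output_dir / "images").mkdir()
        # A dummy photo, just so the converter can read an image resolution.
        Image.fromarray(np.zeros((120, 160, 3), dtype=np.uint8)).save(output_dir / "images" / "IMG_0001.jpg")

        fields = ["#name", "x", "y", "alt", "heading", "pitch", "roll",
                  "f", "px", "py", "k1", "k2", "k3", "k4", "t1", "t2"]
        row = {name: "0" for name in fields}
        row.update({"#name": "IMG_0001.jpg", "f": "35.0"})
        with open(output_dir / "cameras.csv", "w", newline="", encoding="UTF-8") as f:
            writer = csv.DictWriter(f, fieldnames=fields)
            writer.writeheader()
            writer.writerow(row)

        summary = realitycapture_to_json(
            image_filename_map={"IMG_0001": Path("images/IMG_0001.jpg")},
            csv_filename=output_dir / "cameras.csv",
            output_dir=output_dir,
        )
        print(summary)  # -> ['Final dataset is 1 frames.']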
136 changes: 102 additions & 34 deletions scripts/process_data.py
@@ -21,6 +21,7 @@
metashape_utils,
polycam_utils,
process_data_utils,
realitycapture_utils,
record3d_utils,
)
from nerfstudio.process_data.process_data_utils import CAMERA_MODELS
@@ -479,8 +480,8 @@ def main(self) -> None:
summary_log.append(f"Used {num_frames} images out of {num_images} total")
if self.max_dataset_size > 0:
summary_log.append(
"To change the size of the dataset add the argument --max_dataset_size to larger than the "
f"current value ({self.max_dataset_size}), or -1 to use all images."
"To change the size of the dataset add the argument [yellow]--max_dataset_size[/yellow] to "
f"larger than the current value ({self.max_dataset_size}), or -1 to use all images."
)

# Downscale images
@@ -558,19 +559,10 @@ def main(self) -> None:
raise ValueError(f"Image directory {polycam_image_dir} doesn't exist")

# Copy images to output directory
polycam_image_filenames, num_orig_images = process_data_utils.get_image_filenames(
polycam_image_dir, self.max_dataset_size
)

polycam_image_filenames = []
for f in polycam_image_dir.iterdir():
if f.suffix.lower() in [".jpg", ".jpeg", ".png", ".tif", ".tiff"]:
polycam_image_filenames.append(f)
polycam_image_filenames = sorted(polycam_image_filenames, key=lambda fn: int(fn.stem))
num_images = len(polycam_image_filenames)
idx = np.arange(num_images)
if self.max_dataset_size != -1 and num_images > self.max_dataset_size:
idx = np.round(np.linspace(0, num_images - 1, self.max_dataset_size)).astype(int)

polycam_image_filenames = list(np.array(polycam_image_filenames)[idx])
# Copy images to output directory
copied_image_paths = process_data_utils.copy_images_list(
polycam_image_filenames,
image_dir=image_dir,
@@ -581,11 +573,11 @@

copied_image_paths = [Path("images/" + copied_image_path.name) for copied_image_path in copied_image_paths]

if self.max_dataset_size > 0 and num_frames != num_images:
summary_log.append(f"Started with {num_frames} images out of {num_images} total")
if self.max_dataset_size > 0 and num_frames != num_orig_images:
summary_log.append(f"Started with {num_frames} images out of {num_orig_images} total")
summary_log.append(
"To change the size of the dataset add the argument --max_dataset_size to larger than the "
f"current value ({self.max_dataset_size}), or -1 to use all images."
"To change the size of the dataset add the argument [yellow]--max_dataset_size[/yellow] to "
f"larger than the current value ({self.max_dataset_size}), or -1 to use all images."
)
else:
summary_log.append(f"Started with {num_frames} images")
@@ -657,18 +649,7 @@ def main(self) -> None:
summary_log = []

# Copy images to output directory
image_filenames = []
for f in self.data.iterdir():
if f.suffix.lower() in [".jpg", ".jpeg", ".png", ".tif", ".tiff"]:
image_filenames.append(f)
image_filenames = sorted(image_filenames, key=lambda fn: fn.stem)
num_images = len(image_filenames)
idx = np.arange(num_images)
if self.max_dataset_size != -1 and num_images > self.max_dataset_size:
idx = np.round(np.linspace(0, num_images - 1, self.max_dataset_size)).astype(int)

image_filenames = list(np.array(image_filenames)[idx])
# Copy images to output directory
image_filenames, num_orig_images = process_data_utils.get_image_filenames(self.data, self.max_dataset_size)
copied_image_paths = process_data_utils.copy_images_list(
image_filenames,
image_dir=image_dir,
@@ -680,11 +661,11 @@
original_names = [image_path.stem for image_path in image_filenames]
image_filename_map = dict(zip(original_names, copied_image_paths))

if self.max_dataset_size > 0 and num_frames != num_images:
summary_log.append(f"Started with {num_frames} images out of {num_images} total")
if self.max_dataset_size > 0 and num_frames != num_orig_images:
summary_log.append(f"Started with {num_frames} images out of {num_orig_images} total")
summary_log.append(
"To change the size of the dataset add the argument --max_dataset_size to larger than the "
f"current value ({self.max_dataset_size}), or -1 to use all images."
"To change the size of the dataset add the argument [yellow]--max_dataset_size[/yellow] to "
f"larger than the current value ({self.max_dataset_size}), or -1 to use all images."
)
else:
summary_log.append(f"Started with {num_frames} images")
@@ -712,11 +693,98 @@ def main(self) -> None:
CONSOLE.rule()


@dataclass
class ProcessRealityCapture:
    """Process RealityCapture data into a nerfstudio dataset.

    This script assumes that cameras have been aligned using RealityCapture. After alignment, it is necessary to
    export the camera poses as a `.csv` file.

    This script does the following:

    1. Scales images to a specified size.
    2. Converts RealityCapture poses into the nerfstudio format.
    """

    data: Path
    """Path to a folder of images."""
    csv: Path
    """Path to the RealityCapture cameras CSV file."""
    output_dir: Path
    """Path to the output directory."""
    num_downscales: int = 3
    """Number of times to downscale the images. Downscales by 2 each time. For example a value of 3
    will downscale the images by 2x, 4x, and 8x."""
    max_dataset_size: int = 600
    """Max number of images to train on. If the dataset has more, images will be sampled approximately evenly. If -1,
    use all images."""
    verbose: bool = False
    """If True, print extra logging."""

    def main(self) -> None:
        """Process images into a nerfstudio dataset."""

        if self.csv.suffix != ".csv":
            raise ValueError(f"CSV file {self.csv} must have a .csv extension")
        if not self.csv.exists():
            raise ValueError(f"CSV file {self.csv} doesn't exist")

        self.output_dir.mkdir(parents=True, exist_ok=True)
        image_dir = self.output_dir / "images"
        image_dir.mkdir(parents=True, exist_ok=True)

        summary_log = []

        # Copy images to output directory
        image_filenames, num_orig_images = process_data_utils.get_image_filenames(self.data, self.max_dataset_size)
        copied_image_paths = process_data_utils.copy_images_list(
            image_filenames,
            image_dir=image_dir,
            verbose=self.verbose,
        )
        num_frames = len(copied_image_paths)

        copied_image_paths = [Path("images/" + copied_image_path.name) for copied_image_path in copied_image_paths]
        original_names = [image_path.stem for image_path in image_filenames]
        image_filename_map = dict(zip(original_names, copied_image_paths))

        if self.max_dataset_size > 0 and num_frames != num_orig_images:
            summary_log.append(f"Started with {num_frames} images out of {num_orig_images} total")
            summary_log.append(
                "To change the size of the dataset add the argument [yellow]--max_dataset_size[/yellow] to "
                f"larger than the current value ({self.max_dataset_size}), or -1 to use all images."
            )
        else:
            summary_log.append(f"Started with {num_frames} images")

        # Downscale images
        summary_log.append(process_data_utils.downscale_images(image_dir, self.num_downscales, verbose=self.verbose))

        # Save json
        if num_frames == 0:
            CONSOLE.print("[bold red]No images found, exiting")
            sys.exit(1)
        summary_log.extend(
            realitycapture_utils.realitycapture_to_json(
                image_filename_map=image_filename_map,
                csv_filename=self.csv,
                output_dir=self.output_dir,
            )
        )

        CONSOLE.rule("[bold green]:tada: :tada: :tada: All DONE :tada: :tada: :tada:")

        for summary in summary_log:
            CONSOLE.print(summary, justify="center")
        CONSOLE.rule()
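

# Sketch of driving the new subcommand directly from Python (paths are placeholders,
# and the function is illustrative only, never called by the script itself).
def _example_realitycapture_invocation() -> None:
    """The equivalent CLI call, assuming the standard `ns-process-data` entry point
    is wired to this script, would be:
    `ns-process-data realitycapture --data my_capture/images --csv my_capture/cameras.csv --output-dir outputs/my_capture`.
    """
    ProcessRealityCapture(
        data=Path("my_capture/images"),
        csv=Path("my_capture/cameras.csv"),
        output_dir=Path("outputs/my_capture"),
    ).main()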


Commands = Union[
    Annotated[ProcessImages, tyro.conf.subcommand(name="images")],
    Annotated[ProcessVideo, tyro.conf.subcommand(name="video")],
    Annotated[ProcessPolycam, tyro.conf.subcommand(name="polycam")],
    Annotated[ProcessMetashape, tyro.conf.subcommand(name="metashape")],
    Annotated[ProcessRealityCapture, tyro.conf.subcommand(name="realitycapture")],
    Annotated[ProcessInsta360, tyro.conf.subcommand(name="insta360")],
    Annotated[ProcessRecord3D, tyro.conf.subcommand(name="record3d")],
]