From f634922b233c7e901787748c9e9bcdeb29419595 Mon Sep 17 00:00:00 2001 From: cnsumner Date: Mon, 28 Nov 2022 16:00:10 -0500 Subject: [PATCH] Add support for insta360 single-file format (#1024) * Add support for insta360 single-file format * Improve some variable names * Add documentation for crop_percentage parameter Co-authored-by: Matthew Tancik --- nerfstudio/process_data/insta360_utils.py | 75 +++++++++++++++++++++-- scripts/process_data.py | 41 ++++++++++--- 2 files changed, 103 insertions(+), 13 deletions(-) diff --git a/nerfstudio/process_data/insta360_utils.py b/nerfstudio/process_data/insta360_utils.py index c788836de3..4babc1504b 100644 --- a/nerfstudio/process_data/insta360_utils.py +++ b/nerfstudio/process_data/insta360_utils.py @@ -48,11 +48,6 @@ def get_insta360_filenames(data: Path) -> Tuple[Path, Path]: filename_back = data.parent / stem_back filename_front = data.parent / stem_front - if not filename_back.exists(): - raise FileNotFoundError(f"Could not find {filename_back}") - if not filename_front.exists(): - raise FileNotFoundError(f"Could not find {filename_front}") - return filename_back, filename_front @@ -71,6 +66,8 @@ def convert_insta360_to_images( video_back: Path to the back video. output_dir: Path to the output directory. num_frames_target: Number of frames to extract. + crop_percentage: Percentage used to calculate the cropped dimentions of extracted frames. Currently used to crop + out the curved portions of the fish-eye lens. verbose: If True, logs the output of the command. Returns: A tuple containing summary of the conversion and the number of extracted frames. @@ -123,3 +120,71 @@ def convert_insta360_to_images( CONSOLE.log("[bold green]:tada: Done converting insta360 to images.") return summary_log, num_final_frames + + +def convert_insta360_single_file_to_images( + video: Path, + image_dir: Path, + num_frames_target: int, + crop_percentage: float = 0.7, + verbose: bool = False, +) -> Tuple[List[str], int]: + """Converts a video into a sequence of images. + + Args: + video: Path to the video. + output_dir: Path to the output directory. + num_frames_target: Number of frames to extract. + crop_percentage: Percentage used to calculate the cropped dimentions of extracted frames. Currently used to crop + out the curved portions of the fish-eye lens. + verbose: If True, logs the output of the command. + Returns: + A tuple containing summary of the conversion and the number of extracted frames. + """ + + with status(msg="Converting video to images...", spinner="bouncingBall", verbose=verbose): + # delete existing images in folder + for img in image_dir.glob("*.png"): + if verbose: + CONSOLE.log(f"Deleting {img}") + img.unlink() + + num_frames = get_num_frames_in_video(video) + if num_frames == 0: + CONSOLE.print(f"[bold red]Error: Video has no frames: {video}") + sys.exit(1) + + spacing = num_frames // (num_frames_target // 2) + vf_cmds = [] + if spacing > 1: + vf_cmds = [f"thumbnail={spacing}", "setpts=N/TB"] + else: + CONSOLE.print("[bold red]Can't satify requested number of frames. Extracting all frames.") + + vf_cmds_back = vf_cmds.copy() + vf_cmds_front = vf_cmds.copy() + + vf_cmds_back.append( + f"crop=ih*{crop_percentage}:ih*{crop_percentage}:ih*({crop_percentage}/4):ih*({crop_percentage}/4)" + ) + vf_cmds_front.append( + f"crop=ih*{crop_percentage}:ih*{crop_percentage}:iw/2+ih*{crop_percentage/4}:ih*{crop_percentage/4}" + ) + + front_ffmpeg_cmd = f"ffmpeg -i {video} -vf {','.join(vf_cmds_front)} -r 1 {image_dir / 'frame_%05d.png'}" + back_ffmpeg_cmd = f"ffmpeg -i {video} -vf {','.join(vf_cmds_back)} -r 1 {image_dir / 'back_frame_%05d.png'}" + + run_command(back_ffmpeg_cmd, verbose=verbose) + run_command(front_ffmpeg_cmd, verbose=verbose) + + num_extracted_frames = len(list(image_dir.glob("frame*.png"))) + for i, img in enumerate(image_dir.glob("back_frame_*.png")): + img.rename(image_dir / f"frame_{i+1+num_extracted_frames:05d}.png") + + num_final_frames = len(list(image_dir.glob("*.png"))) + summary_log = [] + summary_log.append(f"Starting with {num_frames} video frames") + summary_log.append(f"We extracted {num_final_frames} images") + CONSOLE.log("[bold green]:tada: Done converting insta360 to images.") + + return summary_log, num_final_frames diff --git a/scripts/process_data.py b/scripts/process_data.py index 15fe15e6f0..da2b95d306 100755 --- a/scripts/process_data.py +++ b/scripts/process_data.py @@ -2,6 +2,7 @@ """Processes a video or image sequence to a nerfstudio compatible dataset.""" +import json import sys import zipfile from dataclasses import dataclass @@ -246,14 +247,38 @@ def main(self) -> None: filename_back, filename_front = insta360_utils.get_insta360_filenames(self.data) - # Convert video to images - summary_log, num_extracted_frames = insta360_utils.convert_insta360_to_images( - video_front=filename_front, - video_back=filename_back, - image_dir=image_dir, - num_frames_target=self.num_frames_target, - verbose=self.verbose, - ) + if not filename_back.exists(): + raise FileNotFoundError(f"Could not find {filename_back}") + + ffprobe_cmd = f"ffprobe -v quiet -print_format json -show_streams -select_streams v:0 {filename_back}" + + ffprobe_output = process_data_utils.run_command(ffprobe_cmd) + + assert ffprobe_output is not None + ffprobe_decoded = json.loads(ffprobe_output) + + width, height = ffprobe_decoded["streams"][0]["width"], ffprobe_decoded["streams"][0]["height"] + + summary_log, num_extracted_frames = [], 0 + + if width / height == 1: + if not filename_front.exists(): + raise FileNotFoundError(f"Could not find {filename_front}") + # Convert video to images + summary_log, num_extracted_frames = insta360_utils.convert_insta360_to_images( + video_front=filename_front, + video_back=filename_back, + image_dir=image_dir, + num_frames_target=self.num_frames_target, + verbose=self.verbose, + ) + else: + summary_log, num_extracted_frames = insta360_utils.convert_insta360_single_file_to_images( + video=filename_back, + image_dir=image_dir, + num_frames_target=self.num_frames_target, + verbose=self.verbose, + ) # Downscale images summary_log.append(process_data_utils.downscale_images(image_dir, self.num_downscales, verbose=self.verbose))