From f634922b233c7e901787748c9e9bcdeb29419595 Mon Sep 17 00:00:00 2001
From: cnsumner <cnsumner@users.noreply.github.com>
Date: Mon, 28 Nov 2022 16:00:10 -0500
Subject: [PATCH] Add support for insta360 single-file format (#1024)

* Add support for insta360 single-file format

* Improve some variable names

* Add documentation for crop_percentage parameter

Co-authored-by: Matthew Tancik <tancik@berkeley.edu>
---
 nerfstudio/process_data/insta360_utils.py | 75 +++++++++++++++++++++--
 scripts/process_data.py                   | 41 ++++++++++---
 2 files changed, 103 insertions(+), 13 deletions(-)

diff --git a/nerfstudio/process_data/insta360_utils.py b/nerfstudio/process_data/insta360_utils.py
index c788836de3..4babc1504b 100644
--- a/nerfstudio/process_data/insta360_utils.py
+++ b/nerfstudio/process_data/insta360_utils.py
@@ -48,11 +48,6 @@ def get_insta360_filenames(data: Path) -> Tuple[Path, Path]:
     filename_back = data.parent / stem_back
     filename_front = data.parent / stem_front
 
-    if not filename_back.exists():
-        raise FileNotFoundError(f"Could not find {filename_back}")
-    if not filename_front.exists():
-        raise FileNotFoundError(f"Could not find {filename_front}")
-
     return filename_back, filename_front
 
 
@@ -71,6 +66,8 @@ def convert_insta360_to_images(
         video_back: Path to the back video.
         output_dir: Path to the output directory.
         num_frames_target: Number of frames to extract.
+        crop_percentage: Percentage used to calculate the cropped dimensions of extracted frames. Currently used to crop
+         out the curved portions of the fish-eye lens.
         verbose: If True, logs the output of the command.
     Returns:
         A tuple containing summary of the conversion and the number of extracted frames.
@@ -123,3 +120,71 @@ def convert_insta360_to_images(
     CONSOLE.log("[bold green]:tada: Done converting insta360 to images.")
 
     return summary_log, num_final_frames
+
+
+def convert_insta360_single_file_to_images(
+    video: Path,
+    image_dir: Path,
+    num_frames_target: int,
+    crop_percentage: float = 0.7,
+    verbose: bool = False,
+) -> Tuple[List[str], int]:
+    """Converts a single-file insta360 video into images, cropping a back and a front view from the frames.
+
+    Args:
+        video: Path to the video.
+        image_dir: Path to the output directory. Any existing *.png files in it are deleted first.
+        num_frames_target: Number of frames to extract in total; each of the two crops targets half of it.
+        crop_percentage: Fraction of the frame height used as the side of the square crop. Currently used to crop
+         out the curved portions of the fish-eye lens.
+        verbose: If True, logs the output of the command.
+    Returns:
+        A tuple containing a summary of the conversion and the number of extracted frames.
+    """
+
+    with status(msg="Converting video to images...", spinner="bouncingBall", verbose=verbose):
+        # delete existing images in folder
+        for img in image_dir.glob("*.png"):
+            if verbose:
+                CONSOLE.log(f"Deleting {img}")
+            img.unlink()
+
+        num_frames = get_num_frames_in_video(video)
+        if num_frames == 0:
+            CONSOLE.print(f"[bold red]Error: Video has no frames: {video}")
+            sys.exit(1)
+
+        spacing = num_frames // (num_frames_target // 2)  # NOTE(review): divides by zero if target is 0 or 1
+        vf_cmds = []
+        if spacing > 1:
+            vf_cmds = [f"thumbnail={spacing}", "setpts=N/TB"]
+        else:
+            CONSOLE.print("[bold red]Can't satify requested number of frames. Extracting all frames.")
+
+        vf_cmds_back = vf_cmds.copy()
+        vf_cmds_front = vf_cmds.copy()
+
+        vf_cmds_back.append(  # square crop whose x offset is measured from the left edge of the frame
+            f"crop=ih*{crop_percentage}:ih*{crop_percentage}:ih*({crop_percentage}/4):ih*({crop_percentage}/4)"
+        )
+        vf_cmds_front.append(  # same crop size, x offset shifted right by half the frame width (iw/2)
+            f"crop=ih*{crop_percentage}:ih*{crop_percentage}:iw/2+ih*{crop_percentage/4}:ih*{crop_percentage/4}"
+        )
+
+        front_ffmpeg_cmd = f"ffmpeg -i {video} -vf {','.join(vf_cmds_front)} -r 1 {image_dir / 'frame_%05d.png'}"
+        back_ffmpeg_cmd = f"ffmpeg -i {video} -vf {','.join(vf_cmds_back)} -r 1 {image_dir / 'back_frame_%05d.png'}"
+
+        run_command(back_ffmpeg_cmd, verbose=verbose)
+        run_command(front_ffmpeg_cmd, verbose=verbose)
+
+        num_extracted_frames = len(list(image_dir.glob("frame*.png")))  # counts front frames only
+        for i, img in enumerate(image_dir.glob("back_frame_*.png")):  # renumber back frames after the front ones
+            img.rename(image_dir / f"frame_{i+1+num_extracted_frames:05d}.png")
+
+    num_final_frames = len(list(image_dir.glob("*.png")))
+    summary_log = []
+    summary_log.append(f"Starting with {num_frames} video frames")
+    summary_log.append(f"We extracted {num_final_frames} images")
+    CONSOLE.log("[bold green]:tada: Done converting insta360 to images.")
+
+    return summary_log, num_final_frames
diff --git a/scripts/process_data.py b/scripts/process_data.py
index 15fe15e6f0..da2b95d306 100755
--- a/scripts/process_data.py
+++ b/scripts/process_data.py
@@ -2,6 +2,7 @@
 """Processes a video or image sequence to a nerfstudio compatible dataset."""
 
 
+import json
 import sys
 import zipfile
 from dataclasses import dataclass
@@ -246,14 +247,38 @@ def main(self) -> None:
 
         filename_back, filename_front = insta360_utils.get_insta360_filenames(self.data)
 
-        # Convert video to images
-        summary_log, num_extracted_frames = insta360_utils.convert_insta360_to_images(
-            video_front=filename_front,
-            video_back=filename_back,
-            image_dir=image_dir,
-            num_frames_target=self.num_frames_target,
-            verbose=self.verbose,
-        )
+        if not filename_back.exists():
+            raise FileNotFoundError(f"Could not find {filename_back}")
+
+        ffprobe_cmd = f"ffprobe -v quiet -print_format json -show_streams -select_streams v:0 {filename_back}"
+
+        ffprobe_output = process_data_utils.run_command(ffprobe_cmd)
+
+        assert ffprobe_output is not None
+        ffprobe_decoded = json.loads(ffprobe_output)
+
+        width, height = ffprobe_decoded["streams"][0]["width"], ffprobe_decoded["streams"][0]["height"]
+
+        summary_log, num_extracted_frames = [], 0
+
+        if width / height == 1:
+            if not filename_front.exists():
+                raise FileNotFoundError(f"Could not find {filename_front}")
+            # Convert video to images
+            summary_log, num_extracted_frames = insta360_utils.convert_insta360_to_images(
+                video_front=filename_front,
+                video_back=filename_back,
+                image_dir=image_dir,
+                num_frames_target=self.num_frames_target,
+                verbose=self.verbose,
+            )
+        else:
+            summary_log, num_extracted_frames = insta360_utils.convert_insta360_single_file_to_images(
+                video=filename_back,
+                image_dir=image_dir,
+                num_frames_target=self.num_frames_target,
+                verbose=self.verbose,
+            )
 
         # Downscale images
         summary_log.append(process_data_utils.downscale_images(image_dir, self.num_downscales, verbose=self.verbose))