Skip to content

Commit

Permalink
when camera_type is equirectangular, add spherical metadata to render…
Browse files Browse the repository at this point in the history
…ed video
  • Loading branch information
machenmusik committed Dec 20, 2022
1 parent af5953e commit c4dade4
Showing 1 changed file with 83 additions and 0 deletions.
83 changes: 83 additions & 0 deletions scripts/render.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,10 @@
from __future__ import annotations

import json
import os
import struct
import sys
import uuid
from dataclasses import dataclass, field
from pathlib import Path
from typing import List, Optional
Expand Down Expand Up @@ -42,6 +45,7 @@ def _render_trajectory_video(
rendered_resolution_scaling_factor: float = 1.0,
seconds: float = 5.0,
output_format: Literal["images", "video"] = "video",
camera_type: str = "",
) -> None:
"""Helper function to create a video of the spiral trajectory.
Expand Down Expand Up @@ -94,7 +98,83 @@ def _render_trajectory_video(
# make the folder if it doesn't exist
output_filename.parent.mkdir(parents=True, exist_ok=True)
with CONSOLE.status("[yellow]Saving video", spinner="bouncingBall"):
# NOTE:
# we could use ffmpeg_args "-movflags faststart" for progressive download,
# which would force moov atom into known position before mdat,
# but then we would have to move all of mdat to insert metadata atom
# (unless we reserve enough space to overwrite with our uuid tag,
# but we don't know how big the video file will be, so it's not certain!)
media.write_video(output_filename, images, fps=fps)
if camera_type == "equirectangular":
# NOTE:
# because we didn't use faststart, the moov atom will be at the end;
# to insert our metadata, we need to find (skip atoms until we get to) moov.
# we should have 0x00000020 ftyp, then 0x00000008 free, then variable mdat.
spherical_uuid = b"\xff\xcc\x82\x63\xf8\x55\x4a\x93\x88\x14\x58\x7a\x02\x52\x1f\xdd"
spherical_metadata = bytes(
"""<rdf:SphericalVideo
xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#'
xmlns:GSpherical='http://ns.google.com/videos/1.0/spherical/'>
<GSpherical:ProjectionType>equirectangular</GSpherical:ProjectionType>
<GSpherical:Spherical>True</GSpherical:Spherical>
<GSpherical:Stitched>True</GSpherical:Stitched>
<GSpherical:StitchingSoftware>nerfstudio</GSpherical:StitchingSoftware>
</rdf:SphericalVideo>""",
"utf-8",
)
insert_size = len(spherical_metadata) + 8 + 16
mp4file = open(output_filename, mode="r+b")
try:
# get file size
mp4file.seek(0, os.SEEK_END)
mp4file_size = mp4file.tell()
mp4file.seek(0)
CONSOLE.print("[bold green]MP4 file " + str(output_filename) + " size " + str(mp4file_size))

# find moov container (probably after ftyp, free, mdat)
while True:
pos = mp4file.tell()
size_and_tag = mp4file.read(8)
CONSOLE.print("[bold green]len(size_and_tag) = " + str(len(size_and_tag)))
size, tag = struct.unpack(">I4s", size_and_tag)
CONSOLE.print("[bold green]read " + str(tag) + " size " + str(size) + " at " + str(pos))
if tag == b"moov":
break
mp4file.seek(pos + size)
# if moov isn't at end, bail
if pos + size != mp4file_size:
# TODO: to support faststart, rewrite all stco offsets
raise Exception("moov container not at end of file")
# go back and write inserted size
mp4file.seek(pos)
mp4file.write(struct.pack(">I", size + insert_size))
# go inside moov
mp4file.seek(pos + 8)
# find trak container (probably after mvhd)
while True:
pos = mp4file.tell()
size_and_tag = mp4file.read(8)
size, tag = struct.unpack(">I4s", size_and_tag)
CONSOLE.print("[bold green]read " + str(tag) + " size " + str(size) + " at " + str(pos))
if tag == b"trak":
break
mp4file.seek(pos + size)
# go back and write inserted size
mp4file.seek(pos)
mp4file.write(struct.pack(">I", size + insert_size))
# we need to read everything from end of trak to end of file in order to insert
# TODO: to support faststart, make more efficient (may load nearly all data)
mp4file.seek(pos + size)
rest_of_file = mp4file.read(mp4file_size - pos - size)
# go to end of trak (again)
mp4file.seek(pos + size)
# insert our uuid atom with spherical metadata
mp4file.write(struct.pack(">I4s16s", insert_size, b"uuid", spherical_uuid))
mp4file.write(spherical_metadata)
# write rest of file
mp4file.write(rest_of_file)
finally:
mp4file.close()
CONSOLE.rule("[green] :tada: :tada: :tada: Success :tada: :tada: :tada:")
CONSOLE.print(f"[green]Saved video to {output_filename}", justify="center")

Expand Down Expand Up @@ -139,10 +219,12 @@ def main(self) -> None:
camera_start = pipeline.datamanager.eval_dataloader.get_camera(image_idx=0).flatten()
# TODO(ethan): pass in the up direction of the camera
camera_path = get_spiral_path(camera_start, steps=30, radius=0.1)
camera_type = ""
elif self.traj == "filename":
with open(self.camera_path_filename, "r", encoding="utf-8") as f:
camera_path = json.load(f)
seconds = camera_path["seconds"]
camera_type = camera_path["camera_type"]
camera_path = get_path_from_json(camera_path)
else:
assert_never(self.traj)
Expand All @@ -155,6 +237,7 @@ def main(self) -> None:
rendered_resolution_scaling_factor=1.0 / self.downscale_factor,
seconds=seconds,
output_format=self.output_format,
camera_type=camera_type,
)


Expand Down

0 comments on commit c4dade4

Please sign in to comment.