Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove manual depth projection from car and nyud examples #1869

Merged
merged 4 commits into from
Apr 17, 2023
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 0 additions & 28 deletions examples/python/car/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,6 @@ def log_car_data() -> None:
# In the viewer you can select how to view entities - by frame_nr or the built-in `log_time`.
rr.set_time_sequence("frame_nr", sample.frame_idx)

# We log the projected points in the "world" space:
rr.log_points("world/points", sample.point_cloud)

# Log the camera pose:
rr.log_rigid3(
"world/camera",
Expand Down Expand Up @@ -159,7 +156,6 @@ class SampleFrame:
frame_idx: int
camera: CameraParameters
depth_image_mm: npt.NDArray[np.float32]
point_cloud: npt.NDArray[np.float32]
rgb_image: npt.NDArray[np.float32]
car_bbox: Tuple[npt.NDArray[np.int32], npt.NDArray[np.int32]]

Expand All @@ -177,28 +173,6 @@ def __init__(self, image_width: int, image_height: int) -> None:
# Pre-generate image containing the x and y coordinates per pixel
self.u_coords, self.v_coords = np.meshgrid(np.arange(0, self.w), np.arange(0, self.h))

def back_project(
    self,
    depth_image_mm: npt.NDArray[np.float32],
) -> npt.NDArray[np.float32]:
    """
    Convert a depth image into a point cloud with one 3D point per pixel.

    The pixel coordinates stored on the camera (`u_coords`, `v_coords`) are
    shifted by the principal point (`u_center`, `v_center`), scaled by the
    depth and divided by `focal_length`, i.e. the inverse of the pinhole
    `intrinsics` matrix is applied to every pixel.

    Parameters
    ----------
    depth_image_mm
        Depth image expressed in millimeters

    Returns
    -------
    Array with one `(x, y, z)` row per pixel, where `z` is the input depth
    divided by 1000.

    """
    # Flatten everything so each pixel becomes one entry; depth goes from mm to m.
    depth = depth_image_mm.reshape(-1) / 1000.0
    u_offset = self.u_coords.reshape(-1).astype(float) - self.u_center
    v_offset = self.v_coords.reshape(-1).astype(float) - self.v_center

    # Scale the centred pixel offsets by depth over focal length.
    x_coords = u_offset * depth / self.focal_length
    y_coords = v_offset * depth / self.focal_length

    # Stack as rows (3 x N) and transpose so every row is a single point.
    return np.vstack((x_coords, y_coords, depth)).T

def render_dummy_slanted_plane_mm(self) -> npt.NDArray[np.float32]:
    """
    Render a synthetic depth image of a slanted plane, in millimeters.

    The depth depends only on the pixel row: for row `v` it is
    `1000 / (0.01 + 0.4 * v / h)`, so it shrinks as `v` grows.
    """
    # The 0.01 offset keeps the denominator non-zero on the first row (v == 0).
    denominator = 0.01 + 0.4 * self.v_coords / self.h
    return 1000.0 * 1.0 / denominator
Expand Down Expand Up @@ -266,12 +240,10 @@ def generate_car_data(num_frames: int) -> Iterator[SampleFrame]:
depth_image_mm = depth_background_mm.copy()
rgb = rgb_background.copy()
car.draw(depth_image_mm=depth_image_mm, rgb=rgb)
point_cloud = camera.back_project(depth_image_mm=depth_image_mm)
sample = SampleFrame(
frame_idx=i,
camera=camera.parameters,
depth_image_mm=depth_image_mm,
point_cloud=point_cloud,
rgb_image=rgb,
car_bbox=(car.min, car.size),
)
Expand Down
2 changes: 1 addition & 1 deletion examples/python/notebook/cube.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,7 @@
"metadata": {},
"outputs": [],
"source": [
emilk marked this conversation as resolved.
Show resolved Hide resolved
"\n",
emilk marked this conversation as resolved.
Show resolved Hide resolved
"\n",
emilk marked this conversation as resolved.
Show resolved Hide resolved
"STEPS = 100\n",
"twists = math.pi * np.sin(np.linspace(0, math.tau, STEPS)) / 4\n",
"for t in range(STEPS):\n",
Expand Down
75 changes: 21 additions & 54 deletions examples/python/nyud/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,23 +44,6 @@ def camera_intrinsics(image: npt.NDArray[np.uint8]) -> npt.NDArray[np.uint8]:
return np.array(((focal_length, 0, u_center), (0, focal_length, v_center), (0, 0, 1)))


def back_project(depth_image: npt.NDArray[np.float32]) -> npt.NDArray[np.float32]:
    """
    Given a depth image, generate a matching point cloud.

    The camera parameters come from `camera_for_image` for the image's
    height and width. The result has one `(x, y, z)` row per pixel, and `z`
    is the input depth value unchanged.
    """
    height, width = depth_image.shape
    u_center, v_center, focal_length = camera_for_image(height, width)

    # Per-pixel column (u) and row (v) indices, each with the image's shape.
    columns, rows = np.meshgrid(np.arange(0, width), np.arange(0, height))

    # Apply the inverse of the intrinsics matrix to every flattened pixel.
    depth = depth_image.reshape(-1)
    u_offset = columns.reshape(-1).astype(float) - u_center
    v_offset = rows.reshape(-1).astype(float) - v_center
    x_coords = u_offset * depth / focal_length
    y_coords = v_offset * depth / focal_length

    # Stack as rows (3 x N) and transpose so every row is a single point.
    return np.vstack((x_coords, y_coords, depth)).T


def read_image_rgb(buf: bytes) -> npt.NDArray[np.uint8]:
"""Decode an image provided in `buf`, and interpret it as RGB data."""
np_buf: npt.NDArray[np.uint8] = np.ndarray(shape=(1, len(buf)), dtype=np.uint8, buffer=buf)
Expand All @@ -77,9 +60,7 @@ def read_image(buf: bytes) -> npt.NDArray[np.uint8]:
return img


def log_nyud_data(recording_path: Path, subset_idx: int = 0, depth_image_interval: int = 1) -> None:
depth_images_counter = 0

def log_nyud_data(recording_path: Path, subset_idx: int = 0) -> None:
rr.log_view_coordinates("world", up="-Y", timeless=True)

with zipfile.ZipFile(recording_path, "r") as archive:
Expand All @@ -105,32 +86,26 @@ def log_nyud_data(recording_path: Path, subset_idx: int = 0, depth_image_interva
rr.log_image("world/camera/image/rgb", img_rgb)

elif f.filename.endswith(".pgm"):
if depth_images_counter % depth_image_interval == 0:
buf = archive.read(f)
img_depth = read_image(buf)

point_cloud = back_project(depth_image=img_depth / DEPTH_IMAGE_SCALING)
rr.log_points("world/points", point_cloud, colors=np.array([255, 255, 255, 255]))

# Log the camera transforms:
translation = [0, 0, 0]
rotation_q = [0, 0, 0, 1]
rr.log_rigid3(
"world/camera",
parent_from_child=(translation, rotation_q),
xyz="RDF", # X=Right, Y=Down, Z=Forward
)
rr.log_pinhole(
"world/camera/image",
child_from_parent=camera_intrinsics(img_depth),
width=img_depth.shape[1],
height=img_depth.shape[0],
)

# Log the depth image to the camera's image-space:
rr.log_depth_image("world/camera/image/depth", img_depth, meter=DEPTH_IMAGE_SCALING)

depth_images_counter += 1
buf = archive.read(f)
img_depth = read_image(buf)

# Log the camera transforms:
translation = [0, 0, 0]
rotation_q = [0, 0, 0, 1]
rr.log_rigid3(
"world/camera",
parent_from_child=(translation, rotation_q),
xyz="RDF", # X=Right, Y=Down, Z=Forward
)
rr.log_pinhole(
"world/camera/image",
child_from_parent=camera_intrinsics(img_depth),
width=img_depth.shape[1],
height=img_depth.shape[0],
)

# Log the depth image to the camera's image-space:
rr.log_depth_image("world/camera/image/depth", img_depth, meter=DEPTH_IMAGE_SCALING)


def ensure_recording_downloaded(name: str) -> Path:
Expand Down Expand Up @@ -184,23 +159,15 @@ def download_progress(url: str, dst: Path) -> None:
help="Name of the NYU Depth Dataset V2 recording",
)
parser.add_argument("--subset-idx", type=int, default=0, help="The index of the subset of the recording to use.")
parser.add_argument(
"--depth-image-interval",
type=int,
default=8,
help="The number of rgb images logged for each depth image. (min value 1)",
)
rr.script_add_args(parser)
args = parser.parse_args()

rr.script_setup(args, "nyud")
recording_path = ensure_recording_downloaded(args.recording)

depth_image_interval = max(args.depth_image_interval, 1)
log_nyud_data(
recording_path=recording_path,
subset_idx=args.subset_idx,
depth_image_interval=depth_image_interval,
)

rr.script_teardown(args)