From afc1c2f8f9895f8af9b3aa33ae2152e6b144d284 Mon Sep 17 00:00:00 2001 From: Wei Zhang Date: Mon, 2 Jan 2023 21:42:08 +0100 Subject: [PATCH] Some fixes and can run eval --- .gitignore | 4 ++ datasets/nerf_dataset.py | 4 +- examples/slam_demo.py | 2 +- fusion/tsdf_fusion.py | 46 ++++++++++-------- gui/open3d_gui.py | 13 ++--- requirements.txt | 2 + slam/inertial_frontends/inertial_frontend.py | 2 +- slam/vio_slam.py | 2 +- slam/visual_frontends/visual_frontend.py | 50 ++++++++++++-------- thirdparty/gtsam | 2 +- thirdparty/instant-ngp | 2 +- utils/flow_viz.py | 6 ++- 12 files changed, 79 insertions(+), 56 deletions(-) diff --git a/.gitignore b/.gitignore index 3a01d31..bc1794a 100644 --- a/.gitignore +++ b/.gitignore @@ -169,3 +169,7 @@ cython_debug/ # and can be added to the global gitignore or merged into this file. For a more nuclear # option (not recommended) you can uncomment the following to ignore the entire idea folder. #.idea/ + +build_ngp +build_gtsam +datasets diff --git a/datasets/nerf_dataset.py b/datasets/nerf_dataset.py index 73f3876..13b7e49 100644 --- a/datasets/nerf_dataset.py +++ b/datasets/nerf_dataset.py @@ -17,7 +17,7 @@ def __init__(self, args, device): super().__init__("Nerf", args, device) self.parse_metadata() # self._build_dataset_index() # Loads all the data first, and then streams. - self.tqdm = tqdm(total=self.__len__()) # Call after parsing metadata + # self.tqdm = tqdm(total=self.__len__()) # Call after parsing metadata def get_cam_calib(self): w, h = self.json["w"], self.json["h"] @@ -165,7 +165,7 @@ def __len__(self): return len(self.image_paths) def __getitem__(self, k): - self.tqdm.update(1) + # self.tqdm.update(1) return self._get_data_packet(k) if self.data_packets is None else self.data_packets[k] diff --git a/examples/slam_demo.py b/examples/slam_demo.py index 45245de..7ad24e1 100644 --- a/examples/slam_demo.py +++ b/examples/slam_demo.py @@ -105,7 +105,7 @@ def run(args): slam_module.register_output_queue(slam_output_queue_for_fusion) fusion_module.register_input_queue("slam", slam_output_queue_for_fusion) - if (args.fusion == 'nerf' and not slam) or (args.fusion != 'nerf' and args.eval): + if (args.fusion == 'nerf' and not slam): # or (args.fusion != 'nerf' and args.eval): # Only used for evaluation, or in case we do not use slam (for nerf) data_provider_module.register_output_queue(data_for_fusion_output_queue) fusion_module.register_input_queue("data", data_for_fusion_output_queue) diff --git a/fusion/tsdf_fusion.py b/fusion/tsdf_fusion.py index 1d2ddae..169bf82 100644 --- a/fusion/tsdf_fusion.py +++ b/fusion/tsdf_fusion.py @@ -110,7 +110,7 @@ def handle_slam_packet(self, packet): packet = packet[1] if packet is None: - print("Fusion packet from SLAM module is None...") + # print("Fusion packet from SLAM module is None...") return True if self.evaluate and packet["is_last_frame"] and not "cam0_images" in packet: @@ -180,6 +180,7 @@ def evaluate_metrics(self): self.max_depth_sigma_thresh, evaluate=True) print("Done evaluating reconstruction.") + # exit(0) # builds volume from packet # TODO: build volume from pcl directly. def build_volume(self, packet, o3d_intrinsics, masks): @@ -368,7 +369,7 @@ def render_volume(self, packet, o3d_intrinsics, min_weight_for_render, max_depth width_px=W, height_px=H) - print(f"Casting Rays for camera {ix}") + # print(f"Casting Rays for camera {ix}") render = None if self.use_old_volume: if self.scene: @@ -428,13 +429,13 @@ def render_volume(self, packet, o3d_intrinsics, min_weight_for_render, max_depth rendered_depth_map = torch.tensor( render['depth'].cpu().numpy(), device=depth.device, dtype=depth.dtype).squeeze() - if self.debug: - cv2.imshow("Rendered depth", rendered_depth.colorize_depth( - self.depth_scale, self.min_depth, self.max_depth).as_tensor().cpu().numpy()) - cv2.imshow("Rendered color?", rendered_c.as_tensor().cpu().numpy()) - cv2.imshow('rendered color', sum_colors.cpu().numpy()) - cv2.imshow('actual color', color.cpu().numpy()) - cv2.imshow('rendered weights', sum_weights.cpu().numpy()) + if False: + # cv2.imshow("Rendered depth", rendered_depth.colorize_depth( + # self.depth_scale, self.min_depth, self.max_depth).as_tensor().cpu().numpy()) + # cv2.imshow("Rendered color?", rendered_c.as_tensor().cpu().numpy()) + # cv2.imshow('rendered color', sum_colors.cpu().numpy()) + # cv2.imshow('actual color', color.cpu().numpy()) + # cv2.imshow('rendered weights', sum_weights.cpu().numpy()) if self.use_old_volume: cv2.imshow("rend_depth_map", render['t_hit'].numpy()) rendered_depth_mask = (np.abs(render['geometry_ids'].numpy()) > 0).astype(float) @@ -443,13 +444,13 @@ def render_volume(self, packet, o3d_intrinsics, min_weight_for_render, max_depth viz_depth_map(rendered_depth_map, fix_range=False, name="rendered_depth_map") viz_depth_map(depth, fix_range=False, name="estimated_depth_map") viz_depth_map(gt_depth, fix_range=False, name="ground-truth depth") - viz_depth_sigma(depth_sigma[None], name="estimated_depth_map sigma", - fix_range=True, - bg_img=color.permute(2,0,1)[None], - sigma_thresh=max_depth_sigma_thresh) - scale = gt_depth.mean() / rendered_depth_map.mean() - diff_depth_map = torch.abs(scale * rendered_depth_map - gt_depth) - viz_depth_map(diff_depth_map, fix_range=False, name="Error depth map", colormap=cv2.COLORMAP_TURBO, invert=False) + # viz_depth_sigma(depth_sigma[None], name="estimated_depth_map sigma", + # fix_range=True, + # bg_img=color.permute(2,0,1)[None], + # sigma_thresh=max_depth_sigma_thresh) + # scale = gt_depth.mean() / rendered_depth_map.mean() + # diff_depth_map = torch.abs(scale * rendered_depth_map - gt_depth) + # viz_depth_map(diff_depth_map, range=(0,0.5), name="Error depth map", colormap=cv2.COLORMAP_TURBO, invert=False) cv2.waitKey(1) @@ -469,7 +470,10 @@ def render_volume(self, packet, o3d_intrinsics, min_weight_for_render, max_depth diff_depth_map = torch.abs(scale * rendered_depth_map - gt_depth) diff_depth_map[diff_depth_map > 2.0] = 2.0 # Truncate outliers to 1m, otw biases metric, this can happen either bcs depth is not estimated or bcs gt depth is wrong. if self.debug: - viz_depth_map(diff_depth_map, fix_range=False, name="Error depth map after crop", colormap=cv2.COLORMAP_TURBO, invert=False) + viz_depth_map(rendered_depth_map, fix_range=False, name="rendered_depth_map") + viz_depth_map(depth, fix_range=False, name="estimated_depth_map") + viz_depth_map(gt_depth, fix_range=False, name="ground-truth depth") + viz_depth_map(diff_depth_map, range=(0,0.5), name="Error depth map after crop", colormap=cv2.COLORMAP_TURBO, invert=False) cv2.waitKey(1) l1 = diff_depth_map.mean().cpu().numpy() * 100 # From m to cm AND use the mean (as in Nice-SLAM) total_l1 += l1 @@ -480,8 +484,10 @@ def render_volume(self, packet, o3d_intrinsics, min_weight_for_render, max_depth psnr = total_psnr / (len(viz_idx) or 1) l1 = total_l1 / (len(viz_idx) or 1) print(f"Dt={dt}; PSNR={psnr}; L1={l1}") - data_frame.loc[len(data_frame.index)] = [dt, psnr, l1] - data_frame.to_csv("results.csv") + # data_frame.loc[len(data_frame.index)] = [dt, psnr, l1] + # data_frame.to_csv("results.csv") + with open("results.csv", 'a') as f: + f.write(f"Dt={dt}; PSNR={psnr}; L1={l1}\n") def update_history(self, packet): if packet["is_last_frame"]: @@ -531,7 +537,7 @@ def get_history_packet(self): packet["cam0_intrinsics"] += [h["cam0_intrinsics"]] packet["calibs"] += [h["calibs"]] # TODO: WHY ARE WE DIVIDING? SHOULDN'T IT BE MULTIPLYING?!? - packet["gt_depths"] += [torch.tensor(h["gt_depths"], device=h["cam0_images"].device, dtype=torch.float32).permute(2,0,1) * h["calibs"].depth_scale] + packet["gt_depths"] += [h["gt_depths"].clone().detach() * h["calibs"].depth_scale] packet["viz_idx"] = torch.stack(packet["viz_idx"] ) packet["cam0_poses"] = torch.stack(packet["cam0_poses"] ) packet["cam0_depths_cov_up"] = torch.stack(packet["cam0_depths_cov_up"]) diff --git a/gui/open3d_gui.py b/gui/open3d_gui.py index e893ff7..ae77750 100644 --- a/gui/open3d_gui.py +++ b/gui/open3d_gui.py @@ -175,8 +175,9 @@ def initialize(self): self.viz.create_window(width=1200, height=680) #create_window(self, window_name='Open3D', width=1920, height=1080, left=50, top=50, visible=True) self.viz.get_render_option().point_size = 0.001 # in m - self.viz.get_render_option().background_color = np.asarray([0, 0, 0]) # Black background + # self.viz.get_render_option().background_color = np.asarray([0, 0, 0]) # Black background self.viz.get_render_option().light_on = False + self.viz.add_geometry(o3d.geometry.TriangleMesh.create_coordinate_frame(0.2)) self.viz.register_key_callback(ord("Q"), self.destroy_window) self.viz.register_key_callback(256, self.destroy_window) # esc @@ -401,7 +402,7 @@ def add_geometry(self, data_packets): raise NotImplementedError def viz_data_packet(self, packet): - print("Visualizing DATA packet.") + # print("Visualizing DATA packet.") if type(packet) is Values: values = packet @@ -441,10 +442,10 @@ def viz_slam_packet(self, packet): packet = packet[1] if packet is None: - print("O3d packet from SLAM module is None...") + # print("O3d packet from SLAM module is None...") return True - print("Visualizing SLAM packet.") + # print("Visualizing SLAM packet.") if self.evaluate and packet["is_last_frame"] and not "cam0_images" in packet: print("Last Frame reached, and no global BA") self.evaluate_metrics() @@ -509,7 +510,7 @@ def viz_fusion_packet(self, packet): print("Gui packet from Fusion module is None...") return True - print("Visualizing Gui packet from Fusion") + # print("Visualizing Gui packet from Fusion") mesh = packet["mesh"] if mesh: self.viz_mesh(mesh.create_mesh()) @@ -605,7 +606,7 @@ def vis_body_sigmas(self, packet): body_sigma_actor = self.create_sphere_actor(body_pose, radius=S.max().sqrt().item()) s = S.max().sqrt().item() - ic(s) + # ic(s) max_s = 0.3 s_norm = s / max_s if s < max_s else 1.0 color = self.colormap(s_norm)[:3] diff --git a/requirements.txt b/requirements.txt index d10dc2d..fee3bd1 100644 --- a/requirements.txt +++ b/requirements.txt @@ -19,3 +19,5 @@ pyrealsense2 pybind11 gdown + +colored_glog diff --git a/slam/inertial_frontends/inertial_frontend.py b/slam/inertial_frontends/inertial_frontend.py index 6bc591a..2d0bfc5 100644 --- a/slam/inertial_frontends/inertial_frontend.py +++ b/slam/inertial_frontends/inertial_frontend.py @@ -172,7 +172,7 @@ def initial_state(self): gtsam.Point3(0.878612,2.142470,0.947262)) true_vel = np.array([0.009474,-0.014009,-0.002145]) true_bias = gtsam.imuBias.ConstantBias(np.array([-0.012492,0.547666,0.069073]), np.array([-0.002229,0.020700,0.076350])) - naive_pose = gtsam.Pose3.identity() + naive_pose = gtsam.Pose3 #.identity() naive_vel = np.zeros(3) naive_bias = gtsam.imuBias.ConstantBias() initial_pose = true_pose diff --git a/slam/vio_slam.py b/slam/vio_slam.py index 5e0f3ac..cfb0571 100644 --- a/slam/vio_slam.py +++ b/slam/vio_slam.py @@ -62,7 +62,7 @@ def initial_state(): gtsam.Point3(0.878612, 2.142470, 0.947262)) true_vel = np.array([0.009474,-0.014009,-0.002145]) true_bias = gtsam.imuBias.ConstantBias(np.array([-0.012492,0.547666,0.069073]), np.array([-0.002229,0.020700,0.076350])) - naive_pose = gtsam.Pose3.identity() + naive_pose = gtsam.Pose3 #.identity() naive_vel = np.zeros(3) naive_bias = gtsam.imuBias.ConstantBias() initial_pose = true_world_T_imu_t0 diff --git a/slam/visual_frontends/visual_frontend.py b/slam/visual_frontends/visual_frontend.py index 5b1a75b..c63cceb 100644 --- a/slam/visual_frontends/visual_frontend.py +++ b/slam/visual_frontends/visual_frontend.py @@ -8,6 +8,7 @@ import cv2 import numpy as np +from tqdm import tqdm import torch from torch import nn @@ -45,8 +46,8 @@ def lietorch_pose_to_gtsam(pose : lietorch.SE3): def gtsam_pose_to_torch(pose: gtsam.Pose3, device, dtype): t = pose.translation() - q = pose.rotation().quaternion() - return torch.tensor([t[0], t[1], t[2], q[1], q[2], q[3], q[0]], device=device, dtype=dtype) + q = pose.rotation().toQuaternion() + return torch.tensor([t[0], t[1], t[2], q.x(), q.y(), q.z(), q.w()], device=device, dtype=dtype) class VisualFrontend(nn.Module): def __init__(self): @@ -151,6 +152,8 @@ def __init__(self, world_T_body_t0, body_T_cam0, args, device="cpu"): self.idepth_prior_cov = torch.pow(self.sigma_idepth, 2) self.g_prior_cov = torch.block_diag(self.r_cov, self.t_cov) # GTSAM convention, rotation first, then translation + self.tqdm = tqdm(total=1000) + def __del__(self): print("Calling frontend dtor...") torch.cuda.empty_cache() @@ -240,7 +243,7 @@ def initialize_buffers(self, image_size): def forward(self, batch): # The output of RaftVisualFrontend is not dense optical flow # but rather a bunch of pose-to-pose factors resulting from the reduced camera matrix. - print("RaftVisualFrontend.forward") + # print("RaftVisualFrontend.forward") k = batch["k"][0] @@ -291,16 +294,20 @@ def forward(self, batch): assert k > 0 assert self.kf_idx < self.buffer + self.tqdm.update(1) + self.tqdm.set_description(f'adding keyframe: {self.kf_idx}') + + if batch["is_last_frame"]: + self.kf_idx -= 1 # Because in the last iter we increased it, but aren't taking any... + print("Last frame reached, and no new motion: starting GLOBAL BA") + self.terminate() + # Send the whole viz_out to update the fact that BA has changed all poses/depths + viz_out = self.get_viz_out(batch) + return x0, factors, viz_out + # Add frame as keyframe if we have enough motion, otherwise discard: current_imgs_features = self.__feature_encoder(imgs_norm_k) if not self.has_enough_motion(current_imgs_features): - if batch["is_last_frame"]: - self.kf_idx -= 1 # Because in the last iter we increased it, but aren't taking any... - print("Last frame reached, and no new motion: starting GLOBAL BA") - self.terminate() - # Send the whole viz_out to update the fact that BA has changed all poses/depths - viz_out = self.get_viz_out(batch) - return x0, factors, viz_out # By returning, we do not increment self.kf_idx return x0, factors, viz_out @@ -435,7 +442,7 @@ def update(self, kf0=None, kf1=None, itrs=2, use_inactive=False, EP=1e-7, motion # Or rather the factors? # Dense bundle adjustment - ic("BA!") + # ic("BA!") x0, rcm_factor = self.ba(gru_estimated_flow, gru_estimated_flow_weight, damping, ii, jj, kf0, kf1, itrs=itrs, lm=1e-4, ep=0.1, motion_only=motion_only, compute_covariances=self.compute_covariances) @@ -977,7 +984,7 @@ def _normalize_imgs(self, images, droid_normalization=True): @torch.no_grad() def has_enough_motion(self, current_imgs_features): # Only calculates if enough motion by looking at cam0 - ic(self.last_kf_idx) + # ic(self.last_kf_idx) last_img_features = self.features_imgs[self.last_kf_idx][0] current_img_features = current_imgs_features[0] last_img_context = self.contexts_imgs[self.last_kf_idx][0] @@ -1135,7 +1142,7 @@ def ba(self, gru_estimated_flow, gru_estimated_flow_weight, damping, ii, jj, # Add Factors if initial_priors is not None: - ic("ADDING initial prior!") + # ic("ADDING initial prior!") linear_factor_graph.push_back(initial_priors.linearize(x0)) # SOLVE @@ -1337,7 +1344,13 @@ def terminate(self, stream=None): def get_viz_out(self, batch): viz_index, = torch.where(self.viz_idx) viz_out = None - if len(viz_index) != 0: + + if batch["is_last_frame"]: + ic("Last frame") + # Leave all the entries null, this signals end of seq + viz_out = {"is_last_frame": True} + + elif len(viz_index) != 0: cam0_T_world = torch.index_select(self.cam0_T_world, 0, viz_index) gt_poses = torch.index_select(self.gt_poses, 0, viz_index) gt_depths = torch.index_select(self.gt_depths, 0, viz_index) @@ -1359,7 +1372,7 @@ def get_viz_out(self, batch): else: out_device = self.device - ic(out_device) + # ic(out_device) viz_out = {"cam0_poses": cam0_T_world.to(device=out_device), "gt_poses": gt_poses.to(device=out_device), @@ -1381,12 +1394,7 @@ def get_viz_out(self, batch): "is_last_frame": batch["is_last_frame"] } self.viz_idx[:] = False - else: - print("viz_index is empty, nothing to visualize") - if batch["is_last_frame"]: - ic("Last frame") - # Leave all the entries null, this signals end of seq - viz_out = {"is_last_frame": True} + return viz_out \ No newline at end of file diff --git a/thirdparty/gtsam b/thirdparty/gtsam index 139c156..a0240d6 160000 --- a/thirdparty/gtsam +++ b/thirdparty/gtsam @@ -1 +1 @@ -Subproject commit 139c156a7a6fb5ef6062549993410a8ab986720b +Subproject commit a0240d64c99e7dc0a13783ff6033ddd037e020e9 diff --git a/thirdparty/instant-ngp b/thirdparty/instant-ngp index 54aba7c..a501f0f 160000 --- a/thirdparty/instant-ngp +++ b/thirdparty/instant-ngp @@ -1 +1 @@ -Subproject commit 54aba7cfbeaf6a60f29469a9938485bebeba24c3 +Subproject commit a501f0f14699a2b493f1e2fd42a975a8a9c172d1 diff --git a/utils/flow_viz.py b/utils/flow_viz.py index cf50010..8932e2a 100644 --- a/utils/flow_viz.py +++ b/utils/flow_viz.py @@ -263,9 +263,11 @@ def viz_idepth(disps, upmask, fix_range=False): depth = 1.0 / disps_up[-1].squeeze().to(torch.float) # Visualize only the last depth... viz_depth_map(depth, fix_range) -def viz_depth_map(depth, fix_range=False, name='Depth up', invert=True, colormap=cv2.COLORMAP_PLASMA, write=False): +def viz_depth_map(depth, fix_range=False, name='Depth up', invert=True, colormap=cv2.COLORMAP_PLASMA, write=False, range=None): valid = (depth > 0) & (~torch.isinf(depth)) & (~torch.isnan(depth)) # one shouldn't use exact equality on floats but for synthetic values it's ok - if fix_range: + if range is not None: + dmin, dmax = range + elif fix_range: dmin, dmax = 0.0, 3.0 else: if valid.any():