From afc1c2f8f9895f8af9b3aa33ae2152e6b144d284 Mon Sep 17 00:00:00 2001
From: Wei Zhang <wei.zhang@ifp.uni-stuttgart.de>
Date: Mon, 2 Jan 2023 21:42:08 +0100
Subject: [PATCH] Some fixes and can run eval

---
 .gitignore                                   |  4 ++
 datasets/nerf_dataset.py                     |  4 +-
 examples/slam_demo.py                        |  2 +-
 fusion/tsdf_fusion.py                        | 46 ++++++++++--------
 gui/open3d_gui.py                            | 13 ++---
 requirements.txt                             |  2 +
 slam/inertial_frontends/inertial_frontend.py |  2 +-
 slam/vio_slam.py                             |  2 +-
 slam/visual_frontends/visual_frontend.py     | 50 ++++++++++++--------
 thirdparty/gtsam                             |  2 +-
 thirdparty/instant-ngp                       |  2 +-
 utils/flow_viz.py                            |  6 ++-
 12 files changed, 79 insertions(+), 56 deletions(-)

diff --git a/.gitignore b/.gitignore
index 3a01d31..bc1794a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -169,3 +169,7 @@ cython_debug/
 #  and can be added to the global gitignore or merged into this file.  For a more nuclear
 #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
 #.idea/
+
+build_ngp
+build_gtsam
+datasets
diff --git a/datasets/nerf_dataset.py b/datasets/nerf_dataset.py
index 73f3876..13b7e49 100644
--- a/datasets/nerf_dataset.py
+++ b/datasets/nerf_dataset.py
@@ -17,7 +17,7 @@ def __init__(self, args, device):
         super().__init__("Nerf", args, device)
         self.parse_metadata()
         # self._build_dataset_index() # Loads all the data first, and then streams.
-        self.tqdm = tqdm(total=self.__len__()) # Call after parsing metadata
+        # self.tqdm = tqdm(total=self.__len__()) # Call after parsing metadata
 
     def get_cam_calib(self):
         w, h   = self.json["w"],    self.json["h"]
@@ -165,7 +165,7 @@ def __len__(self):
         return len(self.image_paths)
 
     def __getitem__(self, k):
-        self.tqdm.update(1)
+        # self.tqdm.update(1)
         return self._get_data_packet(k) if self.data_packets is None else self.data_packets[k]
 
 
diff --git a/examples/slam_demo.py b/examples/slam_demo.py
index 45245de..7ad24e1 100644
--- a/examples/slam_demo.py
+++ b/examples/slam_demo.py
@@ -105,7 +105,7 @@ def run(args):
             slam_module.register_output_queue(slam_output_queue_for_fusion)
             fusion_module.register_input_queue("slam", slam_output_queue_for_fusion)
         
-        if (args.fusion == 'nerf' and not slam) or (args.fusion != 'nerf' and args.eval):
+        if (args.fusion == 'nerf' and not slam): # or (args.fusion != 'nerf' and args.eval):
             # Only used for evaluation, or in case we do not use slam (for nerf)
             data_provider_module.register_output_queue(data_for_fusion_output_queue)
             fusion_module.register_input_queue("data", data_for_fusion_output_queue)
diff --git a/fusion/tsdf_fusion.py b/fusion/tsdf_fusion.py
index 1d2ddae..169bf82 100644
--- a/fusion/tsdf_fusion.py
+++ b/fusion/tsdf_fusion.py
@@ -110,7 +110,7 @@ def handle_slam_packet(self, packet):
 
         packet = packet[1]
         if packet is None:
-            print("Fusion packet from SLAM module is None...")
+            # print("Fusion packet from SLAM module is None...")
             return True
 
         if self.evaluate and packet["is_last_frame"] and not "cam0_images" in packet:
@@ -180,6 +180,7 @@ def evaluate_metrics(self):
                            self.max_depth_sigma_thresh,
                            evaluate=True)
         print("Done evaluating reconstruction.")
+        # exit(0)
 
     # builds volume from packet # TODO: build volume from pcl directly.
     def build_volume(self, packet, o3d_intrinsics, masks):
@@ -368,7 +369,7 @@ def render_volume(self, packet, o3d_intrinsics, min_weight_for_render, max_depth
                 width_px=W,
                 height_px=H)
 
-            print(f"Casting Rays for camera {ix}")
+            # print(f"Casting Rays for camera {ix}")
             render = None
             if self.use_old_volume:
                 if self.scene:
@@ -428,13 +429,13 @@ def render_volume(self, packet, o3d_intrinsics, min_weight_for_render, max_depth
                 rendered_depth_map = torch.tensor(
                     render['depth'].cpu().numpy(), device=depth.device, dtype=depth.dtype).squeeze()
 
-            if self.debug:
-                cv2.imshow("Rendered depth", rendered_depth.colorize_depth(
-                    self.depth_scale, self.min_depth, self.max_depth).as_tensor().cpu().numpy())
-                cv2.imshow("Rendered color?", rendered_c.as_tensor().cpu().numpy())
-                cv2.imshow('rendered color', sum_colors.cpu().numpy())
-                cv2.imshow('actual color', color.cpu().numpy())
-                cv2.imshow('rendered weights', sum_weights.cpu().numpy())
+            if False:
+                # cv2.imshow("Rendered depth", rendered_depth.colorize_depth(
+                #     self.depth_scale, self.min_depth, self.max_depth).as_tensor().cpu().numpy())
+                # cv2.imshow("Rendered color?", rendered_c.as_tensor().cpu().numpy())
+                # cv2.imshow('rendered color', sum_colors.cpu().numpy())
+                # cv2.imshow('actual color', color.cpu().numpy())
+                # cv2.imshow('rendered weights', sum_weights.cpu().numpy())
                 if self.use_old_volume:
                     cv2.imshow("rend_depth_map", render['t_hit'].numpy())
                     rendered_depth_mask = (np.abs(render['geometry_ids'].numpy()) > 0).astype(float)
@@ -443,13 +444,13 @@ def render_volume(self, packet, o3d_intrinsics, min_weight_for_render, max_depth
                 viz_depth_map(rendered_depth_map, fix_range=False, name="rendered_depth_map")
                 viz_depth_map(depth, fix_range=False, name="estimated_depth_map")
                 viz_depth_map(gt_depth, fix_range=False, name="ground-truth depth")
-                viz_depth_sigma(depth_sigma[None], name="estimated_depth_map sigma",
-                                fix_range=True,
-                                bg_img=color.permute(2,0,1)[None],
-                                sigma_thresh=max_depth_sigma_thresh)
-                scale = gt_depth.mean() / rendered_depth_map.mean()
-                diff_depth_map = torch.abs(scale * rendered_depth_map - gt_depth)
-                viz_depth_map(diff_depth_map, fix_range=False, name="Error depth map", colormap=cv2.COLORMAP_TURBO, invert=False)
+                # viz_depth_sigma(depth_sigma[None], name="estimated_depth_map sigma",
+                #                 fix_range=True,
+                #                 bg_img=color.permute(2,0,1)[None],
+                #                 sigma_thresh=max_depth_sigma_thresh)
+                # scale = gt_depth.mean() / rendered_depth_map.mean()
+                # diff_depth_map = torch.abs(scale * rendered_depth_map - gt_depth)
+                # viz_depth_map(diff_depth_map, range=(0,0.5), name="Error depth map", colormap=cv2.COLORMAP_TURBO, invert=False)
                 cv2.waitKey(1)
 
 
@@ -469,7 +470,10 @@ def render_volume(self, packet, o3d_intrinsics, min_weight_for_render, max_depth
                 diff_depth_map = torch.abs(scale * rendered_depth_map - gt_depth)
                 diff_depth_map[diff_depth_map > 2.0] = 2.0 # Truncate outliers to 1m, otw biases metric, this can happen either bcs depth is not estimated or bcs gt depth is wrong. 
                 if self.debug:
-                    viz_depth_map(diff_depth_map, fix_range=False, name="Error depth map after crop", colormap=cv2.COLORMAP_TURBO, invert=False)
+                    viz_depth_map(rendered_depth_map, fix_range=False, name="rendered_depth_map")
+                    viz_depth_map(depth, fix_range=False, name="estimated_depth_map")
+                    viz_depth_map(gt_depth, fix_range=False, name="ground-truth depth")
+                    viz_depth_map(diff_depth_map, range=(0,0.5), name="Error depth map after crop", colormap=cv2.COLORMAP_TURBO, invert=False)
                     cv2.waitKey(1)
                 l1 = diff_depth_map.mean().cpu().numpy() * 100 # From m to cm AND use the mean (as in Nice-SLAM)
                 total_l1 += l1
@@ -480,8 +484,10 @@ def render_volume(self, packet, o3d_intrinsics, min_weight_for_render, max_depth
             psnr = total_psnr / (len(viz_idx) or 1)
             l1 = total_l1 / (len(viz_idx) or 1)
             print(f"Dt={dt}; PSNR={psnr}; L1={l1}")
-            data_frame.loc[len(data_frame.index)] = [dt, psnr, l1]
-            data_frame.to_csv("results.csv")
+            # data_frame.loc[len(data_frame.index)] = [dt, psnr, l1]
+            # data_frame.to_csv("results.csv")
+            with open("results.csv", 'a') as f:
+                f.write(f"Dt={dt}; PSNR={psnr}; L1={l1}\n")
 
     def update_history(self, packet):
         if packet["is_last_frame"]:
@@ -531,7 +537,7 @@ def get_history_packet(self):
             packet["cam0_intrinsics"]    += [h["cam0_intrinsics"]]
             packet["calibs"]             += [h["calibs"]]
             # TODO: WHY ARE WE DIVIDING? SHOULDN'T IT BE MULTIPLYING?!? 
-            packet["gt_depths"]          += [torch.tensor(h["gt_depths"], device=h["cam0_images"].device, dtype=torch.float32).permute(2,0,1) * h["calibs"].depth_scale]
+            packet["gt_depths"]          += [h["gt_depths"].clone().detach() * h["calibs"].depth_scale]
         packet["viz_idx"]            = torch.stack(packet["viz_idx"]           )
         packet["cam0_poses"]         = torch.stack(packet["cam0_poses"]        )
         packet["cam0_depths_cov_up"] = torch.stack(packet["cam0_depths_cov_up"])
diff --git a/gui/open3d_gui.py b/gui/open3d_gui.py
index e893ff7..ae77750 100644
--- a/gui/open3d_gui.py
+++ b/gui/open3d_gui.py
@@ -175,8 +175,9 @@ def initialize(self):
         self.viz.create_window(width=1200, height=680) #create_window(self, window_name='Open3D', width=1920, height=1080, left=50, top=50, visible=True)
 
         self.viz.get_render_option().point_size = 0.001 # in m
-        self.viz.get_render_option().background_color = np.asarray([0, 0, 0]) # Black background
+        # self.viz.get_render_option().background_color = np.asarray([0, 0, 0]) # Black background
         self.viz.get_render_option().light_on = False
+        self.viz.add_geometry(o3d.geometry.TriangleMesh.create_coordinate_frame(0.2))
 
         self.viz.register_key_callback(ord("Q"), self.destroy_window)
         self.viz.register_key_callback(256, self.destroy_window) # esc
@@ -401,7 +402,7 @@ def add_geometry(self, data_packets):
                     raise NotImplementedError
 
     def viz_data_packet(self, packet):
-        print("Visualizing DATA packet.")
+        # print("Visualizing DATA packet.")
 
         if type(packet) is Values:
             values = packet
@@ -441,10 +442,10 @@ def viz_slam_packet(self, packet):
 
         packet = packet[1]
         if packet is None:
-            print("O3d packet from SLAM module is None...")
+            # print("O3d packet from SLAM module is None...")
             return True
 
-        print("Visualizing SLAM packet.")
+        # print("Visualizing SLAM packet.")
         if self.evaluate and packet["is_last_frame"] and not "cam0_images" in packet:
             print("Last Frame reached, and no global BA")
             self.evaluate_metrics()
@@ -509,7 +510,7 @@ def viz_fusion_packet(self, packet):
             print("Gui packet from Fusion module is None...")
             return True
 
-        print("Visualizing Gui packet from Fusion")
+        # print("Visualizing Gui packet from Fusion")
         mesh = packet["mesh"]
         if mesh:
             self.viz_mesh(mesh.create_mesh())
@@ -605,7 +606,7 @@ def vis_body_sigmas(self, packet):
             body_sigma_actor = self.create_sphere_actor(body_pose, radius=S.max().sqrt().item())
 
             s = S.max().sqrt().item()
-            ic(s)
+            # ic(s)
             max_s = 0.3
             s_norm =  s / max_s if s < max_s else 1.0
             color = self.colormap(s_norm)[:3]
diff --git a/requirements.txt b/requirements.txt
index d10dc2d..fee3bd1 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -19,3 +19,5 @@ pyrealsense2
 
 pybind11
 gdown
+
+colored_glog
diff --git a/slam/inertial_frontends/inertial_frontend.py b/slam/inertial_frontends/inertial_frontend.py
index 6bc591a..2d0bfc5 100644
--- a/slam/inertial_frontends/inertial_frontend.py
+++ b/slam/inertial_frontends/inertial_frontend.py
@@ -172,7 +172,7 @@ def initial_state(self):
                                                gtsam.Point3(0.878612,2.142470,0.947262))
         true_vel = np.array([0.009474,-0.014009,-0.002145])
         true_bias = gtsam.imuBias.ConstantBias(np.array([-0.012492,0.547666,0.069073]), np.array([-0.002229,0.020700,0.076350]))
-        naive_pose = gtsam.Pose3.identity()
+        naive_pose = gtsam.Pose3 #.identity()
         naive_vel = np.zeros(3)
         naive_bias = gtsam.imuBias.ConstantBias()
         initial_pose = true_pose
diff --git a/slam/vio_slam.py b/slam/vio_slam.py
index 5e0f3ac..cfb0571 100644
--- a/slam/vio_slam.py
+++ b/slam/vio_slam.py
@@ -62,7 +62,7 @@ def initial_state():
                                       gtsam.Point3(0.878612, 2.142470, 0.947262))
     true_vel = np.array([0.009474,-0.014009,-0.002145])
     true_bias = gtsam.imuBias.ConstantBias(np.array([-0.012492,0.547666,0.069073]), np.array([-0.002229,0.020700,0.076350]))
-    naive_pose = gtsam.Pose3.identity()
+    naive_pose = gtsam.Pose3 #.identity()
     naive_vel = np.zeros(3)
     naive_bias = gtsam.imuBias.ConstantBias()
     initial_pose = true_world_T_imu_t0
diff --git a/slam/visual_frontends/visual_frontend.py b/slam/visual_frontends/visual_frontend.py
index 5b1a75b..c63cceb 100644
--- a/slam/visual_frontends/visual_frontend.py
+++ b/slam/visual_frontends/visual_frontend.py
@@ -8,6 +8,7 @@
 import cv2
 
 import numpy as np
+from tqdm import tqdm
 
 import torch
 from torch import nn
@@ -45,8 +46,8 @@ def lietorch_pose_to_gtsam(pose : lietorch.SE3):
 
 def gtsam_pose_to_torch(pose: gtsam.Pose3, device, dtype):
     t = pose.translation()
-    q = pose.rotation().quaternion()
-    return torch.tensor([t[0], t[1], t[2], q[1], q[2], q[3], q[0]], device=device, dtype=dtype)
+    q = pose.rotation().toQuaternion()
+    return torch.tensor([t[0], t[1], t[2], q.x(), q.y(), q.z(), q.w()], device=device, dtype=dtype)
 
 class VisualFrontend(nn.Module):
     def __init__(self):
@@ -151,6 +152,8 @@ def __init__(self, world_T_body_t0, body_T_cam0, args, device="cpu"):
         self.idepth_prior_cov = torch.pow(self.sigma_idepth, 2)
         self.g_prior_cov = torch.block_diag(self.r_cov, self.t_cov) # GTSAM convention, rotation first, then translation
 
+        self.tqdm = tqdm(total=1000)
+
     def __del__(self):
         print("Calling frontend dtor...")
         torch.cuda.empty_cache()
@@ -240,7 +243,7 @@ def initialize_buffers(self, image_size):
     def forward(self, batch):
         # The output of RaftVisualFrontend is not dense optical flow
         # but rather a bunch of pose-to-pose factors resulting from the reduced camera matrix.
-        print("RaftVisualFrontend.forward")
+        # print("RaftVisualFrontend.forward")
 
         k = batch["k"][0]
 
@@ -291,16 +294,20 @@ def forward(self, batch):
         assert k > 0
         assert self.kf_idx < self.buffer
 
+        self.tqdm.update(1)
+        self.tqdm.set_description(f'adding keyframe: {self.kf_idx}')
+
+        if batch["is_last_frame"]:
+            self.kf_idx -= 1 # Because in the last iter we increased it, but aren't taking any...
+            print("Last frame reached, and no new motion: starting GLOBAL BA")
+            self.terminate()
+            # Send the whole viz_out to update the fact that BA has changed all poses/depths
+            viz_out = self.get_viz_out(batch)
+            return x0, factors, viz_out
+
         # Add frame as keyframe if we have enough motion, otherwise discard:
         current_imgs_features = self.__feature_encoder(imgs_norm_k)
         if not self.has_enough_motion(current_imgs_features):
-            if batch["is_last_frame"]:
-                self.kf_idx -= 1 # Because in the last iter we increased it, but aren't taking any...
-                print("Last frame reached, and no new motion: starting GLOBAL BA")
-                self.terminate()
-                # Send the whole viz_out to update the fact that BA has changed all poses/depths
-                viz_out = self.get_viz_out(batch)
-                return x0, factors, viz_out
             # By returning, we do not increment self.kf_idx
             return x0, factors, viz_out
 
@@ -435,7 +442,7 @@ def update(self, kf0=None, kf1=None, itrs=2, use_inactive=False, EP=1e-7, motion
             # Or rather the factors?
 
             # Dense bundle adjustment
-            ic("BA!")
+            # ic("BA!")
             x0, rcm_factor = self.ba(gru_estimated_flow, gru_estimated_flow_weight, damping,
                                  ii, jj, kf0, kf1, itrs=itrs, lm=1e-4, ep=0.1,
                                  motion_only=motion_only, compute_covariances=self.compute_covariances)
@@ -977,7 +984,7 @@ def _normalize_imgs(self, images, droid_normalization=True):
     @torch.no_grad()
     def has_enough_motion(self, current_imgs_features):
         # Only calculates if enough motion by looking at cam0
-        ic(self.last_kf_idx)
+        # ic(self.last_kf_idx)
         last_img_features    = self.features_imgs[self.last_kf_idx][0]
         current_img_features = current_imgs_features[0]
         last_img_context     = self.contexts_imgs[self.last_kf_idx][0]
@@ -1135,7 +1142,7 @@ def ba(self, gru_estimated_flow, gru_estimated_flow_weight, damping, ii, jj,
 
             # Add Factors
             if initial_priors is not None:
-                ic("ADDING initial prior!")
+                # ic("ADDING initial prior!")
                 linear_factor_graph.push_back(initial_priors.linearize(x0))
 
             # SOLVE
@@ -1337,7 +1344,13 @@ def terminate(self, stream=None):
     def get_viz_out(self, batch):
         viz_index, = torch.where(self.viz_idx)
         viz_out = None
-        if len(viz_index) != 0:
+        
+        if batch["is_last_frame"]:
+            ic("Last frame")
+            # Leave all the entries null, this signals end of seq
+            viz_out = {"is_last_frame": True}
+
+        elif len(viz_index) != 0:
             cam0_T_world       = torch.index_select(self.cam0_T_world, 0, viz_index)
             gt_poses           = torch.index_select(self.gt_poses, 0, viz_index)
             gt_depths          = torch.index_select(self.gt_depths, 0, viz_index)
@@ -1359,7 +1372,7 @@ def get_viz_out(self, batch):
             else:
                 out_device = self.device
 
-            ic(out_device)
+            # ic(out_device)
 
             viz_out = {"cam0_poses":          cam0_T_world.to(device=out_device),
                        "gt_poses":            gt_poses.to(device=out_device),
@@ -1381,12 +1394,7 @@ def get_viz_out(self, batch):
                        "is_last_frame":       batch["is_last_frame"]
                       }
             self.viz_idx[:] = False
-        else:
-            print("viz_index is empty, nothing to visualize")
-            if batch["is_last_frame"]:
-                ic("Last frame")
-                # Leave all the entries null, this signals end of seq
-                viz_out = {"is_last_frame": True}
+
             
         return viz_out
         
\ No newline at end of file
diff --git a/thirdparty/gtsam b/thirdparty/gtsam
index 139c156..a0240d6 160000
--- a/thirdparty/gtsam
+++ b/thirdparty/gtsam
@@ -1 +1 @@
-Subproject commit 139c156a7a6fb5ef6062549993410a8ab986720b
+Subproject commit a0240d64c99e7dc0a13783ff6033ddd037e020e9
diff --git a/thirdparty/instant-ngp b/thirdparty/instant-ngp
index 54aba7c..a501f0f 160000
--- a/thirdparty/instant-ngp
+++ b/thirdparty/instant-ngp
@@ -1 +1 @@
-Subproject commit 54aba7cfbeaf6a60f29469a9938485bebeba24c3
+Subproject commit a501f0f14699a2b493f1e2fd42a975a8a9c172d1
diff --git a/utils/flow_viz.py b/utils/flow_viz.py
index cf50010..8932e2a 100644
--- a/utils/flow_viz.py
+++ b/utils/flow_viz.py
@@ -263,9 +263,11 @@ def viz_idepth(disps, upmask, fix_range=False):
     depth = 1.0 / disps_up[-1].squeeze().to(torch.float) # Visualize only the last depth...
     viz_depth_map(depth, fix_range)
 
-def viz_depth_map(depth, fix_range=False, name='Depth up', invert=True, colormap=cv2.COLORMAP_PLASMA, write=False):
+def viz_depth_map(depth, fix_range=False, name='Depth up', invert=True, colormap=cv2.COLORMAP_PLASMA, write=False, range=None):
     valid = (depth > 0) & (~torch.isinf(depth)) & (~torch.isnan(depth)) # one shouldn't use exact equality on floats but for synthetic values it's ok
-    if fix_range:
+    if range is not None:
+        dmin, dmax = range
+    elif fix_range:
         dmin, dmax = 0.0, 3.0
     else:
         if valid.any():