Search for place waypoint #206

Draft · wants to merge 11 commits into main
3 changes: 2 additions & 1 deletion spot_rl_experiments/experiments/skill_test/test_sem_place.py
@@ -19,7 +19,7 @@
# at NYC
place_target = "test_desk"

-spotskillmanager = SpotSkillManager(use_mobile_pick=False, use_semantic_place=True)
+spotskillmanager = SpotSkillManager(use_mobile_pick=True, use_semantic_place=True)

is_local = False
if enable_estimation_before_place:
@@ -29,6 +29,7 @@
# Start testing
contnue = True  # deliberately misspelled; "continue" is a Python keyword
while contnue:
spotskillmanager.pick("cup")
rospy.set_param("is_gripper_blocked", 0)
spotskillmanager.place(place_target, is_local=is_local, visualize=True)
contnue = map_user_input_to_boolean("Do you want to do it again ? Y/N ")
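# To exercise this loop on the robot, a sketch of the invocation (assuming
# the usual spot_rl ROS nodes are already running):
#   python spot_rl_experiments/experiments/skill_test/test_sem_place.py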
9 changes: 9 additions & 0 deletions spot_rl_experiments/spot_rl/envs/gaze_env.py
@@ -68,13 +68,22 @@ def reset(self, target_obj_name, *args, **kwargs):
def step(self, action_dict: Dict[str, Any]):
grasp = self.should_grasp()

has_grasp = False
if grasp:
    # Defer the physical grasp: record that one was requested and clear the
    # flag so the env does not execute it; the episode ends instead (below).
    has_grasp = True
    grasp = False

# Update the action_dict with grasp and place flags
action_dict["grasp"] = grasp
action_dict["place"] = False # TODO: Why is gaze getting flag for place?

observations, reward, done, info = super().step(
action_dict=action_dict,
)

if has_grasp:
    # End the episode as soon as a grasp is triggered; the caller performs
    # the actual grasp.
    done = True

return observations, reward, done, info
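# Caller-side sketch (illustrative, not part of this diff): with this change
# a gaze rollout ends at the first grasp trigger, so a caller can loop on
#   observations, reward, done, info = env.step(action_dict)
# and issue the physical grasp itself once `done` comes back True.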

def remap_observation_keys_for_hab3(self, observations):
190 changes: 186 additions & 4 deletions spot_rl_experiments/spot_rl/utils/img_publishers.py
@@ -40,12 +40,17 @@
except ModuleNotFoundError:
pass

import open3d as o3d

# owlvit
from spot_rl.models import OwlVit
from spot_rl.utils import search_table_location
from spot_rl.utils.gripper_t_intel_path import GRIPPER_T_INTEL_PATH
from spot_rl.utils.pixel_to_3d_conversion_utils import (
get_3d_point,
sample_patch_around_point,
)
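# For reference, get_3d_point back-projects a pixel (u, v) at depth z with the
# standard pinhole model (the exact helper signature lives in
# pixel_to_3d_conversion_utils):
#   X = (u - cx) * z / fx,  Y = (v - cy) * z / fy,  Z = z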
from spot_rl.utils.plane_detection import plane_detect
from spot_rl.utils.stopwatch import Stopwatch
from spot_rl.utils.utils import construct_config
from spot_rl.utils.utils import ros_topics as rt
@@ -317,7 +322,7 @@ class SpotBoundingBoxPublisher(SpotProcessedImagesPublisher):
subscriber_topic = rt.HAND_RGB
publisher_topics = [rt.MASK_RCNN_VIZ_TOPIC]

-def __init__(self, model):
+def __init__(self, model, spot):
super().__init__()
self.model = model
self.detection_topic = rt.DETECTIONS_TOPIC
@@ -333,6 +338,11 @@ def __init__(self, model):
self.viz_topic = rt.MASK_RCNN_VIZ_TOPIC
self._reset_tracking_params()

# Parameter for plane detection
assert osp.exists(GRIPPER_T_INTEL_PATH), f"{GRIPPER_T_INTEL_PATH} not found"
self.gripper_T_intel = np.load(GRIPPER_T_INTEL_PATH)
self.spot = spot

def preprocess_image(self, img):
if self.image_scale != 1.0:
img = cv2.resize(
@@ -353,6 +363,37 @@ def preprocess_image(self, img):

return img

def shrink_bounding_box(self, x_min, x_max, y_min, y_max, shrink_factor=0.1):
    """
    Shrinks a bounding box about its center to a fraction of its size.

    Parameters:
        x_min (float): Minimum x-coordinate of the bounding box.
        x_max (float): Maximum x-coordinate of the bounding box.
        y_min (float): Minimum y-coordinate of the bounding box.
        y_max (float): Maximum y-coordinate of the bounding box.
        shrink_factor (float): Fraction of each extent to keep. Default is
            0.1, i.e. the new box spans 10% of the original in each axis.

    Returns:
        tuple: New coordinates (x_min_new, x_max_new, y_min_new, y_max_new).
    """
    x_extent = x_max - x_min
    y_extent = y_max - y_min

    # Half-extents of the shrunken box
    x_half = x_extent * shrink_factor / 2
    y_half = y_extent * shrink_factor / 2

    x_center = (x_max + x_min) / 2
    y_center = (y_max + y_min) / 2
    # Rebuild the bounding box around the same center
    x_min_new = x_center - x_half
    x_max_new = x_center + x_half
    y_min_new = y_center - y_half
    y_max_new = y_center + y_half

    return int(x_min_new), int(x_max_new), int(y_min_new), int(y_max_new)
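# Worked example (hypothetical numbers): with shrink_factor=0.5, a box
# spanning x in [100, 200] and y in [50, 150] keeps half of each extent and
# returns (125, 175, 75, 125), still centered on (150, 100).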

def _reset_tracking_params(self):
self._tracking_first_time = (
True # flag to indicate if it is the first time tracking
@@ -376,6 +417,8 @@ def _publish(self):

# Fetch the tracking flag
enable_tracking = rospy.get_param("enable_tracking", False)
enable_plane_detector = rospy.get_param("enable_plane_detector", False)
enable_plane_detector = True  # NOTE: debug override left in this draft; it makes the rosparam lookup above a no-op

# Cache the vis image
viz_img = hand_rgb_preprocessed
@@ -436,6 +479,142 @@
# buffer size 2
self._tracking_images = [self._tracking_images[suc_frame[-1]]]
self._tracking_bboxs = [self._tracking_bboxs[suc_frame[-1]]]
elif enable_plane_detector:
(
img,
depth_raw,
camera_intrinsics_intel,
camera_intrinsics_gripper,
body_T_hand,
gripper_T_intel,
) = search_table_location.get_arguments(self.spot, self.gripper_T_intel)
h, w = img.shape[:2]
mask = np.ones_like(depth_raw).astype(bool)
fx = camera_intrinsics_intel.focal_length.x
fy = camera_intrinsics_intel.focal_length.y
cx = camera_intrinsics_intel.principal_point.x
cy = camera_intrinsics_intel.principal_point.y
# u,v in pixel -> depth at u,v, intrinsics -> xyz in 3D
t0 = time.time()
pcd = search_table_location.generate_point_cloud(
img, depth_raw, mask, fx, fy, cx, cy
)
print("Time generate_point_cloud:", time.time() - t0)
pcd = pcd.voxel_down_sample(voxel_size=0.01)
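# The 1 cm voxel grid above thins the cloud so the normal estimation
# below stays fast enough for the publisher loop.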
t0 = time.time()
pcd.estimate_normals(
search_param=o3d.geometry.KDTreeSearchParamHybrid(radius=0.1, max_nn=30)
)
print("Time estimate_normals:", time.time() - t0)
# Cosine threshold 0.99 keeps points whose normals lie within ~8 deg of +Z
# (a threshold of 0.25 would allow up to ~75 deg)
t0 = time.time()
pcd = search_table_location.filter_pointcloud_by_normals_in_the_given_direction(
pcd,
np.array([0.0, 0.0, 1.0]),
0.99,
body_T_hand,
gripper_T_intel,
visualize=False,
)
print(
"Time filter_pointcloud_by_normals_in_the_given_direction:",
time.time() - t0,
)

try:
plane_pcd = plane_detect(pcd)
plane_pcd.points = o3d.utility.Vector3dVector(
search_table_location.farthest_point_sampling(
np.array(plane_pcd.points), 1024
)
)
print(f"first: { np.array(plane_pcd.points).shape[0]}")
if np.array(plane_pcd.points).shape[0] > 10:
# o3d.visualization.draw_geometries([plane_pcd])
# Down-sample
# plane_pcd.points = o3d.utility.Vector3dVector(
# search_table_location.farthest_point_sampling(np.array(plane_pcd.points), 1024)
# )
(
target_points,
selected_point,
) = search_table_location.get_target_points_by_heuristic(
np.array(plane_pcd.points)
)
pixel_points = search_table_location.project_3d_to_pixel_uv(
np.array(target_points), fx, fy, cx, cy
)

filter_pixel_points = []
if len(pixel_points) != 0:
t0 = time.time()
for pt in pixel_points:
depth_pt = (
sample_patch_around_point(
int(pt[0]), int(pt[1]), depth_raw
)
/ 1000.0
)
intel_pt = get_3d_point(
camera_intrinsics_intel, pt, depth_pt
)
gripper_pt = np.array(
gripper_T_intel.transform_point(mn.Vector3(*intel_pt))
)
body_pt = body_T_hand.transform_point(
mn.Vector3(*gripper_pt)
)
if body_pt[2] > 0:
filter_pixel_points.append(pt)

filter_pixel_points = np.stack(filter_pixel_points)
print("Time get 3d:", time.time() - t0)
print(
f"before/after filter out: {pixel_points.shape[0]} {filter_pixel_points.shape[0]}"
)
if len(filter_pixel_points) >= 40:
max_x = int(np.max(filter_pixel_points[:, 1]))
min_x = int(np.min(filter_pixel_points[:, 1]))
max_y = int(np.max(filter_pixel_points[:, 0]))
min_y = int(np.min(filter_pixel_points[:, 0]))
(
new_min_x,
new_max_x,
new_min_y,
new_max_y,
) = self.shrink_bounding_box(
min_x, max_x, min_y, max_y, 0.5
)
# cur_bbox = [min_y, min_x, max_y, max_x]
cur_bbox = [new_min_y, new_min_x, new_max_y, new_max_x]
object_label = "plane"
bbox_data = (
f"{str(timestamp)}|{object_label},{1.0},"
+ ",".join([str(v) for v in cur_bbox])
)
# Apply the red bounding box to showcase tracking on the image
viz_img = cv2.rectangle(
viz_img, cur_bbox[:2], cur_bbox[2:], (0, 0, 255), 5
)
viz_img = cv2.rectangle(
viz_img,
[new_min_y, new_min_x],
[new_max_y, new_max_x],
(0, 255, 0),
5,
)
for xy in filter_pixel_points:
viz_img = cv2.circle(
viz_img, (int(xy[0]), int(xy[1])), 1, (255, 0, 0)
)
else:
bbox_data = f"{str(timestamp)}|None"
else:
bbox_data = f"{str(timestamp)}|None"
else:
bbox_data = f"{str(timestamp)}|None"
except Exception:
bbox_data = f"{str(timestamp)}|None"
else:
# Normal detection mode using detection model
bbox_data, viz_img = self.model.inference(
@@ -747,14 +926,17 @@ def inference(self, hand_rgb, timestamp, stopwatch):
elif filter_hand_depth:
node = SpotFilteredHandDepthImagesPublisher()
elif mrcnn:
spot = Spot("SpotBoundingBoxPublisher")
model = MRCNNModel()
-node = SpotBoundingBoxPublisher(model)
+node = SpotBoundingBoxPublisher(model, spot)
elif owlvit:
# TODO dynamic label
rospy.set_param("object_target", "ball")
rospy.set_param("enable_tracking", False)
rospy.set_param("enable_plane_detector", False)
spot = Spot("SpotBoundingBoxPublisher")
model = OWLVITModel()
-node = SpotBoundingBoxPublisher(model)
+node = SpotBoundingBoxPublisher(model, spot)
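# The detector can then be toggled at runtime without restarting the node
# (standard rosparam CLI):
#   rosparam set enable_plane_detector true
# though the debug override in _publish() currently forces it on regardless.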
elif open_voc:
# Add open voc object detector here
spot = Spot("SpotOpenVocObjectDetectorPublisher")
@@ -785,7 +967,7 @@
flags.append("--decompress")
elif local:
flags.append("--raw")
flags.append("--open-voc")
# flags.append("--open-voc")
else:
raise RuntimeError("This should be impossible.")
cmds = [f"python {osp.abspath(__file__)} {flag}" for flag in flags]
8 changes: 3 additions & 5 deletions spot_rl_experiments/spot_rl/utils/plane_detection.py
@@ -164,16 +164,14 @@ def plane_detect(pcd):

# DrawPointCloud(points, color=(0.4, 0.4, 0.4))
t0 = time.time()
-results = DetectMultiPlanes(
-    points, min_ratio=0.05, threshold=0.005, iterations=2000
-)
-print("Time:", time.time() - t0)
+results = DetectMultiPlanes(points, min_ratio=0.05, threshold=0.01, iterations=2000)
+print("Time Plane detection:", time.time() - t0)
planes = []
colors = []

highest_pts, high_i = -np.inf, 0
# lowest_dist_to_camera, low_i = np.inf, 0
print(f"{len(results)} plane are detected")
# print(f"{len(results)} plane are detected")
for i, (_, plane) in enumerate(results):

r = 1