Spot social nav (facebookresearch#1658)
* tidy CI

* get a correct init location of the arm

* tidy CI

* clean up sensors so that the size is consistent

* set up social nav reward and yaml

* bounding box update

* clean up human bbox detector

* tidy

* tidy

* add stereo pair camera

* make the size of HumanoidDetectorSensor consistent with the sensor obs

* fix nan issue

* fix init_distributed issue

* tidy

* tidy CI and add comments

* add more comments

* tidy CI

* add InitialGpsCompassSensor sensor

* refine social nav reward

* add exploration reward

* increase the video size

* polish the PR

* tidy

* reduce the third_rgb_sensor size

---------

Co-authored-by: Jimmy Yang <[email protected]>
jimmytyyang and Jimmy Yang authored Nov 11, 2023
1 parent c869f96 commit 6c0c62e
Showing 11 changed files with 417 additions and 75 deletions.
@@ -42,6 +42,7 @@ habitat_baselines:
third_rgb_sensor:
height: 224
width: 171

should_load_ckpt: True
video_option: ["disk"]

2 changes: 1 addition & 1 deletion habitat-baselines/habitat_baselines/rl/ppo/ppo_trainer.py
@@ -255,7 +255,7 @@ def _init_train(self, resume_state=None):

self._agent = self._create_agent(resume_state)
if self._is_distributed:
self._agent.updater.init_distributed(find_unused_params=False) # type: ignore
self._agent.init_distributed(find_unused_params=False) # type: ignore
self._agent.post_init()

self._is_static_encoder = (
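The one-line change above moves distributed setup from self._agent.updater.init_distributed(...) to self._agent.init_distributed(...), so the trainer no longer reaches into agent internals. A minimal sketch of that delegation pattern, using hypothetical stand-in classes rather than the actual habitat-baselines interfaces:

class _Updater:
    """Stand-in for the PPO updater that owns the policy parameters."""

    def init_distributed(self, find_unused_params: bool = False) -> None:
        # In the real trainer this wraps the policy in DistributedDataParallel
        # (and therefore needs an initialized torch.distributed process group).
        print(f"init_distributed(find_unused_params={find_unused_params})")


class _Agent:
    """Stand-in for the agent access manager that wraps the updater."""

    def __init__(self) -> None:
        self.updater = _Updater()

    def init_distributed(self, find_unused_params: bool = False) -> None:
        # Forward to the updater; callers only need the agent handle.
        self.updater.init_distributed(find_unused_params=find_unused_params)


_Agent().init_distributed(find_unused_params=False)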
@@ -4,7 +4,7 @@ defaults:
- /habitat: habitat_config_base
- /habitat/task: task_config_base
# Config agent sensors
- /habitat/simulator/[email protected]_0: rgbd_head_rgbdp_arm_agent
- /habitat/simulator/[email protected]_0: spot_agent
- /habitat/simulator/[email protected]_0: spot
- /habitat/simulator/[email protected]_1: rgbd_head_agent
- /habitat/simulator/[email protected]_1: human
@@ -19,17 +19,14 @@ defaults:
- /habitat/task/[email protected]_1_oracle_nav_randcoord_action: oracle_nav_action
# Config agent sensor
- /habitat/task/measurements:
- articulated_agent_force
- articulated_agent_colls
- num_steps
- did_agents_collide
- num_agents_collide
- composite_stage_goals
- pddl_subgoal_reward
- rearrange_cooperate_reward
- articulated_agent_force
- force_terminate
- social_nav_reward
- object_to_goal_distance
- articulated_agent_colls
- rot_dist_to_goal
- dist_to_goal
- nav_to_pos_succ
@@ -50,9 +47,17 @@ defaults:
- humanoid_detector_sensor
- has_finished_oracle_nav
- /habitat/task/[email protected]_sensors.agent_0_goal_to_agent_gps_compass: nav_goal_sensor
- /habitat/task/[email protected]_sensors.agent_0_spot_head_stereo_depth_sensor: spot_head_stereo_depth_sensor
- /habitat/task/[email protected]_sensors.agent_1_agents_within_threshold: agents_within_threshold
- _self_

# This yaml is designed specifically for learning a social nav policy for the Boston Dynamics Spot robot.
# The major changes compared to the base yaml are the following:
# (1) obs_keys: we ensure these observations can be obtained from the real robot (Spot)
# (2) social_nav_reward: social nav reward considers collisions based on a kinematic simulation
# (3) actions: Spot controls the base linear and angular velocity
# (4) simulator: we simulate the environment via the kinematic mode to facilitate sim2real transfer

habitat:
task:
lab_sensors:
@@ -62,6 +67,11 @@ habitat:
agent_idx: 1
agent_0_goal_to_agent_gps_compass:
goal_is_human: True
# Define the human detector
humanoid_detector_sensor:
# Whether the human detector returns an image or a binary flag
return_image: False
is_return_image_bbox: False
type: RearrangePddlSocialNavTask-v0
min_start_distance: 4.0
reward_measure: social_nav_reward
@@ -70,20 +80,28 @@ habitat:
slack_reward: -0.01
end_on_success: False
task_spec_base_path: benchmark/multi_agent/
task_spec: pddl/multi_agent_tidy_house
task_spec: pddl/multi_agent_social_nav
pddl_domain_def: fp
min_distance_start_agents: 5.0
constraint_violation_ends_episode: False
constraint_violation_drops_object: True

measurements:
rearrange_cooperate_reward:
stage_sparse_reward: 5.0
end_on_collide: True
collide_penalty: 0.5
# We do kinematic simulation here, so there is no dynamic force
force_terminate:
max_accum_force: -1
max_instant_force: -1
# Define the social nav reward
social_nav_reward:
facing_human_reward: 0.01
count_coll_pen: 0.01
max_count_colls: 1000
count_coll_end_pen: 1.0
# Define the social nav seek success
social_nav_seek_success:
must_look_at_targ: False
following_step_succ_threshold: 500
need_to_face_human: True

actions:
agent_0_base_velocity:
@@ -102,6 +120,7 @@ habitat:
ang_speed: 5.0
enable_rotation_check_for_dyn_slide: False

# Human random walking action
agent_1_oracle_nav_action:
type: OracleNavAction
motion_control: human_joints
@@ -121,7 +140,7 @@ habitat:
gym:
obs_keys:
- agent_0_articulated_agent_arm_depth
- agent_0_goal_to_agent_gps_compass
- agent_0_spot_head_stereo_depth_sensor
- agent_0_humanoid_detector_sensor
- agent_1_head_depth
- agent_1_relative_resting_position
@@ -150,27 +169,8 @@ habitat:
agents_order:
- agent_0
- agent_1
agents:
agent_0:
radius: 0.25
height: 1.41
articulated_agent_urdf: data/robots/hab_spot_arm/urdf/hab_spot_arm.urdf
articulated_agent_type: "SpotRobot"
sim_sensors:
arm_rgb_sensor:
height: 480
width: 640
hfov: 47
arm_depth_sensor:
height: 224
width: 171
hfov: 55
arm_panoptic_sensor:
height: 224
width: 171
hfov: 55
joint_start_noise: 0.0

# We use the kinematic mode to train the real world social nav policy
kinematic_mode: True
ac_freq_ratio: 1
step_physics: False
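The config above also turns on kinematic_mode and disables step_physics, so the base is moved by integrating the commanded linear and angular velocities rather than by the physics engine. A rough sketch of that idea with a hypothetical helper (not the actual Habitat simulator code):

import numpy as np

def kinematic_base_step(xy, yaw, lin_vel, ang_vel, ctrl_freq):
    """Integrate one control step of base linear/angular velocity commands."""
    dt = 1.0 / ctrl_freq
    new_yaw = yaw + ang_vel * dt
    # Move along the new heading; in kinematic mode collisions are checked
    # geometrically (e.g. against the navmesh) instead of via contact forces.
    new_xy = xy + lin_vel * dt * np.array([np.cos(new_yaw), np.sin(new_yaw)])
    return new_xy, new_yaw

# Example: 1 m/s forward and 0.5 rad/s turn at a 30 Hz control rate.
print(kinematic_base_step(np.zeros(2), 0.0, 1.0, 0.5, 30))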
@@ -0,0 +1,58 @@
# Version of tidy house that only has two target objects.
objects:
- name: any_targets|0
expr_type: movable_entity_type
- name: TARGET_any_targets|0
expr_type: goal_entity_type
- name: any_targets|1
expr_type: movable_entity_type
- name: TARGET_any_targets|1
expr_type: goal_entity_type
- name: robot_0
expr_type: robot_entity_type
- name: robot_1
expr_type: robot_entity_type

goal:
expr_type: AND
sub_exprs:
- at(any_targets|0,TARGET_any_targets|0)
- at(any_targets|1,TARGET_any_targets|1)
- not_holding(robot_0)
- not_holding(robot_1)

stage_goals:
stage_1_1:
expr_type: AND
sub_exprs:
- quantifier: EXISTS
inputs:
- name: robot
expr_type: robot_entity_type
expr_type: AND
sub_exprs:
- holding(any_targets|0, robot)
stage_1_2:
expr_type: AND
sub_exprs:
- at(any_targets|0,TARGET_any_targets|0)
stage_2_1:
expr_type: AND
sub_exprs:
- quantifier: EXISTS
inputs:
- name: robot
expr_type: robot_entity_type
expr_type: AND
sub_exprs:
- holding(any_targets|1, robot)
stage_2_2:
expr_type: AND
sub_exprs:
- at(any_targets|1,TARGET_any_targets|1)

solution:
- nav_to_goal(any_targets|1, robot_1)
- pick(any_targets|1, robot_1)
- nav_to_obj(TARGET_any_targets|1, robot_1)
- place(any_targets|1,TARGET_any_targets|1, robot_1)
37 changes: 37 additions & 0 deletions habitat-lab/habitat/config/default_structured_configs.py
@@ -63,6 +63,7 @@
"TargetStartSensorConfig",
"GoalSensorConfig",
"TargetStartGpsCompassSensorConfig",
"InitialGpsCompassSensorConfig",
"TargetGoalGpsCompassSensorConfig",
# REARRANGEMENT MEASUREMENTS
"EndEffectorToRestDistanceMeasurementConfig",
@@ -353,6 +354,9 @@ class OracleNavActionConfig(ActionConfig):
allow_back: bool = True
spawn_max_dist_to_obj: float = 2.0
num_spawn_attempts: int = 200
# For social nav training only. It controls the distance threshold
# between the robot and the human and decides whether the human walks or not
human_stop_and_walk_to_robot_distance_threshold: float = -1.0


# -----------------------------------------------------------------------------
@@ -397,6 +401,10 @@ class HumanoidDetectorSensorConfig(LabSensorConfig):
human_id: int = 100
# How many pixels are needed to consider that the human is in the frame
human_pixel_threshold: int = 1000
# Whether the output is image based or binary based
return_image: bool = False
# Whether the returned image is the bounding box or not
is_return_image_bbox: bool = False
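
The two new flags select between a binary human-in-frame observation and an image observation. A small sketch of the assumed behavior, built only from the config fields above (not the actual HumanoidDetectorSensor implementation):

import numpy as np

def humanoid_detector_obs(
    panoptic,
    human_id=100,
    human_pixel_threshold=1000,
    return_image=False,
    is_return_image_bbox=False,
):
    human_mask = panoptic == human_id
    if not return_image:
        # Binary flag: are there enough human pixels in the frame?
        return np.float32([human_mask.sum() > human_pixel_threshold])
    out = np.zeros(panoptic.shape, dtype=np.float32)
    ys, xs = np.nonzero(human_mask)
    if len(ys) == 0:
        return out
    if is_return_image_bbox:
        # Fill the bounding box that encloses all detected human pixels.
        out[ys.min() : ys.max() + 1, xs.min() : xs.max() + 1] = 1.0
    else:
        # Otherwise return the raw human segmentation mask.
        out[human_mask] = 1.0
    return out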


@dataclass
@@ -648,6 +656,14 @@ class TargetStartGpsCompassSensorConfig(LabSensorConfig):
type: str = "TargetStartGpsCompassSensor"


@dataclass
class InitialGpsCompassSensorConfig(LabSensorConfig):
r"""
Rearrangement only. Returns the relative distance to the initial starting location of the agent in 2D polar coordinates.
"""
type: str = "InitialGpsCompassSensor"
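
For intuition, a minimal sketch of the polar-coordinate convention the docstring describes, using assumed 2D geometry rather than the sensor's actual implementation:

import numpy as np

def initial_gps_compass(initial_xy, agent_xy, agent_yaw):
    """Initial start location expressed in the agent's frame as (rho, phi)."""
    delta = np.asarray(initial_xy) - np.asarray(agent_xy)
    # Rotate the world-frame offset into the agent's current frame.
    c, s = np.cos(-agent_yaw), np.sin(-agent_yaw)
    local = np.array([c * delta[0] - s * delta[1], s * delta[0] + c * delta[1]])
    return np.float32([np.linalg.norm(local), np.arctan2(local[1], local[0])])

# Example: the agent started at the origin and is now at (2, 0) facing +y.
print(initial_gps_compass([0.0, 0.0], [2.0, 0.0], np.pi / 2))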


@dataclass
class TargetGoalGpsCompassSensorConfig(LabSensorConfig):
r"""
@@ -1258,12 +1274,27 @@ class SocialNavReward(MeasurementConfig):
facing_human_dis: float = 3.0
# -1 means that there is no facing_human_reward
facing_human_reward: float = -1.0
# toward_human_reward defaults to 1.0
toward_human_reward: float = 1.0
# -1 means that there is no near_human_bonus
near_human_bonus: float = -1.0
# -1 means that there is no exploration reward
explore_reward: float = -1.0
# If we want to use the geodesic distance to measure the distance
# between the robot and the human
use_geo_distance: bool = False
# Set the ids of the agents
robot_idx: int = 0
human_idx: int = 1
constraint_violate_pen: float = 10.0
force_pen: float = 0.0
max_force_pen: float = 1.0
force_end_pen: float = 10.0
# Collision-based penalty for kinematic simulation
count_coll_pen: float = -1.0
max_count_colls: int = -1
count_coll_end_pen: float = 1.0
collide_penalty: float = 1.0
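
The count_coll_* fields add a collision budget for the kinematic setting. A sketch of how such terms could shape the per-step reward (illustrative only; the actual SocialNavReward measurement may combine them differently):

def apply_collision_terms(reward, did_collide, total_collisions, cfg):
    """Return (shaped_reward, should_end) given the kinematic collision config."""
    should_end = False
    if cfg.count_coll_pen != -1.0 and did_collide:
        # Small penalty for every step on which the agents collide.
        reward -= cfg.count_coll_pen
    if cfg.max_count_colls != -1 and total_collisions > cfg.max_count_colls:
        # Large terminal penalty once the collision budget is exhausted.
        reward -= cfg.count_coll_end_pen
        should_end = True
    return reward, should_end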


@dataclass
@@ -2215,6 +2246,12 @@ class HabitatConfig(HabitatBaseConfig):
name="target_start_gps_compass_sensor",
node=TargetStartGpsCompassSensorConfig,
)
cs.store(
package="habitat.task.lab_sensors.initial_gps_compass_sensor",
group="habitat/task/lab_sensors",
name="initial_gps_compass_sensor",
node=InitialGpsCompassSensorConfig,
)
cs.store(
package="habitat.task.lab_sensors.multi_agent_all_predicates",
group="habitat/task/lab_sensors",
15 changes: 14 additions & 1 deletion habitat-lab/habitat/tasks/rearrange/robot_specific_sensors.py
@@ -5,6 +5,8 @@
# LICENSE file in the root directory of this source tree.


import warnings

import numpy as np
from gym import spaces

@@ -54,8 +56,19 @@ def get_observation(self, observations, episode, task, *args, **kwargs):
target_key = [
f"agent_{self.agent_id}_{key}" for key in require_sensors
]

# Handle the case that there are no stereo observations
agent_do_not_have = False
for key in target_key:
assert key in observations
if key not in observations:
agent_do_not_have = True

if agent_do_not_have:
warnings.warn(
"You are using SpotHeadStereoDepthSensor but you do not provide head_stereo_right_depth and head_stereo_right_depth."
"We return an all zero image."
)
return np.float32(np.zeros((self._height, self._width, 1)))

# Combine two images
stereo_img = [observations[key] for key in target_key]
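One plausible way to combine the two head depth frames gathered above into a single observation of the configured size is sketched below; this is an assumption for illustration, not necessarily how the sensor stitches and resizes the frames:

import numpy as np

def combine_stereo_depth(left, right, height, width):
    """Concatenate two HxW depth frames side by side and resize to (height, width, 1)."""
    stereo = np.concatenate([left, right], axis=1)
    # Nearest-neighbour resize via index sampling (no external dependencies).
    rows = np.linspace(0, stereo.shape[0] - 1, height).astype(int)
    cols = np.linspace(0, stereo.shape[1] - 1, width).astype(int)
    return np.float32(stereo[np.ix_(rows, cols)].reshape(height, width, 1))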
@@ -341,6 +341,37 @@ def step(self, *args, **kwargs):
if self.skill_done:
self.coord_nav = None

# If the robot is nearby, the human starts to walk; otherwise, the human
# just stops there and waits for the robot to find it
if self._config.human_stop_and_walk_to_robot_distance_threshold != -1:
assert (
len(self._sim.agents_mgr) == 2
), "Does not support more than two agents when you want human to stop and walk based on the distance to the robot"
robot_id = int(1 - self._agent_index)
robot_pos = self._sim.get_agent_data(
robot_id
).articulated_agent.base_pos
human_pos = self.cur_articulated_agent.base_pos
dis = self._sim.geodesic_distance(robot_pos, human_pos)
# The human needs to stop and wait for the robot to come if the distance is too large
if (
dis
> self._config.human_stop_and_walk_to_robot_distance_threshold
):
self.humanoid_controller.set_framerate_for_linspeed(
0.0, 0.0, self._sim.ctrl_freq
)
# The human needs to walk otherwise
else:
speed = np.random.uniform(
self._config.lin_speed / 5.0, self._config.lin_speed
)
lin_speed = speed
ang_speed = speed
self.humanoid_controller.set_framerate_for_linspeed(
lin_speed, ang_speed, self._sim.ctrl_freq
)

try:
kwargs["task"].measurements.measures[
"social_nav_stats"