Spot social nav #1658

Merged · Nov 11, 2023 · 24 commits

Changes from 18 commits
2 changes: 1 addition & 1 deletion habitat-baselines/habitat_baselines/rl/ppo/ppo_trainer.py
@@ -255,7 +255,7 @@ def _init_train(self, resume_state=None):

self._agent = self._create_agent(resume_state)
if self._is_distributed:
self._agent.updater.init_distributed(find_unused_params=False) # type: ignore
self._agent.init_distributed(find_unused_params=False) # type: ignore
self._agent.post_init()

self._is_static_encoder = (
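The trainer now calls init_distributed on the agent access manager instead of directly on agent.updater. A hedged sketch of one way such a method could forward the call to the underlying updater(s) in a multi-agent setup; the class and attribute names below are illustrative assumptions, not the actual habitat-baselines implementation.

# Sketch only: an access manager that owns several updaters (e.g. one PPO
# instance per trained agent) and forwards distributed initialization to each.
class AgentAccessMgrSketch:
    def __init__(self, updaters):
        self._updaters = updaters

    def init_distributed(self, find_unused_params: bool = False) -> None:
        # Delegate distributed (DDP) setup to every underlying updater.
        for updater in self._updaters:
            updater.init_distributed(find_unused_params=find_unused_params)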
@@ -4,7 +4,7 @@ defaults:
- /habitat: habitat_config_base
- /habitat/task: task_config_base
# Config agent sensors
- /habitat/simulator/[email protected]_0: rgbd_head_rgbdp_arm_agent
- /habitat/simulator/[email protected]_0: spot_agent
- /habitat/simulator/[email protected]_0: spot
- /habitat/simulator/[email protected]_1: rgbd_head_agent
- /habitat/simulator/[email protected]_1: human
@@ -19,17 +19,14 @@ defaults:
- /habitat/task/[email protected]_1_oracle_nav_randcoord_action: oracle_nav_action
# Config agent sensor
- /habitat/task/measurements:
- articulated_agent_force
- articulated_agent_colls
- num_steps
- did_agents_collide
- num_agents_collide
- composite_stage_goals
- pddl_subgoal_reward
- rearrange_cooperate_reward
- articulated_agent_force
- force_terminate
- social_nav_reward
- object_to_goal_distance
- articulated_agent_colls
- rot_dist_to_goal
- dist_to_goal
- nav_to_pos_succ
@@ -50,9 +47,17 @@ defaults:
- humanoid_detector_sensor
- has_finished_oracle_nav
- /habitat/task/[email protected]_sensors.agent_0_goal_to_agent_gps_compass: nav_goal_sensor
- /habitat/task/[email protected]_sensors.agent_0_spot_head_stereo_depth_sensor: spot_head_stereo_depth_sensor
- /habitat/task/[email protected]_sensors.agent_1_agents_within_threshold: agents_within_threshold
- _self_

# This yaml is designed specifically for learning a social nav policy for the Boston Dynamics Spot robot.
# The major changes compared to the base yaml are the following:
# (1) obs_keys: we ensure these observations can be obtained from the real robot (Spot)
# (2) social_nav_reward: social nav reward considers collisions based on a kinematic simulation
# (3) actions: Spot controls the base linear and angular velocity
# (4) simulator: we simulate the environment via the kinematic mode to facilitate sim2real transfer

habitat:
task:
lab_sensors:
@@ -62,6 +67,11 @@ habitat:
agent_idx: 1
agent_0_goal_to_agent_gps_compass:
goal_is_human: True
# Define the human detector
humanoid_detector_sensor:
# Whether the human detector returns an image or a binary flag
return_image: False
is_return_image_bbox: False
type: RearrangePddlSocialNavTask-v0
min_start_distance: 4.0
reward_measure: social_nav_reward
@@ -77,13 +87,21 @@ habitat:
constraint_violation_drops_object: True

measurements:
rearrange_cooperate_reward:
stage_sparse_reward: 5.0
end_on_collide: True
collide_penalty: 0.5
# We use kinematic simulation here, so there is no dynamic force
force_terminate:
max_accum_force: -1
max_instant_force: -1
# Define the social nav reward
social_nav_reward:
facing_human_reward: 0.01
count_coll_pen: 0.01
max_count_colls: 1000
count_coll_end_pen: 1.0
# Define the social nav seek success
social_nav_seek_success:
must_look_at_targ: False
following_step_succ_threshold: 500
need_to_face_human: True

actions:
agent_0_base_velocity:
@@ -102,6 +120,7 @@ habitat:
ang_speed: 5.0
enable_rotation_check_for_dyn_slide: False

# Human random walking action
agent_1_oracle_nav_action:
type: OracleNavAction
motion_control: human_joints
@@ -121,7 +140,7 @@ habitat:
gym:
obs_keys:
- agent_0_articulated_agent_arm_depth
- agent_0_goal_to_agent_gps_compass
- agent_0_spot_head_stereo_depth_sensor
- agent_0_humanoid_detector_sensor
- agent_1_head_depth
- agent_1_relative_resting_position
@@ -150,27 +169,8 @@ habitat:
agents_order:
- agent_0
- agent_1
agents:
agent_0:
radius: 0.25
height: 1.41
articulated_agent_urdf: data/robots/hab_spot_arm/urdf/hab_spot_arm.urdf
articulated_agent_type: "SpotRobot"
sim_sensors:
arm_rgb_sensor:
height: 480
width: 640
hfov: 47
arm_depth_sensor:
height: 224
width: 171
hfov: 55
arm_panoptic_sensor:
height: 224
width: 171
hfov: 55
joint_start_noise: 0.0

# We use the kinematic mode to train the real-world social nav policy
kinematic_mode: True
ac_freq_ratio: 1
step_physics: False
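The gym obs_keys list above is what restricts the policy inputs to sensors available on the physical Spot. A minimal sketch of that filtering step, assuming a plain observation dictionary; the actual habitat-lab gym wrapper is more involved, and the helper name and extra key below are made up for illustration.

from typing import Dict, List

import numpy as np

def filter_obs(observations: Dict[str, np.ndarray], obs_keys: List[str]) -> Dict[str, np.ndarray]:
    # Keep only the sensors listed under habitat.gym.obs_keys.
    return {k: v for k, v in observations.items() if k in obs_keys}

# Hypothetical usage with the Spot-side keys from this config:
spot_keys = [
    "agent_0_articulated_agent_arm_depth",
    "agent_0_spot_head_stereo_depth_sensor",
    "agent_0_humanoid_detector_sensor",
]
obs = {k: np.zeros((1,), dtype=np.float32) for k in spot_keys + ["agent_0_arm_rgb"]}
policy_obs = filter_obs(obs, spot_keys)  # drops the RGB key, which is not used on the robot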
28 changes: 28 additions & 0 deletions habitat-lab/habitat/config/default_structured_configs.py
@@ -63,6 +63,7 @@
"TargetStartSensorConfig",
"GoalSensorConfig",
"TargetStartGpsCompassSensorConfig",
"InitialGpsCompassSensorConfig",
"TargetGoalGpsCompassSensorConfig",
# REARRANGEMENT MEASUREMENTS
"EndEffectorToRestDistanceMeasurementConfig",
@@ -397,6 +398,10 @@ class HumanoidDetectorSensorConfig(LabSensorConfig):
human_id: int = 100
# How many pixels needed to consider that human is in frame
human_pixel_threshold: int = 1000
# Whether the sensor returns an image or a binary flag
return_image: bool = False
# Whether the returned image contains the bounding box
is_return_image_bbox: bool = False


@dataclass
@@ -648,6 +653,14 @@ class TargetStartGpsCompassSensorConfig(LabSensorConfig):
type: str = "TargetStartGpsCompassSensor"


@dataclass
class InitialGpsCompassSensorConfig(LabSensorConfig):
r"""
Rearrangement only. Returns the relative distance to the initial starting location of the agent in 2D polar coordinates.
"""
type: str = "InitialGpsCompassSensor"


@dataclass
class TargetGoalGpsCompassSensorConfig(LabSensorConfig):
r"""
@@ -1264,6 +1277,15 @@ class SocialNavReward(MeasurementConfig):
# Set the id of the agent
robot_idx: int = 0
human_idx: int = 1
constraint_violate_pen: float = 10.0
force_pen: float = 0.0
max_force_pen: float = 1.0
force_end_pen: float = 10.0
# Collision-based penalty for kinematic simulation
count_coll_pen: float = -1.0
max_count_colls: int = -1
count_coll_end_pen: float = 1.0
collide_penalty: float = 1.0


@dataclass
@@ -2215,6 +2237,12 @@ class HabitatConfig(HabitatBaseConfig):
name="target_start_gps_compass_sensor",
node=TargetStartGpsCompassSensorConfig,
)
cs.store(
package="habitat.task.lab_sensors.initial_gps_compass_sensor",
group="habitat/task/lab_sensors",
name="initial_gps_compass_sensor",
node=InitialGpsCompassSensorConfig,
)
cs.store(
package="habitat.task.lab_sensors.multi_agent_all_predicates",
group="habitat/task/lab_sensors",
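The new SocialNavReward fields add a collision-count penalty suited to kinematic simulation, where force-based penalties do not apply. A hedged sketch of how count_coll_pen, max_count_colls, and count_coll_end_pen might shape the per-step reward; the actual measurement in habitat-lab may differ, and a value of -1 is assumed to disable the corresponding term.

def apply_collision_penalty(
    reward: float,
    did_collide: bool,
    num_colls_so_far: int,
    count_coll_pen: float = 0.01,
    max_count_colls: int = 1000,
    count_coll_end_pen: float = 1.0,
):
    """Sketch: penalize each kinematic collision and add a final penalty once
    the collision budget is exceeded. Returns (reward, updated collision count)."""
    if did_collide and count_coll_pen >= 0:
        num_colls_so_far += 1
        reward -= count_coll_pen
        if 0 <= max_count_colls < num_colls_so_far:
            reward -= count_coll_end_pen
    return reward, num_colls_so_far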
27 changes: 26 additions & 1 deletion habitat-lab/habitat/tasks/rearrange/multi_task/pddl_sim_state.py
@@ -11,6 +11,9 @@
import numpy as np

import habitat_sim
from habitat.articulated_agents.humanoids.kinematic_humanoid import (
KinematicHumanoid,
)
from habitat.sims.habitat_simulator.sim_utilities import get_ao_global_bb
from habitat.tasks.rearrange.marker_info import MarkerInfo
from habitat.tasks.rearrange.multi_task.rearrange_pddl import (
@@ -20,6 +23,7 @@
)
from habitat.tasks.rearrange.utils import (
place_agent_at_dist_from_pos,
random_place_agent,
rearrange_logger,
)

@@ -196,7 +200,6 @@ def set_state(
# Set the robot starting position
if isinstance(self.pos, PddlEntity):
targ_pos = sim_info.get_entity_pos(self.pos)

# Place some distance away from the object.
start_pos, start_rot, was_fail = place_agent_at_dist_from_pos(
target_position=targ_pos,
@@ -213,9 +216,31 @@
)
agent_data.articulated_agent.base_pos = start_pos
agent_data.articulated_agent.base_rot = start_rot
# If the first attempt fails, the second attempt just places the robot at a random location
if was_fail:
start_pos, start_rot, was_fail = random_place_agent(
sim=sim,
num_spawn_attempts=sim_info.num_spawn_attempts,
filter_colliding_states=self.get_filter_colliding_states(
sim_info
),
agent=agent_data.articulated_agent,
)
agent_data.articulated_agent.base_pos = start_pos
agent_data.articulated_agent.base_rot = start_rot

if was_fail:
rearrange_logger.error("Failed to place the robot.")

# Record the robot's transformation if we are using "PddlSocialNavTask"
# We cannot import PddlSocialNavTask here since that will cause a circular dependency
if hasattr(sim_info.env, "initial_robot_trans") and not isinstance(
agent_data.articulated_agent, KinematicHumanoid
):
sim_info.env.initial_robot_trans = (
agent_data.articulated_agent.base_transformation
)

# We teleported the agent. We also need to teleport the object the agent was holding.
agent_data.grasp_mgr.update_object_to_grasp()

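The initial_robot_trans recorded above is presumably what the new InitialGpsCompassSensor (registered in default_structured_configs.py earlier in this PR) consumes: the episode-start location expressed relative to the agent in 2D polar coordinates. A rough sketch of that computation under assumed conventions (4x4 world-from-base transform, x/z ground plane); the frames and sign conventions of the actual sensor may differ.

import numpy as np

def initial_gps_compass(current_base_trans: np.ndarray, initial_pos: np.ndarray) -> np.ndarray:
    """Sketch: (rho, phi) of the episode-start position in the agent's current base frame."""
    base_from_world = np.linalg.inv(current_base_trans)  # invert the 4x4 world-from-base matrix
    local = base_from_world @ np.append(initial_pos, 1.0)
    rho = float(np.hypot(local[0], local[2]))     # planar distance
    phi = float(np.arctan2(-local[2], local[0]))  # heading angle (sign convention assumed)
    return np.array([rho, phi], dtype=np.float32)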
@@ -54,8 +54,15 @@ def get_observation(self, observations, episode, task, *args, **kwargs):
target_key = [
f"agent_{self.agent_id}_{key}" for key in require_sensors
]

# Handle the case where there are no stereo observations
agent_do_not_have = False
for key in target_key:
assert key in observations
if key not in observations:
agent_do_not_have = True

if agent_do_not_have:
return np.float32(np.zeros((self._height, self._width, 1)))

# Combine two images
stereo_img = [observations[key] for key in target_key]
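When both head stereo keys are present, the sensor combines the two depth images into a single observation. A toy sketch of one plausible combination (side-by-side concatenation with placeholder shapes); the actual resolutions and ordering used for Spot's head cameras are configured elsewhere and may differ.

import numpy as np

height, width = 212, 120  # placeholder shapes, not Spot's real camera resolution
left = np.zeros((height, width, 1), dtype=np.float32)
right = np.ones((height, width, 1), dtype=np.float32)

stereo_img = np.concatenate([left, right], axis=1)  # shape (height, 2 * width, 1)
assert stereo_img.shape == (height, 2 * width, 1)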