Spot social nav (facebookresearch#1658)
* tidy CI

* get a correct init location of the arm

* tidy CI

* clean up sensors so that the size is consistent

* set up social nav reward and yaml

* bounding box update

* clean up human bbox detector

* tidy

* tidy

* add stereo pair camera

* make the size of HumanoidDetectorSensor consistent with the sensor obs

* fix nan issue

* fix init_distributed issue

* tidy

* tidy CI and add comments

* add more comments

* tidy CI

* add InitialGpsCompassSensor sensor

* refine social nav reward

* add exploration reward

* increase the video size

* polish the PR

* tidy

* reduce the third_rgb_sensor size

---------

Co-authored-by: Jimmy Yang <[email protected]>
jimmytyyang and Jimmy Yang authored Nov 11, 2023
1 parent c869f96 commit 6c0c62e
Showing 11 changed files with 417 additions and 75 deletions.
@@ -42,6 +42,7 @@ habitat_baselines:
third_rgb_sensor:
height: 224
width: 171

should_load_ckpt: True
video_option: ["disk"]

2 changes: 1 addition & 1 deletion habitat-baselines/habitat_baselines/rl/ppo/ppo_trainer.py
@@ -255,7 +255,7 @@ def _init_train(self, resume_state=None):

self._agent = self._create_agent(resume_state)
if self._is_distributed:
self._agent.updater.init_distributed(find_unused_params=False) # type: ignore
self._agent.init_distributed(find_unused_params=False) # type: ignore
self._agent.post_init()

self._is_static_encoder = (
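The one-line change above moves distributed setup from self._agent.updater.init_distributed(...) to self._agent.init_distributed(...), so the trainer no longer reaches into agent internals. A minimal sketch of that delegation pattern, using hypothetical stand-in classes rather than the actual habitat-baselines interfaces:

class _Updater:
    """Stand-in for the PPO updater that owns the policy parameters."""

    def init_distributed(self, find_unused_params: bool = False) -> None:
        # In the real trainer this wraps the policy in DistributedDataParallel
        # (and therefore needs an initialized torch.distributed process group).
        print(f"init_distributed(find_unused_params={find_unused_params})")


class _Agent:
    """Stand-in for the agent access manager that wraps the updater."""

    def __init__(self) -> None:
        self.updater = _Updater()

    def init_distributed(self, find_unused_params: bool = False) -> None:
        # Forward to the updater; callers only need the agent handle.
        self.updater.init_distributed(find_unused_params=find_unused_params)


_Agent().init_distributed(find_unused_params=False)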
@@ -4,7 +4,7 @@ defaults:
- /habitat: habitat_config_base
- /habitat/task: task_config_base
# Config agent sensors
- /habitat/simulator/[email protected]_0: rgbd_head_rgbdp_arm_agent
- /habitat/simulator/[email protected]_0: spot_agent
- /habitat/simulator/[email protected]_0: spot
- /habitat/simulator/[email protected]_1: rgbd_head_agent
- /habitat/simulator/[email protected]_1: human
@@ -19,17 +19,14 @@ defaults:
- /habitat/task/[email protected]_1_oracle_nav_randcoord_action: oracle_nav_action
# Config agent sensor
- /habitat/task/measurements:
- articulated_agent_force
- articulated_agent_colls
- num_steps
- did_agents_collide
- num_agents_collide
- composite_stage_goals
- pddl_subgoal_reward
- rearrange_cooperate_reward
- articulated_agent_force
- force_terminate
- social_nav_reward
- object_to_goal_distance
- articulated_agent_colls
- rot_dist_to_goal
- dist_to_goal
- nav_to_pos_succ
@@ -50,9 +47,17 @@ defaults:
- humanoid_detector_sensor
- has_finished_oracle_nav
- /habitat/task/[email protected]_sensors.agent_0_goal_to_agent_gps_compass: nav_goal_sensor
- /habitat/task/[email protected]_sensors.agent_0_spot_head_stereo_depth_sensor: spot_head_stereo_depth_sensor
- /habitat/task/[email protected]_sensors.agent_1_agents_within_threshold: agents_within_threshold
- _self_

# This yaml is designed specifically for learning a social nav policy for the Boston Dynamics Spot robot.
# The major changes compared to the base yaml are the following:
# (1) obs_keys: we ensure these observations can be obtained from the real robot (Spot)
# (2) social_nav_reward: social nav reward considers collisions based on a kinematic simulation
# (3) actions: Spot controls the base linear and angular velocity
# (4) simulator: we simulate the environment via the kinematic mode to facilitate sim2real transfer

habitat:
task:
lab_sensors:
@@ -62,6 +67,11 @@ habitat:
agent_idx: 1
agent_0_goal_to_agent_gps_compass:
goal_is_human: True
# Define the human detector
humanoid_detector_sensor:
# Whether the human detector returns an image or a binary flag
return_image: False
is_return_image_bbox: False
type: RearrangePddlSocialNavTask-v0
min_start_distance: 4.0
reward_measure: social_nav_reward
@@ -70,20 +80,28 @@ habitat:
slack_reward: -0.01
end_on_success: False
task_spec_base_path: benchmark/multi_agent/
task_spec: pddl/multi_agent_tidy_house
task_spec: pddl/multi_agent_social_nav
pddl_domain_def: fp
min_distance_start_agents: 5.0
constraint_violation_ends_episode: False
constraint_violation_drops_object: True

measurements:
rearrange_cooperate_reward:
stage_sparse_reward: 5.0
end_on_collide: True
collide_penalty: 0.5
# We do kinematic simulation here, so there is no dynamic force
force_terminate:
max_accum_force: -1
max_instant_force: -1
# Define the social nav reward
social_nav_reward:
facing_human_reward: 0.01
count_coll_pen: 0.01
max_count_colls: 1000
count_coll_end_pen: 1.0
# Define the social nav seek success
social_nav_seek_success:
must_look_at_targ: False
following_step_succ_threshold: 500
need_to_face_human: True

actions:
agent_0_base_velocity:
@@ -102,6 +120,7 @@ habitat:
ang_speed: 5.0
enable_rotation_check_for_dyn_slide: False

# Human random walking action
agent_1_oracle_nav_action:
type: OracleNavAction
motion_control: human_joints
@@ -121,7 +140,7 @@ habitat:
gym:
obs_keys:
- agent_0_articulated_agent_arm_depth
- agent_0_goal_to_agent_gps_compass
- agent_0_spot_head_stereo_depth_sensor
- agent_0_humanoid_detector_sensor
- agent_1_head_depth
- agent_1_relative_resting_position
@@ -150,27 +169,8 @@ habitat:
agents_order:
- agent_0
- agent_1
agents:
agent_0:
radius: 0.25
height: 1.41
articulated_agent_urdf: data/robots/hab_spot_arm/urdf/hab_spot_arm.urdf
articulated_agent_type: "SpotRobot"
sim_sensors:
arm_rgb_sensor:
height: 480
width: 640
hfov: 47
arm_depth_sensor:
height: 224
width: 171
hfov: 55
arm_panoptic_sensor:
height: 224
width: 171
hfov: 55
joint_start_noise: 0.0

# We use the kinematic mode to train the real world social nav policy
kinematic_mode: True
ac_freq_ratio: 1
step_physics: False
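The config above also turns on kinematic_mode and disables step_physics, so the base is moved by integrating the commanded linear and angular velocities rather than by the physics engine. A rough sketch of that idea with a hypothetical helper (not the actual Habitat simulator code):

import numpy as np

def kinematic_base_step(xy, yaw, lin_vel, ang_vel, ctrl_freq):
    """Integrate one control step of base linear/angular velocity commands."""
    dt = 1.0 / ctrl_freq
    new_yaw = yaw + ang_vel * dt
    # Move along the new heading; in kinematic mode collisions are checked
    # geometrically (e.g. against the navmesh) instead of via contact forces.
    new_xy = xy + lin_vel * dt * np.array([np.cos(new_yaw), np.sin(new_yaw)])
    return new_xy, new_yaw

# Example: 1 m/s forward and 0.5 rad/s turn at a 30 Hz control rate.
print(kinematic_base_step(np.zeros(2), 0.0, 1.0, 0.5, 30))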
@@ -0,0 +1,58 @@
# Version of tidy house that only has two target objects.
objects:
- name: any_targets|0
expr_type: movable_entity_type
- name: TARGET_any_targets|0
expr_type: goal_entity_type
- name: any_targets|1
expr_type: movable_entity_type
- name: TARGET_any_targets|1
expr_type: goal_entity_type
- name: robot_0
expr_type: robot_entity_type
- name: robot_1
expr_type: robot_entity_type

goal:
expr_type: AND
sub_exprs:
- at(any_targets|0,TARGET_any_targets|0)
- at(any_targets|1,TARGET_any_targets|1)
- not_holding(robot_0)
- not_holding(robot_1)

stage_goals:
stage_1_1:
expr_type: AND
sub_exprs:
- quantifier: EXISTS
inputs:
- name: robot
expr_type: robot_entity_type
expr_type: AND
sub_exprs:
- holding(any_targets|0, robot)
stage_1_2:
expr_type: AND
sub_exprs:
- at(any_targets|0,TARGET_any_targets|0)
stage_2_1:
expr_type: AND
sub_exprs:
- quantifier: EXISTS
inputs:
- name: robot
expr_type: robot_entity_type
expr_type: AND
sub_exprs:
- holding(any_targets|1, robot)
stage_2_2:
expr_type: AND
sub_exprs:
- at(any_targets|1,TARGET_any_targets|1)

solution:
- nav_to_goal(any_targets|1, robot_1)
- pick(any_targets|1, robot_1)
- nav_to_obj(TARGET_any_targets|1, robot_1)
- place(any_targets|1,TARGET_any_targets|1, robot_1)
37 changes: 37 additions & 0 deletions habitat-lab/habitat/config/default_structured_configs.py
@@ -63,6 +63,7 @@
"TargetStartSensorConfig",
"GoalSensorConfig",
"TargetStartGpsCompassSensorConfig",
"InitialGpsCompassSensorConfig",
"TargetGoalGpsCompassSensorConfig",
# REARRANGEMENT MEASUREMENTS
"EndEffectorToRestDistanceMeasurementConfig",
@@ -353,6 +354,9 @@ class OracleNavActionConfig(ActionConfig):
allow_back: bool = True
spawn_max_dist_to_obj: float = 2.0
num_spawn_attempts: int = 200
# For social nav training only. It controls the distance threshold
# between the robot and the human and decides whether the human walks or not
human_stop_and_walk_to_robot_distance_threshold: float = -1.0


# -----------------------------------------------------------------------------
@@ -397,6 +401,10 @@ class HumanoidDetectorSensorConfig(LabSensorConfig):
human_id: int = 100
# How many pixels are needed to consider that the human is in the frame
human_pixel_threshold: int = 1000
# Whether the output is image based or binary based
return_image: bool = False
# Whether the returned image is the bounding box or not
is_return_image_bbox: bool = False
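
The two new flags select between a binary human-in-frame observation and an image observation. A small sketch of the assumed behavior, built only from the config fields above (not the actual HumanoidDetectorSensor implementation):

import numpy as np

def humanoid_detector_obs(
    panoptic,
    human_id=100,
    human_pixel_threshold=1000,
    return_image=False,
    is_return_image_bbox=False,
):
    human_mask = panoptic == human_id
    if not return_image:
        # Binary flag: are there enough human pixels in the frame?
        return np.float32([human_mask.sum() > human_pixel_threshold])
    out = np.zeros(panoptic.shape, dtype=np.float32)
    ys, xs = np.nonzero(human_mask)
    if len(ys) == 0:
        return out
    if is_return_image_bbox:
        # Fill the bounding box that encloses all detected human pixels.
        out[ys.min() : ys.max() + 1, xs.min() : xs.max() + 1] = 1.0
    else:
        # Otherwise return the raw human segmentation mask.
        out[human_mask] = 1.0
    return out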


@dataclass
@@ -648,6 +656,14 @@ class TargetStartGpsCompassSensorConfig(LabSensorConfig):
type: str = "TargetStartGpsCompassSensor"


@dataclass
class InitialGpsCompassSensorConfig(LabSensorConfig):
r"""
Rearrangement only. Returns the relative distance to the initial starting location of the agent in 2D polar coordinates.
"""
type: str = "InitialGpsCompassSensor"
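
For intuition, a minimal sketch of the polar-coordinate convention the docstring describes, using assumed 2D geometry rather than the sensor's actual implementation:

import numpy as np

def initial_gps_compass(initial_xy, agent_xy, agent_yaw):
    """Initial start location expressed in the agent's frame as (rho, phi)."""
    delta = np.asarray(initial_xy) - np.asarray(agent_xy)
    # Rotate the world-frame offset into the agent's current frame.
    c, s = np.cos(-agent_yaw), np.sin(-agent_yaw)
    local = np.array([c * delta[0] - s * delta[1], s * delta[0] + c * delta[1]])
    return np.float32([np.linalg.norm(local), np.arctan2(local[1], local[0])])

# Example: the agent started at the origin and is now at (2, 0) facing +y.
print(initial_gps_compass([0.0, 0.0], [2.0, 0.0], np.pi / 2))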


@dataclass
class TargetGoalGpsCompassSensorConfig(LabSensorConfig):
r"""
@@ -1258,12 +1274,27 @@ class SocialNavReward(MeasurementConfig):
facing_human_dis: float = 3.0
# -1 means that there is no facing_human_reward
facing_human_reward: float = -1.0
# toward_human_reward defaults to 1.0
toward_human_reward: float = 1.0
# -1 means that there is no near_human_bonus
near_human_bonus: float = -1.0
# -1 means that there is no exploration reward
explore_reward: float = -1.0
# If we want to use the geodesic distance to measure the distance
# between the robot and the human
use_geo_distance: bool = False
# Set the ids of the agents
robot_idx: int = 0
human_idx: int = 1
constraint_violate_pen: float = 10.0
force_pen: float = 0.0
max_force_pen: float = 1.0
force_end_pen: float = 10.0
# Collision-based penalty for kinematic simulation
count_coll_pen: float = -1.0
max_count_colls: int = -1
count_coll_end_pen: float = 1.0
collide_penalty: float = 1.0
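
The count_coll_* fields add a collision budget for the kinematic setting. A sketch of how such terms could shape the per-step reward (illustrative only; the actual SocialNavReward measurement may combine them differently):

def apply_collision_terms(reward, did_collide, total_collisions, cfg):
    """Return (shaped_reward, should_end) given the kinematic collision config."""
    should_end = False
    if cfg.count_coll_pen != -1.0 and did_collide:
        # Small penalty for every step on which the agents collide.
        reward -= cfg.count_coll_pen
    if cfg.max_count_colls != -1 and total_collisions > cfg.max_count_colls:
        # Large terminal penalty once the collision budget is exhausted.
        reward -= cfg.count_coll_end_pen
        should_end = True
    return reward, should_end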


@dataclass
@@ -2215,6 +2246,12 @@ class HabitatConfig(HabitatBaseConfig):
name="target_start_gps_compass_sensor",
node=TargetStartGpsCompassSensorConfig,
)
cs.store(
package="habitat.task.lab_sensors.initial_gps_compass_sensor",
group="habitat/task/lab_sensors",
name="initial_gps_compass_sensor",
node=InitialGpsCompassSensorConfig,
)
cs.store(
package="habitat.task.lab_sensors.multi_agent_all_predicates",
group="habitat/task/lab_sensors",
15 changes: 14 additions & 1 deletion habitat-lab/habitat/tasks/rearrange/robot_specific_sensors.py
@@ -5,6 +5,8 @@
# LICENSE file in the root directory of this source tree.


import warnings

import numpy as np
from gym import spaces

@@ -54,8 +56,19 @@ def get_observation(self, observations, episode, task, *args, **kwargs):
target_key = [
f"agent_{self.agent_id}_{key}" for key in require_sensors
]

# Handle the case that there are no stereo observations
agent_do_not_have = False
for key in target_key:
assert key in observations
if key not in observations:
agent_do_not_have = True

if agent_do_not_have:
warnings.warn(
"You are using SpotHeadStereoDepthSensor but you do not provide head_stereo_right_depth and head_stereo_right_depth."
"We return an all zero image."
)
return np.float32(np.zeros((self._height, self._width, 1)))

# Combine two images
stereo_img = [observations[key] for key in target_key]
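One plausible way to combine the two head depth frames gathered above into a single observation of the configured size is sketched below; this is an assumption for illustration, not necessarily how the sensor stitches and resizes the frames:

import numpy as np

def combine_stereo_depth(left, right, height, width):
    """Concatenate two HxW depth frames side by side and resize to (height, width, 1)."""
    stereo = np.concatenate([left, right], axis=1)
    # Nearest-neighbour resize via index sampling (no external dependencies).
    rows = np.linspace(0, stereo.shape[0] - 1, height).astype(int)
    cols = np.linspace(0, stereo.shape[1] - 1, width).astype(int)
    return np.float32(stereo[np.ix_(rows, cols)].reshape(height, width, 1))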
@@ -341,6 +341,37 @@ def step(self, *args, **kwargs):
if self.skill_done:
self.coord_nav = None

# If the robot is nearby, the human starts to walk; otherwise, the human
# just stops there and waits for the robot to find it
if self._config.human_stop_and_walk_to_robot_distance_threshold != -1:
assert (
len(self._sim.agents_mgr) == 2
), "Does not support more than two agents when you want human to stop and walk based on the distance to the robot"
robot_id = int(1 - self._agent_index)
robot_pos = self._sim.get_agent_data(
robot_id
).articulated_agent.base_pos
human_pos = self.cur_articulated_agent.base_pos
dis = self._sim.geodesic_distance(robot_pos, human_pos)
# The human needs to stop and wait for the robot to come if the distance is too large
if (
dis
> self._config.human_stop_and_walk_to_robot_distance_threshold
):
self.humanoid_controller.set_framerate_for_linspeed(
0.0, 0.0, self._sim.ctrl_freq
)
# The human needs to walk otherwise
else:
speed = np.random.uniform(
self._config.lin_speed / 5.0, self._config.lin_speed
)
lin_speed = speed
ang_speed = speed
self.humanoid_controller.set_framerate_for_linspeed(
lin_speed, ang_speed, self._sim.ctrl_freq
)

try:
kwargs["task"].measurements.measures[
"social_nav_stats"