Spot social nav #1658

Merged · Nov 11, 2023 · 24 commits

Changes from 18 commits
2 changes: 1 addition & 1 deletion habitat-baselines/habitat_baselines/rl/ppo/ppo_trainer.py
@@ -255,7 +255,7 @@ def _init_train(self, resume_state=None):

self._agent = self._create_agent(resume_state)
if self._is_distributed:
self._agent.updater.init_distributed(find_unused_params=False) # type: ignore
self._agent.init_distributed(find_unused_params=False) # type: ignore
self._agent.post_init()

self._is_static_encoder = (
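The trainer now calls init_distributed on the agent access manager instead of directly on agent.updater. A hedged sketch of one way such a method could forward the call to the underlying updater(s) in a multi-agent setup; the class and attribute names below are illustrative assumptions, not the actual habitat-baselines implementation.

# Sketch only: an access manager that owns several updaters (e.g. one PPO
# instance per trained agent) and forwards distributed initialization to each.
class AgentAccessMgrSketch:
    def __init__(self, updaters):
        self._updaters = updaters

    def init_distributed(self, find_unused_params: bool = False) -> None:
        # Delegate distributed (DDP) setup to every underlying updater.
        for updater in self._updaters:
            updater.init_distributed(find_unused_params=find_unused_params)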
@@ -4,7 +4,7 @@ defaults:
- /habitat: habitat_config_base
- /habitat/task: task_config_base
# Config agent sensors
- /habitat/simulator/[email protected]_0: rgbd_head_rgbdp_arm_agent
- /habitat/simulator/[email protected]_0: spot_agent
- /habitat/simulator/[email protected]_0: spot
- /habitat/simulator/[email protected]_1: rgbd_head_agent
- /habitat/simulator/[email protected]_1: human
@@ -19,17 +19,14 @@ defaults:
- /habitat/task/[email protected]_1_oracle_nav_randcoord_action: oracle_nav_action
# Config agent sensor
- /habitat/task/measurements:
- articulated_agent_force
- articulated_agent_colls
- num_steps
- did_agents_collide
- num_agents_collide
- composite_stage_goals
- pddl_subgoal_reward
- rearrange_cooperate_reward
- articulated_agent_force
- force_terminate
- social_nav_reward
- object_to_goal_distance
- articulated_agent_colls
- rot_dist_to_goal
- dist_to_goal
- nav_to_pos_succ
@@ -50,9 +47,17 @@ defaults:
- humanoid_detector_sensor
- has_finished_oracle_nav
- /habitat/task/[email protected]_sensors.agent_0_goal_to_agent_gps_compass: nav_goal_sensor
- /habitat/task/[email protected]_sensors.agent_0_spot_head_stereo_depth_sensor: spot_head_stereo_depth_sensor
- /habitat/task/[email protected]_sensors.agent_1_agents_within_threshold: agents_within_threshold
- _self_

# This yaml is designed specifically for learning a social nav policy for the Boston Dynamics Spot robot.
# The major changes compared to the base yaml are the following:
# (1) obs_keys: we ensure these observations can be obtained from the real robot (Spot)
# (2) social_nav_reward: social nav reward considers collisions based on a kinematic simulation
# (3) actions: Spot controls the base linear and angular velocity
# (4) simulator: we simulate the environment via the kinematic mode to facilitate sim2real transfer

habitat:
task:
lab_sensors:
@@ -62,6 +67,11 @@ habitat:
agent_idx: 1
agent_0_goal_to_agent_gps_compass:
goal_is_human: True
# Define the human detector
humanoid_detector_sensor:
# Whether the human detector returns an image or a binary flag
return_image: False
is_return_image_bbox: False
type: RearrangePddlSocialNavTask-v0
min_start_distance: 4.0
reward_measure: social_nav_reward
@@ -77,13 +87,21 @@ habitat:
constraint_violation_drops_object: True

measurements:
rearrange_cooperate_reward:
stage_sparse_reward: 5.0
end_on_collide: True
collide_penalty: 0.5
# We use kinematic simulation here, so there is no dynamic force
force_terminate:
max_accum_force: -1
max_instant_force: -1
# Define the social nav reward
social_nav_reward:
facing_human_reward: 0.01
count_coll_pen: 0.01
max_count_colls: 1000
count_coll_end_pen: 1.0
# Define the social nav seek success
social_nav_seek_success:
must_look_at_targ: False
following_step_succ_threshold: 500
need_to_face_human: True

actions:
agent_0_base_velocity:
@@ -102,6 +120,7 @@ habitat:
ang_speed: 5.0
enable_rotation_check_for_dyn_slide: False

# Human random walking action
agent_1_oracle_nav_action:
type: OracleNavAction
motion_control: human_joints
@@ -121,7 +140,7 @@ habitat:
gym:
obs_keys:
- agent_0_articulated_agent_arm_depth
- agent_0_goal_to_agent_gps_compass
- agent_0_spot_head_stereo_depth_sensor
- agent_0_humanoid_detector_sensor
- agent_1_head_depth
- agent_1_relative_resting_position
@@ -150,27 +169,8 @@ habitat:
agents_order:
- agent_0
- agent_1
agents:
agent_0:
radius: 0.25
height: 1.41
articulated_agent_urdf: data/robots/hab_spot_arm/urdf/hab_spot_arm.urdf
articulated_agent_type: "SpotRobot"
sim_sensors:
arm_rgb_sensor:
height: 480
width: 640
hfov: 47
arm_depth_sensor:
height: 224
width: 171
hfov: 55
arm_panoptic_sensor:
height: 224
width: 171
hfov: 55
joint_start_noise: 0.0

# We use the kinematic mode to train the real-world social nav policy
kinematic_mode: True
ac_freq_ratio: 1
step_physics: False
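The gym obs_keys list above is what restricts the policy inputs to sensors available on the physical Spot. A minimal sketch of that filtering step, assuming a plain observation dictionary; the actual habitat-lab gym wrapper is more involved, and the helper name and extra key below are made up for illustration.

from typing import Dict, List

import numpy as np

def filter_obs(observations: Dict[str, np.ndarray], obs_keys: List[str]) -> Dict[str, np.ndarray]:
    # Keep only the sensors listed under habitat.gym.obs_keys.
    return {k: v for k, v in observations.items() if k in obs_keys}

# Hypothetical usage with the Spot-side keys from this config:
spot_keys = [
    "agent_0_articulated_agent_arm_depth",
    "agent_0_spot_head_stereo_depth_sensor",
    "agent_0_humanoid_detector_sensor",
]
obs = {k: np.zeros((1,), dtype=np.float32) for k in spot_keys + ["agent_0_arm_rgb"]}
policy_obs = filter_obs(obs, spot_keys)  # drops the RGB key, which is not used on the robot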
28 changes: 28 additions & 0 deletions habitat-lab/habitat/config/default_structured_configs.py
@@ -63,6 +63,7 @@
"TargetStartSensorConfig",
"GoalSensorConfig",
"TargetStartGpsCompassSensorConfig",
"InitialGpsCompassSensorConfig",
"TargetGoalGpsCompassSensorConfig",
# REARRANGEMENT MEASUREMENTS
"EndEffectorToRestDistanceMeasurementConfig",
@@ -397,6 +398,10 @@ class HumanoidDetectorSensorConfig(LabSensorConfig):
human_id: int = 100
# How many pixels needed to consider that human is in frame
human_pixel_threshold: int = 1000
# Whether the sensor returns an image or a binary flag
return_image: bool = False
# Whether the returned image contains the bounding box
is_return_image_bbox: bool = False


@dataclass
@@ -648,6 +653,14 @@ class TargetStartGpsCompassSensorConfig(LabSensorConfig):
type: str = "TargetStartGpsCompassSensor"


@dataclass
class InitialGpsCompassSensorConfig(LabSensorConfig):
r"""
Rearrangement only. Returns the relative distance to the initial starting location of the agent in 2D polar coordinates.
"""
type: str = "InitialGpsCompassSensor"


@dataclass
class TargetGoalGpsCompassSensorConfig(LabSensorConfig):
r"""
@@ -1264,6 +1277,15 @@ class SocialNavReward(MeasurementConfig):
# Set the id of the agent
robot_idx: int = 0
human_idx: int = 1
constraint_violate_pen: float = 10.0
force_pen: float = 0.0
max_force_pen: float = 1.0
force_end_pen: float = 10.0
# Collision-based penalty for kinematic simulation
count_coll_pen: float = -1.0
max_count_colls: int = -1
count_coll_end_pen: float = 1.0
collide_penalty: float = 1.0


@dataclass
@@ -2215,6 +2237,12 @@ class HabitatConfig(HabitatBaseConfig):
name="target_start_gps_compass_sensor",
node=TargetStartGpsCompassSensorConfig,
)
cs.store(
package="habitat.task.lab_sensors.initial_gps_compass_sensor",
group="habitat/task/lab_sensors",
name="initial_gps_compass_sensor",
node=InitialGpsCompassSensorConfig,
)
cs.store(
package="habitat.task.lab_sensors.multi_agent_all_predicates",
group="habitat/task/lab_sensors",
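The new SocialNavReward fields add a collision-count penalty suited to kinematic simulation, where force-based penalties do not apply. A hedged sketch of how count_coll_pen, max_count_colls, and count_coll_end_pen might shape the per-step reward; the actual measurement in habitat-lab may differ, and a value of -1 is assumed to disable the corresponding term.

def apply_collision_penalty(
    reward: float,
    did_collide: bool,
    num_colls_so_far: int,
    count_coll_pen: float = 0.01,
    max_count_colls: int = 1000,
    count_coll_end_pen: float = 1.0,
):
    """Sketch: penalize each kinematic collision and add a final penalty once
    the collision budget is exceeded. Returns (reward, updated collision count)."""
    if did_collide and count_coll_pen >= 0:
        num_colls_so_far += 1
        reward -= count_coll_pen
        if 0 <= max_count_colls < num_colls_so_far:
            reward -= count_coll_end_pen
    return reward, num_colls_so_far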
27 changes: 26 additions & 1 deletion habitat-lab/habitat/tasks/rearrange/multi_task/pddl_sim_state.py
@@ -11,6 +11,9 @@
import numpy as np

import habitat_sim
from habitat.articulated_agents.humanoids.kinematic_humanoid import (
KinematicHumanoid,
)
from habitat.sims.habitat_simulator.sim_utilities import get_ao_global_bb
from habitat.tasks.rearrange.marker_info import MarkerInfo
from habitat.tasks.rearrange.multi_task.rearrange_pddl import (
@@ -20,6 +23,7 @@
)
from habitat.tasks.rearrange.utils import (
place_agent_at_dist_from_pos,
random_place_agent,
rearrange_logger,
)

@@ -196,7 +200,6 @@ def set_state(
# Set the robot starting position
if isinstance(self.pos, PddlEntity):
targ_pos = sim_info.get_entity_pos(self.pos)

# Place some distance away from the object.
start_pos, start_rot, was_fail = place_agent_at_dist_from_pos(
target_position=targ_pos,
@@ -213,9 +216,31 @@
)
agent_data.articulated_agent.base_pos = start_pos
agent_data.articulated_agent.base_rot = start_rot
# If the first attempt fails, the second attempt just places the robot at a random location
if was_fail:
start_pos, start_rot, was_fail = random_place_agent(
sim=sim,
num_spawn_attempts=sim_info.num_spawn_attempts,
filter_colliding_states=self.get_filter_colliding_states(
sim_info
),
agent=agent_data.articulated_agent,
)
agent_data.articulated_agent.base_pos = start_pos
agent_data.articulated_agent.base_rot = start_rot

if was_fail:
rearrange_logger.error("Failed to place the robot.")

# Record the robot's transformation if we are using "PddlSocialNavTask"
# We cannot import PddlSocialNavTask here since that will cause a circular dependency
if hasattr(sim_info.env, "initial_robot_trans") and not isinstance(
agent_data.articulated_agent, KinematicHumanoid
):
sim_info.env.initial_robot_trans = (
agent_data.articulated_agent.base_transformation
)

# We teleported the agent. We also need to teleport the object the agent was holding.
agent_data.grasp_mgr.update_object_to_grasp()

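The initial_robot_trans recorded above is presumably what the new InitialGpsCompassSensor (registered in default_structured_configs.py earlier in this PR) consumes: the episode-start location expressed relative to the agent in 2D polar coordinates. A rough sketch of that computation under assumed conventions (4x4 world-from-base transform, x/z ground plane); the frames and sign conventions of the actual sensor may differ.

import numpy as np

def initial_gps_compass(current_base_trans: np.ndarray, initial_pos: np.ndarray) -> np.ndarray:
    """Sketch: (rho, phi) of the episode-start position in the agent's current base frame."""
    base_from_world = np.linalg.inv(current_base_trans)  # invert the 4x4 world-from-base matrix
    local = base_from_world @ np.append(initial_pos, 1.0)
    rho = float(np.hypot(local[0], local[2]))     # planar distance
    phi = float(np.arctan2(-local[2], local[0]))  # heading angle (sign convention assumed)
    return np.array([rho, phi], dtype=np.float32)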
@@ -54,8 +54,15 @@ def get_observation(self, observations, episode, task, *args, **kwargs):
target_key = [
f"agent_{self.agent_id}_{key}" for key in require_sensors
]

# Handle the case where there are no stereo observations
agent_do_not_have = False
for key in target_key:
assert key in observations
if key not in observations:
agent_do_not_have = True

if agent_do_not_have:
return np.float32(np.zeros((self._height, self._width, 1)))

# Combine two images
stereo_img = [observations[key] for key in target_key]
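When both head stereo keys are present, the sensor combines the two depth images into a single observation. A toy sketch of one plausible combination (side-by-side concatenation with placeholder shapes); the actual resolutions and ordering used for Spot's head cameras are configured elsewhere and may differ.

import numpy as np

height, width = 212, 120  # placeholder shapes, not Spot's real camera resolution
left = np.zeros((height, width, 1), dtype=np.float32)
right = np.ones((height, width, 1), dtype=np.float32)

stereo_img = np.concatenate([left, right], axis=1)  # shape (height, 2 * width, 1)
assert stereo_img.shape == (height, 2 * width, 1)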