From b115788b9bed0a78e83a23e6eec8bf60291b5750 Mon Sep 17 00:00:00 2001
From: Vincent-Pierre BERGES <28320361+vincentpierre@users.noreply.github.com>
Date: Mon, 2 May 2022 11:05:07 -0700
Subject: [PATCH] [gala_kinematic] navreach (#854)

* attempt at navreach
* some minor changes
* adding stagger
* -
* setting up collision flag
* Improved rendering. Cartesian coordinates. No more Mixed Precision training. (#2)
* Improved rendering. Cartesian coordinates. No more MP training
* Improved rendering. Cartesian coordinates. No more MP training
* Fix default
* Added back joint pos sensor
* Fixed bug
* Fixed in cart
* Added configurable success thresh and success reward
* Changed name of success reward and thresh keys
* resolving conflicts
* fixing the eval config
* nav-pick-nav-reach (#3)
* nav-pick-nav-reach
* nav PLACE + measure the gradient norm
* fixing a bug
* fixing issues and setting new training defaults

Co-authored-by: vincentpierre

* using logger and not print
* addressing comments
* minor changes
* minor changes
* minor changes
* BPS policy integration (#6)

Co-authored-by: vincentpierre
Co-authored-by: Andrew Szot
---
 habitat/core/batched_env.py                   | 238 +++++++++++++-----
 habitat_baselines/config/default.py           |   1 +
 .../rearrange/gala_kinematic_ddppo.yaml       |  79 +++++-
 .../rearrange/gala_kinematic_ddppo_cart.yaml  |  84 +++++++
 .../rearrange/gala_kinematic_ddppo_eval.yaml  | 102 ++++++++
 .../rl/ddppo/policy/resnet_policy_bpsnav.py   |  56 +++--
 habitat_baselines/rl/ppo/policy.py            |  35 +--
 habitat_baselines/rl/ppo/ppo.py               |  47 +++-
 habitat_baselines/rl/ppo/ppo_trainer.py       |  24 +-
 habitat_baselines/utils/render_wrapper.py     |  46 +++-
 10 files changed, 577 insertions(+), 135 deletions(-)
 create mode 100644 habitat_baselines/config/rearrange/gala_kinematic_ddppo_cart.yaml
 create mode 100644 habitat_baselines/config/rearrange/gala_kinematic_ddppo_eval.yaml

diff --git a/habitat/core/batched_env.py b/habitat/core/batched_env.py
index 5d87c910cf..4bb1de13e9 100644
--- a/habitat/core/batched_env.py
+++ b/habitat/core/batched_env.py
@@ -13,6 +13,7 @@
 from gym import spaces
 from gym.spaces import Box
 
+from habitat.core.logging import logger
 from habitat.tasks.utils import cartesian_to_polar
 from habitat.utils import profiling_wrapper
 from habitat.utils.geometry_utils import quaternion_rotate_vector
@@ -32,13 +33,28 @@ def __init__(self, attach_link_name, pos, rotation, hfov):
 
 
 class StateSensorConfig:
-    def __init__(self, config_key, shape, obs_key):
-        self.config_key = config_key
+    def __init__(
+        self, shape, obs_key, max_episode_length, polar=False, **kwargs
+    ):
+        self.polar = polar
         self.shape = shape
         self.obs_key = obs_key
+        self.max_episode_length = max_episode_length
 
-    def get_obs(self, state):
-        pass
+    def get_obs(self, state) -> np.ndarray:
+        raise NotImplementedError()
+
+    def _get_relative_coordinate(
+        self, source_position, goal_position, source_rotation
+    ):
+        if self.polar:
+            return _get_spherical_coordinates(
+                source_position, goal_position, source_rotation
+            )
+        else:
+            return _get_cartesian_coordinates(
+                source_position, goal_position, source_rotation
+            )
 
     def get_batch_obs(self, states):
         new_list = np.empty((len(states), self.shape), dtype=np.float32)
@@ -49,6 +65,15 @@ def get_batch_obs(self, states):
         return torch.tensor(new_list)
 
 
+def _get_cartesian_coordinates(
+    source_position, goal_position, source_rotation
+):
+    source_T = mn.Matrix4.from_(source_rotation.to_matrix(), goal_position)
+    inverted_source_T = source_T.inverted()
+    rel_pos = inverted_source_T.transform_point(goal_position)
return np.array(rel_pos) + + def _get_spherical_coordinates_ref( source_position, goal_position, source_rotation ): @@ -114,68 +139,85 @@ def _get_spherical_coordinates( class RobotStartSensorConfig(StateSensorConfig): - def __init__(self): - super().__init__("ROBOT_START_RELATIVE", 3, "robot_start_relative") - def get_obs(self, state): robot_pos = state.robot_pos robot_rot = state.robot_rotation start_pos = state.robot_start_pos - return _get_spherical_coordinates(robot_pos, start_pos, robot_rot) + return self._get_relative_coordinate(robot_pos, start_pos, robot_rot) class RobotTargetSensorConfig(StateSensorConfig): - def __init__(self): - super().__init__("ROBOT_TARGET_RELATIVE", 3, "robot_target_relative") - def get_obs(self, state): robot_pos = state.robot_pos robot_rot = state.robot_rotation target_pos = state.target_obj_start_pos - return _get_spherical_coordinates(robot_pos, target_pos, robot_rot) + return self._get_relative_coordinate(robot_pos, target_pos, robot_rot) -class EEStartSensorConfig(StateSensorConfig): - def __init__(self): - super().__init__("EE_START_RELATIVE", 3, "ee_start_relative") +class RobotGoalSensorConfig(StateSensorConfig): + def get_obs(self, state): + robot_pos = state.robot_pos + robot_rot = state.robot_rotation + target_pos = state.goal_pos + return self._get_relative_coordinate(robot_pos, target_pos, robot_rot) + +class EEStartSensorConfig(StateSensorConfig): def get_obs(self, state): ee_pos = state.ee_pos ee_rot = state.ee_rotation start_pos = state.robot_start_pos - return _get_spherical_coordinates(ee_pos, start_pos, ee_rot) + return self._get_relative_coordinate(ee_pos, start_pos, ee_rot) class EETargetSensorConfig(StateSensorConfig): - def __init__(self): - super().__init__("EE_TARGET_RELATIVE", 3, "ee_target_relative") - def get_obs(self, state): ee_pos = state.ee_pos ee_rot = state.ee_rotation target_pos = state.target_obj_start_pos - return _get_spherical_coordinates(ee_pos, target_pos, ee_rot) + return self._get_relative_coordinate(ee_pos, target_pos, ee_rot) -class RobotEESensorConfig(StateSensorConfig): - def __init__(self): - super().__init__("ROBOT_EE_RELATIVE", 3, "robot_ee_relative") +class EEGoalSensorConfig(StateSensorConfig): + def get_obs(self, state): + ee_pos = state.ee_pos + ee_rot = state.ee_rotation + target_pos = state.goal_pos + return self._get_relative_coordinate(ee_pos, target_pos, ee_rot) + +class RobotEESensorConfig(StateSensorConfig): def get_obs(self, state): robot_pos = state.robot_pos robot_rot = state.robot_rotation ee_pos = state.ee_pos - return _get_spherical_coordinates(robot_pos, ee_pos, robot_rot) + return self._get_relative_coordinate(robot_pos, ee_pos, robot_rot) class JointSensorConfig(StateSensorConfig): - def __init__(self): - super().__init__("JOINT_SENSOR", 7, "joint_pos") - def get_obs(self, state): return state.robot_joint_positions[-9:-2] +class HoldingSensorConfig(StateSensorConfig): + def get_obs(self, state): + return float(state.target_obj_idx != -1) + + +class StepCountSensorConfig(StateSensorConfig): + def get_obs(self, state): + fraction_steps_left = ( + (self.max_episode_length - state.episode_step_idx) + * 1.0 + / self.max_episode_length + ) + return ( + min(1.0, fraction_steps_left * 1.0), + min(1.0, fraction_steps_left * 5.0), + min(1.0, fraction_steps_left * 50.0), + ) + + # Note this class must match an API expected by PPOTrainer (episode_id and scene_id). # Beware adding extra state to this class as we often make deep copies of these objects. 
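The StateSensorConfig refactor above routes every relative-position sensor through _get_relative_coordinate, which either keeps the spherical output or, with polar=False, expresses the goal in the source (robot or end-effector) frame via an inverted transform. A minimal numpy-only sketch of the Cartesian case; the patch itself uses magnum's mn.Matrix4, and the helper name below is illustrative:

import numpy as np


def to_local_frame(source_position, source_rotation_matrix, goal_position):
    """Express goal_position in the frame defined by the source pose.

    source_rotation_matrix is a 3x3 rotation matrix; positions are length-3
    sequences. Same inverted-transform idea as _get_cartesian_coordinates,
    just with numpy instead of magnum.
    """
    source_T = np.eye(4)
    source_T[:3, :3] = np.asarray(source_rotation_matrix)
    source_T[:3, 3] = np.asarray(source_position)
    goal_h = np.append(np.asarray(goal_position, dtype=np.float64), 1.0)
    return (np.linalg.inv(source_T) @ goal_h)[:3]


if __name__ == "__main__":
    # Robot at (1, 0, 2), rotated 90 degrees about +Y; goal one meter away in +X.
    rot_90_y = np.array([[0.0, 0.0, 1.0], [0.0, 1.0, 0.0], [-1.0, 0.0, 0.0]])
    print(to_local_frame([1.0, 0.0, 2.0], rot_90_y, [2.0, 0.0, 2.0]))  # approx. [0. 0. 1.]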
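StepCountSensorConfig exposes time remaining as three saturating channels at different scales, so the policy sees a coarse signal early in the episode and a fine-grained one near the end. A standalone sketch of that feature computation, assuming a 500-step episode as in the configs:

def step_count_features(step_idx: int, max_episode_length: int):
    """Three saturating 'time left' signals, mirroring StepCountSensorConfig."""
    fraction_steps_left = (max_episode_length - step_idx) / max_episode_length
    return (
        min(1.0, fraction_steps_left * 1.0),
        min(1.0, fraction_steps_left * 5.0),
        min(1.0, fraction_steps_left * 50.0),
    )


if __name__ == "__main__":
    print(step_count_features(0, 500))    # (1.0, 1.0, 1.0)
    print(step_count_features(450, 500))  # approx. (0.1, 0.5, 1.0)
    print(step_count_features(495, 500))  # approx. (0.01, 0.05, 0.5)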
class EnvironmentEpisodeState: @@ -218,17 +260,18 @@ def __init__( include_depth = "DEPTH_SENSOR" in config.SENSORS include_rgb = "RGB_SENSOR" in config.SENSORS + self._max_episode_length = config.MAX_EPISODE_LENGTH + self.state_sensor_config: List[StateSensorConfig] = [] - for ssc in [ - RobotStartSensorConfig(), - RobotTargetSensorConfig(), - EEStartSensorConfig(), - EETargetSensorConfig(), - JointSensorConfig(), - RobotEESensorConfig(), - ]: - if ssc.config_key in config.SENSORS: - self.state_sensor_config.append(ssc) + for ssc_name in config.SENSORS: + if "RGB" in ssc_name or "DEPTH" in ssc_name: + continue + ssc_cfg = config.STATE_SENSORS[ssc_name] + ssc_cls = eval(ssc_cfg.TYPE) + ssc = ssc_cls( + max_episode_length=self._max_episode_length, **ssc_cfg + ) + self.state_sensor_config.append(ssc) assert include_depth or include_rgb @@ -277,8 +320,9 @@ def __init__( # generator_config.num_object_variations = 6 # generator_config.min_nontarget_objects = 27 # generator_config.max_nontarget_objects = 32 - # generator_config.used_fixed_robot_start_pos = True + # generator_config.used_fixed_robot_start_pos = False #True # generator_config.use_fixed_robot_start_yaw = False + # generator_config.use_fixed_robot_joint_start_positions = False bsim_config = BatchedSimulatorConfig() bsim_config.gpu_id = SIMULATOR_GPU_ID @@ -300,6 +344,13 @@ def __init__( bsim_config.do_procedural_episode_set = True bsim_config.episode_generator_config = generator_config + bsim_config.enable_robot_collision = self._config.get( + "ENABLE_ROBOT_COLLISION", True + ) + bsim_config.enable_held_object_collision = self._config.get( + "ENABLE_HELD_OBJECT_COLLISION", True + ) + # bsim_config.episode_set_filepath = "../data/episode_sets/train.episode_set.json" self._bsim = BatchedSimulator(bsim_config) @@ -321,7 +372,7 @@ def __init__( self._debug_camera = Camera( "base_link", mn.Vector3( - -0.8, 1.8, -0.8 + -0.8, 2.5, -0.8 ), # place behind, above, and to the left of the base mn.Quaternion.rotation( mn.Deg(-120.0), mn.Vector3(0.0, 1.0, 0.0) @@ -450,7 +501,11 @@ def __init__( self.dones = [False] * self._num_envs self.infos: List[Dict[str, Any]] = [{}] * self._num_envs self._previous_state: List[Optional[Any]] = [None] * self._num_envs - self._previous_target_position = [None] * self._num_envs + self._stagger_agents = [0] * self._num_envs + if self._config.get("STAGGER", True): + self._stagger_agents = [ + i % self._max_episode_length for i in range(self._num_envs) + ] self.action_spaces = [ action_space @@ -516,11 +571,32 @@ def get_nonpixel_observations(self, env_states, observations): # sensor_data = torch.tensor(ssc.get_obs(state)) # observations[ssc.obs_key][b, :] = sensor_data for ssc in self.state_sensor_config: - observations[ssc.obs_key] = ssc.get_batch_obs(env_states) + observations[ssc.obs_key] = torch.tensor( + ssc.get_batch_obs(env_states) + ) + if not torch.isfinite(observations[ssc.obs_key]).all(): + logger.info((ssc.obs_key, "nan")) + for b, s in enumerate(env_states): + logger.info(observations[ssc.obs_key][b, :]) + logger.info( + ( + s.robot_pos, + s.robot_rotation, + s.robot_start_pos, + s.robot_start_rotation, + s.ee_pos, + s.ee_rotation, + s.robot_start_pos, + s.robot_start_rotation, + s.target_obj_start_pos, + ) + ) + observations[ssc.obs_key] = torch.nan_to_num( + observations[ssc.obs_key], 0.0 + ) def get_dones_rewards_resets(self, env_states, actions): for (b, state) in enumerate(env_states): - max_episode_len = 50 if self._current_episodes[b].is_disabled(): # let this episode continue in the sim; 
ignore the results assert self.resets[b] == -1 @@ -529,23 +605,52 @@ def get_dones_rewards_resets(self, env_states, actions): self.infos[b] = {} continue - # Target position is arbitrarily fixed - local_target_position = mn.Vector3(0.6, 1, 0.6) + prev_state = self._previous_state[b] + ee_to_start = (state.target_obj_start_pos - state.ee_pos).length() + # success = curr_dist < self._config.REACH_SUCCESS_THRESH + # success = state.target_obj_idx == state.held_obj_idx - global_target_position = ( - state.robot_pos - + state.robot_rotation.transform_vector(local_target_position) + is_holding_correct = state.target_obj_idx == state.held_obj_idx + was_holding_correct = False + if prev_state is not None: + was_holding_correct = ( + prev_state.target_obj_idx == prev_state.held_obj_idx + ) + + obj_pos = state.obj_positions[state.target_obj_idx] + obj_to_goal = (state.goal_pos - obj_pos).length() + success = was_holding_correct and ( + obj_to_goal < self._config.NPNP_SUCCESS_THRESH + ) + + if self._config.get("TASK_IS_PLACE", False): + success = success and state.did_drop + + failure = (state.did_drop and not success) or ( + state.target_obj_idx != state.held_obj_idx + and state.held_obj_idx != -1 ) - curr_dist = (global_target_position - state.ee_pos).length() - success_dist = 0.05 - success = curr_dist < success_dist - if success or state.episode_step_idx >= max_episode_len: + if ( + success + or failure + or state.episode_step_idx + >= (self._max_episode_length - self._stagger_agents[b]) + ): + self._stagger_agents[b] = 0 self.dones[b] = True - self.rewards[b] = 10.0 if success else 0.0 + _rew = 0.0 + _rew += self._config.NPNP_SUCCESS_REWARD if success else 0.0 + _rew -= self._config.NPNP_FAILURE_PENALTY if success else 0.0 + self.rewards[b] = _rew self.infos[b] = { "success": float(success), + "failure": float(failure), "episode_steps": state.episode_step_idx, + "distance_to_start": ee_to_start, + "distance_to_goal": obj_to_goal, + "is_holding_correct": float(is_holding_correct), + "was_holding_correct": float(was_holding_correct), } self._previous_state[b] = None @@ -564,19 +669,34 @@ def get_dones_rewards_resets(self, env_states, actions): else: self.resets[b] = -1 self.dones[b] = False - self.rewards[b] = 0 + self.rewards[b] = -1.0 / self._max_episode_length self.infos[b] = { "success": 0.0, + "failure": 0.0, "episode_steps": state.episode_step_idx, + "distance_to_start": ee_to_start, + "distance_to_goal": obj_to_goal, + "is_holding_correct": float(is_holding_correct), + "was_holding_correct": float(was_holding_correct), } if self._previous_state[b] is not None: - last_dist = ( - self._previous_target_position[b] - - self._previous_state[b].ee_pos - ).length() - self.rewards[b] = -(curr_dist - last_dist) + if not is_holding_correct: + prev_dist = ( + prev_state.target_obj_start_pos - prev_state.ee_pos + ).length() + self.rewards[b] += -(ee_to_start - prev_dist) + else: + prev_obj_pos = prev_state.obj_positions[ + prev_state.target_obj_idx + ] + prev_obj_to_goal = ( + prev_state.goal_pos - prev_obj_pos + ).length() + self.rewards[b] += -(obj_to_goal - prev_obj_to_goal) + + if is_holding_correct and (prev_state.held_obj_idx == -1): + self.rewards[b] += self._config.PICK_REWARD self._previous_state[b] = state - self._previous_target_position[b] = global_target_position def reset(self): r"""Reset all the vectorized environments diff --git a/habitat_baselines/config/default.py b/habitat_baselines/config/default.py index 3801be964b..b189ebb0da 100644 --- a/habitat_baselines/config/default.py +++ 
b/habitat_baselines/config/default.py @@ -30,6 +30,7 @@ _C.TENSORBOARD_DIR = "tb" _C.VIDEO_DIR = "video_dir" _C.TEST_EPISODE_COUNT = -1 +_C.VIDEO_RENDER_ALL_INFO = False _C.EVAL_CKPT_PATH_DIR = "data/checkpoints" # path to ckpt or path to ckpts dir _C.NUM_ENVIRONMENTS = 16 _C.NUM_PROCESSES = -1 # depricated diff --git a/habitat_baselines/config/rearrange/gala_kinematic_ddppo.yaml b/habitat_baselines/config/rearrange/gala_kinematic_ddppo.yaml index fbfdc5bac4..15cc50da4a 100644 --- a/habitat_baselines/config/rearrange/gala_kinematic_ddppo.yaml +++ b/habitat_baselines/config/rearrange/gala_kinematic_ddppo.yaml @@ -1,16 +1,74 @@ TENSORBOARD_DIR: "/checkpoint/eundersander/gala_kinematic/tb/gala_kinematic_ddppo" CHECKPOINT_FOLDER: "/checkpoint/eundersander/gala_kinematic/ckpt/gala_kinematic_ddppo" -VIDEO_DIR: "../videos" +VIDEO_DIR: "../videos2" +VIDEO_OPTION: ["disk"] REWARD_SCALE: 1.0 # 0.01 -NUM_CHECKPOINTS: 0 +NUM_CHECKPOINTS: 10 BATCHED_ENV: True OVERLAP_PHYSICS: True SAVE_VIDEOS_INTERVAL: 500 -LOG_INTERVAL: 1 +LOG_INTERVAL: 10 NUM_UPDATES: -1 -TOTAL_NUM_STEPS: 20000000.0 +TOTAL_NUM_STEPS: 5e9 NUM_ENVIRONMENTS: 512 -SENSORS: ["DEPTH_SENSOR", "RGB_SENSOR", "ROBOT_START_RELATIVE", "ROBOT_TARGET_RELATIVE", "EE_START_RELATIVE", "EE_TARGET_RELATIVE", "ROBOT_EE_RELATIVE", "JOINT_SENSOR"] +MAX_EPISODE_LENGTH: 500 +STAGGER: True +ENABLE_ROBOT_COLLISION: True +ENABLE_HELD_OBJECT_COLLISION: True +PICK_REWARD : 5.0 +TASK_IS_PLACE : False +NPNP_SUCCESS_REWARD: 10.0 +NPNP_SUCCESS_THRESH: 0.15 #0.3 #0.05 +NPNP_FAILURE_PENALTY: 10.0 +STATE_SENSORS: + ROBOT_START_RELATIVE: + TYPE: "RobotStartSensorConfig" + polar: True + obs_key: "robot_start_relative" + shape: 3 + ROBOT_TARGET_RELATIVE: + TYPE: "RobotTargetSensorConfig" + polar: True + obs_key: "robot_target_relative" + shape: 3 + ROBOT_GOAL_RELATIVE: + TYPE: "RobotGoalSensorConfig" + polar: True + obs_key: "robot_goal_relative" + shape: 3 + EE_GOAL_RELATIVE: + TYPE: "EEGoalSensorConfig" + polar: True + obs_key: "ee_goal_relative" + shape: 3 + EE_START_RELATIVE: + TYPE: "EEStartSensorConfig" + polar: True + obs_key: "ee_start_relative" + shape: 3 + EE_TARGET_RELATIVE: + TYPE: "EETargetSensorConfig" + polar: True + obs_key: "ee_target_relative" + shape: 3 + ROBOT_EE_RELATIVE: + TYPE: "RobotEESensorConfig" + polar: True + obs_key: "robot_ee_relative" + shape: 3 + JOINT_SENSOR: + TYPE: "JointSensorConfig" + obs_key: "joint_pos" + shape: 7 + STEP_COUNT_SENSOR: + TYPE: "StepCountSensorConfig" + obs_key: "step_count" + shape: 3 + IS_HOLDING_SENSOR: + TYPE: "JointSensorConfig" + obs_key: "is_holding" + shape: 1 +SENSORS: ["DEPTH_SENSOR", "RGB_SENSOR", "ROBOT_START_RELATIVE", "ROBOT_TARGET_RELATIVE", "EE_START_RELATIVE", "EE_TARGET_RELATIVE", "ROBOT_EE_RELATIVE", "ROBOT_GOAL_RELATIVE", "EE_GOAL_RELATIVE", "IS_HOLDING_SENSOR", "JOINT_SENSOR", "STEP_COUNT_SENSOR"] SIMULATOR: AGENTS: ['AGENT_0'] HEAD_RGB_SENSOR: @@ -32,12 +90,13 @@ WB: RL: PPO: ppo_epoch: 1 - num_steps: 32 - entropy_coef: 0.0 # 001 - lr: 2.5e-4 + num_mini_batch: 2 + num_steps: 64 + entropy_coef: 0.0005 + lr: 3.5e-4 use_normalized_advantage: True - use_linear_lr_decay: False - max_grad_norm: 100.0 + use_linear_lr_decay: True + max_grad_norm: 2.0 POLICY: name: "PointNavBaselinePolicy" action_distribution_type: "gaussian" diff --git a/habitat_baselines/config/rearrange/gala_kinematic_ddppo_cart.yaml b/habitat_baselines/config/rearrange/gala_kinematic_ddppo_cart.yaml new file mode 100644 index 0000000000..56ebcac881 --- /dev/null +++ b/habitat_baselines/config/rearrange/gala_kinematic_ddppo_cart.yaml 
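The STAGGER option above initializes each environment with an offset of i % MAX_EPISODE_LENGTH, so the first episode of env i is shortened by that amount and the batch does not hit done (and reset) in lockstep; the offset is cleared after the first reset. A small sketch of the resulting per-env budgets (function name is illustrative):

def staggered_budgets(num_envs: int, max_episode_length: int, stagger: bool = True):
    """Step budget for the *first* episode of each environment."""
    offsets = (
        [i % max_episode_length for i in range(num_envs)]
        if stagger
        else [0] * num_envs
    )
    return [max_episode_length - off for off in offsets]


if __name__ == "__main__":
    print(staggered_budgets(4, 500))         # [500, 499, 498, 497]
    print(staggered_budgets(4, 500, False))  # [500, 500, 500, 500]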
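The reworked get_dones_rewards_resets combines a per-step cost of -1/MAX_EPISODE_LENGTH, a distance-delta shaping term (end-effector to object before the correct pick, object to goal after it), a one-time PICK_REWARD, and terminal NPNP_SUCCESS_REWARD / NPNP_FAILURE_PENALTY terms. A condensed sketch with illustrative names; the terminal penalty here is applied when failure is true, which appears to be the intent of the NPNP_FAILURE_PENALTY branch in the diff:

def shaped_reward(
    prev_dist: float,
    curr_dist: float,
    just_picked: bool,
    done: bool,
    success: bool,
    failure: bool,
    max_episode_length: int = 500,
    pick_reward: float = 5.0,
    success_reward: float = 10.0,
    failure_penalty: float = 10.0,
) -> float:
    """Reward for one environment step.

    prev_dist / curr_dist are end-effector-to-object distances before the
    pick and object-to-goal distances after it, matching the two shaping
    branches in get_dones_rewards_resets.
    """
    if done:
        return (success_reward if success else 0.0) - (
            failure_penalty if failure else 0.0
        )
    reward = -1.0 / max_episode_length  # small per-step cost
    reward += -(curr_dist - prev_dist)  # reward progress toward the target
    if just_picked:
        reward += pick_reward           # one-time bonus for grasping the right object
    return reward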
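State sensors are now declared in the STATE_SENSORS block and instantiated by name; the patch resolves TYPE with eval and forwards the remaining keys as keyword arguments. A standalone sketch of that construction path using an explicit registry and a dummy sensor class, assuming PyYAML is available (names are illustrative):

import yaml


class DummyStateSensor:
    """Stand-in for the StateSensorConfig subclasses in batched_env.py."""

    def __init__(self, shape, obs_key, max_episode_length, polar=False, **kwargs):
        self.shape = shape
        self.obs_key = obs_key
        self.max_episode_length = max_episode_length
        self.polar = polar


# The patch resolves TYPE with eval(); an explicit registry avoids that.
SENSOR_REGISTRY = {"RobotStartSensorConfig": DummyStateSensor}

CONFIG_YAML = """
MAX_EPISODE_LENGTH: 500
STATE_SENSORS:
  ROBOT_START_RELATIVE:
    TYPE: "RobotStartSensorConfig"
    polar: True
    obs_key: "robot_start_relative"
    shape: 3
"""


def build_state_sensors(cfg):
    sensors = []
    for name, ssc_cfg in cfg["STATE_SENSORS"].items():
        ssc_cls = SENSOR_REGISTRY[ssc_cfg["TYPE"]]
        kwargs = {k: v for k, v in ssc_cfg.items() if k != "TYPE"}
        sensors.append(
            ssc_cls(max_episode_length=cfg["MAX_EPISODE_LENGTH"], **kwargs)
        )
    return sensors


if __name__ == "__main__":
    config = yaml.safe_load(CONFIG_YAML)
    for sensor in build_state_sensors(config):
        print(sensor.obs_key, sensor.shape, sensor.polar)  # robot_start_relative 3 True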
@@ -0,0 +1,84 @@ +TENSORBOARD_DIR: "/checkpoint/eundersander/gala_kinematic/tb/gala_kinematic_ddppo" +CHECKPOINT_FOLDER: "/checkpoint/eundersander/gala_kinematic/ckpt/gala_kinematic_ddppo" +VIDEO_DIR: "../videos2" +VIDEO_OPTION: ["disk"] +REWARD_SCALE: 1.0 # 0.01 +NUM_CHECKPOINTS: 50 +BATCHED_ENV: True +OVERLAP_PHYSICS: True +SAVE_VIDEOS_INTERVAL: 500 +LOG_INTERVAL: 10 +NUM_UPDATES: -1 +TOTAL_NUM_STEPS: 5e8 +NUM_ENVIRONMENTS: 512 +MAX_EPISODE_LENGTH: 500 +STAGGER: True +ENABLE_ROBOT_COLLISION: True +ENABLE_HELD_OBJECT_COLLISION: True +REACH_SUCCESS_REWARD: 10.0 +REACH_SUCCESS_THRESH: 0.3 +STATE_SENSORS: + ROBOT_START_RELATIVE: + TYPE: "RobotStartSensorConfig" + polar: False + obs_key: "robot_start_relative" + shape: 3 + ROBOT_TARGET_RELATIVE: + TYPE: "RobotTargetSensorConfig" + polar: False + obs_key: "robot_target_relative" + shape: 3 + EE_START_RELATIVE: + TYPE: "EEStartSensorConfig" + polar: False + obs_key: "ee_start_relative" + shape: 3 + EE_TARGET_RELATIVE: + TYPE: "EETargetSensorConfig" + polar: False + obs_key: "ee_target_relative" + shape: 3 + ROBOT_EE_RELATIVE: + TYPE: "RobotEESensorConfig" + polar: False + obs_key: "robot_ee_relative" + shape: 3 + JOINT_SENSOR: + TYPE: "JointSensorConfig" + obs_key: "joint_pos" + shape: 7 + STEP_COUNT_SENSOR: + TYPE: "StepCountSensorConfig" + obs_key: "step_count" + shape: 3 +# SENSORS: ["DEPTH_SENSOR", "RGB_SENSOR", "ROBOT_START_RELATIVE", "ROBOT_TARGET_RELATIVE", "EE_START_RELATIVE", "EE_TARGET_RELATIVE", "ROBOT_EE_RELATIVE", "JOINT_SENSOR", "DEBUG_RGB_SENSOR"] +SENSORS: ["DEPTH_SENSOR", "RGB_SENSOR", "ROBOT_START_RELATIVE", "ROBOT_TARGET_RELATIVE", "EE_START_RELATIVE", "EE_TARGET_RELATIVE", "ROBOT_EE_RELATIVE", "JOINT_SENSOR"] +SIMULATOR: + AGENTS: ['AGENT_0'] + AGENT_0: + SENSORS: ['HEAD_RGB_SENSOR'] + HEAD_RGB_SENSOR: + WIDTH: 128 + HEIGHT: 128 +WRITER_TYPE: "wb" +WB: + PROJECT_NAME: "gala" + ENTITY: "vincentpierre" + GROUP: "" + RUN_NAME: "" +RL: + PPO: + ppo_epoch: 1 + num_mini_batch: 2 + num_steps: 64 + entropy_coef: 0.0001 + lr: 2.5e-4 + use_normalized_advantage: True + use_linear_lr_decay: False + # max_grad_norm: 1.0e+32 + POLICY: + name: "PointNavResNetPolicy" + action_distribution_type: "gaussian" + DDPPO: + backbone: "resnet18" +FORCE_BLIND_POLICY: False diff --git a/habitat_baselines/config/rearrange/gala_kinematic_ddppo_eval.yaml b/habitat_baselines/config/rearrange/gala_kinematic_ddppo_eval.yaml new file mode 100644 index 0000000000..2afbfe0820 --- /dev/null +++ b/habitat_baselines/config/rearrange/gala_kinematic_ddppo_eval.yaml @@ -0,0 +1,102 @@ +TENSORBOARD_DIR: "/checkpoint/eundersander/gala_kinematic/tb/gala_kinematic_ddppo" +CHECKPOINT_FOLDER: "/checkpoint/eundersander/gala_kinematic/ckpt/gala_kinematic_ddppo" +VIDEO_DIR: "../videos" +VIDEO_OPTION: ["disk"] +REWARD_SCALE: 1.0 # 0.01 +NUM_CHECKPOINTS: 10 +BATCHED_ENV: True +OVERLAP_PHYSICS: True +SAVE_VIDEOS_INTERVAL: 500 +LOG_INTERVAL: 1 +NUM_UPDATES: -1 +TOTAL_NUM_STEPS: 50000000.0 +NUM_ENVIRONMENTS: 2 +MAX_EPISODE_LENGTH: 500 +STAGGER: False +TEST_EPISODE_COUNT: 10 +ENABLE_ROBOT_COLLISION: True +ENABLE_HELD_OBJECT_COLLISION: True +PICK_REWARD : 5.0 +TASK_IS_PLACE : False +NPNP_SUCCESS_REWARD: 10.0 +NPNP_SUCCESS_THRESH: 0.3 #0.05 +NPNP_FAILURE_PENALTY: 10.0 +STATE_SENSORS: + ROBOT_START_RELATIVE: + TYPE: "RobotStartSensorConfig" + polar: True + obs_key: "robot_start_relative" + shape: 3 + ROBOT_TARGET_RELATIVE: + TYPE: "RobotTargetSensorConfig" + polar: True + obs_key: "robot_target_relative" + shape: 3 + ROBOT_GOAL_RELATIVE: + TYPE: "RobotGoalSensorConfig" + polar: True + 
obs_key: "robot_goal_relative" + shape: 3 + EE_GOAL_RELATIVE: + TYPE: "EEGoalSensorConfig" + polar: True + obs_key: "ee_goal_relative" + shape: 3 + EE_START_RELATIVE: + TYPE: "EEStartSensorConfig" + polar: True + obs_key: "ee_start_relative" + shape: 3 + EE_TARGET_RELATIVE: + TYPE: "EETargetSensorConfig" + polar: True + obs_key: "ee_target_relative" + shape: 3 + ROBOT_EE_RELATIVE: + TYPE: "RobotEESensorConfig" + polar: True + obs_key: "robot_ee_relative" + shape: 3 + JOINT_SENSOR: + TYPE: "JointSensorConfig" + obs_key: "joint_pos" + shape: 7 + STEP_COUNT_SENSOR: + TYPE: "StepCountSensorConfig" + obs_key: "step_count" + shape: 3 + IS_HOLDING_SENSOR: + TYPE: "JointSensorConfig" + obs_key: "is_holding" + shape: 1 +VIDEO_RENDER_ALL_INFO: True +SENSORS: ["DEPTH_SENSOR", "RGB_SENSOR", "ROBOT_START_RELATIVE", "ROBOT_TARGET_RELATIVE", "EE_START_RELATIVE", "EE_TARGET_RELATIVE", "ROBOT_EE_RELATIVE", "ROBOT_GOAL_RELATIVE", "EE_GOAL_RELATIVE", "IS_HOLDING_SENSOR", "JOINT_SENSOR", "STEP_COUNT_SENSOR", "DEBUG_RGB_SENSOR"] +SIMULATOR: + AGENTS: ['AGENT_0'] + AGENT_0: + SENSORS: ['HEAD_RGB_SENSOR'] + HEAD_RGB_SENSOR: + WIDTH: 128 + HEIGHT: 128 +WRITER_TYPE: "wb" +WB: + PROJECT_NAME: "gala" + ENTITY: "vincentpierre" + GROUP: "" + RUN_NAME: "" +RL: + PPO: + ppo_epoch: 1 + num_mini_batch: 2 + num_steps: 32 + entropy_coef: 0.0001 + lr: 2.5e-4 + use_normalized_advantage: True + use_linear_lr_decay: False + # max_grad_norm: 1.0e+32 + POLICY: + name: "PointNavBaselinePolicy" + action_distribution_type: "gaussian" + DDPPO: + backbone: "PointNavResNetPolicy" +FORCE_BLIND_POLICY: False diff --git a/habitat_baselines/rl/ddppo/policy/resnet_policy_bpsnav.py b/habitat_baselines/rl/ddppo/policy/resnet_policy_bpsnav.py index 71dbb4ed59..9058732b37 100644 --- a/habitat_baselines/rl/ddppo/policy/resnet_policy_bpsnav.py +++ b/habitat_baselines/rl/ddppo/policy/resnet_policy_bpsnav.py @@ -12,6 +12,7 @@ from gym import spaces from habitat.config import Config +from habitat.core.logging import logger from habitat_baselines.common.baseline_registry import baseline_registry from habitat_baselines.rl.ddppo.policy import resnet_bpsnav as resnet from habitat_baselines.rl.ddppo.policy.resnet_bpsnav import FixupBasicBlock @@ -344,12 +345,24 @@ def __init__( self._n_prev_action = 32 rnn_input_size = self._n_prev_action - if "pointgoal_with_gps_compass" in observation_space.spaces: - n_input_goal = 3 - self.tgt_embeding = nn.Linear(n_input_goal, 32) - rnn_input_size += 32 - else: - self.tgt_embeding = nn.Sequential() + self.ssc_keys: List[str] = [] + self._n_state = 0 + for k, v in observation_space.spaces.items(): + # All 1-dimension sensors are state sensors. 
+ if len(v.shape) == 1: + self._n_state += v.shape[0] + self.ssc_keys.append(k) + logger.info(f"adding sensor {k} : {v.shape} ") + + self.tgt_embeding = nn.Linear(self._n_state, hidden_size) + rnn_input_size += hidden_size + + # if "pointgoal_with_gps_compass" in observation_space.spaces: + # n_input_goal = 3 + # self.tgt_embeding = nn.Linear(n_input_goal, 32) + # rnn_input_size += 32 + # else: + # self.tgt_embeding = nn.Sequential() self._hidden_size = hidden_size @@ -408,9 +421,7 @@ def forward( rnn_hidden_states, prev_actions, masks, - observations["pointgoal_with_gps_compass"] - if "pointgoal_with_gps_compass" in observations - else None, + {k: observations[k] for k in self.ssc_keys}, ) @torch.jit.export @@ -432,17 +443,22 @@ def rnn_forward( goal_observations: Optional[torch.Tensor] = None, ): inputs: List[torch.Tensor] = [tensorrt_output] - if goal_observations is not None: - goal_observations = torch.stack( - [ - goal_observations[:, 0], - torch.cos(-goal_observations[:, 1]), - torch.sin(-goal_observations[:, 1]), - ], - -1, - ) - goal_observations = self.tgt_embeding(goal_observations) - inputs.append(goal_observations) + # if goal_observations is not None: + # goal_observations = torch.stack( + # [ + # goal_observations[:, 0], + # torch.cos(-goal_observations[:, 1]), + # torch.sin(-goal_observations[:, 1]), + # ], + # -1, + # ) + # goal_observations = self.tgt_embeding(goal_observations) + # inputs.append(goal_observations) + state_inputs = torch.cat( + [goal_observations[k] for k in self.ssc_keys], -1 + ) + state_inputs = self.tgt_embeding(state_inputs) + inputs.append(state_inputs) # prev_actions = torch.where(masks, prev_actions + 1, prev_actions.new_zeros(())) # prev_actions = self.prev_action_embedding(prev_actions.view(-1)) diff --git a/habitat_baselines/rl/ppo/policy.py b/habitat_baselines/rl/ppo/policy.py index b7abcf170d..f4548ba872 100644 --- a/habitat_baselines/rl/ppo/policy.py +++ b/habitat_baselines/rl/ppo/policy.py @@ -4,19 +4,14 @@ # This source code is licensed under the MIT license found in the # LICENSE file in the root directory of this source tree. import abc +from typing import List import torch from gym import spaces from torch import nn as nn from habitat.config import Config -from habitat.core.batched_env import ( - EEStartSensorConfig, - EETargetSensorConfig, - JointSensorConfig, - RobotStartSensorConfig, - RobotTargetSensorConfig, -) +from habitat.core.logging import logger from habitat_baselines.common.baseline_registry import baseline_registry from habitat_baselines.rl.models.rnn_state_encoder import ( build_rnn_state_encoder, @@ -207,20 +202,13 @@ def __init__( #### [gala_kinematic] Manually adding sensors in there self.observation_space = observation_space self._n_state = 0 - - self.ssc_dict = { - x.obs_key: x - for x in [ - RobotStartSensorConfig(), - RobotTargetSensorConfig(), - EEStartSensorConfig(), - EETargetSensorConfig(), - JointSensorConfig(), - ] - } - for k, v in self.ssc_dict.items(): - if k in self.observation_space.spaces: - self._n_state += v.shape + self.ssc_keys: List[str] = [] + for k, v in self.observation_space.spaces.items(): + # All 1-dimension sensors are state sensors. 
+ if len(v.shape) == 1: + self._n_state += v.shape[0] + self.ssc_keys.append(k) + logger.info(f"adding sensor {k} : {v.shape} ") # if ( # IntegratedPointGoalGPSAndCompassSensor.cls_uuid @@ -269,9 +257,8 @@ def num_recurrent_layers(self): def forward(self, observations, rnn_hidden_states, prev_actions, masks): #### [gala_kinematic] Manually adding sensors in there x = [self.visual_encoder(observations)] - for k in self.ssc_dict.keys(): - if k in self.observation_space.spaces: - x += [observations[k]] + for k in self.ssc_keys: + x += [observations[k]] # if IntegratedPointGoalGPSAndCompassSensor.cls_uuid in observations: # target_encoding = observations[ diff --git a/habitat_baselines/rl/ppo/ppo.py b/habitat_baselines/rl/ppo/ppo.py index d9a3f190ec..cbdb451f89 100644 --- a/habitat_baselines/rl/ppo/ppo.py +++ b/habitat_baselines/rl/ppo/ppo.py @@ -4,7 +4,7 @@ # This source code is licensed under the MIT license found in the # LICENSE file in the root directory of this source tree. -from typing import Optional, Tuple +from typing import Iterable, Optional, Tuple, Union import torch from torch import Tensor @@ -15,7 +15,7 @@ from habitat_baselines.common.rollout_storage import RolloutStorage from habitat_baselines.rl.ppo.policy import Policy -use_mixed_precision = True +use_mixed_precision = False from torch.cuda.amp import GradScaler, autocast @@ -31,6 +31,31 @@ def __exit__(self, exc_type, exc_value, traceback): EPS_PPO = 1e-5 +def get_grad_norm( + parameters: Union[torch.Tensor, Iterable[torch.Tensor]], + norm_type: float = 2.0, +) -> float: + r""" + Calculates the norm of the gradient + """ + if isinstance(parameters, torch.Tensor): + parameters = [parameters] + parameters = [p for p in parameters if p.grad is not None] + if len(parameters) == 0: + return torch.tensor(0.0) + device = parameters[0].grad.device + total_norm = torch.norm( + torch.stack( + [ + torch.norm(p.grad.detach(), norm_type).to(device) + for p in parameters + ] + ), + norm_type, + ) + return total_norm.item() + + class PPO(nn.Module): def __init__( self, @@ -91,12 +116,15 @@ def get_advantages(self, rollouts: RolloutStorage) -> Tensor: return (advantages - advantages.mean()) / (advantages.std() + EPS_PPO) - def update(self, rollouts: RolloutStorage) -> Tuple[float, float, float]: + def update( + self, rollouts: RolloutStorage + ) -> Tuple[float, float, float, float]: advantages = self.get_advantages(rollouts) value_loss_epoch = 0.0 action_loss_epoch = 0.0 dist_entropy_epoch = 0.0 + grad_norm_epoch = 0.0 for _e in range(self.ppo_epoch): # profiling_wrapper.range_push("PPO.update epoch") @@ -179,6 +207,11 @@ def update(self, rollouts: RolloutStorage) -> Tuple[float, float, float]: self.after_backward(total_loss) self.before_step() + + grad_norm_epoch += get_grad_norm( + self.actor_critic.parameters() + ) + profiling_wrapper.range_push("optimizer.step") if use_mixed_precision: self.grad_scaler.step(self.optimizer) @@ -200,8 +233,14 @@ def update(self, rollouts: RolloutStorage) -> Tuple[float, float, float]: value_loss_epoch /= num_updates action_loss_epoch /= num_updates dist_entropy_epoch /= num_updates + grad_norm_epoch /= num_updates - return value_loss_epoch, action_loss_epoch, dist_entropy_epoch + return ( + value_loss_epoch, + action_loss_epoch, + dist_entropy_epoch, + grad_norm_epoch, + ) def _evaluate_actions( self, observations, rnn_hidden_states, prev_actions, masks, action diff --git a/habitat_baselines/rl/ppo/ppo_trainer.py b/habitat_baselines/rl/ppo/ppo_trainer.py index 72fcb97171..a0b2070d0d 100644 --- 
a/habitat_baselines/rl/ppo/ppo_trainer.py +++ b/habitat_baselines/rl/ppo/ppo_trainer.py @@ -61,6 +61,7 @@ construct_batched_envs, construct_envs, ) +from habitat_baselines.utils.render_wrapper import overlay_frame try: from habitat_sim.utils import viz_utils as vut @@ -551,6 +552,8 @@ def _collect_environment_result(self, buffer_index: int = 0): if self.config.BATCHED_ENV: batch = batched_observations + for obs_name in batch: + batch[obs_name] = batch[obs_name].to(self.device) else: batch = batch_obs( observations, @@ -653,7 +656,7 @@ def _update_agent(self): self.agent.train() - value_loss, action_loss, dist_entropy = self.agent.update( + value_loss, action_loss, dist_entropy, grad_norm = self.agent.update( self.rollouts ) @@ -664,6 +667,7 @@ def _update_agent(self): value_loss, action_loss, dist_entropy, + grad_norm, ) def _coalesce_post_step( @@ -742,16 +746,16 @@ def _training_log( {"updates": float(self.num_updates_done)}, self.num_steps_done, ) - # log stats if self.num_updates_done % self.config.LOG_INTERVAL == 0: t_curr = time.time() + fps = (self.num_steps_done - self.recent_num_steps_done) / ( + (t_curr - self.t_recent) + ) + writer.add_scalar("metrics/fps", fps, self.num_steps_done) + logger.info( - "update: {}\tfps: {:.3f}\t".format( - self.num_updates_done, - (self.num_steps_done - self.recent_num_steps_done) - / ((t_curr - self.t_recent)), - ) + "update: {}\tfps: {:.3f}\t".format(self.num_updates_done, fps) ) self.recent_num_steps_done = self.num_steps_done self.t_recent = t_curr @@ -925,6 +929,7 @@ def train(self) -> None: value_loss, action_loss, dist_entropy, + grad_norm, ) = self._update_agent() if ppo_cfg.use_linear_lr_decay: @@ -936,6 +941,7 @@ def train(self) -> None: value_loss=value_loss, action_loss=action_loss, entropy_loss=dist_entropy, + grad_norm=grad_norm, ), count_steps_delta, ) @@ -1323,7 +1329,7 @@ def _eval_checkpoint( checkpoint_idx=checkpoint_index, metrics=self._extract_scalars_from_info(infos[i]), tb_writer=writer, - include_frame_number=True, + include_frame_number=False, ) rgb_frames[i] = [] @@ -1334,6 +1340,8 @@ def _eval_checkpoint( frame = observations_to_image( {k: v[i] for k, v in batch.items()}, infos[i] ) + if self.config.VIDEO_RENDER_ALL_INFO: + frame = overlay_frame(frame, infos[i]) rgb_frames[i].append(frame) not_done_masks = not_done_masks.to(device=self.device) diff --git a/habitat_baselines/utils/render_wrapper.py b/habitat_baselines/utils/render_wrapper.py index 77895b6c59..ed7e94267a 100644 --- a/habitat_baselines/utils/render_wrapper.py +++ b/habitat_baselines/utils/render_wrapper.py @@ -7,45 +7,71 @@ from habitat_baselines.utils.gym_adapter import flatten_dict -def append_text_to_image(image: np.ndarray, text: List[str]): - r"""Appends lines of text underneath an image. +def append_text_to_image( + image: np.ndarray, text: List[str], font_size: float = 0.5 +): + r"""Appends lines of text on top of an image. First this will render to the + left-hand side of the image, once that column is full, it will render to + the right hand-side of the image. :param image: the image to put text underneath :param text: The list of strings which will be rendered, separated by new lines. 
     :returns: A new image with text inserted underneath the input image
     """
     h, w, c = image.shape
-    font_size = 0.5
     font_thickness = 1
     font = cv2.FONT_HERSHEY_SIMPLEX
-    text_image = np.zeros_like(image, dtype=np.uint8)
     y = 0
+    left_aligned = True
     for line in text:
         textsize = cv2.getTextSize(line, font, font_size, font_thickness)[0]
         y += textsize[1] + 10
-        x = 10
+        if y > h:
+            left_aligned = False
+            y = textsize[1] + 10
+
+        if left_aligned:
+            x = 10
+        else:
+            x = w - (textsize[0] + 10)
+
         cv2.putText(
-            text_image,
+            image,
             line,
             (x, y),
             font,
             font_size,
-            (255, 255, 255),
+            (0, 0, 0),
+            font_thickness * 2,
+            lineType=cv2.LINE_AA,
+        )
+
+        cv2.putText(
+            image,
+            line,
+            (x, y),
+            font,
+            font_size,
+            (255, 255, 255, 255),
             font_thickness,
             lineType=cv2.LINE_AA,
         )
-    return np.clip(image + text_image, 0, 255)
+
+    return np.clip(image, 0, 255)
 
 
 def overlay_frame(frame, info, additional=None):
     lines = []
     flattened_info = flatten_dict(info)
     for k, v in flattened_info.items():
-        lines.append(f"{k}: {v:.2f}")
+        if isinstance(v, str):
+            lines.append(f"{k}: {v}")
+        else:
+            lines.append(f"{k}: {v:.2f}")
     if additional is not None:
         lines.extend(additional)
-    frame = append_text_to_image(frame, lines)
+    frame = append_text_to_image(frame, lines, font_size=0.25)
     return frame
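Both PointNavBaselinePolicy and the bpsnav ResNet encoder now treat every observation with a 1-D space as a state sensor, concatenate those tensors, and feed them through a single linear embedding instead of the old hard-coded pointgoal branch. A self-contained sketch of that pattern (class and sensor names are illustrative):

import numpy as np
import torch
from gym import spaces
from torch import nn


class StateEmbedding(nn.Module):
    """Embed every 1-D observation into a single hidden vector."""

    def __init__(self, observation_space: spaces.Dict, hidden_size: int = 32):
        super().__init__()
        # All 1-dimension sensors are state sensors, as in the patch.
        self.state_keys = [
            k for k, v in observation_space.spaces.items() if len(v.shape) == 1
        ]
        n_state = sum(
            observation_space.spaces[k].shape[0] for k in self.state_keys
        )
        self.embed = nn.Linear(n_state, hidden_size)

    def forward(self, observations: dict) -> torch.Tensor:
        state = torch.cat([observations[k] for k in self.state_keys], dim=-1)
        return self.embed(state)


if __name__ == "__main__":
    obs_space = spaces.Dict(
        {
            "rgb": spaces.Box(0, 255, (128, 128, 3), dtype=np.uint8),
            "joint_pos": spaces.Box(-np.inf, np.inf, (7,), dtype=np.float32),
            "robot_start_relative": spaces.Box(-np.inf, np.inf, (3,), dtype=np.float32),
        }
    )
    model = StateEmbedding(obs_space)
    batch = {
        "joint_pos": torch.zeros(4, 7),
        "robot_start_relative": torch.zeros(4, 3),
    }
    print(model(batch).shape)  # torch.Size([4, 32])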
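get_grad_norm reports the pre-clipping gradient norm, which PPO.update now averages over the epoch and returns alongside the losses so the trainer can log it. A small usage sketch with a toy model; note that torch.nn.utils.clip_grad_norm_ returns the same pre-clipping total norm:

import torch
from torch import nn


def get_grad_norm(parameters, norm_type: float = 2.0) -> float:
    """Total gradient norm over all parameters that currently have a gradient."""
    grads = [p.grad.detach() for p in parameters if p.grad is not None]
    if not grads:
        return 0.0
    return torch.norm(
        torch.stack([torch.norm(g, norm_type) for g in grads]), norm_type
    ).item()


if __name__ == "__main__":
    torch.manual_seed(0)
    model = nn.Linear(8, 2)
    loss = model(torch.randn(16, 8)).pow(2).mean()
    loss.backward()

    norm_before_clip = get_grad_norm(model.parameters())
    # clip_grad_norm_ also returns the total norm measured before clipping.
    reported = nn.utils.clip_grad_norm_(model.parameters(), max_norm=2.0)
    print(norm_before_clip, reported.item())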
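The trainer now moves each batched observation tensor onto the training device and logs a metrics/fps scalar computed between log intervals. A minimal sketch of both pieces (helper names are illustrative):

import time

import torch


def batch_to_device(batch: dict, device: torch.device) -> dict:
    """Move every observation tensor in a batch dict onto one device."""
    return {name: tensor.to(device) for name, tensor in batch.items()}


class FpsMeter:
    """Steps per second between consecutive log points, as in _training_log."""

    def __init__(self, num_steps_done: int = 0):
        self.recent_num_steps_done = num_steps_done
        self.t_recent = time.time()

    def update(self, num_steps_done: int) -> float:
        t_curr = time.time()
        fps = (num_steps_done - self.recent_num_steps_done) / (
            t_curr - self.t_recent
        )
        self.recent_num_steps_done = num_steps_done
        self.t_recent = t_curr
        return fps


if __name__ == "__main__":
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    obs = batch_to_device({"joint_pos": torch.zeros(2, 7)}, device)

    meter = FpsMeter()
    time.sleep(0.1)
    print(meter.update(num_steps_done=512))  # roughly 5120 steps/s in this toy run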
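append_text_to_image now draws each info line twice, a thicker dark pass followed by a white pass, directly onto the frame, and wraps to the right-hand column once the left one fills up; overlay_frame uses it when VIDEO_RENDER_ALL_INFO is set. A standalone sketch of the outline trick, assuming OpenCV is installed (function name is illustrative):

import cv2
import numpy as np


def draw_outlined_text(image, text, org, font_size=0.5, thickness=1):
    """Draw text with a dark outline so it stays readable on any background."""
    font = cv2.FONT_HERSHEY_SIMPLEX
    # Thicker dark pass first, then the regular white pass on top.
    cv2.putText(image, text, org, font, font_size, (0, 0, 0),
                thickness * 2, lineType=cv2.LINE_AA)
    cv2.putText(image, text, org, font, font_size, (255, 255, 255),
                thickness, lineType=cv2.LINE_AA)
    return image


if __name__ == "__main__":
    frame = np.full((128, 256, 3), 127, dtype=np.uint8)
    draw_outlined_text(frame, "success: 1.00", (10, 20))
    draw_outlined_text(frame, "distance_to_goal: 0.12", (10, 40))
    cv2.imwrite("overlay_example.png", frame)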