diff --git a/examples/phone_to_so100/record.py b/examples/phone_to_so100/record.py index 4ec3948eaab..e9d22ef80d0 100644 --- a/examples/phone_to_so100/record.py +++ b/examples/phone_to_so100/record.py @@ -38,8 +38,8 @@ ) from lerobot.robots.so100_follower.so100_follower import SO100Follower from lerobot.teleoperators.phone.config_phone import PhoneConfig, PhoneOS -from lerobot.teleoperators.phone.phone import Phone from lerobot.teleoperators.phone.phone_processor import MapPhoneActionToRobotAction +from lerobot.teleoperators.phone.teleop_phone import Phone from lerobot.utils.control_utils import init_keyboard_listener from lerobot.utils.utils import log_say from lerobot.utils.visualization_utils import _init_rerun diff --git a/examples/phone_to_so100/teleoperate.py b/examples/phone_to_so100/teleoperate.py index 1671ef904a3..1eef0f8ae10 100644 --- a/examples/phone_to_so100/teleoperate.py +++ b/examples/phone_to_so100/teleoperate.py @@ -28,8 +28,8 @@ ) from lerobot.robots.so100_follower.so100_follower import SO100Follower from lerobot.teleoperators.phone.config_phone import PhoneConfig, PhoneOS -from lerobot.teleoperators.phone.phone import Phone from lerobot.teleoperators.phone.phone_processor import MapPhoneActionToRobotAction +from lerobot.teleoperators.phone.teleop_phone import Phone # Initialize the robot and teleoperator robot_config = SO100FollowerConfig( @@ -81,11 +81,6 @@ print("Starting teleop loop. Move your phone to teleoperate the robot.") while True: - phone_obs = teleop_device.get_action() - if not phone_obs: - time.sleep(0.01) - continue - # Get teleop observation phone_obs = teleop_device.get_action() diff --git a/src/lerobot/processor/converters.py b/src/lerobot/processor/converters.py index f0e0815778d..3a8f8b10937 100644 --- a/src/lerobot/processor/converters.py +++ b/src/lerobot/processor/converters.py @@ -53,7 +53,7 @@ def _is_image(arr: Any) -> bool: def _split_obs_to_state_and_images(obs: dict[str, Any]) -> tuple[dict[str, Any], dict[str, Any]]: state, images = {}, {} for k, v in obs.items(): - if _is_image(v): + if "image" in k.lower() or _is_image(v): images[k] = v else: state[k] = v @@ -116,6 +116,9 @@ def to_output_robot_action(transition: EnvTransition) -> dict[str, Any]: out: dict[str, Any] = {} action_dict = transition.get(TransitionKey.ACTION) or {} + if action_dict is None: + return out + for k, v in action_dict.items(): if isinstance(k, str) and k.startswith("action.") and k.endswith((".pos", ".vel")): out_key = k[len("action.") :] # Strip the 'action.' prefix. diff --git a/src/lerobot/robots/so100_follower/robot_kinematic_processor.py b/src/lerobot/robots/so100_follower/robot_kinematic_processor.py index cf7b51fe60c..7c6c73a4d96 100644 --- a/src/lerobot/robots/so100_follower/robot_kinematic_processor.py +++ b/src/lerobot/robots/so100_follower/robot_kinematic_processor.py @@ -19,7 +19,7 @@ import numpy as np from scipy.spatial.transform import Rotation -from lerobot.configs.types import PolicyFeature +from lerobot.configs.types import FeatureType, PolicyFeature from lerobot.model.kinematics import RobotKinematics from lerobot.processor.pipeline import ( ActionProcessor, @@ -123,16 +123,12 @@ def action(self, action): # Write action fields pos = desired[:3, 3] tw = Rotation.from_matrix(desired[:3, :3]).as_rotvec() - new_action.update( - { - "action.ee.x": float(pos[0]), - "action.ee.y": float(pos[1]), - "action.ee.z": float(pos[2]), - "action.ee.wx": float(tw[0]), - "action.ee.wy": float(tw[1]), - "action.ee.wz": float(tw[2]), - } - ) + new_action["action.ee.x"] = float(pos[0]) + new_action["action.ee.y"] = float(pos[1]) + new_action["action.ee.z"] = float(pos[2]) + new_action["action.ee.wx"] = float(tw[0]) + new_action["action.ee.wy"] = float(tw[1]) + new_action["action.ee.wz"] = float(tw[2]) self._prev_enabled = enabled return new_action @@ -151,12 +147,12 @@ def transform_features(self, features: dict[str, PolicyFeature]) -> dict[str, Po features.pop("action.target_wy", None) features.pop("action.target_wz", None) - features["action.ee.x"] = float - features["action.ee.y"] = float - features["action.ee.z"] = float - features["action.ee.wx"] = float - features["action.ee.wy"] = float - features["action.ee.wz"] = float + features["action.ee.x"] = (PolicyFeature(type=FeatureType.ACTION, shape=(1,)),) + features["action.ee.y"] = (PolicyFeature(type=FeatureType.ACTION, shape=(1,)),) + features["action.ee.z"] = (PolicyFeature(type=FeatureType.ACTION, shape=(1,)),) + features["action.ee.wx"] = (PolicyFeature(type=FeatureType.ACTION, shape=(1,)),) + features["action.ee.wy"] = (PolicyFeature(type=FeatureType.ACTION, shape=(1,)),) + features["action.ee.wz"] = (PolicyFeature(type=FeatureType.ACTION, shape=(1,)),) return features @@ -184,12 +180,12 @@ class EEBoundsAndSafety(ActionProcessor): _last_twist: np.ndarray | None = field(default=None, init=False, repr=False) def action(self, act: dict) -> dict: - x = act.pop("action.ee.x", None) - y = act.pop("action.ee.y", None) - z = act.pop("action.ee.z", None) - wx = act.pop("action.ee.wx", None) - wy = act.pop("action.ee.wy", None) - wz = act.pop("action.ee.wz", None) + x = act.get("action.ee.x", None) + y = act.get("action.ee.y", None) + z = act.get("action.ee.z", None) + wx = act.get("action.ee.wx", None) + wy = act.get("action.ee.wy", None) + wz = act.get("action.ee.wz", None) if None in (x, y, z, wx, wy, wz): return act @@ -211,16 +207,12 @@ def action(self, act: dict) -> dict: self._last_pos = pos self._last_twist = twist - act.update( - { - "action.ee.x": float(pos[0]), - "action.ee.y": float(pos[1]), - "action.ee.z": float(pos[2]), - "action.ee.wx": float(twist[0]), - "action.ee.wy": float(twist[1]), - "action.ee.wz": float(twist[2]), - } - ) + act["action.ee.x"] = float(pos[0]) + act["action.ee.y"] = float(pos[1]) + act["action.ee.z"] = float(pos[2]) + act["action.ee.wx"] = float(twist[0]) + act["action.ee.wy"] = float(twist[1]) + act["action.ee.wz"] = float(twist[2]) return act def reset(self): @@ -266,18 +258,6 @@ def __call__(self, transition: EnvTransition) -> EnvTransition: wz = act.get("action.ee.wz", None) if None in (x, y, z, wx, wy, wz): - # Nothing to do; restore what we popped and return - act.update( - { - "action.ee.x": x, - "action.ee.y": y, - "action.ee.z": z, - "action.ee.wx": wx, - "action.ee.wy": wy, - "action.ee.wz": wz, - } - ) - transition[TransitionKey.ACTION] = act return transition # Get joint positions from complimentary data @@ -314,10 +294,10 @@ def __call__(self, transition: EnvTransition) -> EnvTransition: return transition def transform_features(self, features: dict[str, PolicyFeature]) -> dict[str, PolicyFeature]: - features["observation.state.gripper.pos"] = float - features["action.gripper.pos"] = float + features["observation.state.gripper.pos"] = (PolicyFeature(type=FeatureType.ACTION, shape=(1,)),) + features["action.gripper.pos"] = (PolicyFeature(type=FeatureType.ACTION, shape=(1,)),) for name in self.motor_names: - features[f"action.{name}.pos"] = float + features[f"action.{name}.pos"] = (PolicyFeature(type=FeatureType.ACTION, shape=(1,)),) return features @@ -385,13 +365,13 @@ def __call__(self, transition: EnvTransition) -> EnvTransition: new_act.pop("action.gripper", None) transition[TransitionKey.ACTION] = new_act - obs.update({"observation.state.gripper.pos": curr_pos}) + obs["observation.state.gripper.pos"] = curr_pos transition[TransitionKey.OBSERVATION] = obs return transition def transform_features(self, features: dict[str, PolicyFeature]) -> dict[str, PolicyFeature]: features.pop("action.gripper", None) - features["action.gripper.pos"] = float + features["action.gripper.pos"] = (PolicyFeature(type=FeatureType.ACTION, shape=(1,)),) return features @@ -424,22 +404,18 @@ def observation(self, obs: dict) -> dict: pos = t[:3, 3] tw = Rotation.from_matrix(t[:3, :3]).as_rotvec() - obs.update( - { - "observation.state.ee.x": float(pos[0]), - "observation.state.ee.y": float(pos[1]), - "observation.state.ee.z": float(pos[2]), - "observation.state.ee.wx": float(tw[0]), - "observation.state.ee.wy": float(tw[1]), - "observation.state.ee.wz": float(tw[2]), - } - ) + obs["observation.state.ee.x"] = float(pos[0]) + obs["observation.state.ee.y"] = float(pos[1]) + obs["observation.state.ee.z"] = float(pos[2]) + obs["observation.state.ee.wx"] = float(tw[0]) + obs["observation.state.ee.wy"] = float(tw[1]) + obs["observation.state.ee.wz"] = float(tw[2]) return obs def transform_features(self, features: dict[str, PolicyFeature]) -> dict[str, PolicyFeature]: # We specify the dataset features of this step that we want to be stored in the dataset for k in ["x", "y", "z", "wx", "wy", "wz"]: - features[f"observation.state.ee.{k}"] = float + features[f"observation.state.ee.{k}"] = (PolicyFeature(type=FeatureType.ACTION, shape=(1,)),) return features diff --git a/src/lerobot/teleoperators/phone/phone_processor.py b/src/lerobot/teleoperators/phone/phone_processor.py index 9801cf90f92..36880e0c85f 100644 --- a/src/lerobot/teleoperators/phone/phone_processor.py +++ b/src/lerobot/teleoperators/phone/phone_processor.py @@ -16,7 +16,7 @@ from dataclasses import dataclass, field -from lerobot.configs.types import PolicyFeature +from lerobot.configs.types import FeatureType, PolicyFeature from lerobot.processor.pipeline import ActionProcessor, ProcessorStepRegistry from lerobot.teleoperators.phone.config_phone import PhoneOS @@ -69,18 +69,14 @@ def action(self, act: dict) -> dict: ) # Positive if a is pressed, negative if b is pressed, 0 if both or neither are pressed # For some actions we need to invert the axis - act.update( - { - "action.enabled": enabled, - "action.target_x": -pos[1] if enabled else 0.0, - "action.target_y": pos[0] if enabled else 0.0, - "action.target_z": pos[2] if enabled else 0.0, - "action.target_wx": rotvec[1] if enabled else 0.0, - "action.target_wy": rotvec[0] if enabled else 0.0, - "action.target_wz": -rotvec[2] if enabled else 0.0, - "action.gripper": gripper, # Still send gripper action when disabled - } - ) + act["action.enabled"] = enabled + act["action.target_x"] = -pos[1] if enabled else 0.0 + act["action.target_y"] = pos[0] if enabled else 0.0 + act["action.target_z"] = pos[2] if enabled else 0.0 + act["action.target_wx"] = rotvec[1] if enabled else 0.0 + act["action.target_wy"] = rotvec[0] if enabled else 0.0 + act["action.target_wz"] = -rotvec[2] if enabled else 0.0 + act["action.gripper"] = gripper # Still send gripper action when disabled return act def transform_features(self, features: dict[str, PolicyFeature]) -> dict[str, PolicyFeature]: @@ -89,12 +85,12 @@ def transform_features(self, features: dict[str, PolicyFeature]) -> dict[str, Po features.pop("action.phone.rot", None) features.pop("action.phone.raw_inputs", None) - features["action.enabled"] = bool - features["action.target_x"] = float - features["action.target_y"] = float - features["action.target_z"] = float - features["action.target_wx"] = float - features["action.target_wy"] = float - features["action.target_wz"] = float - features["action.gripper"] = float + features["action.enabled"] = (PolicyFeature(type=FeatureType.ACTION, shape=(1,)),) + features["action.target_x"] = (PolicyFeature(type=FeatureType.ACTION, shape=(1,)),) + features["action.target_y"] = (PolicyFeature(type=FeatureType.ACTION, shape=(1,)),) + features["action.target_z"] = (PolicyFeature(type=FeatureType.ACTION, shape=(1,)),) + features["action.target_wx"] = (PolicyFeature(type=FeatureType.ACTION, shape=(1,)),) + features["action.target_wy"] = (PolicyFeature(type=FeatureType.ACTION, shape=(1,)),) + features["action.target_wz"] = (PolicyFeature(type=FeatureType.ACTION, shape=(1,)),) + features["action.gripper"] = (PolicyFeature(type=FeatureType.ACTION, shape=(1,)),) return features diff --git a/src/lerobot/teleoperators/phone/phone.py b/src/lerobot/teleoperators/phone/teleop_phone.py similarity index 100% rename from src/lerobot/teleoperators/phone/phone.py rename to src/lerobot/teleoperators/phone/teleop_phone.py