From 5a3c855319eb00e0babedb399dfa71c6d30cd3d5 Mon Sep 17 00:00:00 2001 From: Pepijn Date: Fri, 29 Aug 2025 09:54:14 +0200 Subject: [PATCH 1/3] use key to determine image --- src/lerobot/processor/converters.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lerobot/processor/converters.py b/src/lerobot/processor/converters.py index f0e0815778d..4eb6faa9640 100644 --- a/src/lerobot/processor/converters.py +++ b/src/lerobot/processor/converters.py @@ -53,7 +53,7 @@ def _is_image(arr: Any) -> bool: def _split_obs_to_state_and_images(obs: dict[str, Any]) -> tuple[dict[str, Any], dict[str, Any]]: state, images = {}, {} for k, v in obs.items(): - if _is_image(v): + if "image" in k.lower() or _is_image(v): images[k] = v else: state[k] = v From 9ab1eae9a0682a95d7972f459a69cc1da23b49c3 Mon Sep 17 00:00:00 2001 From: Pepijn Date: Fri, 29 Aug 2025 10:22:48 +0200 Subject: [PATCH 2/3] Address rest of PR comments --- examples/phone_to_so100/record.py | 2 +- examples/phone_to_so100/teleoperate.py | 7 +- src/lerobot/processor/converters.py | 3 + .../robot_kinematic_processor.py | 112 +++++++----------- .../teleoperators/phone/phone_processor.py | 20 ++-- .../phone/{phone.py => teleop_phone.py} | 0 6 files changed, 57 insertions(+), 87 deletions(-) rename src/lerobot/teleoperators/phone/{phone.py => teleop_phone.py} (100%) diff --git a/examples/phone_to_so100/record.py b/examples/phone_to_so100/record.py index 4ec3948eaab..e9d22ef80d0 100644 --- a/examples/phone_to_so100/record.py +++ b/examples/phone_to_so100/record.py @@ -38,8 +38,8 @@ ) from lerobot.robots.so100_follower.so100_follower import SO100Follower from lerobot.teleoperators.phone.config_phone import PhoneConfig, PhoneOS -from lerobot.teleoperators.phone.phone import Phone from lerobot.teleoperators.phone.phone_processor import MapPhoneActionToRobotAction +from lerobot.teleoperators.phone.teleop_phone import Phone from lerobot.utils.control_utils import init_keyboard_listener from lerobot.utils.utils import log_say from lerobot.utils.visualization_utils import _init_rerun diff --git a/examples/phone_to_so100/teleoperate.py b/examples/phone_to_so100/teleoperate.py index 1671ef904a3..1eef0f8ae10 100644 --- a/examples/phone_to_so100/teleoperate.py +++ b/examples/phone_to_so100/teleoperate.py @@ -28,8 +28,8 @@ ) from lerobot.robots.so100_follower.so100_follower import SO100Follower from lerobot.teleoperators.phone.config_phone import PhoneConfig, PhoneOS -from lerobot.teleoperators.phone.phone import Phone from lerobot.teleoperators.phone.phone_processor import MapPhoneActionToRobotAction +from lerobot.teleoperators.phone.teleop_phone import Phone # Initialize the robot and teleoperator robot_config = SO100FollowerConfig( @@ -81,11 +81,6 @@ print("Starting teleop loop. Move your phone to teleoperate the robot.") while True: - phone_obs = teleop_device.get_action() - if not phone_obs: - time.sleep(0.01) - continue - # Get teleop observation phone_obs = teleop_device.get_action() diff --git a/src/lerobot/processor/converters.py b/src/lerobot/processor/converters.py index 4eb6faa9640..3a8f8b10937 100644 --- a/src/lerobot/processor/converters.py +++ b/src/lerobot/processor/converters.py @@ -116,6 +116,9 @@ def to_output_robot_action(transition: EnvTransition) -> dict[str, Any]: out: dict[str, Any] = {} action_dict = transition.get(TransitionKey.ACTION) or {} + if action_dict is None: + return out + for k, v in action_dict.items(): if isinstance(k, str) and k.startswith("action.") and k.endswith((".pos", ".vel")): out_key = k[len("action.") :] # Strip the 'action.' prefix. diff --git a/src/lerobot/robots/so100_follower/robot_kinematic_processor.py b/src/lerobot/robots/so100_follower/robot_kinematic_processor.py index cff18c161bc..dfbf8b0af7b 100644 --- a/src/lerobot/robots/so100_follower/robot_kinematic_processor.py +++ b/src/lerobot/robots/so100_follower/robot_kinematic_processor.py @@ -19,7 +19,7 @@ import numpy as np from scipy.spatial.transform import Rotation -from lerobot.configs.types import PolicyFeature +from lerobot.configs.types import FeatureType, PolicyFeature from lerobot.model.kinematics import RobotKinematics from lerobot.processor.pipeline import ( ActionProcessor, @@ -123,16 +123,12 @@ def action(self, action): # Write action fields pos = desired[:3, 3] tw = Rotation.from_matrix(desired[:3, :3]).as_rotvec() - new_action.update( - { - "action.ee.x": float(pos[0]), - "action.ee.y": float(pos[1]), - "action.ee.z": float(pos[2]), - "action.ee.wx": float(tw[0]), - "action.ee.wy": float(tw[1]), - "action.ee.wz": float(tw[2]), - } - ) + new_action["action.ee.x"] = float(pos[0]) + new_action["action.ee.y"] = float(pos[1]) + new_action["action.ee.z"] = float(pos[2]) + new_action["action.ee.wx"] = float(tw[0]) + new_action["action.ee.wy"] = float(tw[1]) + new_action["action.ee.wz"] = float(tw[2]) self._prev_enabled = enabled return new_action @@ -167,12 +163,12 @@ class EEBoundsAndSafety(ActionProcessor): _last_twist: np.ndarray | None = field(default=None, init=False, repr=False) def action(self, act: dict) -> dict: - x = act.pop("action.ee.x", None) - y = act.pop("action.ee.y", None) - z = act.pop("action.ee.z", None) - wx = act.pop("action.ee.wx", None) - wy = act.pop("action.ee.wy", None) - wz = act.pop("action.ee.wz", None) + x = act.get("action.ee.x", None) + y = act.get("action.ee.y", None) + z = act.get("action.ee.z", None) + wx = act.get("action.ee.wx", None) + wy = act.get("action.ee.wy", None) + wz = act.get("action.ee.wz", None) if None in (x, y, z, wx, wy, wz): return act @@ -194,16 +190,12 @@ def action(self, act: dict) -> dict: self._last_pos = pos self._last_twist = twist - act.update( - { - "action.ee.x": float(pos[0]), - "action.ee.y": float(pos[1]), - "action.ee.z": float(pos[2]), - "action.ee.wx": float(twist[0]), - "action.ee.wy": float(twist[1]), - "action.ee.wz": float(twist[2]), - } - ) + act["action.ee.x"] = float(pos[0]) + act["action.ee.y"] = float(pos[1]) + act["action.ee.z"] = float(pos[2]) + act["action.ee.wx"] = float(twist[0]) + act["action.ee.wy"] = float(twist[1]) + act["action.ee.wz"] = float(twist[2]) return act def reset(self): @@ -212,12 +204,12 @@ def reset(self): def transform_features(self, features: dict[str, PolicyFeature]) -> dict[str, PolicyFeature]: # Because this is last step we specify the dataset features of this step that we want to be stored in the dataset - features["action.ee.x"] = float - features["action.ee.y"] = float - features["action.ee.z"] = float - features["action.ee.wx"] = float - features["action.ee.wy"] = float - features["action.ee.wz"] = float + features["action.ee.x"] = (PolicyFeature(type=FeatureType.ACTION, shape=(1,)),) + features["action.ee.y"] = (PolicyFeature(type=FeatureType.ACTION, shape=(1,)),) + features["action.ee.z"] = (PolicyFeature(type=FeatureType.ACTION, shape=(1,)),) + features["action.ee.wx"] = (PolicyFeature(type=FeatureType.ACTION, shape=(1,)),) + features["action.ee.wy"] = (PolicyFeature(type=FeatureType.ACTION, shape=(1,)),) + features["action.ee.wz"] = (PolicyFeature(type=FeatureType.ACTION, shape=(1,)),) return features @@ -259,18 +251,6 @@ def __call__(self, transition: EnvTransition) -> EnvTransition: wz = act.get("action.ee.wz", None) if None in (x, y, z, wx, wy, wz): - # Nothing to do; restore what we popped and return - act.update( - { - "action.ee.x": x, - "action.ee.y": y, - "action.ee.z": z, - "action.ee.wx": wx, - "action.ee.wy": wy, - "action.ee.wz": wz, - } - ) - transition[TransitionKey.ACTION] = act return transition # Get joint positions from complimentary data @@ -308,15 +288,15 @@ def __call__(self, transition: EnvTransition) -> EnvTransition: def transform_features(self, features: dict[str, PolicyFeature]) -> dict[str, PolicyFeature]: # We specify the dataset features of this step that we want to be stored in the dataset - features["action.ee.x"] = float - features["action.ee.y"] = float - features["action.ee.z"] = float - features["action.ee.wx"] = float - features["action.ee.wy"] = float - features["action.ee.wz"] = float - - features["observation.state.gripper.pos"] = float - features["action.gripper.pos"] = float + features["action.ee.x"] = (PolicyFeature(type=FeatureType.ACTION, shape=(1,)),) + features["action.ee.y"] = (PolicyFeature(type=FeatureType.ACTION, shape=(1,)),) + features["action.ee.z"] = (PolicyFeature(type=FeatureType.ACTION, shape=(1,)),) + features["action.ee.wx"] = (PolicyFeature(type=FeatureType.ACTION, shape=(1,)),) + features["action.ee.wy"] = (PolicyFeature(type=FeatureType.ACTION, shape=(1,)),) + features["action.ee.wz"] = (PolicyFeature(type=FeatureType.ACTION, shape=(1,)),) + + features["observation.state.gripper.pos"] = (PolicyFeature(type=FeatureType.ACTION, shape=(1,)),) + features["action.gripper.pos"] = (PolicyFeature(type=FeatureType.ACTION, shape=(1,)),) return features def reset(self): @@ -383,14 +363,14 @@ def __call__(self, transition: EnvTransition) -> EnvTransition: new_act.pop("action.gripper", None) transition[TransitionKey.ACTION] = new_act - obs.update({"observation.state.gripper.pos": curr_pos}) + obs["observation.state.gripper.pos"] = curr_pos transition[TransitionKey.OBSERVATION] = obs return transition def transform_features(self, features: dict[str, PolicyFeature]) -> dict[str, PolicyFeature]: # We specify the dataset features of this step that we want to be stored in the dataset - features["observation.state.gripper.pos"] = float - features["action.gripper.pos"] = float + features["observation.state.gripper.pos"] = (PolicyFeature(type=FeatureType.ACTION, shape=(1,)),) + features["action.gripper.pos"] = (PolicyFeature(type=FeatureType.ACTION, shape=(1,)),) return features @@ -423,22 +403,18 @@ def observation(self, obs: dict) -> dict: pos = t[:3, 3] tw = Rotation.from_matrix(t[:3, :3]).as_rotvec() - obs.update( - { - "observation.state.ee.x": float(pos[0]), - "observation.state.ee.y": float(pos[1]), - "observation.state.ee.z": float(pos[2]), - "observation.state.ee.wx": float(tw[0]), - "observation.state.ee.wy": float(tw[1]), - "observation.state.ee.wz": float(tw[2]), - } - ) + obs["observation.state.ee.x"] = float(pos[0]) + obs["observation.state.ee.y"] = float(pos[1]) + obs["observation.state.ee.z"] = float(pos[2]) + obs["observation.state.ee.wx"] = float(tw[0]) + obs["observation.state.ee.wy"] = float(tw[1]) + obs["observation.state.ee.wz"] = float(tw[2]) return obs def transform_features(self, features: dict[str, PolicyFeature]) -> dict[str, PolicyFeature]: # We specify the dataset features of this step that we want to be stored in the dataset for k in ["x", "y", "z", "wx", "wy", "wz"]: - features[f"observation.state.ee.{k}"] = float + features[f"observation.state.ee.{k}"] = (PolicyFeature(type=FeatureType.ACTION, shape=(1,)),) return features diff --git a/src/lerobot/teleoperators/phone/phone_processor.py b/src/lerobot/teleoperators/phone/phone_processor.py index 37a3d21027f..92fb7452bd2 100644 --- a/src/lerobot/teleoperators/phone/phone_processor.py +++ b/src/lerobot/teleoperators/phone/phone_processor.py @@ -68,16 +68,12 @@ def action(self, act: dict) -> dict: ) # Positive if a is pressed, negative if b is pressed, 0 if both or neither are pressed # For some actions we need to invert the axis - act.update( - { - "action.enabled": enabled, - "action.target_x": -pos[1] if enabled else 0.0, - "action.target_y": pos[0] if enabled else 0.0, - "action.target_z": pos[2] if enabled else 0.0, - "action.target_wx": rotvec[1] if enabled else 0.0, - "action.target_wy": rotvec[0] if enabled else 0.0, - "action.target_wz": -rotvec[2] if enabled else 0.0, - "action.gripper": gripper, # Still send gripper action when disabled - } - ) + act["action.enabled"] = enabled + act["action.target_x"] = -pos[1] if enabled else 0.0 + act["action.target_y"] = pos[0] if enabled else 0.0 + act["action.target_z"] = pos[2] if enabled else 0.0 + act["action.target_wx"] = rotvec[1] if enabled else 0.0 + act["action.target_wy"] = rotvec[0] if enabled else 0.0 + act["action.target_wz"] = -rotvec[2] if enabled else 0.0 + act["action.gripper"] = gripper # Still send gripper action when disabled return act diff --git a/src/lerobot/teleoperators/phone/phone.py b/src/lerobot/teleoperators/phone/teleop_phone.py similarity index 100% rename from src/lerobot/teleoperators/phone/phone.py rename to src/lerobot/teleoperators/phone/teleop_phone.py From 9e2500f5b7c659bc8e4bd3feea88e3cf4bc59f50 Mon Sep 17 00:00:00 2001 From: Pepijn Date: Fri, 29 Aug 2025 10:33:24 +0200 Subject: [PATCH 3/3] use PolicyFeatures in transform_features --- .../robot_kinematic_processor.py | 12 ++++++------ .../teleoperators/phone/phone_processor.py | 18 +++++++++--------- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/src/lerobot/robots/so100_follower/robot_kinematic_processor.py b/src/lerobot/robots/so100_follower/robot_kinematic_processor.py index 8789d17f366..7c6c73a4d96 100644 --- a/src/lerobot/robots/so100_follower/robot_kinematic_processor.py +++ b/src/lerobot/robots/so100_follower/robot_kinematic_processor.py @@ -147,12 +147,12 @@ def transform_features(self, features: dict[str, PolicyFeature]) -> dict[str, Po features.pop("action.target_wy", None) features.pop("action.target_wz", None) - features["action.ee.x"] = float - features["action.ee.y"] = float - features["action.ee.z"] = float - features["action.ee.wx"] = float - features["action.ee.wy"] = float - features["action.ee.wz"] = float + features["action.ee.x"] = (PolicyFeature(type=FeatureType.ACTION, shape=(1,)),) + features["action.ee.y"] = (PolicyFeature(type=FeatureType.ACTION, shape=(1,)),) + features["action.ee.z"] = (PolicyFeature(type=FeatureType.ACTION, shape=(1,)),) + features["action.ee.wx"] = (PolicyFeature(type=FeatureType.ACTION, shape=(1,)),) + features["action.ee.wy"] = (PolicyFeature(type=FeatureType.ACTION, shape=(1,)),) + features["action.ee.wz"] = (PolicyFeature(type=FeatureType.ACTION, shape=(1,)),) return features diff --git a/src/lerobot/teleoperators/phone/phone_processor.py b/src/lerobot/teleoperators/phone/phone_processor.py index 36df760989e..36880e0c85f 100644 --- a/src/lerobot/teleoperators/phone/phone_processor.py +++ b/src/lerobot/teleoperators/phone/phone_processor.py @@ -16,7 +16,7 @@ from dataclasses import dataclass, field -from lerobot.configs.types import PolicyFeature +from lerobot.configs.types import FeatureType, PolicyFeature from lerobot.processor.pipeline import ActionProcessor, ProcessorStepRegistry from lerobot.teleoperators.phone.config_phone import PhoneOS @@ -85,12 +85,12 @@ def transform_features(self, features: dict[str, PolicyFeature]) -> dict[str, Po features.pop("action.phone.rot", None) features.pop("action.phone.raw_inputs", None) - features["action.enabled"] = bool - features["action.target_x"] = float - features["action.target_y"] = float - features["action.target_z"] = float - features["action.target_wx"] = float - features["action.target_wy"] = float - features["action.target_wz"] = float - features["action.gripper"] = float + features["action.enabled"] = (PolicyFeature(type=FeatureType.ACTION, shape=(1,)),) + features["action.target_x"] = (PolicyFeature(type=FeatureType.ACTION, shape=(1,)),) + features["action.target_y"] = (PolicyFeature(type=FeatureType.ACTION, shape=(1,)),) + features["action.target_z"] = (PolicyFeature(type=FeatureType.ACTION, shape=(1,)),) + features["action.target_wx"] = (PolicyFeature(type=FeatureType.ACTION, shape=(1,)),) + features["action.target_wy"] = (PolicyFeature(type=FeatureType.ACTION, shape=(1,)),) + features["action.target_wz"] = (PolicyFeature(type=FeatureType.ACTION, shape=(1,)),) + features["action.gripper"] = (PolicyFeature(type=FeatureType.ACTION, shape=(1,)),) return features