huggingface · pkooij · Aug 29, 2025 · Aug 29, 2025 · Aug 29, 2025 · Aug 29, 2025
diff --git a/examples/phone_to_so100/record.py b/examples/phone_to_so100/record.py
@@ -38,8 +38,8 @@
 )
 from lerobot.robots.so100_follower.so100_follower import SO100Follower
 from lerobot.teleoperators.phone.config_phone import PhoneConfig, PhoneOS
-from lerobot.teleoperators.phone.phone import Phone
 from lerobot.teleoperators.phone.phone_processor import MapPhoneActionToRobotAction
+from lerobot.teleoperators.phone.teleop_phone import Phone
 from lerobot.utils.control_utils import init_keyboard_listener
 from lerobot.utils.utils import log_say
 from lerobot.utils.visualization_utils import _init_rerun

diff --git a/examples/phone_to_so100/teleoperate.py b/examples/phone_to_so100/teleoperate.py
@@ -28,8 +28,8 @@
 )
 from lerobot.robots.so100_follower.so100_follower import SO100Follower
 from lerobot.teleoperators.phone.config_phone import PhoneConfig, PhoneOS
-from lerobot.teleoperators.phone.phone import Phone
 from lerobot.teleoperators.phone.phone_processor import MapPhoneActionToRobotAction
+from lerobot.teleoperators.phone.teleop_phone import Phone
 
 # Initialize the robot and teleoperator
 robot_config = SO100FollowerConfig(
@@ -81,11 +81,6 @@
 
 print("Starting teleop loop. Move your phone to teleoperate the robot.")
 while True:
-    phone_obs = teleop_device.get_action()
-    if not phone_obs:
-        time.sleep(0.01)
-        continue
-
     # Get teleop observation
     phone_obs = teleop_device.get_action()
 

diff --git a/src/lerobot/processor/converters.py b/src/lerobot/processor/converters.py
@@ -53,7 +53,7 @@ def _is_image(arr: Any) -> bool:
 def _split_obs_to_state_and_images(obs: dict[str, Any]) -> tuple[dict[str, Any], dict[str, Any]]:
     state, images = {}, {}
     for k, v in obs.items():
-        if _is_image(v):
+        if "image" in k.lower() or _is_image(v):
             images[k] = v
         else:
             state[k] = v
@@ -116,6 +116,9 @@ def to_output_robot_action(transition: EnvTransition) -> dict[str, Any]:
     out: dict[str, Any] = {}
     action_dict = transition.get(TransitionKey.ACTION) or {}
 
+    if action_dict is None:
+        return out
+
     for k, v in action_dict.items():
         if isinstance(k, str) and k.startswith("action.") and k.endswith((".pos", ".vel")):
             out_key = k[len("action.") :]  # Strip the 'action.' prefix.

diff --git a/src/lerobot/robots/so100_follower/robot_kinematic_processor.py b/src/lerobot/robots/so100_follower/robot_kinematic_processor.py
@@ -19,7 +19,7 @@
 import numpy as np
 from scipy.spatial.transform import Rotation
 
-from lerobot.configs.types import PolicyFeature
+from lerobot.configs.types import FeatureType, PolicyFeature
 from lerobot.model.kinematics import RobotKinematics
 from lerobot.processor.pipeline import (
     ActionProcessor,
@@ -123,16 +123,12 @@ def action(self, action):
         # Write action fields
         pos = desired[:3, 3]
         tw = Rotation.from_matrix(desired[:3, :3]).as_rotvec()
-        new_action.update(
-            {
-                "action.ee.x": float(pos[0]),
-                "action.ee.y": float(pos[1]),
-                "action.ee.z": float(pos[2]),
-                "action.ee.wx": float(tw[0]),
-                "action.ee.wy": float(tw[1]),
-                "action.ee.wz": float(tw[2]),
-            }
-        )
+        new_action["action.ee.x"] = float(pos[0])
+        new_action["action.ee.y"] = float(pos[1])
+        new_action["action.ee.z"] = float(pos[2])
+        new_action["action.ee.wx"] = float(tw[0])
+        new_action["action.ee.wy"] = float(tw[1])
+        new_action["action.ee.wz"] = float(tw[2])
 
         self._prev_enabled = enabled
         return new_action
@@ -151,12 +147,12 @@ def transform_features(self, features: dict[str, PolicyFeature]) -> dict[str, Po
         features.pop("action.target_wy", None)
         features.pop("action.target_wz", None)
 
-        features["action.ee.x"] = float
-        features["action.ee.y"] = float
-        features["action.ee.z"] = float
-        features["action.ee.wx"] = float
-        features["action.ee.wy"] = float
-        features["action.ee.wz"] = float
+        features["action.ee.x"] = (PolicyFeature(type=FeatureType.ACTION, shape=(1,)),)
+        features["action.ee.y"] = (PolicyFeature(type=FeatureType.ACTION, shape=(1,)),)
+        features["action.ee.z"] = (PolicyFeature(type=FeatureType.ACTION, shape=(1,)),)
+        features["action.ee.wx"] = (PolicyFeature(type=FeatureType.ACTION, shape=(1,)),)
+        features["action.ee.wy"] = (PolicyFeature(type=FeatureType.ACTION, shape=(1,)),)
+        features["action.ee.wz"] = (PolicyFeature(type=FeatureType.ACTION, shape=(1,)),)
         return features
 
 
@@ -184,12 +180,12 @@ class EEBoundsAndSafety(ActionProcessor):
     _last_twist: np.ndarray | None = field(default=None, init=False, repr=False)
 
     def action(self, act: dict) -> dict:
-        x = act.pop("action.ee.x", None)
-        y = act.pop("action.ee.y", None)
-        z = act.pop("action.ee.z", None)
-        wx = act.pop("action.ee.wx", None)
-        wy = act.pop("action.ee.wy", None)
-        wz = act.pop("action.ee.wz", None)
+        x = act.get("action.ee.x", None)
+        y = act.get("action.ee.y", None)
+        z = act.get("action.ee.z", None)
+        wx = act.get("action.ee.wx", None)
+        wy = act.get("action.ee.wy", None)
+        wz = act.get("action.ee.wz", None)
 
         if None in (x, y, z, wx, wy, wz):
             return act
@@ -211,16 +207,12 @@ def action(self, act: dict) -> dict:
         self._last_pos = pos
         self._last_twist = twist
 
-        act.update(
-            {
-                "action.ee.x": float(pos[0]),
-                "action.ee.y": float(pos[1]),
-                "action.ee.z": float(pos[2]),
-                "action.ee.wx": float(twist[0]),
-                "action.ee.wy": float(twist[1]),
-                "action.ee.wz": float(twist[2]),
-            }
-        )
+        act["action.ee.x"] = float(pos[0])
+        act["action.ee.y"] = float(pos[1])
+        act["action.ee.z"] = float(pos[2])
+        act["action.ee.wx"] = float(twist[0])
+        act["action.ee.wy"] = float(twist[1])
+        act["action.ee.wz"] = float(twist[2])
         return act
 
     def reset(self):
@@ -266,18 +258,6 @@ def __call__(self, transition: EnvTransition) -> EnvTransition:
         wz = act.get("action.ee.wz", None)
 
         if None in (x, y, z, wx, wy, wz):
-            # Nothing to do; restore what we popped and return
-            act.update(
-                {
-                    "action.ee.x": x,
-                    "action.ee.y": y,
-                    "action.ee.z": z,
-                    "action.ee.wx": wx,
-                    "action.ee.wy": wy,
-                    "action.ee.wz": wz,
-                }
-            )
-            transition[TransitionKey.ACTION] = act
             return transition
 
         # Get joint positions from complimentary data
@@ -314,10 +294,10 @@ def __call__(self, transition: EnvTransition) -> EnvTransition:
         return transition
 
     def transform_features(self, features: dict[str, PolicyFeature]) -> dict[str, PolicyFeature]:
-        features["observation.state.gripper.pos"] = float
-        features["action.gripper.pos"] = float
+        features["observation.state.gripper.pos"] = (PolicyFeature(type=FeatureType.ACTION, shape=(1,)),)
+        features["action.gripper.pos"] = (PolicyFeature(type=FeatureType.ACTION, shape=(1,)),)
         for name in self.motor_names:
-            features[f"action.{name}.pos"] = float
+            features[f"action.{name}.pos"] = (PolicyFeature(type=FeatureType.ACTION, shape=(1,)),)
 
         return features
 
@@ -385,13 +365,13 @@ def __call__(self, transition: EnvTransition) -> EnvTransition:
         new_act.pop("action.gripper", None)
         transition[TransitionKey.ACTION] = new_act
 
-        obs.update({"observation.state.gripper.pos": curr_pos})
+        obs["observation.state.gripper.pos"] = curr_pos
         transition[TransitionKey.OBSERVATION] = obs
         return transition
 
     def transform_features(self, features: dict[str, PolicyFeature]) -> dict[str, PolicyFeature]:
         features.pop("action.gripper", None)
-        features["action.gripper.pos"] = float
+        features["action.gripper.pos"] = (PolicyFeature(type=FeatureType.ACTION, shape=(1,)),)
         return features
 
 
@@ -424,22 +404,18 @@ def observation(self, obs: dict) -> dict:
         pos = t[:3, 3]
         tw = Rotation.from_matrix(t[:3, :3]).as_rotvec()
 
-        obs.update(
-            {
-                "observation.state.ee.x": float(pos[0]),
-                "observation.state.ee.y": float(pos[1]),
-                "observation.state.ee.z": float(pos[2]),
-                "observation.state.ee.wx": float(tw[0]),
-                "observation.state.ee.wy": float(tw[1]),
-                "observation.state.ee.wz": float(tw[2]),
-            }
-        )
+        obs["observation.state.ee.x"] = float(pos[0])
+        obs["observation.state.ee.y"] = float(pos[1])
+        obs["observation.state.ee.z"] = float(pos[2])
+        obs["observation.state.ee.wx"] = float(tw[0])
+        obs["observation.state.ee.wy"] = float(tw[1])
+        obs["observation.state.ee.wz"] = float(tw[2])
         return obs
 
     def transform_features(self, features: dict[str, PolicyFeature]) -> dict[str, PolicyFeature]:
         # We specify the dataset features of this step that we want to be stored in the dataset
         for k in ["x", "y", "z", "wx", "wy", "wz"]:
-            features[f"observation.state.ee.{k}"] = float
+            features[f"observation.state.ee.{k}"] = (PolicyFeature(type=FeatureType.ACTION, shape=(1,)),)
         return features
 
 

diff --git a/src/lerobot/teleoperators/phone/phone_processor.py b/src/lerobot/teleoperators/phone/phone_processor.py
@@ -16,7 +16,7 @@
 
 from dataclasses import dataclass, field
 
-from lerobot.configs.types import PolicyFeature
+from lerobot.configs.types import FeatureType, PolicyFeature
 from lerobot.processor.pipeline import ActionProcessor, ProcessorStepRegistry
 from lerobot.teleoperators.phone.config_phone import PhoneOS
 
@@ -69,18 +69,14 @@ def action(self, act: dict) -> dict:
             )  # Positive if a is pressed, negative if b is pressed, 0 if both or neither are pressed
 
         # For some actions we need to invert the axis
-        act.update(
-            {
-                "action.enabled": enabled,
-                "action.target_x": -pos[1] if enabled else 0.0,
-                "action.target_y": pos[0] if enabled else 0.0,
-                "action.target_z": pos[2] if enabled else 0.0,
-                "action.target_wx": rotvec[1] if enabled else 0.0,
-                "action.target_wy": rotvec[0] if enabled else 0.0,
-                "action.target_wz": -rotvec[2] if enabled else 0.0,
-                "action.gripper": gripper,  # Still send gripper action when disabled
-            }
-        )
+        act["action.enabled"] = enabled
+        act["action.target_x"] = -pos[1] if enabled else 0.0
+        act["action.target_y"] = pos[0] if enabled else 0.0
+        act["action.target_z"] = pos[2] if enabled else 0.0
+        act["action.target_wx"] = rotvec[1] if enabled else 0.0
+        act["action.target_wy"] = rotvec[0] if enabled else 0.0
+        act["action.target_wz"] = -rotvec[2] if enabled else 0.0
+        act["action.gripper"] = gripper  # Still send gripper action when disabled
         return act
 
     def transform_features(self, features: dict[str, PolicyFeature]) -> dict[str, PolicyFeature]:
@@ -89,12 +85,12 @@ def transform_features(self, features: dict[str, PolicyFeature]) -> dict[str, Po
         features.pop("action.phone.rot", None)
         features.pop("action.phone.raw_inputs", None)
 
-        features["action.enabled"] = bool
-        features["action.target_x"] = float
-        features["action.target_y"] = float
-        features["action.target_z"] = float
-        features["action.target_wx"] = float
-        features["action.target_wy"] = float
-        features["action.target_wz"] = float
-        features["action.gripper"] = float
+        features["action.enabled"] = (PolicyFeature(type=FeatureType.ACTION, shape=(1,)),)
+        features["action.target_x"] = (PolicyFeature(type=FeatureType.ACTION, shape=(1,)),)
+        features["action.target_y"] = (PolicyFeature(type=FeatureType.ACTION, shape=(1,)),)
+        features["action.target_z"] = (PolicyFeature(type=FeatureType.ACTION, shape=(1,)),)
+        features["action.target_wx"] = (PolicyFeature(type=FeatureType.ACTION, shape=(1,)),)
+        features["action.target_wy"] = (PolicyFeature(type=FeatureType.ACTION, shape=(1,)),)
+        features["action.target_wz"] = (PolicyFeature(type=FeatureType.ACTION, shape=(1,)),)
+        features["action.gripper"] = (PolicyFeature(type=FeatureType.ACTION, shape=(1,)),)
         return features
diff --git a/src/lerobot/teleoperators/phone/phone.py → ...robot/teleoperators/phone/teleop_phone.py b/src/lerobot/teleoperators/phone/phone.py → ...robot/teleoperators/phone/teleop_phone.py