huggingface · imstevenpmwork · Aug 31, 2025 · Aug 28, 2025 · Aug 28, 2025 · Aug 28, 2025
diff --git a/examples/phone_to_so100/record.py b/examples/phone_to_so100/record.py
@@ -38,8 +38,8 @@
 )
 from lerobot.robots.so100_follower.so100_follower import SO100Follower
 from lerobot.teleoperators.phone.config_phone import PhoneConfig, PhoneOS
-from lerobot.teleoperators.phone.phone import Phone
 from lerobot.teleoperators.phone.phone_processor import MapPhoneActionToRobotAction
+from lerobot.teleoperators.phone.teleop_phone import Phone
 from lerobot.utils.control_utils import init_keyboard_listener
 from lerobot.utils.utils import log_say
 from lerobot.utils.visualization_utils import _init_rerun

diff --git a/examples/phone_to_so100/teleoperate.py b/examples/phone_to_so100/teleoperate.py
@@ -28,8 +28,8 @@
 )
 from lerobot.robots.so100_follower.so100_follower import SO100Follower
 from lerobot.teleoperators.phone.config_phone import PhoneConfig, PhoneOS
-from lerobot.teleoperators.phone.phone import Phone
 from lerobot.teleoperators.phone.phone_processor import MapPhoneActionToRobotAction
+from lerobot.teleoperators.phone.teleop_phone import Phone
 
 # Initialize the robot and teleoperator
 robot_config = SO100FollowerConfig(
@@ -81,11 +81,6 @@
 
 print("Starting teleop loop. Move your phone to teleoperate the robot.")
 while True:
-    phone_obs = teleop_device.get_action()
-    if not phone_obs:
-        time.sleep(0.01)
-        continue
-
     # Get teleop observation
     phone_obs = teleop_device.get_action()
 

diff --git a/src/lerobot/processor/converters.py b/src/lerobot/processor/converters.py
@@ -53,7 +53,7 @@ def _is_image(arr: Any) -> bool:
 def _split_obs_to_state_and_images(obs: dict[str, Any]) -> tuple[dict[str, Any], dict[str, Any]]:
     state, images = {}, {}
     for k, v in obs.items():
-        if _is_image(v):
+        if "image" in k.lower() or _is_image(v):
             images[k] = v
         else:
             state[k] = v
@@ -116,6 +116,9 @@ def to_output_robot_action(transition: EnvTransition) -> dict[str, Any]:
     out: dict[str, Any] = {}
     action_dict = transition.get(TransitionKey.ACTION) or {}
 
+    if action_dict is None:
+        return out
+
     for k, v in action_dict.items():
         if isinstance(k, str) and k.startswith("action.") and k.endswith((".pos", ".vel")):
             out_key = k[len("action.") :]  # Strip the 'action.' prefix.

diff --git a/src/lerobot/processor/normalize_processor.py b/src/lerobot/processor/normalize_processor.py
@@ -157,9 +157,16 @@ def _apply_transform(
         if self.device and tensor.device != self.device:
             tensor = tensor.to(self.device)
 
+        # For Accelerate compatibility: move stats to match input tensor device
+        input_device = tensor.device
         stats = self._tensor_stats[key]
         tensor = tensor.to(dtype=torch.float32)
 
+        # Move stats to input device if needed
+        stats_device = next(iter(stats.values())).device
+        if stats_device != input_device:
+            stats = _convert_stats_to_tensors({key: self._tensor_stats[key]}, device=input_device)[key]
+
         if norm_mode == NormalizationMode.MEAN_STD and "mean" in stats and "std" in stats:
             mean, std = stats["mean"], stats["std"]
             # Avoid division by zero by adding a small epsilon.
@@ -175,7 +182,7 @@ def _apply_transform(
             # to prevent division by zero. This consistently maps an input equal to
             # min_val to -1, ensuring a stable transformation.
             denom = torch.where(
-                denom == 0, torch.tensor(self.eps, device=self.device, dtype=torch.float32), denom
+                denom == 0, torch.tensor(self.eps, device=input_device, dtype=torch.float32), denom
             )
             if inverse:
                 # Map from [-1, 1] back to [min, max]

diff --git a/src/lerobot/robots/so100_follower/robot_kinematic_processor.py b/src/lerobot/robots/so100_follower/robot_kinematic_processor.py
@@ -19,7 +19,7 @@
 import numpy as np
 from scipy.spatial.transform import Rotation
 
-from lerobot.configs.types import PolicyFeature
+from lerobot.configs.types import FeatureType, PolicyFeature
 from lerobot.model.kinematics import RobotKinematics
 from lerobot.processor.pipeline import (
     ActionProcessor,
@@ -123,16 +123,12 @@ def action(self, action):
         # Write action fields
         pos = desired[:3, 3]
         tw = Rotation.from_matrix(desired[:3, :3]).as_rotvec()
-        new_action.update(
-            {
-                "action.ee.x": float(pos[0]),
-                "action.ee.y": float(pos[1]),
-                "action.ee.z": float(pos[2]),
-                "action.ee.wx": float(tw[0]),
-                "action.ee.wy": float(tw[1]),
-                "action.ee.wz": float(tw[2]),
-            }
-        )
+        new_action["action.ee.x"] = float(pos[0])
+        new_action["action.ee.y"] = float(pos[1])
+        new_action["action.ee.z"] = float(pos[2])
+        new_action["action.ee.wx"] = float(tw[0])
+        new_action["action.ee.wy"] = float(tw[1])
+        new_action["action.ee.wz"] = float(tw[2])
 
         self._prev_enabled = enabled
         return new_action
@@ -142,6 +138,23 @@ def reset(self):
         self.reference_ee_pose = None
         self._command_when_disabled = None
 
+    def transform_features(self, features: dict[str, PolicyFeature]) -> dict[str, PolicyFeature]:
+        """Drop the enable/target action keys and declare the end-effector pose action keys.
+
+        Mutates `features` in place and returns it; every declared entry is a scalar ACTION feature.
+        """
+        # NOTE(review): the removed keys look like the inputs this step consumes — confirm against `action`.
+        for suffix in ("enabled", "target_x", "target_y", "target_z", "target_wx", "target_wy", "target_wz"):
+            features.pop(f"action.{suffix}", None)
+
+        # Store the PolicyFeature itself. A trailing comma after the call would wrap it in a
+        # 1-tuple, which contradicts the dict[str, PolicyFeature] annotation.
+        # Insertion order is kept as x, y, z, wx, wy, wz.
+        for axis in ("x", "y", "z", "wx", "wy", "wz"):
+            features[f"action.ee.{axis}"] = PolicyFeature(type=FeatureType.ACTION, shape=(1,))
+
+        return features
+
 
 @ProcessorStepRegistry.register("ee_bounds_and_safety")
 @dataclass
@@ -167,12 +180,12 @@ class EEBoundsAndSafety(ActionProcessor):
     _last_twist: np.ndarray | None = field(default=None, init=False, repr=False)
 
     def action(self, act: dict) -> dict:
-        x = act.pop("action.ee.x", None)
-        y = act.pop("action.ee.y", None)
-        z = act.pop("action.ee.z", None)
-        wx = act.pop("action.ee.wx", None)
-        wy = act.pop("action.ee.wy", None)
-        wz = act.pop("action.ee.wz", None)
+        x = act.get("action.ee.x", None)
+        y = act.get("action.ee.y", None)
+        z = act.get("action.ee.z", None)
+        wx = act.get("action.ee.wx", None)
+        wy = act.get("action.ee.wy", None)
+        wz = act.get("action.ee.wz", None)
 
         if None in (x, y, z, wx, wy, wz):
             return act
@@ -194,32 +207,18 @@ def action(self, act: dict) -> dict:
         self._last_pos = pos
         self._last_twist = twist
 
-        act.update(
-            {
-                "action.ee.x": float(pos[0]),
-                "action.ee.y": float(pos[1]),
-                "action.ee.z": float(pos[2]),
-                "action.ee.wx": float(twist[0]),
-                "action.ee.wy": float(twist[1]),
-                "action.ee.wz": float(twist[2]),
-            }
-        )
+        act["action.ee.x"] = float(pos[0])
+        act["action.ee.y"] = float(pos[1])
+        act["action.ee.z"] = float(pos[2])
+        act["action.ee.wx"] = float(twist[0])
+        act["action.ee.wy"] = float(twist[1])
+        act["action.ee.wz"] = float(twist[2])
         return act
 
     def reset(self):
         self._last_pos = None
         self._last_twist = None
 
-    def transform_features(self, features: dict[str, PolicyFeature]) -> dict[str, PolicyFeature]:
-        # Because this is last step we specify the dataset features of this step that we want to be stored in the dataset
-        features["action.ee.x"] = float
-        features["action.ee.y"] = float
-        features["action.ee.z"] = float
-        features["action.ee.wx"] = float
-        features["action.ee.wy"] = float
-        features["action.ee.wz"] = float
-        return features
-
 
 @ProcessorStepRegistry.register("inverse_kinematics_ee_to_joints")
 @dataclass
@@ -259,18 +258,6 @@ def __call__(self, transition: EnvTransition) -> EnvTransition:
         wz = act.get("action.ee.wz", None)
 
         if None in (x, y, z, wx, wy, wz):
-            # Nothing to do; restore what we popped and return
-            act.update(
-                {
-                    "action.ee.x": x,
-                    "action.ee.y": y,
-                    "action.ee.z": z,
-                    "action.ee.wx": wx,
-                    "action.ee.wy": wy,
-                    "action.ee.wz": wz,
-                }
-            )
-            transition[TransitionKey.ACTION] = act
             return transition
 
         # Get joint positions from complimentary data
@@ -307,16 +294,11 @@ def __call__(self, transition: EnvTransition) -> EnvTransition:
         return transition
 
     def transform_features(self, features: dict[str, PolicyFeature]) -> dict[str, PolicyFeature]:
-        # We specify the dataset features of this step that we want to be stored in the dataset
-        features["action.ee.x"] = float
-        features["action.ee.y"] = float
-        features["action.ee.z"] = float
-        features["action.ee.wx"] = float
-        features["action.ee.wy"] = float
-        features["action.ee.wz"] = float
-
-        features["observation.state.gripper.pos"] = float
-        features["action.gripper.pos"] = float
+        # Scalar features, stored as PolicyFeature (not a 1-tuple); the observation key is STATE, not ACTION.
+        features["observation.state.gripper.pos"] = PolicyFeature(type=FeatureType.STATE, shape=(1,))
+        features["action.gripper.pos"] = PolicyFeature(type=FeatureType.ACTION, shape=(1,))
+        for name in self.motor_names:
+            features[f"action.{name}.pos"] = PolicyFeature(type=FeatureType.ACTION, shape=(1,))
         return features
 
     def reset(self):
@@ -383,14 +365,13 @@ def __call__(self, transition: EnvTransition) -> EnvTransition:
         new_act.pop("action.gripper", None)
         transition[TransitionKey.ACTION] = new_act
 
-        obs.update({"observation.state.gripper.pos": curr_pos})
+        obs["observation.state.gripper.pos"] = curr_pos
         transition[TransitionKey.OBSERVATION] = obs
         return transition
 
     def transform_features(self, features: dict[str, PolicyFeature]) -> dict[str, PolicyFeature]:
-        # We specify the dataset features of this step that we want to be stored in the dataset
-        features["observation.state.gripper.pos"] = float
-        features["action.gripper.pos"] = float
+        features.pop("action.gripper", None)  # superseded by the "action.gripper.pos" feature declared next
+        features["action.gripper.pos"] = PolicyFeature(type=FeatureType.ACTION, shape=(1,))  # not a 1-tuple
         return features
 
 
@@ -423,22 +404,18 @@ def observation(self, obs: dict) -> dict:
         pos = t[:3, 3]
         tw = Rotation.from_matrix(t[:3, :3]).as_rotvec()
 
-        obs.update(
-            {
-                "observation.state.ee.x": float(pos[0]),
-                "observation.state.ee.y": float(pos[1]),
-                "observation.state.ee.z": float(pos[2]),
-                "observation.state.ee.wx": float(tw[0]),
-                "observation.state.ee.wy": float(tw[1]),
-                "observation.state.ee.wz": float(tw[2]),
-            }
-        )
+        obs["observation.state.ee.x"] = float(pos[0])
+        obs["observation.state.ee.y"] = float(pos[1])
+        obs["observation.state.ee.z"] = float(pos[2])
+        obs["observation.state.ee.wx"] = float(tw[0])
+        obs["observation.state.ee.wy"] = float(tw[1])
+        obs["observation.state.ee.wz"] = float(tw[2])
         return obs
 
     def transform_features(self, features: dict[str, PolicyFeature]) -> dict[str, PolicyFeature]:
         # We specify the dataset features of this step that we want to be stored in the dataset
         for k in ["x", "y", "z", "wx", "wy", "wz"]:
-            features[f"observation.state.ee.{k}"] = float
+            features[f"observation.state.ee.{k}"] = PolicyFeature(type=FeatureType.STATE, shape=(1,))  # scalar state
         return features
 
 

diff --git a/src/lerobot/teleoperators/phone/phone_processor.py b/src/lerobot/teleoperators/phone/phone_processor.py
@@ -16,6 +16,7 @@
 
 from dataclasses import dataclass, field
 
+from lerobot.configs.types import FeatureType, PolicyFeature
 from lerobot.processor.pipeline import ActionProcessor, ProcessorStepRegistry
 from lerobot.teleoperators.phone.config_phone import PhoneOS
 
@@ -47,7 +48,7 @@ class MapPhoneActionToRobotAction(ActionProcessor):
 
     def action(self, act: dict) -> dict:
         # Pop them from the action
-        enabled = act.pop("action.phone.enabled", 0)
+        enabled = bool(act.pop("action.phone.enabled", 0))
         pos = act.pop("action.phone.pos", None)
         rot = act.pop("action.phone.rot", None)
         inputs = act.pop("action.phone.raw_inputs", {})
@@ -68,16 +69,28 @@ def action(self, act: dict) -> dict:
             )  # Positive if a is pressed, negative if b is pressed, 0 if both or neither are pressed
 
         # For some actions we need to invert the axis
-        act.update(
-            {
-                "action.enabled": enabled,
-                "action.target_x": -pos[1] if enabled else 0.0,
-                "action.target_y": pos[0] if enabled else 0.0,
-                "action.target_z": pos[2] if enabled else 0.0,
-                "action.target_wx": rotvec[1] if enabled else 0.0,
-                "action.target_wy": rotvec[0] if enabled else 0.0,
-                "action.target_wz": -rotvec[2] if enabled else 0.0,
-                "action.gripper": gripper,  # Still send gripper action when disabled
-            }
-        )
+        act["action.enabled"] = enabled
+        act["action.target_x"] = -pos[1] if enabled else 0.0
+        act["action.target_y"] = pos[0] if enabled else 0.0
+        act["action.target_z"] = pos[2] if enabled else 0.0
+        act["action.target_wx"] = rotvec[1] if enabled else 0.0
+        act["action.target_wy"] = rotvec[0] if enabled else 0.0
+        act["action.target_wz"] = -rotvec[2] if enabled else 0.0
+        act["action.gripper"] = gripper  # Still send gripper action when disabled
         return act
+
+    def transform_features(self, features: dict[str, PolicyFeature]) -> dict[str, PolicyFeature]:
+        """Drop the raw phone keys that `action` pops and declare the robot-facing keys it writes.
+
+        Mutates `features` in place and returns it; every declared entry is a scalar ACTION feature.
+        """
+        for raw_key in ("enabled", "pos", "rot", "raw_inputs"):
+            features.pop(f"action.phone.{raw_key}", None)
+
+        # Assign the PolicyFeature directly: a trailing comma after the call would store a
+        # 1-tuple and violate the dict[str, PolicyFeature] annotation.
+        # Declaration order matches the keys written by `action`.
+        target_keys = [f"target_{axis}" for axis in ("x", "y", "z", "wx", "wy", "wz")]
+        for mapped_key in ("enabled", *target_keys, "gripper"):
+            features[f"action.{mapped_key}"] = PolicyFeature(type=FeatureType.ACTION, shape=(1,))
+        return features
diff --git a/src/lerobot/teleoperators/phone/phone.py → ...robot/teleoperators/phone/teleop_phone.py b/src/lerobot/teleoperators/phone/phone.py → ...robot/teleoperators/phone/teleop_phone.py
@@ -70,6 +70,8 @@ def send_feedback(self, feedback: dict[str, float]) -> None:
 
 
 class IOSPhone(BasePhone, Teleoperator):
+    name = "ios_phone"
+
     def __init__(self, config: PhoneConfig):
         super().__init__(config)
         self.config = config
@@ -186,6 +188,8 @@ def disconnect(self) -> None:
 
 
 class AndroidPhone(BasePhone, Teleoperator):
+    name = "android_phone"
+
     def __init__(self, config: PhoneConfig):
         super().__init__(config)
         self.config = config