Skip to content
2 changes: 1 addition & 1 deletion examples/phone_to_so100/record.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,8 @@
)
from lerobot.robots.so100_follower.so100_follower import SO100Follower
from lerobot.teleoperators.phone.config_phone import PhoneConfig, PhoneOS
from lerobot.teleoperators.phone.phone import Phone
from lerobot.teleoperators.phone.phone_processor import MapPhoneActionToRobotAction
from lerobot.teleoperators.phone.teleop_phone import Phone
from lerobot.utils.control_utils import init_keyboard_listener
from lerobot.utils.utils import log_say
from lerobot.utils.visualization_utils import _init_rerun
Expand Down
7 changes: 1 addition & 6 deletions examples/phone_to_so100/teleoperate.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,8 @@
)
from lerobot.robots.so100_follower.so100_follower import SO100Follower
from lerobot.teleoperators.phone.config_phone import PhoneConfig, PhoneOS
from lerobot.teleoperators.phone.phone import Phone
from lerobot.teleoperators.phone.phone_processor import MapPhoneActionToRobotAction
from lerobot.teleoperators.phone.teleop_phone import Phone

# Initialize the robot and teleoperator
robot_config = SO100FollowerConfig(
Expand Down Expand Up @@ -81,11 +81,6 @@

print("Starting teleop loop. Move your phone to teleoperate the robot.")
while True:
phone_obs = teleop_device.get_action()
if not phone_obs:
time.sleep(0.01)
continue

# Get teleop observation
phone_obs = teleop_device.get_action()

Expand Down
5 changes: 4 additions & 1 deletion src/lerobot/processor/converters.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ def _is_image(arr: Any) -> bool:
def _split_obs_to_state_and_images(obs: dict[str, Any]) -> tuple[dict[str, Any], dict[str, Any]]:
state, images = {}, {}
for k, v in obs.items():
if _is_image(v):
if "image" in k.lower() or _is_image(v):
images[k] = v
else:
state[k] = v
Expand Down Expand Up @@ -116,6 +116,9 @@ def to_output_robot_action(transition: EnvTransition) -> dict[str, Any]:
out: dict[str, Any] = {}
action_dict = transition.get(TransitionKey.ACTION) or {}

if action_dict is None:
return out

for k, v in action_dict.items():
if isinstance(k, str) and k.startswith("action.") and k.endswith((".pos", ".vel")):
out_key = k[len("action.") :] # Strip the 'action.' prefix.
Expand Down
9 changes: 8 additions & 1 deletion src/lerobot/processor/normalize_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,9 +157,16 @@ def _apply_transform(
if self.device and tensor.device != self.device:
tensor = tensor.to(self.device)

# For Accelerate compatibility: move stats to match input tensor device
input_device = tensor.device
stats = self._tensor_stats[key]
tensor = tensor.to(dtype=torch.float32)

# Move stats to input device if needed
stats_device = next(iter(stats.values())).device
if stats_device != input_device:
stats = _convert_stats_to_tensors({key: self._tensor_stats[key]}, device=input_device)[key]

if norm_mode == NormalizationMode.MEAN_STD and "mean" in stats and "std" in stats:
mean, std = stats["mean"], stats["std"]
# Avoid division by zero by adding a small epsilon.
Expand All @@ -175,7 +182,7 @@ def _apply_transform(
# to prevent division by zero. This consistently maps an input equal to
# min_val to -1, ensuring a stable transformation.
denom = torch.where(
denom == 0, torch.tensor(self.eps, device=self.device, dtype=torch.float32), denom
denom == 0, torch.tensor(self.eps, device=input_device, dtype=torch.float32), denom
)
if inverse:
# Map from [-1, 1] back to [min, max]
Expand Down
125 changes: 51 additions & 74 deletions src/lerobot/robots/so100_follower/robot_kinematic_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
import numpy as np
from scipy.spatial.transform import Rotation

from lerobot.configs.types import PolicyFeature
from lerobot.configs.types import FeatureType, PolicyFeature
from lerobot.model.kinematics import RobotKinematics
from lerobot.processor.pipeline import (
ActionProcessor,
Expand Down Expand Up @@ -123,16 +123,12 @@ def action(self, action):
# Write action fields
pos = desired[:3, 3]
tw = Rotation.from_matrix(desired[:3, :3]).as_rotvec()
new_action.update(
{
"action.ee.x": float(pos[0]),
"action.ee.y": float(pos[1]),
"action.ee.z": float(pos[2]),
"action.ee.wx": float(tw[0]),
"action.ee.wy": float(tw[1]),
"action.ee.wz": float(tw[2]),
}
)
new_action["action.ee.x"] = float(pos[0])
new_action["action.ee.y"] = float(pos[1])
new_action["action.ee.z"] = float(pos[2])
new_action["action.ee.wx"] = float(tw[0])
new_action["action.ee.wy"] = float(tw[1])
new_action["action.ee.wz"] = float(tw[2])

self._prev_enabled = enabled
return new_action
Expand All @@ -142,6 +138,23 @@ def reset(self):
self.reference_ee_pose = None
self._command_when_disabled = None

def transform_features(self, features: dict[str, PolicyFeature]) -> dict[str, PolicyFeature]:
    """Update the feature description to reflect this step's output keys.

    The ``action.enabled`` and ``action.target_*`` entries are removed (if
    present) and the six ``action.ee.*`` entries are registered as scalar
    action features.

    Args:
        features: Feature mapping to update. It is mutated in place.

    Returns:
        The same ``features`` mapping, after the update.
    """
    removed_keys = (
        "action.enabled",
        "action.target_x",
        "action.target_y",
        "action.target_z",
        "action.target_wx",
        "action.target_wy",
        "action.target_wz",
    )
    for key in removed_keys:
        # A default of None makes the removal a no-op when the key is absent.
        features.pop(key, None)

    for axis in ("x", "y", "z", "wx", "wy", "wz"):
        # Store the PolicyFeature itself. A trailing comma after the call
        # would wrap it in a 1-tuple and break the declared value type.
        features[f"action.ee.{axis}"] = PolicyFeature(type=FeatureType.ACTION, shape=(1,))
    return features


@ProcessorStepRegistry.register("ee_bounds_and_safety")
@dataclass
Expand All @@ -167,12 +180,12 @@ class EEBoundsAndSafety(ActionProcessor):
_last_twist: np.ndarray | None = field(default=None, init=False, repr=False)

def action(self, act: dict) -> dict:
x = act.pop("action.ee.x", None)
y = act.pop("action.ee.y", None)
z = act.pop("action.ee.z", None)
wx = act.pop("action.ee.wx", None)
wy = act.pop("action.ee.wy", None)
wz = act.pop("action.ee.wz", None)
x = act.get("action.ee.x", None)
y = act.get("action.ee.y", None)
z = act.get("action.ee.z", None)
wx = act.get("action.ee.wx", None)
wy = act.get("action.ee.wy", None)
wz = act.get("action.ee.wz", None)

if None in (x, y, z, wx, wy, wz):
return act
Expand All @@ -194,32 +207,18 @@ def action(self, act: dict) -> dict:
self._last_pos = pos
self._last_twist = twist

act.update(
{
"action.ee.x": float(pos[0]),
"action.ee.y": float(pos[1]),
"action.ee.z": float(pos[2]),
"action.ee.wx": float(twist[0]),
"action.ee.wy": float(twist[1]),
"action.ee.wz": float(twist[2]),
}
)
act["action.ee.x"] = float(pos[0])
act["action.ee.y"] = float(pos[1])
act["action.ee.z"] = float(pos[2])
act["action.ee.wx"] = float(twist[0])
act["action.ee.wy"] = float(twist[1])
act["action.ee.wz"] = float(twist[2])
return act

def reset(self):
self._last_pos = None
self._last_twist = None

def transform_features(self, features: dict[str, PolicyFeature]) -> dict[str, PolicyFeature]:
# Because this is last step we specify the dataset features of this step that we want to be stored in the dataset
features["action.ee.x"] = float
features["action.ee.y"] = float
features["action.ee.z"] = float
features["action.ee.wx"] = float
features["action.ee.wy"] = float
features["action.ee.wz"] = float
return features


@ProcessorStepRegistry.register("inverse_kinematics_ee_to_joints")
@dataclass
Expand Down Expand Up @@ -259,18 +258,6 @@ def __call__(self, transition: EnvTransition) -> EnvTransition:
wz = act.get("action.ee.wz", None)

if None in (x, y, z, wx, wy, wz):
# Nothing to do; restore what we popped and return
act.update(
{
"action.ee.x": x,
"action.ee.y": y,
"action.ee.z": z,
"action.ee.wx": wx,
"action.ee.wy": wy,
"action.ee.wz": wz,
}
)
transition[TransitionKey.ACTION] = act
return transition

# Get joint positions from complimentary data
Expand Down Expand Up @@ -307,16 +294,11 @@ def __call__(self, transition: EnvTransition) -> EnvTransition:
return transition

def transform_features(self, features: dict[str, PolicyFeature]) -> dict[str, PolicyFeature]:
# We specify the dataset features of this step that we want to be stored in the dataset
features["action.ee.x"] = float
features["action.ee.y"] = float
features["action.ee.z"] = float
features["action.ee.wx"] = float
features["action.ee.wy"] = float
features["action.ee.wz"] = float

features["observation.state.gripper.pos"] = float
features["action.gripper.pos"] = float
features["observation.state.gripper.pos"] = (PolicyFeature(type=FeatureType.ACTION, shape=(1,)),)
features["action.gripper.pos"] = (PolicyFeature(type=FeatureType.ACTION, shape=(1,)),)
for name in self.motor_names:
features[f"action.{name}.pos"] = (PolicyFeature(type=FeatureType.ACTION, shape=(1,)),)

return features

def reset(self):
Expand Down Expand Up @@ -383,14 +365,13 @@ def __call__(self, transition: EnvTransition) -> EnvTransition:
new_act.pop("action.gripper", None)
transition[TransitionKey.ACTION] = new_act

obs.update({"observation.state.gripper.pos": curr_pos})
obs["observation.state.gripper.pos"] = curr_pos
transition[TransitionKey.OBSERVATION] = obs
return transition

def transform_features(self, features: dict[str, PolicyFeature]) -> dict[str, PolicyFeature]:
# We specify the dataset features of this step that we want to be stored in the dataset
features["observation.state.gripper.pos"] = float
features["action.gripper.pos"] = float
features.pop("action.gripper", None)
features["action.gripper.pos"] = (PolicyFeature(type=FeatureType.ACTION, shape=(1,)),)
return features


Expand Down Expand Up @@ -423,22 +404,18 @@ def observation(self, obs: dict) -> dict:
pos = t[:3, 3]
tw = Rotation.from_matrix(t[:3, :3]).as_rotvec()

obs.update(
{
"observation.state.ee.x": float(pos[0]),
"observation.state.ee.y": float(pos[1]),
"observation.state.ee.z": float(pos[2]),
"observation.state.ee.wx": float(tw[0]),
"observation.state.ee.wy": float(tw[1]),
"observation.state.ee.wz": float(tw[2]),
}
)
obs["observation.state.ee.x"] = float(pos[0])
obs["observation.state.ee.y"] = float(pos[1])
obs["observation.state.ee.z"] = float(pos[2])
obs["observation.state.ee.wx"] = float(tw[0])
obs["observation.state.ee.wy"] = float(tw[1])
obs["observation.state.ee.wz"] = float(tw[2])
return obs

def transform_features(self, features: dict[str, PolicyFeature]) -> dict[str, PolicyFeature]:
    """Register the end-effector observation keys written by this step.

    Adds one scalar feature per ``observation.state.ee.*`` component
    (x, y, z, wx, wy, wz) so that they can be stored in the dataset.

    Args:
        features: Feature mapping to update. It is mutated in place.

    Returns:
        The same ``features`` mapping, after the update.
    """
    for k in ("x", "y", "z", "wx", "wy", "wz"):
        # Store the PolicyFeature itself, not a 1-tuple containing it, so the
        # values match the declared ``dict[str, PolicyFeature]`` type.
        # NOTE(review): these are observation keys but are tagged
        # FeatureType.ACTION; confirm whether a state feature type is intended.
        features[f"observation.state.ee.{k}"] = PolicyFeature(type=FeatureType.ACTION, shape=(1,))
    return features


Expand Down
39 changes: 26 additions & 13 deletions src/lerobot/teleoperators/phone/phone_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

from dataclasses import dataclass, field

from lerobot.configs.types import FeatureType, PolicyFeature
from lerobot.processor.pipeline import ActionProcessor, ProcessorStepRegistry
from lerobot.teleoperators.phone.config_phone import PhoneOS

Expand Down Expand Up @@ -47,7 +48,7 @@ class MapPhoneActionToRobotAction(ActionProcessor):

def action(self, act: dict) -> dict:
# Pop them from the action
enabled = act.pop("action.phone.enabled", 0)
enabled = bool(act.pop("action.phone.enabled", 0))
pos = act.pop("action.phone.pos", None)
rot = act.pop("action.phone.rot", None)
inputs = act.pop("action.phone.raw_inputs", {})
Expand All @@ -68,16 +69,28 @@ def action(self, act: dict) -> dict:
) # Positive if a is pressed, negative if b is pressed, 0 if both or neither are pressed

# For some actions we need to invert the axis
act.update(
{
"action.enabled": enabled,
"action.target_x": -pos[1] if enabled else 0.0,
"action.target_y": pos[0] if enabled else 0.0,
"action.target_z": pos[2] if enabled else 0.0,
"action.target_wx": rotvec[1] if enabled else 0.0,
"action.target_wy": rotvec[0] if enabled else 0.0,
"action.target_wz": -rotvec[2] if enabled else 0.0,
"action.gripper": gripper, # Still send gripper action when disabled
}
)
act["action.enabled"] = enabled
act["action.target_x"] = -pos[1] if enabled else 0.0
act["action.target_y"] = pos[0] if enabled else 0.0
act["action.target_z"] = pos[2] if enabled else 0.0
act["action.target_wx"] = rotvec[1] if enabled else 0.0
act["action.target_wy"] = rotvec[0] if enabled else 0.0
act["action.target_wz"] = -rotvec[2] if enabled else 0.0
act["action.gripper"] = gripper # Still send gripper action when disabled
return act

def transform_features(self, features: dict[str, PolicyFeature]) -> dict[str, PolicyFeature]:
    """Update the feature description to mirror what ``action`` does to the action dict.

    The raw ``action.phone.*`` entries are removed (if present). The
    ``action.enabled``, ``action.target_*`` and ``action.gripper`` entries
    are registered as scalar action features.

    Args:
        features: Feature mapping to update. It is mutated in place.

    Returns:
        The same ``features`` mapping, after the update.
    """
    for phone_key in (
        "action.phone.enabled",
        "action.phone.pos",
        "action.phone.rot",
        "action.phone.raw_inputs",
    ):
        # A default of None makes the removal a no-op when the key is absent.
        features.pop(phone_key, None)

    produced_keys = (
        "action.enabled",
        "action.target_x",
        "action.target_y",
        "action.target_z",
        "action.target_wx",
        "action.target_wy",
        "action.target_wz",
        "action.gripper",
    )
    for out_key in produced_keys:
        # Store the PolicyFeature itself. A trailing comma after the call
        # would wrap it in a 1-tuple and break the declared value type.
        features[out_key] = PolicyFeature(type=FeatureType.ACTION, shape=(1,))
    return features
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,8 @@ def send_feedback(self, feedback: dict[str, float]) -> None:


class IOSPhone(BasePhone, Teleoperator):
name = "ios_phone"

def __init__(self, config: PhoneConfig):
super().__init__(config)
self.config = config
Expand Down Expand Up @@ -186,6 +188,8 @@ def disconnect(self) -> None:


class AndroidPhone(BasePhone, Teleoperator):
name = "android_phone"

def __init__(self, config: PhoneConfig):
super().__init__(config)
self.config = config
Expand Down
Loading
Loading