diff --git a/examples/phone_to_so100/record.py b/examples/phone_to_so100/record.py index 4ec3948eaab..e9d22ef80d0 100644 --- a/examples/phone_to_so100/record.py +++ b/examples/phone_to_so100/record.py @@ -38,8 +38,8 @@ ) from lerobot.robots.so100_follower.so100_follower import SO100Follower from lerobot.teleoperators.phone.config_phone import PhoneConfig, PhoneOS -from lerobot.teleoperators.phone.phone import Phone from lerobot.teleoperators.phone.phone_processor import MapPhoneActionToRobotAction +from lerobot.teleoperators.phone.teleop_phone import Phone from lerobot.utils.control_utils import init_keyboard_listener from lerobot.utils.utils import log_say from lerobot.utils.visualization_utils import _init_rerun diff --git a/examples/phone_to_so100/teleoperate.py b/examples/phone_to_so100/teleoperate.py index 1671ef904a3..1eef0f8ae10 100644 --- a/examples/phone_to_so100/teleoperate.py +++ b/examples/phone_to_so100/teleoperate.py @@ -28,8 +28,8 @@ ) from lerobot.robots.so100_follower.so100_follower import SO100Follower from lerobot.teleoperators.phone.config_phone import PhoneConfig, PhoneOS -from lerobot.teleoperators.phone.phone import Phone from lerobot.teleoperators.phone.phone_processor import MapPhoneActionToRobotAction +from lerobot.teleoperators.phone.teleop_phone import Phone # Initialize the robot and teleoperator robot_config = SO100FollowerConfig( @@ -81,11 +81,6 @@ print("Starting teleop loop. Move your phone to teleoperate the robot.") while True: - phone_obs = teleop_device.get_action() - if not phone_obs: - time.sleep(0.01) - continue - # Get teleop observation phone_obs = teleop_device.get_action() diff --git a/src/lerobot/processor/converters.py b/src/lerobot/processor/converters.py index f0e0815778d..3a8f8b10937 100644 --- a/src/lerobot/processor/converters.py +++ b/src/lerobot/processor/converters.py @@ -53,7 +53,7 @@ def _is_image(arr: Any) -> bool: def _split_obs_to_state_and_images(obs: dict[str, Any]) -> tuple[dict[str, Any], dict[str, Any]]: state, images = {}, {} for k, v in obs.items(): - if _is_image(v): + if "image" in k.lower() or _is_image(v): images[k] = v else: state[k] = v @@ -116,6 +116,9 @@ def to_output_robot_action(transition: EnvTransition) -> dict[str, Any]: out: dict[str, Any] = {} action_dict = transition.get(TransitionKey.ACTION) or {} + if action_dict is None: + return out + for k, v in action_dict.items(): if isinstance(k, str) and k.startswith("action.") and k.endswith((".pos", ".vel")): out_key = k[len("action.") :] # Strip the 'action.' prefix. diff --git a/src/lerobot/processor/normalize_processor.py b/src/lerobot/processor/normalize_processor.py index 4a8d177c5d3..fa635414ca5 100644 --- a/src/lerobot/processor/normalize_processor.py +++ b/src/lerobot/processor/normalize_processor.py @@ -157,9 +157,16 @@ def _apply_transform( if self.device and tensor.device != self.device: tensor = tensor.to(self.device) + # For Accelerate compatibility: move stats to match input tensor device + input_device = tensor.device stats = self._tensor_stats[key] tensor = tensor.to(dtype=torch.float32) + # Move stats to input device if needed + stats_device = next(iter(stats.values())).device + if stats_device != input_device: + stats = _convert_stats_to_tensors({key: self._tensor_stats[key]}, device=input_device)[key] + if norm_mode == NormalizationMode.MEAN_STD and "mean" in stats and "std" in stats: mean, std = stats["mean"], stats["std"] # Avoid division by zero by adding a small epsilon. @@ -175,7 +182,7 @@ def _apply_transform( # to prevent division by zero. This consistently maps an input equal to # min_val to -1, ensuring a stable transformation. denom = torch.where( - denom == 0, torch.tensor(self.eps, device=self.device, dtype=torch.float32), denom + denom == 0, torch.tensor(self.eps, device=input_device, dtype=torch.float32), denom ) if inverse: # Map from [-1, 1] back to [min, max] diff --git a/src/lerobot/robots/so100_follower/robot_kinematic_processor.py b/src/lerobot/robots/so100_follower/robot_kinematic_processor.py index cff18c161bc..7c6c73a4d96 100644 --- a/src/lerobot/robots/so100_follower/robot_kinematic_processor.py +++ b/src/lerobot/robots/so100_follower/robot_kinematic_processor.py @@ -19,7 +19,7 @@ import numpy as np from scipy.spatial.transform import Rotation -from lerobot.configs.types import PolicyFeature +from lerobot.configs.types import FeatureType, PolicyFeature from lerobot.model.kinematics import RobotKinematics from lerobot.processor.pipeline import ( ActionProcessor, @@ -123,16 +123,12 @@ def action(self, action): # Write action fields pos = desired[:3, 3] tw = Rotation.from_matrix(desired[:3, :3]).as_rotvec() - new_action.update( - { - "action.ee.x": float(pos[0]), - "action.ee.y": float(pos[1]), - "action.ee.z": float(pos[2]), - "action.ee.wx": float(tw[0]), - "action.ee.wy": float(tw[1]), - "action.ee.wz": float(tw[2]), - } - ) + new_action["action.ee.x"] = float(pos[0]) + new_action["action.ee.y"] = float(pos[1]) + new_action["action.ee.z"] = float(pos[2]) + new_action["action.ee.wx"] = float(tw[0]) + new_action["action.ee.wy"] = float(tw[1]) + new_action["action.ee.wz"] = float(tw[2]) self._prev_enabled = enabled return new_action @@ -142,6 +138,23 @@ def reset(self): self.reference_ee_pose = None self._command_when_disabled = None + def transform_features(self, features: dict[str, PolicyFeature]) -> dict[str, PolicyFeature]: + features.pop("action.enabled", None) + features.pop("action.target_x", None) + features.pop("action.target_y", None) + features.pop("action.target_z", None) + features.pop("action.target_wx", None) + features.pop("action.target_wy", None) + features.pop("action.target_wz", None) + + features["action.ee.x"] = (PolicyFeature(type=FeatureType.ACTION, shape=(1,)),) + features["action.ee.y"] = (PolicyFeature(type=FeatureType.ACTION, shape=(1,)),) + features["action.ee.z"] = (PolicyFeature(type=FeatureType.ACTION, shape=(1,)),) + features["action.ee.wx"] = (PolicyFeature(type=FeatureType.ACTION, shape=(1,)),) + features["action.ee.wy"] = (PolicyFeature(type=FeatureType.ACTION, shape=(1,)),) + features["action.ee.wz"] = (PolicyFeature(type=FeatureType.ACTION, shape=(1,)),) + return features + @ProcessorStepRegistry.register("ee_bounds_and_safety") @dataclass @@ -167,12 +180,12 @@ class EEBoundsAndSafety(ActionProcessor): _last_twist: np.ndarray | None = field(default=None, init=False, repr=False) def action(self, act: dict) -> dict: - x = act.pop("action.ee.x", None) - y = act.pop("action.ee.y", None) - z = act.pop("action.ee.z", None) - wx = act.pop("action.ee.wx", None) - wy = act.pop("action.ee.wy", None) - wz = act.pop("action.ee.wz", None) + x = act.get("action.ee.x", None) + y = act.get("action.ee.y", None) + z = act.get("action.ee.z", None) + wx = act.get("action.ee.wx", None) + wy = act.get("action.ee.wy", None) + wz = act.get("action.ee.wz", None) if None in (x, y, z, wx, wy, wz): return act @@ -194,32 +207,18 @@ def action(self, act: dict) -> dict: self._last_pos = pos self._last_twist = twist - act.update( - { - "action.ee.x": float(pos[0]), - "action.ee.y": float(pos[1]), - "action.ee.z": float(pos[2]), - "action.ee.wx": float(twist[0]), - "action.ee.wy": float(twist[1]), - "action.ee.wz": float(twist[2]), - } - ) + act["action.ee.x"] = float(pos[0]) + act["action.ee.y"] = float(pos[1]) + act["action.ee.z"] = float(pos[2]) + act["action.ee.wx"] = float(twist[0]) + act["action.ee.wy"] = float(twist[1]) + act["action.ee.wz"] = float(twist[2]) return act def reset(self): self._last_pos = None self._last_twist = None - def transform_features(self, features: dict[str, PolicyFeature]) -> dict[str, PolicyFeature]: - # Because this is last step we specify the dataset features of this step that we want to be stored in the dataset - features["action.ee.x"] = float - features["action.ee.y"] = float - features["action.ee.z"] = float - features["action.ee.wx"] = float - features["action.ee.wy"] = float - features["action.ee.wz"] = float - return features - @ProcessorStepRegistry.register("inverse_kinematics_ee_to_joints") @dataclass @@ -259,18 +258,6 @@ def __call__(self, transition: EnvTransition) -> EnvTransition: wz = act.get("action.ee.wz", None) if None in (x, y, z, wx, wy, wz): - # Nothing to do; restore what we popped and return - act.update( - { - "action.ee.x": x, - "action.ee.y": y, - "action.ee.z": z, - "action.ee.wx": wx, - "action.ee.wy": wy, - "action.ee.wz": wz, - } - ) - transition[TransitionKey.ACTION] = act return transition # Get joint positions from complimentary data @@ -307,16 +294,11 @@ def __call__(self, transition: EnvTransition) -> EnvTransition: return transition def transform_features(self, features: dict[str, PolicyFeature]) -> dict[str, PolicyFeature]: - # We specify the dataset features of this step that we want to be stored in the dataset - features["action.ee.x"] = float - features["action.ee.y"] = float - features["action.ee.z"] = float - features["action.ee.wx"] = float - features["action.ee.wy"] = float - features["action.ee.wz"] = float - - features["observation.state.gripper.pos"] = float - features["action.gripper.pos"] = float + features["observation.state.gripper.pos"] = (PolicyFeature(type=FeatureType.ACTION, shape=(1,)),) + features["action.gripper.pos"] = (PolicyFeature(type=FeatureType.ACTION, shape=(1,)),) + for name in self.motor_names: + features[f"action.{name}.pos"] = (PolicyFeature(type=FeatureType.ACTION, shape=(1,)),) + return features def reset(self): @@ -383,14 +365,13 @@ def __call__(self, transition: EnvTransition) -> EnvTransition: new_act.pop("action.gripper", None) transition[TransitionKey.ACTION] = new_act - obs.update({"observation.state.gripper.pos": curr_pos}) + obs["observation.state.gripper.pos"] = curr_pos transition[TransitionKey.OBSERVATION] = obs return transition def transform_features(self, features: dict[str, PolicyFeature]) -> dict[str, PolicyFeature]: - # We specify the dataset features of this step that we want to be stored in the dataset - features["observation.state.gripper.pos"] = float - features["action.gripper.pos"] = float + features.pop("action.gripper", None) + features["action.gripper.pos"] = (PolicyFeature(type=FeatureType.ACTION, shape=(1,)),) return features @@ -423,22 +404,18 @@ def observation(self, obs: dict) -> dict: pos = t[:3, 3] tw = Rotation.from_matrix(t[:3, :3]).as_rotvec() - obs.update( - { - "observation.state.ee.x": float(pos[0]), - "observation.state.ee.y": float(pos[1]), - "observation.state.ee.z": float(pos[2]), - "observation.state.ee.wx": float(tw[0]), - "observation.state.ee.wy": float(tw[1]), - "observation.state.ee.wz": float(tw[2]), - } - ) + obs["observation.state.ee.x"] = float(pos[0]) + obs["observation.state.ee.y"] = float(pos[1]) + obs["observation.state.ee.z"] = float(pos[2]) + obs["observation.state.ee.wx"] = float(tw[0]) + obs["observation.state.ee.wy"] = float(tw[1]) + obs["observation.state.ee.wz"] = float(tw[2]) return obs def transform_features(self, features: dict[str, PolicyFeature]) -> dict[str, PolicyFeature]: # We specify the dataset features of this step that we want to be stored in the dataset for k in ["x", "y", "z", "wx", "wy", "wz"]: - features[f"observation.state.ee.{k}"] = float + features[f"observation.state.ee.{k}"] = (PolicyFeature(type=FeatureType.ACTION, shape=(1,)),) return features diff --git a/src/lerobot/teleoperators/phone/phone_processor.py b/src/lerobot/teleoperators/phone/phone_processor.py index 37a3d21027f..36880e0c85f 100644 --- a/src/lerobot/teleoperators/phone/phone_processor.py +++ b/src/lerobot/teleoperators/phone/phone_processor.py @@ -16,6 +16,7 @@ from dataclasses import dataclass, field +from lerobot.configs.types import FeatureType, PolicyFeature from lerobot.processor.pipeline import ActionProcessor, ProcessorStepRegistry from lerobot.teleoperators.phone.config_phone import PhoneOS @@ -47,7 +48,7 @@ class MapPhoneActionToRobotAction(ActionProcessor): def action(self, act: dict) -> dict: # Pop them from the action - enabled = act.pop("action.phone.enabled", 0) + enabled = bool(act.pop("action.phone.enabled", 0)) pos = act.pop("action.phone.pos", None) rot = act.pop("action.phone.rot", None) inputs = act.pop("action.phone.raw_inputs", {}) @@ -68,16 +69,28 @@ def action(self, act: dict) -> dict: ) # Positive if a is pressed, negative if b is pressed, 0 if both or neither are pressed # For some actions we need to invert the axis - act.update( - { - "action.enabled": enabled, - "action.target_x": -pos[1] if enabled else 0.0, - "action.target_y": pos[0] if enabled else 0.0, - "action.target_z": pos[2] if enabled else 0.0, - "action.target_wx": rotvec[1] if enabled else 0.0, - "action.target_wy": rotvec[0] if enabled else 0.0, - "action.target_wz": -rotvec[2] if enabled else 0.0, - "action.gripper": gripper, # Still send gripper action when disabled - } - ) + act["action.enabled"] = enabled + act["action.target_x"] = -pos[1] if enabled else 0.0 + act["action.target_y"] = pos[0] if enabled else 0.0 + act["action.target_z"] = pos[2] if enabled else 0.0 + act["action.target_wx"] = rotvec[1] if enabled else 0.0 + act["action.target_wy"] = rotvec[0] if enabled else 0.0 + act["action.target_wz"] = -rotvec[2] if enabled else 0.0 + act["action.gripper"] = gripper # Still send gripper action when disabled return act + + def transform_features(self, features: dict[str, PolicyFeature]) -> dict[str, PolicyFeature]: + features.pop("action.phone.enabled", None) + features.pop("action.phone.pos", None) + features.pop("action.phone.rot", None) + features.pop("action.phone.raw_inputs", None) + + features["action.enabled"] = (PolicyFeature(type=FeatureType.ACTION, shape=(1,)),) + features["action.target_x"] = (PolicyFeature(type=FeatureType.ACTION, shape=(1,)),) + features["action.target_y"] = (PolicyFeature(type=FeatureType.ACTION, shape=(1,)),) + features["action.target_z"] = (PolicyFeature(type=FeatureType.ACTION, shape=(1,)),) + features["action.target_wx"] = (PolicyFeature(type=FeatureType.ACTION, shape=(1,)),) + features["action.target_wy"] = (PolicyFeature(type=FeatureType.ACTION, shape=(1,)),) + features["action.target_wz"] = (PolicyFeature(type=FeatureType.ACTION, shape=(1,)),) + features["action.gripper"] = (PolicyFeature(type=FeatureType.ACTION, shape=(1,)),) + return features diff --git a/src/lerobot/teleoperators/phone/phone.py b/src/lerobot/teleoperators/phone/teleop_phone.py similarity index 99% rename from src/lerobot/teleoperators/phone/phone.py rename to src/lerobot/teleoperators/phone/teleop_phone.py index dfa457b28ca..ed985c0815a 100644 --- a/src/lerobot/teleoperators/phone/phone.py +++ b/src/lerobot/teleoperators/phone/teleop_phone.py @@ -70,6 +70,8 @@ def send_feedback(self, feedback: dict[str, float]) -> None: class IOSPhone(BasePhone, Teleoperator): + name = "ios_phone" + def __init__(self, config: PhoneConfig): super().__init__(config) self.config = config @@ -186,6 +188,8 @@ def disconnect(self) -> None: class AndroidPhone(BasePhone, Teleoperator): + name = "android_phone" + def __init__(self, config: PhoneConfig): super().__init__(config) self.config = config diff --git a/tests/processor/test_batch_processor.py b/tests/processor/test_batch_processor.py index c9c4cd1dd72..0bf050e20ca 100644 --- a/tests/processor/test_batch_processor.py +++ b/tests/processor/test_batch_processor.py @@ -603,24 +603,6 @@ def test_action_dtype_preservation(): assert result[TransitionKey.ACTION].shape == (1, 4) -def test_action_in_place_mutation(): - """Test that the processor mutates the transition in place for actions.""" - processor = ToBatchProcessor() - - action = torch.randn(4) - transition = create_transition(action=action) - - # Store reference to original transition - original_transition = transition - - # Process - result = processor(transition) - - # Should be the same object (in-place mutation) - assert result is original_transition - assert result[TransitionKey.ACTION].shape == (1, 4) - - def test_empty_action_tensor(): """Test handling of empty action tensors.""" processor = ToBatchProcessor() @@ -851,27 +833,6 @@ def test_task_comprehensive_string_cases(): processed_comp_data = result[TransitionKey.COMPLEMENTARY_DATA] assert processed_comp_data["task"] == task_list assert isinstance(processed_comp_data["task"], list) - assert processed_comp_data["task"] is task_list # Should be same object (in-place) - - -def test_task_in_place_mutation(): - """Test that the processor mutates complementary_data in place for tasks.""" - processor = ToBatchProcessor() - - complementary_data = {"task": "sort_objects"} - transition = create_transition(complementary_data=complementary_data) - - # Store reference to original transition and complementary_data - original_transition = transition - original_comp_data = complementary_data - - # Process - result = processor(transition) - - # Should be the same objects (in-place mutation) - assert result is original_transition - assert result[TransitionKey.COMPLEMENTARY_DATA] is original_comp_data - assert original_comp_data["task"] == ["sort_objects"] def test_task_preserves_other_keys(): @@ -1127,3 +1088,49 @@ def test_empty_index_tensor(): # Should remain unchanged (already 1D) assert result[TransitionKey.COMPLEMENTARY_DATA]["index"].shape == (0,) + + +def test_action_processing_creates_new_transition(): + """Test that the processor creates a new transition object with correctly processed action.""" + processor = ToBatchProcessor() + + action = torch.randn(4) + transition = create_transition(action=action) + + # Store reference to original transition + original_transition = transition + + # Process + result = processor(transition) + + # Should be a different object (functional design, not in-place mutation) + assert result is not original_transition + # Original transition should remain unchanged + assert original_transition[TransitionKey.ACTION].shape == (4,) + # Result should have correctly processed action with batch dimension + assert result[TransitionKey.ACTION].shape == (1, 4) + assert torch.equal(result[TransitionKey.ACTION][0], action) + + +def test_task_processing_creates_new_transition(): + """Test that the processor creates a new transition object with correctly processed task.""" + processor = ToBatchProcessor() + + complementary_data = {"task": "sort_objects"} + transition = create_transition(complementary_data=complementary_data) + + # Store reference to original transition and complementary_data + original_transition = transition + original_comp_data = complementary_data + + # Process + result = processor(transition) + + # Should be different transition object (functional design) + assert result is not original_transition + # But complementary_data is the same reference (current implementation behavior) + assert result[TransitionKey.COMPLEMENTARY_DATA] is original_comp_data + # The task should be processed correctly (wrapped in list) + assert result[TransitionKey.COMPLEMENTARY_DATA]["task"] == ["sort_objects"] + # Original complementary data is also modified (current behavior) + assert original_comp_data["task"] == ["sort_objects"] diff --git a/tests/processor/test_tokenizer_processor.py b/tests/processor/test_tokenizer_processor.py index 3cac6d6f8a4..300191d8635 100644 --- a/tests/processor/test_tokenizer_processor.py +++ b/tests/processor/test_tokenizer_processor.py @@ -919,32 +919,6 @@ def test_device_detection_from_action(): assert attention_mask.device.type == "cuda" -@pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA not available") -@require_package("transformers") -def test_device_detection_from_complementary_data(): - """Test that device is detected from tensors in complementary_data.""" - mock_tokenizer = MockTokenizer(vocab_size=100) - processor = TokenizerProcessor(tokenizer=mock_tokenizer, max_length=10) - - # Create transition with tensor in complementary_data - transition = create_transition( - observation={"metadata": {"key": "value"}}, # No tensors - complementary_data={ - "task": "comp data test", - "index": torch.tensor([42]).cuda(), # Tensor in complementary_data - }, - ) - - result = processor(transition) - - # Check that tokenized tensors match complementary_data tensor's device - tokens = result[TransitionKey.OBSERVATION][f"{OBS_LANGUAGE}.tokens"] - attention_mask = result[TransitionKey.OBSERVATION][f"{OBS_LANGUAGE}.attention_mask"] - - assert tokens.device.type == "cuda" - assert attention_mask.device.type == "cuda" - - @require_package("transformers") def test_device_detection_preserves_dtype(): """Test that device detection doesn't affect dtype of tokenized tensors."""