Add display_url / display_port options to RecordConfig and TeleoperateConfig and
forward them to init_rerun() when display_data is enabled.

diff --git a/src/lerobot/scripts/lerobot_record.py b/src/lerobot/scripts/lerobot_record.py
index 6df92d893b3..12dfa0b724f 100644
--- a/src/lerobot/scripts/lerobot_record.py
+++ b/src/lerobot/scripts/lerobot_record.py
@@ -182,6 +182,10 @@ class RecordConfig:
     policy: PreTrainedConfig | None = None
     # Display all cameras on screen
     display_data: bool = False
+    # Host (name or IP) of a remote Rerun server to stream to. Only used when display_data is set.
+    display_url: str | None = None
+    # Port of the remote Rerun server. Only used when display_url is set.
+    display_port: int = 9876
     # Use vocal synthesis to read events.
     play_sounds: bool = True
     # Resume recording on an existing dataset.
@@ -374,7 +378,7 @@ def record(cfg: RecordConfig) -> LeRobotDataset:
     init_logging()
     logging.info(pformat(asdict(cfg)))
     if cfg.display_data:
-        init_rerun(session_name="recording")
+        init_rerun(session_name="recording", url=cfg.display_url, port=cfg.display_port)
 
     robot = make_robot_from_config(cfg.robot)
     teleop = make_teleoperator_from_config(cfg.teleop) if cfg.teleop is not None else None
diff --git a/src/lerobot/scripts/lerobot_teleoperate.py b/src/lerobot/scripts/lerobot_teleoperate.py
index 0a418f3bcad..a208ae0a8ff 100644
--- a/src/lerobot/scripts/lerobot_teleoperate.py
+++ b/src/lerobot/scripts/lerobot_teleoperate.py
@@ -104,6 +104,10 @@ class TeleoperateConfig:
     teleop_time_s: float | None = None
     # Display all cameras on screen
     display_data: bool = False
+    # Host (name or IP) of a remote Rerun server to stream to. Only used when display_data is set.
+    display_url: str | None = None
+    # Port of the remote Rerun server. Only used when display_url is set.
+    display_port: int = 9876
 
 
 def teleop_loop(
@@ -186,7 +190,7 @@ def teleoperate(cfg: TeleoperateConfig):
     init_logging()
     logging.info(pformat(asdict(cfg)))
     if cfg.display_data:
-        init_rerun(session_name="teleoperation")
+        init_rerun(session_name="teleoperation", url=cfg.display_url, port=cfg.display_port)
 
     teleop = make_teleoperator_from_config(cfg.teleop)
     robot = make_robot_from_config(cfg.robot)
diff --git a/src/lerobot/utils/visualization_utils.py b/src/lerobot/utils/visualization_utils.py
index 991b10247fa..de29018e1d1 100644
--- a/src/lerobot/utils/visualization_utils.py
+++ b/src/lerobot/utils/visualization_utils.py
@@ -16,19 +16,29 @@
 import os
 from typing import Any
 
+import cv2
 import numpy as np
 import rerun as rr
 
 from .constants import OBS_PREFIX, OBS_STR
 
 
-def init_rerun(session_name: str = "lerobot_control_loop") -> None:
-    """Initializes the Rerun SDK for visualizing the control loop."""
+def init_rerun(session_name: str = "lerobot_control_loop", url: str | None = None, port: int = 9876) -> None:
+    """Initializes the Rerun SDK for visualizing the control loop.
+
+    Args:
+        session_name: Name passed to `rr.init` for this session.
+        url: Host (name or IP) of a remote Rerun server. If None or empty, a local viewer is spawned.
+        port: Port of the remote Rerun server. Only used when `url` is set.
+    """
     batch_size = os.getenv("RERUN_FLUSH_NUM_BYTES", "8000")
     os.environ["RERUN_FLUSH_NUM_BYTES"] = batch_size
     rr.init(session_name)
     memory_limit = os.getenv("LEROBOT_RERUN_MEMORY_LIMIT", "10%")
-    rr.spawn(memory_limit=memory_limit)
+    if url:
+        rr.connect_grpc(url=f"rerun+http://{url}:{port}/proxy")
+    else:
+        rr.spawn(memory_limit=memory_limit)
 
 
 def _is_scalar(x):
@@ -48,7 +58,8 @@ def log_rerun_data(
     to the Rerun viewer. It handles different data types appropriately:
     - Scalars values (floats, ints) are logged as `rr.Scalars`.
     - 3D NumPy arrays that resemble images (e.g., with 1, 3, or 4 channels first) are transposed
-      from CHW to HWC format and logged as `rr.Image`.
+      from CHW to HWC format. 8-bit RGB images are JPEG-encoded and logged as `rr.EncodedImage`;
+      other images are logged uncompressed as `rr.Image`.
     - 1D NumPy arrays are logged as a series of individual scalars, with each element indexed.
     - Other multi-dimensional arrays are flattened and logged as individual scalars.
 
@@ -75,7 +86,17 @@ def log_rerun_data(
                     for i, vi in enumerate(arr):
                         rr.log(f"{key}_{i}", rr.Scalars(float(vi)))
                 else:
-                    rr.log(key, rr.Image(arr), static=True)
+                    image = None
+                    # JPEG-encode only 8-bit RGB frames: cv2.cvtColor(RGB2BGR) raises on 1-channel input.
+                    if arr.ndim == 3 and arr.shape[-1] == 3 and arr.dtype == np.uint8:
+                        bgr = cv2.cvtColor(arr, cv2.COLOR_RGB2BGR)
+                        ok, buffer = cv2.imencode(".jpg", bgr, [int(cv2.IMWRITE_JPEG_QUALITY), 50])
+                        if ok:
+                            image = rr.EncodedImage(contents=buffer.tobytes(), media_type="image/jpeg")
+                    if image is None:
+                        # Everything else (and failed encodes) is logged uncompressed, as before.
+                        image = rr.Image(arr)
+                    rr.log(key, image, static=True)
 
     if action:
         for k, v in action.items():