diff --git a/.gitattributes b/.gitattributes index 44e16cf1d0..7d89f37b2b 100644 --- a/.gitattributes +++ b/.gitattributes @@ -11,10 +11,11 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - *.memmap filter=lfs diff=lfs merge=lfs -text *.stl filter=lfs diff=lfs merge=lfs -text *.safetensors filter=lfs diff=lfs merge=lfs -text *.mp4 filter=lfs diff=lfs merge=lfs -text *.arrow filter=lfs diff=lfs merge=lfs -text *.json !text !filter !merge !diff +tests/artifacts/cameras/*.png filter=lfs diff=lfs merge=lfs -text +*.bag filter=lfs diff=lfs merge=lfs -text diff --git a/lerobot/calibrate.py b/lerobot/calibrate.py index 6d1a63cdfb..088aee299d 100644 --- a/lerobot/calibrate.py +++ b/lerobot/calibrate.py @@ -31,7 +31,8 @@ import draccus -from lerobot.common.cameras import intel, opencv # noqa: F401 +from lerobot.common.cameras.opencv.configuration_opencv import OpenCVCameraConfig # noqa: F401 +from lerobot.common.cameras.realsense.configuration_realsense import RealSenseCameraConfig # noqa: F401 from lerobot.common.robots import ( # noqa: F401 Robot, RobotConfig, diff --git a/lerobot/common/cameras/__init__.py b/lerobot/common/cameras/__init__.py index f115e95fbc..1488cd89ea 100644 --- a/lerobot/common/cameras/__init__.py +++ b/lerobot/common/cameras/__init__.py @@ -1,3 +1,17 @@ +# Copyright 2024 The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from .camera import Camera -from .configs import CameraConfig +from .configs import CameraConfig, ColorMode, Cv2Rotation from .utils import make_cameras_from_configs diff --git a/lerobot/common/cameras/camera.py b/lerobot/common/cameras/camera.py index 5d9ac50942..1937205b18 100644 --- a/lerobot/common/cameras/camera.py +++ b/lerobot/common/cameras/camera.py @@ -1,25 +1,120 @@ +#!/usr/bin/env python + +# Copyright 2024 The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import abc +from typing import Any, Dict, List import numpy as np +from .configs import CameraConfig, ColorMode + class Camera(abc.ABC): + """Base class for camera implementations. + + Defines a standard interface for camera operations across different backends. + Subclasses must implement all abstract methods. 
+ + Manages basic camera properties (FPS, resolution) and core operations: + - Connection/disconnection + - Frame capture (sync/async) + + Attributes: + fps (int | None): Configured frames per second + width (int | None): Frame width in pixels + height (int | None): Frame height in pixels + + Example: + class MyCamera(Camera): + def __init__(self, config): ... + @property + def is_connected(self) -> bool: ... + def connect(self, warmup=True): ... + # Plus other required methods + """ + + def __init__(self, config: CameraConfig): + """Initialize the camera with the given configuration. + + Args: + config: Camera configuration containing FPS and resolution. + """ + self.fps: int | None = config.fps + self.width: int | None = config.width + self.height: int | None = config.height + + @property @abc.abstractmethod - def connect(self): + def is_connected(self) -> bool: + """Check if the camera is currently connected. + + Returns: + bool: True if the camera is connected and ready to capture frames, + False otherwise. + """ pass + @staticmethod @abc.abstractmethod - def read(self, temporary_color: str | None = None) -> np.ndarray: + def find_cameras() -> List[Dict[str, Any]]: + """Detects available cameras connected to the system. + Returns: + List[Dict[str, Any]]: A list of dictionaries, + where each dictionary contains information about a detected camera. + """ pass @abc.abstractmethod - def async_read(self) -> np.ndarray: + def connect(self, warmup: bool = True) -> None: + """Establish connection to the camera. + + Args: + warmup: If True (default), captures a warmup frame before returning. Useful + for cameras that require time to adjust capture settings. + If False, skips the warmup frame. + """ + pass + + @abc.abstractmethod + def read(self, color_mode: ColorMode | None = None) -> np.ndarray: + """Capture and return a single frame from the camera. + + Args: + color_mode: Desired color mode for the output frame. If None, + uses the camera's default color mode. + + Returns: + np.ndarray: Captured frame as a numpy array. + """ pass @abc.abstractmethod - def disconnect(self): + def async_read(self, timeout_ms: float = ...) -> np.ndarray: + """Asynchronously capture and return a single frame from the camera. + + Args: + timeout_ms: Maximum time to wait for a frame in milliseconds. + Defaults to implementation-specific timeout. + + Returns: + np.ndarray: Captured frame as a numpy array. + """ pass - def __del__(self): - if getattr(self, "is_connected", False): - self.disconnect() + @abc.abstractmethod + def disconnect(self) -> None: + """Disconnect from the camera and release resources.""" + pass diff --git a/lerobot/common/cameras/configs.py b/lerobot/common/cameras/configs.py index 4c796a03de..0488a97ffd 100644 --- a/lerobot/common/cameras/configs.py +++ b/lerobot/common/cameras/configs.py @@ -1,11 +1,44 @@ +#!/usr/bin/env python + +# Copyright 2024 The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ import abc from dataclasses import dataclass +from enum import Enum import draccus -@dataclass +class ColorMode(str, Enum): + RGB = "rgb" + BGR = "bgr" + + +class Cv2Rotation(int, Enum): + NO_ROTATION = 0 + ROTATE_90 = 90 + ROTATE_180 = 180 + ROTATE_270 = -90 + + +@dataclass(kw_only=True) class CameraConfig(draccus.ChoiceRegistry, abc.ABC): + fps: int | None = None + width: int | None = None + height: int | None = None + @property def type(self) -> str: return self.get_choice_name(self.__class__) diff --git a/lerobot/common/cameras/intel/__init__.py b/lerobot/common/cameras/intel/__init__.py deleted file mode 100644 index d875ebf4e1..0000000000 --- a/lerobot/common/cameras/intel/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -from .camera_realsense import RealSenseCamera -from .configuration_realsense import RealSenseCameraConfig - -__all__ = ["RealSenseCamera", "RealSenseCameraConfig"] diff --git a/lerobot/common/cameras/intel/camera_realsense.py b/lerobot/common/cameras/intel/camera_realsense.py deleted file mode 100644 index cd41286f25..0000000000 --- a/lerobot/common/cameras/intel/camera_realsense.py +++ /dev/null @@ -1,535 +0,0 @@ -# Copyright 2024 The HuggingFace Inc. team. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -""" -This file contains utilities for recording frames from Intel Realsense cameras. -""" - -import argparse -import concurrent.futures -import logging -import math -import shutil -import threading -import time -import traceback -from collections import Counter -from pathlib import Path -from threading import Thread - -import numpy as np -from PIL import Image - -from lerobot.common.errors import DeviceAlreadyConnectedError, DeviceNotConnectedError -from lerobot.common.utils.robot_utils import ( - busy_wait, -) -from lerobot.common.utils.utils import capture_timestamp_utc - -from ..camera import Camera -from .configuration_realsense import RealSenseCameraConfig - -SERIAL_NUMBER_INDEX = 1 - - -def find_cameras(raise_when_empty=True, mock=False) -> list[dict]: - """ - Find the names and the serial numbers of the Intel RealSense cameras - connected to the computer. - """ - if mock: - import tests.cameras.mock_pyrealsense2 as rs - else: - import pyrealsense2 as rs - - cameras = [] - for device in rs.context().query_devices(): - serial_number = int(device.get_info(rs.camera_info(SERIAL_NUMBER_INDEX))) - name = device.get_info(rs.camera_info.name) - cameras.append( - { - "serial_number": serial_number, - "name": name, - } - ) - - if raise_when_empty and len(cameras) == 0: - raise OSError( - "Not a single camera was detected. Try re-plugging, or re-installing `librealsense` and its python wrapper `pyrealsense2`, or updating the firmware." 
- ) - - return cameras - - -def save_image(img_array, serial_number, frame_index, images_dir): - try: - img = Image.fromarray(img_array) - path = images_dir / f"camera_{serial_number}_frame_{frame_index:06d}.png" - path.parent.mkdir(parents=True, exist_ok=True) - img.save(str(path), quality=100) - logging.info(f"Saved image: {path}") - except Exception as e: - logging.error(f"Failed to save image for camera {serial_number} frame {frame_index}: {e}") - - -def save_images_from_cameras( - images_dir: Path, - serial_numbers: list[int] | None = None, - fps=None, - width=None, - height=None, - record_time_s=2, - mock=False, -): - """ - Initializes all the cameras and saves images to the directory. Useful to visually identify the camera - associated to a given serial number. - """ - if serial_numbers is None or len(serial_numbers) == 0: - camera_infos = find_cameras(mock=mock) - serial_numbers = [cam["serial_number"] for cam in camera_infos] - - if mock: - import tests.cameras.mock_cv2 as cv2 - else: - import cv2 - - print("Connecting cameras") - cameras = [] - for cam_sn in serial_numbers: - print(f"{cam_sn=}") - config = RealSenseCameraConfig(serial_number=cam_sn, fps=fps, width=width, height=height, mock=mock) - camera = RealSenseCamera(config) - camera.connect() - print( - f"RealSenseCamera({camera.serial_number}, fps={camera.fps}, width={camera.capture_width}, height={camera.capture_height}, color_mode={camera.color_mode})" - ) - cameras.append(camera) - - images_dir = Path(images_dir) - if images_dir.exists(): - shutil.rmtree( - images_dir, - ) - images_dir.mkdir(parents=True, exist_ok=True) - - print(f"Saving images to {images_dir}") - frame_index = 0 - start_time = time.perf_counter() - try: - with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor: - while True: - now = time.perf_counter() - - for camera in cameras: - # If we use async_read when fps is None, the loop will go full speed, and we will end up - # saving the same images from the cameras multiple times until the RAM/disk is full. - image = camera.read() if fps is None else camera.async_read() - if image is None: - print("No Frame") - - bgr_converted_image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR) - - executor.submit( - save_image, - bgr_converted_image, - camera.serial_number, - frame_index, - images_dir, - ) - - if fps is not None: - dt_s = time.perf_counter() - now - busy_wait(1 / fps - dt_s) - - if time.perf_counter() - start_time > record_time_s: - break - - print(f"Frame: {frame_index:04d}\tLatency (ms): {(time.perf_counter() - now) * 1000:.2f}") - - frame_index += 1 - finally: - print(f"Images have been saved to {images_dir}") - for camera in cameras: - camera.disconnect() - - -class RealSenseCamera(Camera): - """ - The RealSenseCamera class is similar to OpenCVCamera class but adds additional features for Intel Real Sense cameras: - - is instantiated with the serial number of the camera - won't randomly change as it can be the case of OpenCVCamera for Linux, - - can also be instantiated with the camera's name — if it's unique — using RealSenseCamera.init_from_name(), - - depth map can be returned. 
- - To find the camera indices of your cameras, you can run our utility script that will save a few frames for each camera: - ```bash - python lerobot/common/robot_devices/cameras/intelrealsense.py --images-dir outputs/images_from_intelrealsense_cameras - ``` - - When an RealSenseCamera is instantiated, if no specific config is provided, the default fps, width, height and color_mode - of the given camera will be used. - - Example of instantiating with a serial number: - ```python - from lerobot.common.robot_devices.cameras.configs import RealSenseCameraConfig - - config = RealSenseCameraConfig(serial_number=128422271347) - camera = RealSenseCamera(config) - camera.connect() - color_image = camera.read() - # when done using the camera, consider disconnecting - camera.disconnect() - ``` - - Example of instantiating with a name if it's unique: - ``` - config = RealSenseCameraConfig(name="Intel RealSense D405") - ``` - - Example of changing default fps, width, height and color_mode: - ```python - config = RealSenseCameraConfig(serial_number=128422271347, fps=30, width=1280, height=720) - config = RealSenseCameraConfig(serial_number=128422271347, fps=90, width=640, height=480) - config = RealSenseCameraConfig(serial_number=128422271347, fps=90, width=640, height=480, color_mode="bgr") - # Note: might error out upon `camera.connect()` if these settings are not compatible with the camera - ``` - - Example of returning depth: - ```python - config = RealSenseCameraConfig(serial_number=128422271347, use_depth=True) - camera = RealSenseCamera(config) - camera.connect() - color_image, depth_map = camera.read() - ``` - """ - - def __init__( - self, - config: RealSenseCameraConfig, - ): - self.config = config - if config.name is not None: - self.serial_number = self.find_serial_number_from_name(config.name) - else: - self.serial_number = config.serial_number - - # Store the raw (capture) resolution from the config. - self.capture_width = config.width - self.capture_height = config.height - - # If rotated by ±90, swap width and height. - if config.rotation in [-90, 90]: - self.width = config.height - self.height = config.width - else: - self.width = config.width - self.height = config.height - - self.fps = config.fps - self.channels = config.channels - self.color_mode = config.color_mode - self.use_depth = config.use_depth - self.force_hardware_reset = config.force_hardware_reset - self.mock = config.mock - - self.camera = None - self.is_connected = False - self.thread = None - self.stop_event = None - self.color_image = None - self.depth_map = None - self.logs = {} - - if self.mock: - import tests.cameras.mock_cv2 as cv2 - else: - import cv2 - - self.rotation = None - if config.rotation == -90: - self.rotation = cv2.ROTATE_90_COUNTERCLOCKWISE - elif config.rotation == 90: - self.rotation = cv2.ROTATE_90_CLOCKWISE - elif config.rotation == 180: - self.rotation = cv2.ROTATE_180 - - def find_serial_number_from_name(self, name): - camera_infos = find_cameras() - camera_names = [cam["name"] for cam in camera_infos] - this_name_count = Counter(camera_names)[name] - if this_name_count > 1: - # TODO(aliberts): Test this with multiple identical cameras (Aloha) - raise ValueError( - f"Multiple {name} cameras have been detected. Please use their serial number to instantiate them." 
- ) - - name_to_serial_dict = {cam["name"]: cam["serial_number"] for cam in camera_infos} - cam_sn = name_to_serial_dict[name] - - return cam_sn - - def connect(self): - if self.is_connected: - raise DeviceAlreadyConnectedError(f"RealSenseCamera({self.serial_number}) is already connected.") - - if self.mock: - import tests.cameras.mock_pyrealsense2 as rs - else: - import pyrealsense2 as rs - - config = rs.config() - config.enable_device(str(self.serial_number)) - - if self.fps and self.capture_width and self.capture_height: - # TODO(rcadene): can we set rgb8 directly? - config.enable_stream( - rs.stream.color, self.capture_width, self.capture_height, rs.format.rgb8, self.fps - ) - else: - config.enable_stream(rs.stream.color) - - if self.use_depth: - if self.fps and self.capture_width and self.capture_height: - config.enable_stream( - rs.stream.depth, self.capture_width, self.capture_height, rs.format.z16, self.fps - ) - else: - config.enable_stream(rs.stream.depth) - - self.camera = rs.pipeline() - try: - profile = self.camera.start(config) - is_camera_open = True - except RuntimeError: - is_camera_open = False - traceback.print_exc() - - # If the camera doesn't work, display the camera indices corresponding to - # valid cameras. - if not is_camera_open: - # Verify that the provided `serial_number` is valid before printing the traceback - camera_infos = find_cameras() - serial_numbers = [cam["serial_number"] for cam in camera_infos] - if self.serial_number not in serial_numbers: - raise ValueError( - f"`serial_number` is expected to be one of these available cameras {serial_numbers}, but {self.serial_number} is provided instead. " - "To find the serial number you should use, run `python lerobot/common/robot_devices/cameras/intelrealsense.py`." - ) - - raise OSError(f"Can't access RealSenseCamera({self.serial_number}).") - - color_stream = profile.get_stream(rs.stream.color) - color_profile = color_stream.as_video_stream_profile() - actual_fps = color_profile.fps() - actual_width = color_profile.width() - actual_height = color_profile.height() - - # Using `math.isclose` since actual fps can be a float (e.g. 29.9 instead of 30) - if self.fps is not None and not math.isclose(self.fps, actual_fps, rel_tol=1e-3): - # Using `OSError` since it's a broad that encompasses issues related to device communication - raise OSError( - f"Can't set {self.fps=} for RealSenseCamera({self.serial_number}). Actual value is {actual_fps}." - ) - if self.capture_width is not None and self.capture_width != actual_width: - raise OSError( - f"Can't set {self.capture_width=} for RealSenseCamera({self.serial_number}). Actual value is {actual_width}." - ) - if self.capture_height is not None and self.capture_height != actual_height: - raise OSError( - f"Can't set {self.capture_height=} for RealSenseCamera({self.serial_number}). Actual value is {actual_height}." - ) - - self.fps = round(actual_fps) - self.capture_width = round(actual_width) - self.capture_height = round(actual_height) - - self.is_connected = True - - def read(self, temporary_color: str | None = None) -> np.ndarray | tuple[np.ndarray, np.ndarray]: - """Read a frame from the camera returned in the format height x width x channels (e.g. 480 x 640 x 3) - of type `np.uint8`, contrarily to the pytorch format which is float channel first. - - When `use_depth=True`, returns a tuple `(color_image, depth_map)` with a depth map in the format - height x width (e.g. 480 x 640) of type np.uint16. 
- - Note: Reading a frame is done every `camera.fps` times per second, and it is blocking. - If you are reading data from other sensors, we advise to use `camera.async_read()` which is non blocking version of `camera.read()`. - """ - if not self.is_connected: - raise DeviceNotConnectedError( - f"RealSenseCamera({self.serial_number}) is not connected. Try running `camera.connect()` first." - ) - - if self.mock: - import tests.cameras.mock_cv2 as cv2 - else: - import cv2 - - start_time = time.perf_counter() - - frame = self.camera.wait_for_frames(timeout_ms=5000) - - color_frame = frame.get_color_frame() - - if not color_frame: - raise OSError(f"Can't capture color image from RealSenseCamera({self.serial_number}).") - - color_image = np.asanyarray(color_frame.get_data()) - - requested_color_mode = self.color_mode if temporary_color is None else temporary_color - if requested_color_mode not in ["rgb", "bgr"]: - raise ValueError( - f"Expected color values are 'rgb' or 'bgr', but {requested_color_mode} is provided." - ) - - # IntelRealSense uses RGB format as default (red, green, blue). - if requested_color_mode == "bgr": - color_image = cv2.cvtColor(color_image, cv2.COLOR_RGB2BGR) - - h, w, _ = color_image.shape - if h != self.capture_height or w != self.capture_width: - raise OSError( - f"Can't capture color image with expected height and width ({self.height} x {self.width}). ({h} x {w}) returned instead." - ) - - if self.rotation is not None: - color_image = cv2.rotate(color_image, self.rotation) - - # log the number of seconds it took to read the image - self.logs["delta_timestamp_s"] = time.perf_counter() - start_time - - # log the utc time at which the image was received - self.logs["timestamp_utc"] = capture_timestamp_utc() - - if self.use_depth: - depth_frame = frame.get_depth_frame() - if not depth_frame: - raise OSError(f"Can't capture depth image from RealSenseCamera({self.serial_number}).") - - depth_map = np.asanyarray(depth_frame.get_data()) - - h, w = depth_map.shape - if h != self.capture_height or w != self.capture_width: - raise OSError( - f"Can't capture depth map with expected height and width ({self.height} x {self.width}). ({h} x {w}) returned instead." - ) - - if self.rotation is not None: - depth_map = cv2.rotate(depth_map, self.rotation) - - return color_image, depth_map - else: - return color_image - - def read_loop(self): - while not self.stop_event.is_set(): - if self.use_depth: - self.color_image, self.depth_map = self.read() - else: - self.color_image = self.read() - - def async_read(self): - """Access the latest color image""" - if not self.is_connected: - raise DeviceNotConnectedError( - f"RealSenseCamera({self.serial_number}) is not connected. Try running `camera.connect()` first." - ) - - if self.thread is None: - self.stop_event = threading.Event() - self.thread = Thread(target=self.read_loop, args=()) - self.thread.daemon = True - self.thread.start() - - num_tries = 0 - while self.color_image is None: - # TODO(rcadene, aliberts): intelrealsense has diverged compared to opencv over here - num_tries += 1 - time.sleep(1 / self.fps) - if num_tries > self.fps and (self.thread.ident is None or not self.thread.is_alive()): - raise Exception( - "The thread responsible for `self.async_read()` took too much time to start. There might be an issue. Verify that `self.thread.start()` has been called." 
- ) - - if self.use_depth: - return self.color_image, self.depth_map - else: - return self.color_image - - def disconnect(self): - if not self.is_connected: - raise DeviceNotConnectedError( - f"RealSenseCamera({self.serial_number}) is not connected. Try running `camera.connect()` first." - ) - - if self.thread is not None and self.thread.is_alive(): - # wait for the thread to finish - self.stop_event.set() - self.thread.join() - self.thread = None - self.stop_event = None - - self.camera.stop() - self.camera = None - - self.is_connected = False - - def __del__(self): - if getattr(self, "is_connected", False): - self.disconnect() - - -if __name__ == "__main__": - parser = argparse.ArgumentParser( - description="Save a few frames using `RealSenseCamera` for all cameras connected to the computer, or a selected subset." - ) - parser.add_argument( - "--serial-numbers", - type=int, - nargs="*", - default=None, - help="List of serial numbers used to instantiate the `RealSenseCamera`. If not provided, find and use all available camera indices.", - ) - parser.add_argument( - "--fps", - type=int, - default=30, - help="Set the number of frames recorded per seconds for all cameras. If not provided, use the default fps of each camera.", - ) - parser.add_argument( - "--width", - type=int, - default=640, - help="Set the width for all cameras. If not provided, use the default width of each camera.", - ) - parser.add_argument( - "--height", - type=int, - default=480, - help="Set the height for all cameras. If not provided, use the default height of each camera.", - ) - parser.add_argument( - "--images-dir", - type=Path, - default="outputs/images_from_intelrealsense_cameras", - help="Set directory to save a few frames for each camera.", - ) - parser.add_argument( - "--record-time-s", - type=float, - default=2.0, - help="Set the number of seconds used to record the frames. By default, 2 seconds.", - ) - args = parser.parse_args() - save_images_from_cameras(**vars(args)) diff --git a/lerobot/common/cameras/intel/configuration_realsense.py b/lerobot/common/cameras/intel/configuration_realsense.py deleted file mode 100644 index 66bb1b4f11..0000000000 --- a/lerobot/common/cameras/intel/configuration_realsense.py +++ /dev/null @@ -1,71 +0,0 @@ -# Copyright 2024 The HuggingFace Inc. team. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from dataclasses import dataclass - -from ..configs import CameraConfig - - -@CameraConfig.register_subclass("intelrealsense") -@dataclass -class RealSenseCameraConfig(CameraConfig): - """ - Example of tested options for Intel Real Sense D405: - - ```python - RealSenseCameraConfig(128422271347, 30, 640, 480) - RealSenseCameraConfig(128422271347, 60, 640, 480) - RealSenseCameraConfig(128422271347, 90, 640, 480) - RealSenseCameraConfig(128422271347, 30, 1280, 720) - RealSenseCameraConfig(128422271347, 30, 640, 480, use_depth=True) - RealSenseCameraConfig(128422271347, 30, 640, 480, rotation=90) - ``` - """ - - name: str | None = None - serial_number: int | None = None - fps: int | None = None - width: int | None = None - height: int | None = None - color_mode: str = "rgb" - channels: int | None = None - use_depth: bool = False - force_hardware_reset: bool = True - rotation: int | None = None - mock: bool = False - - def __post_init__(self): - # bool is stronger than is None, since it works with empty strings - if bool(self.name) and bool(self.serial_number): - raise ValueError( - f"One of them must be set: name or serial_number, but {self.name=} and {self.serial_number=} provided." - ) - - if self.color_mode not in ["rgb", "bgr"]: - raise ValueError( - f"`color_mode` is expected to be 'rgb' or 'bgr', but {self.color_mode} is provided." - ) - - self.channels = 3 - - at_least_one_is_not_none = self.fps is not None or self.width is not None or self.height is not None - at_least_one_is_none = self.fps is None or self.width is None or self.height is None - if at_least_one_is_not_none and at_least_one_is_none: - raise ValueError( - "For `fps`, `width` and `height`, either all of them need to be set, or none of them, " - f"but {self.fps=}, {self.width=}, {self.height=} were provided." - ) - - if self.rotation not in [-90, None, 90, 180]: - raise ValueError(f"`rotation` must be in [-90, None, 90, 180] (got {self.rotation})") diff --git a/lerobot/common/cameras/opencv/__init__.py b/lerobot/common/cameras/opencv/__init__.py index edfd6df3d4..11d3139fe2 100644 --- a/lerobot/common/cameras/opencv/__init__.py +++ b/lerobot/common/cameras/opencv/__init__.py @@ -1,4 +1,16 @@ +# Copyright 2024 The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from .camera_opencv import OpenCVCamera from .configuration_opencv import OpenCVCameraConfig - -__all__ = ["OpenCVCamera", "OpenCVCameraConfig"] diff --git a/lerobot/common/cameras/opencv/camera_opencv.py b/lerobot/common/cameras/opencv/camera_opencv.py index d1b08743c6..48001f0159 100644 --- a/lerobot/common/cameras/opencv/camera_opencv.py +++ b/lerobot/common/cameras/opencv/camera_opencv.py @@ -13,507 +13,467 @@ # limitations under the License. """ -This file contains utilities for recording frames from cameras. For more info look at `OpenCVCamera` docstring. +Provides the OpenCVCamera class for capturing frames from cameras using OpenCV. 
""" -import argparse -import concurrent.futures +import logging import math import platform -import shutil -import threading import time from pathlib import Path -from threading import Thread +from threading import Event, Lock, Thread +from typing import Any, Dict, List +import cv2 import numpy as np -from PIL import Image from lerobot.common.errors import DeviceAlreadyConnectedError, DeviceNotConnectedError -from lerobot.common.utils.robot_utils import ( - busy_wait, -) -from lerobot.common.utils.utils import capture_timestamp_utc from ..camera import Camera -from .configuration_opencv import OpenCVCameraConfig +from ..utils import get_cv2_backend, get_cv2_rotation +from .configuration_opencv import ColorMode, OpenCVCameraConfig -# The maximum opencv device index depends on your operating system. For instance, +# NOTE(Steven): The maximum opencv device index depends on your operating system. For instance, # if you have 3 cameras, they should be associated to index 0, 1, and 2. This is the case # on MacOS. However, on Ubuntu, the indices are different like 6, 16, 23. # When you change the USB port or reboot the computer, the operating system might # treat the same cameras as new devices. Thus we select a higher bound to search indices. MAX_OPENCV_INDEX = 60 +logger = logging.getLogger(__name__) -def find_cameras(raise_when_empty=False, max_index_search_range=MAX_OPENCV_INDEX, mock=False) -> list[dict]: - cameras = [] - if platform.system() == "Linux": - print("Linux detected. Finding available camera indices through scanning '/dev/video*' ports") - possible_ports = [str(port) for port in Path("/dev").glob("video*")] - ports = _find_cameras(possible_ports, mock=mock) - for port in ports: - cameras.append( - { - "port": port, - "index": int(port.removeprefix("/dev/video")), - } - ) - else: - print( - "Mac or Windows detected. Finding available camera indices through " - f"scanning all indices from 0 to {MAX_OPENCV_INDEX}" - ) - possible_indices = range(max_index_search_range) - indices = _find_cameras(possible_indices, mock=mock) - for index in indices: - cameras.append( - { - "port": None, - "index": index, - } - ) - return cameras +class OpenCVCamera(Camera): + """ + Manages camera interactions using OpenCV for efficient frame recording. + + This class provides a high-level interface to connect to, configure, and read + frames from cameras compatible with OpenCV's VideoCapture. It supports both + synchronous and asynchronous frame reading. + + An OpenCVCamera instance requires a camera index (e.g., 0) or a device path + (e.g., '/dev/video0' on Linux). Camera indices can be unstable across reboots + or port changes, especially on Linux. Use the provided utility script to find + available camera indices or paths: + ```bash + python -m lerobot.find_cameras opencv + ``` + The camera's default settings (FPS, resolution, color mode) are used unless + overridden in the configuration. 
+ + Example: + ```python + from lerobot.common.cameras.opencv import OpenCVCamera + from lerobot.common.cameras.configuration_opencv import OpenCVCameraConfig, ColorMode, Cv2Rotation + + # Basic usage with camera index 0 + config = OpenCVCameraConfig(index_or_path=0) + camera = OpenCVCamera(config) + camera.connect() -def _find_cameras( - possible_camera_ids: list[int | str], raise_when_empty=False, mock=False -) -> list[int | str]: - if mock: - import tests.cameras.mock_cv2 as cv2 - else: - import cv2 + # Read 1 frame synchronously + color_image = camera.read() + print(color_image.shape) - camera_ids = [] - for camera_idx in possible_camera_ids: - camera = cv2.VideoCapture(camera_idx) - is_open = camera.isOpened() - camera.release() + # Read 1 frame asynchronously + async_image = camera.async_read() - if is_open: - print(f"Camera found at index {camera_idx}") - camera_ids.append(camera_idx) + # When done, properly disconnect the camera using + camera.disconnect() - if raise_when_empty and len(camera_ids) == 0: - raise OSError( - "Not a single camera was detected. Try re-plugging, or re-installing `opencv2`, " - "or your camera driver, or make sure your camera is compatible with opencv2." + # Example with custom settings + custom_config = OpenCVCameraConfig( + index_or_path='/dev/video0', # Or use an index + fps=30, + width=1280, + height=720, + color_mode=ColorMode.RGB, + rotation=Cv2Rotation.ROTATE_90 ) + custom_camera = OpenCVCamera(custom_config) + # ... connect, read, disconnect ... + ``` + """ - return camera_ids + def __init__(self, config: OpenCVCameraConfig): + """ + Initializes the OpenCVCamera instance. + Args: + config: The configuration settings for the camera. + """ + super().__init__(config) -def is_valid_unix_path(path: str) -> bool: - """Note: if 'path' points to a symlink, this will return True only if the target exists""" - p = Path(path) - return p.is_absolute() and p.exists() + self.config = config + self.index_or_path = config.index_or_path + self.fps = config.fps + self.color_mode = config.color_mode + self.warmup_s = config.warmup_s -def get_camera_index_from_unix_port(port: Path) -> int: - return int(str(port.resolve()).removeprefix("/dev/video")) + self.videocapture: cv2.VideoCapture | None = None + self.thread: Thread | None = None + self.stop_event: Event | None = None + self.frame_lock: Lock = Lock() + self.latest_frame: np.ndarray | None = None + self.new_frame_event: Event = Event() -def save_image(img_array, camera_index, frame_index, images_dir): - img = Image.fromarray(img_array) - path = images_dir / f"camera_{camera_index:02d}_frame_{frame_index:06d}.png" - path.parent.mkdir(parents=True, exist_ok=True) - img.save(str(path), quality=100) + self.rotation: int | None = get_cv2_rotation(config.rotation) + self.backend: int = get_cv2_backend() + if self.height and self.width: + if self.rotation in [cv2.ROTATE_90_CLOCKWISE, cv2.ROTATE_90_COUNTERCLOCKWISE]: + self.capture_width, self.capture_height = self.height, self.width + else: + self.capture_width, self.capture_height = self.width, self.height -def save_images_from_cameras( - images_dir: Path, - camera_ids: list | None = None, - fps=None, - width=None, - height=None, - record_time_s=2, - mock=False, -): - """ - Initializes all the cameras and saves images to the directory. Useful to visually identify the camera - associated to a given camera index. 
- """ - if camera_ids is None or len(camera_ids) == 0: - camera_infos = find_cameras(mock=mock) - camera_ids = [cam["index"] for cam in camera_infos] - - print("Connecting cameras") - cameras = [] - for cam_idx in camera_ids: - config = OpenCVCameraConfig(camera_index=cam_idx, fps=fps, width=width, height=height, mock=mock) - camera = OpenCVCamera(config) - camera.connect() - print( - f"OpenCVCamera({camera.camera_index}, fps={camera.fps}, width={camera.capture_width}, " - f"height={camera.capture_height}, color_mode={camera.color_mode})" - ) - cameras.append(camera) + def __str__(self) -> str: + return f"{self.__class__.__name__}({self.index_or_path})" - images_dir = Path(images_dir) - if images_dir.exists(): - shutil.rmtree( - images_dir, - ) - images_dir.mkdir(parents=True, exist_ok=True) - - print(f"Saving images to {images_dir}") - frame_index = 0 - start_time = time.perf_counter() - with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor: - while True: - now = time.perf_counter() - - for camera in cameras: - # If we use async_read when fps is None, the loop will go full speed, and we will endup - # saving the same images from the cameras multiple times until the RAM/disk is full. - image = camera.read() if fps is None else camera.async_read() - - executor.submit( - save_image, - image, - camera.camera_index, - frame_index, - images_dir, - ) - - if fps is not None: - dt_s = time.perf_counter() - now - busy_wait(1 / fps - dt_s) - - print(f"Frame: {frame_index:04d}\tLatency (ms): {(time.perf_counter() - now) * 1000:.2f}") - - if time.perf_counter() - start_time > record_time_s: - break + @property + def is_connected(self) -> bool: + """Checks if the camera is currently connected and opened.""" + return isinstance(self.videocapture, cv2.VideoCapture) and self.videocapture.isOpened() - frame_index += 1 + def connect(self, warmup: bool = True): + """ + Connects to the OpenCV camera specified in the configuration. - print(f"Images have been saved to {images_dir}") + Initializes the OpenCV VideoCapture object, sets desired camera properties + (FPS, width, height), and performs initial checks. + Raises: + DeviceAlreadyConnectedError: If the camera is already connected. + ConnectionError: If the specified camera index/path is not found or the camera is found but fails to open. + RuntimeError: If the camera opens but fails to apply requested FPS/resolution settings. + """ + if self.is_connected: + raise DeviceAlreadyConnectedError(f"{self} is already connected.") -class OpenCVCamera(Camera): - """ - The OpenCVCamera class allows to efficiently record images from cameras. It relies on opencv2 to communicate - with the cameras. Most cameras are compatible. For more info, see the [Video I/O with OpenCV Overview](https://docs.opencv.org/4.x/d0/da7/videoio_overview.html). + # Use 1 thread for OpenCV operations to avoid potential conflicts or + # blocking in multi-threaded applications, especially during data collection. + cv2.setNumThreads(1) - An OpenCVCamera instance requires a camera index (e.g. `OpenCVCamera(camera_index=0)`). When you only have one camera - like a webcam of a laptop, the camera index is expected to be 0, but it might also be very different, and the camera index - might change if you reboot your computer or re-plug your camera. This behavior depends on your operation system. 
+ self.videocapture = cv2.VideoCapture(self.index_or_path, self.backend) - To find the camera indices of your cameras, you can run our utility script that will be save a few frames for each camera: - ```bash - python lerobot/common/robot_devices/cameras/opencv.py --images-dir outputs/images_from_opencv_cameras - ``` + if not self.videocapture.isOpened(): + self.videocapture.release() + self.videocapture = None + raise ConnectionError( + f"Failed to open {self}." + f"Run `python -m lerobot.find_cameras opencv` to find available cameras." + ) - When an OpenCVCamera is instantiated, if no specific config is provided, the default fps, width, height and color_mode - of the given camera will be used. + self._configure_capture_settings() - Example of usage: - ```python - from lerobot.common.robot_devices.cameras.configs import OpenCVCameraConfig + if warmup: + start_time = time.time() + while time.time() - start_time < self.warmup_s: + self.read() + time.sleep(0.1) - config = OpenCVCameraConfig(camera_index=0) - camera = OpenCVCamera(config) - camera.connect() - color_image = camera.read() - # when done using the camera, consider disconnecting - camera.disconnect() - ``` + logger.info(f"{self} connected.") - Example of changing default fps, width, height and color_mode: - ```python - config = OpenCVCameraConfig(camera_index=0, fps=30, width=1280, height=720) - config = OpenCVCameraConfig(camera_index=0, fps=90, width=640, height=480) - config = OpenCVCameraConfig(camera_index=0, fps=90, width=640, height=480, color_mode="bgr") - # Note: might error out open `camera.connect()` if these settings are not compatible with the camera - ``` - """ + def _configure_capture_settings(self) -> None: + """ + Applies the specified FPS, width, and height settings to the connected camera. - def __init__(self, config: OpenCVCameraConfig): - self.config = config - self.camera_index = config.camera_index - self.port = None + This method attempts to set the camera properties via OpenCV. It checks if + the camera successfully applied the settings and raises an error if not. - # Linux uses ports for connecting to cameras - if platform.system() == "Linux": - if isinstance(self.camera_index, int): - self.port = Path(f"/dev/video{self.camera_index}") - elif isinstance(self.camera_index, str) and is_valid_unix_path(self.camera_index): - self.port = Path(self.camera_index) - # Retrieve the camera index from a potentially symlinked path - self.camera_index = get_camera_index_from_unix_port(self.port) - else: - raise ValueError(f"Please check the provided camera_index: {self.camera_index}") + Args: + fps: The desired frames per second. If None, the setting is skipped. + width: The desired capture width. If None, the setting is skipped. + height: The desired capture height. If None, the setting is skipped. - # Store the raw (capture) resolution from the config. - self.capture_width = config.width - self.capture_height = config.height + Raises: + RuntimeError: If the camera fails to set any of the specified properties + to the requested value. + DeviceNotConnectedError: If the camera is not connected when attempting + to configure settings. + """ + if not self.is_connected: + raise DeviceNotConnectedError(f"Cannot configure settings for {self} as it is not connected.") - # If rotated by ±90, swap width and height. 
- if config.rotation in [-90, 90]: - self.width = config.height - self.height = config.width + if self.fps is None: + self.fps = self.videocapture.get(cv2.CAP_PROP_FPS) else: - self.width = config.width - self.height = config.height + self._validate_fps() - self.fps = config.fps - self.channels = config.channels - self.color_mode = config.color_mode - self.mock = config.mock - - self.camera = None - self.is_connected = False - self.thread = None - self.stop_event = None - self.color_image = None - self.logs = {} + default_width = int(round(self.videocapture.get(cv2.CAP_PROP_FRAME_WIDTH))) + default_height = int(round(self.videocapture.get(cv2.CAP_PROP_FRAME_HEIGHT))) - if self.mock: - import tests.cameras.mock_cv2 as cv2 + if self.width is None or self.height is None: + if self.rotation in [cv2.ROTATE_90_CLOCKWISE, cv2.ROTATE_90_COUNTERCLOCKWISE]: + self.width, self.height = default_height, default_width + self.capture_width, self.capture_height = default_width, default_height + else: + self.width, self.height = default_width, default_height + self.capture_width, self.capture_height = default_width, default_height else: - import cv2 + self._validate_width_and_height() + + def _validate_fps(self) -> None: + """Validates and sets the camera's frames per second (FPS).""" + + success = self.videocapture.set(cv2.CAP_PROP_FPS, float(self.fps)) + actual_fps = self.videocapture.get(cv2.CAP_PROP_FPS) + # Use math.isclose for robust float comparison + if not success or not math.isclose(self.fps, actual_fps, rel_tol=1e-3): + raise RuntimeError(f"{self} failed to set fps={self.fps} ({actual_fps=}).") + + def _validate_width_and_height(self) -> None: + """Validates and sets the camera's frame capture width and height.""" + + success = self.videocapture.set(cv2.CAP_PROP_FRAME_WIDTH, float(self.capture_width)) + actual_width = int(round(self.videocapture.get(cv2.CAP_PROP_FRAME_WIDTH))) + if not success or self.capture_width != actual_width: + raise RuntimeError(f"{self} failed to set capture_width={self.capture_width} ({actual_width=}).") + + success = self.videocapture.set(cv2.CAP_PROP_FRAME_HEIGHT, float(self.capture_height)) + actual_height = int(round(self.videocapture.get(cv2.CAP_PROP_FRAME_HEIGHT))) + if not success or self.capture_height != actual_height: + raise RuntimeError( + f"{self} failed to set capture_height={self.capture_height} ({actual_height})." + ) - self.rotation = None - if config.rotation == -90: - self.rotation = cv2.ROTATE_90_COUNTERCLOCKWISE - elif config.rotation == 90: - self.rotation = cv2.ROTATE_90_CLOCKWISE - elif config.rotation == 180: - self.rotation = cv2.ROTATE_180 + @staticmethod + def find_cameras() -> List[Dict[str, Any]]: + """ + Detects available OpenCV cameras connected to the system. - def connect(self): - if self.is_connected: - raise DeviceAlreadyConnectedError(f"OpenCVCamera({self.camera_index}) is already connected.") + On Linux, it scans '/dev/video*' paths. On other systems (like macOS, Windows), + it checks indices from 0 up to `MAX_OPENCV_INDEX`. - if self.mock: - import tests.cameras.mock_cv2 as cv2 - else: - import cv2 - - # Use 1 thread to avoid blocking the main thread. Especially useful during data collection - # when other threads are used to save the images. 
- cv2.setNumThreads(1) - - backend = ( - cv2.CAP_V4L2 - if platform.system() == "Linux" - else cv2.CAP_DSHOW - if platform.system() == "Windows" - else cv2.CAP_AVFOUNDATION - if platform.system() == "Darwin" - else cv2.CAP_ANY - ) + Returns: + List[Dict[str, Any]]: A list of dictionaries, + where each dictionary contains 'type', 'id' (port index or path), + and the default profile properties (width, height, fps, format). + """ + found_cameras_info = [] - camera_idx = f"/dev/video{self.camera_index}" if platform.system() == "Linux" else self.camera_index - # First create a temporary camera trying to access `camera_index`, - # and verify it is a valid camera by calling `isOpened`. - tmp_camera = cv2.VideoCapture(camera_idx, backend) - is_camera_open = tmp_camera.isOpened() - # Release camera to make it accessible for `find_camera_indices` - tmp_camera.release() - del tmp_camera - - # If the camera doesn't work, display the camera indices corresponding to - # valid cameras. - if not is_camera_open: - # Verify that the provided `camera_index` is valid before printing the traceback - cameras_info = find_cameras() - available_cam_ids = [cam["index"] for cam in cameras_info] - if self.camera_index not in available_cam_ids: - raise ValueError( - f"`camera_index` is expected to be one of these available cameras {available_cam_ids}, but {self.camera_index} is provided instead. " - "To find the camera index you should use, run `python lerobot/common/robot_devices/cameras/opencv.py`." - ) - - raise OSError(f"Can't access OpenCVCamera({camera_idx}).") - - # Secondly, create the camera that will be used downstream. - # Note: For some unknown reason, calling `isOpened` blocks the camera which then - # needs to be re-created. - self.camera = cv2.VideoCapture(camera_idx, backend) - - if self.fps is not None: - self.camera.set(cv2.CAP_PROP_FPS, self.fps) - if self.capture_width is not None: - self.camera.set(cv2.CAP_PROP_FRAME_WIDTH, self.capture_width) - if self.capture_height is not None: - self.camera.set(cv2.CAP_PROP_FRAME_HEIGHT, self.capture_height) - - actual_fps = self.camera.get(cv2.CAP_PROP_FPS) - actual_width = self.camera.get(cv2.CAP_PROP_FRAME_WIDTH) - actual_height = self.camera.get(cv2.CAP_PROP_FRAME_HEIGHT) - - # Using `math.isclose` since actual fps can be a float (e.g. 29.9 instead of 30) - if self.fps is not None and not math.isclose(self.fps, actual_fps, rel_tol=1e-3): - # Using `OSError` since it's a broad that encompasses issues related to device communication - raise OSError( - f"Can't set {self.fps=} for OpenCVCamera({self.camera_index}). Actual value is {actual_fps}." - ) - if self.capture_width is not None and not math.isclose( - self.capture_width, actual_width, rel_tol=1e-3 - ): - raise OSError( - f"Can't set {self.capture_width=} for OpenCVCamera({self.camera_index}). Actual value is {actual_width}." - ) - if self.capture_height is not None and not math.isclose( - self.capture_height, actual_height, rel_tol=1e-3 - ): - raise OSError( - f"Can't set {self.capture_height=} for OpenCVCamera({self.camera_index}). Actual value is {actual_height}." 
- ) + if platform.system() == "Linux": + possible_paths = sorted(Path("/dev").glob("video*"), key=lambda p: p.name) + targets_to_scan = [str(p) for p in possible_paths] + else: + targets_to_scan = list(range(MAX_OPENCV_INDEX)) + + for target in targets_to_scan: + camera = cv2.VideoCapture(target) + if camera.isOpened(): + default_width = int(camera.get(cv2.CAP_PROP_FRAME_WIDTH)) + default_height = int(camera.get(cv2.CAP_PROP_FRAME_HEIGHT)) + default_fps = camera.get(cv2.CAP_PROP_FPS) + default_format = camera.get(cv2.CAP_PROP_FORMAT) + camera_info = { + "name": f"OpenCV Camera @ {target}", + "type": "OpenCV", + "id": target, + "backend_api": camera.getBackendName(), + "default_stream_profile": { + "format": default_format, + "width": default_width, + "height": default_height, + "fps": default_fps, + }, + } - self.fps = round(actual_fps) - self.capture_width = round(actual_width) - self.capture_height = round(actual_height) - self.is_connected = True + found_cameras_info.append(camera_info) + camera.release() - def read(self, temporary_color_mode: str | None = None) -> np.ndarray: - """Read a frame from the camera returned in the format (height, width, channels) - (e.g. 480 x 640 x 3), contrarily to the pytorch format which is channel first. + return found_cameras_info - Note: Reading a frame is done every `camera.fps` times per second, and it is blocking. - If you are reading data from other sensors, we advise to use `camera.async_read()` which is non blocking version of `camera.read()`. + def read(self, color_mode: ColorMode | None = None) -> np.ndarray: + """ + Reads a single frame synchronously from the camera. + + This is a blocking call. It waits for the next available frame from the + camera hardware via OpenCV. + + Args: + color_mode (Optional[ColorMode]): If specified, overrides the default + color mode (`self.color_mode`) for this read operation (e.g., + request RGB even if default is BGR). + + Returns: + np.ndarray: The captured frame as a NumPy array in the format + (height, width, channels), using the specified or default + color mode and applying any configured rotation. + + Raises: + DeviceNotConnectedError: If the camera is not connected. + RuntimeError: If reading the frame from the camera fails or if the + received frame dimensions don't match expectations before rotation. + ValueError: If an invalid `color_mode` is requested. """ if not self.is_connected: - raise DeviceNotConnectedError( - f"OpenCVCamera({self.camera_index}) is not connected. Try running `camera.connect()` first." - ) + raise DeviceNotConnectedError(f"{self} is not connected.") start_time = time.perf_counter() - ret, color_image = self.camera.read() + ret, frame = self.videocapture.read() + + if not ret or frame is None: + raise RuntimeError(f"{self} read failed (status={ret}).") - if not ret: - raise OSError(f"Can't capture color image from camera {self.camera_index}.") + processed_frame = self._postprocess_image(frame, color_mode) - requested_color_mode = self.color_mode if temporary_color_mode is None else temporary_color_mode + read_duration_ms = (time.perf_counter() - start_time) * 1e3 + logger.debug(f"{self} read took: {read_duration_ms:.1f}ms") - if requested_color_mode not in ["rgb", "bgr"]: + return processed_frame + + def _postprocess_image(self, image: np.ndarray, color_mode: ColorMode | None = None) -> np.ndarray: + """ + Applies color conversion, dimension validation, and rotation to a raw frame. + + Args: + image (np.ndarray): The raw image frame (expected BGR format from OpenCV). 
+ color_mode (Optional[ColorMode]): The target color mode (RGB or BGR). If None, + uses the instance's default `self.color_mode`. + + Returns: + np.ndarray: The processed image frame. + + Raises: + ValueError: If the requested `color_mode` is invalid. + RuntimeError: If the raw frame dimensions do not match the configured + `width` and `height`. + """ + requested_color_mode = self.color_mode if color_mode is None else color_mode + + if requested_color_mode not in (ColorMode.RGB, ColorMode.BGR): raise ValueError( - f"Expected color values are 'rgb' or 'bgr', but {requested_color_mode} is provided." + f"Invalid color mode '{requested_color_mode}'. Expected {ColorMode.RGB} or {ColorMode.BGR}." ) - # OpenCV uses BGR format as default (blue, green, red) for all operations, including displaying images. - # However, Deep Learning framework such as LeRobot uses RGB format as default to train neural networks, - # so we convert the image color from BGR to RGB. - if requested_color_mode == "rgb": - if self.mock: - import tests.cameras.mock_cv2 as cv2 - else: - import cv2 - - color_image = cv2.cvtColor(color_image, cv2.COLOR_BGR2RGB) + h, w, c = image.shape - h, w, _ = color_image.shape if h != self.capture_height or w != self.capture_width: - raise OSError( - f"Can't capture color image with expected height and width ({self.height} x {self.width}). ({h} x {w}) returned instead." + raise RuntimeError( + f"{self} frame width={w} or height={h} do not match configured width={self.capture_width} or height={self.capture_height}." ) - if self.rotation is not None: - color_image = cv2.rotate(color_image, self.rotation) + if c != 3: + raise RuntimeError(f"{self} frame channels={c} do not match expected 3 channels (RGB/BGR).") - # log the number of seconds it took to read the image - self.logs["delta_timestamp_s"] = time.perf_counter() - start_time + processed_image = image + if requested_color_mode == ColorMode.RGB: + processed_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) - # log the utc time at which the image was received - self.logs["timestamp_utc"] = capture_timestamp_utc() + if self.rotation in [cv2.ROTATE_90_CLOCKWISE, cv2.ROTATE_90_COUNTERCLOCKWISE, cv2.ROTATE_180]: + processed_image = cv2.rotate(processed_image, self.rotation) - self.color_image = color_image + return processed_image - return color_image + def _read_loop(self): + """ + Internal loop run by the background thread for asynchronous reading. - def read_loop(self): + Continuously reads frames from the camera using the synchronous `read()` + method and stores the latest frame in `self.latest_frame` (protected by + `self.frame_lock`), overwriting the previous one, and sets `self.new_frame_event`.
+ """ while not self.stop_event.is_set(): try: - self.color_image = self.read() + color_image = self.read() + + with self.frame_lock: + self.latest_frame = color_image + self.new_frame_event.set() + + except DeviceNotConnectedError: + break except Exception as e: - print(f"Error reading in thread: {e}") + logger.warning(f"Error reading frame in background thread for {self}: {e}") + + def _start_read_thread(self) -> None: + """Starts or restarts the background read thread if it's not running.""" + if self.thread is not None and self.thread.is_alive(): + self.thread.join(timeout=0.1) + if self.stop_event is not None: + self.stop_event.set() + + self.stop_event = Event() + self.thread = Thread(target=self._read_loop, args=(), name=f"{self}_read_loop") + self.thread.daemon = True + self.thread.start() + + def _stop_read_thread(self) -> None: + """Signals the background read thread to stop and waits for it to join.""" + if self.stop_event is not None: + self.stop_event.set() + + if self.thread is not None and self.thread.is_alive(): + self.thread.join(timeout=2.0) + + self.thread = None + self.stop_event = None + + def async_read(self, timeout_ms: float = 2000) -> np.ndarray: + """ + Reads the latest available frame asynchronously. + + This method retrieves the most recent frame captured by the background + read thread. It does not block waiting for the camera hardware directly, + only waits for a frame to appear in the internal queue up to the specified + timeout. + + Args: + timeout_ms (float): Maximum time in milliseconds to wait for a frame + to become available in the queue. Defaults to 2000ms (2 seconds). - def async_read(self): + Returns: + np.ndarray: The latest captured frame as a NumPy array in the format + (height, width, channels), processed according to configuration. + + Raises: + DeviceNotConnectedError: If the camera is not connected. + TimeoutError: If no frame becomes available within the specified timeout. + RuntimeError: If an unexpected error occurs while retrieving from the queue. + """ if not self.is_connected: - raise DeviceNotConnectedError( - f"OpenCVCamera({self.camera_index}) is not connected. Try running `camera.connect()` first." + raise DeviceNotConnectedError(f"{self} is not connected.") + + if self.thread is None or not self.thread.is_alive(): + self._start_read_thread() + + if not self.new_frame_event.wait(timeout=timeout_ms / 1000.0): + thread_alive = self.thread is not None and self.thread.is_alive() + raise TimeoutError( + f"Timed out waiting for frame from camera {self} after {timeout_ms} ms. " + f"Read thread alive: {thread_alive}." ) - if self.thread is None: - self.stop_event = threading.Event() - self.thread = Thread(target=self.read_loop, args=()) - self.thread.daemon = True - self.thread.start() + with self.frame_lock: + frame = self.latest_frame + self.new_frame_event.clear() - num_tries = 0 - while True: - if self.color_image is not None: - return self.color_image + if frame is None: + raise RuntimeError(f"Internal error: Event set but no frame available for {self}.") - time.sleep(1 / self.fps) - num_tries += 1 - if num_tries > self.fps * 2: - raise TimeoutError("Timed out waiting for async_read() to start.") + return frame def disconnect(self): - if not self.is_connected: - raise DeviceNotConnectedError( - f"OpenCVCamera({self.camera_index}) is not connected. Try running `camera.connect()` first." - ) + """ + Disconnects from the camera and cleans up resources. 
+ + Stops the background read thread (if running) and releases the OpenCV + VideoCapture object. + + Raises: + DeviceNotConnectedError: If the camera is already disconnected. + """ + if not self.is_connected and self.thread is None: + raise DeviceNotConnectedError(f"{self} not connected.") if self.thread is not None: - self.stop_event.set() - self.thread.join() # wait for the thread to finish - self.thread = None - self.stop_event = None - - self.camera.release() - self.camera = None - self.is_connected = False - - def __del__(self): - if getattr(self, "is_connected", False): - self.disconnect() - - -if __name__ == "__main__": - parser = argparse.ArgumentParser( - description="Save a few frames using `OpenCVCamera` for all cameras connected to the computer, or a selected subset." - ) - parser.add_argument( - "--camera-ids", - type=int, - nargs="*", - default=None, - help="List of camera indices used to instantiate the `OpenCVCamera`. If not provided, find and use all available camera indices.", - ) - parser.add_argument( - "--fps", - type=int, - default=None, - help="Set the number of frames recorded per seconds for all cameras. If not provided, use the default fps of each camera.", - ) - parser.add_argument( - "--width", - type=int, - default=None, - help="Set the width for all cameras. If not provided, use the default width of each camera.", - ) - parser.add_argument( - "--height", - type=int, - default=None, - help="Set the height for all cameras. If not provided, use the default height of each camera.", - ) - parser.add_argument( - "--images-dir", - type=Path, - default="outputs/images_from_opencv_cameras", - help="Set directory to save a few frames for each camera.", - ) - parser.add_argument( - "--record-time-s", - type=float, - default=4.0, - help="Set the number of seconds used to record the frames. By default, 2 seconds.", - ) - args = parser.parse_args() - save_images_from_cameras(**vars(args)) + self._stop_read_thread() + + if self.videocapture is not None: + self.videocapture.release() + self.videocapture = None + + logger.info(f"{self} disconnected.") diff --git a/lerobot/common/cameras/opencv/configuration_opencv.py b/lerobot/common/cameras/opencv/configuration_opencv.py index 983199bfda..3ac92de36e 100644 --- a/lerobot/common/cameras/opencv/configuration_opencv.py +++ b/lerobot/common/cameras/opencv/configuration_opencv.py @@ -1,38 +1,73 @@ +# Copyright 2024 The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from dataclasses import dataclass +from pathlib import Path -from ..configs import CameraConfig +from ..configs import CameraConfig, ColorMode, Cv2Rotation @CameraConfig.register_subclass("opencv") @dataclass class OpenCVCameraConfig(CameraConfig): - """ - Example of tested options for Intel Real Sense D405: + """Configuration class for OpenCV-based camera devices or video files. + + This class provides configuration options for cameras accessed through OpenCV, + supporting both physical camera devices and video files. 
It includes settings
+    for resolution, frame rate, color mode, and image rotation.
+    Example configurations:
    ```python
-    OpenCVCameraConfig(0, 30, 640, 480)
-    OpenCVCameraConfig(0, 60, 640, 480)
-    OpenCVCameraConfig(0, 90, 640, 480)
-    OpenCVCameraConfig(0, 30, 1280, 720)
+    # Basic configurations
+    OpenCVCameraConfig(0, 30, 1280, 720)  # 1280x720 @ 30FPS
+    OpenCVCameraConfig("/dev/video4", 60, 640, 480)  # 640x480 @ 60FPS from a device path
+
+    # Advanced configurations
+    OpenCVCameraConfig(0, 30, 640, 480, rotation=Cv2Rotation.ROTATE_90)  # With 90° rotation
    ```
+
+    Attributes:
+        index_or_path: Either an integer representing the camera device index,
+            or a Path object pointing to a video file.
+        fps: Requested frames per second for the color stream.
+        width: Requested frame width in pixels for the color stream.
+        height: Requested frame height in pixels for the color stream.
+        color_mode: Color mode for image output (RGB or BGR). Defaults to RGB.
+        rotation: Image rotation setting (0°, 90°, 180°, or 270°). Defaults to no rotation.
+        warmup_s: Time reading frames before returning from connect (in seconds)
+
+    Note:
+        - Only 3-channel color output (RGB/BGR) is currently supported.
    """

-    camera_index: int
-    fps: int | None = None
-    width: int | None = None
-    height: int | None = None
-    color_mode: str = "rgb"
-    channels: int | None = None
-    rotation: int | None = None
-    mock: bool = False
+    index_or_path: int | Path
+    color_mode: ColorMode = ColorMode.RGB
+    rotation: Cv2Rotation = Cv2Rotation.NO_ROTATION
+    warmup_s: int = 1

    def __post_init__(self):
-        if self.color_mode not in ["rgb", "bgr"]:
+        if self.color_mode not in (ColorMode.RGB, ColorMode.BGR):
            raise ValueError(
-                f"`color_mode` is expected to be 'rgb' or 'bgr', but {self.color_mode} is provided."
+                f"`color_mode` is expected to be {ColorMode.RGB.value} or {ColorMode.BGR.value}, but {self.color_mode} is provided."
            )

-        self.channels = 3
-
-        if self.rotation not in [-90, None, 90, 180]:
-            raise ValueError(f"`rotation` must be in [-90, None, 90, 180] (got {self.rotation})")
+        if self.rotation not in (
+            Cv2Rotation.NO_ROTATION,
+            Cv2Rotation.ROTATE_90,
+            Cv2Rotation.ROTATE_180,
+            Cv2Rotation.ROTATE_270,
+        ):
+            raise ValueError(
+                f"`rotation` is expected to be in {(Cv2Rotation.NO_ROTATION, Cv2Rotation.ROTATE_90, Cv2Rotation.ROTATE_180, Cv2Rotation.ROTATE_270)}, but {self.rotation} is provided."
+            )
diff --git a/lerobot/common/cameras/realsense/__init__.py b/lerobot/common/cameras/realsense/__init__.py
new file mode 100644
index 0000000000..67f2f4000d
--- /dev/null
+++ b/lerobot/common/cameras/realsense/__init__.py
@@ -0,0 +1,16 @@
+# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+ +from .camera_realsense import RealSenseCamera +from .configuration_realsense import RealSenseCameraConfig diff --git a/lerobot/common/cameras/realsense/camera_realsense.py b/lerobot/common/cameras/realsense/camera_realsense.py new file mode 100644 index 0000000000..77ec60f5ee --- /dev/null +++ b/lerobot/common/cameras/realsense/camera_realsense.py @@ -0,0 +1,555 @@ +# Copyright 2024 The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +Provides the RealSenseCamera class for capturing frames from Intel RealSense cameras. +""" + +import logging +import time +from threading import Event, Lock, Thread +from typing import Any, Dict, List + +import cv2 +import numpy as np + +try: + import pyrealsense2 as rs +except Exception as e: + logging.info(f"Could not import realsense: {e}") + +from lerobot.common.errors import DeviceAlreadyConnectedError, DeviceNotConnectedError + +from ..camera import Camera +from ..configs import ColorMode +from ..utils import get_cv2_rotation +from .configuration_realsense import RealSenseCameraConfig + +logger = logging.getLogger(__name__) + + +class RealSenseCamera(Camera): + """ + Manages interactions with Intel RealSense cameras for frame and depth recording. + + This class provides an interface similar to `OpenCVCamera` but tailored for + RealSense devices, leveraging the `pyrealsense2` library. It uses the camera's + unique serial number for identification, offering more stability than device + indices, especially on Linux. It also supports capturing depth maps alongside + color frames. + + Use the provided utility script to find available camera indices and default profiles: + ```bash + python -m lerobot.find_cameras realsense + ``` + + A `RealSenseCamera` instance requires a configuration object specifying the + camera's serial number or a unique device name. If using the name, ensure only + one camera with that name is connected. + + The camera's default settings (FPS, resolution, color mode) from the stream + profile are used unless overridden in the configuration. 
+ + Example: + ```python + from lerobot.common.cameras.realsense import RealSenseCamera, RealSenseCameraConfig + from lerobot.common.cameras import ColorMode, Cv2Rotation + + # Basic usage with serial number + config = RealSenseCameraConfig(serial_number_or_name=1234567890) # Replace with actual SN + camera = RealSenseCamera(config) + camera.connect() + + # Read 1 frame synchronously + color_image = camera.read() + print(color_image.shape) + + # Read 1 frame asynchronously + async_image = camera.async_read() + + # When done, properly disconnect the camera using + camera.disconnect() + + # Example with depth capture and custom settings + custom_config = RealSenseCameraConfig( + serial_number_or_name=1234567890, # Replace with actual SN + fps=30, + width=1280, + height=720, + color_mode=ColorMode.BGR, # Request BGR output + rotation=Cv2Rotation.NO_ROTATION, + use_depth=True + ) + depth_camera = RealSenseCamera(custom_config) + depth_camera.connect() + + # Read 1 depth frame + depth_map = depth_camera.read_depth() + + # Example using a unique camera name + name_config = RealSenseCameraConfig(serial_number_or_name="Intel RealSense D435") # If unique + name_camera = RealSenseCamera(name_config) + # ... connect, read, disconnect ... + ``` + """ + + def __init__(self, config: RealSenseCameraConfig): + """ + Initializes the RealSenseCamera instance. + + Args: + config: The configuration settings for the camera. + """ + + super().__init__(config) + + self.config = config + + if isinstance(config.serial_number_or_name, int): + self.serial_number = str(config.serial_number_or_name) + else: + self.serial_number = self._find_serial_number_from_name(config.serial_number_or_name) + + self.fps = config.fps + self.color_mode = config.color_mode + self.use_depth = config.use_depth + self.warmup_s = config.warmup_s + + self.rs_pipeline: rs.pipeline | None = None + self.rs_profile: rs.pipeline_profile | None = None + + self.thread: Thread | None = None + self.stop_event: Event | None = None + self.frame_lock: Lock = Lock() + self.latest_frame: np.ndarray | None = None + self.new_frame_event: Event = Event() + + self.rotation: int | None = get_cv2_rotation(config.rotation) + + if self.height and self.width: + if self.rotation in [cv2.ROTATE_90_CLOCKWISE, cv2.ROTATE_90_COUNTERCLOCKWISE]: + self.capture_width, self.capture_height = self.height, self.width + else: + self.capture_width, self.capture_height = self.width, self.height + + def __str__(self) -> str: + return f"{self.__class__.__name__}({self.serial_number})" + + @property + def is_connected(self) -> bool: + """Checks if the camera pipeline is started and streams are active.""" + return self.rs_pipeline is not None and self.rs_profile is not None + + def connect(self, warmup: bool = True): + """ + Connects to the RealSense camera specified in the configuration. + + Initializes the RealSense pipeline, configures the required streams (color + and optionally depth), starts the pipeline, and validates the actual stream settings. + + Raises: + DeviceAlreadyConnectedError: If the camera is already connected. + ValueError: If the configuration is invalid (e.g., missing serial/name, name not unique). + ConnectionError: If the camera is found but fails to start the pipeline or no RealSense devices are detected at all. + RuntimeError: If the pipeline starts but fails to apply requested settings. 
+ """ + if self.is_connected: + raise DeviceAlreadyConnectedError(f"{self} is already connected.") + + self.rs_pipeline = rs.pipeline() + rs_config = rs.config() + self._configure_rs_pipeline_config(rs_config) + + try: + self.rs_profile = self.rs_pipeline.start(rs_config) + except RuntimeError as e: + self.rs_profile = None + self.rs_pipeline = None + raise ConnectionError( + f"Failed to open {self}." + "Run `python -m lerobot.find_cameras realsense` to find available cameras." + ) from e + + self._configure_capture_settings() + + if warmup: + time.sleep( + 1 + ) # NOTE(Steven): RS cameras need a bit of time to warm up before the first read. If we don't wait, the first read from the warmup will raise. + start_time = time.time() + while time.time() - start_time < self.warmup_s: + self.read() + time.sleep(0.1) + + logger.info(f"{self} connected.") + + @staticmethod + def find_cameras() -> List[Dict[str, Any]]: + """ + Detects available Intel RealSense cameras connected to the system. + + Returns: + List[Dict[str, Any]]: A list of dictionaries, + where each dictionary contains 'type', 'id' (serial number), 'name', + firmware version, USB type, and other available specs, and the default profile properties (width, height, fps, format). + + Raises: + OSError: If pyrealsense2 is not installed. + ImportError: If pyrealsense2 is not installed. + """ + found_cameras_info = [] + context = rs.context() + devices = context.query_devices() + + for device in devices: + camera_info = { + "name": device.get_info(rs.camera_info.name), + "type": "RealSense", + "id": device.get_info(rs.camera_info.serial_number), + "firmware_version": device.get_info(rs.camera_info.firmware_version), + "usb_type_descriptor": device.get_info(rs.camera_info.usb_type_descriptor), + "physical_port": device.get_info(rs.camera_info.physical_port), + "product_id": device.get_info(rs.camera_info.product_id), + "product_line": device.get_info(rs.camera_info.product_line), + } + + # Get stream profiles for each sensor + sensors = device.query_sensors() + for sensor in sensors: + profiles = sensor.get_stream_profiles() + + for profile in profiles: + if profile.is_video_stream_profile() and profile.is_default(): + vprofile = profile.as_video_stream_profile() + stream_info = { + "stream_type": vprofile.stream_name(), + "format": vprofile.format().name, + "width": vprofile.width(), + "height": vprofile.height(), + "fps": vprofile.fps(), + } + camera_info["default_stream_profile"] = stream_info + + found_cameras_info.append(camera_info) + + return found_cameras_info + + def _find_serial_number_from_name(self, name: str) -> str: + """Finds the serial number for a given unique camera name.""" + camera_infos = self.find_cameras() + found_devices = [cam for cam in camera_infos if str(cam["name"]) == name] + + if not found_devices: + available_names = [cam["name"] for cam in camera_infos] + raise ValueError( + f"No RealSense camera found with name '{name}'. Available camera names: {available_names}" + ) + + if len(found_devices) > 1: + serial_numbers = [dev["serial_number"] for dev in found_devices] + raise ValueError( + f"Multiple RealSense cameras found with name '{name}'. " + f"Please use a unique serial number instead. 
Found SNs: {serial_numbers}" + ) + + serial_number = str(found_devices[0]["serial_number"]) + return serial_number + + def _configure_rs_pipeline_config(self, rs_config): + """Creates and configures the RealSense pipeline configuration object.""" + rs.config.enable_device(rs_config, self.serial_number) + + if self.width and self.height and self.fps: + rs_config.enable_stream( + rs.stream.color, self.capture_width, self.capture_height, rs.format.rgb8, self.fps + ) + if self.use_depth: + rs_config.enable_stream( + rs.stream.depth, self.capture_width, self.capture_height, rs.format.z16, self.fps + ) + else: + rs_config.enable_stream(rs.stream.color) + if self.use_depth: + rs_config.enable_stream(rs.stream.depth) + + def _configure_capture_settings(self) -> None: + """Sets fps, width, and height from device stream if not already configured. + + Uses the color stream profile to update unset attributes. Handles rotation by + swapping width/height when needed. Original capture dimensions are always stored. + + Raises: + DeviceNotConnectedError: If device is not connected. + """ + if not self.is_connected: + raise DeviceNotConnectedError(f"Cannot validate settings for {self} as it is not connected.") + + stream = self.rs_profile.get_stream(rs.stream.color).as_video_stream_profile() + + if self.fps is None: + self.fps = stream.fps() + + if self.width is None or self.height is None: + actual_width = int(round(stream.width())) + actual_height = int(round(stream.height())) + if self.rotation in [cv2.ROTATE_90_CLOCKWISE, cv2.ROTATE_90_COUNTERCLOCKWISE]: + self.width, self.height = actual_height, actual_width + self.capture_width, self.capture_height = actual_width, actual_height + else: + self.width, self.height = actual_width, actual_height + self.capture_width, self.capture_height = actual_width, actual_height + + def read_depth(self, timeout_ms: int = 100) -> np.ndarray: + """ + Reads a single frame (depth) synchronously from the camera. + + This is a blocking call. It waits for a coherent set of frames (depth) + from the camera hardware via the RealSense pipeline. + + Args: + timeout_ms (int): Maximum time in milliseconds to wait for a frame. Defaults to 100ms. + + Returns: + np.ndarray: The depth map as a NumPy array (height, width) + of type `np.uint16` (raw depth values in millimeters) and rotation. + + Raises: + DeviceNotConnectedError: If the camera is not connected. + RuntimeError: If reading frames from the pipeline fails or frames are invalid. + """ + + if not self.is_connected: + raise DeviceNotConnectedError(f"{self} is not connected.") + if not self.use_depth: + raise RuntimeError( + f"Failed to capture depth frame '.read_depth()'. Depth stream is not enabled for {self}." + ) + + start_time = time.perf_counter() + + ret, frame = self.rs_pipeline.try_wait_for_frames(timeout_ms=timeout_ms) + + if not ret or frame is None: + raise RuntimeError(f"{self} read_depth failed (status={ret}).") + + depth_frame = frame.get_depth_frame() + depth_map = np.asanyarray(depth_frame.get_data()) + + depth_map_processed = self._postprocess_image(depth_map, depth_frame=True) + + read_duration_ms = (time.perf_counter() - start_time) * 1e3 + logger.debug(f"{self} read took: {read_duration_ms:.1f}ms") + + return depth_map_processed + + def read(self, color_mode: ColorMode | None = None, timeout_ms: int = 100) -> np.ndarray: + """ + Reads a single frame (color) synchronously from the camera. + + This is a blocking call. 
It waits for a coherent set of frames (color) + from the camera hardware via the RealSense pipeline. + + Args: + timeout_ms (int): Maximum time in milliseconds to wait for a frame. Defaults to 100ms. + + Returns: + np.ndarray: The captured color frame as a NumPy array + (height, width, channels), processed according to `color_mode` and rotation. + + Raises: + DeviceNotConnectedError: If the camera is not connected. + RuntimeError: If reading frames from the pipeline fails or frames are invalid. + ValueError: If an invalid `color_mode` is requested. + """ + + if not self.is_connected: + raise DeviceNotConnectedError(f"{self} is not connected.") + + start_time = time.perf_counter() + + ret, frame = self.rs_pipeline.try_wait_for_frames(timeout_ms=timeout_ms) + + if not ret or frame is None: + raise RuntimeError(f"{self} read failed (status={ret}).") + + color_frame = frame.get_color_frame() + color_image_raw = np.asanyarray(color_frame.get_data()) + + color_image_processed = self._postprocess_image(color_image_raw, color_mode) + + read_duration_ms = (time.perf_counter() - start_time) * 1e3 + logger.debug(f"{self} read took: {read_duration_ms:.1f}ms") + + return color_image_processed + + def _postprocess_image( + self, image: np.ndarray, color_mode: ColorMode | None = None, depth_frame: bool = False + ) -> np.ndarray: + """ + Applies color conversion, dimension validation, and rotation to a raw color frame. + + Args: + image (np.ndarray): The raw image frame (expected RGB format from RealSense). + color_mode (Optional[ColorMode]): The target color mode (RGB or BGR). If None, + uses the instance's default `self.color_mode`. + + Returns: + np.ndarray: The processed image frame according to `self.color_mode` and `self.rotation`. + + Raises: + ValueError: If the requested `color_mode` is invalid. + RuntimeError: If the raw frame dimensions do not match the configured + `width` and `height`. + """ + + if color_mode and color_mode not in (ColorMode.RGB, ColorMode.BGR): + raise ValueError( + f"Invalid requested color mode '{color_mode}'. Expected {ColorMode.RGB} or {ColorMode.BGR}." + ) + + if depth_frame: + h, w = image.shape + else: + h, w, c = image.shape + + if c != 3: + raise RuntimeError(f"{self} frame channels={c} do not match expected 3 channels (RGB/BGR).") + + if h != self.capture_height or w != self.capture_width: + raise RuntimeError( + f"{self} frame width={w} or height={h} do not match configured width={self.capture_width} or height={self.capture_height}." + ) + + processed_image = image + if self.color_mode == ColorMode.BGR: + processed_image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR) + + if self.rotation in [cv2.ROTATE_90_CLOCKWISE, cv2.ROTATE_90_COUNTERCLOCKWISE]: + processed_image = cv2.rotate(processed_image, self.rotation) + + return processed_image + + def _read_loop(self): + """ + Internal loop run by the background thread for asynchronous reading. + + Continuously reads frames (color and optional depth) using `read()` + and places the latest result (single image or tuple) into the `frame_queue`. + It overwrites any previous frame in the queue. 
+ """ + while not self.stop_event.is_set(): + try: + color_image = self.read(timeout_ms=500) + + with self.frame_lock: + self.latest_frame = color_image + self.new_frame_event.set() + + except DeviceNotConnectedError: + break + except Exception as e: + logger.warning(f"Error reading frame in background thread for {self}: {e}") + + def _start_read_thread(self) -> None: + """Starts or restarts the background read thread if it's not running.""" + if self.thread is not None and self.thread.is_alive(): + self.thread.join(timeout=0.1) + if self.stop_event is not None: + self.stop_event.set() + + self.stop_event = Event() + self.thread = Thread(target=self._read_loop, args=(), name=f"{self}_read_loop") + self.thread.daemon = True + self.thread.start() + + def _stop_read_thread(self): + """Signals the background read thread to stop and waits for it to join.""" + if self.stop_event is not None: + self.stop_event.set() + + if self.thread is not None and self.thread.is_alive(): + self.thread.join(timeout=2.0) + + self.thread = None + self.stop_event = None + + # NOTE(Steven): Missing implementation for depth for now + def async_read(self, timeout_ms: float = 100) -> np.ndarray: + """ + Reads the latest available frame data (color or color+depth) asynchronously. + + This method retrieves the most recent frame captured by the background + read thread. It does not block waiting for the camera hardware directly, + only waits for a frame to appear in the internal queue up to the specified + timeout. + + Args: + timeout_ms (float): Maximum time in milliseconds to wait for a frame + to become available in the queue. Defaults to 100ms (0.1 seconds). + + Returns: + np.ndarray: + The latest captured frame data (color image), processed according to configuration. + + Raises: + DeviceNotConnectedError: If the camera is not connected. + TimeoutError: If no frame data becomes available within the specified timeout. + RuntimeError: If the background thread died unexpectedly or another queue error occurs. + """ + if not self.is_connected: + raise DeviceNotConnectedError(f"{self} is not connected.") + + if self.thread is None or not self.thread.is_alive(): + self._start_read_thread() + + if not self.new_frame_event.wait(timeout=timeout_ms / 1000.0): + thread_alive = self.thread is not None and self.thread.is_alive() + raise TimeoutError( + f"Timed out waiting for frame from camera {self} after {timeout_ms} ms. " + f"Read thread alive: {thread_alive}." + ) + + with self.frame_lock: + frame = self.latest_frame + self.new_frame_event.clear() + + if frame is None: + raise RuntimeError(f"Internal error: Event set but no frame available for {self}.") + + return frame + + def disconnect(self): + """ + Disconnects from the camera, stops the pipeline, and cleans up resources. + + Stops the background read thread (if running) and stops the RealSense pipeline. + + Raises: + DeviceNotConnectedError: If the camera is already disconnected (pipeline not running). + """ + + if not self.is_connected and self.thread is None: + raise DeviceNotConnectedError( + f"Attempted to disconnect {self}, but it appears already disconnected." 
+ ) + + if self.thread is not None: + self._stop_read_thread() + + if self.rs_pipeline is not None: + self.rs_pipeline.stop() + self.rs_pipeline = None + self.rs_profile = None + + logger.info(f"{self} disconnected.") diff --git a/lerobot/common/cameras/realsense/configuration_realsense.py b/lerobot/common/cameras/realsense/configuration_realsense.py new file mode 100644 index 0000000000..0183965a6c --- /dev/null +++ b/lerobot/common/cameras/realsense/configuration_realsense.py @@ -0,0 +1,82 @@ +# Copyright 2024 The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from dataclasses import dataclass + +from ..configs import CameraConfig, ColorMode, Cv2Rotation + + +@CameraConfig.register_subclass("intelrealsense") +@dataclass +class RealSenseCameraConfig(CameraConfig): + """Configuration class for Intel RealSense cameras. + + This class provides specialized configuration options for Intel RealSense cameras, + including support for depth sensing and device identification via serial number or name. + + Example configurations for Intel RealSense D405: + ```python + # Basic configurations + RealSenseCameraConfig(128422271347, 30, 1280, 720) # 1280x720 @ 30FPS + RealSenseCameraConfig(128422271347, 60, 640, 480) # 640x480 @ 60FPS + + # Advanced configurations + RealSenseCameraConfig(128422271347, 30, 640, 480, use_depth=True) # With depth sensing + RealSenseCameraConfig(128422271347, 30, 640, 480, rotation=Cv2Rotation.ROTATE_90) # With 90° rotation + ``` + + Attributes: + fps: Requested frames per second for the color stream. + width: Requested frame width in pixels for the color stream. + height: Requested frame height in pixels for the color stream. + serial_number_or_name: Unique serial number or human-readable name to identify the camera. + color_mode: Color mode for image output (RGB or BGR). Defaults to RGB. + use_depth: Whether to enable depth stream. Defaults to False. + rotation: Image rotation setting (0°, 90°, 180°, or 270°). Defaults to no rotation. + warmup_s: Time reading frames before returning from connect (in seconds) + + Note: + - Either name or serial_number must be specified. + - Depth stream configuration (if enabled) will use the same FPS as the color stream. + - The actual resolution and FPS may be adjusted by the camera to the nearest supported mode. + - For `fps`, `width` and `height`, either all of them need to be set, or none of them. + """ + + serial_number_or_name: int | str + color_mode: ColorMode = ColorMode.RGB + use_depth: bool = False + rotation: Cv2Rotation = Cv2Rotation.NO_ROTATION + warmup_s: int = 1 + + def __post_init__(self): + if self.color_mode not in (ColorMode.RGB, ColorMode.BGR): + raise ValueError( + f"`color_mode` is expected to be {ColorMode.RGB.value} or {ColorMode.BGR.value}, but {self.color_mode} is provided." 
+ ) + + if self.rotation not in ( + Cv2Rotation.NO_ROTATION, + Cv2Rotation.ROTATE_90, + Cv2Rotation.ROTATE_180, + Cv2Rotation.ROTATE_270, + ): + raise ValueError( + f"`rotation` is expected to be in {(Cv2Rotation.NO_ROTATION, Cv2Rotation.ROTATE_90, Cv2Rotation.ROTATE_180, Cv2Rotation.ROTATE_270)}, but {self.rotation} is provided." + ) + + values = (self.fps, self.width, self.height) + if any(v is not None for v in values) and any(v is None for v in values): + raise ValueError( + "For `fps`, `width` and `height`, either all of them need to be set, or none of them." + ) diff --git a/lerobot/common/cameras/utils.py b/lerobot/common/cameras/utils.py index f0b6ce5f27..f8bbd6e704 100644 --- a/lerobot/common/cameras/utils.py +++ b/lerobot/common/cameras/utils.py @@ -1,5 +1,27 @@ +#!/usr/bin/env python + +# Copyright 2024 The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import platform +from pathlib import Path +from typing import TypeAlias + from .camera import Camera -from .configs import CameraConfig +from .configs import CameraConfig, Cv2Rotation + +IndexOrPath: TypeAlias = int | Path def make_cameras_from_configs(camera_configs: dict[str, CameraConfig]) -> dict[str, Camera]: @@ -12,10 +34,32 @@ def make_cameras_from_configs(camera_configs: dict[str, CameraConfig]) -> dict[s cameras[key] = OpenCVCamera(cfg) elif cfg.type == "intelrealsense": - from .intel.camera_realsense import RealSenseCamera + from .realsense.camera_realsense import RealSenseCamera cameras[key] = RealSenseCamera(cfg) else: raise ValueError(f"The motor type '{cfg.type}' is not valid.") return cameras + + +def get_cv2_rotation(rotation: Cv2Rotation) -> int | None: + import cv2 + + if rotation == Cv2Rotation.ROTATE_90: + return cv2.ROTATE_90_CLOCKWISE + elif rotation == Cv2Rotation.ROTATE_180: + return cv2.ROTATE_180 + elif rotation == Cv2Rotation.ROTATE_270: + return cv2.ROTATE_90_COUNTERCLOCKWISE + else: + return None + + +def get_cv2_backend() -> int: + import cv2 + + if platform.system() == "Windows": + return cv2.CAP_AVFOUNDATION + else: + return cv2.CAP_ANY diff --git a/lerobot/common/robots/config.py b/lerobot/common/robots/config.py index ca8b5b0d26..a85a831693 100644 --- a/lerobot/common/robots/config.py +++ b/lerobot/common/robots/config.py @@ -26,6 +26,15 @@ class RobotConfig(draccus.ChoiceRegistry, abc.ABC): # Directory to store calibration file calibration_dir: Path | None = None + def __post_init__(self): + if hasattr(self, "cameras") and self.cameras: + for _, config in self.cameras.items(): + for attr in ["width", "height", "fps"]: + if getattr(config, attr) is None: + raise ValueError( + f"Specifying '{attr}' is required for the camera to be used in a robot" + ) + @property def type(self) -> str: return self.get_choice_name(self.__class__) diff --git a/lerobot/common/robots/stretch3/configuration_stretch3.py b/lerobot/common/robots/stretch3/configuration_stretch3.py index 4508079044..e62e4fa016 100644 --- a/lerobot/common/robots/stretch3/configuration_stretch3.py 
+++ b/lerobot/common/robots/stretch3/configuration_stretch3.py @@ -15,8 +15,8 @@ from dataclasses import dataclass, field from lerobot.common.cameras import CameraConfig -from lerobot.common.cameras.intel import RealSenseCameraConfig from lerobot.common.cameras.opencv import OpenCVCameraConfig +from lerobot.common.cameras.realsense import RealSenseCameraConfig from ..config import RobotConfig @@ -33,7 +33,7 @@ class Stretch3RobotConfig(RobotConfig): cameras: dict[str, CameraConfig] = field( default_factory=lambda: { "navigation": OpenCVCameraConfig( - camera_index="/dev/hello-nav-head-camera", + index_or_path="/dev/hello-nav-head-camera", fps=10, width=1280, height=720, diff --git a/lerobot/find_cameras.py b/lerobot/find_cameras.py new file mode 100644 index 0000000000..3b5c4af3c0 --- /dev/null +++ b/lerobot/find_cameras.py @@ -0,0 +1,315 @@ +#!/usr/bin/env python + +# Copyright 2024 The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +Helper to find the camera devices available in your system. + +Example: + +```shell +python -m lerobot.find_cameras +``` +""" + +# NOTE(Steven): RealSense can also be identified/opened as OpenCV cameras. If you know the camera is a RealSense, use the `lerobot.find_cameras realsense` flag to avoid confusion. +# NOTE(Steven): macOS cameras sometimes report different FPS at init time, not an issue here as we don't specify FPS when opening the cameras, but the information displayed might not be truthful. + +import argparse +import concurrent.futures +import logging +import time +from pathlib import Path +from typing import Any, Dict, List + +import numpy as np +from PIL import Image + +from lerobot.common.cameras.configs import ColorMode +from lerobot.common.cameras.opencv.camera_opencv import OpenCVCamera +from lerobot.common.cameras.opencv.configuration_opencv import OpenCVCameraConfig +from lerobot.common.cameras.realsense.camera_realsense import RealSenseCamera +from lerobot.common.cameras.realsense.configuration_realsense import RealSenseCameraConfig + +logger = logging.getLogger(__name__) + + +def find_all_opencv_cameras() -> List[Dict[str, Any]]: + """ + Finds all available OpenCV cameras plugged into the system. + + Returns: + A list of all available OpenCV cameras with their metadata. + """ + all_opencv_cameras_info: List[Dict[str, Any]] = [] + logger.info("Searching for OpenCV cameras...") + try: + opencv_cameras = OpenCVCamera.find_cameras() + for cam_info in opencv_cameras: + all_opencv_cameras_info.append(cam_info) + logger.info(f"Found {len(opencv_cameras)} OpenCV cameras.") + except Exception as e: + logger.error(f"Error finding OpenCV cameras: {e}") + + return all_opencv_cameras_info + + +def find_all_realsense_cameras() -> List[Dict[str, Any]]: + """ + Finds all available RealSense cameras plugged into the system. + + Returns: + A list of all available RealSense cameras with their metadata. 
+ """ + all_realsense_cameras_info: List[Dict[str, Any]] = [] + logger.info("Searching for RealSense cameras...") + try: + realsense_cameras = RealSenseCamera.find_cameras() + for cam_info in realsense_cameras: + all_realsense_cameras_info.append(cam_info) + logger.info(f"Found {len(realsense_cameras)} RealSense cameras.") + except ImportError: + logger.warning("Skipping RealSense camera search: pyrealsense2 library not found or not importable.") + except Exception as e: + logger.error(f"Error finding RealSense cameras: {e}") + + return all_realsense_cameras_info + + +def find_and_print_cameras(camera_type_filter: str | None = None) -> List[Dict[str, Any]]: + """ + Finds available cameras based on an optional filter and prints their information. + + Args: + camera_type_filter: Optional string to filter cameras ("realsense" or "opencv"). + If None, lists all cameras. + + Returns: + A list of all available cameras matching the filter, with their metadata. + """ + all_cameras_info: List[Dict[str, Any]] = [] + + if camera_type_filter: + camera_type_filter = camera_type_filter.lower() + + if camera_type_filter is None or camera_type_filter == "opencv": + all_cameras_info.extend(find_all_opencv_cameras()) + if camera_type_filter is None or camera_type_filter == "realsense": + all_cameras_info.extend(find_all_realsense_cameras()) + + if not all_cameras_info: + if camera_type_filter: + logger.warning(f"No {camera_type_filter} cameras were detected.") + else: + logger.warning("No cameras (OpenCV or RealSense) were detected.") + else: + print("\n--- Detected Cameras ---") + for i, cam_info in enumerate(all_cameras_info): + print(f"Camera #{i}:") + for key, value in cam_info.items(): + if key == "default_stream_profile" and isinstance(value, dict): + print(f" {key.replace('_', ' ').capitalize()}:") + for sub_key, sub_value in value.items(): + print(f" {sub_key.capitalize()}: {sub_value}") + else: + print(f" {key.replace('_', ' ').capitalize()}: {value}") + print("-" * 20) + return all_cameras_info + + +def save_image( + img_array: np.ndarray, + camera_identifier: str | int, + images_dir: Path, + camera_type: str, +): + """ + Saves a single image to disk using Pillow. Handles color conversion if necessary. + """ + try: + img = Image.fromarray(img_array, mode="RGB") + + safe_identifier = str(camera_identifier).replace("/", "_").replace("\\", "_") + filename_prefix = f"{camera_type.lower()}_{safe_identifier}" + filename = f"{filename_prefix}.png" + + path = images_dir / filename + path.parent.mkdir(parents=True, exist_ok=True) + img.save(str(path)) + logger.info(f"Saved image: {path}") + except Exception as e: + logger.error(f"Failed to save image for camera {camera_identifier} (type {camera_type}): {e}") + + +def create_camera_instance(cam_meta: Dict[str, Any]) -> Dict[str, Any] | None: + """Create and connect to a camera instance based on metadata.""" + cam_type = cam_meta.get("type") + cam_id = cam_meta.get("id") + instance = None + + logger.info(f"Preparing {cam_type} ID {cam_id} with default profile") + + try: + if cam_type == "OpenCV": + cv_config = OpenCVCameraConfig( + index_or_path=cam_id, + color_mode=ColorMode.RGB, + ) + instance = OpenCVCamera(cv_config) + elif cam_type == "RealSense": + rs_config = RealSenseCameraConfig( + serial_number_or_name=int(cam_id), + color_mode=ColorMode.RGB, + ) + instance = RealSenseCamera(rs_config) + else: + logger.warning(f"Unknown camera type: {cam_type} for ID {cam_id}. 
Skipping.") + return None + + if instance: + logger.info(f"Connecting to {cam_type} camera: {cam_id}...") + instance.connect(warmup=False) + return {"instance": instance, "meta": cam_meta} + except Exception as e: + logger.error(f"Failed to connect or configure {cam_type} camera {cam_id}: {e}") + if instance and instance.is_connected: + instance.disconnect() + return None + + +def process_camera_image( + cam_dict: Dict[str, Any], output_dir: Path, current_time: float +) -> concurrent.futures.Future | None: + """Capture and process an image from a single camera.""" + cam = cam_dict["instance"] + meta = cam_dict["meta"] + cam_type_str = str(meta.get("type", "unknown")) + cam_id_str = str(meta.get("id", "unknown")) + + try: + image_data = cam.read() + + return save_image( + image_data, + cam_id_str, + output_dir, + cam_type_str, + ) + except TimeoutError: + logger.warning( + f"Timeout reading from {cam_type_str} camera {cam_id_str} at time {current_time:.2f}s." + ) + except Exception as e: + logger.error(f"Error reading from {cam_type_str} camera {cam_id_str}: {e}") + return None + + +def cleanup_cameras(cameras_to_use: List[Dict[str, Any]]): + """Disconnect all cameras.""" + logger.info(f"Disconnecting {len(cameras_to_use)} cameras...") + for cam_dict in cameras_to_use: + try: + if cam_dict["instance"] and cam_dict["instance"].is_connected: + cam_dict["instance"].disconnect() + except Exception as e: + logger.error(f"Error disconnecting camera {cam_dict['meta'].get('id')}: {e}") + + +def save_images_from_all_cameras( + output_dir: Path, + record_time_s: float = 2.0, + camera_type: str | None = None, +): + """ + Connects to detected cameras (optionally filtered by type) and saves images from each. + Uses default stream profiles for width, height, and FPS. + + Args: + output_dir: Directory to save images. + record_time_s: Duration in seconds to record images. + camera_type: Optional string to filter cameras ("realsense" or "opencv"). + If None, uses all detected cameras. + """ + output_dir.mkdir(parents=True, exist_ok=True) + logger.info(f"Saving images to {output_dir}") + all_camera_metadata = find_and_print_cameras(camera_type_filter=camera_type) + + if not all_camera_metadata: + logger.warning("No cameras detected matching the criteria. Cannot save images.") + return + + cameras_to_use = [] + for cam_meta in all_camera_metadata: + camera_instance = create_camera_instance(cam_meta) + if camera_instance: + cameras_to_use.append(camera_instance) + + if not cameras_to_use: + logger.warning("No cameras could be connected. Aborting image save.") + return + + logger.info(f"Starting image capture for {record_time_s} seconds from {len(cameras_to_use)} cameras.") + start_time = time.perf_counter() + + with concurrent.futures.ThreadPoolExecutor(max_workers=len(cameras_to_use) * 2) as executor: + try: + while time.perf_counter() - start_time < record_time_s: + futures = [] + current_capture_time = time.perf_counter() + + for cam_dict in cameras_to_use: + future = process_camera_image(cam_dict, output_dir, current_capture_time) + if future: + futures.append(future) + + if futures: + concurrent.futures.wait(futures) + + except KeyboardInterrupt: + logger.info("Capture interrupted by user.") + finally: + print("\nFinalizing image saving...") + executor.shutdown(wait=True) + cleanup_cameras(cameras_to_use) + logger.info(f"Image capture finished. 
Images saved to {output_dir}") + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description="Unified camera utility script for listing cameras and capturing images." + ) + + parser.add_argument( + "camera_type", + type=str, + nargs="?", + default=None, + choices=["realsense", "opencv"], + help="Specify camera type to capture from (e.g., 'realsense', 'opencv'). Captures from all if omitted.", + ) + parser.add_argument( + "--output-dir", + type=Path, + default="outputs/captured_images", + help="Directory to save images. Default: outputs/captured_images", + ) + parser.add_argument( + "--record-time-s", + type=float, + default=6.0, + help="Time duration to attempt capturing frames. Default: 6 seconds.", + ) + args = parser.parse_args() + save_images_from_all_cameras(**vars(args)) diff --git a/lerobot/record.py b/lerobot/record.py index 0059752b85..508b09292a 100644 --- a/lerobot/record.py +++ b/lerobot/record.py @@ -44,9 +44,9 @@ from lerobot.common.cameras import ( # noqa: F401 CameraConfig, # noqa: F401 - intel, - opencv, ) +from lerobot.common.cameras.opencv.configuration_opencv import OpenCVCameraConfig # noqa: F401 +from lerobot.common.cameras.realsense.configuration_realsense import RealSenseCameraConfig # noqa: F401 from lerobot.common.datasets.image_writer import safe_stop_image_writer from lerobot.common.datasets.lerobot_dataset import LeRobotDataset from lerobot.common.datasets.utils import build_dataset_frame, hw_to_dataset_features diff --git a/lerobot/teleoperate.py b/lerobot/teleoperate.py index f14f9a796e..f3496a69c0 100644 --- a/lerobot/teleoperate.py +++ b/lerobot/teleoperate.py @@ -38,7 +38,8 @@ import numpy as np import rerun as rr -from lerobot.common.cameras import intel, opencv # noqa: F401 +from lerobot.common.cameras.opencv.configuration_opencv import OpenCVCameraConfig # noqa: F401 +from lerobot.common.cameras.realsense.configuration_realsense import RealSenseCameraConfig # noqa: F401 from lerobot.common.robots import ( # noqa: F401 Robot, RobotConfig, diff --git a/pyproject.toml b/pyproject.toml index 1578060eb2..c4faa400ad 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -84,7 +84,10 @@ dora = [ ] dynamixel = ["dynamixel-sdk>=3.7.31"] feetech = ["feetech-servo-sdk>=1.0.0"] -intelrealsense = ["pyrealsense2>=2.55.1.6486 ; sys_platform != 'darwin'"] +intelrealsense = [ + "pyrealsense2>=2.55.1.6486 ; sys_platform != 'darwin'", + "pyrealsense2-macosx>=2.54 ; sys_platform == 'darwin'", +] pi0 = ["transformers>=4.48.0"] pusht = ["gym-pusht>=0.1.5 ; python_version < '4.0'"] stretch = [ diff --git a/tests/artifacts/cameras/image_128x128.png b/tests/artifacts/cameras/image_128x128.png new file mode 100644 index 0000000000..b117f49f27 --- /dev/null +++ b/tests/artifacts/cameras/image_128x128.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9dc9df05797dc0e7b92edc845caab2e4c37c3cfcabb4ee6339c67212b5baba3b +size 38023 diff --git a/tests/artifacts/cameras/image_160x120.png b/tests/artifacts/cameras/image_160x120.png new file mode 100644 index 0000000000..cdc681d183 --- /dev/null +++ b/tests/artifacts/cameras/image_160x120.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e11af87616b83c1cdb30330e951b91e86b51c64a1326e1ba5b4a3fbcdec1a11 +size 55698 diff --git a/tests/artifacts/cameras/image_320x180.png b/tests/artifacts/cameras/image_320x180.png new file mode 100644 index 0000000000..4cfd511a71 --- /dev/null +++ b/tests/artifacts/cameras/image_320x180.png @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:b8840fb643afe903191248703b1f95a57faf5812ecd9978ac502ee939646fdb2 +size 121115 diff --git a/tests/artifacts/cameras/image_480x270.png b/tests/artifacts/cameras/image_480x270.png new file mode 100644 index 0000000000..b564d5424c --- /dev/null +++ b/tests/artifacts/cameras/image_480x270.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f79d14daafb1c0cf2fec5d46ee8029a73fe357402fdd31a7cd4a4794d7319a7c +size 260367 diff --git a/tests/artifacts/cameras/test_rs.bag b/tests/artifacts/cameras/test_rs.bag new file mode 100644 index 0000000000..1b9662c356 --- /dev/null +++ b/tests/artifacts/cameras/test_rs.bag @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8d6e64d6cb0e02c94ae125630ee758055bd2e695772c0463a30d63ddc6c5e17 +size 3520862 diff --git a/tests/cameras/mock_cv2.py b/tests/cameras/mock_cv2.py deleted file mode 100644 index eeaf859cc2..0000000000 --- a/tests/cameras/mock_cv2.py +++ /dev/null @@ -1,101 +0,0 @@ -# Copyright 2024 The HuggingFace Inc. team. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -from functools import cache - -import numpy as np - -CAP_V4L2 = 200 -CAP_DSHOW = 700 -CAP_AVFOUNDATION = 1200 -CAP_ANY = -1 - -CAP_PROP_FPS = 5 -CAP_PROP_FRAME_WIDTH = 3 -CAP_PROP_FRAME_HEIGHT = 4 -COLOR_RGB2BGR = 4 -COLOR_BGR2RGB = 4 - -ROTATE_90_COUNTERCLOCKWISE = 2 -ROTATE_90_CLOCKWISE = 0 -ROTATE_180 = 1 - - -@cache -def _generate_image(width: int, height: int): - return np.random.randint(0, 256, size=(height, width, 3), dtype=np.uint8) - - -def cvtColor(color_image, color_conversion): # noqa: N802 - if color_conversion in [COLOR_RGB2BGR, COLOR_BGR2RGB]: - return color_image[:, :, [2, 1, 0]] - else: - raise NotImplementedError(color_conversion) - - -def rotate(color_image, rotation): - if rotation is None: - return color_image - elif rotation == ROTATE_90_CLOCKWISE: - return np.rot90(color_image, k=1) - elif rotation == ROTATE_180: - return np.rot90(color_image, k=2) - elif rotation == ROTATE_90_COUNTERCLOCKWISE: - return np.rot90(color_image, k=3) - else: - raise NotImplementedError(rotation) - - -class VideoCapture: - def __init__(self, *args, **kwargs): - self._mock_dict = { - CAP_PROP_FPS: 30, - CAP_PROP_FRAME_WIDTH: 640, - CAP_PROP_FRAME_HEIGHT: 480, - } - self._is_opened = True - - def isOpened(self): # noqa: N802 - return self._is_opened - - def set(self, propId: int, value: float) -> bool: # noqa: N803 - if not self._is_opened: - raise RuntimeError("Camera is not opened") - self._mock_dict[propId] = value - return True - - def get(self, propId: int) -> float: # noqa: N803 - if not self._is_opened: - raise RuntimeError("Camera is not opened") - value = self._mock_dict[propId] - if value == 0: - if propId == CAP_PROP_FRAME_HEIGHT: - value = 480 - elif propId == CAP_PROP_FRAME_WIDTH: - value = 640 - return value - - def read(self): - if not self._is_opened: - raise RuntimeError("Camera is not opened") - h = self.get(CAP_PROP_FRAME_HEIGHT) - w = self.get(CAP_PROP_FRAME_WIDTH) - 
ret = True - return ret, _generate_image(width=w, height=h) - - def release(self): - self._is_opened = False - - def __del__(self): - if self._is_opened: - self.release() diff --git a/tests/cameras/mock_pyrealsense2.py b/tests/cameras/mock_pyrealsense2.py deleted file mode 100644 index c477eb0626..0000000000 --- a/tests/cameras/mock_pyrealsense2.py +++ /dev/null @@ -1,148 +0,0 @@ -# Copyright 2024 The HuggingFace Inc. team. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import enum - -import numpy as np - - -class stream(enum.Enum): # noqa: N801 - color = 0 - depth = 1 - - -class format(enum.Enum): # noqa: N801 - rgb8 = 0 - z16 = 1 - - -class config: # noqa: N801 - def enable_device(self, device_id: str): - self.device_enabled = device_id - - def enable_stream(self, stream_type: stream, width=None, height=None, color_format=None, fps=None): - self.stream_type = stream_type - # Overwrite default values when possible - self.width = 848 if width is None else width - self.height = 480 if height is None else height - self.color_format = format.rgb8 if color_format is None else color_format - self.fps = 30 if fps is None else fps - - -class RSColorProfile: - def __init__(self, config): - self.config = config - - def fps(self): - return self.config.fps - - def width(self): - return self.config.width - - def height(self): - return self.config.height - - -class RSColorStream: - def __init__(self, config): - self.config = config - - def as_video_stream_profile(self): - return RSColorProfile(self.config) - - -class RSProfile: - def __init__(self, config): - self.config = config - - def get_stream(self, color_format): - del color_format # unused - return RSColorStream(self.config) - - -class pipeline: # noqa: N801 - def __init__(self): - self.started = False - self.config = None - - def start(self, config): - self.started = True - self.config = config - return RSProfile(self.config) - - def stop(self): - if not self.started: - raise RuntimeError("You need to start the camera before stop.") - self.started = False - self.config = None - - def wait_for_frames(self, timeout_ms=50000): - del timeout_ms # unused - return RSFrames(self.config) - - -class RSFrames: - def __init__(self, config): - self.config = config - - def get_color_frame(self): - return RSColorFrame(self.config) - - def get_depth_frame(self): - return RSDepthFrame(self.config) - - -class RSColorFrame: - def __init__(self, config): - self.config = config - - def get_data(self): - data = np.ones((self.config.height, self.config.width, 3), dtype=np.uint8) - # Create a difference between rgb and bgr - data[:, :, 0] = 2 - return data - - -class RSDepthFrame: - def __init__(self, config): - self.config = config - - def get_data(self): - return np.ones((self.config.height, self.config.width), dtype=np.uint16) - - -class RSDevice: - def __init__(self): - pass - - def get_info(self, camera_info) -> str: - del camera_info # unused - # return fake serial number - return "123456789" - - -class context: # noqa: N801 - def 
__init__(self): - pass - - def query_devices(self): - return [RSDevice()] - - -class camera_info: # noqa: N801 - # fake name - name = "Intel RealSense D435I" - - def __init__(self, serial_number): - del serial_number - pass diff --git a/tests/cameras/test_cameras.py b/tests/cameras/test_cameras.py deleted file mode 100644 index 6dbc716c97..0000000000 --- a/tests/cameras/test_cameras.py +++ /dev/null @@ -1,252 +0,0 @@ -# Copyright 2024 The HuggingFace Inc. team. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -Tests for physical cameras and their mocked versions. -If the physical camera is not connected to the computer, or not working, -the test will be skipped. - -Example of running a specific test: -```bash -pytest -sx tests/test_cameras.py::test_camera -``` - -Example of running test on a real camera connected to the computer: -```bash -pytest -sx 'tests/test_cameras.py::test_camera[opencv-False]' -pytest -sx 'tests/test_cameras.py::test_camera[intelrealsense-False]' -``` - -Example of running test on a mocked version of the camera: -```bash -pytest -sx 'tests/test_cameras.py::test_camera[opencv-True]' -pytest -sx 'tests/test_cameras.py::test_camera[intelrealsense-True]' -``` -""" - -import numpy as np -import pytest - -from lerobot.common.errors import DeviceAlreadyConnectedError, DeviceNotConnectedError -from tests.utils import TEST_CAMERA_TYPES, make_camera, require_camera - -# Maximum absolute difference between two consecutive images recorded by a camera. -# This value differs with respect to the camera. -MAX_PIXEL_DIFFERENCE = 25 - - -def compute_max_pixel_difference(first_image, second_image): - return np.abs(first_image.astype(float) - second_image.astype(float)).max() - - -@pytest.mark.parametrize("camera_type, mock", TEST_CAMERA_TYPES) -@require_camera -def test_camera(request, camera_type, mock): - """Test assumes that `camera.read()` returns the same image when called multiple times in a row. - So the environment should not change (you shouldnt be in front of the camera) and the camera should not be moving. - - Warning: The tests worked for a macbookpro camera, but I am getting assertion error (`np.allclose(color_image, async_color_image)`) - for my iphone camera and my LG monitor camera. - """ - # TODO(rcadene): measure fps in nightly? 
- # TODO(rcadene): test logs - - if camera_type == "opencv" and not mock: - pytest.skip("TODO(rcadene): fix test for opencv physical camera") - - camera_kwargs = {"camera_type": camera_type, "mock": mock} - - # Test instantiating - camera = make_camera(**camera_kwargs) - - # Test reading, async reading, disconnecting before connecting raises an error - with pytest.raises(DeviceNotConnectedError): - camera.read() - with pytest.raises(DeviceNotConnectedError): - camera.async_read() - with pytest.raises(DeviceNotConnectedError): - camera.disconnect() - - # Test deleting the object without connecting first - del camera - - # Test connecting - camera = make_camera(**camera_kwargs) - camera.connect() - assert camera.is_connected - assert camera.fps is not None - assert camera.capture_width is not None - assert camera.capture_height is not None - - # Test connecting twice raises an error - with pytest.raises(DeviceAlreadyConnectedError): - camera.connect() - - # Test reading from the camera - color_image = camera.read() - assert isinstance(color_image, np.ndarray) - assert color_image.ndim == 3 - h, w, c = color_image.shape - assert c == 3 - assert w > h - - # Test read and async_read outputs similar images - # ...warming up as the first frames can be black - for _ in range(30): - camera.read() - color_image = camera.read() - async_color_image = camera.async_read() - error_msg = ( - "max_pixel_difference between read() and async_read()", - compute_max_pixel_difference(color_image, async_color_image), - ) - # TODO(rcadene): properly set `rtol` - np.testing.assert_allclose( - color_image, async_color_image, rtol=1e-5, atol=MAX_PIXEL_DIFFERENCE, err_msg=error_msg - ) - - # Test disconnecting - camera.disconnect() - assert camera.camera is None - assert camera.thread is None - - # Test disconnecting with `__del__` - camera = make_camera(**camera_kwargs) - camera.connect() - del camera - - # Test acquiring a bgr image - camera = make_camera(**camera_kwargs, color_mode="bgr") - camera.connect() - assert camera.color_mode == "bgr" - bgr_color_image = camera.read() - np.testing.assert_allclose( - color_image, bgr_color_image[:, :, [2, 1, 0]], rtol=1e-5, atol=MAX_PIXEL_DIFFERENCE, err_msg=error_msg - ) - del camera - - # Test acquiring a rotated image - camera = make_camera(**camera_kwargs) - camera.connect() - ori_color_image = camera.read() - del camera - - for rotation in [None, 90, 180, -90]: - camera = make_camera(**camera_kwargs, rotation=rotation) - camera.connect() - - if mock: - import tests.cameras.mock_cv2 as cv2 - else: - import cv2 - - if rotation is None: - manual_rot_img = ori_color_image - assert camera.rotation is None - elif rotation == 90: - manual_rot_img = np.rot90(color_image, k=1) - assert camera.rotation == cv2.ROTATE_90_CLOCKWISE - elif rotation == 180: - manual_rot_img = np.rot90(color_image, k=2) - assert camera.rotation == cv2.ROTATE_180 - elif rotation == -90: - manual_rot_img = np.rot90(color_image, k=3) - assert camera.rotation == cv2.ROTATE_90_COUNTERCLOCKWISE - - rot_color_image = camera.read() - - np.testing.assert_allclose( - rot_color_image, manual_rot_img, rtol=1e-5, atol=MAX_PIXEL_DIFFERENCE, err_msg=error_msg - ) - del camera - - # TODO(rcadene): Add a test for a camera that doesnt support fps=60 and raises an OSError - # TODO(rcadene): Add a test for a camera that supports fps=60 - - # Test width and height can be set - camera = make_camera(**camera_kwargs, fps=30, width=1280, height=720) - camera.connect() - assert camera.fps == 30 - assert camera.width == 1280 - 
assert camera.height == 720 - color_image = camera.read() - h, w, c = color_image.shape - assert h == 720 - assert w == 1280 - assert c == 3 - del camera - - # Test not supported width and height raise an error - camera = make_camera(**camera_kwargs, fps=30, width=0, height=0) - with pytest.raises(OSError): - camera.connect() - del camera - - -@pytest.mark.parametrize("camera_type, mock", TEST_CAMERA_TYPES) -@require_camera -def test_save_images_from_cameras(tmp_path, request, camera_type, mock): - # TODO(rcadene): refactor - if camera_type == "opencv": - from lerobot.common.cameras.opencv.camera_opencv import save_images_from_cameras - elif camera_type == "intelrealsense": - from lerobot.common.cameras.intel.camera_realsense import save_images_from_cameras - - # Small `record_time_s` to speedup unit tests - save_images_from_cameras(tmp_path, record_time_s=0.02, mock=mock) - - -@pytest.mark.parametrize("camera_type, mock", TEST_CAMERA_TYPES) -@require_camera -def test_camera_rotation(request, camera_type, mock): - config_kwargs = {"camera_type": camera_type, "mock": mock, "width": 640, "height": 480, "fps": 30} - - # No rotation. - camera = make_camera(**config_kwargs, rotation=None) - camera.connect() - assert camera.capture_width == 640 - assert camera.capture_height == 480 - assert camera.width == 640 - assert camera.height == 480 - no_rot_img = camera.read() - h, w, c = no_rot_img.shape - assert h == 480 and w == 640 and c == 3 - camera.disconnect() - - # Rotation = 90 (clockwise). - camera = make_camera(**config_kwargs, rotation=90) - camera.connect() - # With a 90° rotation, we expect the metadata dimensions to be swapped. - assert camera.capture_width == 640 - assert camera.capture_height == 480 - assert camera.width == 480 - assert camera.height == 640 - import cv2 - - assert camera.rotation == cv2.ROTATE_90_CLOCKWISE - rot_img = camera.read() - h, w, c = rot_img.shape - assert h == 640 and w == 480 and c == 3 - camera.disconnect() - - # Rotation = 180. - camera = make_camera(**config_kwargs, rotation=None) - camera.connect() - assert camera.capture_width == 640 - assert camera.capture_height == 480 - assert camera.width == 640 - assert camera.height == 480 - no_rot_img = camera.read() - h, w, c = no_rot_img.shape - assert h == 480 and w == 640 and c == 3 - camera.disconnect() diff --git a/tests/cameras/test_opencv.py b/tests/cameras/test_opencv.py new file mode 100644 index 0000000000..357ce1e140 --- /dev/null +++ b/tests/cameras/test_opencv.py @@ -0,0 +1,190 @@ +#!/usr/bin/env python + +# Copyright 2024 The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# Example of running a specific test: +# ```bash +# pytest tests/cameras/test_opencv.py::test_connect +# ``` + +from pathlib import Path + +import numpy as np +import pytest + +from lerobot.common.cameras.configs import Cv2Rotation +from lerobot.common.cameras.opencv import OpenCVCamera, OpenCVCameraConfig +from lerobot.common.errors import DeviceAlreadyConnectedError, DeviceNotConnectedError + +# NOTE(Steven): more tests + assertions? +TEST_ARTIFACTS_DIR = Path(__file__).parent.parent / "artifacts" / "cameras" +DEFAULT_PNG_FILE_PATH = TEST_ARTIFACTS_DIR / "image_160x120.png" +TEST_IMAGE_SIZES = ["128x128", "160x120", "320x180", "480x270"] +TEST_IMAGE_PATHS = [TEST_ARTIFACTS_DIR / f"image_{size}.png" for size in TEST_IMAGE_SIZES] + + +def test_abc_implementation(): + """Instantiation should raise an error if the class doesn't implement abstract methods/properties.""" + config = OpenCVCameraConfig(index_or_path=0) + + _ = OpenCVCamera(config) + + +def test_connect(): + config = OpenCVCameraConfig(index_or_path=DEFAULT_PNG_FILE_PATH) + camera = OpenCVCamera(config) + + camera.connect(warmup=False) + + assert camera.is_connected + + +def test_connect_already_connected(): + config = OpenCVCameraConfig(index_or_path=DEFAULT_PNG_FILE_PATH) + camera = OpenCVCamera(config) + camera.connect(warmup=False) + + with pytest.raises(DeviceAlreadyConnectedError): + camera.connect(warmup=False) + + +def test_connect_invalid_camera_path(): + config = OpenCVCameraConfig(index_or_path="nonexistent/camera.png") + camera = OpenCVCamera(config) + + with pytest.raises(ConnectionError): + camera.connect(warmup=False) + + +def test_invalid_width_connect(): + config = OpenCVCameraConfig( + index_or_path=DEFAULT_PNG_FILE_PATH, + width=99999, # Invalid width to trigger error + height=480, + ) + camera = OpenCVCamera(config) + + with pytest.raises(RuntimeError): + camera.connect(warmup=False) + + +@pytest.mark.parametrize("index_or_path", TEST_IMAGE_PATHS, ids=TEST_IMAGE_SIZES) +def test_read(index_or_path): + config = OpenCVCameraConfig(index_or_path=index_or_path) + camera = OpenCVCamera(config) + camera.connect(warmup=False) + + img = camera.read() + + assert isinstance(img, np.ndarray) + + +def test_read_before_connect(): + config = OpenCVCameraConfig(index_or_path=DEFAULT_PNG_FILE_PATH) + camera = OpenCVCamera(config) + + with pytest.raises(DeviceNotConnectedError): + _ = camera.read() + + +def test_disconnect(): + config = OpenCVCameraConfig(index_or_path=DEFAULT_PNG_FILE_PATH) + camera = OpenCVCamera(config) + camera.connect(warmup=False) + + camera.disconnect() + + assert not camera.is_connected + + +def test_disconnect_before_connect(): + config = OpenCVCameraConfig(index_or_path=DEFAULT_PNG_FILE_PATH) + camera = OpenCVCamera(config) + + with pytest.raises(DeviceNotConnectedError): + _ = camera.disconnect() + + +@pytest.mark.parametrize("index_or_path", TEST_IMAGE_PATHS, ids=TEST_IMAGE_SIZES) +def test_async_read(index_or_path): + config = OpenCVCameraConfig(index_or_path=index_or_path) + camera = OpenCVCamera(config) + camera.connect(warmup=False) + + try: + img = camera.async_read() + + assert camera.thread is not None + assert camera.thread.is_alive() + assert isinstance(img, np.ndarray) + finally: + if camera.is_connected: + camera.disconnect() # To stop/join the thread. 
Otherwise we get warnings when the test ends
+
+
+def test_async_read_timeout():
+    config = OpenCVCameraConfig(index_or_path=DEFAULT_PNG_FILE_PATH)
+    camera = OpenCVCamera(config)
+    camera.connect(warmup=False)
+
+    try:
+        with pytest.raises(TimeoutError):
+            camera.async_read(
+                timeout_ms=0
+            )  # NOTE(Steven): This is flaky, as sometimes a frame is returned even with timeout_ms=0
+    finally:
+        if camera.is_connected:
+            camera.disconnect()
+
+
+def test_async_read_before_connect():
+    config = OpenCVCameraConfig(index_or_path=DEFAULT_PNG_FILE_PATH)
+    camera = OpenCVCamera(config)
+
+    with pytest.raises(DeviceNotConnectedError):
+        _ = camera.async_read()
+
+
+@pytest.mark.parametrize("index_or_path", TEST_IMAGE_PATHS, ids=TEST_IMAGE_SIZES)
+@pytest.mark.parametrize(
+    "rotation",
+    [
+        Cv2Rotation.NO_ROTATION,
+        Cv2Rotation.ROTATE_90,
+        Cv2Rotation.ROTATE_180,
+        Cv2Rotation.ROTATE_270,
+    ],
+    ids=["no_rot", "rot90", "rot180", "rot270"],
+)
+def test_rotation(rotation, index_or_path):
+    filename = Path(index_or_path).name
+    dimensions = filename.split("_")[-1].split(".")[0]  # Assumes the filename ends in _<width>x<height>.png
+    original_width, original_height = map(int, dimensions.split("x"))
+
+    config = OpenCVCameraConfig(index_or_path=index_or_path, rotation=rotation)
+    camera = OpenCVCamera(config)
+    camera.connect(warmup=False)
+
+    img = camera.read()
+    assert isinstance(img, np.ndarray)
+
+    if rotation in (Cv2Rotation.ROTATE_90, Cv2Rotation.ROTATE_270):
+        assert camera.width == original_height
+        assert camera.height == original_width
+        assert img.shape[:2] == (original_width, original_height)
+    else:
+        assert camera.width == original_width
+        assert camera.height == original_height
+        assert img.shape[:2] == (original_height, original_width)
diff --git a/tests/cameras/test_realsense.py b/tests/cameras/test_realsense.py
new file mode 100644
index 0000000000..4b552c1e33
--- /dev/null
+++ b/tests/cameras/test_realsense.py
@@ -0,0 +1,206 @@
+#!/usr/bin/env python
+
+# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Example of running a specific test:
+# ```bash
+# pytest tests/cameras/test_realsense.py::test_connect
+# ```
+
+from pathlib import Path
+from unittest.mock import patch
+
+import numpy as np
+import pytest
+
+from lerobot.common.cameras.configs import Cv2Rotation
+from lerobot.common.errors import DeviceAlreadyConnectedError, DeviceNotConnectedError
+
+pytest.importorskip("pyrealsense2")
+
+from lerobot.common.cameras.realsense import RealSenseCamera, RealSenseCameraConfig
+
+TEST_ARTIFACTS_DIR = Path(__file__).parent.parent / "artifacts" / "cameras"
+BAG_FILE_PATH = TEST_ARTIFACTS_DIR / "test_rs.bag"
+
+# NOTE(Steven): For some reason these tests take ~20sec on macOS but only ~2sec on Linux.
+ + +def mock_rs_config_enable_device_from_file(rs_config_instance, _sn): + return rs_config_instance.enable_device_from_file(str(BAG_FILE_PATH), repeat_playback=True) + + +def mock_rs_config_enable_device_bad_file(rs_config_instance, _sn): + return rs_config_instance.enable_device_from_file("non_existent_file.bag", repeat_playback=True) + + +@pytest.fixture(name="patch_realsense", autouse=True) +def fixture_patch_realsense(): + """Automatically mock pyrealsense2.config.enable_device for all tests.""" + with patch( + "pyrealsense2.config.enable_device", side_effect=mock_rs_config_enable_device_from_file + ) as mock: + yield mock + + +def test_abc_implementation(): + """Instantiation should raise an error if the class doesn't implement abstract methods/properties.""" + config = RealSenseCameraConfig(serial_number_or_name=42) + _ = RealSenseCamera(config) + + +def test_connect(): + config = RealSenseCameraConfig(serial_number_or_name=42) + camera = RealSenseCamera(config) + + camera.connect(warmup=False) + assert camera.is_connected + + +def test_connect_already_connected(): + config = RealSenseCameraConfig(serial_number_or_name=42) + camera = RealSenseCamera(config) + camera.connect(warmup=False) + + with pytest.raises(DeviceAlreadyConnectedError): + camera.connect(warmup=False) + + +def test_connect_invalid_camera_path(patch_realsense): + patch_realsense.side_effect = mock_rs_config_enable_device_bad_file + config = RealSenseCameraConfig(serial_number_or_name=42) + camera = RealSenseCamera(config) + + with pytest.raises(ConnectionError): + camera.connect(warmup=False) + + +def test_invalid_width_connect(): + config = RealSenseCameraConfig(serial_number_or_name=42, width=99999, height=480, fps=30) + camera = RealSenseCamera(config) + + with pytest.raises(ConnectionError): + camera.connect(warmup=False) + + +def test_read(): + config = RealSenseCameraConfig(serial_number_or_name=42, width=640, height=480, fps=30) + camera = RealSenseCamera(config) + camera.connect(warmup=False) + + img = camera.read() + assert isinstance(img, np.ndarray) + + +def test_read_depth(): + config = RealSenseCameraConfig(serial_number_or_name=42, width=640, height=480, fps=30, use_depth=True) + camera = RealSenseCamera(config) + camera.connect(warmup=False) + + img = camera.read_depth(timeout_ms=1000) # NOTE(Steven): Reading depth takes longer + assert isinstance(img, np.ndarray) + + +def test_read_before_connect(): + config = RealSenseCameraConfig(serial_number_or_name=42) + camera = RealSenseCamera(config) + + with pytest.raises(DeviceNotConnectedError): + _ = camera.read() + + +def test_disconnect(): + config = RealSenseCameraConfig(serial_number_or_name=42) + camera = RealSenseCamera(config) + camera.connect(warmup=False) + + camera.disconnect() + + assert not camera.is_connected + + +def test_disconnect_before_connect(): + config = RealSenseCameraConfig(serial_number_or_name=42) + camera = RealSenseCamera(config) + + with pytest.raises(DeviceNotConnectedError): + camera.disconnect() + + +def test_async_read(): + config = RealSenseCameraConfig(serial_number_or_name=42, width=640, height=480, fps=30) + camera = RealSenseCamera(config) + camera.connect(warmup=False) + + try: + img = camera.async_read() + + assert camera.thread is not None + assert camera.thread.is_alive() + assert isinstance(img, np.ndarray) + finally: + if camera.is_connected: + camera.disconnect() # To stop/join the thread. 
Otherwise we get warnings when the test ends
+
+
+def test_async_read_timeout():
+    config = RealSenseCameraConfig(serial_number_or_name=42, width=640, height=480, fps=30)
+    camera = RealSenseCamera(config)
+    camera.connect(warmup=False)
+
+    try:
+        with pytest.raises(TimeoutError):
+            camera.async_read(
+                timeout_ms=0
+            )  # NOTE(Steven): This is flaky, as sometimes a frame is returned even with timeout_ms=0
+    finally:
+        if camera.is_connected:
+            camera.disconnect()
+
+
+def test_async_read_before_connect():
+    config = RealSenseCameraConfig(serial_number_or_name=42)
+    camera = RealSenseCamera(config)
+
+    with pytest.raises(DeviceNotConnectedError):
+        _ = camera.async_read()
+
+
+@pytest.mark.parametrize(
+    "rotation",
+    [
+        Cv2Rotation.NO_ROTATION,
+        Cv2Rotation.ROTATE_90,
+        Cv2Rotation.ROTATE_180,
+        Cv2Rotation.ROTATE_270,
+    ],
+    ids=["no_rot", "rot90", "rot180", "rot270"],
+)
+def test_rotation(rotation):
+    config = RealSenseCameraConfig(serial_number_or_name=42, rotation=rotation)
+    camera = RealSenseCamera(config)
+    camera.connect(warmup=False)
+
+    img = camera.read()
+    assert isinstance(img, np.ndarray)
+
+    if rotation in (Cv2Rotation.ROTATE_90, Cv2Rotation.ROTATE_270):
+        assert camera.width == 480
+        assert camera.height == 640
+        assert img.shape[:2] == (640, 480)
+    else:
+        assert camera.width == 640
+        assert camera.height == 480
+        assert img.shape[:2] == (480, 640)
diff --git a/tests/utils.py b/tests/utils.py
index 918dc03be6..bf3de0ed7a 100644
--- a/tests/utils.py
+++ b/tests/utils.py
@@ -223,7 +223,7 @@ def make_camera(camera_type: str, **kwargs) -> Camera:
     elif camera_type == "intelrealsense":
         serial_number = kwargs.pop("serial_number", INTELREALSENSE_SERIAL_NUMBER)
         kwargs["serial_number"] = serial_number
-        from lerobot.common.cameras.intel import RealSenseCamera, RealSenseCameraConfig
+        from lerobot.common.cameras.realsense import RealSenseCamera, RealSenseCameraConfig
 
         config = RealSenseCameraConfig(**kwargs)
         return RealSenseCamera(config)