diff --git a/docs/snippets/all/archetypes/image_advanced.py b/docs/snippets/all/archetypes/image_advanced.py
index 8dc0d2d0ff94..d9959452c041 100644
--- a/docs/snippets/all/archetypes/image_advanced.py
+++ b/docs/snippets/all/archetypes/image_advanced.py
@@ -33,6 +33,5 @@
 # Read with OpenCV
 image = cv2.imread(file_path)
 
-# OpenCV uses BGR ordering, so we need to convert to RGB.
-image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
-rr.log("from_opencv", rr.Image(image))
+# OpenCV uses BGR ordering, so we need to make this known to Rerun.
+rr.log("from_opencv", rr.Image(image, color_model="BGR"))
diff --git a/examples/python/arkit_scenes/arkit_scenes/__main__.py b/examples/python/arkit_scenes/arkit_scenes/__main__.py
index e03b2ccf0d57..90f47237737a 100755
--- a/examples/python/arkit_scenes/arkit_scenes/__main__.py
+++ b/examples/python/arkit_scenes/arkit_scenes/__main__.py
@@ -225,7 +225,6 @@ def log_arkit(recording_path: Path, include_highres: bool) -> None:
         rr.set_time_seconds("time", float(frame_timestamp))
         # load the lowres image and depth
         bgr = cv2.imread(f"{lowres_image_dir}/{video_id}_{frame_timestamp}.png")
-        rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
         depth = cv2.imread(f"{lowres_depth_dir}/{video_id}_{frame_timestamp}.png", cv2.IMREAD_ANYDEPTH)
 
         high_res_exists: bool = (image_dir / f"{video_id}_{frame_timestamp}.png").exists() and include_highres
@@ -240,7 +239,7 @@ def log_arkit(recording_path: Path, include_highres: bool) -> None:
             LOWRES_POSED_ENTITY_PATH,
         )
 
-        rr.log(f"{LOWRES_POSED_ENTITY_PATH}/rgb", rr.Image(rgb).compress(jpeg_quality=95))
+        rr.log(f"{LOWRES_POSED_ENTITY_PATH}/bgr", rr.Image(bgr, color_model="BGR").compress(jpeg_quality=95))
         rr.log(f"{LOWRES_POSED_ENTITY_PATH}/depth", rr.DepthImage(depth, meter=1000))
 
         # log the high res camera
@@ -260,9 +259,7 @@ def log_arkit(recording_path: Path, include_highres: bool) -> None:
             highres_bgr = cv2.imread(f"{image_dir}/{video_id}_{frame_timestamp}.png")
             highres_depth = cv2.imread(f"{depth_dir}/{video_id}_{frame_timestamp}.png", cv2.IMREAD_ANYDEPTH)
 
-            highres_rgb = cv2.cvtColor(highres_bgr, cv2.COLOR_BGR2RGB)
-
-            rr.log(f"{HIGHRES_ENTITY_PATH}/rgb", rr.Image(highres_rgb).compress(jpeg_quality=75))
+            rr.log(f"{HIGHRES_ENTITY_PATH}/bgr", rr.Image(highres_bgr, color_model="BGR").compress(jpeg_quality=75))
             rr.log(f"{HIGHRES_ENTITY_PATH}/depth", rr.DepthImage(highres_depth, meter=1000))
 
@@ -293,9 +290,9 @@ def main() -> None:
                 # For this to work, the origin of the 2D views has to be a pinhole camera,
                 # this way the viewer knows how to project the 3D annotations into the 2D views.
                 rrb.Spatial2DView(
-                    name="RGB",
+                    name="BGR",
                     origin=primary_camera_entity,
-                    contents=["$origin/rgb", "/world/annotations/**"],
+                    contents=["$origin/bgr", "/world/annotations/**"],
                 ),
                 rrb.Spatial2DView(
                     name="Depth",
diff --git a/examples/python/face_tracking/face_tracking.py b/examples/python/face_tracking/face_tracking.py
index 5d5a09c13a0c..d7ec8bb21402 100755
--- a/examples/python/face_tracking/face_tracking.py
+++ b/examples/python/face_tracking/face_tracking.py
@@ -357,15 +357,12 @@ def run_from_video_capture(vid: int | str, max_dim: int | None, max_frame_count:
             # On some platforms it always returns zero, so we compute from the frame counter and fps
             frame_time_nano = int(frame_idx * 1000 / fps * 1e6)
 
-            # convert to rgb
-            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
-
             # log data
             rr.set_time_sequence("frame_nr", frame_idx)
             rr.set_time_nanos("frame_time", frame_time_nano)
             detector.detect_and_log(frame, frame_time_nano)
             landmarker.detect_and_log(frame, frame_time_nano)
-            rr.log("video/image", rr.Image(frame))
+            rr.log("video/image", rr.Image(frame, color_model="BGR"))
 
     except KeyboardInterrupt:
         pass
 
@@ -379,12 +376,11 @@ def run_from_sample_image(path: Path, max_dim: int | None, num_faces: int) -> No
     """Run the face detector on a single image."""
     image = cv2.imread(str(path))
     image = resize_image(image, max_dim)
-    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
     logger = FaceDetectorLogger(video_mode=False)
     landmarker = FaceLandmarkerLogger(video_mode=False, num_faces=num_faces)
     logger.detect_and_log(image, 0)
     landmarker.detect_and_log(image, 0)
-    rr.log("video/image", rr.Image(image))
+    rr.log("video/image", rr.Image(image, color_model="BGR"))
 
 
 def main() -> None:
diff --git a/examples/python/gesture_detection/gesture_detection.py b/examples/python/gesture_detection/gesture_detection.py
index c0c21e6f2cac..c76fa9964002 100755
--- a/examples/python/gesture_detection/gesture_detection.py
+++ b/examples/python/gesture_detection/gesture_detection.py
@@ -192,8 +192,7 @@ def run_from_sample_image(path: Path | str) -> None:
     """Run the gesture recognition on a single image."""
     image = cv2.imread(str(path))
     # image = resize_image(image, max_dim)
-    show_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
-    rr.log("media/image", rr.Image(show_image))
+    rr.log("media/image", rr.Image(image, color_model="BGR"))
 
     logger = GestureDetectorLogger(video_mode=False)
-    logger.detect_and_log(show_image, 0)
+    logger.detect_and_log(image, 0)
@@ -236,14 +235,11 @@ def run_from_video_capture(vid: int | str, max_frame_count: int | None) -> None:
             # On some platforms it always returns zero, so we compute from the frame counter and fps
             frame_time_nano = int(frame_idx * 1000 / fps * 1e6)
 
-            # convert to rgb
-            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
-
             # log data
             rr.set_time_sequence("frame_nr", frame_idx)
             rr.set_time_nanos("frame_time", frame_time_nano)
             detector.detect_and_log(frame, frame_time_nano)
-            rr.log("media/video", rr.Image(frame).compress(jpeg_quality=75))
+            rr.log("media/video", rr.Image(frame, color_model="BGR").compress(jpeg_quality=75))
 
     except KeyboardInterrupt:
         pass
diff --git a/examples/python/human_pose_tracking/human_pose_tracking.py b/examples/python/human_pose_tracking/human_pose_tracking.py
index e58809c4eb46..817e6e07ef50 100755
--- a/examples/python/human_pose_tracking/human_pose_tracking.py
+++ b/examples/python/human_pose_tracking/human_pose_tracking.py
@@ -77,15 +77,14 @@ def track_pose(video_path: str, model_path: str, *, segment: bool, max_frame_cou
                 break
 
             mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=bgr_frame.data)
-            rgb = cv2.cvtColor(bgr_frame.data, cv2.COLOR_BGR2RGB)
             rr.set_time_seconds("time", bgr_frame.time)
             rr.set_time_sequence("frame_idx", bgr_frame.idx)
 
             results = pose_landmarker.detect_for_video(mp_image, int(bgr_frame.time * 1000))
-            h, w, _ = rgb.shape
+            h, w, _ = bgr_frame.data.shape
             landmark_positions_2d = read_landmark_positions_2d(results, w, h)
-            rr.log("video/rgb", rr.Image(rgb).compress(jpeg_quality=75))
+            rr.log("video/bgr", rr.Image(bgr_frame.data, color_model="BGR").compress(jpeg_quality=75))
 
             if landmark_positions_2d is not None:
                 rr.log(
                     "video/pose/points",
@@ -237,7 +236,7 @@ def main() -> None:
             rrb.Spatial3DView(origin="person", name="3D pose"),
         ),
         rrb.Vertical(
-            rrb.Spatial2DView(origin="video/rgb", name="Raw video"),
+            rrb.Spatial2DView(origin="video/bgr", name="Raw video"),
             rrb.TextDocumentView(origin="description", name="Description"),
             row_shares=[2, 3],
         ),
diff --git a/examples/python/live_camera_edge_detection/live_camera_edge_detection.py b/examples/python/live_camera_edge_detection/live_camera_edge_detection.py
index 4d3206d6c21b..1a521e1df1fd 100755
--- a/examples/python/live_camera_edge_detection/live_camera_edge_detection.py
+++ b/examples/python/live_camera_edge_detection/live_camera_edge_detection.py
@@ -42,8 +42,7 @@ def run_canny(num_frames: int | None) -> None:
         frame_nr += 1
 
         # Log the original image
-        rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
-        rr.log("image/rgb", rr.Image(rgb))
+        rr.log("image/rgb", rr.Image(img, color_model="BGR"))
 
         # Convert to grayscale
         gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
diff --git a/examples/python/ocr/ocr.py b/examples/python/ocr/ocr.py
index 846e6e864dd2..030f9bbc8198 100755
--- a/examples/python/ocr/ocr.py
+++ b/examples/python/ocr/ocr.py
@@ -365,7 +365,7 @@ def detect_and_log_layouts(file_path: str) -> None:
     else:
         # read image
        img = cv2.imread(file_path)
-        image_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
+        image_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # Rerun can handle BGR as well, but `ocr_model_pp` expects RGB
         images.append(image_rgb.astype(np.uint8))
 
     # Extract the layout from each image
diff --git a/examples/python/rgbd/rgbd.py b/examples/python/rgbd/rgbd.py
index 51c855fda94d..baec0faa89eb 100755
--- a/examples/python/rgbd/rgbd.py
+++ b/examples/python/rgbd/rgbd.py
@@ -44,13 +44,11 @@ def parse_timestamp(filename: str) -> datetime:
     return datetime.fromtimestamp(float(time))
 
 
-def read_image_rgb(buf: bytes) -> npt.NDArray[np.uint8]:
-    """Decode an image provided in `buf`, and interpret it as RGB data."""
+def read_image_bgr(buf: bytes) -> npt.NDArray[np.uint8]:
+    """Decode an image provided in `buf`, and interpret it as BGR data."""
     np_buf: npt.NDArray[np.uint8] = np.ndarray(shape=(1, len(buf)), dtype=np.uint8, buffer=buf)
-    # OpenCV reads images in BGR rather than RGB format
     img_bgr = cv2.imdecode(np_buf, cv2.IMREAD_COLOR)
-    img_rgb: npt.NDArray[Any] = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)
-    return img_rgb
+    return img_bgr
 
 
 def read_depth_image(buf: bytes) -> npt.NDArray[Any]:
@@ -85,8 +83,8 @@ def log_nyud_data(recording_path: Path, subset_idx: int, frames: int) -> None:
 
             if f.filename.endswith(".ppm"):
                 buf = archive.read(f)
-                img_rgb = read_image_rgb(buf)
-                rr.log("world/camera/image/rgb", rr.Image(img_rgb).compress(jpeg_quality=95))
+                img_bgr = read_image_bgr(buf)
+                rr.log("world/camera/image/rgb", rr.Image(img_bgr, color_model="BGR").compress(jpeg_quality=95))
 
             elif f.filename.endswith(".pgm"):
                 buf = archive.read(f)
diff --git a/examples/python/segment_anything_model/segment_anything_model.py b/examples/python/segment_anything_model/segment_anything_model.py
index ea3cc91ae384..57540b15e405 100755
--- a/examples/python/segment_anything_model/segment_anything_model.py
+++ b/examples/python/segment_anything_model/segment_anything_model.py
@@ -138,6 +138,7 @@ def load_image(image_uri: str) -> cv2.typing.MatLike:
     else:
         image = cv2.imread(image_uri, cv2.IMREAD_COLOR)
 
+    # Rerun can handle BGR as well, but SAM requires RGB.
     image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
     return image
 
diff --git a/examples/python/structure_from_motion/structure_from_motion/__main__.py b/examples/python/structure_from_motion/structure_from_motion/__main__.py
index cba96fa1943e..d68f57675704 100755
--- a/examples/python/structure_from_motion/structure_from_motion/__main__.py
+++ b/examples/python/structure_from_motion/structure_from_motion/__main__.py
@@ -162,8 +162,7 @@ def read_and_log_sparse_reconstruction(dataset_path: Path, filter_output: bool,
         if resize:
             bgr = cv2.imread(str(image_file))
             bgr = cv2.resize(bgr, resize)
-            rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
-            rr.log("camera/image", rr.Image(rgb).compress(jpeg_quality=75))
+            rr.log("camera/image", rr.Image(bgr, color_model="BGR").compress(jpeg_quality=75))
         else:
             rr.log("camera/image", rr.EncodedImage(path=dataset_path / "images" / image.name))
 
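Every hunk above applies the same pattern: instead of converting OpenCV's BGR output to RGB with cv2.cvtColor before logging, the channel order is declared to Rerun through the color_model argument of rr.Image, which saves a per-frame conversion and copy. A minimal sketch of the pattern, assuming an SDK version recent enough to support color_model; the app id, entity path, and input file below are illustrative:

import cv2
import rerun as rr

rr.init("bgr_example", spawn=True)  # illustrative app id

# cv2.imread decodes to BGR by default; declare the channel order
# to Rerun instead of converting the pixels.
img = cv2.imread("example.png")  # hypothetical input file
rr.log("image", rr.Image(img, color_model="BGR"))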