Add jpeg_quality parameter to log_image (#2418)

Closes #2175. You can now do `log_image("image", image, jpeg_quality=75)` to compress your logged images so they take up less space on disk and in RAM. I also updated the UI so you can actually tell whether or not a tensor was compressed. This makes many of our example datasets a lot smaller. For instance, `tracking_hf_opencv` goes from 753 MB to 101 MB. * [x] I have read and agree to the [Contributor Guide](https://github.com/rerun-io/rerun/blob/main/CONTRIBUTING.md) and the [Code of Conduct](https://github.com/rerun-io/rerun/blob/main/CODE_OF_CONDUCT.md)  PR Build Summary: https://build.rerun.io/pr/2418  Docs preview: https://rerun.io/preview/194b4be/docs Examples preview: https://rerun.io/preview/194b4be/examples
rerun-io · Jun 15, 2023 · 7e169b7 · 7e169b7
1 parent 183b566
commit 7e169b7
Show file tree

Hide file tree

Showing 11 changed files with 70 additions and 32 deletions.
diff --git a/.github/workflows/reusable_build_and_test_wheels.yml b/.github/workflows/reusable_build_and_test_wheels.yml
@@ -243,7 +243,7 @@ jobs:
         # TODO(jleibs): understand why deps can't be installed in the same step as the wheel
         shell: bash
         run: |
-          pip install deprecated numpy>=1.23 pyarrow==10.0.1 pytest==7.1.2
+          pip install deprecated numpy>=1.23 pillow>=9.5.0 pyarrow==10.0.1 pytest==7.1.2
 
       - name: Install built wheel
         if: needs.set-config.outputs.RUN_TESTS == 'true'

diff --git a/crates/re_data_ui/src/image.rs b/crates/re_data_ui/src/image.rs
@@ -57,7 +57,7 @@ fn tensor_ui(
     verbosity: UiVerbosity,
     entity_path: &re_data_store::EntityPath,
     annotations: &Annotations,
-    _encoded_tensor: &Tensor,
+    original_tensor: &Tensor,
     tensor: &DecodedTensor,
 ) {
     // See if we can convert the tensor to a GPU texture.
@@ -102,18 +102,25 @@ fn tensor_ui(
                 }
 
                 ui.label(format!(
-                    "{} x {}",
+                    "{} x {}{}",
                     tensor.dtype(),
-                    format_tensor_shape_single_line(tensor.shape())
+                    format_tensor_shape_single_line(tensor.shape()),
+                    if original_tensor.data.is_compressed_image() {
+                        " (compressed)"
+                    } else {
+                        ""
+                    }
                 ))
-                .on_hover_ui(|ui| tensor_summary_ui(ctx.re_ui, ui, tensor, &tensor_stats));
+                .on_hover_ui(|ui| {
+                    tensor_summary_ui(ctx.re_ui, ui, original_tensor, tensor, &tensor_stats);
+                });
             });
         }
 
         UiVerbosity::All | UiVerbosity::Reduced => {
             ui.vertical(|ui| {
                 ui.set_min_width(100.0);
-                tensor_summary_ui(ctx.re_ui, ui, tensor, &tensor_stats);
+                tensor_summary_ui(ctx.re_ui, ui, original_tensor, tensor, &tensor_stats);
 
                 if let Some(texture) = &texture_result {
                     let max_size = ui
@@ -147,9 +154,9 @@ fn tensor_ui(
 
                     // TODO(emilk): support copying and saving images on web
                     #[cfg(not(target_arch = "wasm32"))]
-                    if _encoded_tensor.data.is_compressed_image() || tensor.could_be_dynamic_image()
+                    if original_tensor.data.is_compressed_image() || tensor.could_be_dynamic_image()
                     {
-                        copy_and_save_image_ui(ui, tensor, _encoded_tensor);
+                        copy_and_save_image_ui(ui, tensor, original_tensor);
                     }
 
                     if let Some([_h, _w, channels]) = tensor.image_height_width_channels() {
@@ -208,16 +215,17 @@ fn show_image_at_max_size(
 pub fn tensor_summary_ui_grid_contents(
     re_ui: &re_ui::ReUi,
     ui: &mut egui::Ui,
-    tensor: &Tensor,
+    original_tensor: &Tensor,
+    tensor: &DecodedTensor,
     tensor_stats: &TensorStats,
 ) {
     let Tensor {
         tensor_id: _,
         shape,
-        data,
+        data: _,
         meaning,
         meter,
-    } = tensor;
+    } = tensor.inner();
 
     re_ui
         .grid_left_hand_label(ui, "Data type")
@@ -260,7 +268,7 @@ pub fn tensor_summary_ui_grid_contents(
         ui.end_row();
     }
 
-    match data {
+    match &original_tensor.data {
         re_log_types::component_types::TensorData::U8(_)
         | re_log_types::component_types::TensorData::U16(_)
         | re_log_types::component_types::TensorData::U32(_)
@@ -313,13 +321,14 @@ pub fn tensor_summary_ui_grid_contents(
 pub fn tensor_summary_ui(
     re_ui: &re_ui::ReUi,
     ui: &mut egui::Ui,
-    tensor: &Tensor,
+    original_tensor: &Tensor,
+    tensor: &DecodedTensor,
     tensor_stats: &TensorStats,
 ) {
     egui::Grid::new("tensor_summary_ui")
         .num_columns(2)
         .show(ui, |ui| {
-            tensor_summary_ui_grid_contents(re_ui, ui, tensor, tensor_stats);
+            tensor_summary_ui_grid_contents(re_ui, ui, original_tensor, tensor, tensor_stats);
         });
 }
 

diff --git a/crates/re_viewer/src/ui/view_tensor/ui.rs b/crates/re_viewer/src/ui/view_tensor/ui.rs
@@ -74,12 +74,8 @@ impl ViewTensorState {
         ctx.re_ui
             .selection_grid(ui, "tensor_selection_ui")
             .show(ui, |ui| {
-                tensor_summary_ui_grid_contents(
-                    ctx.re_ui,
-                    ui,
-                    tensor,
-                    ctx.cache.entry::<TensorStatsCache>().entry(tensor),
-                );
+                let tensor_stats = ctx.cache.entry::<TensorStatsCache>().entry(tensor);
+                tensor_summary_ui_grid_contents(ctx.re_ui, ui, tensor, tensor, tensor_stats);
                 self.texture_settings.ui(ctx.re_ui, ui);
                 self.color_mapping.ui(ctx.render_ctx, ctx.re_ui, ui);
             });

diff --git a/examples/python/arkitscenes/download_dataset.py b/examples/python/arkitscenes/download_dataset.py
@@ -117,7 +117,7 @@ def download_file(url: str, file_name: str, dst: Path) -> bool:
             return False
         os.rename(filepath + ".tmp", filepath)
     else:
-        print(f"WARNING: skipping download of existing file: {filepath}")
+        pass  # skipping download of existing file
     return True
 
 
@@ -274,7 +274,7 @@ def download_data(
             if not file_name.endswith(".zip") or not os.path.isdir(dst_path[: -len(".zip")]):
                 download_file(url, dst_path, dst_dir)
             else:
-                print(f"WARNING: skipping download of existing zip file: {dst_path}")
+                pass  # skipping download of existing zip file
             if file_name.endswith(".zip") and os.path.isfile(dst_path):
                 unzip_file(file_name, dst_dir, keep_zip)
 

diff --git a/examples/python/arkitscenes/main.py b/examples/python/arkitscenes/main.py
@@ -396,7 +396,7 @@ def log_arkit(recording_path: Path, include_highres: bool) -> None:
                 colors_list,
             )
 
-            rr.log_image(f"{lowres_posed_entity_id}/rgb", rgb)
+            rr.log_image(f"{lowres_posed_entity_id}/rgb", rgb, jpeg_quality=95)
             rr.log_depth_image(f"{lowres_posed_entity_id}/depth", depth, meter=1000)
 
         # log the high res camera
@@ -420,7 +420,7 @@ def log_arkit(recording_path: Path, include_highres: bool) -> None:
             highres_depth = cv2.imread(f"{depth_dir}/{video_id}_{frame_timestamp}.png", cv2.IMREAD_ANYDEPTH)
 
             highres_rgb = cv2.cvtColor(highres_bgr, cv2.COLOR_BGR2RGB)
-            rr.log_image(f"{highres_entity_id}/rgb", highres_rgb)
+            rr.log_image(f"{highres_entity_id}/rgb", highres_rgb, jpeg_quality=75)
             rr.log_depth_image(f"{highres_entity_id}/depth", highres_depth, meter=1000)
 
 

diff --git a/examples/python/colmap/main.py b/examples/python/colmap/main.py
@@ -160,9 +160,7 @@ def read_and_log_sparse_reconstruction(dataset_path: Path, filter_output: bool,
         if resize:
             img = cv2.imread(str(image_file))
             img = cv2.resize(img, resize)
-            jpeg_quality = [int(cv2.IMWRITE_JPEG_QUALITY), 75]
-            _, encimg = cv2.imencode(".jpg", img, jpeg_quality)
-            rr.log_image_file("camera/image", img_bytes=encimg)
+            rr.log_image("camera/image", img, jpeg_quality=75)
         else:
             rr.log_image_file("camera/image", img_path=dataset_path / "images" / image.name)
 

diff --git a/examples/python/mp_pose/main.py b/examples/python/mp_pose/main.py
@@ -45,7 +45,7 @@ def track_pose(video_path: str, segment: bool) -> None:
             rgb = cv2.cvtColor(bgr_frame.data, cv2.COLOR_BGR2RGB)
             rr.set_time_seconds("time", bgr_frame.time)
             rr.set_time_sequence("frame_idx", bgr_frame.idx)
-            rr.log_image("video/rgb", rgb)
+            rr.log_image("video/rgb", rgb, jpeg_quality=75)
 
             results = pose.process(rgb)
             h, w, _ = rgb.shape

diff --git a/examples/python/nyud/main.py b/examples/python/nyud/main.py
@@ -84,7 +84,7 @@ def log_nyud_data(recording_path: Path, subset_idx: int = 0) -> None:
             if f.filename.endswith(".ppm"):
                 buf = archive.read(f)
                 img_rgb = read_image_rgb(buf)
-                rr.log_image("world/camera/image/rgb", img_rgb)
+                rr.log_image("world/camera/image/rgb", img_rgb, jpeg_quality=95)
 
             elif f.filename.endswith(".pgm"):
                 buf = archive.read(f)

diff --git a/examples/python/tracking_hf_opencv/main.py b/examples/python/tracking_hf_opencv/main.py
@@ -85,7 +85,7 @@ def detect_objects_to_track(self, rgb: npt.NDArray[np.uint8], frame_idx: int) ->
         _, _, scaled_height, scaled_width = inputs["pixel_values"].shape
         scaled_size = (scaled_width, scaled_height)
         rgb_scaled = cv2.resize(rgb, scaled_size)
-        rr.log_image("image_scaled/rgb", rgb_scaled)
+        rr.log_image("image_scaled/rgb", rgb_scaled, jpeg_quality=95)
 
         logging.debug("Pass image to detection network")
         outputs = self.model(**inputs)
@@ -333,7 +333,7 @@ def track_objects(video_path: str) -> None:
             break
 
         rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
-        rr.log_image("image/rgb", rgb)
+        rr.log_image("image/rgb", rgb, jpeg_quality=95)
 
         if not trackers or frame_idx % 40 == 0:
             detections = detector.detect_objects_to_track(rgb=rgb, frame_idx=frame_idx)

diff --git a/rerun_py/pyproject.toml b/rerun_py/pyproject.toml
@@ -11,7 +11,13 @@ classifiers = [
   "Topic :: Scientific/Engineering :: Artificial Intelligence",
   "Topic :: Scientific/Engineering :: Visualization",
 ]
-dependencies = ["deprecated", "numpy>=1.23", "pyarrow==10.0.1"]
+dependencies = [
+  # Must match list in `.github/workflows/reusable_build_and_test_wheels.yml`
+  "deprecated",
+  "numpy>=1.23",
+  "pillow>=9.5.0,<10", # Used for JPEG encoding
+  "pyarrow==10.0.1",
+]
 description = "The Rerun Logging SDK"
 keywords = ["computer-vision", "logging", "rerun"]
 name = "rerun-sdk"

diff --git a/rerun_py/rerun_sdk/rerun/log/image.py b/rerun_py/rerun_sdk/rerun/log/image.py
@@ -1,12 +1,15 @@
 from __future__ import annotations
 
+from io import BytesIO
 from typing import Any
 
 import numpy as np
 import numpy.typing as npt
+from PIL import Image
 
 from rerun import bindings
 from rerun.log.error_utils import _send_warning
+from rerun.log.file import ImageFormat, log_image_file
 from rerun.log.log_decorator import log_decorator
 from rerun.log.tensor import Tensor, _log_tensor, _to_numpy
 from rerun.recording_stream import RecordingStream
@@ -27,6 +30,7 @@ def log_image(
     ext: dict[str, Any] | None = None,
     timeless: bool = False,
     recording: RecordingStream | None = None,
+    jpeg_quality: int | None = None,
 ) -> None:
     """
     Log a gray or color image.
@@ -59,6 +63,14 @@ def log_image(
         Specifies the [`rerun.RecordingStream`][] to use.
         If left unspecified, defaults to the current active data recording, if there is one.
         See also: [`rerun.init`][], [`rerun.set_global_data_recording`][].
+    jpeg_quality:
+        If set, encode the image as a JPEG to save storage space.
+        Higher quality = larger file size.
+        A quality of 95 still saves a lot of space, but is visually very similar.
+        JPEG compression works best for photographs.
+        Only RGB images are supported.
+        Note that compressing to JPEG costs a bit of CPU time, both when logging
+        and later when viewing them.
 
     """
 
@@ -90,6 +102,23 @@ def log_image(
     if interpretable_as_image and num_non_empty_dims != len(shape):
         image = np.squeeze(image)
 
+    if jpeg_quality is not None:
+        # TODO(emilk): encode JPEG in background thread instead
+
+        if image.dtype not in ["uint8", "sint32", "float32"]:
+            # Convert to a format supported by Image.fromarray
+            image = image.astype("float32")
+
+        pil_image = Image.fromarray(image)
+        output = BytesIO()
+        pil_image.save(output, format="JPEG", quality=jpeg_quality)
+        jpeg_bytes = output.getvalue()
+        output.close()
+
+        # TODO(emilk): pass draw_order too
+        log_image_file(entity_path=entity_path, img_bytes=jpeg_bytes, img_format=ImageFormat.JPEG, timeless=timeless)
+        return
+
     _log_tensor(entity_path, image, draw_order=draw_order, ext=ext, timeless=timeless, recording=recording)