Skip to content

Commit

Permalink
Fix image view not handling images with extra leading dimensions of size 1 (#5579)
Browse files Browse the repository at this point in the history

### What

Encountered this while inspecting the `depth_guided_stable_diffusion`
example:

Before: Nothing shown for a depth image with dimensions `1x1x64x96`
<img width="1694" alt="image"
src="https://github.com/rerun-io/rerun/assets/1220815/375afc21-7663-43a0-a12d-188ef8ed4e46">

After:
<img width="1622" alt="image"
src="https://github.com/rerun-io/rerun/assets/1220815/18e295d5-49ed-4dfc-b8e2-bae9fba747e3">


Note that the changed `shape_short` is only used for
`image_height_width_channels`.



### Checklist
* [x] I have read and agree to [Contributor
Guide](https://github.com/rerun-io/rerun/blob/main/CONTRIBUTING.md) and
the [Code of
Conduct](https://github.com/rerun-io/rerun/blob/main/CODE_OF_CONDUCT.md)
* [x] I've included a screenshot or gif (if applicable)
* [x] I have tested the web demo (if applicable):
* Using newly built examples:
[app.rerun.io](https://app.rerun.io/pr/5579/index.html)
* Using examples from latest `main` build:
[app.rerun.io](https://app.rerun.io/pr/5579/index.html?manifest_url=https://app.rerun.io/version/main/examples_manifest.json)
* Using full set of examples from `nightly` build:
[app.rerun.io](https://app.rerun.io/pr/5579/index.html?manifest_url=https://app.rerun.io/version/nightly/examples_manifest.json)
* [x] The PR title and labels are set so as to maximize their
usefulness for the next release's CHANGELOG
* [x] If applicable, add a new check to the [release
checklist](https://github.com/rerun-io/rerun/blob/main/tests/python/release_checklist)!

- [PR Build Summary](https://build.rerun.io/pr/5579)
- [Docs
preview](https://rerun.io/preview/ce7eab6287449b0573583223c0cf643c6caf065b/docs)
<!--DOCS-PREVIEW-->
- [Examples
preview](https://rerun.io/preview/ce7eab6287449b0573583223c0cf643c6caf065b/examples)
<!--EXAMPLES-PREVIEW-->
- [Recent benchmark results](https://build.rerun.io/graphs/crates.html)
- [Wasm size tracking](https://build.rerun.io/graphs/sizes.html)

---------

Co-authored-by: Clement Rey <[email protected]>
  • Loading branch information
Wumpf and teh-cmc authored Mar 20, 2024
1 parent aba8fca commit c6bbfaf
Show file tree
Hide file tree
Showing 5 changed files with 65 additions and 18 deletions.
11 changes: 4 additions & 7 deletions crates/re_types/src/datatypes/tensor_data_ext.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,20 +21,17 @@ impl TensorData {
self.shape.as_slice()
}

/// Returns the shape of the tensor with all trailing dimensions of size 1 ignored.
/// Returns the shape of the tensor with all leading & trailing dimensions of size 1 ignored.
///
/// If all dimension sizes are one, this returns only the first dimension.
#[inline]
pub fn shape_short(&self) -> &[TensorDimension] {
if self.shape.is_empty() {
&self.shape
} else {
self.shape
.iter()
.enumerate()
.rev()
.find(|(_, dim)| dim.size != 1)
.map_or(&self.shape[0..1], |(i, _)| &self.shape[..(i + 1)])
let first_not_one = self.shape.iter().position(|dim| dim.size != 1);
let last_not_one = self.shape.iter().rev().position(|dim| dim.size != 1);
&self.shape[first_not_one.unwrap_or(0)..self.shape.len() - last_not_one.unwrap_or(0)]
}
}

Expand Down
13 changes: 6 additions & 7 deletions examples/python/depth_guided_stable_diffusion/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,12 @@ thumbnail_dimensions = [480, 253]
channel = "nightly"
-->


<picture data-inline-viewer="depth_guided_stable_diffusion">
<source media="(max-width: 480px)" srcset="https://static.rerun.io/depth_guided_stable_diffusion/a85516aba09f72649517891d767e15383ce7f4ea/480w.png">
<source media="(max-width: 768px)" srcset="https://static.rerun.io/depth_guided_stable_diffusion/a85516aba09f72649517891d767e15383ce7f4ea/768w.png">
<source media="(max-width: 1024px)" srcset="https://static.rerun.io/depth_guided_stable_diffusion/a85516aba09f72649517891d767e15383ce7f4ea/1024w.png">
<source media="(max-width: 1200px)" srcset="https://static.rerun.io/depth_guided_stable_diffusion/a85516aba09f72649517891d767e15383ce7f4ea/1200w.png">
<img src="https://static.rerun.io/depth_guided_stable_diffusion/a85516aba09f72649517891d767e15383ce7f4ea/full.png" alt="Depth-guided stable diffusion screenshot">
<picture>
<img src="https://static.rerun.io/depth-guided-stable-diffusion/bea9bfaf33ebed4296f576d931c8c8e6fdd08a21/full.png" alt="Depth-guided stable diffusion screenshot">
<source media="(max-width: 480px)" srcset="https://static.rerun.io/depth-guided-stable-diffusion/bea9bfaf33ebed4296f576d931c8c8e6fdd08a21/480w.png">
<source media="(max-width: 768px)" srcset="https://static.rerun.io/depth-guided-stable-diffusion/bea9bfaf33ebed4296f576d931c8c8e6fdd08a21/768w.png">
<source media="(max-width: 1024px)" srcset="https://static.rerun.io/depth-guided-stable-diffusion/bea9bfaf33ebed4296f576d931c8c8e6fdd08a21/1024w.png">
<source media="(max-width: 1200px)" srcset="https://static.rerun.io/depth-guided-stable-diffusion/bea9bfaf33ebed4296f576d931c8c8e6fdd08a21/1200w.png">
</picture>

A more elaborate example running Depth Guided Stable Diffusion 2.0.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -218,7 +218,7 @@ def _encode_prompt(self, prompt, device, num_images_per_prompt, do_classifier_fr
return_tensors="pt",
)
text_input_ids = text_inputs.input_ids
rr.log("prompt/text_input/ids", rr.Tensor(text_input_ids))
rr.log("prompt/text_input/ids", rr.BarChart(text_input_ids))
untruncated_ids = self.tokenizer(prompt, padding="longest", return_tensors="pt").input_ids

if untruncated_ids.shape[-1] >= text_input_ids.shape[-1] and not torch.equal(text_input_ids, untruncated_ids):
Expand All @@ -229,7 +229,7 @@ def _encode_prompt(self, prompt, device, num_images_per_prompt, do_classifier_fr
)

if hasattr(self.text_encoder.config, "use_attention_mask") and self.text_encoder.config.use_attention_mask:
rr.log("prompt/text_input/attention_mask", rr.Tensor(text_inputs.attention_mask))
rr.log("prompt/text_input/attention_mask", rr.BarChart(text_inputs.attention_mask))
attention_mask = text_inputs.attention_mask.to(device)
else:
attention_mask = None
Expand Down
53 changes: 52 additions & 1 deletion examples/python/depth_guided_stable_diffusion/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

import requests
import rerun as rr # pip install rerun-sdk
import rerun.blueprint as rrb
import torch
from huggingface_pipeline import StableDiffusionDepth2ImgPipeline
from PIL import Image
Expand Down Expand Up @@ -112,7 +113,57 @@ def main() -> None:
rr.script_add_args(parser)
args = parser.parse_args()

rr.script_setup(args, "rerun_example_depth_guided_stable_diffusion")
rr.script_setup(
args,
"rerun_example_depth_guided_stable_diffusion",
# This example is very complex, making it too hard for the Viewer to infer a good layout.
# Therefore, we specify everything explicitly:
# We set up three columns using a `Horizontal` layout, one each for
# * inputs
# * depth & initializations
# * diffusion outputs
blueprint=rrb.Blueprint(
rrb.Horizontal(
rrb.Vertical(
rrb.Tabs(
rrb.Spatial2DView(name="Image original", origin="image/original"),
rrb.TensorView(name="Image preprocessed", origin="input_image/preprocessed"),
),
rrb.Vertical(
rrb.TextLogView(name="Prompt", contents=["prompt/text", "prompt/text_negative"]),
rrb.Tabs(
rrb.TensorView(name="Text embeddings", origin="prompt/text_embeddings"),
rrb.TensorView(name="Unconditional embeddings", origin="prompt/uncond_embeddings"),
),
rrb.BarChartView(name="Prompt ids", origin="prompt/text_input"),
),
),
rrb.Vertical(
rrb.Tabs(
rrb.Spatial2DView(name="Depth estimated", origin="depth/estimated"),
rrb.Spatial2DView(name="Depth interpolated", origin="depth/interpolated"),
rrb.Spatial2DView(name="Depth normalized", origin="depth/normalized"),
rrb.TensorView(name="Depth input pre-processed", origin="depth/input_preprocessed"),
active_tab="Depth interpolated",
),
rrb.Tabs(
rrb.TensorView(name="Encoded input", origin="encoded_input_image"),
rrb.TensorView(name="Decoded init latents", origin="decoded_init_latents"),
),
),
rrb.Vertical(
rrb.Spatial2DView(name="Image diffused", origin="image/diffused"),
rrb.Horizontal(
rrb.TensorView(name="Latent Model Input", origin="diffusion/latent_model_input"),
rrb.TensorView(name="Diffusion latents", origin="diffusion/latents"),
# rrb.TensorView(name="Noise Prediction", origin="diffusion/noise_pred"),
),
),
),
rrb.SelectionPanel(expanded=False),
rrb.TimePanel(expanded=False),
),
)

image_path = args.image_path # type: str
if not image_path:
Expand Down
2 changes: 1 addition & 1 deletion rerun_py/rerun_sdk/rerun/archetypes/bar_chart_ext.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ def values__field_converter_override(data: TensorDataArrayLike) -> TensorDataBat
# once we coerce to a canonical non-arrow type.
shape_dims = tensor_data.as_arrow_array()[0].value["shape"].values.field(0).to_numpy()

if len(shape_dims) != 1:
if len([d for d in shape_dims if d != 1]) != 1:
_send_warning_or_raise(
f"Bar chart data should only be 1D. Got values with shape: {shape_dims}",
2,
Expand Down

0 comments on commit c6bbfaf

Please sign in to comment.