Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix image view not handling images with extra leading dimensions of size 1 #5579

Merged
merged 3 commits into from
Mar 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 4 additions & 7 deletions crates/re_types/src/datatypes/tensor_data_ext.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,20 +21,17 @@ impl TensorData {
self.shape.as_slice()
}

/// Returns the shape of the tensor with all leading & trailing dimensions of size 1 ignored.
///
/// If all dimension sizes are one, this returns only the first dimension.
#[inline]
pub fn shape_short(&self) -> &[TensorDimension] {
    if self.shape.is_empty() {
        &self.shape
    } else {
        match self.shape.iter().position(|dim| dim.size != 1) {
            // All dimensions have size 1: per the documented contract we keep
            // only the first dimension instead of returning the full shape
            // (the naive `unwrap_or(0)` slicing would return everything here).
            None => &self.shape[0..1],
            Some(first_not_one) => {
                // A last non-1 dimension must exist because a first one does;
                // `unwrap_or(first_not_one)` keeps this panic-free regardless.
                let last_not_one = self
                    .shape
                    .iter()
                    .rposition(|dim| dim.size != 1)
                    .unwrap_or(first_not_one);
                // Inclusive range: keep everything from the first to the last
                // dimension whose size differs from 1.
                &self.shape[first_not_one..=last_not_one]
            }
        }
    }
}

Expand Down
13 changes: 6 additions & 7 deletions examples/python/depth_guided_stable_diffusion/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,12 @@ thumbnail_dimensions = [480, 253]
channel = "nightly"
-->


<picture data-inline-viewer="depth_guided_stable_diffusion">
<source media="(max-width: 480px)" srcset="https://static.rerun.io/depth_guided_stable_diffusion/a85516aba09f72649517891d767e15383ce7f4ea/480w.png">
<source media="(max-width: 768px)" srcset="https://static.rerun.io/depth_guided_stable_diffusion/a85516aba09f72649517891d767e15383ce7f4ea/768w.png">
<source media="(max-width: 1024px)" srcset="https://static.rerun.io/depth_guided_stable_diffusion/a85516aba09f72649517891d767e15383ce7f4ea/1024w.png">
<source media="(max-width: 1200px)" srcset="https://static.rerun.io/depth_guided_stable_diffusion/a85516aba09f72649517891d767e15383ce7f4ea/1200w.png">
<img src="https://static.rerun.io/depth_guided_stable_diffusion/a85516aba09f72649517891d767e15383ce7f4ea/full.png" alt="Depth-guided stable diffusion screenshot">
<picture>
  <source media="(max-width: 480px)" srcset="https://static.rerun.io/depth-guided-stable-diffusion/bea9bfaf33ebed4296f576d931c8c8e6fdd08a21/480w.png">
  <source media="(max-width: 768px)" srcset="https://static.rerun.io/depth-guided-stable-diffusion/bea9bfaf33ebed4296f576d931c8c8e6fdd08a21/768w.png">
  <source media="(max-width: 1024px)" srcset="https://static.rerun.io/depth-guided-stable-diffusion/bea9bfaf33ebed4296f576d931c8c8e6fdd08a21/1024w.png">
  <source media="(max-width: 1200px)" srcset="https://static.rerun.io/depth-guided-stable-diffusion/bea9bfaf33ebed4296f576d931c8c8e6fdd08a21/1200w.png">
  <img src="https://static.rerun.io/depth-guided-stable-diffusion/bea9bfaf33ebed4296f576d931c8c8e6fdd08a21/full.png" alt="Depth-guided stable diffusion screenshot">
</picture>

A more elaborate example running Depth Guided Stable Diffusion 2.0.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -218,7 +218,7 @@ def _encode_prompt(self, prompt, device, num_images_per_prompt, do_classifier_fr
return_tensors="pt",
)
text_input_ids = text_inputs.input_ids
rr.log("prompt/text_input/ids", rr.Tensor(text_input_ids))
rr.log("prompt/text_input/ids", rr.BarChart(text_input_ids))
untruncated_ids = self.tokenizer(prompt, padding="longest", return_tensors="pt").input_ids

if untruncated_ids.shape[-1] >= text_input_ids.shape[-1] and not torch.equal(text_input_ids, untruncated_ids):
Expand All @@ -229,7 +229,7 @@ def _encode_prompt(self, prompt, device, num_images_per_prompt, do_classifier_fr
)

if hasattr(self.text_encoder.config, "use_attention_mask") and self.text_encoder.config.use_attention_mask:
rr.log("prompt/text_input/attention_mask", rr.Tensor(text_inputs.attention_mask))
rr.log("prompt/text_input/attention_mask", rr.BarChart(text_inputs.attention_mask))
attention_mask = text_inputs.attention_mask.to(device)
else:
attention_mask = None
Expand Down
53 changes: 52 additions & 1 deletion examples/python/depth_guided_stable_diffusion/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

import requests
import rerun as rr # pip install rerun-sdk
import rerun.blueprint as rrb
import torch
from huggingface_pipeline import StableDiffusionDepth2ImgPipeline
from PIL import Image
Expand Down Expand Up @@ -112,7 +113,57 @@ def main() -> None:
rr.script_add_args(parser)
args = parser.parse_args()

rr.script_setup(args, "rerun_example_depth_guided_stable_diffusion")
rr.script_setup(
args,
"rerun_example_depth_guided_stable_diffusion",
# This example is very complex, making it too hard for the Viewer to infer a good layout.
# Therefore, we specify everything explicitly:
# We set up three columns using a `Horizontal` layout, one each for
# * inputs
# * depth & initializations
# * diffusion outputs
blueprint=rrb.Blueprint(
rrb.Horizontal(
rrb.Vertical(
rrb.Tabs(
rrb.Spatial2DView(name="Image original", origin="image/original"),
rrb.TensorView(name="Image preprocessed", origin="input_image/preprocessed"),
),
rrb.Vertical(
rrb.TextLogView(name="Prompt", contents=["prompt/text", "prompt/text_negative"]),
rrb.Tabs(
rrb.TensorView(name="Text embeddings", origin="prompt/text_embeddings"),
rrb.TensorView(name="Unconditional embeddings", origin="prompt/uncond_embeddings"),
),
rrb.BarChartView(name="Prompt ids", origin="prompt/text_input"),
),
),
rrb.Vertical(
rrb.Tabs(
rrb.Spatial2DView(name="Depth estimated", origin="depth/estimated"),
rrb.Spatial2DView(name="Depth interpolated", origin="depth/interpolated"),
rrb.Spatial2DView(name="Depth normalized", origin="depth/normalized"),
rrb.TensorView(name="Depth input pre-processed", origin="depth/input_preprocessed"),
active_tab="Depth interpolated",
),
rrb.Tabs(
rrb.TensorView(name="Encoded input", origin="encoded_input_image"),
rrb.TensorView(name="Decoded init latents", origin="decoded_init_latents"),
),
),
rrb.Vertical(
rrb.Spatial2DView(name="Image diffused", origin="image/diffused"),
rrb.Horizontal(
rrb.TensorView(name="Latent Model Input", origin="diffusion/latent_model_input"),
rrb.TensorView(name="Diffusion latents", origin="diffusion/latents"),
# rrb.TensorView(name="Noise Prediction", origin="diffusion/noise_pred"),
),
),
),
rrb.SelectionPanel(expanded=False),
rrb.TimePanel(expanded=False),
),
)

image_path = args.image_path # type: str
if not image_path:
Expand Down
2 changes: 1 addition & 1 deletion rerun_py/rerun_sdk/rerun/archetypes/bar_chart_ext.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ def values__field_converter_override(data: TensorDataArrayLike) -> TensorDataBat
# once we coerce to a canonical non-arrow type.
shape_dims = tensor_data.as_arrow_array()[0].value["shape"].values.field(0).to_numpy()

if len(shape_dims) != 1:
if len([d for d in shape_dims if d != 1]) != 1:
_send_warning_or_raise(
f"Bar chart data should only be 1D. Got values with shape: {shape_dims}",
2,
Expand Down
Loading