Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve entity size stats: include whole subtree #4542

Merged
merged 6 commits into from
Dec 15, 2023
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 8 additions & 8 deletions crates/re_arrow_store/src/store_stats.rs
Original file line number Diff line number Diff line change
Expand Up @@ -258,14 +258,14 @@ impl DataStore {
num_rows: table.buckets_num_rows,
size_bytes: table.buckets_size_bytes,
time_range: table.time_range(),
timelines_rows: 0,
timelines_size_bytes: 0,
timeless_rows: 0,
timeless_size_bytes: 0,
},
);

if let Some(timeless) = self.timeless_tables.get(&entity_path_hash) {
entity_stats.timelines_rows = timeless.inner.read().num_rows();
entity_stats.timelines_size_bytes = timeless.total_size_bytes();
entity_stats.timeless_rows = timeless.inner.read().num_rows();
entity_stats.timeless_size_bytes = timeless.total_size_bytes();
}

entity_stats
Expand All @@ -284,10 +284,10 @@ pub struct EntityStats {
pub time_range: re_log_types::TimeRange,

/// Number of timeless rows
pub timelines_rows: u64,
pub timeless_rows: u64,

/// Number of timeless bytes
pub timelines_size_bytes: u64,
pub timeless_size_bytes: u64,
}

impl Default for EntityStats {
Expand All @@ -296,8 +296,8 @@ impl Default for EntityStats {
num_rows: 0,
size_bytes: 0,
time_range: re_log_types::TimeRange::EMPTY,
timelines_rows: 0,
timelines_size_bytes: 0,
timeless_rows: 0,
timeless_size_bytes: 0,
}
}
}
Expand Down
25 changes: 25 additions & 0 deletions crates/re_data_store/src/entity_tree.rs
Original file line number Diff line number Diff line change
Expand Up @@ -88,22 +88,47 @@ pub struct SubtreeInfo {
///
/// ⚠ Auto-generated instance keys are _not_ accounted for. ⚠
pub time_histogram: TimeHistogramPerTimeline,

/// Number of bytes used by all arrow data in this tree (ignores overhead from book-keeping, schemas, etc).
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The size of the cell does take into account the copy of the arrow schema that is stored in the cell

data_bytes: u64,
}

impl SubtreeInfo {
/// Assumes the event has been filtered to be part of this subtree.
fn on_event(&mut self, event: &StoreEvent) {
use re_types_core::SizeBytes as _;

match event.kind {
StoreDiffKind::Addition => {
self.time_histogram
.add(&event.times, event.num_components() as _);

for cell in event.cells.values() {
self.data_bytes += cell.heap_size_bytes();
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The cells themselves are also stored on the heap at this point, so you want total_size_bytes()

}
}
StoreDiffKind::Deletion => {
self.time_histogram
.remove(&event.timepoint(), event.num_components() as _);

for cell in event.cells.values() {
if let Some(bytes_left) = self.data_bytes.checked_sub(cell.heap_size_bytes()) {
self.data_bytes = bytes_left;
} else if cfg!(debug_assertions) {
re_log::warn_once!(
"Error in book-keeping: we've removed more bytes then we've added"
);
}
}
}
}
}

/// Number of bytes used by all arrow data in this tree (ignores overhead from book-keeping, schemas, etc).
///
/// This is the running total maintained by `on_event`: every cell of an added event
/// contributes its `heap_size_bytes()`, and every cell of a deleted event subtracts the
/// same quantity (the total is left untouched if the subtraction would underflow).
///
/// NOTE(review): the per-cell size may in fact include the cell's own copy of the arrow
/// schema, in which case "ignores schemas" is not accurate — confirm.
#[inline]
pub fn data_bytes(&self) -> u64 {
self.data_bytes
}
}

/// Maintains an optimized representation of a batch of [`StoreEvent`]s specifically designed to
Expand Down
5 changes: 3 additions & 2 deletions crates/re_data_ui/src/instance_path.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,11 @@ impl DataUi for InstancePath {

let Some(components) = store.all_components(&query.timeline, entity_path) else {
if ctx.store_db.is_known_entity(entity_path) {
ui.label(ctx.re_ui.warning_text(format!(
// This is fine - e.g. we're looking at `/world` and the user has only logged to `/world/car`.
ui.label(format!(
"No components logged on timeline {:?}",
query.timeline.name()
)));
));
} else {
ui.label(
ctx.re_ui
Expand Down
106 changes: 61 additions & 45 deletions crates/re_data_ui/src/item_ui.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
//! TODO(andreas): This is not a `data_ui`, can this go somewhere else, shouldn't be in `re_data_ui`.

use egui::Ui;
use re_data_store::InstancePath;
use re_data_store::{EntityTree, InstancePath};
use re_log_types::{ComponentPath, EntityPath, TimeInt, Timeline};
use re_viewer_context::{
DataQueryId, HoverHighlight, Item, SpaceViewId, UiVerbosity, ViewerContext,
Expand Down Expand Up @@ -105,62 +105,78 @@ pub fn instance_path_button_to(
cursor_interact_with_selectable(ctx, response, item)
}

fn entity_stats_ui(ui: &mut egui::Ui, timeline: &Timeline, stats: &re_arrow_store::EntityStats) {
fn entity_tree_stats_ui(ui: &mut egui::Ui, timeline: &Timeline, tree: &EntityTree) {
use re_format::format_bytes;

let total_bytes = stats.size_bytes + stats.timelines_size_bytes;
// Show total bytes used in whole subtree
let total_bytes = tree.subtree.data_bytes();

let subtree_caveat = if tree.children.is_empty() {
""
} else {
" (including subtree)"
};

if total_bytes == 0 {
return;
}

// `num_events` is approximate - we could be logging a Tensor image and a transform
// at approximately the same time. That should only count as one fence-post.
let num_events = stats.num_rows;

if stats.time_range.min < stats.time_range.max && 1 < num_events {
// Estimate a data rate.
//
// Let's do our best to avoid fencepost errors.
// If we log 1 MiB every second, then after three
// events we have a span of 2 seconds, and 3 MiB,
// but the data rate is still 1 MiB/s.
//
// <-----2 sec----->
// t: 0s 1s 2s
// data: 1MiB 1MiB 1MiB

let duration = stats.time_range.abs_length();

let mut bytes_per_time = stats.size_bytes as f64 / duration as f64;

// Fencepost adjustment:
bytes_per_time *= (num_events - 1) as f64 / num_events as f64;

let data_rate = match timeline.typ() {
re_log_types::TimeType::Time => {
let bytes_per_second = 1e9 * bytes_per_time;

format!(
"{}/s in {}",
format_bytes(bytes_per_second),
timeline.name()
)
}

re_log_types::TimeType::Sequence => {
format!("{} / {}", format_bytes(bytes_per_time), timeline.name())
let mut data_rate = None;

// Try to estimate data-rate
if let Some(time_histogram) = tree.subtree.time_histogram.get(timeline) {
// `num_events` is approximate - we could be logging a Tensor image and a transform
// at _almost_ the same time, but it should only count as one fence-post.
let num_events = time_histogram.total_count(); // TODO(emilk): we should ask the histogram to count the number of non-zero keys instead.

if let (Some(min_time), Some(max_time)) =
(time_histogram.min_key(), time_histogram.max_key())
{
if min_time < max_time && 1 < num_events {
// Let's do our best to avoid fencepost errors.
// If we log 1 MiB once every second, then after three
// events we have a span of 2 seconds, and 3 MiB,
// but the data rate is still 1 MiB/s.
//
// <-----2 sec----->
// t: 0s 1s 2s
// data: 1MiB 1MiB 1MiB

let duration = max_time - min_time;

let mut bytes_per_time = total_bytes as f64 / duration as f64;

// Fencepost adjustment:
bytes_per_time *= (num_events - 1) as f64 / num_events as f64;

data_rate = Some(match timeline.typ() {
re_log_types::TimeType::Time => {
let bytes_per_second = 1e9 * bytes_per_time;

format!(
"{}/s in '{}'",
format_bytes(bytes_per_second),
timeline.name()
)
}

re_log_types::TimeType::Sequence => {
format!("{} / {}", format_bytes(bytes_per_time), timeline.name())
}
});
}
};
}
}

if let Some(data_rate) = data_rate {
ui.label(format!(
"Using {} in total ≈ {}",
"Using {}{subtree_caveat} ≈ {}",
format_bytes(total_bytes as f64),
data_rate
));
} else {
ui.label(format!(
"Using {} in total",
"Using {}{subtree_caveat}",
format_bytes(total_bytes as f64)
));
}
Expand Down Expand Up @@ -341,9 +357,9 @@ pub fn instance_hover_card_ui(ui: &mut Ui, ctx: &ViewerContext<'_>, instance_pat
let query = ctx.current_query();

if instance_path.instance_key.is_splat() {
let store = ctx.store_db.store();
let stats = store.entity_stats(query.timeline, instance_path.entity_path.hash());
entity_stats_ui(ui, &query.timeline, &stats);
if let Some(subtree) = ctx.store_db.tree().subtree(&instance_path.entity_path) {
entity_tree_stats_ui(ui, &query.timeline, subtree);
}
} else {
// TODO(emilk): per-component stats
}
Expand Down
Loading