Skip to content

Commit

Permalink
Optimize out unnecessary joins when querying archetypes (#3377)
Browse files Browse the repository at this point in the history
What the title says; see the commits for details, it's pretty trivial.

Going from a ~62ms space view build time to ~40ms in the OPF example, so
pretty nice gains overall.

OPF, before:
![image
(17)](https://github.com/rerun-io/rerun/assets/2910679/700fe397-7fa2-4f4c-b661-07b9dfb30937)

OPF, after:

![image](https://github.com/rerun-io/rerun/assets/2910679/4b4ae9db-ee6b-4318-a0ed-ea25e08facdd)


* Fixes #3107
  • Loading branch information
teh-cmc authored Sep 20, 2023
1 parent a91f415 commit 84581b7
Show file tree
Hide file tree
Showing 2 changed files with 54 additions and 10 deletions.
16 changes: 15 additions & 1 deletion crates/re_log_types/src/data_cell.rs
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ pub type DataCellResult<T> = ::std::result::Result<T, DataCellError>;
/// # assert_eq!(points, cell.to_native().as_slice());
/// ```
///
#[derive(Debug, Clone, PartialEq)]
#[derive(Debug, Clone)]
pub struct DataCell {
/// While the arrow data is already refcounted, the contents of the `DataCell` still have to
/// be wrapped in an `Arc` to work around performance issues in `arrow2`.
Expand All @@ -112,6 +112,20 @@ pub struct DataCell {
pub inner: Arc<DataCellInner>,
}

impl PartialEq for DataCell {
fn eq(&self, rhs: &Self) -> bool {
let Self { inner: lhs_inner } = self;
let Self { inner: rhs_inner } = rhs;

// NOTE: Compare the inner pointers first, and only if they don't match actually do a full
// contents comparison.
// Arc normally handles this automatically if T implements `Eq`, but in our case
// `DataCellInner` cannot implement `Eq`.
// Still, the optimization is valid, and so here we are.
Arc::as_ptr(lhs_inner) == Arc::as_ptr(rhs_inner) || self.inner == rhs.inner
}
}

/// The actual contents of a [`DataCell`].
///
/// Despite the fact that the arrow data is already refcounted, this has to live separately, behind
Expand Down
48 changes: 39 additions & 9 deletions crates/re_query/src/archetype_view.rs
Original file line number Diff line number Diff line change
Expand Up @@ -322,24 +322,54 @@ impl<A: Archetype> ArchetypeView<A> {
let component = self.components.get(&C::name());

if let Some(component) = component {
let primary_instance_key_iter = self.iter_instance_keys();
// NOTE(1): Autogenerated instance keys are interned behind datacells.
// If two or more rows in the store share the same keys, then they will share
// also the same cells.
// Therefore we can compare those cells, and early out if they match.
//
// NOTE(2): Comparing cells that point to the same backing storage is a simple pointer
// comparison; no data comparison involved.
// If the cells are not interned, this will fall back to a more costly data comparison:
// - If the data is the same, the cost of the comparison will be won back by having a
// faster iterator.
// - If the data isn't the same, the cost of the comparison will be dwarfed by the cost
// of the join anyway.
if self.required_comp().instance_keys == component.instance_keys {
// NOTE: A component instance cannot be optional in itself, and if we're on this
// path then we know for a fact that both batches can be intersected 1-to-1.
// Therefore there cannot be any null values, therefore we can go through the fast
// deserialization path.
let component_value_iter = {
re_tracing::profile_scope!("try_from_arrow", C::name());
C::try_from_arrow(component.values.as_arrow_ref())?
.into_iter()
.map(Some)
};

let mut component_instance_key_iter = component.instance_keys().into_iter();
return Ok(itertools::Either::Left(itertools::Either::Left(
component_value_iter,
)));
}

let component_value_iter = {
re_tracing::profile_scope!("try_from_arrow_opt", C::name());
C::try_from_arrow_opt(component.values.as_arrow_ref())?.into_iter()
};

let primary_instance_key_iter = self.iter_instance_keys();
let mut component_instance_key_iter = component.instance_keys().into_iter();

let next_component_instance_key = component_instance_key_iter.next();

Ok(itertools::Either::Left(ComponentJoinedIterator {
primary_instance_key_iter,
component_instance_key_iter,
component_value_iter,
next_component_instance_key,
splatted_component_value: None,
}))
Ok(itertools::Either::Left(itertools::Either::Right(
ComponentJoinedIterator {
primary_instance_key_iter,
component_instance_key_iter,
component_value_iter,
next_component_instance_key,
splatted_component_value: None,
},
)))
} else {
let primary = self.required_comp();
let nulls = (0..primary.len()).map(|_| None);
Expand Down

0 comments on commit 84581b7

Please sign in to comment.