Skip to content

Commit

Permalink
work a the datacell level instead of raw arrow
Browse files Browse the repository at this point in the history
  • Loading branch information
teh-cmc committed Sep 20, 2023
1 parent df83d5b commit b0410c8
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 19 deletions.
16 changes: 15 additions & 1 deletion crates/re_log_types/src/data_cell.rs
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ pub type DataCellResult<T> = ::std::result::Result<T, DataCellError>;
/// # assert_eq!(points, cell.to_native().as_slice());
/// ```
///
#[derive(Debug, Clone, PartialEq)]
#[derive(Debug, Clone)]
pub struct DataCell {
/// While the arrow data is already refcounted, the contents of the `DataCell` still have to
/// be wrapped in an `Arc` to work around performance issues in `arrow2`.
Expand All @@ -112,6 +112,20 @@ pub struct DataCell {
pub inner: Arc<DataCellInner>,
}

impl PartialEq for DataCell {
fn eq(&self, rhs: &Self) -> bool {
let Self { inner: lhs_inner } = self;
let Self { inner: rhs_inner } = rhs;

// NOTE: Compare the inner pointers first, and only if they don't match actually do a full
// contents comparison.
// Arc normally handles this automatically if T implements `Eq`, but in our case
// `DataCellInner` cannot implement `Eq`.
// Still, the optimization is valid, and so here we are.
Arc::as_ptr(lhs_inner) == Arc::as_ptr(rhs_inner) || self.inner == rhs.inner
}
}

/// The actual contents of a [`DataCell`].
///
/// Despite the fact that the arrow data is already refcounted, this has to live separately, behind
Expand Down
25 changes: 7 additions & 18 deletions crates/re_query/src/archetype_view.rs
Original file line number Diff line number Diff line change
Expand Up @@ -322,25 +322,14 @@ impl<A: Archetype> ArchetypeView<A> {
let component = self.components.get(&C::name());

if let Some(component) = component {
// NOTE: Autogenerated instance keys are interned: if two or more rows in the store
// share the same keys, then they will literally point to the same memory address.
// Therefore we can compare those addresses, and early out if they match.
let primary_instance_key_buffer =
self.required_comp().instance_keys.as_arrow_ref() as *const dyn Array;
let component_instance_key_buffer =
component.instance_keys.as_arrow_ref() as *const dyn Array;

// TODO(rust-lang/rust#81513): We want to compare just the data part of these fat pointers,
// because comparing the vtable pointers, while defined, can lead to unstable results.
// NOTE(1): Autogenerated instance keys are interned behind datacells.
// If two or more rows in the store share the same keys, then they will share
// also the same cells.
// Therefore we can compare those cells, and early out if they match.
//
// We'd like to use `std::ptr::to_raw_parts` for that (or any of the myriad of tools
// for dealing with smart pointers available in the stdlib)… but everything that
// relates to fat pointers is nightly only.
// So, for now, we do it the old way.
if std::ptr::eq(
primary_instance_key_buffer.cast::<u8>(),
component_instance_key_buffer.cast::<u8>(),
) {
// NOTE(2): Comparing cells that point to the same backing storage is a simple pointer
// comparison, not data comparison involved.
if self.required_comp().instance_keys == component.instance_keys {
// NOTE: A component instance cannot be optional in itself, and if we're on this
// path then we know for a fact that both batches can be intersected 1-to-1.
// Therefore there cannot be any null values, therefore we can go through the fast
Expand Down

0 comments on commit b0410c8

Please sign in to comment.