Introduce legacy shims and migrate DataCell to re_types::Component (#2752)

### What
This started off with migrating `DataCell` to use `Component` instead of
arrow2-convert. However, the type conversion of `ComponentName` caused some
pretty significant ripples. Also, the `Component`-related trait bounds are
still pretty annoying, so there is definitely some cleanup left to do here.
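
For context, a minimal before/after sketch of what the migration looks like at a
call site, assembled only from imports and calls that appear in the hunks below.
The `before`/`after` modules and the `roundtrip` helpers are hypothetical, and
the snippet assumes the `arrow2`, `re_log_types`, and `re_types` APIs as of this
commit:

```rust
// Before: serialization went through the arrow2-convert traits re-exported by
// re_log_types (these are the imports being removed in the benches below).
mod before {
    use re_log_types::external::arrow2_convert::{
        deserialize::TryIntoCollection, serialize::TryIntoArrow,
    };
    use re_log_types::InstanceKey;

    pub fn roundtrip(data: &[InstanceKey]) -> Vec<InstanceKey> {
        // arrow2-convert: serialize the slice, then deserialize it back.
        let array: Box<dyn arrow2::array::Array> =
            TryIntoArrow::try_into_arrow(data).unwrap();
        array.as_ref().try_into_collection().unwrap()
    }
}

// After: the same round-trip through the re_types `Loggable` methods
// (`to_arrow` / `from_arrow`), mirroring the new call sites in the benches.
mod after {
    use re_types::{components::InstanceKey, Loggable as _};

    pub fn roundtrip(data: &[InstanceKey]) -> Vec<InstanceKey> {
        let array: Box<dyn arrow2::array::Array> = InstanceKey::to_arrow(data, None);
        InstanceKey::from_arrow(array.as_ref())
    }
}
```

The `benches/arrow2.rs` and `benches/arrow2_convert.rs` hunks below are where
these exact substitutions happen.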

### TODO:
- [x] Explore benchmarks to see how much of a regression this is causing
- [x] Sort out the short_name / full_name trait stuff for ComponentName
- [x] Verify the size increases are in line with expectations
- [x] ~~See if we can cut down on the `Primary: std::convert::Into<std::borrow::Cow<'a, Primary>>` trait bounds.~~ (see the sketch after this list)
- [x] Fix errors about not finding expected components when running demos.
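
The struck-through trait-bound item refers to bounds like the one now threaded
through the generic benches (see `bench_arrow` in `benches/arrow2.rs` below). A
minimal sketch of that pattern, with a hypothetical `serialize_rows` helper and
`num_rows` parameter, assuming the `re_types` API as of this commit:

```rust
use std::borrow::Cow;

use arrow2::array::Array;
use re_types::Component;

/// Serialize the same component slice `num_rows` times.
///
/// Generic code over `T: Component` currently has to carry the
/// `&'a T: Into<Cow<'a, T>>` bound so that `to_arrow` can accept the borrowed
/// values — this is the boilerplate the struck-through item was about.
fn serialize_rows<'a, T: Component + 'a>(data: &'a [T], num_rows: usize) -> Vec<Box<dyn Array>>
where
    &'a T: Into<Cow<'a, T>>,
{
    (0..num_rows).map(|_| T::to_arrow(data, None)).collect()
}
```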

### Checklist
* [x] I have read and agree to the [Contributor Guide](https://github.com/rerun-io/rerun/blob/main/CONTRIBUTING.md) and the [Code of Conduct](https://github.com/rerun-io/rerun/blob/main/CODE_OF_CONDUCT.md)
* [x] I've included a screenshot or gif (if applicable)
* [x] I have tested [demo.rerun.io](https://demo.rerun.io/pr/2752) (if
applicable)

- [PR Build Summary](https://build.rerun.io/pr/2752)
- [Docs
preview](https://rerun.io/preview/pr%3Ajleibs%2Fhope_legacy_shims/docs)
- [Examples
preview](https://rerun.io/preview/pr%3Ajleibs%2Fhope_legacy_shims/examples)
jleibs authored Jul 21, 2023
1 parent 00eeacb commit 8bad645
Showing 170 changed files with 1,436 additions and 1,289 deletions.
18 changes: 18 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default.

1 change: 1 addition & 0 deletions crates/re_arrow_store/Cargo.toml
@@ -36,6 +36,7 @@ re_format.workspace = true
re_log_types.workspace = true
re_log.workspace = true
re_tracing.workspace = true
re_types.workspace = true

# External dependencies:
ahash.workspace = true
19 changes: 9 additions & 10 deletions crates/re_arrow_store/benches/arrow2.rs
@@ -12,10 +12,8 @@ use re_components::{
datagen::{build_some_instances, build_some_point2d, build_some_rects},
Point2D, Rect2D,
};
use re_log_types::{
external::arrow2_convert::serialize::TryIntoArrow, DataCell, InstanceKey,
SerializableComponent, SizeBytes as _,
};
use re_log_types::{DataCell, SizeBytes as _};
use re_types::{components::InstanceKey, Component};

// ---

@@ -98,13 +96,14 @@ fn erased_clone(c: &mut Criterion) {
}

// TODO(cmc): Use cells once `cell.size_bytes()` has landed (#1727)
fn bench_arrow<T: SerializableComponent>(
fn bench_arrow<'a, T: Component + 'a>(
group: &mut criterion::BenchmarkGroup<'_, criterion::measurement::WallTime>,
data: &[T],
) {
let arrays: Vec<Box<dyn Array>> = (0..NUM_ROWS)
.map(|_| TryIntoArrow::try_into_arrow(data).unwrap())
.collect_vec();
data: &'a [T],
) where
&'a T: Into<::std::borrow::Cow<'a, T>>,
{
let arrays: Vec<Box<dyn Array>> =
(0..NUM_ROWS).map(|_| T::to_arrow(data, None)).collect_vec();

let total_size_bytes = arrays
.iter()
7 changes: 3 additions & 4 deletions crates/re_arrow_store/benches/arrow2_convert.rs
@@ -5,9 +5,8 @@ static GLOBAL: mimalloc::MiMalloc = mimalloc::MiMalloc;

use arrow2::{array::PrimitiveArray, datatypes::PhysicalType, types::PrimitiveType};
use criterion::{criterion_group, Criterion};
use re_log_types::{
external::arrow2_convert::deserialize::TryIntoCollection, Component as _, DataCell, InstanceKey,
};
use re_log_types::DataCell;
use re_types::{components::InstanceKey, Loggable as _};

// ---

@@ -99,7 +98,7 @@ fn deserialize(c: &mut Criterion) {
{
group.bench_function("arrow2_convert", |b| {
b.iter(|| {
let keys: Vec<InstanceKey> = data.as_ref().try_into_collection().unwrap();
let keys: Vec<InstanceKey> = InstanceKey::from_arrow(data.as_ref());
assert_eq!(NUM_INSTANCES, keys.len());
assert_eq!(
InstanceKey(NUM_INSTANCES as u64 / 2),
6 changes: 2 additions & 4 deletions crates/re_arrow_store/benches/data_store.rs
@@ -12,10 +12,8 @@ use re_components::{
datagen::{build_frame_nr, build_some_instances, build_some_rects},
Rect2D,
};
use re_log_types::{
Component as _, ComponentName, DataCell, DataRow, DataTable, EntityPath, InstanceKey, RowId,
TableId, TimeType, Timeline,
};
use re_log_types::{DataCell, DataRow, DataTable, EntityPath, RowId, TableId, TimeType, Timeline};
use re_types::{components::InstanceKey, ComponentName, Loggable as _};

criterion_group!(benches, insert, latest_at, latest_at_missing, range, gc);
criterion_main!(benches);
3 changes: 2 additions & 1 deletion crates/re_arrow_store/examples/dump_dataframe.rs
@@ -9,7 +9,8 @@ use re_components::datagen::{
build_frame_nr, build_log_time, build_some_instances, build_some_instances_from,
build_some_point2d, build_some_rects,
};
use re_log_types::{Component as _, EntityPath, InstanceKey, Time};
use re_log_types::{EntityPath, Time};
use re_types::{components::InstanceKey, Loggable};

// ---

3 changes: 2 additions & 1 deletion crates/re_arrow_store/examples/latest_component.rs
@@ -10,7 +10,8 @@ use re_components::{
datagen::{build_frame_nr, build_some_point2d, build_some_rects},
Point2D, Rect2D,
};
use re_log_types::{Component, EntityPath, InstanceKey};
use re_log_types::EntityPath;
use re_types::{components::InstanceKey, Loggable};

fn main() {
let mut store = DataStore::new(InstanceKey::name(), Default::default());
3 changes: 2 additions & 1 deletion crates/re_arrow_store/examples/latest_components.rs
@@ -12,7 +12,8 @@ use re_components::{
datagen::{build_frame_nr, build_some_point2d, build_some_rects},
Point2D, Rect2D,
};
use re_log_types::{Component, EntityPath, InstanceKey};
use re_log_types::EntityPath;
use re_types::{components::InstanceKey, Loggable};

fn main() {
let mut store = DataStore::new(InstanceKey::name(), Default::default());
3 changes: 2 additions & 1 deletion crates/re_arrow_store/examples/range_components.rs
@@ -10,7 +10,8 @@ use re_components::{
datagen::{build_frame_nr, build_some_point2d, build_some_rects},
Point2D, Rect2D,
};
use re_log_types::{Component as _, EntityPath, InstanceKey, TimeType, Timeline};
use re_log_types::{EntityPath, TimeType, Timeline};
use re_types::{components::InstanceKey, Loggable};

fn main() {
let mut store = DataStore::new(InstanceKey::name(), Default::default());
6 changes: 4 additions & 2 deletions crates/re_arrow_store/src/arrow_util.rs
@@ -249,8 +249,10 @@ mod tests {
Vec3D(Vec3D),
}

impl re_log_types::Component for TestComponentWithUnionAndFixedSizeList {
fn name() -> re_log_types::ComponentName {
re_log_types::component_legacy_shim!(TestComponentWithUnionAndFixedSizeList);

impl re_log_types::LegacyComponent for TestComponentWithUnionAndFixedSizeList {
fn legacy_name() -> re_log_types::ComponentName {
"test_component_with_union_and_fixed_size_list".into()
}
}
16 changes: 9 additions & 7 deletions crates/re_arrow_store/src/polars_util.rs
@@ -1,7 +1,8 @@
use itertools::Itertools;
use polars_core::{prelude::*, series::Series};
use polars_ops::prelude::*;
use re_log_types::{ComponentName, DataCell, EntityPath, RowId, TimeInt};
use re_log_types::{DataCell, EntityPath, RowId, TimeInt};
use re_types::ComponentName;

use crate::{ArrayExt, DataStore, LatestAtQuery, RangeQuery};

@@ -139,7 +140,7 @@ pub fn range_components<'a, const N: usize>(
if df.as_ref().map_or(false, |df| {
// We only care about the initial state if it A) isn't empty and B) contains any data
// at all for the primary component.
!df.is_empty() && df.column(primary.as_str()).is_ok()
!df.is_empty() && df.column(primary.as_ref()).is_ok()
}) {
df_latest = Some(df);
}
@@ -186,8 +187,9 @@ pub fn range_components<'a, const N: usize>(
let df = df
.select(
components
.clone()
.iter()
.filter(|col| columns.contains(&col.as_str())),
.filter(|col| columns.contains(&col.as_ref())),
)
.unwrap();
(time, df)
@@ -207,7 +209,7 @@ pub fn dataframe_from_cells<const N: usize>(
.flatten()
.map(|cell| {
Series::try_from((
cell.component_name().as_str(),
cell.component_name().as_ref(),
cell.as_arrow_ref().clean_for_polars(),
))
})
@@ -239,15 +241,15 @@ pub fn join_dataframes(
for col in right
.get_column_names()
.iter()
.filter(|col| *col != &cluster_key.as_str())
.filter(|col| *col != &cluster_key)
{
_ = left.drop_in_place(col);
}

left.join(
&right,
[cluster_key.as_str()],
[cluster_key.as_str()],
[cluster_key],
[cluster_key],
join_type.clone(),
None,
)
9 changes: 5 additions & 4 deletions crates/re_arrow_store/src/store.rs
@@ -5,11 +5,12 @@ use ahash::HashMap;
use arrow2::datatypes::DataType;
use nohash_hasher::{IntMap, IntSet};
use parking_lot::RwLock;
use re_types::ComponentName;
use smallvec::SmallVec;

use re_log_types::{
ComponentName, DataCell, DataCellColumn, EntityPath, EntityPathHash, ErasedTimeVec,
NumInstancesVec, RowId, RowIdVec, SizeBytes, TimeInt, TimePoint, TimeRange, Timeline,
DataCell, DataCellColumn, EntityPath, EntityPathHash, ErasedTimeVec, NumInstancesVec, RowId,
RowIdVec, SizeBytes, TimeInt, TimePoint, TimeRange, Timeline,
};

// --- Data store ---
@@ -284,7 +285,7 @@ impl DataStore {
&self.config
}

/// Lookup the arrow [`DataType`] of a [`re_log_types::Component`] in the internal
/// Lookup the arrow [`DataType`] of a [`re_types::Component`] in the internal
/// `DataTypeRegistry`.
pub fn lookup_datatype(&self, component: &ComponentName) -> Option<&DataType> {
self.type_registry.get(component)
@@ -335,7 +336,7 @@ impl DataStore {
#[test]
fn datastore_internal_repr() {
use re_components::datagen::data_table_example;
use re_log_types::{Component as _, InstanceKey};
use re_types::{components::InstanceKey, Loggable as _};

let mut store = DataStore::new(
InstanceKey::name(),
8 changes: 4 additions & 4 deletions crates/re_arrow_store/src/store_arrow.rs
@@ -3,9 +3,10 @@ use std::collections::BTreeMap;
use arrow2::{array::Array, chunk::Chunk, datatypes::Schema};
use nohash_hasher::IntMap;
use re_log_types::{
ComponentName, DataCellColumn, DataTable, DataTableResult, RowId, Timeline, COLUMN_INSERT_ID,
DataCellColumn, DataTable, DataTableResult, RowId, Timeline, COLUMN_INSERT_ID,
COLUMN_NUM_INSTANCES, COLUMN_ROW_ID,
};
use re_types::ComponentName;

use crate::store::{IndexedBucket, IndexedBucketInner, PersistentIndexedTable};

@@ -188,16 +189,15 @@ fn serialize_data_columns(
// NOTE: cannot fail, the cluster key _has_ to be there by definition
let cluster_column = table.remove(&cluster_key).unwrap();
{
let (field, column) =
DataTable::serialize_data_column(cluster_key.as_str(), cluster_column)?;
let (field, column) = DataTable::serialize_data_column(cluster_key, cluster_column)?;
schema.fields.push(field);
columns.push(column);
}

for (component, column) in table {
// NOTE: Don't serialize columns with only null values.
if column.iter().any(Option::is_some) {
let (field, column) = DataTable::serialize_data_column(component.as_str(), column)?;
let (field, column) = DataTable::serialize_data_column(component, column)?;
schema.fields.push(field);
columns.push(column);
}
(diffs for the remaining changed files are not shown here)

1 comment on commit 8bad645

@github-actions

⚠️ Performance Alert ⚠️

A possible performance regression was detected for the benchmark 'Rust Benchmark'.
The benchmark results of this commit are worse than the previous results, exceeding the ratio threshold of 1.25.

| Benchmark suite | Current: 8bad645 | Previous: 00eeacb | Ratio |
| --- | --- | --- | --- |
| `arrow_mono_points/query` | 1372089 ns/iter (± 5814) | 936852 ns/iter (± 2289) | 1.46 |
| `arrow_batch_points/query` | 43686 ns/iter (± 71) | 12401 ns/iter (± 7) | 3.52 |
| `arrow_batch_vecs/query` | 1236247 ns/iter (± 11267) | 316997 ns/iter (± 484) | 3.90 |

This comment was automatically generated by a workflow using github-action-benchmark.
