Skip to content

Commit

Permalink
arrow2 estimated_bytes_size benchmarks
Browse files Browse the repository at this point in the history
  • Loading branch information
teh-cmc committed Mar 31, 2023
1 parent 1e84aa5 commit 92edb63
Show file tree
Hide file tree
Showing 2 changed files with 102 additions and 0 deletions.
4 changes: 4 additions & 0 deletions crates/re_arrow_store/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,10 @@ required-features = ["polars"]
name = "data_store"
harness = false

[[bench]]
name = "arrow2"
harness = false

[[bench]]
name = "arrow2_convert"
harness = false
98 changes: 98 additions & 0 deletions crates/re_arrow_store/benches/arrow2.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
//! Keeping track of performance issues/regressions in `arrow2` that directly affect us.
// Use mimalloc as the global allocator for everything allocated by this benchmark binary.
#[global_allocator]
static GLOBAL: mimalloc::MiMalloc = mimalloc::MiMalloc;

use arrow2::{array::Array, compute::aggregate::estimated_bytes_size};
use criterion::{criterion_group, criterion_main, Criterion};
use re_log_types::{
component_types::Point2D, datagen::build_some_point2d,
external::arrow2_convert::serialize::TryIntoArrow,
};

// ---

// Register `estimated_size_bytes` in the `benches` criterion group, and let criterion generate
// the entry point that runs that group.
criterion_group!(benches, estimated_size_bytes);
criterion_main!(benches);

// ---

// Benchmark dimensions: number of rows, and number of instances (points) per row.
//
// `cargo test` also runs the benchmark setup code, so builds with debug assertions enabled shrink
// the workload down to a single row holding a single instance to make sure it runs quickly.
const NUM_ROWS: usize = if cfg!(debug_assertions) { 1 } else { 10_000 };
const NUM_INSTANCES: usize = if cfg!(debug_assertions) { 1 } else { 100 };

// ---

/// Benchmarks summing up the byte sizes of `NUM_ROWS` rows of `NUM_INSTANCES` [`Point2D`]s each.
///
/// Two variants are registered in the same criterion group:
/// - `array`: every row is a boxed arrow2 array, measured with `estimated_bytes_size`;
/// - `vec`: every row is a plain `Vec<Point2D>`, measured with `std::mem::size_of_val`.
///
/// The group's throughput is declared as `NUM_ROWS` elements.
///
/// # Panics
///
/// Panics if a row cannot be converted to an arrow array, if a precomputed total is smaller than
/// the raw `Point2D` payload, or if a total measured inside the benchmark loop differs from the
/// one precomputed during setup.
fn estimated_size_bytes(c: &mut Criterion) {
    let group_name = format!("arrow2/size_bytes/rows={NUM_ROWS}/instances={NUM_INSTANCES}");
    let mut group = c.benchmark_group(group_name);
    group.throughput(criterion::Throughput::Elements(NUM_ROWS as _));

    // TODO(cmc): Use cells once `cell.size_bytes()` has landed (#1727)
    {
        // One arrow array per row, each one built from freshly generated points.
        let arrays: Vec<Box<dyn Array>> = (0..NUM_ROWS)
            .map(|_| {
                let points = build_some_point2d(NUM_INSTANCES);
                TryIntoArrow::try_into_arrow(points.as_slice()).unwrap()
            })
            .collect();

        // Shared by the setup-time reference computation and the benchmarked loop.
        let sum_array_sizes = || -> u64 {
            arrays
                .iter()
                .map(|array| estimated_bytes_size(&**array) as u64)
                .sum()
        };

        // Sanity check: the estimate must at least account for the raw point data.
        let total_size_bytes = sum_array_sizes();
        assert!(
            total_size_bytes as usize >= NUM_ROWS * NUM_INSTANCES * std::mem::size_of::<Point2D>()
        );

        group.bench_function("array", |b| {
            b.iter(|| {
                let measured = sum_array_sizes();
                assert_eq!(total_size_bytes, measured);
                measured
            });
        });
    }

    {
        // Same data as native vectors, to compare against the arrow variant above.
        let vecs: Vec<Vec<Point2D>> = (0..NUM_ROWS)
            .map(|_| build_some_point2d(NUM_INSTANCES))
            .collect();

        // Shared by the setup-time reference computation and the benchmarked loop.
        let sum_vec_sizes = || -> u64 {
            vecs.iter()
                .map(|vec| std::mem::size_of_val(vec.as_slice()) as u64)
                .sum()
        };

        // Sanity check: the slices must at least account for the raw point data.
        let total_size_bytes = sum_vec_sizes();
        assert!(
            total_size_bytes as usize >= NUM_ROWS * NUM_INSTANCES * std::mem::size_of::<Point2D>()
        );

        group.bench_function("vec", |b| {
            b.iter(|| {
                let measured = sum_vec_sizes();
                assert_eq!(total_size_bytes, measured);
                measured
            });
        });
    }
}

0 comments on commit 92edb63

Please sign in to comment.