Skip to content

Commit

Permalink
arrow2 estimated_bytes_size benchmarks
Browse files Browse the repository at this point in the history
  • Loading branch information
teh-cmc committed Apr 3, 2023
1 parent 4439309 commit 739709b
Showing 1 changed file with 140 additions and 7 deletions.
147 changes: 140 additions & 7 deletions crates/re_arrow_store/benches/arrow2.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,18 +5,22 @@ static GLOBAL: mimalloc::MiMalloc = mimalloc::MiMalloc;

use std::sync::Arc;

use arrow2::array::{Array, PrimitiveArray, StructArray};
use arrow2::{
array::{Array, PrimitiveArray, StructArray},
compute::aggregate::estimated_bytes_size,
};
use criterion::{criterion_group, criterion_main, Criterion};
use itertools::Itertools;
use re_log_types::{
component_types::{InstanceKey, Point2D},
datagen::{build_some_instances, build_some_point2d},
DataCell,
component_types::{InstanceKey, Point2D, Rect2D},
datagen::{build_some_instances, build_some_point2d, build_some_rects},
external::arrow2_convert::serialize::TryIntoArrow,
DataCell, SerializableComponent,
};

// ---

criterion_group!(benches, estimated_size_bytes);
criterion_group!(benches, erased_clone, estimated_size_bytes);
criterion_main!(benches);

// ---
Expand All @@ -41,19 +45,138 @@ enum ArrayKind {

/// E.g. an array of `Point2D`.
Struct,

/// E.g. an array of `Rect2D`.
StructLarge,
}

impl std::fmt::Display for ArrayKind {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.write_str(match self {
ArrayKind::Primitive => "primitive",
ArrayKind::Struct => "struct",
ArrayKind::StructLarge => "struct_large",
})
}
}

/// Benchmarks computing the total byte size of `NUM_ROWS` rows of `NUM_INSTANCES`
/// instances each, once per [`ArrayKind`].
///
/// Every kind gets its own benchmark group holding three measurements:
/// - `array`: `estimated_bytes_size` summed over boxed, type-erased arrow arrays,
/// - `vec`: `std::mem::size_of_val` summed over native `Vec<T>` rows,
/// - `vec/erased`: the same native computation, dispatched through `Box<dyn SizeOf>`.
///
/// Each measured closure asserts that it reproduces the total computed during setup.
// NOTE(review): the function is named `erased_clone`, yet the group is called
// `size_bytes` and nothing is cloned inside the measured closures. Confirm the intended
// naming and that the group name does not clash with another benchmark in this file.
fn erased_clone(c: &mut Criterion) {
    // TODO(cmc): Use cells once `cell.size_bytes()` has landed (#1727)
    /// Measures `estimated_bytes_size` over `NUM_ROWS` arrow arrays serialized from `data`.
    fn bench_arrow<T: SerializableComponent>(
        group: &mut criterion::BenchmarkGroup<'_, criterion::measurement::WallTime>,
        data: &[T],
    ) {
        let arrays: Vec<Box<dyn Array>> = (0..NUM_ROWS)
            .map(|_| TryIntoArrow::try_into_arrow(data).unwrap())
            .collect_vec();

        // Expected total, computed once up front; also sanity-checked against the
        // size of the raw native payload.
        let total_size_bytes = arrays
            .iter()
            .map(|array| estimated_bytes_size(&**array) as u64)
            .sum::<u64>();
        assert!(total_size_bytes as usize >= NUM_ROWS * NUM_INSTANCES * std::mem::size_of::<T>());

        group.bench_function("array", |b| {
            b.iter(|| {
                let sz = arrays
                    .iter()
                    .map(|array| estimated_bytes_size(&**array) as u64)
                    .sum::<u64>();
                assert_eq!(total_size_bytes, sz);
                sz
            });
        });
    }

    /// Measures the same size computation over native `Vec<T>` rows, first with static
    /// dispatch (`vec`) and then through a trait object (`vec/erased`).
    fn bench_native<T: Clone>(
        group: &mut criterion::BenchmarkGroup<'_, criterion::measurement::WallTime>,
        data: &[T],
    ) {
        /// Minimal object-safe trait used to put a `Vec<_>` behind dynamic dispatch.
        trait SizeOf {
            fn size_of(&self) -> usize;
        }

        impl<U> SizeOf for Vec<U> {
            fn size_of(&self) -> usize {
                std::mem::size_of_val(self.as_slice())
            }
        }

        // Built once: the same rows provide the expected total, feed the `vec`
        // benchmark, and are then moved behind trait objects for `vec/erased`.
        let vecs = (0..NUM_ROWS).map(|_| data.to_vec()).collect_vec();

        let total_size_bytes = vecs
            .iter()
            .map(|vec| std::mem::size_of_val(vec.as_slice()) as u64)
            .sum::<u64>();
        assert!(total_size_bytes as usize >= NUM_ROWS * NUM_INSTANCES * std::mem::size_of::<T>());

        group.bench_function("vec", |b| {
            b.iter(|| {
                let sz = vecs
                    .iter()
                    .map(|vec| std::mem::size_of_val(vec.as_slice()) as u64)
                    .sum::<u64>();
                assert_eq!(total_size_bytes, sz);
                sz
            });
        });

        let erased: Vec<Box<dyn SizeOf>> = vecs
            .into_iter()
            .map(|vec| Box::new(vec) as Box<dyn SizeOf>)
            .collect_vec();

        group.bench_function("vec/erased", |b| {
            b.iter(|| {
                let sz = erased.iter().map(|vec| vec.size_of() as u64).sum::<u64>();
                assert_eq!(total_size_bytes, sz);
                sz
            });
        });
    }

    let kinds = [
        ArrayKind::Primitive,
        ArrayKind::Struct,
        ArrayKind::StructLarge,
    ];

    for kind in kinds {
        let mut group = c.benchmark_group(format!(
            "arrow2/size_bytes/{kind}/rows={NUM_ROWS}/instances={NUM_INSTANCES}"
        ));
        group.throughput(criterion::Throughput::Elements(NUM_ROWS as _));

        match kind {
            ArrayKind::Primitive => {
                let data = build_some_instances(NUM_INSTANCES);
                bench_arrow(&mut group, data.as_slice());
                bench_native(&mut group, data.as_slice());
            }
            ArrayKind::Struct => {
                let data = build_some_point2d(NUM_INSTANCES);
                bench_arrow(&mut group, data.as_slice());
                bench_native(&mut group, data.as_slice());
            }
            ArrayKind::StructLarge => {
                let data = build_some_rects(NUM_INSTANCES);
                bench_arrow(&mut group, data.as_slice());
                bench_native(&mut group, data.as_slice());
            }
        }
    }
}

fn estimated_size_bytes(c: &mut Criterion) {
let kind = [ArrayKind::Primitive, ArrayKind::Struct];
let kind = [
ArrayKind::Primitive,
ArrayKind::Struct,
ArrayKind::StructLarge,
];

for kind in kind {
let mut group = c.benchmark_group(format!(
Expand All @@ -69,6 +192,9 @@ fn estimated_size_bytes(c: &mut Criterion) {
ArrayKind::Struct => (0..NUM_ROWS)
.map(|_| DataCell::from_native(build_some_point2d(NUM_INSTANCES).as_slice()))
.collect(),
ArrayKind::StructLarge => (0..NUM_ROWS)
.map(|_| DataCell::from_native(build_some_rects(NUM_INSTANCES).as_slice()))
.collect(),
}
}

Expand Down Expand Up @@ -153,7 +279,7 @@ fn estimated_size_bytes(c: &mut Criterion) {
});
});
}
ArrayKind::Struct => {
ArrayKind::Struct | ArrayKind::StructLarge => {
let cells = generate_cells(kind);
let arrays = cells
.iter()
Expand Down Expand Up @@ -196,9 +322,16 @@ fn estimated_size_bytes(c: &mut Criterion) {
.collect()
}

/// Builds `NUM_ROWS` rows, each produced by a separate call to
/// `build_some_rects(NUM_INSTANCES)`.
fn generate_rects() -> Vec<Vec<Rect2D>> {
    std::iter::repeat_with(|| build_some_rects(NUM_INSTANCES))
        .take(NUM_ROWS)
        .collect()
}

match kind {
ArrayKind::Primitive => bench_std(&mut group, generate_keys()),
ArrayKind::Struct => bench_std(&mut group, generate_points()),
ArrayKind::StructLarge => bench_std(&mut group, generate_rects()),
}

fn bench_std<T: Clone>(
Expand Down

0 comments on commit 739709b

Please sign in to comment.