Skip to content

Commit

Permalink
Introduce new benchmark for string performance. (#2926)
Browse files Browse the repository at this point in the history
### What
Add a new benchmark of strings so we can verify the move to buffers in
#2931 is actually an improvement.

### Checklist
* [x] I have read and agree to [Contributor
Guide](https://github.com/rerun-io/rerun/blob/main/CONTRIBUTING.md) and
the [Code of
Conduct](https://github.com/rerun-io/rerun/blob/main/CODE_OF_CONDUCT.md)
* [x] I've included a screenshot or gif (if applicable)
* [x] I have tested [demo.rerun.io](https://demo.rerun.io/pr/2926) (if
applicable)

- [PR Build Summary](https://build.rerun.io/pr/2926)
- [Docs
preview](https://rerun.io/preview/pr%3Ajleibs%2Fstrings_and_bench/docs)
- [Examples
preview](https://rerun.io/preview/pr%3Ajleibs%2Fstrings_and_bench/examples)
  • Loading branch information
jleibs authored Aug 8, 2023
1 parent aaf955e commit 404e58c
Showing 1 changed file with 140 additions and 2 deletions.
142 changes: 140 additions & 2 deletions crates/re_query/benches/query_benchmark2.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ use re_log_types::{entity_path, DataRow, EntityPath, Index, RowId, TimeInt, Time
use re_query::query_archetype;
use re_types::{
archetypes::Points2D,
components::{Color, InstanceKey, Point2D},
components::{Color, InstanceKey, Label, Point2D},
Loggable as _,
};

Expand All @@ -19,14 +19,28 @@ use re_types::{
const NUM_FRAMES_POINTS: u32 = 1_000;
#[cfg(not(debug_assertions))]
const NUM_POINTS: u32 = 1_000;
/// Number of frames of string data generated by `build_strings_rows` in builds
/// without debug assertions.
#[cfg(not(debug_assertions))]
const NUM_FRAMES_STRINGS: u32 = 1_000;
/// Number of strings per frame in builds without debug assertions: the number of
/// entity paths in `mono_strings` and the batch size in `batch_strings`.
#[cfg(not(debug_assertions))]
const NUM_STRINGS: u32 = 1_000;

// `cargo test` also runs the benchmark setup code, so make sure they run quickly:
#[cfg(debug_assertions)]
const NUM_FRAMES_POINTS: u32 = 1;
#[cfg(debug_assertions)]
const NUM_POINTS: u32 = 1;
/// Number of frames of string data generated by `build_strings_rows` when debug
/// assertions are enabled.
#[cfg(debug_assertions)]
const NUM_FRAMES_STRINGS: u32 = 1;
/// Number of strings per frame when debug assertions are enabled: the number of
/// entity paths in `mono_strings` and the batch size in `batch_strings`.
#[cfg(debug_assertions)]
const NUM_STRINGS: u32 = 1;

criterion_group!(benches, mono_points, batch_points);
criterion_group!(
benches,
mono_points,
mono_strings,
batch_points,
batch_strings
);
criterion_main!(benches);

// --- Benchmarks ---
Expand All @@ -50,6 +64,23 @@ pub fn build_frame_nr(frame_nr: TimeInt) -> (Timeline, TimeInt) {
(Timeline::new("frame_nr", TimeType::Sequence), frame_nr)
}

/// Generates `len` labels, each holding random alphanumeric text.
///
/// The character count of every label is drawn independently from `0..10000`.
pub fn build_some_strings(len: usize) -> Vec<Label> {
    use rand::Rng as _;

    let mut length_rng = rand::thread_rng();
    let mut labels = Vec::with_capacity(len);

    for _ in 0..len {
        // Pick how long this particular label should be.
        let char_count: usize = length_rng.gen_range(0..10000);

        // Fill it with that many random alphanumeric characters.
        let text: String = rand::thread_rng()
            .sample_iter(&rand::distributions::Alphanumeric)
            .take(char_count)
            .map(char::from)
            .collect();

        labels.push(Label::from(text));
    }

    labels
}

fn mono_points(c: &mut Criterion) {
// Each mono point gets logged at a different path
let paths = (0..NUM_POINTS)
Expand Down Expand Up @@ -79,6 +110,34 @@ fn mono_points(c: &mut Criterion) {
}
}

/// Benchmarks inserting and querying strings that are each logged at their own
/// entity path.
///
/// Two benchmarks are registered under the `arrow_mono_strings2` group:
/// * `insert` builds a new store from the prepared rows on every iteration.
/// * `query` runs `query_and_visit_strings` against a store populated once up front.
fn mono_strings(c: &mut Criterion) {
    // Each mono string gets logged at a different path
    let paths = (0..NUM_STRINGS)
        .map(move |string_idx| entity_path!("strings", Index::Sequence(string_idx as _)))
        .collect_vec();
    let msgs = build_strings_rows(&paths, 1);

    {
        let mut group = c.benchmark_group("arrow_mono_strings2");
        group.sample_size(10);
        // Every insert iteration writes one string per path per frame.
        group.throughput(criterion::Throughput::Elements(
            (NUM_STRINGS * NUM_FRAMES_STRINGS) as _,
        ));
        group.bench_function("insert", |b| {
            b.iter(|| insert_rows(msgs.iter()));
        });
    }

    {
        let mut group = c.benchmark_group("arrow_mono_strings2");
        // One query pass visits `NUM_STRINGS` labels (this is what
        // `query_and_visit_strings` asserts), so throughput is expressed in
        // strings rather than in the unrelated `NUM_POINTS`.
        group.throughput(criterion::Throughput::Elements(NUM_STRINGS as _));
        let mut store = insert_rows(msgs.iter());
        group.bench_function("query", |b| {
            b.iter(|| query_and_visit_strings(&mut store, &paths));
        });
    }
}

fn batch_points(c: &mut Criterion) {
// Batch points are logged together at a single path
let paths = [EntityPath::from("points")];
Expand All @@ -104,6 +163,31 @@ fn batch_points(c: &mut Criterion) {
}
}

/// Benchmarks inserting and querying strings that are logged together as one
/// batch at a single entity path.
///
/// Two benchmarks are registered under the `arrow_batch_strings2` group:
/// * `insert` builds a new store from the prepared rows on every iteration.
/// * `query` runs `query_and_visit_strings` against a store populated once up front.
fn batch_strings(c: &mut Criterion) {
    // Batch strings are logged together at a single path
    // NOTE(review): the path is called "points" even though it carries strings --
    // presumably inherited from `batch_points`. It is left unchanged here so the
    // benchmarked data stays the same; confirm whether "strings" was intended.
    let paths = [EntityPath::from("points")];
    let msgs = build_strings_rows(&paths, NUM_STRINGS as _);

    {
        let mut group = c.benchmark_group("arrow_batch_strings2");
        // Every insert iteration writes `NUM_STRINGS` strings per frame.
        group.throughput(criterion::Throughput::Elements(
            (NUM_STRINGS * NUM_FRAMES_STRINGS) as _,
        ));
        group.bench_function("insert", |b| {
            b.iter(|| insert_rows(msgs.iter()));
        });
    }

    {
        let mut group = c.benchmark_group("arrow_batch_strings2");
        // One query pass visits `NUM_STRINGS` labels (this is what
        // `query_and_visit_strings` asserts), so throughput is expressed in
        // strings rather than in the unrelated `NUM_POINTS`.
        group.throughput(criterion::Throughput::Elements(NUM_STRINGS as _));
        let mut store = insert_rows(msgs.iter());
        group.bench_function("query", |b| {
            b.iter(|| query_and_visit_strings(&mut store, &paths));
        });
    }
}

// --- Helpers ---

fn build_points_rows(paths: &[EntityPath], pts: usize) -> Vec<DataRow> {
Expand All @@ -128,6 +212,36 @@ fn build_points_rows(paths: &[EntityPath], pts: usize) -> Vec<DataRow> {
.collect()
}

/// Builds one row per `(frame, path)` pair, each carrying `num_strings` points
/// and `num_strings` random labels.
///
/// Rows are ordered frame by frame; within a frame they follow the order of
/// `paths`. The result holds `NUM_FRAMES_STRINGS * paths.len()` rows.
fn build_strings_rows(paths: &[EntityPath], num_strings: usize) -> Vec<DataRow> {
    let mut rows = Vec::with_capacity(NUM_FRAMES_STRINGS as usize * paths.len());

    for frame_idx in 0..NUM_FRAMES_STRINGS {
        let frame_nr = i64::from(frame_idx);

        for path in paths {
            // Points are the primary component of the archetype we query, so they
            // have to be present even though the query never deserializes them --
            // they only provide the primary keys.
            // TODO(jleibs): switch this to use `TextEntry` once the new type has
            // landed.
            let points = build_some_point2d(num_strings);
            let labels = build_some_strings(num_strings);

            let mut row = DataRow::from_cells2(
                RowId::ZERO,
                path.clone(),
                [build_frame_nr(frame_nr.into())],
                num_strings as _,
                (points, labels),
            );

            // NOTE: Cells without a computed size crash in debug mode, and the
            // standard test harness runs benchmarks for 1 iteration in debug mode.
            if cfg!(debug_assertions) {
                row.compute_all_size_bytes();
            }

            rows.push(row);
        }
    }

    rows
}

fn insert_rows<'a>(msgs: impl Iterator<Item = &'a DataRow>) -> DataStore {
let mut store = DataStore::new(InstanceKey::name(), Default::default());
msgs.for_each(|row| store.insert_row(row).unwrap());
Expand Down Expand Up @@ -162,3 +276,27 @@ fn query_and_visit_points(store: &mut DataStore, paths: &[EntityPath]) -> Vec<Sa
assert_eq!(NUM_POINTS as usize, points.len());
points
}

/// Holds one label produced by `query_and_visit_strings`.
struct SaveString {
/// The label as yielded by the query, if any. The leading underscore marks the
/// field as intentionally unread: it is only stored.
_label: Option<Label>,
}

/// Runs a latest-at query against every path and collects each label it yields.
///
/// The query targets the `frame_nr` sequence timeline at frame
/// `NUM_FRAMES_STRINGS / 2`. The collected values are passed through
/// `criterion::black_box` before being returned.
///
/// # Panics
///
/// Panics if a query or the label iteration fails, or if the number of
/// collected labels is not exactly `NUM_STRINGS`.
fn query_and_visit_strings(store: &mut DataStore, paths: &[EntityPath]) -> Vec<SaveString> {
    let mid_frame = i64::from(NUM_FRAMES_STRINGS) / 2;
    let query = LatestAtQuery::new(
        Timeline::new("frame_nr", TimeType::Sequence),
        mid_frame.into(),
    );

    let mut collected: Vec<SaveString> = Vec::with_capacity(NUM_STRINGS as usize);
    for path in paths {
        let arch_view = query_archetype::<Points2D>(store, &query, path).unwrap();
        let labels = arch_view.iter_optional_component::<Label>().unwrap();
        collected.extend(labels.map(|label| SaveString { _label: label }));
    }
    assert_eq!(NUM_STRINGS as usize, collected.len());

    criterion::black_box(collected)
}

0 comments on commit 404e58c

Please sign in to comment.