diff --git a/kvdb-rocksdb/Cargo.toml b/kvdb-rocksdb/Cargo.toml index c01b51c6a..2049da4ba 100644 --- a/kvdb-rocksdb/Cargo.toml +++ b/kvdb-rocksdb/Cargo.toml @@ -7,6 +7,10 @@ description = "kvdb implementation backed by rocksDB" license = "GPL-3.0" edition = "2018" +[[bench]] +name = "bench_read_perf" +harness = false + [dependencies] elastic-array = "0.10.2" fs-swap = "0.2.4" @@ -20,5 +24,8 @@ rocksdb = { version = "0.13", features = ["snappy"], default-features = false } owning_ref = "0.4.0" [dev-dependencies] -tempdir = "0.3.7" +alloc_counter = "0.0.4" +criterion = "0.3" ethereum-types = { version = "0.8.0", path = "../ethereum-types" } +rand = "0.7.2" +tempdir = "0.3.7" diff --git a/kvdb-rocksdb/benches/.gitignore b/kvdb-rocksdb/benches/.gitignore new file mode 100644 index 000000000..85954e328 --- /dev/null +++ b/kvdb-rocksdb/benches/.gitignore @@ -0,0 +1 @@ +_rocksdb_bench_get diff --git a/kvdb-rocksdb/benches/bench_read_perf.rs b/kvdb-rocksdb/benches/bench_read_perf.rs new file mode 100644 index 000000000..350fdf2eb --- /dev/null +++ b/kvdb-rocksdb/benches/bench_read_perf.rs @@ -0,0 +1,212 @@ +// Copyright 2015-2019 Parity Technologies (UK) Ltd. +// This file is part of Parity Ethereum. + +// Parity Ethereum is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. + +// Parity Ethereum is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with Parity Ethereum. If not, see . + +//! Benchmark RocksDB read performance. +//! The benchmark setup consists in writing `NEEDLES * NEEDLES_TO_HAYSTACK_RATIO` 32-bytes random +//! keys with random values 150 +/- 30 bytes long. With 10 000 keys and a ratio of 100 we get one +//! million keys; ideally the db should be deleted for each benchmark run but in practice it has +//! little impact on the performance numbers for these small database sizes. +//! Allocations (on the Rust side) are counted and printed. +//! +//! Note that this benchmark is not a good way to measure the performance of the database itself; +//! its purpose is to be a tool to gauge the performance of the glue code, or work as a starting point +//! for a more elaborate benchmark of a specific workload. + +const NEEDLES: usize = 10_000; +const NEEDLES_TO_HAYSTACK_RATIO: usize = 100; + +use std::io; +use std::time::{Duration, Instant}; + +use alloc_counter::{count_alloc, AllocCounterSystem}; +use criterion::{black_box, criterion_group, criterion_main, Criterion}; +use ethereum_types::H256; +use rand::{distributions::Uniform, seq::SliceRandom, Rng}; + +use kvdb_rocksdb::{Database, DatabaseConfig}; + +#[global_allocator] +static A: AllocCounterSystem = AllocCounterSystem; + +criterion_group!(benches, get, iter); +criterion_main!(benches); + +/// Opens (or creates) a RocksDB database in the `benches/` folder of the crate with one column +/// family and default options. Needs manual cleanup. +fn open_db() -> Database { + let tempdir_str = "./benches/_rocksdb_bench_get"; + let cfg = DatabaseConfig::with_columns(Some(1)); + let db = Database::open(&cfg, tempdir_str).expect("rocksdb works"); + db +} + +/// Generate `n` random bytes +/- 20%. +/// The variability in the payload size lets us simulate payload allocation patterns: `DBValue` is +/// an `ElasticArray128` so sometimes we save on allocations. +fn n_random_bytes(n: usize) -> Vec { + let mut rng = rand::thread_rng(); + let variability: i64 = rng.gen_range(0, (n / 5) as i64); + let plus_or_minus: i64 = if variability % 2 == 0 { 1 } else { -1 }; + let range = Uniform::from(0..u8::max_value()); + rng.sample_iter(&range).take((n as i64 + plus_or_minus * variability) as usize).collect() +} + +/// Writes `NEEDLES * NEEDLES_TO_HAYSTACK_RATIO` keys to the DB. Keys are random, 32 bytes long and +/// values are random, 120-180 bytes long. Every `NEEDLES_TO_HAYSTACK_RATIO` keys are kept and +/// returned in a `Vec` for and used to benchmark point lookup performance. Keys are sorted +/// lexicographically in the DB, and the benchmark keys are random bytes making the needles are +/// effectively random points in the key set. +fn populate(db: &Database) -> io::Result> { + let mut needles = Vec::with_capacity(NEEDLES); + let mut batch = db.transaction(); + for i in 0..NEEDLES * NEEDLES_TO_HAYSTACK_RATIO { + let key = H256::random(); + if i % NEEDLES_TO_HAYSTACK_RATIO == 0 { + needles.push(key.clone()); + if i % 100_000 == 0 && i > 0 { + println!("[populate] {} keys", i); + } + } + // In ethereum keys are mostly 32 bytes and payloads ~140bytes. + batch.put(Some(0), &key.as_bytes(), &n_random_bytes(140)); + } + db.write(batch)?; + // Clear the overlay + db.flush()?; + Ok(needles) +} + +fn get(c: &mut Criterion) { + let db = open_db(); + let needles = populate(&db).expect("rocksdb works"); + + let mut total_iterations = 0; + let mut total_allocs = 0; + + c.bench_function("get key", |b| { + b.iter_custom(|iterations| { + total_iterations += iterations; + let mut elapsed = Duration::new(0, 0); + // NOTE: counts allocations on the Rust side only + let (alloc_stats, _) = count_alloc(|| { + let start = Instant::now(); + for _ in 0..iterations { + // This has no measurable impact on performance (~30ns) + let needle = needles.choose(&mut rand::thread_rng()).expect("needles is not empty"); + black_box(db.get(Some(0), needle.as_bytes()).unwrap()); + } + elapsed = start.elapsed(); + }); + total_allocs += alloc_stats.0; + elapsed + }); + }); + if total_iterations > 0 { + println!( + "[get key] total: iterations={}, allocations={}; allocations per iter={:.2}\n", + total_iterations, + total_allocs, + total_allocs as f64 / total_iterations as f64 + ); + } + + total_iterations = 0; + total_allocs = 0; + c.bench_function("get key by prefix", |b| { + b.iter_custom(|iterations| { + total_iterations += iterations; + let mut elapsed = Duration::new(0, 0); + // NOTE: counts allocations on the Rust side only + let (alloc_stats, _) = count_alloc(|| { + let start = Instant::now(); + for _ in 0..iterations { + // This has no measurable impact on performance (~30ns) + let needle = needles.choose(&mut rand::thread_rng()).expect("needles is not empty"); + black_box(db.get_by_prefix(Some(0), &needle.as_bytes()[..8]).unwrap()); + } + elapsed = start.elapsed(); + }); + total_allocs += alloc_stats.0; + elapsed + }); + }); + if total_iterations > 0 { + println!( + "[get key by prefix] total: iterations={}, allocations={}; allocations per iter={:.2}\n", + total_iterations, + total_allocs, + total_allocs as f64 / total_iterations as f64 + ); + } +} + +fn iter(c: &mut Criterion) { + let db = open_db(); + let mut total_iterations = 0; + let mut total_allocs = 0; + + c.bench_function("iterate over 1k keys", |b| { + b.iter_custom(|iterations| { + total_iterations += iterations; + let mut elapsed = Duration::new(0, 0); + // NOTE: counts allocations on the Rust side only + let (alloc_stats, _) = count_alloc(|| { + let start = Instant::now(); + for _ in 0..iterations { + black_box(db.iter(Some(0)).take(1000).collect::>()); + } + elapsed = start.elapsed(); + }); + total_allocs += alloc_stats.0; + elapsed + }); + }); + if total_iterations > 0 { + println!( + "[iterate over 1k keys] total: iterations={}, allocations={}; allocations per iter={:.2}\n", + total_iterations, + total_allocs, + total_allocs as f64 / total_iterations as f64 + ); + } + + total_allocs = 0; + total_iterations = 0; + c.bench_function("single key from iterator", |b| { + b.iter_custom(|iterations| { + total_iterations += iterations; + let mut elapsed = Duration::new(0, 0); + // NOTE: counts allocations on the Rust side only + let (alloc_stats, _) = count_alloc(|| { + let start = Instant::now(); + for _ in 0..iterations { + black_box(db.iter(Some(0)).next().unwrap()); + } + elapsed = start.elapsed(); + }); + total_allocs += alloc_stats.0; + elapsed + }); + }); + if total_iterations > 0 { + println!( + "[single key from iterator] total: iterations={}, allocations={}; allocations per iter={:.2}\n", + total_iterations, + total_allocs, + total_allocs as f64 / total_iterations as f64 + ); + } +}