From 52749c51da066687facdc2a9befc82d49d8029f9 Mon Sep 17 00:00:00 2001 From: David Palm Date: Fri, 29 Nov 2019 11:47:20 +0100 Subject: [PATCH 1/7] [kvdb-rocksdb] Add benchmark for point lookups --- kvdb-rocksdb/Cargo.toml | 9 +- kvdb-rocksdb/benches/.gitignore | 1 + kvdb-rocksdb/benches/bench_read_perf.rs | 114 ++++++++++++++++++++++++ 3 files changed, 123 insertions(+), 1 deletion(-) create mode 100644 kvdb-rocksdb/benches/.gitignore create mode 100644 kvdb-rocksdb/benches/bench_read_perf.rs diff --git a/kvdb-rocksdb/Cargo.toml b/kvdb-rocksdb/Cargo.toml index c01b51c6a..8f46b9873 100644 --- a/kvdb-rocksdb/Cargo.toml +++ b/kvdb-rocksdb/Cargo.toml @@ -7,6 +7,10 @@ description = "kvdb implementation backed by rocksDB" license = "GPL-3.0" edition = "2018" +[[bench]] +name = "bench_read_perf" +harness = false + [dependencies] elastic-array = "0.10.2" fs-swap = "0.2.4" @@ -20,5 +24,8 @@ rocksdb = { version = "0.13", features = ["snappy"], default-features = false } owning_ref = "0.4.0" [dev-dependencies] -tempdir = "0.3.7" +alloc_counter = "0.0.3" +criterion = "0.3" ethereum-types = { version = "0.8.0", path = "../ethereum-types" } +rand = "0.7.2" +tempdir = "0.3.7" diff --git a/kvdb-rocksdb/benches/.gitignore b/kvdb-rocksdb/benches/.gitignore new file mode 100644 index 000000000..85954e328 --- /dev/null +++ b/kvdb-rocksdb/benches/.gitignore @@ -0,0 +1 @@ +_rocksdb_bench_get diff --git a/kvdb-rocksdb/benches/bench_read_perf.rs b/kvdb-rocksdb/benches/bench_read_perf.rs new file mode 100644 index 000000000..4a239aace --- /dev/null +++ b/kvdb-rocksdb/benches/bench_read_perf.rs @@ -0,0 +1,114 @@ +// Copyright 2015-2019 Parity Technologies (UK) Ltd. +// This file is part of Parity Ethereum. + +// Parity Ethereum is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. 
+ +// Parity Ethereum is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with Parity Ethereum. If not, see <http://www.gnu.org/licenses/>. + +//! Benchmark RocksDB read performance. +//! The benchmark setup consists in writing `NEEDLES * NEEDLES_TO_HAYSTACK_RATIO` 32-bytes random +//! keys with random values 150 +/- 30 bytes long. With 10 000 keys and a ratio of 100 we get 1 +//! million keys; ideally the db should be deleted for each benchmark run but in practice it has +//! little impact on the performance numbers for these small database sizes. +//! Allocations on the Rust side are counted and printed. + +const NEEDLES: usize = 10_000; +const NEEDLES_TO_HAYSTACK_RATIO: usize = 100; + +use std::io; +use std::time::Instant; + +use alloc_counter::{AllocCounterSystem, count_alloc}; +use criterion::{Criterion, criterion_group, criterion_main, black_box}; +use elastic_array::core_::time::Duration; +use ethereum_types::H256; +use rand::{distributions::Uniform, Rng, seq::SliceRandom}; + +use kvdb_rocksdb::{Database, DatabaseConfig}; + +#[global_allocator] +static A: AllocCounterSystem = AllocCounterSystem; + +criterion_group!(benches, get); +criterion_main!(benches); + +/// Opens (or creates) a RocksDB database in the `benches/` folder of the crate with one column +/// family and default options. Needs manual cleanup. +fn open_db() -> Database { + let tempdir_str = "./benches/_rocksdb_bench_get"; + let cfg = DatabaseConfig::with_columns(Some(1)); + let db = Database::open(&cfg, tempdir_str).expect("rocksdb works"); + db +} + +/// Generate `n` random bytes +/- 20%. 
+fn n_random_bytes(n: usize) -> Vec { + let mut rng = rand::thread_rng(); + let variability: i64 = rng.gen_range(0, (n as f64 * 0.2) as i64); + let plus_or_minus: i64 = if variability % 2 == 0 { 1 } else { -1 }; + let range = Uniform::from(0..u8::max_value()); + rng.sample_iter(&range).take((n as i64 + plus_or_minus * variability) as usize).collect() +} + +/// Writes `NEEDLES * NEEDLES_TO_HAYSTACK_RATIO` keys to the DB. Keys are random, 32 bytes +/// long and values are random, 120-180 bytes long. Every `NEEDLES_TO_HAYSTACK_RATIO` keys are kept +/// and returned in a `Vec` for use to benchmark point lookup performance. As keys are sorted +/// lexicographically in the DB, and random bytes are used, the needles are effectively random +/// points in the key set. +fn populate(db: &Database) -> io::Result> { + let mut needles = Vec::with_capacity(NEEDLES); + let mut batch = db.transaction(); + for i in 0..NEEDLES * NEEDLES_TO_HAYSTACK_RATIO { + let key = H256::random(); + if i % NEEDLES_TO_HAYSTACK_RATIO == 0 { + needles.push(key.clone()); + if i % 100_000 == 0 && i > 0{ + println!("[populate] {} keys", i); + } + } + // In ethereum keys are mostly 32 bytes and payloads ~140bytes. 
+ batch.put(Some(0), &key.as_bytes(), &n_random_bytes(150)); + } + db.write(batch)?; + // Clear the overlay + db.flush()?; + Ok(needles) +} + +fn get(c: &mut Criterion) { + let db = open_db(); + let needles = populate(&db).expect("rocksdb works"); + + let mut total_iterations = 0; + let mut total_allocs = 0; + + c.bench_function("get key (pinned)", |b| { + b.iter_custom(|iterations| { + total_iterations += iterations; + let mut elapsed = Duration::new(0, 0); + let (alloc_stats, _) = count_alloc(|| { + let start = Instant::now(); + for _ in 0..iterations { + // This has no measurable impact on performance (~30ns) + let needle = needles.choose(&mut rand::thread_rng()).expect("needles is not empty"); + let _ = db.get(black_box(Some(0)), black_box(needle.as_bytes())).unwrap(); + } + elapsed = start.elapsed(); + }); + total_allocs += alloc_stats.0; + elapsed + }) + }); + println!("[get key (pinned)] total: iters={}, allocs={}; allocs per iter={:.2}", + total_iterations, total_allocs, total_allocs as f64 / total_iterations as f64 + ); +} From 566f6a315372d937f84b6eeda9dc0b24189dbb44 Mon Sep 17 00:00:00 2001 From: David Palm Date: Fri, 29 Nov 2019 17:29:21 +0100 Subject: [PATCH 2/7] Document variability and how it influences allocations/iter Add iter benchmark --- kvdb-rocksdb/benches/bench_read_perf.rs | 59 ++++++++++++++++++++----- 1 file changed, 48 insertions(+), 11 deletions(-) diff --git a/kvdb-rocksdb/benches/bench_read_perf.rs b/kvdb-rocksdb/benches/bench_read_perf.rs index 4a239aace..bc2dcc502 100644 --- a/kvdb-rocksdb/benches/bench_read_perf.rs +++ b/kvdb-rocksdb/benches/bench_read_perf.rs @@ -27,18 +27,18 @@ const NEEDLES_TO_HAYSTACK_RATIO: usize = 100; use std::io; use std::time::Instant; -use alloc_counter::{AllocCounterSystem, count_alloc}; -use criterion::{Criterion, criterion_group, criterion_main, black_box}; +use alloc_counter::{count_alloc, AllocCounterSystem}; +use criterion::{black_box, criterion_group, criterion_main, Criterion}; use 
elastic_array::core_::time::Duration; use ethereum_types::H256; -use rand::{distributions::Uniform, Rng, seq::SliceRandom}; +use rand::{distributions::Uniform, seq::SliceRandom, Rng}; use kvdb_rocksdb::{Database, DatabaseConfig}; #[global_allocator] static A: AllocCounterSystem = AllocCounterSystem; -criterion_group!(benches, get); +criterion_group!(benches, get, iter); criterion_main!(benches); /// Opens (or creates) a RocksDB database in the `benches/` folder of the crate with one column @@ -51,6 +51,8 @@ fn open_db() -> Database { } /// Generate `n` random bytes +/- 20%. +/// The variability in the payload size lets us simulate payload allocation patterns: `DBValue` is +/// an `ElasticArray128` so sometimes we save on allocations. fn n_random_bytes(n: usize) -> Vec { let mut rng = rand::thread_rng(); let variability: i64 = rng.gen_range(0, (n as f64 * 0.2) as i64); @@ -71,12 +73,12 @@ fn populate(db: &Database) -> io::Result> { let key = H256::random(); if i % NEEDLES_TO_HAYSTACK_RATIO == 0 { needles.push(key.clone()); - if i % 100_000 == 0 && i > 0{ + if i % 100_000 == 0 && i > 0 { println!("[populate] {} keys", i); } } // In ethereum keys are mostly 32 bytes and payloads ~140bytes. 
- batch.put(Some(0), &key.as_bytes(), &n_random_bytes(150)); + batch.put(Some(0), &key.as_bytes(), &n_random_bytes(140)); } db.write(batch)?; // Clear the overlay @@ -91,7 +93,7 @@ fn get(c: &mut Criterion) { let mut total_iterations = 0; let mut total_allocs = 0; - c.bench_function("get key (pinned)", |b| { + c.bench_function("get key", |b| { b.iter_custom(|iterations| { total_iterations += iterations; let mut elapsed = Duration::new(0, 0); @@ -106,9 +108,44 @@ fn get(c: &mut Criterion) { }); total_allocs += alloc_stats.0; elapsed - }) + }); }); - println!("[get key (pinned)] total: iters={}, allocs={}; allocs per iter={:.2}", - total_iterations, total_allocs, total_allocs as f64 / total_iterations as f64 - ); + if total_iterations > 0 { + println!( + "[get key] total: iterations={}, allocations={}; allocations per iter={:.2}", + total_iterations, + total_allocs, + total_allocs as f64 / total_iterations as f64 + ); + } +} + +fn iter(c: &mut Criterion) { + let db = open_db(); + let mut total_iterations = 0; + let mut total_allocs = 0; + + c.bench_function("iterate over 1k keys", |b| { + b.iter_custom(|iterations| { + total_iterations += iterations; + let mut elapsed = Duration::new(0, 0); + let (alloc_stats, _) = count_alloc(|| { + let start = Instant::now(); + for _ in 0..iterations { + black_box(db.iter(Some(0)).take(1000).collect::>()); + } + elapsed = start.elapsed(); + }); + total_allocs += alloc_stats.0; + elapsed + }); + }); + if total_iterations > 0 { + println!( + "[iterate over 1k keys] total: iterations={}, allocations={}; allocations per iter={:.2}", + total_iterations, + total_allocs, + total_allocs as f64 / total_iterations as f64 / 1000.0 + ); + } } From 570b68fa8bf4257cc52c01d67cde078f51842f6e Mon Sep 17 00:00:00 2001 From: David Palm Date: Fri, 29 Nov 2019 18:17:23 +0100 Subject: [PATCH 3/7] Add benchmarks for get_by_prefix and getting a single item off an iterator --- kvdb-rocksdb/benches/bench_read_perf.rs | 62 +++++++++++++++++++++++-- 1 file 
changed, 58 insertions(+), 4 deletions(-) diff --git a/kvdb-rocksdb/benches/bench_read_perf.rs b/kvdb-rocksdb/benches/bench_read_perf.rs index bc2dcc502..95e892422 100644 --- a/kvdb-rocksdb/benches/bench_read_perf.rs +++ b/kvdb-rocksdb/benches/bench_read_perf.rs @@ -19,7 +19,7 @@ //! keys with random values 150 +/- 30 bytes long. With 10 000 keys and a ratio of 100 we get 1 //! million keys; ideally the db should be deleted for each benchmark run but in practice it has //! little impact on the performance numbers for these small database sizes. -//! Allocations on the Rust side are counted and printed. +//! Allocations (on the Rust side) are counted and printed. const NEEDLES: usize = 10_000; const NEEDLES_TO_HAYSTACK_RATIO: usize = 100; @@ -102,7 +102,7 @@ fn get(c: &mut Criterion) { for _ in 0..iterations { // This has no measurable impact on performance (~30ns) let needle = needles.choose(&mut rand::thread_rng()).expect("needles is not empty"); - let _ = db.get(black_box(Some(0)), black_box(needle.as_bytes())).unwrap(); + black_box(db.get(Some(0), needle.as_bytes()).unwrap()); } elapsed = start.elapsed(); }); @@ -112,7 +112,35 @@ fn get(c: &mut Criterion) { }); if total_iterations > 0 { println!( - "[get key] total: iterations={}, allocations={}; allocations per iter={:.2}", + "[get key] total: iterations={}, allocations={}; allocations per iter={:.2}\n", + total_iterations, + total_allocs, + total_allocs as f64 / total_iterations as f64 + ); + } + + total_iterations = 0; + total_allocs = 0; + c.bench_function("get key by prefix", |b| { + b.iter_custom(|iterations| { + total_iterations += iterations; + let mut elapsed = Duration::new(0, 0); + let (alloc_stats, _) = count_alloc(|| { + let start = Instant::now(); + for _ in 0..iterations { + // This has no measurable impact on performance (~30ns) + let needle = needles.choose(&mut rand::thread_rng()).expect("needles is not empty"); + black_box(db.get_by_prefix(Some(0), &needle.as_bytes()[..8]).unwrap()); + } + 
elapsed = start.elapsed(); + }); + total_allocs += alloc_stats.0; + elapsed + }); + }); + if total_iterations > 0 { + println!( + "[get key by prefix] total: iterations={}, allocations={}; allocations per iter={:.2}\n", total_iterations, total_allocs, total_allocs as f64 / total_iterations as f64 @@ -142,10 +170,36 @@ fn iter(c: &mut Criterion) { }); if total_iterations > 0 { println!( - "[iterate over 1k keys] total: iterations={}, allocations={}; allocations per iter={:.2}", + "[iterate over 1k keys] total: iterations={}, allocations={}; allocations per iter={:.2}\n", total_iterations, total_allocs, total_allocs as f64 / total_iterations as f64 / 1000.0 ); } + + total_allocs = 0; + total_iterations = 0; + c.bench_function("single key from iterator", |b| { + b.iter_custom(|iterations| { + total_iterations += iterations; + let mut elapsed = Duration::new(0, 0); + let (alloc_stats, _) = count_alloc(|| { + let start = Instant::now(); + for _ in 0..iterations { + black_box(db.iter(Some(0)).next().unwrap()); + } + elapsed = start.elapsed(); + }); + total_allocs += alloc_stats.0; + elapsed + }); + }); + if total_iterations > 0 { + println!( + "[single key from iterator] total: iterations={}, allocations={}; allocations per iter={:.2}\n", + total_iterations, + total_allocs, + total_allocs as f64 / total_iterations as f64 + ); + } } From e839dd1e9a3d6d3368092de031e49328e782e26d Mon Sep 17 00:00:00 2001 From: David Palm Date: Sat, 30 Nov 2019 08:37:21 +0100 Subject: [PATCH 4/7] Bump alloc_counter --- kvdb-rocksdb/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kvdb-rocksdb/Cargo.toml b/kvdb-rocksdb/Cargo.toml index 8f46b9873..2049da4ba 100644 --- a/kvdb-rocksdb/Cargo.toml +++ b/kvdb-rocksdb/Cargo.toml @@ -24,7 +24,7 @@ rocksdb = { version = "0.13", features = ["snappy"], default-features = false } owning_ref = "0.4.0" [dev-dependencies] -alloc_counter = "0.0.3" +alloc_counter = "0.0.4" criterion = "0.3" ethereum-types = { version = "0.8.0", 
path = "../ethereum-types" } rand = "0.7.2" From 18a064d96425bb3060d3fec3fc47a19dfc934e32 Mon Sep 17 00:00:00 2001 From: David Palm Date: Wed, 4 Dec 2019 08:49:22 +0100 Subject: [PATCH 5/7] More docs and notes --- kvdb-rocksdb/benches/bench_read_perf.rs | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/kvdb-rocksdb/benches/bench_read_perf.rs b/kvdb-rocksdb/benches/bench_read_perf.rs index 95e892422..7e1327903 100644 --- a/kvdb-rocksdb/benches/bench_read_perf.rs +++ b/kvdb-rocksdb/benches/bench_read_perf.rs @@ -16,10 +16,14 @@ //! Benchmark RocksDB read performance. //! The benchmark setup consists in writing `NEEDLES * NEEDLES_TO_HAYSTACK_RATIO` 32-bytes random -//! keys with random values 150 +/- 30 bytes long. With 10 000 keys and a ratio of 100 we get 1 +//! keys with random values 140 +/- 28 bytes long. With 10 000 keys and a ratio of 100 we get one //! million keys; ideally the db should be deleted for each benchmark run but in practice it has //! little impact on the performance numbers for these small database sizes. //! Allocations (on the Rust side) are counted and printed. +//! +//! Note that this benchmark is not a good way to measure the performance of the database itself; +//! its purpose is to be a tool to gauge the performance of the glue code, or work as a starting point +//! for a more elaborate benchmark of a specific workload. const NEEDLES: usize = 10_000; const NEEDLES_TO_HAYSTACK_RATIO: usize = 100; @@ -61,11 +65,11 @@ fn n_random_bytes(n: usize) -> Vec { rng.sample_iter(&range).take((n as i64 + plus_or_minus * variability) as usize).collect() } -/// Writes `NEEDLES * NEEDLES_TO_HAYSTACK_RATIO` keys to the DB. Keys are random, 32 bytes -/// long and values are random, 120-180 bytes long. Every `NEEDLES_TO_HAYSTACK_RATIO` keys are kept -/// and returned in a `Vec` for use to benchmark point lookup performance. 
As keys are sorted -/// lexicographically in the DB, and random bytes are used, the needles are effectively random -/// points in the key set. +/// Writes `NEEDLES * NEEDLES_TO_HAYSTACK_RATIO` keys to the DB. Keys are random, 32 bytes long and +/// values are random, roughly 112-168 bytes long (140 +/- 20%). Every `NEEDLES_TO_HAYSTACK_RATIO`-th key is kept and +/// returned in a `Vec` and used to benchmark point lookup performance. Keys are sorted +/// lexicographically in the DB, and the benchmark keys are random bytes, making the needles +/// effectively random points in the key set. fn populate(db: &Database) -> io::Result> { let mut needles = Vec::with_capacity(NEEDLES); let mut batch = db.transaction(); @@ -97,6 +101,7 @@ fn get(c: &mut Criterion) { b.iter_custom(|iterations| { total_iterations += iterations; let mut elapsed = Duration::new(0, 0); + // NOTE: counts allocations on the Rust side only let (alloc_stats, _) = count_alloc(|| { let start = Instant::now(); for _ in 0..iterations { @@ -125,6 +130,7 @@ fn get(c: &mut Criterion) { b.iter_custom(|iterations| { total_iterations += iterations; let mut elapsed = Duration::new(0, 0); + // NOTE: counts allocations on the Rust side only let (alloc_stats, _) = count_alloc(|| { let start = Instant::now(); for _ in 0..iterations { @@ -157,6 +163,7 @@ fn iter(c: &mut Criterion) { b.iter_custom(|iterations| { total_iterations += iterations; let mut elapsed = Duration::new(0, 0); + // NOTE: counts allocations on the Rust side only let (alloc_stats, _) = count_alloc(|| { let start = Instant::now(); for _ in 0..iterations { @@ -173,7 +180,7 @@ fn iter(c: &mut Criterion) { "[iterate over 1k keys] total: iterations={}, allocations={}; allocations per iter={:.2}\n", total_iterations, total_allocs, - total_allocs as f64 / total_iterations as f64 / 1000.0 + total_allocs as f64 / total_iterations as f64 ); } @@ -183,6 +190,7 @@ fn iter(c: &mut Criterion) { b.iter_custom(|iterations| { total_iterations += iterations; let mut elapsed = 
Duration::new(0, 0); + // NOTE: counts allocations on the Rust side only let (alloc_stats, _) = count_alloc(|| { let start = Instant::now(); for _ in 0..iterations { From 3823c229d1b03b6b7e6a7b21993363fa1adcf9c6 Mon Sep 17 00:00:00 2001 From: David Date: Tue, 10 Dec 2019 19:49:02 +0100 Subject: [PATCH 6/7] review grumble --- kvdb-rocksdb/benches/bench_read_perf.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/kvdb-rocksdb/benches/bench_read_perf.rs b/kvdb-rocksdb/benches/bench_read_perf.rs index 7e1327903..c40f0bb60 100644 --- a/kvdb-rocksdb/benches/bench_read_perf.rs +++ b/kvdb-rocksdb/benches/bench_read_perf.rs @@ -29,11 +29,10 @@ const NEEDLES: usize = 10_000; const NEEDLES_TO_HAYSTACK_RATIO: usize = 100; use std::io; -use std::time::Instant; +use std::time::{Duration, Instant}; use alloc_counter::{count_alloc, AllocCounterSystem}; use criterion::{black_box, criterion_group, criterion_main, Criterion}; -use elastic_array::core_::time::Duration; use ethereum_types::H256; use rand::{distributions::Uniform, seq::SliceRandom, Rng}; From 1434984a433a83ac25a38e875ebe0b1ac1182a0b Mon Sep 17 00:00:00 2001 From: David Date: Tue, 10 Dec 2019 19:50:18 +0100 Subject: [PATCH 7/7] review grumbles --- kvdb-rocksdb/benches/bench_read_perf.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kvdb-rocksdb/benches/bench_read_perf.rs b/kvdb-rocksdb/benches/bench_read_perf.rs index c40f0bb60..350fdf2eb 100644 --- a/kvdb-rocksdb/benches/bench_read_perf.rs +++ b/kvdb-rocksdb/benches/bench_read_perf.rs @@ -58,7 +58,7 @@ fn open_db() -> Database { /// an `ElasticArray128` so sometimes we save on allocations. 
fn n_random_bytes(n: usize) -> Vec { let mut rng = rand::thread_rng(); - let variability: i64 = rng.gen_range(0, (n as f64 * 0.2) as i64); + let variability: i64 = rng.gen_range(0, (n / 5) as i64); let plus_or_minus: i64 = if variability % 2 == 0 { 1 } else { -1 }; let range = Uniform::from(0..u8::max_value()); rng.sample_iter(&range).take((n as i64 + plus_or_minus * variability) as usize).collect()