chore(db): microbenchmarking for table serialization and db insertion (#513)

Co-authored-by: Georgios Konstantopoulos <[email protected]>
joshieDo and gakonst authored Jan 31, 2023
1 parent 6ef4882 commit cc43b72
Showing 19 changed files with 722 additions and 64 deletions.
6 changes: 6 additions & 0 deletions .github/scripts/compare_iai.sh
@@ -0,0 +1,6 @@
#!/bin/bash

# This script should be run on the main branch, after running the iai benchmarks on the target branch.

# If the main branch has a better iai performance, exits in error. It ignores L2 differences, since they seem hard to stabilize across runs.
cargo bench --package reth-db --bench iai | tee /dev/tty | awk '/((L1)|(Ins)|(RAM)|(Est))+.*\(\+[1-9]+[0-9]*\..*%\)/{f=1} END{exit f}'
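
For reference, the gate's behavior can be mirrored in Rust. This is a minimal sketch, not part of the change: the `regex` crate and the sample report lines are assumptions, and the pattern is a simplified but equivalent form of the awk one.

use regex::Regex;

/// Returns true if any report line shows a regression of at least 1% on a tracked metric.
/// L2 is deliberately absent from the alternation, and "+0.x%" deltas never match.
fn has_regression(report: &str) -> bool {
    let re = Regex::new(r"(L1|Ins|RAM|Est).*\(\+[1-9][0-9]*\..*%\)").unwrap();
    report.lines().any(|line| re.is_match(line))
}

fn main() {
    // Illustrative iai-style lines, not captured benchmark output.
    assert!(has_regression("  Instructions:      1735 (+5.2%)"));
    assert!(!has_regression("  L2 Accesses:          1 (+8.0%)")); // L2 ignored
    assert!(!has_regression("  Instructions:      1735 (+0.28%)")); // sub-1% ignored
}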
40 changes: 40 additions & 0 deletions .github/workflows/ci.yml
@@ -182,3 +182,43 @@ jobs:
      - uses: Swatinem/rust-cache@v2
      - name: Check if documentation builds
        run: RUSTDOCFLAGS="-D warnings" cargo doc --all --no-deps --all-features --document-private-items

  benchmarks:
    # Pin to `20.04` instead of `ubuntu-latest`, until ubuntu-latest migration is complete
    # See also <https://github.com/foundry-rs/foundry/issues/3827>
    runs-on: ubuntu-20.04
    steps:
      - name: Install Valgrind
        run: |
          sudo apt install valgrind
      - name: Checkout PR sources
        uses: actions/checkout@v3
        with:
          ref: main

      - uses: Swatinem/rust-cache@v1
        with:
          cache-on-failure: true

      - name: Generate test-vectors
        uses: actions-rs/cargo@v1
        with:
          command: run
          args: --bin reth -- test-vectors tables

      - name: Set main baseline
        uses: actions-rs/cargo@v1
        with:
          command: bench
          args: --package reth-db --bench iai

      - name: Checkout main sources
        uses: actions/checkout@v3
        with:
          clean: false

      - name: Compare PR benchmark
        shell: 'script -q -e -c "bash {0}"' # required to workaround /dev/tty not being available
        run: |
          ./.github/scripts/compare_iai.sh
3 changes: 3 additions & 0 deletions .gitignore
@@ -15,3 +15,6 @@ target/

# Generated by MacOS
.DS_Store

# Generated test-vectors for DB
testdata/micro/db
3 changes: 3 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default.

5 changes: 4 additions & 1 deletion bin/reth/Cargo.toml
@@ -8,7 +8,7 @@ readme = "README.md"

[dependencies]
# reth
reth-primitives = { path = "../../crates/primitives" }
reth-primitives = { path = "../../crates/primitives", features = ["arbitrary"] }
reth-db = {path = "../../crates/storage/db", features = ["mdbx", "test-utils"] }
# TODO: Temporary use of the test-utils feature
reth-provider = { path = "../../crates/storage/provider", features = ["test-utils"] }
@@ -44,6 +44,9 @@ metrics = "0.20.1"
metrics-exporter-prometheus = { version = "0.11.0", features = ["http-listener"] }
metrics-util = "0.14.0"

# test vectors generation
proptest = "1.0"

# misc
eyre = "0.6.8"
clap = { version = "4.0", features = ["derive", "cargo"] }
6 changes: 5 additions & 1 deletion bin/reth/src/cli.rs
@@ -2,7 +2,7 @@
use crate::{
    db,
    dirs::{LogsDir, PlatformPath},
    node, p2p, stage, test_eth_chain,
    node, p2p, stage, test_eth_chain, test_vectors,
};
use clap::{ArgAction, Args, Parser, Subcommand};
use reth_tracing::{
@@ -25,6 +25,7 @@ pub async fn run() -> eyre::Result<()> {
        Commands::Db(command) => command.execute().await,
        Commands::Stage(command) => command.execute().await,
        Commands::P2P(command) => command.execute().await,
        Commands::TestVectors(command) => command.execute().await,
    }
}

@@ -51,6 +52,9 @@ pub enum Commands {
    /// Run Ethereum blockchain tests
    #[command(name = "test-chain")]
    TestEthChain(test_eth_chain::Command),
    /// Generate Test Vectors
    #[command(name = "test-vectors")]
    TestVectors(test_vectors::Command),
}

#[derive(Parser)]
1 change: 1 addition & 0 deletions bin/reth/src/db/mod.rs
@@ -170,6 +170,7 @@ impl Command {
            BlockBodies,
            BlockOmmers,
            TxHashNumber,
            PlainStorageState,
            PlainAccountState,
            BlockTransitionIndex,
            TxTransitionIndex,
1 change: 1 addition & 0 deletions bin/reth/src/lib.rs
@@ -14,6 +14,7 @@ pub mod p2p;
pub mod prometheus_exporter;
pub mod stage;
pub mod test_eth_chain;
pub mod test_vectors;
pub use reth_staged_sync::utils;

use clap::Args;
33 changes: 33 additions & 0 deletions bin/reth/src/test_vectors/mod.rs
@@ -0,0 +1,33 @@
//! Command for generating test vectors.
use clap::{Parser, Subcommand};

mod tables;

/// Generate test-vectors for different data types.
#[derive(Debug, Parser)]
pub struct Command {
    #[clap(subcommand)]
    command: Subcommands,
}

#[derive(Subcommand, Debug)]
/// `reth test-vectors` subcommands
pub enum Subcommands {
    /// Generates test vectors for specified tables. If no table is specified, generate for all.
    Tables {
        /// List of table names. Case-sensitive.
        names: Vec<String>,
    },
}

impl Command {
    /// Execute the command
    pub async fn execute(self) -> eyre::Result<()> {
        match self.command {
            Subcommands::Tables { names } => {
                tables::generate_vectors(names)?;
            }
        }
        Ok(())
    }
}
170 changes: 170 additions & 0 deletions bin/reth/src/test_vectors/tables.rs
@@ -0,0 +1,170 @@
use std::collections::HashSet;

use eyre::Result;
use proptest::{
    arbitrary::Arbitrary,
    prelude::{any_with, ProptestConfig},
    strategy::{Strategy, ValueTree},
    test_runner::TestRunner,
};
use reth_db::{
    table::{DupSort, Table},
    tables,
};
use tracing::error;

const VECTORS_FOLDER: &str = "testdata/micro/db";
const PER_TABLE: usize = 1000;

/// Generates test vectors for specified `tables`. If list is empty, then generate for all tables.
pub(crate) fn generate_vectors(mut tables: Vec<String>) -> Result<()> {
    let mut runner = TestRunner::new(ProptestConfig::default());
    std::fs::create_dir_all(VECTORS_FOLDER)?;

    macro_rules! generate_vector {
        ($table_type:ident, $per_table:expr, TABLE) => {
            generate_table_vector::<tables::$table_type>(&mut runner, $per_table)?;
        };
        ($table_type:ident, $per_table:expr, DUPSORT) => {
            generate_dupsort_vector::<tables::$table_type>(&mut runner, $per_table)?;
        };
    }

    macro_rules! generate {
        ([$(($table_type:ident, $per_table:expr, $table_or_dup:tt)),*]) => {
            let all_tables = vec![$(stringify!($table_type).to_string(),)*];

            if tables.is_empty() {
                tables = all_tables;
            }

            for table in tables {
                match table.as_str() {
                    $(
                        stringify!($table_type) => {
                            println!("Generating test vectors for {} <{}>.", stringify!($table_or_dup), tables::$table_type::NAME);

                            generate_vector!($table_type, $per_table, $table_or_dup);
                        },
                    )*
                    _ => {
                        error!(target: "reth::cli", "Unknown table: {}", table);
                    }
                }
            }
        }
    }

    generate!([
        (CanonicalHeaders, PER_TABLE, TABLE),
        (HeaderTD, PER_TABLE, TABLE),
        (HeaderNumbers, PER_TABLE, TABLE),
        (Headers, PER_TABLE, TABLE),
        (BlockBodies, PER_TABLE, TABLE),
        (BlockOmmers, 100, TABLE),
        (TxHashNumber, PER_TABLE, TABLE),
        (BlockTransitionIndex, PER_TABLE, TABLE),
        (TxTransitionIndex, PER_TABLE, TABLE),
        (Transactions, 100, TABLE),
        (PlainStorageState, PER_TABLE, DUPSORT),
        (PlainAccountState, PER_TABLE, TABLE)
    ]);

    Ok(())
}
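
To make the macro plumbing concrete: for a single entry, `generate!` expands to roughly the following, shown in the context of `generate_vectors` above. This is a hand-expanded sketch for illustration, not compiler output.

// Rough expansion of `generate!([(Headers, PER_TABLE, TABLE)])`:
let all_tables = vec!["Headers".to_string()];
if tables.is_empty() {
    tables = all_tables;
}
for table in tables {
    match table.as_str() {
        "Headers" => {
            println!("Generating test vectors for {} <{}>.", "TABLE", tables::Headers::NAME);
            generate_table_vector::<tables::Headers>(&mut runner, PER_TABLE)?;
        }
        _ => {
            error!(target: "reth::cli", "Unknown table: {}", table);
        }
    }
}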

/// Generates test-vectors for normal tables. Keys are sorted and not repeated.
fn generate_table_vector<T: Table>(runner: &mut TestRunner, per_table: usize) -> Result<()>
where
    T::Key: Arbitrary + serde::Serialize + Ord + std::hash::Hash,
    T::Value: Arbitrary + serde::Serialize,
{
    let mut rows = vec![];
    let mut seen_keys = HashSet::new();
    let strat = proptest::collection::vec(
        any_with::<(T::Key, T::Value)>((
            <T::Key as Arbitrary>::Parameters::default(),
            <T::Value as Arbitrary>::Parameters::default(),
        )),
        per_table - rows.len(),
    )
    .no_shrink()
    .boxed();

    while rows.len() < per_table {
        // Generate all `per_table` rows: (Key, Value)
        rows.extend(
            &mut strat
                .new_tree(runner)
                .map_err(|e| eyre::eyre!("{e}"))?
                .current()
                .into_iter()
                .filter(|e| seen_keys.insert(e.0.clone())),
        );
    }
    // Sort them by `Key`
    rows.sort_by(|a, b| a.0.cmp(&b.0));

    save_to_file::<T>(rows)
}

/// Generates test-vectors for DUPSORT tables. Each key has multiple (subkey, value). Keys and
/// subkeys are sorted.
fn generate_dupsort_vector<T: Table>(runner: &mut TestRunner, per_table: usize) -> Result<()>
where
    T: DupSort,
    T::Key: Arbitrary + serde::Serialize + Ord + std::hash::Hash,
    T::Value: Arbitrary + serde::Serialize + Ord,
{
    let mut rows = vec![];

    // We want to control our repeated keys
    let mut seen_keys = HashSet::new();

    let strat_values = proptest::collection::vec(
        any_with::<T::Value>(<T::Value as Arbitrary>::Parameters::default()),
        100..300,
    )
    .no_shrink()
    .boxed();

    let strat_keys =
        any_with::<T::Key>(<T::Key as Arbitrary>::Parameters::default()).no_shrink().boxed();

    while rows.len() < per_table {
        let key: T::Key = strat_keys.new_tree(runner).map_err(|e| eyre::eyre!("{e}"))?.current();

        if !seen_keys.insert(key.clone()) {
            continue
        }

        let mut values: Vec<T::Value> =
            strat_values.new_tree(runner).map_err(|e| eyre::eyre!("{e}"))?.current();

        values.sort();

        for value in values {
            rows.push((key.clone(), value));
        }
    }

    // Sort them by `Key`
    rows.sort_by(|a, b| a.0.cmp(&b.0));

    save_to_file::<T>(rows)
}

/// Save rows to file.
fn save_to_file<T: Table>(rows: Vec<(T::Key, T::Value)>) -> eyre::Result<()>
where
    T::Key: serde::Serialize,
    T::Value: serde::Serialize,
{
    serde_json::to_writer_pretty(
        std::io::BufWriter::new(
            std::fs::File::create(format!("{VECTORS_FOLDER}/{}.json", T::NAME)).unwrap(),
        ),
        &rows,
    )
    .map_err(|e| eyre::eyre!({ e }))
}
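
The counterpart of `save_to_file` — reading the generated vectors back, e.g. in a benchmark setup — could look like the sketch below. `load_vectors` is a hypothetical helper for illustration, not part of this commit, and it assumes the key/value types also implement `Deserialize`.

use std::{fs::File, io::BufReader};
use reth_db::table::Table;

/// Hypothetical helper: reads the JSON written by `save_to_file` above.
fn load_vectors<T: Table>() -> eyre::Result<Vec<(T::Key, T::Value)>>
where
    T::Key: serde::de::DeserializeOwned,
    T::Value: serde::de::DeserializeOwned,
{
    let path = format!("testdata/micro/db/{}.json", T::NAME);
    let reader = BufReader::new(File::open(path)?);
    Ok(serde_json::from_reader(reader)?)
}

// Usage sketch, e.g. in a criterion/iai setup step:
// let headers = load_vectors::<reth_db::tables::Headers>()?;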
7 changes: 4 additions & 3 deletions crates/primitives/src/storage.rs
@@ -1,9 +1,10 @@
use super::{H256, U256};
use reth_codecs::Compact;
use serde::Serialize;
use reth_codecs::{derive_arbitrary, Compact};
use serde::{Deserialize, Serialize};

/// Account storage entry.
#[derive(Debug, Default, Copy, Clone, PartialEq, Eq, Serialize)]
#[derive_arbitrary(compact)]
#[derive(Debug, Default, Copy, Clone, PartialEq, Eq, Serialize, Deserialize, PartialOrd, Ord)]
pub struct StorageEntry {
    /// Storage key.
    pub key: H256,
12 changes: 8 additions & 4 deletions crates/storage/db/Cargo.toml
@@ -63,10 +63,14 @@ arbitrary = { version = "1.1.7", features = ["derive"] }
proptest = { version = "1.0" }
proptest-derive = "0.3"

serde_json = "1.0"

paste = "1.0"


[features]
default = ["mdbx"]
test-utils = ["tempfile"]
test-utils = ["tempfile", "arbitrary"]
bench-postcard = ["bench"]
mdbx = ["reth-libmdbx"]
bench = []
@@ -79,9 +83,9 @@ arbitrary = [
]

[[bench]]
name = "encoding_crit"
name = "criterion"
harness = false

[[bench]]
name = "encoding_iai"
harness = false
name = "iai"
harness = false