From 8a6e9ef189cf6f00f986486e00523a679cd107e2 Mon Sep 17 00:00:00 2001 From: Andrei Sandu <54316454+sandreim@users.noreply.github.com> Date: Thu, 14 Dec 2023 12:57:17 +0200 Subject: [PATCH] Introduce subsystem benchmarking tool (#2528) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This tool makes it easy to run parachain consensus stress/performance testing on your development machine or in CI. ## Motivation The parachain consensus node implementation spans across many modules which we call subsystems. Each subsystem is responsible for a small part of logic of the parachain consensus pipeline, but in general the most load and performance issues are localized in just a few core subsystems like `availability-recovery`, `approval-voting` or `dispute-coordinator`. In the absence of such a tool, we would run large test nets to load/stress test these parts of the system. Setting up and making sense of the amount of data produced by such a large test is very expensive, hard to orchestrate and is a huge development time sink. ## PR contents - CLI tool - Data Availability Read test - reusable mockups and components needed so far - Documentation on how to get started ### Data Availability Read test An overseer is built with using a real `availability-recovery` susbsytem instance while dependent subsystems like `av-store`, `network-bridge` and `runtime-api` are mocked. The network bridge will emulate all the network peers and their answering to requests. The test is going to be run for a number of blocks. For each block it will generate send a “RecoverAvailableData” request for an arbitrary number of candidates. We wait for the subsystem to respond to all requests before moving to the next block. At the same time we collect the usual subsystem metrics and task CPU metrics and show some nice progress reports while running. ### Here is how the CLI looks like: ``` [2023-11-28T13:06:27Z INFO subsystem_bench::core::display] n_validators = 1000, n_cores = 20, pov_size = 5120 - 5120, error = 3, latency = Some(PeerLatency { min_latency: 1ms, max_latency: 100ms }) [2023-11-28T13:06:27Z INFO subsystem-bench::availability] Generating template candidate index=0 pov_size=5242880 [2023-11-28T13:06:27Z INFO subsystem-bench::availability] Created test environment. [2023-11-28T13:06:27Z INFO subsystem-bench::availability] Pre-generating 60 candidates. [2023-11-28T13:06:30Z INFO subsystem-bench::core] Initializing network emulation for 1000 peers. [2023-11-28T13:06:30Z INFO subsystem-bench::availability] Current block 1/3 [2023-11-28T13:06:30Z INFO substrate_prometheus_endpoint] 〽️ Prometheus exporter started at 127.0.0.1:9999 [2023-11-28T13:06:30Z INFO subsystem_bench::availability] 20 recoveries pending [2023-11-28T13:06:37Z INFO subsystem_bench::availability] Block time 6262ms [2023-11-28T13:06:37Z INFO subsystem-bench::availability] Sleeping till end of block (0ms) [2023-11-28T13:06:37Z INFO subsystem-bench::availability] Current block 2/3 [2023-11-28T13:06:37Z INFO subsystem_bench::availability] 20 recoveries pending [2023-11-28T13:06:43Z INFO subsystem_bench::availability] Block time 6369ms [2023-11-28T13:06:43Z INFO subsystem-bench::availability] Sleeping till end of block (0ms) [2023-11-28T13:06:43Z INFO subsystem-bench::availability] Current block 3/3 [2023-11-28T13:06:43Z INFO subsystem_bench::availability] 20 recoveries pending [2023-11-28T13:06:49Z INFO subsystem_bench::availability] Block time 6194ms [2023-11-28T13:06:49Z INFO subsystem-bench::availability] Sleeping till end of block (0ms) [2023-11-28T13:06:49Z INFO subsystem_bench::availability] All blocks processed in 18829ms [2023-11-28T13:06:49Z INFO subsystem_bench::availability] Throughput: 102400 KiB/block [2023-11-28T13:06:49Z INFO subsystem_bench::availability] Block time: 6276 ms [2023-11-28T13:06:49Z INFO subsystem_bench::availability] Total received from network: 415 MiB Total sent to network: 724 KiB Total subsystem CPU usage 24.00s CPU usage per block 8.00s Total test environment CPU usage 0.15s CPU usage per block 0.05s ``` ### Prometheus/Grafana stack in action Screenshot 2023-11-28 at 15 11 10 Screenshot 2023-11-28 at 15 12 01 Screenshot 2023-11-28 at 15 12 38 --------- Signed-off-by: Andrei Sandu --- Cargo.lock | 92 +- Cargo.toml | 1 + .../network/availability-recovery/Cargo.toml | 4 + .../network/availability-recovery/src/lib.rs | 13 +- .../availability-recovery/src/metrics.rs | 17 +- .../network/availability-recovery/src/task.rs | 3 +- .../availability-recovery/src/tests.rs | 29 +- polkadot/node/overseer/src/lib.rs | 2 + polkadot/node/subsystem-bench/Cargo.toml | 61 + polkadot/node/subsystem-bench/README.md | 216 ++ .../examples/availability_read.yaml | 57 + .../grafana/availability-read.json | 1874 +++++++++++++++++ .../grafana/task-cpu-usage.json | 755 +++++++ .../subsystem-bench/src/availability/cli.rs | 37 + .../subsystem-bench/src/availability/mod.rs | 339 +++ polkadot/node/subsystem-bench/src/cli.rs | 60 + .../subsystem-bench/src/core/configuration.rs | 262 +++ .../node/subsystem-bench/src/core/display.rs | 191 ++ .../subsystem-bench/src/core/environment.rs | 333 +++ .../node/subsystem-bench/src/core/keyring.rs | 40 + .../subsystem-bench/src/core/mock/av_store.rs | 137 ++ .../subsystem-bench/src/core/mock/dummy.rs | 98 + .../node/subsystem-bench/src/core/mock/mod.rs | 77 + .../src/core/mock/network_bridge.rs | 323 +++ .../src/core/mock/runtime_api.rs | 110 + polkadot/node/subsystem-bench/src/core/mod.rs | 24 + .../node/subsystem-bench/src/core/network.rs | 485 +++++ .../subsystem-bench/src/subsystem-bench.rs | 186 ++ .../node/subsystem-test-helpers/Cargo.toml | 3 + .../node/subsystem-test-helpers/src/lib.rs | 31 + .../node/subsystem-test-helpers/src/mock.rs | 7 +- 31 files changed, 5829 insertions(+), 38 deletions(-) create mode 100644 polkadot/node/subsystem-bench/Cargo.toml create mode 100644 polkadot/node/subsystem-bench/README.md create mode 100644 polkadot/node/subsystem-bench/examples/availability_read.yaml create mode 100644 polkadot/node/subsystem-bench/grafana/availability-read.json create mode 100644 polkadot/node/subsystem-bench/grafana/task-cpu-usage.json create mode 100644 polkadot/node/subsystem-bench/src/availability/cli.rs create mode 100644 polkadot/node/subsystem-bench/src/availability/mod.rs create mode 100644 polkadot/node/subsystem-bench/src/cli.rs create mode 100644 polkadot/node/subsystem-bench/src/core/configuration.rs create mode 100644 polkadot/node/subsystem-bench/src/core/display.rs create mode 100644 polkadot/node/subsystem-bench/src/core/environment.rs create mode 100644 polkadot/node/subsystem-bench/src/core/keyring.rs create mode 100644 polkadot/node/subsystem-bench/src/core/mock/av_store.rs create mode 100644 polkadot/node/subsystem-bench/src/core/mock/dummy.rs create mode 100644 polkadot/node/subsystem-bench/src/core/mock/mod.rs create mode 100644 polkadot/node/subsystem-bench/src/core/mock/network_bridge.rs create mode 100644 polkadot/node/subsystem-bench/src/core/mock/runtime_api.rs create mode 100644 polkadot/node/subsystem-bench/src/core/mod.rs create mode 100644 polkadot/node/subsystem-bench/src/core/network.rs create mode 100644 polkadot/node/subsystem-bench/src/subsystem-bench.rs diff --git a/Cargo.lock b/Cargo.lock index 6d976452e4c3..40e2d9ebf3da 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2534,6 +2534,15 @@ dependencies = [ "clap_derive 4.4.7", ] +[[package]] +name = "clap-num" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "488557e97528174edaa2ee268b23a809e0c598213a4bbcb4f34575a46fda147e" +dependencies = [ + "num-traits", +] + [[package]] name = "clap_builder" version = "4.4.11" @@ -2747,6 +2756,17 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "acbf1af155f9b9ef647e42cdc158db4b64a1b61f743629225fde6f3e0be2a7c7" +[[package]] +name = "colored" +version = "2.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2674ec482fbc38012cf31e6c42ba0177b431a0cb6f15fe40efa5aab1bda516f6" +dependencies = [ + "is-terminal", + "lazy_static", + "windows-sys 0.48.0", +] + [[package]] name = "comfy-table" version = "7.0.1" @@ -11922,6 +11942,7 @@ dependencies = [ "sp-core", "sp-keyring", "thiserror", + "tokio", "tracing-gum", ] @@ -12651,6 +12672,8 @@ dependencies = [ "async-trait", "futures", "parking_lot 0.12.1", + "polkadot-erasure-coding", + "polkadot-node-primitives", "polkadot-node-subsystem", "polkadot-node-subsystem-util", "polkadot-primitives", @@ -13266,6 +13289,52 @@ dependencies = [ "sp-core", ] +[[package]] +name = "polkadot-subsystem-bench" +version = "1.0.0" +dependencies = [ + "assert_matches", + "async-trait", + "clap 4.4.11", + "clap-num", + "color-eyre", + "colored", + "env_logger 0.9.3", + "futures", + "futures-timer", + "itertools 0.11.0", + "log", + "orchestra", + "parity-scale-codec", + "paste", + "polkadot-availability-recovery", + "polkadot-erasure-coding", + "polkadot-node-metrics", + "polkadot-node-network-protocol", + "polkadot-node-primitives", + "polkadot-node-subsystem", + "polkadot-node-subsystem-test-helpers", + "polkadot-node-subsystem-types", + "polkadot-node-subsystem-util", + "polkadot-overseer", + "polkadot-primitives", + "polkadot-primitives-test-helpers", + "prometheus", + "rand 0.8.5", + "sc-keystore", + "sc-network", + "sc-service", + "serde", + "serde_yaml", + "sp-application-crypto", + "sp-core", + "sp-keyring", + "sp-keystore", + "substrate-prometheus-endpoint", + "tokio", + "tracing-gum", +] + [[package]] name = "polkadot-test-client" version = "1.0.0" @@ -16739,6 +16808,19 @@ dependencies = [ "serde", ] +[[package]] +name = "serde_yaml" +version = "0.9.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3cc7a1570e38322cfe4154732e5110f887ea57e22b76f4bfd32b5bdd3368666c" +dependencies = [ + "indexmap 2.0.0", + "itoa", + "ryu", + "serde", + "unsafe-libyaml", +] + [[package]] name = "serial_test" version = "2.0.0" @@ -19417,9 +19499,9 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" [[package]] name = "tokio" -version = "1.32.0" +version = "1.33.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "17ed6077ed6cd6c74735e21f37eb16dc3935f96878b1fe961074089cc80893f9" +checksum = "4f38200e3ef7995e5ef13baec2f432a6da0aa9ac495b2c0e8f3b7eec2c92d653" dependencies = [ "backtrace", "bytes", @@ -20027,6 +20109,12 @@ dependencies = [ "subtle 2.4.1", ] +[[package]] +name = "unsafe-libyaml" +version = "0.2.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f28467d3e1d3c6586d8f25fa243f544f5800fec42d97032474e17222c2b75cfa" + [[package]] name = "unsigned-varint" version = "0.7.1" diff --git a/Cargo.toml b/Cargo.toml index 0091c2d7c8be..e3b6e3aadfe5 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -151,6 +151,7 @@ members = [ "polkadot/node/primitives", "polkadot/node/service", "polkadot/node/subsystem", + "polkadot/node/subsystem-bench", "polkadot/node/subsystem-test-helpers", "polkadot/node/subsystem-types", "polkadot/node/subsystem-util", diff --git a/polkadot/node/network/availability-recovery/Cargo.toml b/polkadot/node/network/availability-recovery/Cargo.toml index b97572181b0b..063d75275ca5 100644 --- a/polkadot/node/network/availability-recovery/Cargo.toml +++ b/polkadot/node/network/availability-recovery/Cargo.toml @@ -11,6 +11,7 @@ workspace = true [dependencies] futures = "0.3.21" +tokio = "1.24.2" schnellru = "0.2.1" rand = "0.8.5" fatality = "0.0.6" @@ -40,3 +41,6 @@ sc-network = { path = "../../../../substrate/client/network" } polkadot-node-subsystem-test-helpers = { path = "../../subsystem-test-helpers" } polkadot-primitives-test-helpers = { path = "../../../primitives/test-helpers" } + +[features] +subsystem-benchmarks = [] diff --git a/polkadot/node/network/availability-recovery/src/lib.rs b/polkadot/node/network/availability-recovery/src/lib.rs index 4a658449f09c..fb8064878f4f 100644 --- a/polkadot/node/network/availability-recovery/src/lib.rs +++ b/polkadot/node/network/availability-recovery/src/lib.rs @@ -65,7 +65,7 @@ mod error; mod futures_undead; mod metrics; mod task; -use metrics::Metrics; +pub use metrics::Metrics; #[cfg(test)] mod tests; @@ -603,7 +603,8 @@ impl AvailabilityRecoverySubsystem { } } - async fn run(self, mut ctx: Context) -> SubsystemResult<()> { + /// Starts the inner subsystem loop. + pub async fn run(self, mut ctx: Context) -> SubsystemResult<()> { let mut state = State::default(); let Self { mut req_receiver, @@ -681,6 +682,7 @@ impl AvailabilityRecoverySubsystem { &mut state, signal, ).await? { + gum::debug!(target: LOG_TARGET, "subsystem concluded"); return Ok(()); } FromOrchestra::Communication { msg } => { @@ -845,12 +847,17 @@ async fn erasure_task_thread( let _ = sender.send(maybe_data); }, None => { - gum::debug!( + gum::trace!( target: LOG_TARGET, "Erasure task channel closed. Node shutting down ?", ); break }, } + + // In benchmarks this is a very hot loop not yielding at all. + // To update CPU metrics for the task we need to yield. + #[cfg(feature = "subsystem-benchmarks")] + tokio::task::yield_now().await; } } diff --git a/polkadot/node/network/availability-recovery/src/metrics.rs b/polkadot/node/network/availability-recovery/src/metrics.rs index aa7216739507..d82a8f9ae5fa 100644 --- a/polkadot/node/network/availability-recovery/src/metrics.rs +++ b/polkadot/node/network/availability-recovery/src/metrics.rs @@ -29,7 +29,10 @@ struct MetricsInner { /// /// Gets incremented on each sent chunk requests. chunk_requests_issued: Counter, - + /// Total number of bytes recovered + /// + /// Gets incremented on each succesful recovery + recovered_bytes_total: Counter, /// A counter for finished chunk requests. /// /// Split by result: @@ -133,9 +136,10 @@ impl Metrics { } /// A full recovery succeeded. - pub fn on_recovery_succeeded(&self) { + pub fn on_recovery_succeeded(&self, bytes: usize) { if let Some(metrics) = &self.0 { - metrics.full_recoveries_finished.with_label_values(&["success"]).inc() + metrics.full_recoveries_finished.with_label_values(&["success"]).inc(); + metrics.recovered_bytes_total.inc_by(bytes as u64) } } @@ -171,6 +175,13 @@ impl metrics::Metrics for Metrics { )?, registry, )?, + recovered_bytes_total: prometheus::register( + Counter::new( + "polkadot_parachain_availability_recovery_bytes_total", + "Total number of bytes recovered", + )?, + registry, + )?, chunk_requests_finished: prometheus::register( CounterVec::new( Opts::new( diff --git a/polkadot/node/network/availability-recovery/src/task.rs b/polkadot/node/network/availability-recovery/src/task.rs index f705d5c0e4cf..c300c221da5c 100644 --- a/polkadot/node/network/availability-recovery/src/task.rs +++ b/polkadot/node/network/availability-recovery/src/task.rs @@ -23,6 +23,7 @@ use crate::{ PostRecoveryCheck, LOG_TARGET, }; use futures::{channel::oneshot, SinkExt}; +use parity_scale_codec::Encode; #[cfg(not(test))] use polkadot_node_network_protocol::request_response::CHUNK_REQUEST_TIMEOUT; use polkadot_node_network_protocol::request_response::{ @@ -432,7 +433,7 @@ where return Err(err) }, Ok(data) => { - self.params.metrics.on_recovery_succeeded(); + self.params.metrics.on_recovery_succeeded(data.encoded_size()); return Ok(data) }, } diff --git a/polkadot/node/network/availability-recovery/src/tests.rs b/polkadot/node/network/availability-recovery/src/tests.rs index 63ccf0e94f91..1cb52757bac9 100644 --- a/polkadot/node/network/availability-recovery/src/tests.rs +++ b/polkadot/node/network/availability-recovery/src/tests.rs @@ -24,12 +24,12 @@ use parity_scale_codec::Encode; use polkadot_node_network_protocol::request_response::{ self as req_res, IncomingRequest, Recipient, ReqProtocolNames, Requests, }; +use polkadot_node_subsystem_test_helpers::derive_erasure_chunks_with_proofs_and_root; use super::*; use sc_network::{config::RequestResponseConfig, IfDisconnected, OutboundFailure, RequestFailure}; -use polkadot_erasure_coding::{branches, obtain_chunks_v1 as obtain_chunks}; use polkadot_node_primitives::{BlockData, PoV, Proof}; use polkadot_node_subsystem::messages::{ AllMessages, NetworkBridgeTxMessage, RuntimeApiMessage, RuntimeApiRequest, @@ -456,33 +456,6 @@ fn validator_authority_id(val_ids: &[Sr25519Keyring]) -> Vec), -) -> (Vec, Hash) { - let mut chunks: Vec> = obtain_chunks(n_validators, available_data).unwrap(); - - for (i, chunk) in chunks.iter_mut().enumerate() { - alter_chunk(i, chunk) - } - - // create proofs for each erasure chunk - let branches = branches(chunks.as_ref()); - - let root = branches.root(); - let erasure_chunks = branches - .enumerate() - .map(|(index, (proof, chunk))| ErasureChunk { - chunk: chunk.to_vec(), - index: ValidatorIndex(index as _), - proof: Proof::try_from(proof).unwrap(), - }) - .collect::>(); - - (erasure_chunks, root) -} - impl Default for TestState { fn default() -> Self { let validators = vec![ diff --git a/polkadot/node/overseer/src/lib.rs b/polkadot/node/overseer/src/lib.rs index da99546a44f7..f4eddf1f41ce 100644 --- a/polkadot/node/overseer/src/lib.rs +++ b/polkadot/node/overseer/src/lib.rs @@ -276,6 +276,7 @@ impl From> for BlockInfo { /// An event from outside the overseer scope, such /// as the substrate framework or user interaction. +#[derive(Debug)] pub enum Event { /// A new block was imported. /// @@ -300,6 +301,7 @@ pub enum Event { } /// Some request from outer world. +#[derive(Debug)] pub enum ExternalRequest { /// Wait for the activation of a particular hash /// and be notified by means of the return channel. diff --git a/polkadot/node/subsystem-bench/Cargo.toml b/polkadot/node/subsystem-bench/Cargo.toml new file mode 100644 index 000000000000..08d1a31adf55 --- /dev/null +++ b/polkadot/node/subsystem-bench/Cargo.toml @@ -0,0 +1,61 @@ +[package] +name = "polkadot-subsystem-bench" +description = "Subsystem performance benchmark client" +version = "1.0.0" +authors.workspace = true +edition.workspace = true +license.workspace = true +readme = "README.md" +publish = false + +[[bin]] +name = "subsystem-bench" +path = "src/subsystem-bench.rs" + +# Prevent rustdoc error. Already documented from top-level Cargo.toml. +doc = false + +[dependencies] +polkadot-node-subsystem = { path = "../subsystem" } +polkadot-node-subsystem-util = { path = "../subsystem-util" } +polkadot-node-subsystem-types = { path = "../subsystem-types" } +polkadot-node-primitives = { path = "../primitives" } +polkadot-primitives = { path = "../../primitives" } +polkadot-node-network-protocol = { path = "../network/protocol" } +polkadot-availability-recovery = { path = "../network/availability-recovery", features = ["subsystem-benchmarks"] } +color-eyre = { version = "0.6.1", default-features = false } +polkadot-overseer = { path = "../overseer" } +colored = "2.0.4" +assert_matches = "1.5" +async-trait = "0.1.57" +sp-keystore = { path = "../../../substrate/primitives/keystore" } +sc-keystore = { path = "../../../substrate/client/keystore" } +sp-core = { path = "../../../substrate/primitives/core" } +clap = { version = "4.4.6", features = ["derive"] } +futures = "0.3.21" +futures-timer = "3.0.2" +gum = { package = "tracing-gum", path = "../gum" } +polkadot-erasure-coding = { package = "polkadot-erasure-coding", path = "../../erasure-coding" } +log = "0.4.17" +env_logger = "0.9.0" +rand = "0.8.5" +parity-scale-codec = { version = "3.6.1", features = ["derive", "std"] } +tokio = "1.24.2" +clap-num = "1.0.2" +polkadot-node-subsystem-test-helpers = { path = "../subsystem-test-helpers" } +sp-keyring = { path = "../../../substrate/primitives/keyring" } +sp-application-crypto = { path = "../../../substrate/primitives/application-crypto" } +sc-network = { path = "../../../substrate/client/network" } +sc-service = { path = "../../../substrate/client/service" } +polkadot-node-metrics = { path = "../metrics" } +itertools = "0.11.0" +polkadot-primitives-test-helpers = { path = "../../primitives/test-helpers" } +prometheus_endpoint = { package = "substrate-prometheus-endpoint", path = "../../../substrate/utils/prometheus" } +prometheus = { version = "0.13.0", default-features = false } +serde = "1.0.192" +serde_yaml = "0.9" +paste = "1.0.14" +orchestra = { version = "0.3.3", default-features = false, features = ["futures_channel"] } + +[features] +default = [] diff --git a/polkadot/node/subsystem-bench/README.md b/polkadot/node/subsystem-bench/README.md new file mode 100644 index 000000000000..21844853334b --- /dev/null +++ b/polkadot/node/subsystem-bench/README.md @@ -0,0 +1,216 @@ +# Subsystem benchmark client + +Run parachain consensus stress and performance tests on your development machine. + +## Motivation + +The parachain consensus node implementation spans across many modules which we call subsystems. Each subsystem is +responsible for a small part of logic of the parachain consensus pipeline, but in general the most load and +performance issues are localized in just a few core subsystems like `availability-recovery`, `approval-voting` or +`dispute-coordinator`. In the absence of such a tool, we would run large test nets to load/stress test these parts of +the system. Setting up and making sense of the amount of data produced by such a large test is very expensive, hard +to orchestrate and is a huge development time sink. + +This tool aims to solve the problem by making it easy to: + +- set up and run core subsystem load tests locally on your development machine +- iterate and conclude faster when benchmarking new optimizations or comparing implementations +- automate and keep track of performance regressions in CI runs +- simulate various networking topologies, bandwidth and connectivity issues + +## Test environment setup + +`cargo build --profile=testnet --bin subsystem-bench -p polkadot-subsystem-bench` + +The output binary will be placed in `target/testnet/subsystem-bench`. + +### Test metrics + +Subsystem, CPU usage and network metrics are exposed via a prometheus endpoint during the test execution. +A small subset of these collected metrics are displayed in the CLI, but for an in depth analysys of the test results, +a local Grafana/Prometheus stack is needed. + +### Install Prometheus + +Please follow the [official installation guide](https://prometheus.io/docs/prometheus/latest/installation/) for your +platform/OS. + +After succesfully installing and starting up Prometheus, we need to alter it's configuration such that it +will scrape the benchmark prometheus endpoint `127.0.0.1:9999`. Please check the prometheus official documentation +regarding the location of `prometheus.yml`. On MacOS for example the full path `/opt/homebrew/etc/prometheus.yml` + +prometheus.yml: + +``` +global: + scrape_interval: 5s + +scrape_configs: + - job_name: "prometheus" + static_configs: + - targets: ["localhost:9090"] + - job_name: "subsystem-bench" + scrape_interval: 0s500ms + static_configs: + - targets: ['localhost:9999'] +``` + +To complete this step restart Prometheus server such that it picks up the new configuration. + +### Install and setup Grafana + +Follow the [installation guide](https://grafana.com/docs/grafana/latest/setup-grafana/installation/) relevant +to your operating system. + +Once you have the installation up and running, configure the local Prometheus as a data source by following +[this guide](https://grafana.com/docs/grafana/latest/datasources/prometheus/configure-prometheus-data-source/) + +#### Import dashboards + +Follow [this guide](https://grafana.com/docs/grafana/latest/dashboards/manage-dashboards/#export-and-import-dashboards) +to import the dashboards from the repository `grafana` folder. + +## How to run a test + +To run a test, you need to first choose a test objective. Currently, we support the following: + +``` +target/testnet/subsystem-bench --help +The almighty Subsystem Benchmark Tool™️ + +Usage: subsystem-bench [OPTIONS] + +Commands: + data-availability-read Benchmark availability recovery strategies + +``` + +Note: `test-sequence` is a special test objective that wraps up an arbitrary number of test objectives. It is tipically + used to run a suite of tests defined in a `yaml` file like in this [example](examples/availability_read.yaml). + +### Standard test options + +``` +Options: + --network The type of network to be emulated [default: ideal] [possible values: + ideal, healthy, degraded] + --n-cores Number of cores to fetch availability for [default: 100] + --n-validators Number of validators to fetch chunks from [default: 500] + --min-pov-size The minimum pov size in KiB [default: 5120] + --max-pov-size The maximum pov size bytes [default: 5120] + -n, --num-blocks The number of blocks the test is going to run [default: 1] + -p, --peer-bandwidth The bandwidth of simulated remote peers in KiB + -b, --bandwidth The bandwidth of our simulated node in KiB + --peer-error Simulated conection error ratio [0-100] + --peer-min-latency Minimum remote peer latency in milliseconds [0-5000] + --peer-max-latency Maximum remote peer latency in milliseconds [0-5000] + -h, --help Print help + -V, --version Print version +``` + +These apply to all test objectives, except `test-sequence` which relies on the values being specified in a file. + +### Test objectives + +Each test objective can have it's specific configuration options, in contrast with the standard test options. + +For `data-availability-read` the recovery strategy to be used is configurable. + +``` +target/testnet/subsystem-bench data-availability-read --help +Benchmark availability recovery strategies + +Usage: subsystem-bench data-availability-read [OPTIONS] + +Options: + -f, --fetch-from-backers Turbo boost AD Read by fetching the full availability datafrom backers first. Saves CPU + as we don't need to re-construct from chunks. Tipically this is only faster if nodes + have enough bandwidth + -h, --help Print help +``` + +### Understanding the test configuration + +A single test configuration `TestConfiguration` struct applies to a single run of a certain test objective. + +The configuration describes the following important parameters that influence the test duration and resource +usage: + +- how many validators are on the emulated network (`n_validators`) +- how many cores per block the subsystem will have to do work on (`n_cores`) +- for how many blocks the test should run (`num_blocks`) + +From the perspective of the subsystem under test, this means that it will receive an `ActiveLeavesUpdate` signal +followed by an arbitrary amount of messages. This process repeats itself for `num_blocks`. The messages are generally +test payloads pre-generated before the test run, or constructed on pre-genereated payloads. For example the +`AvailabilityRecoveryMessage::RecoverAvailableData` message includes a `CandidateReceipt` which is generated before +the test is started. + +### Example run + +Let's run an availabilty read test which will recover availability for 10 cores with max PoV size on a 500 +node validator network. + +``` + target/testnet/subsystem-bench --n-cores 10 data-availability-read +[2023-11-28T09:01:59Z INFO subsystem_bench::core::display] n_validators = 500, n_cores = 10, pov_size = 5120 - 5120, + error = 0, latency = None +[2023-11-28T09:01:59Z INFO subsystem-bench::availability] Generating template candidate index=0 pov_size=5242880 +[2023-11-28T09:01:59Z INFO subsystem-bench::availability] Created test environment. +[2023-11-28T09:01:59Z INFO subsystem-bench::availability] Pre-generating 10 candidates. +[2023-11-28T09:02:01Z INFO subsystem-bench::core] Initializing network emulation for 500 peers. +[2023-11-28T09:02:01Z INFO substrate_prometheus_endpoint] 〽️ Prometheus exporter started at 127.0.0.1:9999 +[2023-11-28T09:02:01Z INFO subsystem-bench::availability] Current block 1/1 +[2023-11-28T09:02:01Z INFO subsystem_bench::availability] 10 recoveries pending +[2023-11-28T09:02:04Z INFO subsystem_bench::availability] Block time 3231ms +[2023-11-28T09:02:04Z INFO subsystem-bench::availability] Sleeping till end of block (2768ms) +[2023-11-28T09:02:07Z INFO subsystem_bench::availability] All blocks processed in 6001ms +[2023-11-28T09:02:07Z INFO subsystem_bench::availability] Throughput: 51200 KiB/block +[2023-11-28T09:02:07Z INFO subsystem_bench::availability] Block time: 6001 ms +[2023-11-28T09:02:07Z INFO subsystem_bench::availability] + + Total received from network: 66 MiB + Total sent to network: 58 KiB + Total subsystem CPU usage 4.16s + CPU usage per block 4.16s + Total test environment CPU usage 0.00s + CPU usage per block 0.00s +``` + +`Block time` in the context of `data-availability-read` has a different meaning. It measures the amount of time it +took the subsystem to finish processing all of the messages sent in the context of the current test block. + +### Test logs + +You can select log target, subtarget and verbosity just like with Polkadot node CLI, simply setting +`RUST_LOOG="parachain=debug"` turns on debug logs for all parachain consensus subsystems in the test. + +### View test metrics + +Assuming the Grafana/Prometheus stack installation steps completed succesfully, you should be able to +view the test progress in real time by accessing [this link](http://localhost:3000/goto/SM5B8pNSR?orgId=1). + +Now run +`target/testnet/subsystem-bench test-sequence --path polkadot/node/subsystem-bench/examples/availability_read.yaml` +and view the metrics in real time and spot differences between different `n_valiator` values. + +## Create new test objectives + +This tool is intended to make it easy to write new test objectives that focus individual subsystems, +or even multiple subsystems (for example `approval-distribution` and `approval-voting`). + +A special kind of test objectives are performance regression tests for the CI pipeline. These should be sequences +of tests that check the performance characteristics (such as CPU usage, speed) of the subsystem under test in both +happy and negative scenarios (low bandwidth, network errors and low connectivity). + +### Reusable test components + +To faster write a new test objective you need to use some higher level wrappers and logic: `TestEnvironment`, +`TestConfiguration`, `TestAuthorities`, `NetworkEmulator`. To create the `TestEnvironment` you will +need to also build an `Overseer`, but that should be easy using the mockups for subsystems in`core::mock`. + +### Mocking + +Ideally we want to have a single mock implementation for subsystems that can be minimally configured to +be used in different tests. A good example is `runtime-api` which currently only responds to session information +requests based on static data. It can be easily extended to service other requests. diff --git a/polkadot/node/subsystem-bench/examples/availability_read.yaml b/polkadot/node/subsystem-bench/examples/availability_read.yaml new file mode 100644 index 000000000000..311ea972141f --- /dev/null +++ b/polkadot/node/subsystem-bench/examples/availability_read.yaml @@ -0,0 +1,57 @@ +TestConfiguration: +# Test 1 +- objective: !DataAvailabilityRead + fetch_from_backers: false + n_validators: 300 + n_cores: 20 + min_pov_size: 5120 + max_pov_size: 5120 + peer_bandwidth: 52428800 + bandwidth: 52428800 + latency: + min_latency: + secs: 0 + nanos: 1000000 + max_latency: + secs: 0 + nanos: 100000000 + error: 3 + num_blocks: 3 + +# Test 2 +- objective: !DataAvailabilityRead + fetch_from_backers: false + n_validators: 500 + n_cores: 20 + min_pov_size: 5120 + max_pov_size: 5120 + peer_bandwidth: 52428800 + bandwidth: 52428800 + latency: + min_latency: + secs: 0 + nanos: 1000000 + max_latency: + secs: 0 + nanos: 100000000 + error: 3 + num_blocks: 3 + +# Test 3 +- objective: !DataAvailabilityRead + fetch_from_backers: false + n_validators: 1000 + n_cores: 20 + min_pov_size: 5120 + max_pov_size: 5120 + peer_bandwidth: 52428800 + bandwidth: 52428800 + latency: + min_latency: + secs: 0 + nanos: 1000000 + max_latency: + secs: 0 + nanos: 100000000 + error: 3 + num_blocks: 3 diff --git a/polkadot/node/subsystem-bench/grafana/availability-read.json b/polkadot/node/subsystem-bench/grafana/availability-read.json new file mode 100644 index 000000000000..31c4ad3c7952 --- /dev/null +++ b/polkadot/node/subsystem-bench/grafana/availability-read.json @@ -0,0 +1,1874 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "datasource", + "uid": "grafana" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "target": { + "limit": 100, + "matchAny": false, + "tags": [], + "type": "dashboard" + }, + "type": "dashboard" + } + ] + }, + "description": "Subsystem and test environment metrics", + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": 2, + "links": [], + "liveNow": false, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "e56e7dd2-a992-4eec-aa96-e47b21c9020b" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineStyle": { + "fill": "solid" + }, + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": 60000, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 90, + "interval": "1s", + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "e56e7dd2-a992-4eec-aa96-e47b21c9020b" + }, + "editorMode": "code", + "expr": "subsystem_benchmark_n_validators{}", + "instant": false, + "legendFormat": "n_vaidators", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "e56e7dd2-a992-4eec-aa96-e47b21c9020b" + }, + "editorMode": "code", + "expr": "subsystem_benchmark_n_cores{}", + "hide": false, + "instant": false, + "legendFormat": "n_cores", + "range": true, + "refId": "B" + } + ], + "title": "Test configuration", + "type": "timeseries" + }, + { + "collapsed": false, + "datasource": { + "type": "datasource", + "uid": "grafana" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 9 + }, + "id": 31, + "panels": [], + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "grafana" + }, + "refId": "A" + } + ], + "title": "Overview", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$data_source" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 30, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 24, + "x": 0, + "y": 10 + }, + "id": 57, + "interval": "1s", + "options": { + "legend": { + "calcs": [ + "mean", + "min", + "max" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true, + "sortBy": "Mean", + "sortDesc": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "10.0.2", + "repeat": "nodename", + "targets": [ + { + "datasource": { + "uid": "$data_source" + }, + "editorMode": "code", + "expr": "sum(rate(substrate_tasks_polling_duration_sum{}[2s])) by ($cpu_group_by)", + "interval": "", + "legendFormat": "{{task_group}}", + "range": true, + "refId": "A" + } + ], + "title": "All tasks CPU usage breakdown", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$data_source" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 30, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "area" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 6 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 24, + "x": 0, + "y": 20 + }, + "id": 93, + "interval": "1s", + "options": { + "legend": { + "calcs": [ + "mean", + "min", + "max" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true, + "sortBy": "Mean", + "sortDesc": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "10.0.2", + "targets": [ + { + "datasource": { + "uid": "$data_source" + }, + "editorMode": "code", + "expr": "increase(substrate_tasks_polling_duration_sum{task_group=\"availability-recovery\"}[6s])", + "interval": "", + "legendFormat": "{{task_name}}", + "range": true, + "refId": "A" + } + ], + "title": "Availability subsystem CPU usage per block", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$data_source" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 30, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 10, + "type": "log" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 24, + "x": 0, + "y": 30 + }, + "id": 94, + "interval": "1s", + "options": { + "legend": { + "calcs": [ + "last" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true, + "sortBy": "Last", + "sortDesc": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "10.0.2", + "targets": [ + { + "datasource": { + "uid": "$data_source" + }, + "editorMode": "code", + "expr": "sum(substrate_tasks_polling_duration_sum{}) by ($cpu_group_by)", + "interval": "", + "legendFormat": "{{task_name}}", + "range": true, + "refId": "A" + } + ], + "title": "Total CPU burn", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$data_source" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 30, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 10, + "type": "log" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "area" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "dark-red", + "value": 6000 + } + ] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 12, + "w": 12, + "x": 0, + "y": 40 + }, + "id": 95, + "interval": "1s", + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last", + "sortDesc": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "10.0.2", + "targets": [ + { + "datasource": { + "uid": "$data_source" + }, + "editorMode": "code", + "expr": "subsystem_benchmark_block_time", + "interval": "", + "legendFormat": "Instant block time", + "range": true, + "refId": "A" + } + ], + "title": "All candidates in block recovery time", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "e56e7dd2-a992-4eec-aa96-e47b21c9020b" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 100, + "gradientMode": "hue", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 2, + "scaleDistribution": { + "log": 2, + "type": "log" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "binBps" + }, + "overrides": [] + }, + "gridPos": { + "h": 12, + "w": 12, + "x": 12, + "y": 40 + }, + "id": 89, + "interval": "1s", + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "e56e7dd2-a992-4eec-aa96-e47b21c9020b" + }, + "editorMode": "code", + "expr": "sum(rate(subsystem_benchmark_network_peer_total_bytes_received{}[5s]))", + "instant": false, + "legendFormat": "Received", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "e56e7dd2-a992-4eec-aa96-e47b21c9020b" + }, + "editorMode": "code", + "expr": "sum(rate(subsystem_benchmark_network_peer_total_bytes_sent{}[5s]))", + "hide": false, + "instant": false, + "legendFormat": "Sent", + "range": true, + "refId": "B" + } + ], + "title": "Emulated network throughput ", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "e56e7dd2-a992-4eec-aa96-e47b21c9020b" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "log" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 15, + "w": 12, + "x": 0, + "y": 52 + }, + "id": 88, + "interval": "1s", + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "e56e7dd2-a992-4eec-aa96-e47b21c9020b" + }, + "editorMode": "code", + "expr": "rate(subsystem_benchmark_network_peer_total_bytes_received{}[10s])", + "instant": false, + "legendFormat": "Received by {{peer}}", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "e56e7dd2-a992-4eec-aa96-e47b21c9020b" + }, + "editorMode": "code", + "expr": "rate(subsystem_benchmark_network_peer_total_bytes_sent{}[10s])", + "hide": false, + "instant": false, + "legendFormat": "Sent by {{peer}}", + "range": true, + "refId": "B" + } + ], + "title": "Emulated peer throughput", + "type": "timeseries" + }, + { + "cards": {}, + "color": { + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateInferno", + "exponent": 0.5, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", + "datasource": { + "type": "prometheus", + "uid": "${data_source}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "scaleDistribution": { + "type": "linear" + } + } + }, + "overrides": [] + }, + "gridPos": { + "h": 15, + "w": 12, + "x": 12, + "y": 52 + }, + "heatmap": {}, + "hideZeroBuckets": true, + "highlightCards": true, + "id": 92, + "interval": "1s", + "legend": { + "show": true + }, + "maxDataPoints": 1340, + "options": { + "calculate": false, + "calculation": {}, + "cellGap": 2, + "cellValues": { + "decimals": 0 + }, + "color": { + "exponent": 0.5, + "fill": "#b4ff00", + "mode": "scheme", + "reverse": false, + "scale": "exponential", + "scheme": "Inferno", + "steps": 128 + }, + "exemplars": { + "color": "rgba(255,0,255,0.7)" + }, + "filterValues": { + "le": 1e-9 + }, + "legend": { + "show": true + }, + "rowsFrame": { + "layout": "auto" + }, + "showValue": "never", + "tooltip": { + "show": true, + "yHistogram": true + }, + "yAxis": { + "axisPlacement": "left", + "decimals": 0, + "reverse": false, + "unit": "bytes" + } + }, + "pluginVersion": "10.1.1", + "reverseYBuckets": false, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${data_source}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(increase(subsystem_benchmark_pov_size_bucket{}[$__rate_interval])) by (le)", + "format": "heatmap", + "hide": false, + "instant": false, + "interval": "", + "legendFormat": "{{le}}", + "queryType": "randomWalk", + "refId": "B" + } + ], + "title": "Recovered PoV sizes", + "tooltip": { + "show": true, + "showHistogram": true + }, + "tooltipDecimals": 0, + "transformations": [], + "type": "heatmap", + "xAxis": { + "show": true + }, + "yAxis": { + "decimals": 0, + "format": "s", + "logBase": 1, + "show": true + }, + "yBucketBound": "auto" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${data_source}" + }, + "description": "Number of erasure-encoded chunks of data belonging to candidate blocks. ", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic", + "seriesBy": "max" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "chunks/s" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 67 + }, + "id": 43, + "interval": "1s", + "maxDataPoints": 1340, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "8.2.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${data_source}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(rate(polkadot_parachain_availability_recovery_chunk_requests_issued{}[10s]))", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "legendFormat": "Chunks requested", + "queryType": "randomWalk", + "refId": "B" + } + ], + "title": "Availability", + "transformations": [], + "type": "timeseries" + }, + { + "collapsed": false, + "datasource": { + "type": "datasource", + "uid": "grafana" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 77 + }, + "id": 35, + "panels": [], + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "grafana" + }, + "refId": "A" + } + ], + "title": "Availability subystem metrics", + "type": "row" + }, + { + "cards": {}, + "color": { + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateInferno", + "exponent": 0.5, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", + "datasource": { + "type": "prometheus", + "uid": "${data_source}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "scaleDistribution": { + "type": "linear" + } + } + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 78 + }, + "heatmap": {}, + "hideZeroBuckets": true, + "highlightCards": true, + "id": 68, + "interval": "1s", + "legend": { + "show": true + }, + "maxDataPoints": 1340, + "options": { + "calculate": false, + "calculation": {}, + "cellGap": 2, + "cellValues": { + "decimals": 0 + }, + "color": { + "exponent": 0.5, + "fill": "#b4ff00", + "mode": "scheme", + "reverse": false, + "scale": "exponential", + "scheme": "Inferno", + "steps": 128 + }, + "exemplars": { + "color": "rgba(255,0,255,0.7)" + }, + "filterValues": { + "le": 1e-9 + }, + "legend": { + "show": true + }, + "rowsFrame": { + "layout": "auto" + }, + "showValue": "never", + "tooltip": { + "show": true, + "yHistogram": true + }, + "yAxis": { + "axisPlacement": "left", + "decimals": 0, + "reverse": false, + "unit": "s" + } + }, + "pluginVersion": "10.1.1", + "reverseYBuckets": false, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${data_source}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(increase(polkadot_parachain_availability_recovery_time_total_bucket{}[$__rate_interval])) by (le)", + "format": "heatmap", + "hide": false, + "instant": false, + "interval": "", + "legendFormat": "{{le}}", + "queryType": "randomWalk", + "refId": "B" + } + ], + "title": "Time to recover a PoV", + "tooltip": { + "show": true, + "showHistogram": true + }, + "tooltipDecimals": 0, + "transformations": [], + "type": "heatmap", + "xAxis": { + "show": true + }, + "yAxis": { + "decimals": 0, + "format": "s", + "logBase": 1, + "show": true + }, + "yBucketBound": "auto" + }, + { + "cards": {}, + "color": { + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateInferno", + "exponent": 0.5, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", + "datasource": { + "type": "prometheus", + "uid": "${data_source}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "scaleDistribution": { + "type": "linear" + } + } + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 78 + }, + "heatmap": {}, + "hideZeroBuckets": true, + "highlightCards": true, + "id": 67, + "interval": "1s", + "legend": { + "show": true + }, + "maxDataPoints": 1340, + "options": { + "calculate": false, + "calculation": {}, + "cellGap": 2, + "cellValues": { + "decimals": 0 + }, + "color": { + "exponent": 0.5, + "fill": "#b4ff00", + "mode": "scheme", + "reverse": false, + "scale": "exponential", + "scheme": "Inferno", + "steps": 128 + }, + "exemplars": { + "color": "rgba(255,0,255,0.7)" + }, + "filterValues": { + "le": 1e-9 + }, + "legend": { + "show": true + }, + "rowsFrame": { + "layout": "auto" + }, + "showValue": "never", + "tooltip": { + "show": true, + "yHistogram": true + }, + "yAxis": { + "axisPlacement": "left", + "decimals": 0, + "reverse": false, + "unit": "s" + } + }, + "pluginVersion": "10.1.1", + "reverseYBuckets": false, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${data_source}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(increase(polkadot_parachain_availability_recovery_time_chunk_request_bucket{}[$__rate_interval])) by (le)", + "format": "heatmap", + "instant": false, + "interval": "", + "legendFormat": "{{le}}", + "queryType": "randomWalk", + "refId": "A" + } + ], + "title": "Chunk request duration", + "tooltip": { + "show": true, + "showHistogram": true + }, + "tooltipDecimals": 0, + "transformations": [], + "type": "heatmap", + "xAxis": { + "show": true + }, + "yAxis": { + "decimals": 0, + "format": "bitfields", + "logBase": 1, + "show": true + }, + "yBucketBound": "auto" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${data_source}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic", + "seriesBy": "max" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "Bps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 88 + }, + "id": 85, + "interval": "1s", + "maxDataPoints": 1340, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "8.2.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${data_source}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "rate(polkadot_parachain_availability_recovery_bytes_total{}[30s])", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "legendFormat": "Bytes recovered", + "queryType": "randomWalk", + "refId": "B" + } + ], + "title": "Recovery throughtput", + "transformations": [], + "type": "timeseries" + }, + { + "cards": {}, + "color": { + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateInferno", + "exponent": 0.5, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", + "datasource": { + "type": "prometheus", + "uid": "${data_source}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "scaleDistribution": { + "type": "linear" + } + } + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 88 + }, + "heatmap": {}, + "hideZeroBuckets": true, + "highlightCards": true, + "id": 84, + "interval": "1s", + "legend": { + "show": true + }, + "maxDataPoints": 1340, + "options": { + "calculate": false, + "calculation": {}, + "cellGap": 2, + "cellValues": { + "decimals": 0 + }, + "color": { + "exponent": 0.5, + "fill": "#b4ff00", + "mode": "scheme", + "reverse": false, + "scale": "exponential", + "scheme": "Inferno", + "steps": 128 + }, + "exemplars": { + "color": "rgba(255,0,255,0.7)" + }, + "filterValues": { + "le": 1e-9 + }, + "legend": { + "show": true + }, + "rowsFrame": { + "layout": "auto" + }, + "showValue": "never", + "tooltip": { + "show": true, + "yHistogram": true + }, + "yAxis": { + "axisPlacement": "left", + "decimals": 0, + "reverse": false, + "unit": "s" + } + }, + "pluginVersion": "10.1.1", + "reverseYBuckets": false, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${data_source}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(increase(polkadot_parachain_availability_reencode_chunks_bucket{}[$__rate_interval])) by (le)", + "format": "heatmap", + "hide": false, + "instant": false, + "interval": "", + "legendFormat": "{{le}}", + "queryType": "randomWalk", + "refId": "B" + } + ], + "title": "Re-encoding chunks timing", + "tooltip": { + "show": true, + "showHistogram": true + }, + "tooltipDecimals": 0, + "transformations": [], + "type": "heatmap", + "xAxis": { + "show": true + }, + "yAxis": { + "decimals": 0, + "format": "s", + "logBase": 1, + "show": true + }, + "yBucketBound": "auto" + }, + { + "cards": {}, + "color": { + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateInferno", + "exponent": 0.5, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", + "datasource": { + "type": "prometheus", + "uid": "${data_source}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "scaleDistribution": { + "type": "linear" + } + } + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 98 + }, + "heatmap": {}, + "hideZeroBuckets": true, + "highlightCards": true, + "id": 83, + "interval": "1s", + "legend": { + "show": true + }, + "maxDataPoints": 1340, + "options": { + "calculate": false, + "calculation": {}, + "cellGap": 2, + "cellValues": { + "decimals": 0 + }, + "color": { + "exponent": 0.5, + "fill": "#b4ff00", + "mode": "scheme", + "reverse": false, + "scale": "exponential", + "scheme": "Inferno", + "steps": 128 + }, + "exemplars": { + "color": "rgba(255,0,255,0.7)" + }, + "filterValues": { + "le": 1e-9 + }, + "legend": { + "show": true + }, + "rowsFrame": { + "layout": "auto" + }, + "showValue": "never", + "tooltip": { + "show": true, + "yHistogram": true + }, + "yAxis": { + "axisPlacement": "left", + "decimals": 0, + "reverse": false, + "unit": "s" + } + }, + "pluginVersion": "10.1.1", + "reverseYBuckets": false, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${data_source}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(increase(polkadot_parachain_availability_recovery_time_erasure_recovery_bucket{}[$__rate_interval])) by (le)", + "format": "heatmap", + "hide": false, + "instant": false, + "interval": "", + "legendFormat": "{{le}}", + "queryType": "randomWalk", + "refId": "B" + } + ], + "title": "Erasure recovery (no I/O)", + "tooltip": { + "show": true, + "showHistogram": true + }, + "tooltipDecimals": 0, + "transformations": [], + "type": "heatmap", + "xAxis": { + "show": true + }, + "yAxis": { + "decimals": 0, + "format": "s", + "logBase": 1, + "show": true + }, + "yBucketBound": "auto" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${data_source}" + }, + "description": "Number of erasure-encoded chunks of data belonging to candidate blocks. ", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic", + "seriesBy": "max" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "stepAfter", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "cps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 108 + }, + "id": 86, + "interval": "1s", + "maxDataPoints": 1340, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "8.2.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${data_source}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(rate(polkadot_parachain_availability_recovery_recoveries_finished{}[1s]))", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "legendFormat": "Finished", + "queryType": "randomWalk", + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${data_source}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(rate(polkadot_parachain_availability_recovery_recovieries_started{}[1s]))", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "legendFormat": "Started", + "queryType": "randomWalk", + "refId": "A" + } + ], + "title": "Recoveries", + "transformations": [], + "type": "timeseries" + }, + { + "collapsed": false, + "datasource": { + "type": "datasource", + "uid": "grafana" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 118 + }, + "id": 2, + "panels": [], + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "grafana" + }, + "refId": "A" + } + ], + "title": "Approval voting", + "type": "row" + } + ], + "refresh": false, + "schemaVersion": 38, + "style": "dark", + "tags": [ + "subsystem", + "benchmark" + ], + "templating": { + "list": [ + { + "current": { + "selected": false, + "text": "Prometheus", + "value": "e56e7dd2-a992-4eec-aa96-e47b21c9020b" + }, + "hide": 0, + "includeAll": false, + "label": "Source of data", + "multi": false, + "name": "data_source", + "options": [], + "query": "prometheus", + "queryValue": "", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "type": "datasource" + }, + { + "current": { + "selected": true, + "text": "task_name", + "value": "task_name" + }, + "description": "Sum CPU usage by task name or task group.", + "hide": 0, + "includeAll": false, + "label": "Group CPU usage", + "multi": false, + "name": "cpu_group_by", + "options": [ + { + "selected": true, + "text": "task_name", + "value": "task_name" + }, + { + "selected": false, + "text": "task_group", + "value": "task_group" + } + ], + "query": "task_name, task_group", + "queryValue": "", + "skipUrlSync": false, + "type": "custom" + } + ] + }, + "time": { + "from": "2023-11-28T13:05:32.794Z", + "to": "2023-11-28T13:06:56.173Z" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s" + ] + }, + "timezone": "utc", + "title": "Data Availability Read", + "uid": "asdadasd1", + "version": 58, + "weekStart": "" +} \ No newline at end of file diff --git a/polkadot/node/subsystem-bench/grafana/task-cpu-usage.json b/polkadot/node/subsystem-bench/grafana/task-cpu-usage.json new file mode 100644 index 000000000000..90763444abf1 --- /dev/null +++ b/polkadot/node/subsystem-bench/grafana/task-cpu-usage.json @@ -0,0 +1,755 @@ +{ + "annotations": { + "list": [ + { + "$$hashKey": "object:326", + "builtIn": 1, + "datasource": { + "type": "datasource", + "uid": "grafana" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "limit": 100, + "name": "Annotations & Alerts", + "showIn": 0, + "target": { + "limit": 100, + "matchAny": false, + "tags": [], + "type": "dashboard" + }, + "type": "dashboard" + }, + { + "$$hashKey": "object:327", + "datasource": { + "uid": "$data_source" + }, + "enable": true, + "expr": "increase(${metric_namespace}_tasks_ended_total{reason=\"panic\", node=~\"${nodename}\"}[10m])", + "hide": true, + "iconColor": "rgba(255, 96, 96, 1)", + "limit": 100, + "name": "Task panics", + "rawQuery": "SELECT\n extract(epoch from time_column) AS time,\n text_column as text,\n tags_column as tags\nFROM\n metric_table\nWHERE\n $__timeFilter(time_column)\n", + "showIn": 0, + "step": "10m", + "tags": [], + "textFormat": "{{node}} - {{task_name}}", + "titleFormat": "Panic!", + "type": "tags" + }, + { + "$$hashKey": "object:621", + "datasource": { + "uid": "$data_source" + }, + "enable": true, + "expr": "changes(${metric_namespace}_process_start_time_seconds{node=~\"${nodename}\"}[10m])", + "hide": false, + "iconColor": "#8AB8FF", + "name": "Node reboots", + "showIn": 0, + "step": "10m", + "textFormat": "{{node}}", + "titleFormat": "Reboots" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": 1, + "links": [], + "liveNow": false, + "panels": [ + { + "collapsed": false, + "datasource": { + "type": "datasource", + "uid": "grafana" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 29, + "panels": [], + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "grafana" + }, + "refId": "A" + } + ], + "title": "Tasks", + "type": "row" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "e56e7dd2-a992-4eec-aa96-e47b21c9020b" + }, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 3, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 24, + "x": 0, + "y": 1 + }, + "hiddenSeries": false, + "id": 11, + "interval": "1s", + "legend": { + "alignAsTable": true, + "avg": true, + "current": false, + "hideEmpty": false, + "hideZero": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sort": "avg", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "10.1.1", + "pointradius": 2, + "points": false, + "renderer": "flot", + "repeat": "nodename", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "datasource": { + "uid": "$data_source" + }, + "editorMode": "code", + "expr": "sum(rate(substrate_tasks_polling_duration_sum{}[$__rate_interval])) by (task_name)", + "interval": "", + "legendFormat": "{{task_name}}", + "range": true, + "refId": "A" + } + ], + "thresholds": [], + "timeRegions": [], + "title": "CPU time spent on each task", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:2721", + "format": "percentunit", + "logBase": 1, + "show": true + }, + { + "$$hashKey": "object:2722", + "format": "short", + "logBase": 1, + "show": false + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "e56e7dd2-a992-4eec-aa96-e47b21c9020b" + }, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 3, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 24, + "x": 0, + "y": 10 + }, + "hiddenSeries": false, + "id": 30, + "interval": "1s", + "legend": { + "alignAsTable": true, + "avg": true, + "current": false, + "hideEmpty": false, + "hideZero": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "connected", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "10.1.1", + "pointradius": 2, + "points": false, + "renderer": "flot", + "repeat": "nodename", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "datasource": { + "uid": "$data_source" + }, + "editorMode": "code", + "expr": "rate(substrate_tasks_polling_duration_count{}[$__rate_interval])", + "interval": "", + "legendFormat": "{{task_name}}", + "range": true, + "refId": "A" + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Task polling rate per second", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:2571", + "format": "cps", + "logBase": 1, + "show": true + }, + { + "$$hashKey": "object:2572", + "format": "short", + "logBase": 1, + "show": false + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "e56e7dd2-a992-4eec-aa96-e47b21c9020b" + }, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 24, + "x": 0, + "y": 16 + }, + "hiddenSeries": false, + "id": 43, + "interval": "1s", + "legend": { + "alignAsTable": true, + "avg": true, + "current": false, + "hideEmpty": true, + "hideZero": false, + "max": true, + "min": true, + "rightSide": true, + "show": true, + "total": true, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "connected", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "10.1.1", + "pointradius": 2, + "points": false, + "renderer": "flot", + "repeat": "nodename", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "datasource": { + "uid": "$data_source" + }, + "editorMode": "code", + "expr": "increase(substrate_tasks_polling_duration_sum{}[$__rate_interval]) / increase(substrate_tasks_polling_duration_count{}[$__rate_interval])", + "interval": "", + "legendFormat": "{{task_name}}", + "range": true, + "refId": "A" + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Average time it takes to call Future::poll()", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:2571", + "format": "s", + "logBase": 1, + "min": "0", + "show": true + }, + { + "$$hashKey": "object:2572", + "format": "short", + "logBase": 1, + "show": false + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": true, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "e56e7dd2-a992-4eec-aa96-e47b21c9020b" + }, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 24, + "x": 0, + "y": 22 + }, + "hiddenSeries": false, + "id": 15, + "interval": "1s", + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": true, + "values": true + }, + "lines": false, + "linewidth": 1, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "10.1.1", + "pointradius": 2, + "points": false, + "renderer": "flot", + "repeat": "nodename", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": true, + "targets": [ + { + "datasource": { + "uid": "$data_source" + }, + "editorMode": "code", + "expr": "increase(substrate_tasks_spawned_total{}[$__rate_interval])", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{task_name}}", + "range": true, + "refId": "A" + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Number of tasks started", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:771", + "format": "short", + "logBase": 10, + "min": "0", + "show": true + }, + { + "$$hashKey": "object:772", + "format": "short", + "logBase": 1, + "show": true + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "e56e7dd2-a992-4eec-aa96-e47b21c9020b" + }, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 24, + "x": 0, + "y": 28 + }, + "hiddenSeries": false, + "id": 2, + "interval": "1s", + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": true, + "min": true, + "rightSide": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "connected", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "10.1.1", + "pointradius": 2, + "points": false, + "renderer": "flot", + "repeat": "nodename", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "datasource": { + "uid": "$data_source" + }, + "editorMode": "code", + "expr": "substrate_tasks_spawned_total{} - sum(substrate_tasks_ended_total{}) without(reason)\n\n# Fallback if tasks_ended_total is null for that task\nor on(task_name) substrate_tasks_spawned_total{}", + "interval": "", + "legendFormat": "{{task_name}}", + "range": true, + "refId": "A" + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Number of tasks running", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:919", + "format": "short", + "logBase": 1, + "min": "0", + "show": true + }, + { + "$$hashKey": "object:920", + "format": "short", + "logBase": 1, + "show": true + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "e56e7dd2-a992-4eec-aa96-e47b21c9020b" + }, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 24, + "x": 0, + "y": 34 + }, + "hiddenSeries": false, + "id": 7, + "interval": "1s", + "legend": { + "alignAsTable": true, + "avg": true, + "current": false, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "10.1.1", + "pointradius": 2, + "points": false, + "renderer": "flot", + "repeat": "nodename", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": true, + "targets": [ + { + "datasource": { + "uid": "$data_source" + }, + "editorMode": "code", + "expr": "irate(substrate_tasks_polling_duration_bucket{le=\"+Inf\"}[$__rate_interval])\n - ignoring(le)\n irate(substrate_tasks_polling_duration_bucket{le=\"1.024\"}[$__rate_interval]) > 0", + "interval": "", + "legendFormat": "{{task_name}}", + "range": true, + "refId": "A" + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Number of calls to `Future::poll` that took more than one second", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:3040", + "format": "cps", + "label": "Calls to `Future::poll`/second", + "logBase": 1, + "min": "0", + "show": true + }, + { + "$$hashKey": "object:3041", + "format": "short", + "logBase": 1, + "show": false + } + ], + "yaxis": { + "align": false + } + }, + { + "collapsed": false, + "datasource": { + "type": "datasource", + "uid": "grafana" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 40 + }, + "id": 27, + "panels": [], + "targets": [ + { + "datasource": { + "type": "datasource", + "uid": "grafana" + }, + "refId": "A" + } + ], + "title": "Unbounded Channels", + "type": "row" + } + ], + "refresh": "5s", + "schemaVersion": 38, + "style": "dark", + "tags": [], + "templating": { + "list": [] + }, + "time": { + "from": "now-15m", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ] + }, + "timezone": "utc", + "title": "Substrate Service Tasks with substrate prefix", + "uid": "S7sc-M_Gk", + "version": 17, + "weekStart": "" + } \ No newline at end of file diff --git a/polkadot/node/subsystem-bench/src/availability/cli.rs b/polkadot/node/subsystem-bench/src/availability/cli.rs new file mode 100644 index 000000000000..65df8c1552aa --- /dev/null +++ b/polkadot/node/subsystem-bench/src/availability/cli.rs @@ -0,0 +1,37 @@ +// Copyright (C) Parity Technologies (UK) Ltd. +// This file is part of Polkadot. + +// Polkadot is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. + +// Polkadot is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with Polkadot. If not, see . + +use serde::{Deserialize, Serialize}; + +#[derive(clap::ValueEnum, Clone, Copy, Debug, PartialEq)] +#[value(rename_all = "kebab-case")] +#[non_exhaustive] +pub enum NetworkEmulation { + Ideal, + Healthy, + Degraded, +} + +#[derive(Debug, Clone, Serialize, Deserialize, clap::Parser)] +#[clap(rename_all = "kebab-case")] +#[allow(missing_docs)] +pub struct DataAvailabilityReadOptions { + #[clap(short, long, default_value_t = false)] + /// Turbo boost AD Read by fetching the full availability datafrom backers first. Saves CPU as + /// we don't need to re-construct from chunks. Tipically this is only faster if nodes have + /// enough bandwidth. + pub fetch_from_backers: bool, +} diff --git a/polkadot/node/subsystem-bench/src/availability/mod.rs b/polkadot/node/subsystem-bench/src/availability/mod.rs new file mode 100644 index 000000000000..7c81b9313659 --- /dev/null +++ b/polkadot/node/subsystem-bench/src/availability/mod.rs @@ -0,0 +1,339 @@ +// Copyright (C) Parity Technologies (UK) Ltd. +// This file is part of Polkadot. + +// Polkadot is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. + +// Polkadot is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with Polkadot. If not, see . +use itertools::Itertools; +use std::{collections::HashMap, iter::Cycle, ops::Sub, sync::Arc, time::Instant}; + +use crate::TestEnvironment; +use polkadot_node_subsystem::{Overseer, OverseerConnector, SpawnGlue}; +use polkadot_node_subsystem_test_helpers::derive_erasure_chunks_with_proofs_and_root; +use polkadot_overseer::Handle as OverseerHandle; +use sc_network::request_responses::ProtocolConfig; + +use colored::Colorize; + +use futures::{channel::oneshot, stream::FuturesUnordered, StreamExt}; +use polkadot_node_metrics::metrics::Metrics; + +use polkadot_availability_recovery::AvailabilityRecoverySubsystem; + +use crate::GENESIS_HASH; +use parity_scale_codec::Encode; +use polkadot_node_network_protocol::request_response::{IncomingRequest, ReqProtocolNames}; +use polkadot_node_primitives::{BlockData, PoV}; +use polkadot_node_subsystem::messages::{AllMessages, AvailabilityRecoveryMessage}; + +use crate::core::{ + environment::TestEnvironmentDependencies, + mock::{ + av_store, + network_bridge::{self, MockNetworkBridgeTx, NetworkAvailabilityState}, + runtime_api, MockAvailabilityStore, MockRuntimeApi, + }, +}; + +use super::core::{configuration::TestConfiguration, mock::dummy_builder, network::*}; + +const LOG_TARGET: &str = "subsystem-bench::availability"; + +use polkadot_node_primitives::{AvailableData, ErasureChunk}; + +use super::{cli::TestObjective, core::mock::AlwaysSupportsParachains}; +use polkadot_node_subsystem_test_helpers::mock::new_block_import_info; +use polkadot_primitives::{ + CandidateHash, CandidateReceipt, GroupIndex, Hash, HeadData, PersistedValidationData, +}; +use polkadot_primitives_test_helpers::{dummy_candidate_receipt, dummy_hash}; +use sc_service::SpawnTaskHandle; + +mod cli; +pub use cli::{DataAvailabilityReadOptions, NetworkEmulation}; + +fn build_overseer( + spawn_task_handle: SpawnTaskHandle, + runtime_api: MockRuntimeApi, + av_store: MockAvailabilityStore, + network_bridge: MockNetworkBridgeTx, + availability_recovery: AvailabilityRecoverySubsystem, +) -> (Overseer, AlwaysSupportsParachains>, OverseerHandle) { + let overseer_connector = OverseerConnector::with_event_capacity(64000); + let dummy = dummy_builder!(spawn_task_handle); + let builder = dummy + .replace_runtime_api(|_| runtime_api) + .replace_availability_store(|_| av_store) + .replace_network_bridge_tx(|_| network_bridge) + .replace_availability_recovery(|_| availability_recovery); + + let (overseer, raw_handle) = + builder.build_with_connector(overseer_connector).expect("Should not fail"); + + (overseer, OverseerHandle::new(raw_handle)) +} + +/// Takes a test configuration and uses it to creates the `TestEnvironment`. +pub fn prepare_test( + config: TestConfiguration, + state: &mut TestState, +) -> (TestEnvironment, ProtocolConfig) { + prepare_test_inner(config, state, TestEnvironmentDependencies::default()) +} + +fn prepare_test_inner( + config: TestConfiguration, + state: &mut TestState, + dependencies: TestEnvironmentDependencies, +) -> (TestEnvironment, ProtocolConfig) { + // Generate test authorities. + let test_authorities = config.generate_authorities(); + + let runtime_api = runtime_api::MockRuntimeApi::new(config.clone(), test_authorities.clone()); + + let av_store = + av_store::MockAvailabilityStore::new(state.chunks.clone(), state.candidate_hashes.clone()); + + let availability_state = NetworkAvailabilityState { + candidate_hashes: state.candidate_hashes.clone(), + available_data: state.available_data.clone(), + chunks: state.chunks.clone(), + }; + + let network = NetworkEmulator::new(&config, &dependencies, &test_authorities); + + let network_bridge_tx = network_bridge::MockNetworkBridgeTx::new( + config.clone(), + availability_state, + network.clone(), + ); + + let use_fast_path = match &state.config().objective { + TestObjective::DataAvailabilityRead(options) => options.fetch_from_backers, + _ => panic!("Unexpected objective"), + }; + + let (collation_req_receiver, req_cfg) = + IncomingRequest::get_config_receiver(&ReqProtocolNames::new(GENESIS_HASH, None)); + + let subsystem = if use_fast_path { + AvailabilityRecoverySubsystem::with_fast_path( + collation_req_receiver, + Metrics::try_register(&dependencies.registry).unwrap(), + ) + } else { + AvailabilityRecoverySubsystem::with_chunks_only( + collation_req_receiver, + Metrics::try_register(&dependencies.registry).unwrap(), + ) + }; + + let (overseer, overseer_handle) = build_overseer( + dependencies.task_manager.spawn_handle(), + runtime_api, + av_store, + network_bridge_tx, + subsystem, + ); + + (TestEnvironment::new(dependencies, config, network, overseer, overseer_handle), req_cfg) +} + +#[derive(Clone)] +pub struct TestState { + // Full test configuration + config: TestConfiguration, + // A cycle iterator on all PoV sizes used in the test. + pov_sizes: Cycle>, + // Generated candidate receipts to be used in the test + candidates: Cycle>, + // Map from pov size to candidate index + pov_size_to_candidate: HashMap, + // Map from generated candidate hashes to candidate index in `available_data` + // and `chunks`. + candidate_hashes: HashMap, + // Per candidate index receipts. + candidate_receipt_templates: Vec, + // Per candidate index `AvailableData` + available_data: Vec, + // Per candiadte index chunks + chunks: Vec>, +} + +impl TestState { + fn config(&self) -> &TestConfiguration { + &self.config + } + + pub fn next_candidate(&mut self) -> Option { + let candidate = self.candidates.next(); + let candidate_hash = candidate.as_ref().unwrap().hash(); + gum::trace!(target: LOG_TARGET, "Next candidate selected {:?}", candidate_hash); + candidate + } + + /// Generate candidates to be used in the test. + fn generate_candidates(&mut self) { + let count = self.config.n_cores * self.config.num_blocks; + gum::info!(target: LOG_TARGET,"{}", format!("Pre-generating {} candidates.", count).bright_blue()); + + // Generate all candidates + self.candidates = (0..count) + .map(|index| { + let pov_size = self.pov_sizes.next().expect("This is a cycle; qed"); + let candidate_index = *self + .pov_size_to_candidate + .get(&pov_size) + .expect("pov_size always exists; qed"); + let mut candidate_receipt = + self.candidate_receipt_templates[candidate_index].clone(); + + // Make it unique. + candidate_receipt.descriptor.relay_parent = Hash::from_low_u64_be(index as u64); + // Store the new candidate in the state + self.candidate_hashes.insert(candidate_receipt.hash(), candidate_index); + + gum::debug!(target: LOG_TARGET, candidate_hash = ?candidate_receipt.hash(), "new candidate"); + + candidate_receipt + }) + .collect::>() + .into_iter() + .cycle(); + } + + pub fn new(config: &TestConfiguration) -> Self { + let config = config.clone(); + + let mut chunks = Vec::new(); + let mut available_data = Vec::new(); + let mut candidate_receipt_templates = Vec::new(); + let mut pov_size_to_candidate = HashMap::new(); + + // we use it for all candidates. + let persisted_validation_data = PersistedValidationData { + parent_head: HeadData(vec![7, 8, 9]), + relay_parent_number: Default::default(), + max_pov_size: 1024, + relay_parent_storage_root: Default::default(), + }; + + // For each unique pov we create a candidate receipt. + for (index, pov_size) in config.pov_sizes().iter().cloned().unique().enumerate() { + gum::info!(target: LOG_TARGET, index, pov_size, "{}", "Generating template candidate".bright_blue()); + + let mut candidate_receipt = dummy_candidate_receipt(dummy_hash()); + let pov = PoV { block_data: BlockData(vec![index as u8; pov_size]) }; + + let new_available_data = AvailableData { + validation_data: persisted_validation_data.clone(), + pov: Arc::new(pov), + }; + + let (new_chunks, erasure_root) = derive_erasure_chunks_with_proofs_and_root( + config.n_validators, + &new_available_data, + |_, _| {}, + ); + + candidate_receipt.descriptor.erasure_root = erasure_root; + + chunks.push(new_chunks); + available_data.push(new_available_data); + pov_size_to_candidate.insert(pov_size, index); + candidate_receipt_templates.push(candidate_receipt); + } + + let pov_sizes = config.pov_sizes().to_owned(); + let pov_sizes = pov_sizes.into_iter().cycle(); + gum::info!(target: LOG_TARGET, "{}","Created test environment.".bright_blue()); + + let mut _self = Self { + config, + available_data, + candidate_receipt_templates, + chunks, + pov_size_to_candidate, + pov_sizes, + candidate_hashes: HashMap::new(), + candidates: Vec::new().into_iter().cycle(), + }; + + _self.generate_candidates(); + _self + } +} + +pub async fn benchmark_availability_read(env: &mut TestEnvironment, mut state: TestState) { + let config = env.config().clone(); + + env.import_block(new_block_import_info(Hash::repeat_byte(1), 1)).await; + + let start_marker = Instant::now(); + let mut batch = FuturesUnordered::new(); + let mut availability_bytes = 0u128; + + env.metrics().set_n_validators(config.n_validators); + env.metrics().set_n_cores(config.n_cores); + + for block_num in 0..env.config().num_blocks { + gum::info!(target: LOG_TARGET, "Current block {}/{}", block_num + 1, env.config().num_blocks); + env.metrics().set_current_block(block_num); + + let block_start_ts = Instant::now(); + for candidate_num in 0..config.n_cores as u64 { + let candidate = + state.next_candidate().expect("We always send up to n_cores*num_blocks; qed"); + let (tx, rx) = oneshot::channel(); + batch.push(rx); + + let message = AllMessages::AvailabilityRecovery( + AvailabilityRecoveryMessage::RecoverAvailableData( + candidate.clone(), + 1, + Some(GroupIndex( + candidate_num as u32 % (std::cmp::max(5, config.n_cores) / 5) as u32, + )), + tx, + ), + ); + env.send_message(message).await; + } + + gum::info!("{}", format!("{} recoveries pending", batch.len()).bright_black()); + while let Some(completed) = batch.next().await { + let available_data = completed.unwrap().unwrap(); + env.metrics().on_pov_size(available_data.encoded_size()); + availability_bytes += available_data.encoded_size() as u128; + } + + let block_time = Instant::now().sub(block_start_ts).as_millis() as u64; + env.metrics().set_block_time(block_time); + gum::info!("All work for block completed in {}", format!("{:?}ms", block_time).cyan()); + } + + let duration: u128 = start_marker.elapsed().as_millis(); + let availability_bytes = availability_bytes / 1024; + gum::info!("All blocks processed in {}", format!("{:?}ms", duration).cyan()); + gum::info!( + "Throughput: {}", + format!("{} KiB/block", availability_bytes / env.config().num_blocks as u128).bright_red() + ); + gum::info!( + "Block time: {}", + format!("{} ms", start_marker.elapsed().as_millis() / env.config().num_blocks as u128) + .red() + ); + + gum::info!("{}", &env); + env.stop().await; +} diff --git a/polkadot/node/subsystem-bench/src/cli.rs b/polkadot/node/subsystem-bench/src/cli.rs new file mode 100644 index 000000000000..3352f33a3503 --- /dev/null +++ b/polkadot/node/subsystem-bench/src/cli.rs @@ -0,0 +1,60 @@ +// Copyright (C) Parity Technologies (UK) Ltd. +// This file is part of Polkadot. + +// Polkadot is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. + +// Polkadot is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with Polkadot. If not, see . +use super::availability::DataAvailabilityReadOptions; +use serde::{Deserialize, Serialize}; + +#[derive(Debug, Clone, Serialize, Deserialize, clap::Parser)] +#[clap(rename_all = "kebab-case")] +#[allow(missing_docs)] +pub struct TestSequenceOptions { + #[clap(short, long, ignore_case = true)] + pub path: String, +} + +/// Define the supported benchmarks targets +#[derive(Debug, Clone, clap::Parser, Serialize, Deserialize)] +#[command(rename_all = "kebab-case")] +pub enum TestObjective { + /// Benchmark availability recovery strategies. + DataAvailabilityRead(DataAvailabilityReadOptions), + /// Run a test sequence specified in a file + TestSequence(TestSequenceOptions), +} + +#[derive(Debug, clap::Parser)] +#[clap(rename_all = "kebab-case")] +#[allow(missing_docs)] +pub struct StandardTestOptions { + #[clap(long, ignore_case = true, default_value_t = 100)] + /// Number of cores to fetch availability for. + pub n_cores: usize, + + #[clap(long, ignore_case = true, default_value_t = 500)] + /// Number of validators to fetch chunks from. + pub n_validators: usize, + + #[clap(long, ignore_case = true, default_value_t = 5120)] + /// The minimum pov size in KiB + pub min_pov_size: usize, + + #[clap(long, ignore_case = true, default_value_t = 5120)] + /// The maximum pov size bytes + pub max_pov_size: usize, + + #[clap(short, long, ignore_case = true, default_value_t = 1)] + /// The number of blocks the test is going to run. + pub num_blocks: usize, +} diff --git a/polkadot/node/subsystem-bench/src/core/configuration.rs b/polkadot/node/subsystem-bench/src/core/configuration.rs new file mode 100644 index 000000000000..164addb51900 --- /dev/null +++ b/polkadot/node/subsystem-bench/src/core/configuration.rs @@ -0,0 +1,262 @@ +// Copyright (C) Parity Technologies (UK) Ltd. +// This file is part of Polkadot. + +// Polkadot is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. + +// Polkadot is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with Polkadot. If not, see . +// +//! Test configuration definition and helpers. +use super::*; +use keyring::Keyring; +use std::{path::Path, time::Duration}; + +pub use crate::cli::TestObjective; +use polkadot_primitives::{AuthorityDiscoveryId, ValidatorId}; +use rand::{distributions::Uniform, prelude::Distribution, thread_rng}; +use serde::{Deserialize, Serialize}; + +pub fn random_pov_size(min_pov_size: usize, max_pov_size: usize) -> usize { + random_uniform_sample(min_pov_size, max_pov_size) +} + +fn random_uniform_sample + From>(min_value: T, max_value: T) -> T { + Uniform::from(min_value.into()..=max_value.into()) + .sample(&mut thread_rng()) + .into() +} + +/// Peer response latency configuration. +#[derive(Clone, Debug, Default, Serialize, Deserialize)] +pub struct PeerLatency { + /// Min latency for `NetworkAction` completion. + pub min_latency: Duration, + /// Max latency or `NetworkAction` completion. + pub max_latency: Duration, +} + +// Default PoV size in KiB. +fn default_pov_size() -> usize { + 5120 +} + +// Default bandwidth in bytes +fn default_bandwidth() -> usize { + 52428800 +} + +// Default connectivity percentage +fn default_connectivity() -> usize { + 100 +} + +/// The test input parameters +#[derive(Clone, Debug, Serialize, Deserialize)] +pub struct TestConfiguration { + /// The test objective + pub objective: TestObjective, + /// Number of validators + pub n_validators: usize, + /// Number of cores + pub n_cores: usize, + /// The min PoV size + #[serde(default = "default_pov_size")] + pub min_pov_size: usize, + /// The max PoV size, + #[serde(default = "default_pov_size")] + pub max_pov_size: usize, + /// Randomly sampled pov_sizes + #[serde(skip)] + pov_sizes: Vec, + /// The amount of bandiwdth remote validators have. + #[serde(default = "default_bandwidth")] + pub peer_bandwidth: usize, + /// The amount of bandiwdth our node has. + #[serde(default = "default_bandwidth")] + pub bandwidth: usize, + /// Optional peer emulation latency + #[serde(default)] + pub latency: Option, + /// Error probability, applies to sending messages to the emulated network peers + #[serde(default)] + pub error: usize, + /// Connectivity ratio, the percentage of peers we are not connected to, but ar part of + /// the topology. + #[serde(default = "default_connectivity")] + pub connectivity: usize, + /// Number of blocks to run the test for + pub num_blocks: usize, +} + +fn generate_pov_sizes(count: usize, min_kib: usize, max_kib: usize) -> Vec { + (0..count).map(|_| random_pov_size(min_kib * 1024, max_kib * 1024)).collect() +} + +#[derive(Serialize, Deserialize)] +pub struct TestSequence { + #[serde(rename(serialize = "TestConfiguration", deserialize = "TestConfiguration"))] + test_configurations: Vec, +} + +impl TestSequence { + pub fn into_vec(self) -> Vec { + self.test_configurations + .into_iter() + .map(|mut config| { + config.pov_sizes = + generate_pov_sizes(config.n_cores, config.min_pov_size, config.max_pov_size); + config + }) + .collect() + } +} + +impl TestSequence { + pub fn new_from_file(path: &Path) -> std::io::Result { + let string = String::from_utf8(std::fs::read(path)?).expect("File is valid UTF8"); + Ok(serde_yaml::from_str(&string).expect("File is valid test sequence YA")) + } +} + +/// Helper struct for authority related state. +#[derive(Clone)] +pub struct TestAuthorities { + pub keyrings: Vec, + pub validator_public: Vec, + pub validator_authority_id: Vec, +} + +impl TestConfiguration { + #[allow(unused)] + pub fn write_to_disk(&self) { + // Serialize a slice of configurations + let yaml = serde_yaml::to_string(&TestSequence { test_configurations: vec![self.clone()] }) + .unwrap(); + std::fs::write("last_test.yaml", yaml).unwrap(); + } + + pub fn pov_sizes(&self) -> &[usize] { + &self.pov_sizes + } + + /// Generates the authority keys we need for the network emulation. + pub fn generate_authorities(&self) -> TestAuthorities { + let keyrings = (0..self.n_validators) + .map(|peer_index| Keyring::new(format!("Node{}", peer_index))) + .collect::>(); + + // Generate `AuthorityDiscoveryId`` for each peer + let validator_public: Vec = keyrings + .iter() + .map(|keyring: &Keyring| keyring.clone().public().into()) + .collect::>(); + + let validator_authority_id: Vec = keyrings + .iter() + .map(|keyring| keyring.clone().public().into()) + .collect::>(); + + TestAuthorities { keyrings, validator_public, validator_authority_id } + } + + /// An unconstrained standard configuration matching Polkadot/Kusama + pub fn ideal_network( + objective: TestObjective, + num_blocks: usize, + n_validators: usize, + n_cores: usize, + min_pov_size: usize, + max_pov_size: usize, + ) -> TestConfiguration { + Self { + objective, + n_cores, + n_validators, + pov_sizes: generate_pov_sizes(n_cores, min_pov_size, max_pov_size), + bandwidth: 50 * 1024 * 1024, + peer_bandwidth: 50 * 1024 * 1024, + // No latency + latency: None, + error: 0, + num_blocks, + min_pov_size, + max_pov_size, + connectivity: 100, + } + } + + pub fn healthy_network( + objective: TestObjective, + num_blocks: usize, + n_validators: usize, + n_cores: usize, + min_pov_size: usize, + max_pov_size: usize, + ) -> TestConfiguration { + Self { + objective, + n_cores, + n_validators, + pov_sizes: generate_pov_sizes(n_cores, min_pov_size, max_pov_size), + bandwidth: 50 * 1024 * 1024, + peer_bandwidth: 50 * 1024 * 1024, + latency: Some(PeerLatency { + min_latency: Duration::from_millis(1), + max_latency: Duration::from_millis(100), + }), + error: 3, + num_blocks, + min_pov_size, + max_pov_size, + connectivity: 95, + } + } + + pub fn degraded_network( + objective: TestObjective, + num_blocks: usize, + n_validators: usize, + n_cores: usize, + min_pov_size: usize, + max_pov_size: usize, + ) -> TestConfiguration { + Self { + objective, + n_cores, + n_validators, + pov_sizes: generate_pov_sizes(n_cores, min_pov_size, max_pov_size), + bandwidth: 50 * 1024 * 1024, + peer_bandwidth: 50 * 1024 * 1024, + latency: Some(PeerLatency { + min_latency: Duration::from_millis(10), + max_latency: Duration::from_millis(500), + }), + error: 33, + num_blocks, + min_pov_size, + max_pov_size, + connectivity: 67, + } + } +} + +/// Produce a randomized duration between `min` and `max`. +pub fn random_latency(maybe_peer_latency: Option<&PeerLatency>) -> Option { + maybe_peer_latency.map(|peer_latency| { + Uniform::from(peer_latency.min_latency..=peer_latency.max_latency).sample(&mut thread_rng()) + }) +} + +/// Generate a random error based on `probability`. +/// `probability` should be a number between 0 and 100. +pub fn random_error(probability: usize) -> bool { + Uniform::from(0..=99).sample(&mut thread_rng()) < probability +} diff --git a/polkadot/node/subsystem-bench/src/core/display.rs b/polkadot/node/subsystem-bench/src/core/display.rs new file mode 100644 index 000000000000..d600cc484c14 --- /dev/null +++ b/polkadot/node/subsystem-bench/src/core/display.rs @@ -0,0 +1,191 @@ +// Copyright (C) Parity Technologies (UK) Ltd. +// This file is part of Polkadot. + +// Polkadot is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. + +// Polkadot is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with Polkadot. If not, see . +// +//! Display implementations and helper methods for parsing prometheus metrics +//! to a format that can be displayed in the CLI. +//! +//! Currently histogram buckets are skipped. +use super::{configuration::TestConfiguration, LOG_TARGET}; +use colored::Colorize; +use prometheus::{ + proto::{MetricFamily, MetricType}, + Registry, +}; +use std::fmt::Display; + +#[derive(Default)] +pub struct MetricCollection(Vec); + +impl From> for MetricCollection { + fn from(metrics: Vec) -> Self { + MetricCollection(metrics) + } +} + +impl MetricCollection { + pub fn all(&self) -> &Vec { + &self.0 + } + + /// Sums up all metrics with the given name in the collection + pub fn sum_by(&self, name: &str) -> f64 { + self.all() + .iter() + .filter(|metric| metric.name == name) + .map(|metric| metric.value) + .sum() + } + + pub fn subset_with_label_value(&self, label_name: &str, label_value: &str) -> MetricCollection { + self.0 + .iter() + .filter_map(|metric| { + if let Some(index) = metric.label_names.iter().position(|label| label == label_name) + { + if Some(&String::from(label_value)) == metric.label_values.get(index) { + Some(metric.clone()) + } else { + None + } + } else { + None + } + }) + .collect::>() + .into() + } +} + +impl Display for MetricCollection { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + writeln!(f)?; + let metrics = self.all(); + for metric in metrics { + writeln!(f, "{}", metric)?; + } + Ok(()) + } +} +#[derive(Debug, Clone)] +pub struct TestMetric { + name: String, + label_names: Vec, + label_values: Vec, + value: f64, +} + +impl Display for TestMetric { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!( + f, + "({} = {}) [{:?}, {:?}]", + self.name.cyan(), + format!("{}", self.value).white(), + self.label_names, + self.label_values + ) + } +} + +// Returns `false` if metric should be skipped. +fn check_metric_family(mf: &MetricFamily) -> bool { + if mf.get_metric().is_empty() { + gum::error!(target: LOG_TARGET, "MetricFamily has no metrics: {:?}", mf); + return false + } + if mf.get_name().is_empty() { + gum::error!(target: LOG_TARGET, "MetricFamily has no name: {:?}", mf); + return false + } + + true +} + +pub fn parse_metrics(registry: &Registry) -> MetricCollection { + let metric_families = registry.gather(); + let mut test_metrics = Vec::new(); + for mf in metric_families { + if !check_metric_family(&mf) { + continue + } + + let name: String = mf.get_name().into(); + let metric_type = mf.get_field_type(); + for m in mf.get_metric() { + let (label_names, label_values): (Vec, Vec) = m + .get_label() + .iter() + .map(|pair| (String::from(pair.get_name()), String::from(pair.get_value()))) + .unzip(); + + match metric_type { + MetricType::COUNTER => { + test_metrics.push(TestMetric { + name: name.clone(), + label_names, + label_values, + value: m.get_counter().get_value(), + }); + }, + MetricType::GAUGE => { + test_metrics.push(TestMetric { + name: name.clone(), + label_names, + label_values, + value: m.get_gauge().get_value(), + }); + }, + MetricType::HISTOGRAM => { + let h = m.get_histogram(); + let h_name = name.clone() + "_sum"; + test_metrics.push(TestMetric { + name: h_name, + label_names: label_names.clone(), + label_values: label_values.clone(), + value: h.get_sample_sum(), + }); + + let h_name = name.clone() + "_count"; + test_metrics.push(TestMetric { + name: h_name, + label_names, + label_values, + value: h.get_sample_sum(), + }); + }, + MetricType::SUMMARY => { + unimplemented!(); + }, + MetricType::UNTYPED => { + unimplemented!(); + }, + } + } + } + test_metrics.into() +} + +pub fn display_configuration(test_config: &TestConfiguration) { + gum::info!( + "{}, {}, {}, {}, {}", + format!("n_validators = {}", test_config.n_validators).blue(), + format!("n_cores = {}", test_config.n_cores).blue(), + format!("pov_size = {} - {}", test_config.min_pov_size, test_config.max_pov_size) + .bright_black(), + format!("error = {}", test_config.error).bright_black(), + format!("latency = {:?}", test_config.latency).bright_black(), + ); +} diff --git a/polkadot/node/subsystem-bench/src/core/environment.rs b/polkadot/node/subsystem-bench/src/core/environment.rs new file mode 100644 index 000000000000..247596474078 --- /dev/null +++ b/polkadot/node/subsystem-bench/src/core/environment.rs @@ -0,0 +1,333 @@ +// Copyright (C) Parity Technologies (UK) Ltd. +// This file is part of Polkadot. + +// Polkadot is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. + +// Polkadot is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with Polkadot. If not, see . +//! Test environment implementation +use crate::{ + core::{mock::AlwaysSupportsParachains, network::NetworkEmulator}, + TestConfiguration, +}; +use colored::Colorize; +use core::time::Duration; +use futures::FutureExt; +use polkadot_overseer::{BlockInfo, Handle as OverseerHandle}; + +use polkadot_node_subsystem::{messages::AllMessages, Overseer, SpawnGlue, TimeoutExt}; +use polkadot_node_subsystem_types::Hash; +use polkadot_node_subsystem_util::metrics::prometheus::{ + self, Gauge, Histogram, PrometheusError, Registry, U64, +}; + +use sc_network::peer_store::LOG_TARGET; +use sc_service::{SpawnTaskHandle, TaskManager}; +use std::{ + fmt::Display, + net::{Ipv4Addr, SocketAddr}, +}; +use tokio::runtime::Handle; + +const MIB: f64 = 1024.0 * 1024.0; + +/// Test environment/configuration metrics +#[derive(Clone)] +pub struct TestEnvironmentMetrics { + /// Number of bytes sent per peer. + n_validators: Gauge, + /// Number of received sent per peer. + n_cores: Gauge, + /// PoV size + pov_size: Histogram, + /// Current block + current_block: Gauge, + /// Current block + block_time: Gauge, +} + +impl TestEnvironmentMetrics { + pub fn new(registry: &Registry) -> Result { + let mut buckets = prometheus::exponential_buckets(16384.0, 2.0, 9) + .expect("arguments are always valid; qed"); + buckets.extend(vec![5.0 * MIB, 6.0 * MIB, 7.0 * MIB, 8.0 * MIB, 9.0 * MIB, 10.0 * MIB]); + + Ok(Self { + n_validators: prometheus::register( + Gauge::new( + "subsystem_benchmark_n_validators", + "Total number of validators in the test", + )?, + registry, + )?, + n_cores: prometheus::register( + Gauge::new( + "subsystem_benchmark_n_cores", + "Number of cores we fetch availability for each block", + )?, + registry, + )?, + current_block: prometheus::register( + Gauge::new("subsystem_benchmark_current_block", "The current test block")?, + registry, + )?, + block_time: prometheus::register( + Gauge::new("subsystem_benchmark_block_time", "The time it takes for the target subsystems(s) to complete all the requests in a block")?, + registry, + )?, + pov_size: prometheus::register( + Histogram::with_opts( + prometheus::HistogramOpts::new( + "subsystem_benchmark_pov_size", + "The compressed size of the proof of validity of a candidate", + ) + .buckets(buckets), + )?, + registry, + )?, + }) + } + + pub fn set_n_validators(&self, n_validators: usize) { + self.n_validators.set(n_validators as u64); + } + + pub fn set_n_cores(&self, n_cores: usize) { + self.n_cores.set(n_cores as u64); + } + + pub fn set_current_block(&self, current_block: usize) { + self.current_block.set(current_block as u64); + } + + pub fn set_block_time(&self, block_time_ms: u64) { + self.block_time.set(block_time_ms); + } + + pub fn on_pov_size(&self, pov_size: usize) { + self.pov_size.observe(pov_size as f64); + } +} + +fn new_runtime() -> tokio::runtime::Runtime { + tokio::runtime::Builder::new_multi_thread() + .thread_name("subsystem-bench") + .enable_all() + .thread_stack_size(3 * 1024 * 1024) + .build() + .unwrap() +} + +/// Wrapper for dependencies +pub struct TestEnvironmentDependencies { + pub registry: Registry, + pub task_manager: TaskManager, + pub runtime: tokio::runtime::Runtime, +} + +impl Default for TestEnvironmentDependencies { + fn default() -> Self { + let runtime = new_runtime(); + let registry = Registry::new(); + let task_manager: TaskManager = + TaskManager::new(runtime.handle().clone(), Some(®istry)).unwrap(); + + Self { runtime, registry, task_manager } + } +} + +// A dummy genesis hash +pub const GENESIS_HASH: Hash = Hash::repeat_byte(0xff); + +// We use this to bail out sending messages to the subsystem if it is overloaded such that +// the time of flight is breaches 5s. +// This should eventually be a test parameter. +const MAX_TIME_OF_FLIGHT: Duration = Duration::from_millis(5000); + +/// The test environment is the high level wrapper of all things required to test +/// a certain subsystem. +/// +/// ## Mockups +/// The overseer is passed in during construction and it can host an arbitrary number of +/// real subsystems instances and the corresponding mocked instances such that the real +/// subsystems can get their messages answered. +/// +/// As the subsystem's performance depends on network connectivity, the test environment +/// emulates validator nodes on the network, see `NetworkEmulator`. The network emulation +/// is configurable in terms of peer bandwidth, latency and connection error rate using +/// uniform distribution sampling. +/// +/// +/// ## Usage +/// `TestEnvironment` is used in tests to send `Overseer` messages or signals to the subsystem +/// under test. +/// +/// ## Collecting test metrics +/// +/// ### Prometheus +/// A prometheus endpoint is exposed while the test is running. A local Prometheus instance +/// can scrape it every 1s and a Grafana dashboard is the preferred way of visualizing +/// the performance characteristics of the subsystem. +/// +/// ### CLI +/// A subset of the Prometheus metrics are printed at the end of the test. +pub struct TestEnvironment { + /// Test dependencies + dependencies: TestEnvironmentDependencies, + /// A runtime handle + runtime_handle: tokio::runtime::Handle, + /// A handle to the lovely overseer + overseer_handle: OverseerHandle, + /// The test configuration. + config: TestConfiguration, + /// A handle to the network emulator. + network: NetworkEmulator, + /// Configuration/env metrics + metrics: TestEnvironmentMetrics, +} + +impl TestEnvironment { + /// Create a new test environment + pub fn new( + dependencies: TestEnvironmentDependencies, + config: TestConfiguration, + network: NetworkEmulator, + overseer: Overseer, AlwaysSupportsParachains>, + overseer_handle: OverseerHandle, + ) -> Self { + let metrics = TestEnvironmentMetrics::new(&dependencies.registry) + .expect("Metrics need to be registered"); + + let spawn_handle = dependencies.task_manager.spawn_handle(); + spawn_handle.spawn_blocking("overseer", "overseer", overseer.run().boxed()); + + let registry_clone = dependencies.registry.clone(); + dependencies.task_manager.spawn_handle().spawn_blocking( + "prometheus", + "test-environment", + async move { + prometheus_endpoint::init_prometheus( + SocketAddr::new(std::net::IpAddr::V4(Ipv4Addr::LOCALHOST), 9999), + registry_clone, + ) + .await + .unwrap(); + }, + ); + + TestEnvironment { + runtime_handle: dependencies.runtime.handle().clone(), + dependencies, + overseer_handle, + config, + network, + metrics, + } + } + + pub fn config(&self) -> &TestConfiguration { + &self.config + } + + pub fn network(&self) -> &NetworkEmulator { + &self.network + } + + pub fn registry(&self) -> &Registry { + &self.dependencies.registry + } + + pub fn metrics(&self) -> &TestEnvironmentMetrics { + &self.metrics + } + + pub fn runtime(&self) -> Handle { + self.runtime_handle.clone() + } + + // Send a message to the subsystem under test environment. + pub async fn send_message(&mut self, msg: AllMessages) { + self.overseer_handle + .send_msg(msg, LOG_TARGET) + .timeout(MAX_TIME_OF_FLIGHT) + .await + .unwrap_or_else(|| { + panic!("{}ms maximum time of flight breached", MAX_TIME_OF_FLIGHT.as_millis()) + }); + } + + // Send an `ActiveLeavesUpdate` signal to all subsystems under test. + pub async fn import_block(&mut self, block: BlockInfo) { + self.overseer_handle + .block_imported(block) + .timeout(MAX_TIME_OF_FLIGHT) + .await + .unwrap_or_else(|| { + panic!("{}ms maximum time of flight breached", MAX_TIME_OF_FLIGHT.as_millis()) + }); + } + + // Stop overseer and subsystems. + pub async fn stop(&mut self) { + self.overseer_handle.stop().await; + } +} + +impl Display for TestEnvironment { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let stats = self.network().stats(); + + writeln!(f, "\n")?; + writeln!( + f, + "Total received from network: {}", + format!( + "{} MiB", + stats + .iter() + .enumerate() + .map(|(_index, stats)| stats.tx_bytes_total as u128) + .sum::() / (1024 * 1024) + ) + .cyan() + )?; + writeln!( + f, + "Total sent to network: {}", + format!("{} KiB", stats[0].tx_bytes_total / (1024)).cyan() + )?; + + let test_metrics = super::display::parse_metrics(self.registry()); + let subsystem_cpu_metrics = + test_metrics.subset_with_label_value("task_group", "availability-recovery"); + let total_cpu = subsystem_cpu_metrics.sum_by("substrate_tasks_polling_duration_sum"); + writeln!(f, "Total subsystem CPU usage {}", format!("{:.2}s", total_cpu).bright_purple())?; + writeln!( + f, + "CPU usage per block {}", + format!("{:.2}s", total_cpu / self.config().num_blocks as f64).bright_purple() + )?; + + let test_env_cpu_metrics = + test_metrics.subset_with_label_value("task_group", "test-environment"); + let total_cpu = test_env_cpu_metrics.sum_by("substrate_tasks_polling_duration_sum"); + writeln!( + f, + "Total test environment CPU usage {}", + format!("{:.2}s", total_cpu).bright_purple() + )?; + writeln!( + f, + "CPU usage per block {}", + format!("{:.2}s", total_cpu / self.config().num_blocks as f64).bright_purple() + ) + } +} diff --git a/polkadot/node/subsystem-bench/src/core/keyring.rs b/polkadot/node/subsystem-bench/src/core/keyring.rs new file mode 100644 index 000000000000..2d9aa348a922 --- /dev/null +++ b/polkadot/node/subsystem-bench/src/core/keyring.rs @@ -0,0 +1,40 @@ +// Copyright (C) Parity Technologies (UK) Ltd. +// This file is part of Polkadot. + +// Polkadot is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. + +// Polkadot is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with Polkadot. If not, see . + +pub use sp_core::sr25519; +use sp_core::{ + sr25519::{Pair, Public}, + Pair as PairT, +}; +/// Set of test accounts. +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct Keyring { + name: String, +} + +impl Keyring { + pub fn new(name: String) -> Keyring { + Self { name } + } + + pub fn pair(self) -> Pair { + Pair::from_string(&format!("//{}", self.name), None).expect("input is always good; qed") + } + + pub fn public(self) -> Public { + self.pair().public() + } +} diff --git a/polkadot/node/subsystem-bench/src/core/mock/av_store.rs b/polkadot/node/subsystem-bench/src/core/mock/av_store.rs new file mode 100644 index 000000000000..a471230f1b3f --- /dev/null +++ b/polkadot/node/subsystem-bench/src/core/mock/av_store.rs @@ -0,0 +1,137 @@ +// Copyright (C) Parity Technologies (UK) Ltd. +// This file is part of Polkadot. + +// Polkadot is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. + +// Polkadot is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with Polkadot. If not, see . +//! +//! A generic av store subsystem mockup suitable to be used in benchmarks. + +use parity_scale_codec::Encode; +use polkadot_primitives::CandidateHash; + +use std::collections::HashMap; + +use futures::{channel::oneshot, FutureExt}; + +use polkadot_node_primitives::ErasureChunk; + +use polkadot_node_subsystem::{ + messages::AvailabilityStoreMessage, overseer, SpawnedSubsystem, SubsystemError, +}; + +use polkadot_node_subsystem_types::OverseerSignal; + +pub struct AvailabilityStoreState { + candidate_hashes: HashMap, + chunks: Vec>, +} + +const LOG_TARGET: &str = "subsystem-bench::av-store-mock"; + +/// A mock of the availability store subsystem. This one also generates all the +/// candidates that a +pub struct MockAvailabilityStore { + state: AvailabilityStoreState, +} + +impl MockAvailabilityStore { + pub fn new( + chunks: Vec>, + candidate_hashes: HashMap, + ) -> MockAvailabilityStore { + Self { state: AvailabilityStoreState { chunks, candidate_hashes } } + } + + async fn respond_to_query_all_request( + &self, + candidate_hash: CandidateHash, + send_chunk: impl Fn(usize) -> bool, + tx: oneshot::Sender>, + ) { + let candidate_index = self + .state + .candidate_hashes + .get(&candidate_hash) + .expect("candidate was generated previously; qed"); + gum::debug!(target: LOG_TARGET, ?candidate_hash, candidate_index, "Candidate mapped to index"); + + let v = self + .state + .chunks + .get(*candidate_index) + .unwrap() + .iter() + .filter(|c| send_chunk(c.index.0 as usize)) + .cloned() + .collect(); + + let _ = tx.send(v); + } +} + +#[overseer::subsystem(AvailabilityStore, error=SubsystemError, prefix=self::overseer)] +impl MockAvailabilityStore { + fn start(self, ctx: Context) -> SpawnedSubsystem { + let future = self.run(ctx).map(|_| Ok(())).boxed(); + + SpawnedSubsystem { name: "test-environment", future } + } +} + +#[overseer::contextbounds(AvailabilityStore, prefix = self::overseer)] +impl MockAvailabilityStore { + async fn run(self, mut ctx: Context) { + gum::debug!(target: LOG_TARGET, "Subsystem running"); + loop { + let msg = ctx.recv().await.expect("Overseer never fails us"); + + match msg { + orchestra::FromOrchestra::Signal(signal) => + if signal == OverseerSignal::Conclude { + return + }, + orchestra::FromOrchestra::Communication { msg } => match msg { + AvailabilityStoreMessage::QueryAvailableData(candidate_hash, tx) => { + gum::debug!(target: LOG_TARGET, candidate_hash = ?candidate_hash, "Responding to QueryAvailableData"); + + // We never have the full available data. + let _ = tx.send(None); + }, + AvailabilityStoreMessage::QueryAllChunks(candidate_hash, tx) => { + // We always have our own chunk. + gum::debug!(target: LOG_TARGET, candidate_hash = ?candidate_hash, "Responding to QueryAllChunks"); + self.respond_to_query_all_request(candidate_hash, |index| index == 0, tx) + .await; + }, + AvailabilityStoreMessage::QueryChunkSize(candidate_hash, tx) => { + gum::debug!(target: LOG_TARGET, candidate_hash = ?candidate_hash, "Responding to QueryChunkSize"); + + let candidate_index = self + .state + .candidate_hashes + .get(&candidate_hash) + .expect("candidate was generated previously; qed"); + gum::debug!(target: LOG_TARGET, ?candidate_hash, candidate_index, "Candidate mapped to index"); + + let chunk_size = + self.state.chunks.get(*candidate_index).unwrap()[0].encoded_size(); + let _ = tx.send(Some(chunk_size)); + }, + _ => { + unimplemented!("Unexpected av-store message") + }, + }, + } + } + } +} diff --git a/polkadot/node/subsystem-bench/src/core/mock/dummy.rs b/polkadot/node/subsystem-bench/src/core/mock/dummy.rs new file mode 100644 index 000000000000..0628368a49c0 --- /dev/null +++ b/polkadot/node/subsystem-bench/src/core/mock/dummy.rs @@ -0,0 +1,98 @@ +// Copyright (C) Parity Technologies (UK) Ltd. +// This file is part of Polkadot. + +// Polkadot is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. + +// Polkadot is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with Polkadot. If not, see . +//! Dummy subsystem mocks. +use paste::paste; + +use futures::FutureExt; +use polkadot_node_subsystem::{overseer, SpawnedSubsystem, SubsystemError}; +use std::time::Duration; +use tokio::time::sleep; + +const LOG_TARGET: &str = "subsystem-bench::mockery"; + +macro_rules! mock { + // Just query by relay parent + ($subsystem_name:ident) => { + paste! { + pub struct [] {} + #[overseer::subsystem($subsystem_name, error=SubsystemError, prefix=self::overseer)] + impl [] { + fn start(self, ctx: Context) -> SpawnedSubsystem { + let future = self.run(ctx).map(|_| Ok(())).boxed(); + + // The name will appear in substrate CPU task metrics as `task_group`.` + SpawnedSubsystem { name: "test-environment", future } + } + } + + #[overseer::contextbounds($subsystem_name, prefix = self::overseer)] + impl [] { + async fn run(self, mut ctx: Context) { + let mut count_total_msg = 0; + loop { + futures::select!{ + msg = ctx.recv().fuse() => { + match msg.unwrap() { + orchestra::FromOrchestra::Signal(signal) => { + match signal { + polkadot_node_subsystem_types::OverseerSignal::Conclude => {return}, + _ => {} + } + }, + orchestra::FromOrchestra::Communication { msg } => { + gum::debug!(target: LOG_TARGET, msg = ?msg, "mocked subsystem received message"); + } + } + + count_total_msg +=1; + } + _ = sleep(Duration::from_secs(6)).fuse() => { + if count_total_msg > 0 { + gum::trace!(target: LOG_TARGET, "Subsystem {} processed {} messages since last time", stringify!($subsystem_name), count_total_msg); + } + count_total_msg = 0; + } + } + } + } + } + } + }; +} + +mock!(AvailabilityStore); +mock!(StatementDistribution); +mock!(BitfieldSigning); +mock!(BitfieldDistribution); +mock!(Provisioner); +mock!(NetworkBridgeRx); +mock!(CollationGeneration); +mock!(CollatorProtocol); +mock!(GossipSupport); +mock!(DisputeDistribution); +mock!(DisputeCoordinator); +mock!(ProspectiveParachains); +mock!(PvfChecker); +mock!(CandidateBacking); +mock!(AvailabilityDistribution); +mock!(CandidateValidation); +mock!(AvailabilityRecovery); +mock!(NetworkBridgeTx); +mock!(ChainApi); +mock!(ChainSelection); +mock!(ApprovalVoting); +mock!(ApprovalDistribution); +mock!(RuntimeApi); diff --git a/polkadot/node/subsystem-bench/src/core/mock/mod.rs b/polkadot/node/subsystem-bench/src/core/mock/mod.rs new file mode 100644 index 000000000000..d59642e96058 --- /dev/null +++ b/polkadot/node/subsystem-bench/src/core/mock/mod.rs @@ -0,0 +1,77 @@ +// Copyright (C) Parity Technologies (UK) Ltd. +// This file is part of Polkadot. + +// Polkadot is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. + +// Polkadot is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with Polkadot. If not, see . + +use polkadot_node_subsystem::HeadSupportsParachains; +use polkadot_node_subsystem_types::Hash; + +pub mod av_store; +pub mod dummy; +pub mod network_bridge; +pub mod runtime_api; + +pub use av_store::*; +pub use network_bridge::*; +pub use runtime_api::*; + +pub struct AlwaysSupportsParachains {} +#[async_trait::async_trait] +impl HeadSupportsParachains for AlwaysSupportsParachains { + async fn head_supports_parachains(&self, _head: &Hash) -> bool { + true + } +} + +// An orchestra with dummy subsystems +macro_rules! dummy_builder { + ($spawn_task_handle: ident) => {{ + use super::core::mock::dummy::*; + + // Initialize a mock overseer. + // All subsystem except approval_voting and approval_distribution are mock subsystems. + Overseer::builder() + .approval_voting(MockApprovalVoting {}) + .approval_distribution(MockApprovalDistribution {}) + .availability_recovery(MockAvailabilityRecovery {}) + .candidate_validation(MockCandidateValidation {}) + .chain_api(MockChainApi {}) + .chain_selection(MockChainSelection {}) + .dispute_coordinator(MockDisputeCoordinator {}) + .runtime_api(MockRuntimeApi {}) + .network_bridge_tx(MockNetworkBridgeTx {}) + .availability_distribution(MockAvailabilityDistribution {}) + .availability_store(MockAvailabilityStore {}) + .pvf_checker(MockPvfChecker {}) + .candidate_backing(MockCandidateBacking {}) + .statement_distribution(MockStatementDistribution {}) + .bitfield_signing(MockBitfieldSigning {}) + .bitfield_distribution(MockBitfieldDistribution {}) + .provisioner(MockProvisioner {}) + .network_bridge_rx(MockNetworkBridgeRx {}) + .collation_generation(MockCollationGeneration {}) + .collator_protocol(MockCollatorProtocol {}) + .gossip_support(MockGossipSupport {}) + .dispute_distribution(MockDisputeDistribution {}) + .prospective_parachains(MockProspectiveParachains {}) + .activation_external_listeners(Default::default()) + .span_per_active_leaf(Default::default()) + .active_leaves(Default::default()) + .metrics(Default::default()) + .supports_parachains(AlwaysSupportsParachains {}) + .spawner(SpawnGlue($spawn_task_handle)) + }}; +} + +pub(crate) use dummy_builder; diff --git a/polkadot/node/subsystem-bench/src/core/mock/network_bridge.rs b/polkadot/node/subsystem-bench/src/core/mock/network_bridge.rs new file mode 100644 index 000000000000..b106b832011a --- /dev/null +++ b/polkadot/node/subsystem-bench/src/core/mock/network_bridge.rs @@ -0,0 +1,323 @@ +// Copyright (C) Parity Technologies (UK) Ltd. +// This file is part of Polkadot. + +// Polkadot is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. + +// Polkadot is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with Polkadot. If not, see . +//! +//! A generic av store subsystem mockup suitable to be used in benchmarks. + +use futures::Future; +use parity_scale_codec::Encode; +use polkadot_node_subsystem_types::OverseerSignal; +use std::{collections::HashMap, pin::Pin}; + +use futures::FutureExt; + +use polkadot_node_primitives::{AvailableData, ErasureChunk}; + +use polkadot_primitives::CandidateHash; +use sc_network::{OutboundFailure, RequestFailure}; + +use polkadot_node_subsystem::{ + messages::NetworkBridgeTxMessage, overseer, SpawnedSubsystem, SubsystemError, +}; + +use polkadot_node_network_protocol::request_response::{ + self as req_res, v1::ChunkResponse, Requests, +}; +use polkadot_primitives::AuthorityDiscoveryId; + +use crate::core::{ + configuration::{random_error, random_latency, TestConfiguration}, + network::{NetworkAction, NetworkEmulator, RateLimit}, +}; + +/// The availability store state of all emulated peers. +/// The network bridge tx mock will respond to requests as if the request is being serviced +/// by a remote peer on the network +pub struct NetworkAvailabilityState { + pub candidate_hashes: HashMap, + pub available_data: Vec, + pub chunks: Vec>, +} + +const LOG_TARGET: &str = "subsystem-bench::network-bridge-tx-mock"; + +/// A mock of the network bridge tx subsystem. +pub struct MockNetworkBridgeTx { + /// The test configurationg + config: TestConfiguration, + /// The network availability state + availabilty: NetworkAvailabilityState, + /// A network emulator instance + network: NetworkEmulator, +} + +impl MockNetworkBridgeTx { + pub fn new( + config: TestConfiguration, + availabilty: NetworkAvailabilityState, + network: NetworkEmulator, + ) -> MockNetworkBridgeTx { + Self { config, availabilty, network } + } + + fn not_connected_response( + &self, + authority_discovery_id: &AuthorityDiscoveryId, + future: Pin + Send>>, + ) -> NetworkAction { + // The network action will send the error after a random delay expires. + return NetworkAction::new( + authority_discovery_id.clone(), + future, + 0, + // Generate a random latency based on configuration. + random_latency(self.config.latency.as_ref()), + ) + } + /// Returns an `NetworkAction` corresponding to the peer sending the response. If + /// the peer is connected, the error is sent with a randomized latency as defined in + /// configuration. + fn respond_to_send_request( + &mut self, + request: Requests, + ingress_tx: &mut tokio::sync::mpsc::UnboundedSender, + ) -> NetworkAction { + let ingress_tx = ingress_tx.clone(); + + match request { + Requests::ChunkFetchingV1(outgoing_request) => { + let authority_discovery_id = match outgoing_request.peer { + req_res::Recipient::Authority(authority_discovery_id) => authority_discovery_id, + _ => unimplemented!("Peer recipient not supported yet"), + }; + // Account our sent request bytes. + self.network.peer_stats(0).inc_sent(outgoing_request.payload.encoded_size()); + + // If peer is disconnected return an error + if !self.network.is_peer_connected(&authority_discovery_id) { + // We always send `NotConnected` error and we ignore `IfDisconnected` value in + // the caller. + let future = async move { + let _ = outgoing_request + .pending_response + .send(Err(RequestFailure::NotConnected)); + } + .boxed(); + return self.not_connected_response(&authority_discovery_id, future) + } + + // Account for remote received request bytes. + self.network + .peer_stats_by_id(&authority_discovery_id) + .inc_received(outgoing_request.payload.encoded_size()); + + let validator_index: usize = outgoing_request.payload.index.0 as usize; + let candidate_hash = outgoing_request.payload.candidate_hash; + + let candidate_index = self + .availabilty + .candidate_hashes + .get(&candidate_hash) + .expect("candidate was generated previously; qed"); + gum::warn!(target: LOG_TARGET, ?candidate_hash, candidate_index, "Candidate mapped to index"); + + let chunk: ChunkResponse = self.availabilty.chunks.get(*candidate_index).unwrap() + [validator_index] + .clone() + .into(); + let mut size = chunk.encoded_size(); + + let response = if random_error(self.config.error) { + // Error will not account to any bandwidth used. + size = 0; + Err(RequestFailure::Network(OutboundFailure::ConnectionClosed)) + } else { + Ok(req_res::v1::ChunkFetchingResponse::from(Some(chunk)).encode()) + }; + + let authority_discovery_id_clone = authority_discovery_id.clone(); + + let future = async move { + let _ = outgoing_request.pending_response.send(response); + } + .boxed(); + + let future_wrapper = async move { + // Forward the response to the ingress channel of our node. + // On receive side we apply our node receiving rate limit. + let action = + NetworkAction::new(authority_discovery_id_clone, future, size, None); + ingress_tx.send(action).unwrap(); + } + .boxed(); + + NetworkAction::new( + authority_discovery_id, + future_wrapper, + size, + // Generate a random latency based on configuration. + random_latency(self.config.latency.as_ref()), + ) + }, + Requests::AvailableDataFetchingV1(outgoing_request) => { + let candidate_hash = outgoing_request.payload.candidate_hash; + let candidate_index = self + .availabilty + .candidate_hashes + .get(&candidate_hash) + .expect("candidate was generated previously; qed"); + gum::debug!(target: LOG_TARGET, ?candidate_hash, candidate_index, "Candidate mapped to index"); + + let authority_discovery_id = match outgoing_request.peer { + req_res::Recipient::Authority(authority_discovery_id) => authority_discovery_id, + _ => unimplemented!("Peer recipient not supported yet"), + }; + + // Account our sent request bytes. + self.network.peer_stats(0).inc_sent(outgoing_request.payload.encoded_size()); + + // If peer is disconnected return an error + if !self.network.is_peer_connected(&authority_discovery_id) { + let future = async move { + let _ = outgoing_request + .pending_response + .send(Err(RequestFailure::NotConnected)); + } + .boxed(); + return self.not_connected_response(&authority_discovery_id, future) + } + + // Account for remote received request bytes. + self.network + .peer_stats_by_id(&authority_discovery_id) + .inc_received(outgoing_request.payload.encoded_size()); + + let available_data = + self.availabilty.available_data.get(*candidate_index).unwrap().clone(); + + let size = available_data.encoded_size(); + + let response = if random_error(self.config.error) { + Err(RequestFailure::Network(OutboundFailure::ConnectionClosed)) + } else { + Ok(req_res::v1::AvailableDataFetchingResponse::from(Some(available_data)) + .encode()) + }; + + let future = async move { + let _ = outgoing_request.pending_response.send(response); + } + .boxed(); + + let authority_discovery_id_clone = authority_discovery_id.clone(); + + let future_wrapper = async move { + // Forward the response to the ingress channel of our node. + // On receive side we apply our node receiving rate limit. + let action = + NetworkAction::new(authority_discovery_id_clone, future, size, None); + ingress_tx.send(action).unwrap(); + } + .boxed(); + + NetworkAction::new( + authority_discovery_id, + future_wrapper, + size, + // Generate a random latency based on configuration. + random_latency(self.config.latency.as_ref()), + ) + }, + _ => panic!("received an unexpected request"), + } + } +} + +#[overseer::subsystem(NetworkBridgeTx, error=SubsystemError, prefix=self::overseer)] +impl MockNetworkBridgeTx { + fn start(self, ctx: Context) -> SpawnedSubsystem { + let future = self.run(ctx).map(|_| Ok(())).boxed(); + + SpawnedSubsystem { name: "test-environment", future } + } +} + +#[overseer::contextbounds(NetworkBridgeTx, prefix = self::overseer)] +impl MockNetworkBridgeTx { + async fn run(mut self, mut ctx: Context) { + let (mut ingress_tx, mut ingress_rx) = + tokio::sync::mpsc::unbounded_channel::(); + + // Initialize our node bandwidth limits. + let mut rx_limiter = RateLimit::new(10, self.config.bandwidth); + + let our_network = self.network.clone(); + + // This task will handle node messages receipt from the simulated network. + ctx.spawn_blocking( + "network-receive", + async move { + while let Some(action) = ingress_rx.recv().await { + let size = action.size(); + + // account for our node receiving the data. + our_network.inc_received(size); + rx_limiter.reap(size).await; + action.run().await; + } + } + .boxed(), + ) + .expect("We never fail to spawn tasks"); + + // Main subsystem loop. + loop { + let msg = ctx.recv().await.expect("Overseer never fails us"); + + match msg { + orchestra::FromOrchestra::Signal(signal) => + if signal == OverseerSignal::Conclude { + return + }, + orchestra::FromOrchestra::Communication { msg } => match msg { + NetworkBridgeTxMessage::SendRequests(requests, _if_disconnected) => { + for request in requests { + gum::debug!(target: LOG_TARGET, request = ?request, "Processing request"); + self.network.inc_sent(request_size(&request)); + let action = self.respond_to_send_request(request, &mut ingress_tx); + + // Will account for our node sending the request over the emulated + // network. + self.network.submit_peer_action(action.peer(), action); + } + }, + _ => { + unimplemented!("Unexpected network bridge message") + }, + }, + } + } + } +} + +// A helper to determine the request payload size. +fn request_size(request: &Requests) -> usize { + match request { + Requests::ChunkFetchingV1(outgoing_request) => outgoing_request.payload.encoded_size(), + Requests::AvailableDataFetchingV1(outgoing_request) => + outgoing_request.payload.encoded_size(), + _ => unimplemented!("received an unexpected request"), + } +} diff --git a/polkadot/node/subsystem-bench/src/core/mock/runtime_api.rs b/polkadot/node/subsystem-bench/src/core/mock/runtime_api.rs new file mode 100644 index 000000000000..d664ebead3cc --- /dev/null +++ b/polkadot/node/subsystem-bench/src/core/mock/runtime_api.rs @@ -0,0 +1,110 @@ +// Copyright (C) Parity Technologies (UK) Ltd. +// This file is part of Polkadot. + +// Polkadot is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. + +// Polkadot is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with Polkadot. If not, see . +//! +//! A generic runtime api subsystem mockup suitable to be used in benchmarks. + +use polkadot_primitives::{GroupIndex, IndexedVec, SessionInfo, ValidatorIndex}; + +use polkadot_node_subsystem::{ + messages::{RuntimeApiMessage, RuntimeApiRequest}, + overseer, SpawnedSubsystem, SubsystemError, +}; +use polkadot_node_subsystem_types::OverseerSignal; + +use crate::core::configuration::{TestAuthorities, TestConfiguration}; +use futures::FutureExt; + +const LOG_TARGET: &str = "subsystem-bench::runtime-api-mock"; + +pub struct RuntimeApiState { + authorities: TestAuthorities, +} + +pub struct MockRuntimeApi { + state: RuntimeApiState, + config: TestConfiguration, +} + +impl MockRuntimeApi { + pub fn new(config: TestConfiguration, authorities: TestAuthorities) -> MockRuntimeApi { + Self { state: RuntimeApiState { authorities }, config } + } + + fn session_info(&self) -> SessionInfo { + let all_validators = (0..self.config.n_validators) + .map(|i| ValidatorIndex(i as _)) + .collect::>(); + + let validator_groups = all_validators.chunks(5).map(Vec::from).collect::>(); + + SessionInfo { + validators: self.state.authorities.validator_public.clone().into(), + discovery_keys: self.state.authorities.validator_authority_id.clone(), + validator_groups: IndexedVec::>::from(validator_groups), + assignment_keys: vec![], + n_cores: self.config.n_cores as u32, + zeroth_delay_tranche_width: 0, + relay_vrf_modulo_samples: 0, + n_delay_tranches: 0, + no_show_slots: 0, + needed_approvals: 0, + active_validator_indices: vec![], + dispute_period: 6, + random_seed: [0u8; 32], + } + } +} + +#[overseer::subsystem(RuntimeApi, error=SubsystemError, prefix=self::overseer)] +impl MockRuntimeApi { + fn start(self, ctx: Context) -> SpawnedSubsystem { + let future = self.run(ctx).map(|_| Ok(())).boxed(); + + SpawnedSubsystem { name: "test-environment", future } + } +} + +#[overseer::contextbounds(RuntimeApi, prefix = self::overseer)] +impl MockRuntimeApi { + async fn run(self, mut ctx: Context) { + loop { + let msg = ctx.recv().await.expect("Overseer never fails us"); + + match msg { + orchestra::FromOrchestra::Signal(signal) => + if signal == OverseerSignal::Conclude { + return + }, + orchestra::FromOrchestra::Communication { msg } => { + gum::debug!(target: LOG_TARGET, msg=?msg, "recv message"); + + match msg { + RuntimeApiMessage::Request( + _request, + RuntimeApiRequest::SessionInfo(_session_index, sender), + ) => { + let _ = sender.send(Ok(Some(self.session_info()))); + }, + // Long term TODO: implement more as needed. + _ => { + unimplemented!("Unexpected runtime-api message") + }, + } + }, + } + } + } +} diff --git a/polkadot/node/subsystem-bench/src/core/mod.rs b/polkadot/node/subsystem-bench/src/core/mod.rs new file mode 100644 index 000000000000..282788d143b4 --- /dev/null +++ b/polkadot/node/subsystem-bench/src/core/mod.rs @@ -0,0 +1,24 @@ +// Copyright (C) Parity Technologies (UK) Ltd. +// This file is part of Polkadot. + +// Polkadot is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. + +// Polkadot is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with Polkadot. If not, see . + +const LOG_TARGET: &str = "subsystem-bench::core"; + +pub mod configuration; +pub mod display; +pub mod environment; +pub mod keyring; +pub mod mock; +pub mod network; diff --git a/polkadot/node/subsystem-bench/src/core/network.rs b/polkadot/node/subsystem-bench/src/core/network.rs new file mode 100644 index 000000000000..c4e20b421d34 --- /dev/null +++ b/polkadot/node/subsystem-bench/src/core/network.rs @@ -0,0 +1,485 @@ +// Copyright (C) Parity Technologies (UK) Ltd. +// This file is part of Polkadot. + +// Polkadot is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. + +// Polkadot is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with Polkadot. If not, see . +use super::{ + configuration::{TestAuthorities, TestConfiguration}, + environment::TestEnvironmentDependencies, + *, +}; +use colored::Colorize; +use polkadot_primitives::AuthorityDiscoveryId; +use prometheus_endpoint::U64; +use rand::{seq::SliceRandom, thread_rng}; +use sc_service::SpawnTaskHandle; +use std::{ + collections::HashMap, + sync::{ + atomic::{AtomicU64, Ordering}, + Arc, + }, + time::{Duration, Instant}, +}; +use tokio::sync::mpsc::UnboundedSender; + +// An emulated node egress traffic rate_limiter. +#[derive(Debug)] +pub struct RateLimit { + // How often we refill credits in buckets + tick_rate: usize, + // Total ticks + total_ticks: usize, + // Max refill per tick + max_refill: usize, + // Available credit. We allow for bursts over 1/tick_rate of `cps` budget, but we + // account it by negative credit. + credits: isize, + // When last refilled. + last_refill: Instant, +} + +impl RateLimit { + // Create a new `RateLimit` from a `cps` (credits per second) budget and + // `tick_rate`. + pub fn new(tick_rate: usize, cps: usize) -> Self { + // Compute how much refill for each tick + let max_refill = cps / tick_rate; + RateLimit { + tick_rate, + total_ticks: 0, + max_refill, + // A fresh start + credits: max_refill as isize, + last_refill: Instant::now(), + } + } + + pub async fn refill(&mut self) { + // If this is called to early, we need to sleep until next tick. + let now = Instant::now(); + let next_tick_delta = + (self.last_refill + Duration::from_millis(1000 / self.tick_rate as u64)) - now; + + // Sleep until next tick. + if !next_tick_delta.is_zero() { + gum::trace!(target: LOG_TARGET, "need to sleep {}ms", next_tick_delta.as_millis()); + tokio::time::sleep(next_tick_delta).await; + } + + self.total_ticks += 1; + self.credits += self.max_refill as isize; + self.last_refill = Instant::now(); + } + + // Reap credits from the bucket. + // Blocks if credits budged goes negative during call. + pub async fn reap(&mut self, amount: usize) { + self.credits -= amount as isize; + + if self.credits >= 0 { + return + } + + while self.credits < 0 { + gum::trace!(target: LOG_TARGET, "Before refill: {:?}", &self); + self.refill().await; + gum::trace!(target: LOG_TARGET, "After refill: {:?}", &self); + } + } +} + +#[cfg(test)] +mod tests { + use std::time::Instant; + + use super::RateLimit; + + #[tokio::test] + async fn test_expected_rate() { + let tick_rate = 200; + let budget = 1_000_000; + // rate must not exceeed 100 credits per second + let mut rate_limiter = RateLimit::new(tick_rate, budget); + let mut total_sent = 0usize; + let start = Instant::now(); + + let mut reap_amount = 0; + while rate_limiter.total_ticks < tick_rate { + reap_amount += 1; + reap_amount %= 100; + + rate_limiter.reap(reap_amount).await; + total_sent += reap_amount; + } + + let end = Instant::now(); + + println!("duration: {}", (end - start).as_millis()); + + // Allow up to `budget/max_refill` error tolerance + let lower_bound = budget as u128 * ((end - start).as_millis() / 1000u128); + let upper_bound = budget as u128 * + ((end - start).as_millis() / 1000u128 + rate_limiter.max_refill as u128); + assert!(total_sent as u128 >= lower_bound); + assert!(total_sent as u128 <= upper_bound); + } +} + +// A network peer emulator. It spawns a task that accepts `NetworkActions` and +// executes them with a configurable delay and bandwidth constraints. Tipically +// these actions wrap a future that performs a channel send to the subsystem(s) under test. +#[derive(Clone)] +struct PeerEmulator { + // The queue of requests waiting to be served by the emulator + actions_tx: UnboundedSender, +} + +impl PeerEmulator { + pub fn new( + bandwidth: usize, + spawn_task_handle: SpawnTaskHandle, + stats: Arc, + ) -> Self { + let (actions_tx, mut actions_rx) = tokio::sync::mpsc::unbounded_channel(); + + spawn_task_handle + .clone() + .spawn("peer-emulator", "test-environment", async move { + // Rate limit peer send. + let mut rate_limiter = RateLimit::new(10, bandwidth); + loop { + let stats_clone = stats.clone(); + let maybe_action: Option = actions_rx.recv().await; + if let Some(action) = maybe_action { + let size = action.size(); + rate_limiter.reap(size).await; + if let Some(latency) = action.latency { + spawn_task_handle.spawn( + "peer-emulator-latency", + "test-environment", + async move { + tokio::time::sleep(latency).await; + action.run().await; + stats_clone.inc_sent(size); + }, + ) + } else { + action.run().await; + stats_clone.inc_sent(size); + } + } else { + break + } + } + }); + + Self { actions_tx } + } + + // Queue a send request from the emulated peer. + pub fn send(&mut self, action: NetworkAction) { + self.actions_tx.send(action).expect("peer emulator task lives"); + } +} + +pub type ActionFuture = std::pin::Pin + std::marker::Send>>; +/// An network action to be completed by the emulator task. +pub struct NetworkAction { + // The function that performs the action + run: ActionFuture, + // The payload size that we simulate sending/receiving from a peer + size: usize, + // Peer which should run the action. + peer: AuthorityDiscoveryId, + // The amount of time to delay the polling `run` + latency: Option, +} + +unsafe impl Send for NetworkAction {} + +/// Book keeping of sent and received bytes. +pub struct PeerEmulatorStats { + rx_bytes_total: AtomicU64, + tx_bytes_total: AtomicU64, + metrics: Metrics, + peer_index: usize, +} + +impl PeerEmulatorStats { + pub(crate) fn new(peer_index: usize, metrics: Metrics) -> Self { + Self { + metrics, + rx_bytes_total: AtomicU64::from(0), + tx_bytes_total: AtomicU64::from(0), + peer_index, + } + } + + pub fn inc_sent(&self, bytes: usize) { + self.tx_bytes_total.fetch_add(bytes as u64, Ordering::Relaxed); + self.metrics.on_peer_sent(self.peer_index, bytes); + } + + pub fn inc_received(&self, bytes: usize) { + self.rx_bytes_total.fetch_add(bytes as u64, Ordering::Relaxed); + self.metrics.on_peer_received(self.peer_index, bytes); + } + + pub fn sent(&self) -> u64 { + self.tx_bytes_total.load(Ordering::Relaxed) + } + + pub fn received(&self) -> u64 { + self.rx_bytes_total.load(Ordering::Relaxed) + } +} + +#[derive(Debug, Default)] +pub struct PeerStats { + pub rx_bytes_total: u64, + pub tx_bytes_total: u64, +} +impl NetworkAction { + pub fn new( + peer: AuthorityDiscoveryId, + run: ActionFuture, + size: usize, + latency: Option, + ) -> Self { + Self { run, size, peer, latency } + } + + pub fn size(&self) -> usize { + self.size + } + + pub async fn run(self) { + self.run.await; + } + + pub fn peer(&self) -> AuthorityDiscoveryId { + self.peer.clone() + } +} + +/// The state of a peer on the emulated network. +#[derive(Clone)] +enum Peer { + Connected(PeerEmulator), + Disconnected(PeerEmulator), +} + +impl Peer { + pub fn disconnect(&mut self) { + let new_self = match self { + Peer::Connected(peer) => Peer::Disconnected(peer.clone()), + _ => return, + }; + *self = new_self; + } + + pub fn is_connected(&self) -> bool { + matches!(self, Peer::Connected(_)) + } + + pub fn emulator(&mut self) -> &mut PeerEmulator { + match self { + Peer::Connected(ref mut emulator) => emulator, + Peer::Disconnected(ref mut emulator) => emulator, + } + } +} + +/// Mocks the network bridge and an arbitrary number of connected peer nodes. +/// Implements network latency, bandwidth and connection errors. +#[derive(Clone)] +pub struct NetworkEmulator { + // Per peer network emulation. + peers: Vec, + /// Per peer stats. + stats: Vec>, + /// Each emulated peer is a validator. + validator_authority_ids: HashMap, +} + +impl NetworkEmulator { + pub fn new( + config: &TestConfiguration, + dependencies: &TestEnvironmentDependencies, + authorities: &TestAuthorities, + ) -> Self { + let n_peers = config.n_validators; + gum::info!(target: LOG_TARGET, "{}",format!("Initializing emulation for a {} peer network.", n_peers).bright_blue()); + gum::info!(target: LOG_TARGET, "{}",format!("connectivity {}%, error {}%", config.connectivity, config.error).bright_black()); + + let metrics = + Metrics::new(&dependencies.registry).expect("Metrics always register succesfully"); + let mut validator_authority_id_mapping = HashMap::new(); + + // Create a `PeerEmulator` for each peer. + let (stats, mut peers): (_, Vec<_>) = (0..n_peers) + .zip(authorities.validator_authority_id.clone()) + .map(|(peer_index, authority_id)| { + validator_authority_id_mapping.insert(authority_id, peer_index); + let stats = Arc::new(PeerEmulatorStats::new(peer_index, metrics.clone())); + ( + stats.clone(), + Peer::Connected(PeerEmulator::new( + config.peer_bandwidth, + dependencies.task_manager.spawn_handle(), + stats, + )), + ) + }) + .unzip(); + + let connected_count = config.n_validators as f64 / (100.0 / config.connectivity as f64); + + let (_connected, to_disconnect) = + peers.partial_shuffle(&mut thread_rng(), connected_count as usize); + + for peer in to_disconnect { + peer.disconnect(); + } + + gum::info!(target: LOG_TARGET, "{}",format!("Network created, connected validator count {}", connected_count).bright_black()); + + Self { peers, stats, validator_authority_ids: validator_authority_id_mapping } + } + + pub fn is_peer_connected(&self, peer: &AuthorityDiscoveryId) -> bool { + self.peer(peer).is_connected() + } + + pub fn submit_peer_action(&mut self, peer: AuthorityDiscoveryId, action: NetworkAction) { + let index = self + .validator_authority_ids + .get(&peer) + .expect("all test authorities are valid; qed"); + + let peer = self.peers.get_mut(*index).expect("We just retrieved the index above; qed"); + + // Only actions of size 0 are allowed on disconnected peers. + // Typically this are delayed error response sends. + if action.size() > 0 && !peer.is_connected() { + gum::warn!(target: LOG_TARGET, peer_index = index, "Attempted to send data from a disconnected peer, operation ignored"); + return + } + + peer.emulator().send(action); + } + + // Returns the sent/received stats for `peer_index`. + pub fn peer_stats(&self, peer_index: usize) -> Arc { + self.stats[peer_index].clone() + } + + // Helper to get peer index by `AuthorityDiscoveryId` + fn peer_index(&self, peer: &AuthorityDiscoveryId) -> usize { + *self + .validator_authority_ids + .get(peer) + .expect("all test authorities are valid; qed") + } + + // Return the Peer entry for a given `AuthorityDiscoveryId`. + fn peer(&self, peer: &AuthorityDiscoveryId) -> &Peer { + &self.peers[self.peer_index(peer)] + } + // Returns the sent/received stats for `peer`. + pub fn peer_stats_by_id(&mut self, peer: &AuthorityDiscoveryId) -> Arc { + let peer_index = self.peer_index(peer); + + self.stats[peer_index].clone() + } + + // Returns the sent/received stats for all peers. + pub fn stats(&self) -> Vec { + let r = self + .stats + .iter() + .map(|stats| PeerStats { + rx_bytes_total: stats.received(), + tx_bytes_total: stats.sent(), + }) + .collect::>(); + r + } + + // Increment bytes sent by our node (the node that contains the subsystem under test) + pub fn inc_sent(&self, bytes: usize) { + // Our node always is peer 0. + self.peer_stats(0).inc_sent(bytes); + } + + // Increment bytes received by our node (the node that contains the subsystem under test) + pub fn inc_received(&self, bytes: usize) { + // Our node always is peer 0. + self.peer_stats(0).inc_received(bytes); + } +} + +use polkadot_node_subsystem_util::metrics::prometheus::{ + self, CounterVec, Opts, PrometheusError, Registry, +}; + +/// Emulated network metrics. +#[derive(Clone)] +pub(crate) struct Metrics { + /// Number of bytes sent per peer. + peer_total_sent: CounterVec, + /// Number of received sent per peer. + peer_total_received: CounterVec, +} + +impl Metrics { + pub fn new(registry: &Registry) -> Result { + Ok(Self { + peer_total_sent: prometheus::register( + CounterVec::new( + Opts::new( + "subsystem_benchmark_network_peer_total_bytes_sent", + "Total number of bytes a peer has sent.", + ), + &["peer"], + )?, + registry, + )?, + peer_total_received: prometheus::register( + CounterVec::new( + Opts::new( + "subsystem_benchmark_network_peer_total_bytes_received", + "Total number of bytes a peer has received.", + ), + &["peer"], + )?, + registry, + )?, + }) + } + + /// Increment total sent for a peer. + pub fn on_peer_sent(&self, peer_index: usize, bytes: usize) { + self.peer_total_sent + .with_label_values(vec![format!("node{}", peer_index).as_str()].as_slice()) + .inc_by(bytes as u64); + } + + /// Increment total receioved for a peer. + pub fn on_peer_received(&self, peer_index: usize, bytes: usize) { + self.peer_total_received + .with_label_values(vec![format!("node{}", peer_index).as_str()].as_slice()) + .inc_by(bytes as u64); + } +} diff --git a/polkadot/node/subsystem-bench/src/subsystem-bench.rs b/polkadot/node/subsystem-bench/src/subsystem-bench.rs new file mode 100644 index 000000000000..da7e5441f748 --- /dev/null +++ b/polkadot/node/subsystem-bench/src/subsystem-bench.rs @@ -0,0 +1,186 @@ +// Copyright (C) Parity Technologies (UK) Ltd. +// This file is part of Polkadot. + +// Polkadot is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. + +// Polkadot is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with Polkadot. If not, see . + +//! A tool for running subsystem benchmark tests designed for development and +//! CI regression testing. +use clap::Parser; +use color_eyre::eyre; + +use colored::Colorize; +use std::{path::Path, time::Duration}; + +pub(crate) mod availability; +pub(crate) mod cli; +pub(crate) mod core; + +use availability::{prepare_test, NetworkEmulation, TestState}; +use cli::TestObjective; + +use core::{ + configuration::TestConfiguration, + environment::{TestEnvironment, GENESIS_HASH}, +}; + +use clap_num::number_range; + +use crate::core::display::display_configuration; + +fn le_100(s: &str) -> Result { + number_range(s, 0, 100) +} + +fn le_5000(s: &str) -> Result { + number_range(s, 0, 5000) +} + +#[derive(Debug, Parser)] +#[allow(missing_docs)] +struct BenchCli { + #[arg(long, value_enum, ignore_case = true, default_value_t = NetworkEmulation::Ideal)] + /// The type of network to be emulated + pub network: NetworkEmulation, + + #[clap(flatten)] + pub standard_configuration: cli::StandardTestOptions, + + #[clap(short, long)] + /// The bandwidth of simulated remote peers in KiB + pub peer_bandwidth: Option, + + #[clap(short, long)] + /// The bandwidth of our simulated node in KiB + pub bandwidth: Option, + + #[clap(long, value_parser=le_100)] + /// Simulated conection error ratio [0-100]. + pub peer_error: Option, + + #[clap(long, value_parser=le_5000)] + /// Minimum remote peer latency in milliseconds [0-5000]. + pub peer_min_latency: Option, + + #[clap(long, value_parser=le_5000)] + /// Maximum remote peer latency in milliseconds [0-5000]. + pub peer_max_latency: Option, + + #[command(subcommand)] + pub objective: cli::TestObjective, +} + +impl BenchCli { + fn launch(self) -> eyre::Result<()> { + let configuration = self.standard_configuration; + let mut test_config = match self.objective { + TestObjective::TestSequence(options) => { + let test_sequence = + core::configuration::TestSequence::new_from_file(Path::new(&options.path)) + .expect("File exists") + .into_vec(); + let num_steps = test_sequence.len(); + gum::info!( + "{}", + format!("Sequence contains {} step(s)", num_steps).bright_purple() + ); + for (index, test_config) in test_sequence.into_iter().enumerate() { + gum::info!("{}", format!("Step {}/{}", index + 1, num_steps).bright_purple(),); + display_configuration(&test_config); + + let mut state = TestState::new(&test_config); + let (mut env, _protocol_config) = prepare_test(test_config, &mut state); + env.runtime() + .block_on(availability::benchmark_availability_read(&mut env, state)); + } + return Ok(()) + }, + TestObjective::DataAvailabilityRead(ref _options) => match self.network { + NetworkEmulation::Healthy => TestConfiguration::healthy_network( + self.objective, + configuration.num_blocks, + configuration.n_validators, + configuration.n_cores, + configuration.min_pov_size, + configuration.max_pov_size, + ), + NetworkEmulation::Degraded => TestConfiguration::degraded_network( + self.objective, + configuration.num_blocks, + configuration.n_validators, + configuration.n_cores, + configuration.min_pov_size, + configuration.max_pov_size, + ), + NetworkEmulation::Ideal => TestConfiguration::ideal_network( + self.objective, + configuration.num_blocks, + configuration.n_validators, + configuration.n_cores, + configuration.min_pov_size, + configuration.max_pov_size, + ), + }, + }; + + let mut latency_config = test_config.latency.clone().unwrap_or_default(); + + if let Some(latency) = self.peer_min_latency { + latency_config.min_latency = Duration::from_millis(latency); + } + + if let Some(latency) = self.peer_max_latency { + latency_config.max_latency = Duration::from_millis(latency); + } + + if let Some(error) = self.peer_error { + test_config.error = error; + } + + if let Some(bandwidth) = self.peer_bandwidth { + // CLI expects bw in KiB + test_config.peer_bandwidth = bandwidth * 1024; + } + + if let Some(bandwidth) = self.bandwidth { + // CLI expects bw in KiB + test_config.bandwidth = bandwidth * 1024; + } + + display_configuration(&test_config); + + let mut state = TestState::new(&test_config); + let (mut env, _protocol_config) = prepare_test(test_config, &mut state); + // test_config.write_to_disk(); + env.runtime() + .block_on(availability::benchmark_availability_read(&mut env, state)); + + Ok(()) + } +} + +fn main() -> eyre::Result<()> { + color_eyre::install()?; + env_logger::builder() + .filter(Some("hyper"), log::LevelFilter::Info) + // Avoid `Terminating due to subsystem exit subsystem` warnings + .filter(Some("polkadot_overseer"), log::LevelFilter::Error) + .filter(None, log::LevelFilter::Info) + // .filter(None, log::LevelFilter::Trace) + .try_init() + .unwrap(); + + let cli: BenchCli = BenchCli::parse(); + cli.launch()?; + Ok(()) +} diff --git a/polkadot/node/subsystem-test-helpers/Cargo.toml b/polkadot/node/subsystem-test-helpers/Cargo.toml index eb6e10559c2a..7b616bdb4382 100644 --- a/polkadot/node/subsystem-test-helpers/Cargo.toml +++ b/polkadot/node/subsystem-test-helpers/Cargo.toml @@ -15,8 +15,11 @@ async-trait = "0.1.57" futures = "0.3.21" parking_lot = "0.12.0" polkadot-node-subsystem = { path = "../subsystem" } +polkadot-erasure-coding = { path = "../../erasure-coding" } polkadot-node-subsystem-util = { path = "../subsystem-util" } polkadot-primitives = { path = "../../primitives" } +polkadot-node-primitives = { path = "../primitives" } + sc-client-api = { path = "../../../substrate/client/api" } sc-utils = { path = "../../../substrate/client/utils" } sp-core = { path = "../../../substrate/primitives/core" } diff --git a/polkadot/node/subsystem-test-helpers/src/lib.rs b/polkadot/node/subsystem-test-helpers/src/lib.rs index 3f92513498c4..dfa78e04b8c9 100644 --- a/polkadot/node/subsystem-test-helpers/src/lib.rs +++ b/polkadot/node/subsystem-test-helpers/src/lib.rs @@ -18,11 +18,14 @@ #![warn(missing_docs)] +use polkadot_erasure_coding::{branches, obtain_chunks_v1 as obtain_chunks}; +use polkadot_node_primitives::{AvailableData, ErasureChunk, Proof}; use polkadot_node_subsystem::{ messages::AllMessages, overseer, FromOrchestra, OverseerSignal, SpawnGlue, SpawnedSubsystem, SubsystemError, SubsystemResult, TrySendError, }; use polkadot_node_subsystem_util::TimeoutExt; +use polkadot_primitives::{Hash, ValidatorIndex}; use futures::{channel::mpsc, poll, prelude::*}; use parking_lot::Mutex; @@ -440,6 +443,34 @@ impl Future for Yield { } } +/// Helper for chunking available data. +pub fn derive_erasure_chunks_with_proofs_and_root( + n_validators: usize, + available_data: &AvailableData, + alter_chunk: impl Fn(usize, &mut Vec), +) -> (Vec, Hash) { + let mut chunks: Vec> = obtain_chunks(n_validators, available_data).unwrap(); + + for (i, chunk) in chunks.iter_mut().enumerate() { + alter_chunk(i, chunk) + } + + // create proofs for each erasure chunk + let branches = branches(chunks.as_ref()); + + let root = branches.root(); + let erasure_chunks = branches + .enumerate() + .map(|(index, (proof, chunk))| ErasureChunk { + chunk: chunk.to_vec(), + index: ValidatorIndex(index as _), + proof: Proof::try_from(proof).unwrap(), + }) + .collect::>(); + + (erasure_chunks, root) +} + #[cfg(test)] mod tests { use super::*; diff --git a/polkadot/node/subsystem-test-helpers/src/mock.rs b/polkadot/node/subsystem-test-helpers/src/mock.rs index 522bc3c2cc4f..14026960ac13 100644 --- a/polkadot/node/subsystem-test-helpers/src/mock.rs +++ b/polkadot/node/subsystem-test-helpers/src/mock.rs @@ -16,7 +16,7 @@ use std::sync::Arc; -use polkadot_node_subsystem::{jaeger, ActivatedLeaf}; +use polkadot_node_subsystem::{jaeger, ActivatedLeaf, BlockInfo}; use sc_client_api::UnpinHandle; use sc_keystore::LocalKeystore; use sc_utils::mpsc::tracing_unbounded; @@ -59,3 +59,8 @@ pub fn new_leaf(hash: Hash, number: BlockNumber) -> ActivatedLeaf { span: Arc::new(jaeger::Span::Disabled), } } + +/// Create a new leaf with the given hash and number. +pub fn new_block_import_info(hash: Hash, number: BlockNumber) -> BlockInfo { + BlockInfo { hash, parent_hash: Hash::default(), number, unpin_handle: dummy_unpin_handle(hash) } +}