Skip to content
This repository has been archived by the owner on Nov 15, 2023. It is now read-only.

98.6% OF DEVELOPERS CANNOT REVIEW THIS PR! [read more...] #7337

Merged
merged 61 commits into from
Jul 31, 2023
Merged
Show file tree
Hide file tree
Changes from 26 commits
Commits
Show all changes
61 commits
Select commit Hold shift + click to select a range
d2e068d
[WIP] PVF: Split out worker binaries
mrcnski Jun 6, 2023
4ff0a66
Merge branch 'master' into mrcnski/pvf-split-out-worker-binaries
s0me0ne-unkn0wn Jun 8, 2023
5bed726
Address compilation problems and re-design a bit
s0me0ne-unkn0wn Jun 10, 2023
d7a389c
Reorganize once more, fix tests
s0me0ne-unkn0wn Jun 11, 2023
818778c
Reformat with new nightly to make `cargo fmt` test happy
s0me0ne-unkn0wn Jun 11, 2023
f9a9280
Address `clippy` warnings
s0me0ne-unkn0wn Jun 11, 2023
165f116
Merge remote-tracking branch 'origin/master' into mrcnski/pvf-split-o…
s0me0ne-unkn0wn Jun 13, 2023
2f192ea
Merge remote-tracking branch 'origin/master' into mrcnski/pvf-split-o…
s0me0ne-unkn0wn Jun 14, 2023
01b97f8
Merge remote-tracking branch 'origin/master' into mrcnski/pvf-split-o…
s0me0ne-unkn0wn Jun 14, 2023
b5733fa
Add temporary trace to debug zombienet tests
s0me0ne-unkn0wn Jun 14, 2023
235766e
Merge remote-tracking branch 'origin/master' into mrcnski/pvf-split-o…
s0me0ne-unkn0wn Jun 16, 2023
f5ba965
Fix zombienet node upgrade test
s0me0ne-unkn0wn Jun 16, 2023
7079e2f
Fix malus and its CI
s0me0ne-unkn0wn Jun 16, 2023
95e643f
Fix building worker binaries with malus
s0me0ne-unkn0wn Jun 16, 2023
a1c093e
More fixes for malus
s0me0ne-unkn0wn Jun 16, 2023
5382cc7
Merge remote-tracking branch 'origin/master' into mrcnski/pvf-split-o…
s0me0ne-unkn0wn Jun 17, 2023
06de32d
Remove unneeded cli subcommands
s0me0ne-unkn0wn Jun 18, 2023
d7b5a5a
Support placing auxiliary binaries to `/usr/libexec`
s0me0ne-unkn0wn Jun 18, 2023
49475c2
Fix spelling
s0me0ne-unkn0wn Jun 18, 2023
4c72864
Spelling
s0me0ne-unkn0wn Jul 7, 2023
07d876e
Implement review comments (mostly nits)
mrcnski Jul 9, 2023
0da74cd
Merge branch 'master' into mrcnski/pvf-split-out-worker-binaries
mrcnski Jul 9, 2023
126213e
Fix worker node version flag
mrcnski Jul 10, 2023
6640b29
Rework getting the worker paths
mrcnski Jul 10, 2023
633edbc
Address a couple of review comments
mrcnski Jul 11, 2023
41b0a49
Minor restructuring
mrcnski Jul 11, 2023
49d8696
Fix CI error
mrcnski Jul 11, 2023
dc5d6a0
Add tests for worker binaries detection
mrcnski Jul 14, 2023
6ef14fe
Improve tests; try to fix CI
mrcnski Jul 14, 2023
d2bef1f
Move workers module into separate file
mrcnski Jul 16, 2023
3e09438
Try to fix failing test and workers not printing latest version
mrcnski Jul 16, 2023
55365d8
Make a bunch of fixes
mrcnski Jul 17, 2023
751dd04
Rebuild nodes on version change
mrcnski Jul 17, 2023
76c16f3
Fix more issues
mrcnski Jul 18, 2023
a1e21dc
Fix tests
mrcnski Jul 19, 2023
7366c99
Merge remote-tracking branch 'origin/master' into mrcnski/pvf-split-o…
Jul 19, 2023
11bbf59
Pass node version from node into dependencies to avoid recompiles
mrcnski Jul 20, 2023
6244e3f
Some more improvements for smoother tests
mrcnski Jul 20, 2023
6f44e63
Merge remote-tracking branch 'origin/mrcnski/pvf-split-out-worker-bin…
mrcnski Jul 20, 2023
019bd97
Add back rerun to PVF workers
mrcnski Jul 20, 2023
df1117a
Move worker binaries into files in cli crate
mrcnski Jul 21, 2023
cde8dc6
Fix bug (was passing worker version for node version)
mrcnski Jul 23, 2023
98323d2
Move workers out of cli into root src/bin/ dir
mrcnski Jul 24, 2023
ecf0af8
Add some sanity checks for workers to dockerfiles
mrcnski Jul 24, 2023
3f929bb
Update malus
mrcnski Jul 24, 2023
508669c
Try to fix clippy errors
mrcnski Jul 24, 2023
e072517
Merge branch 'master' into mrcnski/pvf-split-out-worker-binaries
mrcnski Jul 24, 2023
432dcf3
Address `cargo run` issue
mrcnski Jul 25, 2023
7407507
Update readme (installation instructions)
mrcnski Jul 25, 2023
0651bf6
Merge branch 'master' into mrcnski/pvf-split-out-worker-binaries
mrcnski Jul 25, 2023
5d9c711
Allow disabling external workers for local/testing setups
mrcnski Jul 25, 2023
1ce0c3e
Revert unnecessary Cargo.lock changes
mrcnski Jul 27, 2023
09f4840
Remove unnecessary build scripts from collators
mrcnski Jul 27, 2023
700ee0d
Merge branch 'master' into mrcnski/pvf-split-out-worker-binaries
mrcnski Jul 27, 2023
8851c20
Add back missing malus commands (should fix failing ZN job)
mrcnski Jul 27, 2023
fd0f018
Some minor fixes
mrcnski Jul 27, 2023
e5771a0
Update Cargo.lock
mrcnski Jul 27, 2023
0a04dd4
Fix some build errors
mrcnski Jul 27, 2023
9f05299
Undo self-contained binaries; cli flag to disable version check
mrcnski Jul 28, 2023
0477550
Try to fix failing job and add some docs for local tests
mrcnski Jul 28, 2023
70cd333
Merge branch 'master' into mrcnski/pvf-split-out-worker-binaries
mrcnski Jul 28, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

17 changes: 16 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,14 @@
name = "polkadot"
path = "src/main.rs"

[[bin]]
name = "polkadot-execute-worker"
path = "node/core/pvf/execute-worker/src/main.rs"

[[bin]]
name = "polkadot-prepare-worker"
path = "node/core/pvf/prepare-worker/src/main.rs"

[package]
name = "polkadot"
description = "Implementation of a `https://polkadot.network` node in Rust based on the Substrate framework."
Expand All @@ -22,10 +30,15 @@ version = "0.9.43"
color-eyre = { version = "0.6.1", default-features = false }
tikv-jemallocator = "0.5.0"

# Needed for worker binaries.
polkadot-node-core-pvf-common = { path = "node/core/pvf/common" }
polkadot-node-core-pvf-execute-worker = { path = "node/core/pvf/execute-worker" }
mrcnski marked this conversation as resolved.
Show resolved Hide resolved
polkadot-node-core-pvf-prepare-worker = { path = "node/core/pvf/prepare-worker" }
sp-tracing = { git = "https://github.com/paritytech/substrate", branch = "master" }

# Crates in our workspace, defined as dependencies so we can pass them feature flags.
polkadot-cli = { path = "cli", features = [ "polkadot-native", "kusama-native", "westend-native", "rococo-native" ] }
polkadot-node-core-pvf = { path = "node/core/pvf" }
polkadot-node-core-pvf-prepare-worker = { path = "node/core/pvf/prepare-worker" }
polkadot-overseer = { path = "node/overseer" }

[dev-dependencies]
Expand Down Expand Up @@ -226,6 +239,8 @@ license-file = ["LICENSE", "0"]
maintainer-scripts = "scripts/packaging/deb-maintainer-scripts"
assets = [
["target/release/polkadot", "/usr/bin/", "755"],
["target/release/polkadot-prepare-worker", "/usr/lib/polkadot/", "755"],
["target/release/polkadot-execute-worker", "/usr/lib/polkadot/", "755"],
["scripts/packaging/polkadot.service", "/lib/systemd/system/", "644"]
]
conf-files = [
Expand Down
27 changes: 8 additions & 19 deletions cli/src/cli.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
//! Polkadot CLI library.

use clap::Parser;
use std::path::PathBuf;

#[allow(missing_docs)]
#[derive(Debug, Parser)]
Expand All @@ -42,14 +43,6 @@ pub enum Subcommand {
/// Revert the chain to a previous state.
Revert(sc_cli::RevertCmd),

#[allow(missing_docs)]
#[command(name = "prepare-worker", hide = true)]
PvfPrepareWorker(ValidationWorkerCommand),

#[allow(missing_docs)]
#[command(name = "execute-worker", hide = true)]
PvfExecuteWorker(ValidationWorkerCommand),

/// Sub-commands concerned with benchmarking.
/// The pallet benchmarking moved to the `pallet` sub-command.
#[command(subcommand)]
Expand All @@ -75,17 +68,6 @@ pub enum Subcommand {
ChainInfo(sc_cli::ChainInfoCmd),
}

#[allow(missing_docs)]
#[derive(Debug, Parser)]
pub struct ValidationWorkerCommand {
/// The path to the validation host's socket.
#[arg(long)]
pub socket_path: String,
/// Calling node implementation version
#[arg(long)]
pub node_impl_version: String,
}

#[allow(missing_docs)]
#[derive(Debug, Parser)]
#[group(skip)]
Expand Down Expand Up @@ -148,6 +130,13 @@ pub struct RunCmd {
/// **Dangerous!** Do not touch unless explicitly adviced to.
#[arg(long)]
pub overseer_channel_capacity_override: Option<usize>,

/// Path to the directory where auxiliary worker binaries reside. If not specified, the main
/// binary's directory is searched first, then `/usr/lib/polkadot` is searched. If the path
/// points to an executable rather then directory, that executable is used both as preparation
/// and execution worker (supposed to be used for tests only).
#[arg(long, value_name = "PATH")]
pub workers_path: Option<PathBuf>,
}

#[allow(missing_docs)]
Expand Down
69 changes: 15 additions & 54 deletions cli/src/command.rs
Original file line number Diff line number Diff line change
Expand Up @@ -285,16 +285,21 @@ where
let database_source = config.database.clone();
let task_manager = service::build_full(
config,
service::IsCollator::No,
grandpa_pause,
enable_beefy,
jaeger_agent,
None,
false,
overseer_gen,
cli.run.overseer_channel_capacity_override,
maybe_malus_finality_delay,
hwbench,
service::NewFullParams {
is_collator: service::IsCollator::No,
grandpa_pause,
enable_beefy,
jaeger_agent,
telemetry_worker_handle: None,
workers_path: cli.run.workers_path,
overseer_enable_anyways: false,
overseer_gen,
overseer_message_channel_capacity_override: cli
.run
.overseer_channel_capacity_override,
malus_finality_delay: maybe_malus_finality_delay,
hwbench,
},
)
.map(|full| full.task_manager)?;

Expand Down Expand Up @@ -421,50 +426,6 @@ pub fn run() -> Result<()> {
))
})?)
},
Some(Subcommand::PvfPrepareWorker(cmd)) => {
let mut builder = sc_cli::LoggerBuilder::new("");
builder.with_colors(false);
let _ = builder.init();

#[cfg(target_os = "android")]
{
return Err(sc_cli::Error::Input(
"PVF preparation workers are not supported under this platform".into(),
)
.into())
}

#[cfg(not(target_os = "android"))]
{
polkadot_node_core_pvf_prepare_worker::worker_entrypoint(
&cmd.socket_path,
Some(&cmd.node_impl_version),
);
Ok(())
}
},
Some(Subcommand::PvfExecuteWorker(cmd)) => {
let mut builder = sc_cli::LoggerBuilder::new("");
builder.with_colors(false);
let _ = builder.init();

#[cfg(target_os = "android")]
{
return Err(sc_cli::Error::Input(
"PVF execution workers are not supported under this platform".into(),
)
.into())
}

#[cfg(not(target_os = "android"))]
{
polkadot_node_core_pvf_execute_worker::worker_entrypoint(
&cmd.socket_path,
Some(&cmd.node_impl_version),
);
Ok(())
}
},
Some(Subcommand::Benchmark(cmd)) => {
let runner = cli.create_runner(cmd)?;
let chain_spec = &runner.config().chain_spec;
Expand Down
15 changes: 9 additions & 6 deletions node/core/candidate-validation/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -93,9 +93,10 @@ const DEFAULT_APPROVAL_EXECUTION_TIMEOUT: Duration = Duration::from_secs(12);
pub struct Config {
/// The path where candidate validation can store compiled artifacts for PVFs.
pub artifacts_cache_path: PathBuf,
/// The path to the executable which can be used for spawning PVF compilation & validation
/// workers.
pub program_path: PathBuf,
/// Path to the preparation worker binary
pub prep_worker_path: PathBuf,
/// Path to the execution worker binary
pub exec_worker_path: PathBuf,
}

/// The candidate validation subsystem.
Expand Down Expand Up @@ -129,7 +130,8 @@ impl<Context> CandidateValidationSubsystem {
self.metrics,
self.pvf_metrics,
self.config.artifacts_cache_path,
self.config.program_path,
self.config.prep_worker_path,
self.config.exec_worker_path,
)
.map_err(|e| SubsystemError::with_origin("candidate-validation", e))
.boxed();
Expand All @@ -143,10 +145,11 @@ async fn run<Context>(
metrics: Metrics,
pvf_metrics: polkadot_node_core_pvf::Metrics,
cache_path: PathBuf,
program_path: PathBuf,
prep_worker_path: PathBuf,
exec_worker_path: PathBuf,
) -> SubsystemResult<()> {
let (validation_host, task) = polkadot_node_core_pvf::start(
polkadot_node_core_pvf::Config::new(cache_path, program_path),
polkadot_node_core_pvf::Config::new(cache_path, prep_worker_path, exec_worker_path),
pvf_metrics,
);
ctx.spawn_blocking("pvf-validation-host", task.boxed())?;
Expand Down
61 changes: 45 additions & 16 deletions node/core/pvf/common/src/worker/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,37 +34,63 @@ use tokio::{io, net::UnixStream, runtime::Runtime};
#[macro_export]
macro_rules! decl_worker_main {
($expected_command:expr, $entrypoint:expr) => {
fn print_help(expected_command: &str) {
let worker_version = $crate::worker::worker_version();

println!("{} {}", expected_command, worker_version);
println!();
println!("PVF worker that is called by polkadot.");
}

fn main() {
::sp_tracing::try_init_simple();

let args = std::env::args().collect::<Vec<_>>();
if args.len() < 3 {
panic!("wrong number of arguments");
if args.len() == 1 {
print_help($expected_command);
return
}

match args[1].as_ref() {
"--help" => {
print_help($expected_command);
return
},
"--version" => {
println!("{}", $crate::worker::worker_version());
return
},
subcommand => {
// Must be passed for compatibility with the single-binary test workers.
if subcommand != $expected_command {
panic!(
"trying to run {} binary with the {} subcommand",
$expected_command, subcommand
)
}
},
}

let mut version = None;
let mut node_version = None;
let mut socket_path: &str = "";

for i in 2..args.len() {
for i in (2..args.len()).step_by(2) {
match args[i].as_ref() {
"--socket-path" => socket_path = args[i + 1].as_str(),
"--node-version" => version = Some(args[i + 1].as_str()),
_ => (),
"--node-impl-version" => node_version = Some(args[i + 1].as_str()),
arg => panic!("Unexpected argument found: {}", arg),
}
}

let subcommand = &args[1];
if subcommand != $expected_command {
panic!(
"trying to run {} binary with the {} subcommand",
$expected_command, subcommand
)
}
$entrypoint(&socket_path, version);
$entrypoint(&socket_path, node_version);
}
};
}

pub fn worker_version() -> &'static str {
env!("SUBSTRATE_CLI_IMPL_VERSION")
}

/// Some allowed overhead that we account for in the "CPU time monitor" thread's sleeps, on the
/// child process.
pub const JOB_TIMEOUT_OVERHEAD: Duration = Duration::from_millis(50);
Expand All @@ -88,11 +114,14 @@ pub fn worker_event_loop<F, Fut>(
gum::debug!(target: LOG_TARGET, %worker_pid, "starting pvf worker ({})", debug_id);

// Check for a mismatch between the node and worker versions.
if let Some(version) = node_version {
if version != env!("SUBSTRATE_CLI_IMPL_VERSION") {
if let Some(node_version) = node_version {
let worker_version = env!("SUBSTRATE_CLI_IMPL_VERSION");
if node_version != worker_version {
gum::error!(
target: LOG_TARGET,
%worker_pid,
%node_version,
%worker_version,
"Node and worker version mismatch, node needs restarting, forcing shutdown",
);
kill_parent_node_in_emergency();
Expand Down
Loading