Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

PVF worker: switch on seccomp networking restrictions #2221

Merged
merged 10 commits into from
Nov 21, 2023
7 changes: 4 additions & 3 deletions polkadot/node/core/pvf/common/src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ pub enum PrepareError {
#[codec(index = 9)]
ClearWorkerDir(String),
/// The preparation job process died, due to OOM, a seccomp violation, or some other factor.
JobDied(String),
JobDied { err: String, job_pid: i32 },
#[codec(index = 10)]
/// Some error occurred when interfacing with the kernel.
#[codec(index = 11)]
Expand All @@ -96,7 +96,7 @@ impl PrepareError {
match self {
Prevalidation(_) | Preparation(_) | JobError(_) | OutOfMemory => true,
IoErr(_) |
JobDied(_) |
JobDied { .. } |
CreateTmpFile(_) |
RenameTmpFile { .. } |
ClearWorkerDir(_) |
Expand All @@ -119,7 +119,8 @@ impl fmt::Display for PrepareError {
JobError(err) => write!(f, "panic: {}", err),
TimedOut => write!(f, "prepare: timeout"),
IoErr(err) => write!(f, "prepare: io error while receiving response: {}", err),
JobDied(err) => write!(f, "prepare: prepare job died: {}", err),
JobDied { err, job_pid } =>
write!(f, "prepare: prepare job with pid {job_pid} died: {err}"),
CreateTmpFile(err) => write!(f, "prepare: error creating tmp file: {}", err),
RenameTmpFile { err, src, dest } =>
write!(f, "prepare: error renaming tmp file ({:?} -> {:?}): {}", src, dest, err),
Expand Down
2 changes: 1 addition & 1 deletion polkadot/node/core/pvf/common/src/execute.rs
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ pub enum WorkerResponse {
///
/// We cannot treat this as an internal error because malicious code may have killed the job.
/// We still retry it, because in the non-malicious case it is likely spurious.
JobDied(String),
JobDied { err: String, job_pid: i32 },
/// An unexpected error occurred in the job process, e.g. failing to spawn a thread, panic,
/// etc.
///
Expand Down
2 changes: 1 addition & 1 deletion polkadot/node/core/pvf/common/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ pub struct SecurityStatus {
pub can_enable_landlock: bool,
/// Whether the seccomp features we use are fully available on this system.
pub can_enable_seccomp: bool,
// Whether we are able to unshare the user namespace and change the filesystem root.
/// Whether we are able to unshare the user namespace and change the filesystem root.
pub can_unshare_user_namespace_and_change_root: bool,
}

Expand Down
2 changes: 1 addition & 1 deletion polkadot/node/core/pvf/common/src/worker/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -219,7 +219,7 @@ pub fn run_worker<F>(
#[cfg_attr(not(target_os = "linux"), allow(unused_mut))] mut worker_dir_path: PathBuf,
node_version: Option<&str>,
worker_version: Option<&str>,
#[cfg_attr(not(target_os = "linux"), allow(unused_variables))] security_status: &SecurityStatus,
security_status: &SecurityStatus,
mut event_loop: F,
) where
F: FnMut(UnixStream, PathBuf) -> io::Result<Never>,
Expand Down
10 changes: 4 additions & 6 deletions polkadot/node/core/pvf/common/src/worker/security/seccomp.rs
Original file line number Diff line number Diff line change
Expand Up @@ -67,11 +67,9 @@
//!
//! # Action on syscall violations
//!
//! On syscall violations we currently only log, to make sure this works correctly before enforcing.
//!
//! In the future, when a forbidden syscall is attempted we immediately kill the process in order to
//! prevent the attacker from doing anything else. In execution, this will result in voting against
//! the candidate.
//! When a forbidden syscall is attempted we immediately kill the process in order to prevent the
//! attacker from doing anything else. In execution, this will result in voting against the
//! candidate.

use crate::{
worker::{stringify_panic_payload, WorkerKind},
Expand All @@ -82,7 +80,7 @@ use std::{collections::BTreeMap, path::Path};

/// The action to take on caught syscalls.
#[cfg(not(test))]
const CAUGHT_ACTION: SeccompAction = SeccompAction::Log;
const CAUGHT_ACTION: SeccompAction = SeccompAction::KillProcess;
/// Don't kill the process when testing.
#[cfg(test)]
const CAUGHT_ACTION: SeccompAction = SeccompAction::Errno(libc::EACCES as u32);
Expand Down
26 changes: 13 additions & 13 deletions polkadot/node/core/pvf/execute-worker/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -222,12 +222,6 @@ pub fn worker_entrypoint(
},
};

gum::trace!(
target: LOG_TARGET,
%worker_pid,
"worker: sending response to host: {:?}",
response
);
Comment on lines -225 to -230
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This was producing a bit too much noise in the log output.

send_response(&mut stream, response)?;
}
},
Expand Down Expand Up @@ -360,7 +354,7 @@ fn handle_child_process(
/// - The response, either `Ok` or some error state.
fn handle_parent_process(
mut pipe_read: PipeReader,
child: Pid,
job_pid: Pid,
worker_pid: u32,
usage_before: Usage,
timeout: Duration,
Expand All @@ -373,10 +367,11 @@ fn handle_parent_process(
// Should retry at any rate.
.map_err(|err| io::Error::new(io::ErrorKind::Other, err.to_string()))?;

let status = nix::sys::wait::waitpid(child, None);
let status = nix::sys::wait::waitpid(job_pid, None);
gum::trace!(
target: LOG_TARGET,
%worker_pid,
%job_pid,
"execute worker received wait status from job: {:?}",
status,
);
Expand All @@ -396,6 +391,7 @@ fn handle_parent_process(
gum::warn!(
target: LOG_TARGET,
%worker_pid,
%job_pid,
"execute job took {}ms cpu time, exceeded execute timeout {}ms",
cpu_tv.as_millis(),
timeout.as_millis(),
Expand Down Expand Up @@ -428,6 +424,7 @@ fn handle_parent_process(
gum::warn!(
target: LOG_TARGET,
%worker_pid,
%job_pid,
"execute job error: {}",
job_error,
);
Expand All @@ -443,15 +440,18 @@ fn handle_parent_process(
//
// The job gets SIGSYS on seccomp violations, but this signal may have been sent for some
// other reason, so we still need to check for seccomp violations elsewhere.
Ok(WaitStatus::Signaled(_pid, signal, _core_dump)) =>
Ok(WorkerResponse::JobDied(format!("received signal: {signal:?}"))),
Ok(WaitStatus::Signaled(_pid, signal, _core_dump)) => Ok(WorkerResponse::JobDied {
err: format!("received signal: {signal:?}"),
job_pid: job_pid.as_raw(),
}),
Err(errno) => Ok(internal_error_from_errno("waitpid", errno)),

// It is within an attacker's power to send an unexpected exit status. So we cannot treat
// this as an internal error (which would make us abstain), but must vote against.
Ok(unexpected_wait_status) => Ok(WorkerResponse::JobDied(format!(
"unexpected status from wait: {unexpected_wait_status:?}"
))),
Ok(unexpected_wait_status) => Ok(WorkerResponse::JobDied {
err: format!("unexpected status from wait: {unexpected_wait_status:?}"),
job_pid: job_pid.as_raw(),
}),
}
}

Expand Down
24 changes: 15 additions & 9 deletions polkadot/node/core/pvf/prepare-worker/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -257,9 +257,9 @@ pub fn worker_entrypoint(

handle_parent_process(
pipe_reader,
worker_pid,
child,
temp_artifact_dest.clone(),
worker_pid,
usage_before,
preparation_timeout,
)
Expand Down Expand Up @@ -506,9 +506,9 @@ fn handle_child_process(
/// - If the child process timeout, it returns `PrepareError::TimedOut`.
fn handle_parent_process(
mut pipe_read: PipeReader,
child: Pid,
temp_artifact_dest: PathBuf,
worker_pid: u32,
job_pid: Pid,
temp_artifact_dest: PathBuf,
usage_before: Usage,
timeout: Duration,
) -> Result<PrepareWorkerSuccess, PrepareError> {
Expand All @@ -518,10 +518,11 @@ fn handle_parent_process(
.read_to_end(&mut received_data)
.map_err(|err| PrepareError::IoErr(err.to_string()))?;

let status = nix::sys::wait::waitpid(child, None);
let status = nix::sys::wait::waitpid(job_pid, None);
gum::trace!(
target: LOG_TARGET,
%worker_pid,
%job_pid,
"prepare worker received wait status from job: {:?}",
status,
);
Expand All @@ -539,6 +540,7 @@ fn handle_parent_process(
gum::warn!(
target: LOG_TARGET,
%worker_pid,
%job_pid,
"prepare job took {}ms cpu time, exceeded prepare timeout {}ms",
cpu_tv.as_millis(),
timeout.as_millis(),
Expand Down Expand Up @@ -573,6 +575,7 @@ fn handle_parent_process(
gum::debug!(
target: LOG_TARGET,
%worker_pid,
%job_pid,
"worker: writing artifact to {}",
temp_artifact_dest.display(),
);
Expand All @@ -593,15 +596,18 @@ fn handle_parent_process(
//
// The job gets SIGSYS on seccomp violations, but this signal may have been sent for some
// other reason, so we still need to check for seccomp violations elsewhere.
Ok(WaitStatus::Signaled(_pid, signal, _core_dump)) =>
Err(PrepareError::JobDied(format!("received signal: {signal:?}"))),
Ok(WaitStatus::Signaled(_pid, signal, _core_dump)) => Err(PrepareError::JobDied {
err: format!("received signal: {signal:?}"),
job_pid: job_pid.as_raw(),
}),
Err(errno) => Err(error_from_errno("waitpid", errno)),

// An attacker can make the child process return any exit status it wants. So we can treat
// all unexpected cases the same way.
Ok(unexpected_wait_status) => Err(PrepareError::JobDied(format!(
"unexpected status from wait: {unexpected_wait_status:?}"
))),
Ok(unexpected_wait_status) => Err(PrepareError::JobDied {
err: format!("unexpected status from wait: {unexpected_wait_status:?}"),
job_pid: job_pid.as_raw(),
}),
}
}

Expand Down
111 changes: 62 additions & 49 deletions polkadot/node/core/pvf/src/execute/worker_intf.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ use polkadot_node_core_pvf_common::{
execute::{Handshake, WorkerResponse},
worker_dir, SecurityStatus,
};
use polkadot_parachain_primitives::primitives::ValidationResult;
use polkadot_parachain_primitives::primitives::{ValidationCodeHash, ValidationResult};
use polkadot_primitives::ExecutorParams;
use std::{path::Path, time::Duration};
use tokio::{io, net::UnixStream};
Expand Down Expand Up @@ -156,6 +156,16 @@ pub async fn start_work(
let response = futures::select! {
response = recv_response(&mut stream).fuse() => {
match response {
Ok(response) =>
handle_response(
response,
pid,
&artifact.id.code_hash,
&artifact_path,
execution_timeout,
audit_log_file
)
.await,
Err(error) => {
gum::warn!(
target: LOG_TARGET,
Expand All @@ -164,56 +174,9 @@ pub async fn start_work(
?error,
"failed to recv an execute response",
);
// The worker died. Check if it was due to a seccomp violation.
//
// NOTE: Log, but don't change the outcome. Not all validators may have
// auditing enabled, so we don't want attackers to abuse a non-deterministic
// outcome.
for syscall in security::check_seccomp_violations_for_worker(audit_log_file, pid).await {
gum::error!(
target: LOG_TARGET,
worker_pid = %pid,
%syscall,
validation_code_hash = ?artifact.id.code_hash,
?artifact_path,
"A forbidden syscall was attempted! This is a violation of our seccomp security policy. Report an issue ASAP!"
);
}

return Outcome::WorkerIntfErr
},
Ok(response) => {
// Check if any syscall violations occurred during the job. For now this is
// only informative, as we are not enforcing the seccomp policy yet.
for syscall in security::check_seccomp_violations_for_worker(audit_log_file, pid).await {
gum::error!(
Comment on lines -186 to -189
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We still do this check elsewhere in this file, but only if the worker died.

target: LOG_TARGET,
worker_pid = %pid,
%syscall,
validation_code_hash = ?artifact.id.code_hash,
?artifact_path,
"A forbidden syscall was attempted! This is a violation of our seccomp security policy. Report an issue ASAP!"
);
}

if let WorkerResponse::Ok{duration, ..} = response {
if duration > execution_timeout {
// The job didn't complete within the timeout.
gum::warn!(
target: LOG_TARGET,
worker_pid = %pid,
"execute job took {}ms cpu time, exceeded execution timeout {}ms.",
duration.as_millis(),
execution_timeout.as_millis(),
);

// Return a timeout error.
return Outcome::HardTimeout
}
}

response
},
}
},
_ = Delay::new(timeout).fuse() => {
Expand All @@ -238,7 +201,7 @@ pub async fn start_work(
idle_worker: IdleWorker { stream, pid, worker_dir },
},
WorkerResponse::JobTimedOut => Outcome::HardTimeout,
WorkerResponse::JobDied(err) => Outcome::JobDied { err },
WorkerResponse::JobDied { err, job_pid: _ } => Outcome::JobDied { err },
WorkerResponse::JobError(err) => Outcome::JobError { err },

WorkerResponse::InternalError(err) => Outcome::InternalError { err },
Expand All @@ -247,6 +210,56 @@ pub async fn start_work(
.await
}

/// Handles the case where we successfully received response bytes on the host from the child.
///
/// Here we know the artifact exists, but is still located in a temporary file which will be cleared
/// by [`with_worker_dir_setup`].
async fn handle_response(
response: WorkerResponse,
worker_pid: u32,
validation_code_hash: &ValidationCodeHash,
artifact_path: &Path,
execution_timeout: Duration,
audit_log_file: Option<security::AuditLogFile>,
) -> WorkerResponse {
if let WorkerResponse::Ok { duration, .. } = response {
if duration > execution_timeout {
// The job didn't complete within the timeout.
gum::warn!(
target: LOG_TARGET,
worker_pid,
"execute job took {}ms cpu time, exceeded execution timeout {}ms.",
duration.as_millis(),
execution_timeout.as_millis(),
);

// Return a timeout error.
return WorkerResponse::JobTimedOut
}
}

if let WorkerResponse::JobDied { err: _, job_pid } = response {
// The job died. Check if it was due to a seccomp violation.
//
// NOTE: Log, but don't change the outcome. Not all validators may have
// auditing enabled, so we don't want attackers to abuse a non-deterministic
// outcome.
for syscall in security::check_seccomp_violations_for_job(audit_log_file, job_pid).await {
gum::error!(
target: LOG_TARGET,
%worker_pid,
%job_pid,
%syscall,
?validation_code_hash,
?artifact_path,
"A forbidden syscall was attempted! This is a violation of our seccomp security policy. Report an issue ASAP!"
);
}
}

response
}

/// Create a temporary file for an artifact in the worker cache, execute the given future/closure
/// passing the file path in, and clean up the worker cache.
///
Expand Down
4 changes: 2 additions & 2 deletions polkadot/node/core/pvf/src/prepare/pool.rs
Original file line number Diff line number Diff line change
Expand Up @@ -388,14 +388,14 @@ fn handle_mux(
Ok(())
},
// The worker might still be usable, but we kill it just in case.
Outcome::JobDied(err) => {
Outcome::JobDied { err, job_pid } => {
if attempt_retire(metrics, spawned, worker) {
reply(
from_pool,
FromPool::Concluded {
worker,
rip: true,
result: Err(PrepareError::JobDied(err)),
result: Err(PrepareError::JobDied { err, job_pid }),
},
)?;
}
Expand Down
Loading