Skip to content

Commit

Permalink
Support before_sandbox/before_exec callbacks to set up subprocess env…
Browse files Browse the repository at this point in the history
…ironment

This commit is a bit oversized... adding support for these callbacks required
creating a way to pass errors back to the parent process, which inspired
fixing the error handling in start(), which uncovered some bugs in start():
* Immediate child process leaked as a zombie
* pipe_fds[0] leaked in parent
* pipe_fds[1] leaked into grandchild
  • Loading branch information
rocallahan committed Sep 29, 2017
1 parent 21055c5 commit c856934
Show file tree
Hide file tree
Showing 7 changed files with 316 additions and 28 deletions.
5 changes: 5 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,11 @@ rand = "0.3"
[dev-dependencies]
num_cpus = "1"

[[test]]
name = "before-callbacks"
path = "tests/before-callbacks.rs"
harness = false

[[test]]
name = "file-read-all"
path = "tests/file-read-all.rs"
Expand Down
3 changes: 2 additions & 1 deletion lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@ pub mod platform {
pub use platform::macos::{ChildSandbox, Operation, Sandbox};
#[cfg(any(target_os="android", target_os="linux", target_os="macos"))]
pub use platform::unix::process::{self, Process};
#[cfg(any(target_os="android", target_os="linux", target_os="macos"))]
pub use platform::unix::CommandInner;

#[cfg(any(target_os="android", target_os="linux"))]
pub mod linux;
Expand All @@ -31,4 +33,3 @@ pub mod platform {
#[cfg(any(target_os="android", target_os="linux", target_os="macos"))]
pub mod unix;
}

113 changes: 90 additions & 23 deletions platform/linux/namespace.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,14 @@ use platform::unix;
use profile::{Operation, PathPattern, Profile};
use sandbox::Command;

use libc::{self, c_char, c_int, c_ulong, c_void, gid_t, pid_t, size_t, ssize_t, uid_t};
use libc::{self, EINVAL, O_CLOEXEC, c_char, c_int, c_ulong, c_void, gid_t, pid_t, size_t, ssize_t, uid_t};
use std::env;
use std::ffi::{CString, OsStr, OsString};
use std::fs::{self, File};
use std::io::{self, Write};
use std::iter;
use std::mem;
use std::os::unix::io::RawFd;
use std::os::unix::prelude::OsStrExt;
use std::path::{Path, PathBuf};
use std::ptr;
Expand Down Expand Up @@ -202,6 +203,50 @@ unsafe fn prepare_user_and_pid_namespaces(parent_uid: uid_t, parent_gid: gid_t)
Ok(())
}

/// Calls `fork()` and converts its C-style return value into an `io::Result`.
///
/// Returns `Ok(0)` in the newly created process and `Ok(child_pid)` in the
/// calling process. A negative return value from `fork()` is reported as the
/// current OS error.
unsafe fn fork_wrapper() -> io::Result<pid_t> {
    match fork() {
        pid if pid < 0 => Err(io::Error::last_os_error()),
        pid => Ok(pid),
    }
}

unsafe fn pipe_write(pipe: RawFd, value: i32) {
assert!(libc::write(pipe,
&value as *const i32 as *const c_void,
mem::size_of::<i32>() as size_t) == mem::size_of::<i32>() as ssize_t);
}

/// Reads `i32` values from `pipe` until end-of-file and returns them in the
/// order they were read.
///
/// A `read` that fails with `EINTR` (interrupted by a signal before any data
/// was transferred) is retried instead of being reported to the caller.
///
/// # Errors
///
/// Returns the OS error if `read` fails for any reason other than `EINTR`.
///
/// # Panics
///
/// Panics if a read returns some data but fewer bytes than one `i32`.
///
/// # Safety
///
/// `pipe` must be a file descriptor that is open for reading.
unsafe fn pipe_read(pipe: RawFd) -> io::Result<Vec<i32>> {
    let value_size = mem::size_of::<i32>();
    let mut values = Vec::new();
    loop {
        let mut value: i32 = 0;
        let bytes = libc::read(pipe,
                               &mut value as *mut i32 as *mut c_void,
                               value_size as size_t);
        if bytes == value_size as ssize_t {
            values.push(value);
        } else if bytes == 0 {
            // End-of-file: every writer has closed its end of the pipe.
            return Ok(values);
        } else if bytes > 0 {
            panic!("No idea how we got a partial read in this pipe");
        } else {
            let err = io::Error::last_os_error();
            // An interrupted read transferred nothing; just try again.
            if err.kind() != io::ErrorKind::Interrupted {
                return Err(err);
            }
        }
    }
}

unsafe fn handle_error<T>(result: io::Result<T>, pipe: RawFd) -> T {
match result {
Ok(v) => v,
Err(e) => {
pipe_write(pipe, -e.raw_os_error().unwrap_or(EINVAL));
libc::exit(0);
}
}
}

/// Spawns a child process in a new namespace.
///
/// This function is quite tricky. Hic sunt dracones!
Expand All @@ -227,53 +272,75 @@ pub fn start(profile: &Profile, command: &mut Command) -> io::Result<Process> {
unsafe {
// Create a pipe so we can communicate the PID of our grandchild back.
let mut pipe_fds = [0, 0];
assert!(libc::pipe(&mut pipe_fds[0]) == 0);
if libc::pipe2(&mut pipe_fds[0], O_CLOEXEC) != 0 {
return Err(io::Error::last_os_error());
}

// Set this `prctl` flag so that we can wait on our grandchild. (Otherwise it'll be
// reparented to init.)
assert!(seccomp::prctl(PR_SET_CHILD_SUBREAPER, 1, 0, 0, 0) == 0);

// Fork so that we can unshare without removing our ability to create threads.
if fork() == 0 {
// Close the reading end of the pipe.
libc::close(pipe_fds[0]);

let forked = match fork_wrapper() {
Ok(pid) => pid,
Err(e) => {
libc::close(pipe_fds[0]);
libc::close(pipe_fds[1]);
return Err(e);
}
};
if forked == 0 {
handle_error(command.inner.before_sandbox(&[pipe_fds[1]]), pipe_fds[1]);
// Set up our user and PID namespaces. The PID namespace won't actually come into
// effect until the next fork(), because PIDs are immutable.
prepare_user_and_pid_namespaces(parent_uid, parent_gid).unwrap();
handle_error(prepare_user_and_pid_namespaces(parent_uid, parent_gid), pipe_fds[1]);

// Fork again, to enter the PID namespace.
match fork() {
match handle_error(fork_wrapper(), pipe_fds[1]) {
0 => {
// Enter the auxiliary namespaces.
assert!(unshare(unshare_flags) == 0);
if unshare(unshare_flags) != 0 {
handle_error::<()>(Err(io::Error::last_os_error()), pipe_fds[1]);
}

handle_error(command.inner.before_exec(&[pipe_fds[1]]), pipe_fds[1]);
// Go ahead and start the command.
drop(unix::process::exec(command));
abort()
handle_error::<()>(Err(unix::process::exec(command)), pipe_fds[1]);
}
grandchild_pid => {
// Send the PID of our child up to our parent and exit.
assert!(libc::write(pipe_fds[1],
&grandchild_pid as *const pid_t as *const c_void,
mem::size_of::<pid_t>() as size_t) ==
mem::size_of::<pid_t>() as ssize_t);
pipe_write(pipe_fds[1], grandchild_pid);
libc::exit(0);
}
}
}

// Grandparent execution continues here. First, close the writing end of the pipe.
// Grandparent execution continues here.

// Reap child zombie.
waitpid(forked, ptr::null_mut(), 0);

// Close pipe writer end now so that when the child/grandchild close
// theirs, we'll get EOF on reading.
libc::close(pipe_fds[1]);

// Retrieve our grandchild's PID.
let mut grandchild_pid: pid_t = 0;
assert!(libc::read(pipe_fds[0],
&mut grandchild_pid as *mut i32 as *mut c_void,
mem::size_of::<pid_t>() as size_t) ==
mem::size_of::<pid_t>() as ssize_t);
let pipe_vals = pipe_read(pipe_fds[0]);
libc::close(pipe_fds[0]);
let pipe_vals = pipe_vals?;

// We could get a PID followed by an error from the grandchild.
let grandchild_pid = pipe_vals.iter().find(|v| **v >= 0);
if let Some(err) = pipe_vals.iter().find(|v| **v < 0) {
if let Some(pid) = grandchild_pid {
// Reap failed grandchild zombie.
waitpid(*pid, ptr::null_mut(), 0);
}
return Err(io::Error::from_raw_os_error(-*err));
}

Ok(Process {
pid: grandchild_pid,
pid: *grandchild_pid.expect("We should have something in the pipe"),
})
}
}
Expand Down Expand Up @@ -331,7 +398,6 @@ const _LINUX_CAPABILITY_U32S_3: u32 = 2;
const PR_SET_CHILD_SUBREAPER: c_int = 36;

extern {
fn abort() -> !;
fn capset(hdrp: cap_user_header_t, datap: const_cap_user_data_t) -> c_int;
fn chroot(path: *const c_char) -> c_int;
fn fork() -> pid_t;
Expand All @@ -342,6 +408,7 @@ extern {
mountflags: c_ulong,
data: *const c_void)
-> c_int;
fn waitpid(pid: pid_t, stat_loc: *mut c_int, options: c_int) -> pid_t;
fn unshare(flags: c_int) -> c_int;
}

107 changes: 107 additions & 0 deletions platform/unix/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,5 +8,112 @@
// option. This file may not be copied, modified, or distributed
// except according to those terms.

use std::io;
use std::os::unix::io::RawFd;

pub mod process;

use sandbox::Command;

/// Extension methods for `Command` that register closures to be run in the
/// forked child while the command is being started.
pub trait CommandExt {
/// Schedules a closure to be run after forking but before any sandbox
/// controls are applied. This may not be the final process that will exec.
/// This lets you set up subprocess state that must be initialized before
/// dropping privileges, without disturbing the parent process.
///
/// The closure is allowed to return an I/O error whose OS error code will
/// be communicated back to the parent and returned as an error from when
/// the start was requested.
///
/// Multiple closures can be registered and they will be called in order of
/// their registration. If a closure returns `Err` then no further closures
/// will be called and the start operation will immediately return with a
/// failure.
///
/// TODO on Mac, errors are not yet propagated to start().
///
/// # Notes
///
/// This closure will be run in the context of the child process after a
/// `fork`. This primarily means that any modifications made to memory on
/// behalf of this closure will **not** be visible to the parent process.
/// This is often a very constrained environment where normal operations
/// like `malloc` or acquiring a mutex are not guaranteed to work (due to
/// other threads perhaps still running when the `fork` was run).
///
/// The closure is passed a list of file descriptors. Avoid closing any
/// file descriptors in the passed-in list. These are O_CLOEXEC so will
/// automatically close when the command runs.
fn before_sandbox<F>(&mut self, f: F) -> &mut Command
where F: FnMut(&[RawFd]) -> io::Result<()> + Send + Sync + 'static;
/// Schedules a closure to be run after any pre-exec sandbox controls are
/// applied but before exec, in the process that will exec. On Linux, this
/// closure can call ChildSandbox::activate(), letting you sandbox a foreign
/// executable and then perform process setup steps that must be performed
/// after the sandbox is activated.
///
/// The closure is allowed to return an I/O error whose OS error code will
/// be communicated back to the parent and returned as an error from when
/// the start was requested.
///
/// Multiple closures can be registered and they will be called in order of
/// their registration. If a closure returns `Err` then no further closures
/// will be called and the start operation will immediately return with a
/// failure.
///
/// TODO on Mac, errors are not yet propagated to start().
///
/// # Notes
///
/// This closure will be run in the context of the child process after a
/// `fork`. This primarily means that any modifications made to memory on
/// behalf of this closure will **not** be visible to the parent process.
/// This is often a very constrained environment where normal operations
/// like `malloc` or acquiring a mutex are not guaranteed to work (due to
/// other threads perhaps still running when the `fork` was run).
///
/// The closure is passed a list of file descriptors. Avoid closing any
/// file descriptors in the passed-in list. These are O_CLOEXEC so will
/// automatically close when the command runs.
fn before_exec<F>(&mut self, f: F) -> &mut Command
where F: FnMut(&[RawFd]) -> io::Result<()> + Send + Sync + 'static;
}

/// Per-command storage for the closures registered to run in the child
/// process before it execs.
pub struct CommandInner {
    /// Closures run by `before_sandbox`, in registration order.
    before_sandbox_closures: Vec<Box<FnMut(&[RawFd]) -> io::Result<()> + Send + Sync + 'static>>,
    /// Closures run by `before_exec`, in registration order.
    before_exec_closures: Vec<Box<FnMut(&[RawFd]) -> io::Result<()> + Send + Sync + 'static>>,
}

impl CommandInner {
    /// Creates an instance with no closures registered.
    pub fn new() -> CommandInner {
        CommandInner {
            before_sandbox_closures: Vec::new(),
            before_exec_closures: Vec::new(),
        }
    }

    /// Invokes each closure in order with `fds`, stopping at and returning
    /// the first error.
    fn run_in_order(closures: &mut [Box<FnMut(&[RawFd]) -> io::Result<()> + Send + Sync + 'static>],
                    fds: &[RawFd])
                    -> io::Result<()> {
        for closure in closures.iter_mut() {
            if let Err(err) = closure(fds) {
                return Err(err);
            }
        }
        Ok(())
    }

    /// Runs the registered before-sandbox closures in registration order,
    /// passing `preserve_fds` to each one.
    ///
    /// The first error stops the run and is returned, leaving the registered
    /// closures in place. When every closure succeeds the list is emptied, so
    /// a later call does not run them again.
    pub fn before_sandbox(&mut self, preserve_fds: &[RawFd]) -> io::Result<()> {
        Self::run_in_order(&mut self.before_sandbox_closures, preserve_fds)?;
        self.before_sandbox_closures.clear();
        Ok(())
    }

    /// Runs the registered before-exec closures in registration order,
    /// passing `preserve_fds` to each one.
    ///
    /// The first error stops the run and is returned, leaving the registered
    /// closures in place. When every closure succeeds the list is emptied, so
    /// a later call does not run them again.
    pub fn before_exec(&mut self, preserve_fds: &[RawFd]) -> io::Result<()> {
        Self::run_in_order(&mut self.before_exec_closures, preserve_fds)?;
        self.before_exec_closures.clear();
        Ok(())
    }
}

impl CommandExt for Command {
    /// Appends `f` to the list of closures run before sandbox controls are
    /// applied, and returns `self` so calls can be chained.
    fn before_sandbox<F>(&mut self, f: F) -> &mut Command
        where F: FnMut(&[RawFd]) -> io::Result<()> + Send + Sync + 'static
    {
        let closure: Box<FnMut(&[RawFd]) -> io::Result<()> + Send + Sync + 'static> = Box::new(f);
        self.inner.before_sandbox_closures.push(closure);
        self
    }

    /// Appends `f` to the list of closures run just before exec, and returns
    /// `self` so calls can be chained.
    fn before_exec<F>(&mut self, f: F) -> &mut Command
        where F: FnMut(&[RawFd]) -> io::Result<()> + Send + Sync + 'static
    {
        let closure: Box<FnMut(&[RawFd]) -> io::Result<()> + Send + Sync + 'static> = Box::new(f);
        self.inner.before_exec_closures.push(closure);
        self
    }
}
4 changes: 3 additions & 1 deletion platform/unix/process.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,10 +42,12 @@ pub fn exec(command: &Command) -> io::Error {
io::Error::last_os_error()
}

pub fn spawn(command: &Command) -> io::Result<Process> {
pub fn spawn(command: &mut Command) -> io::Result<Process> {
unsafe {
match fork() {
0 => {
drop(command.inner.before_sandbox(&[]));
drop(command.inner.before_exec(&[]));
drop(exec(command));
panic!()
}
Expand Down
8 changes: 5 additions & 3 deletions sandbox.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ use std::env;
use std::ffi::{CString, OsStr};
use std::io;

pub use platform::{ChildSandbox, Sandbox};
pub use platform::{ChildSandbox, CommandInner, Sandbox};

/// All platform-specific sandboxes implement this trait.
///
Expand Down Expand Up @@ -60,6 +60,8 @@ pub struct Command {
pub args: Vec<CString>,
/// The environment of the process.
pub env: HashMap<CString,CString>,
/// Platform-specific inner data
pub(crate) inner: CommandInner,
}

impl Command {
Expand All @@ -71,6 +73,7 @@ impl Command {
module_path: cstring(module_path),
args: Vec::new(),
env: HashMap::new(),
inner: CommandInner::new(),
}
}

Expand Down Expand Up @@ -99,8 +102,7 @@ impl Command {
}

/// Executes the command as a child process, which is returned.
pub fn spawn(&self) -> io::Result<Process> {
pub fn spawn(&mut self) -> io::Result<Process> {
process::spawn(self)
}
}

Loading

0 comments on commit c856934

Please sign in to comment.