Skip to content

Commit

Permalink
apiserver: reap exited child processes
Browse files Browse the repository at this point in the history
We spawn some background processes, like thar-be-settings, when they might take
some time and we don't want to hold up API clients.  Unless we `wait` for those
children when they exit, their process ID lives on as a zombie.  This adds a
SIGCHLD handler to apiserver to call wait when any child exits.
  • Loading branch information
tjkirch committed Mar 12, 2021
1 parent 8eb75c8 commit 5c9eb7c
Show file tree
Hide file tree
Showing 4 changed files with 48 additions and 6 deletions.
5 changes: 3 additions & 2 deletions sources/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions sources/api/apiserver/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ percent-encoding = "2.1"
semver = "0.11"
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
signal-hook = "0.3"
simplelog = "0.9"
snafu = "0.6"
thar-be-updates = { path = "../thar-be-updates" }
Expand Down
45 changes: 42 additions & 3 deletions sources/api/apiserver/src/bin/apiserver.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,18 @@
extern crate log;

use libc::gid_t;
use nix::unistd::Gid;
use nix::{
sys::wait::{waitpid, WaitPidFlag, WaitStatus},
unistd::{Gid, Pid},
};
use signal_hook::{consts::SIGCHLD, iterator::Signals};
use simplelog::{Config as LogConfig, LevelFilter, SimpleLogger};
use snafu::{ensure, ResultExt};
use std::env;
use std::path::Path;
use std::process;
use std::str::FromStr;
use std::thread;

use apiserver::serve;

Expand All @@ -32,6 +37,9 @@ mod error {
#[snafu(display("{}", source))]
Server { source: apiserver::server::Error },

#[snafu(display("Failed to set up signal handler: {}", source))]
Signal { source: std::io::Error },

#[snafu(display("Logger setup error: {}", source))]
Logger { source: log::SetLoggerError },
}
Expand Down Expand Up @@ -131,15 +139,46 @@ async fn run() -> Result<()> {
let args = parse_args(env::args());

// SimpleLogger will send errors to stderr and anything less to stdout.
SimpleLogger::init(args.log_level, LogConfig::default())
.context(error::Logger)?;
SimpleLogger::init(args.log_level, LogConfig::default()).context(error::Logger)?;

// Make sure the datastore exists
ensure!(
Path::new(&args.datastore_path).exists(),
error::NonexistentDatastore
);

// We need to handle some actions asynchronously without holding up API clients, like invoking
// thar-be-settings to apply changes to the system, so we fork them off and don't wait for
// their status or output at that point. Instead, when we receive a SIGCHLD signal telling us
// that child processes have exited, we wait for them in this thread so that they don't become
// zombie processes.
let mut signals = Signals::new(&[SIGCHLD]).context(error::Signal)?;
thread::spawn(move || {
for signal in signals.forever() {
match signal {
SIGCHLD => {
// We loop as long as we have children to reap.
loop {
if let Ok(status) = waitpid(Pid::from_raw(-1), Some(WaitPidFlag::WNOHANG)) {
// You can get a CHLD signal in other scenarios, even if a child is
// still running; we only want to continue if we're making progress
// with reaping exited children. Otherwise, stop and wait for another
// signal.
if !matches!(status, WaitStatus::Exited(..)) {
break;
}
} else {
// There's not much we can do to recover from an error in waiting, so
// we'll just try again next loop.
break;
}
}
}
_ => {} // this space for rent!
}
}
});

// Each request makes its own handle to the datastore; there's no locking or
// synchronization yet. Therefore, only use 1 thread for safety.
let threads = 1;
Expand Down
3 changes: 2 additions & 1 deletion sources/api/apiserver/src/server/controller.rs
Original file line number Diff line number Diff line change
Expand Up @@ -372,7 +372,8 @@ where
.context(error::ConfigApplierStart)?;
}

// Leave config applier to run in the background; we can't wait for it
// Leave config applier to run in the background; we can't wait for it.
// The apiserver binary waits for any exited children so they don't become zombies.
Ok(())
}

Expand Down

0 comments on commit 5c9eb7c

Please sign in to comment.