diff --git a/src/agent/onefuzz-agent/src/config.rs b/src/agent/onefuzz-agent/src/config.rs index a9a8c650dd..281286c7bc 100644 --- a/src/agent/onefuzz-agent/src/config.rs +++ b/src/agent/onefuzz-agent/src/config.rs @@ -209,8 +209,8 @@ pub struct DynamicConfig { } impl DynamicConfig { - pub async fn save(&self) -> Result<()> { - let path = Self::save_path()?; + pub async fn save(&self, machine_id: Uuid) -> Result<()> { + let path = Self::save_path(machine_id)?; let dir = path .parent() .ok_or(anyhow!("invalid dynamic config path"))?; @@ -223,8 +223,8 @@ impl DynamicConfig { Ok(()) } - pub async fn load() -> Result { - let path = Self::save_path()?; + pub async fn load(machine_id: Uuid) -> Result { + let path = Self::save_path(machine_id)?; let data = fs::read(&path) .await .with_context(|| format!("unable to load dynamic config: {}", path.display()))?; @@ -233,10 +233,10 @@ impl DynamicConfig { Ok(ctx) } - fn save_path() -> Result { + fn save_path(machine_id: Uuid) -> Result { Ok(onefuzz::fs::onefuzz_root()? .join("etc") - .join("dynamic-config.json")) + .join(format!("dynamic-config-{}.json", machine_id))) } } @@ -294,7 +294,7 @@ impl Registration { match response.error_for_status_with_body().await { Ok(response) => { let dynamic_config: DynamicConfig = response.json().await?; - dynamic_config.save().await?; + dynamic_config.save(machine_id).await?; return Ok(Self { config, dynamic_config, @@ -317,8 +317,8 @@ impl Registration { } pub async fn load_existing(config: StaticConfig) -> Result { - let dynamic_config = DynamicConfig::load().await?; let machine_id = config.machine_identity.machine_id; + let dynamic_config = DynamicConfig::load(machine_id).await?; let registration = Self { config, dynamic_config, @@ -355,7 +355,7 @@ impl Registration { .context("Registration.renew request body")?; let dynamic_config: DynamicConfig = response.json().await?; - dynamic_config.save().await?; + dynamic_config.save(self.machine_id).await?; Ok(Self { dynamic_config, diff --git a/src/agent/onefuzz-agent/src/failure.rs b/src/agent/onefuzz-agent/src/failure.rs index 561554ac93..9e0c95a7fb 100644 --- a/src/agent/onefuzz-agent/src/failure.rs +++ b/src/agent/onefuzz-agent/src/failure.rs @@ -4,23 +4,22 @@ use std::{ fs, path::{Path, PathBuf}, }; +use uuid::Uuid; -const FAILURE_FILE: &str = "onefuzz-agent-failure.txt"; - -pub fn failure_path() -> Result { - Ok(onefuzz_root()?.join(FAILURE_FILE)) +pub fn failure_path(machine_id: Uuid) -> Result { + Ok(onefuzz_root()?.join(format!("onefuzz-agent-failure-{}.txt", machine_id))) } -pub fn save_failure(err: &Error) -> Result<()> { +pub fn save_failure(err: &Error, machine_id: Uuid) -> Result<()> { error!("saving failure: {:?}", err); - let path = failure_path()?; + let path = failure_path(machine_id)?; let message = format!("{:?}", err); fs::write(&path, message) .with_context(|| format!("unable to write failure log: {}", path.display())) } -pub fn read_failure() -> Result { - let path = failure_path()?; +pub fn read_failure(machine_id: Uuid) -> Result { + let path = failure_path(machine_id)?; read_file_lossy(&path) } diff --git a/src/agent/onefuzz-agent/src/main.rs b/src/agent/onefuzz-agent/src/main.rs index c2eda46675..300d779e55 100644 --- a/src/agent/onefuzz-agent/src/main.rs +++ b/src/agent/onefuzz-agent/src/main.rs @@ -143,6 +143,18 @@ fn redirect(opt: RunOpt) -> Result<()> { cmd.arg("--config").arg(path); } + if let Some(machine_id) = opt.machine_id { + cmd.arg("--machine_id").arg(machine_id.to_string()); + } + + if let Some(machine_name) = opt.machine_name { + cmd.arg("--machine_name").arg(machine_name); + } + + if opt.reset_node_lock { + cmd.arg("--reset_lock"); + } + let exit_status: ExitStatus = cmd .spawn() .context("unable to start child onefuzz-agent")? @@ -181,13 +193,14 @@ fn run(opt: RunOpt) -> Result<()> { } let config = config?; + let machine_id = config.machine_identity.machine_id; if reset_lock { - done::remove_done_lock(config.machine_identity.machine_id)?; - } else if done::is_agent_done(config.machine_identity.machine_id)? { + done::remove_done_lock(machine_id)?; + } else if done::is_agent_done(machine_id)? { debug!( "agent is done, remove lock ({}) to continue", - done::done_path(config.machine_identity.machine_id)?.display() + done::done_path(machine_id)?.display() ); return Ok(()); } @@ -196,7 +209,7 @@ fn run(opt: RunOpt) -> Result<()> { if let Err(err) = &result { error!("error running supervisor agent: {:?}", err); - if let Err(err) = failure::save_failure(err) { + if let Err(err) = failure::save_failure(err, machine_id) { error!("unable to save failure log: {:?}", err); } } @@ -234,7 +247,7 @@ async fn check_existing_worksets(coordinator: &mut coordinator::Coordinator) -> if let Some(work) = WorkSet::load_from_fs_context(coordinator.get_machine_id()).await? { warn!("onefuzz-agent unexpectedly identified an existing workset on start"); - let failure = match failure::read_failure() { + let failure = match failure::read_failure(coordinator.get_machine_id()) { Ok(value) => format!("onefuzz-agent failed: {}", value), Err(failure_err) => { warn!("unable to read failure: {:?}", failure_err); @@ -306,7 +319,7 @@ async fn run_agent(config: StaticConfig, reset_node: bool) -> Result<()> { let mut coordinator = coordinator::Coordinator::new(registration.clone()).await?; debug!("initialized coordinator"); - let reboot = reboot::Reboot; + let reboot = reboot::Reboot::new(config.machine_identity.machine_id); let reboot_context = reboot.load_context().await?; if reset_node { WorkSet::remove_context(config.machine_identity.machine_id).await?; diff --git a/src/agent/onefuzz-agent/src/panic.rs b/src/agent/onefuzz-agent/src/panic.rs index dc7c942253..eaceb65a64 100644 --- a/src/agent/onefuzz-agent/src/panic.rs +++ b/src/agent/onefuzz-agent/src/panic.rs @@ -4,7 +4,7 @@ use std::{panic, sync::Once}; fn panic_hook(info: &panic::PanicInfo) { let err = anyhow!("supervisor panicked: {}\n{:?}", info, Backtrace::new()); - if let Err(err) = save_failure(&err) { + if let Err(err) = save_failure(&err, uuid::Uuid::nil()) { error!("unable to write panic log: {:?}", err); } } diff --git a/src/agent/onefuzz-agent/src/reboot.rs b/src/agent/onefuzz-agent/src/reboot.rs index 7584cea858..9b9232a11d 100644 --- a/src/agent/onefuzz-agent/src/reboot.rs +++ b/src/agent/onefuzz-agent/src/reboot.rs @@ -7,6 +7,7 @@ use std::process::Command; use anyhow::{Context, Result}; use downcast_rs::Downcast; use tokio::fs; +use uuid::Uuid; use crate::work::*; @@ -36,11 +37,17 @@ impl IReboot for Reboot { } } -pub struct Reboot; +pub struct Reboot { + machine_id: Uuid, +} impl Reboot { + pub fn new(machine_id: Uuid) -> Self { + Self { machine_id } + } + pub async fn save_context(&self, ctx: RebootContext) -> Result<()> { - let path = reboot_context_path()?; + let path = reboot_context_path(self.machine_id)?; info!("saving reboot context to: {}", path.display()); @@ -56,7 +63,7 @@ impl Reboot { pub async fn load_context(&self) -> Result> { use std::io::ErrorKind; - let path = reboot_context_path()?; + let path = reboot_context_path(self.machine_id)?; info!("checking for saved reboot context: {}", path.display()); @@ -127,8 +134,8 @@ impl RebootContext { } } -fn reboot_context_path() -> Result { - Ok(onefuzz::fs::onefuzz_root()?.join("reboot_context.json")) +fn reboot_context_path(machine_id: Uuid) -> Result { + Ok(onefuzz::fs::onefuzz_root()?.join(format!("reboot_context_{}.json", machine_id))) } #[cfg(test)]