Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 9 additions & 9 deletions src/agent/onefuzz-agent/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -209,8 +209,8 @@ pub struct DynamicConfig {
}

impl DynamicConfig {
pub async fn save(&self) -> Result<()> {
let path = Self::save_path()?;
pub async fn save(&self, machine_id: Uuid) -> Result<()> {
let path = Self::save_path(machine_id)?;
let dir = path
.parent()
.ok_or(anyhow!("invalid dynamic config path"))?;
Expand All @@ -223,8 +223,8 @@ impl DynamicConfig {
Ok(())
}

pub async fn load() -> Result<Self> {
let path = Self::save_path()?;
pub async fn load(machine_id: Uuid) -> Result<Self> {
let path = Self::save_path(machine_id)?;
let data = fs::read(&path)
.await
.with_context(|| format!("unable to load dynamic config: {}", path.display()))?;
Expand All @@ -233,10 +233,10 @@ impl DynamicConfig {
Ok(ctx)
}

/// Builds the path of the persisted dynamic config: `onefuzz_root()/etc/<file>`.
///
/// Any error from `onefuzz::fs::onefuzz_root()` is propagated with `?`.
// NOTE(review): this span is a rendered diff with removed and added lines
// interleaved. The parameterless signature and the fixed
// "dynamic-config.json" name appear to be the pre-change lines; the
// `machine_id` signature and the "dynamic-config-{machine_id}.json" name
// appear to be the post-change lines.
fn save_path() -> Result<PathBuf> {
fn save_path(machine_id: Uuid) -> Result<PathBuf> {
Ok(onefuzz::fs::onefuzz_root()?
.join("etc")
.join("dynamic-config.json"))
// Post-change form: the machine id is embedded in the file name.
.join(format!("dynamic-config-{}.json", machine_id)))
}
}

Expand Down Expand Up @@ -294,7 +294,7 @@ impl Registration {
match response.error_for_status_with_body().await {
Ok(response) => {
let dynamic_config: DynamicConfig = response.json().await?;
dynamic_config.save().await?;
dynamic_config.save(machine_id).await?;
return Ok(Self {
config,
dynamic_config,
Expand All @@ -317,8 +317,8 @@ impl Registration {
}

pub async fn load_existing(config: StaticConfig) -> Result<Self> {
let dynamic_config = DynamicConfig::load().await?;
let machine_id = config.machine_identity.machine_id;
let dynamic_config = DynamicConfig::load(machine_id).await?;
let registration = Self {
config,
dynamic_config,
Expand Down Expand Up @@ -355,7 +355,7 @@ impl Registration {
.context("Registration.renew request body")?;

let dynamic_config: DynamicConfig = response.json().await?;
dynamic_config.save().await?;
dynamic_config.save(self.machine_id).await?;

Ok(Self {
dynamic_config,
Expand Down
15 changes: 7 additions & 8 deletions src/agent/onefuzz-agent/src/failure.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,23 +4,22 @@ use std::{
fs,
path::{Path, PathBuf},
};
use uuid::Uuid;

const FAILURE_FILE: &str = "onefuzz-agent-failure.txt";

/// Returns the path of the agent failure log directly under `onefuzz_root()`.
///
/// Any error from `onefuzz_root()` is propagated with `?`.
// NOTE(review): rendered diff, old and new lines interleaved. The
// parameterless form joining `FAILURE_FILE` appears to be the pre-change
// version; the `machine_id` form appears to be the post-change version.
pub fn failure_path() -> Result<PathBuf> {
Ok(onefuzz_root()?.join(FAILURE_FILE))
pub fn failure_path(machine_id: Uuid) -> Result<PathBuf> {
// File name is "onefuzz-agent-failure-{machine_id}.txt", so different ids
// map to different files.
Ok(onefuzz_root()?.join(format!("onefuzz-agent-failure-{}.txt", machine_id)))
}

/// Logs `err` with `error!` and writes its `{:?}` rendering to the failure
/// log path.
///
/// # Errors
///
/// Propagates any error from `failure_path`, and returns the write error
/// wrapped with the context "unable to write failure log: <path>".
// NOTE(review): rendered diff, old and new lines interleaved. The
// one-argument signature and the `failure_path()` call appear to be the
// pre-change lines; the `machine_id` variants appear to be post-change.
pub fn save_failure(err: &Error) -> Result<()> {
pub fn save_failure(err: &Error, machine_id: Uuid) -> Result<()> {
error!("saving failure: {:?}", err);
let path = failure_path()?;
let path = failure_path(machine_id)?;
// The debug rendering is what gets stored.
let message = format!("{:?}", err);
fs::write(&path, message)
.with_context(|| format!("unable to write failure log: {}", path.display()))
}

/// Reads the failure log at `failure_path` and returns its contents as a
/// `String`.
///
/// Errors from `failure_path` are propagated with `?`; the result of
/// `read_file_lossy` is returned as-is.
// NOTE(review): rendered diff, old and new lines interleaved. The
// parameterless forms appear to be pre-change, the `machine_id` forms
// post-change.
pub fn read_failure() -> Result<String> {
let path = failure_path()?;
pub fn read_failure(machine_id: Uuid) -> Result<String> {
let path = failure_path(machine_id)?;
// `read_file_lossy` is defined outside this view; presumably it decodes
// invalid UTF-8 lossily — TODO confirm.
read_file_lossy(&path)
}

Expand Down
25 changes: 19 additions & 6 deletions src/agent/onefuzz-agent/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,18 @@ fn redirect(opt: RunOpt) -> Result<()> {
cmd.arg("--config").arg(path);
}

if let Some(machine_id) = opt.machine_id {
cmd.arg("--machine_id").arg(machine_id.to_string());
}

if let Some(machine_name) = opt.machine_name {
cmd.arg("--machine_name").arg(machine_name);
}

if opt.reset_node_lock {
cmd.arg("--reset_lock");
}

let exit_status: ExitStatus = cmd
.spawn()
.context("unable to start child onefuzz-agent")?
Expand Down Expand Up @@ -181,13 +193,14 @@ fn run(opt: RunOpt) -> Result<()> {
}

let config = config?;
let machine_id = config.machine_identity.machine_id;

if reset_lock {
done::remove_done_lock(config.machine_identity.machine_id)?;
} else if done::is_agent_done(config.machine_identity.machine_id)? {
done::remove_done_lock(machine_id)?;
} else if done::is_agent_done(machine_id)? {
debug!(
"agent is done, remove lock ({}) to continue",
done::done_path(config.machine_identity.machine_id)?.display()
done::done_path(machine_id)?.display()
);
return Ok(());
}
Expand All @@ -196,7 +209,7 @@ fn run(opt: RunOpt) -> Result<()> {

if let Err(err) = &result {
error!("error running supervisor agent: {:?}", err);
if let Err(err) = failure::save_failure(err) {
if let Err(err) = failure::save_failure(err, machine_id) {
error!("unable to save failure log: {:?}", err);
}
}
Expand Down Expand Up @@ -234,7 +247,7 @@ async fn check_existing_worksets(coordinator: &mut coordinator::Coordinator) ->

if let Some(work) = WorkSet::load_from_fs_context(coordinator.get_machine_id()).await? {
warn!("onefuzz-agent unexpectedly identified an existing workset on start");
let failure = match failure::read_failure() {
let failure = match failure::read_failure(coordinator.get_machine_id()) {
Ok(value) => format!("onefuzz-agent failed: {}", value),
Err(failure_err) => {
warn!("unable to read failure: {:?}", failure_err);
Expand Down Expand Up @@ -306,7 +319,7 @@ async fn run_agent(config: StaticConfig, reset_node: bool) -> Result<()> {
let mut coordinator = coordinator::Coordinator::new(registration.clone()).await?;
debug!("initialized coordinator");

let reboot = reboot::Reboot;
let reboot = reboot::Reboot::new(config.machine_identity.machine_id);
let reboot_context = reboot.load_context().await?;
if reset_node {
WorkSet::remove_context(config.machine_identity.machine_id).await?;
Expand Down
2 changes: 1 addition & 1 deletion src/agent/onefuzz-agent/src/panic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use std::{panic, sync::Once};

/// Panic hook body: wraps the panic info and a freshly captured `Backtrace`
/// in an `anyhow` error, passes it to `save_failure`, and logs with `error!`
/// if saving fails.
// NOTE(review): rendered diff. The one-argument `save_failure(&err)` call
// appears to be the pre-change line; the two-argument call appears to be the
// post-change line.
fn panic_hook(info: &panic::PanicInfo) {
let err = anyhow!("supervisor panicked: {}\n{:?}", info, Backtrace::new());
if let Err(err) = save_failure(&err) {
// NOTE(review): this passes the nil UUID, not the agent's machine id. If
// `save_failure` derives the log file name from this id, the panic log lands
// in a nil-id file, and a reader that looks up the failure by the real
// machine id will not find it. Confirm this is intended, or plumb the real
// id into the hook.
if let Err(err) = save_failure(&err, uuid::Uuid::nil()) {
error!("unable to write panic log: {:?}", err);
}
}
Expand Down
17 changes: 12 additions & 5 deletions src/agent/onefuzz-agent/src/reboot.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ use std::process::Command;
use anyhow::{Context, Result};
use downcast_rs::Downcast;
use tokio::fs;
use uuid::Uuid;

use crate::work::*;

Expand Down Expand Up @@ -36,11 +37,17 @@ impl IReboot for Reboot {
}
}

// NOTE(review): rendered diff. The unit struct on the next line appears to be
// the pre-change declaration; the struct with a field appears to be the
// post-change declaration.
pub struct Reboot;
/// Reboot handle that carries a machine id.
pub struct Reboot {
// Id of the machine this handle is for.
machine_id: Uuid,
}

impl Reboot {
/// Creates a `Reboot` that stores the given `machine_id`.
pub fn new(machine_id: Uuid) -> Self {
Self { machine_id }
}

pub async fn save_context(&self, ctx: RebootContext) -> Result<()> {
let path = reboot_context_path()?;
let path = reboot_context_path(self.machine_id)?;

info!("saving reboot context to: {}", path.display());

Expand All @@ -56,7 +63,7 @@ impl Reboot {

pub async fn load_context(&self) -> Result<Option<RebootContext>> {
use std::io::ErrorKind;
let path = reboot_context_path()?;
let path = reboot_context_path(self.machine_id)?;

info!("checking for saved reboot context: {}", path.display());

Expand Down Expand Up @@ -127,8 +134,8 @@ impl RebootContext {
}
}

/// Returns the path of the saved reboot context directly under
/// `onefuzz::fs::onefuzz_root()`.
///
/// Any error from `onefuzz_root()` is propagated with `?`.
// NOTE(review): rendered diff, old and new lines interleaved. The
// parameterless form with the fixed "reboot_context.json" name appears to be
// pre-change; the `machine_id` form appears to be post-change.
fn reboot_context_path() -> Result<PathBuf> {
Ok(onefuzz::fs::onefuzz_root()?.join("reboot_context.json"))
fn reboot_context_path(machine_id: Uuid) -> Result<PathBuf> {
// File name is "reboot_context_{machine_id}.json".
Ok(onefuzz::fs::onefuzz_root()?.join(format!("reboot_context_{}.json", machine_id)))
}

#[cfg(test)]
Expand Down