diff --git a/CHANGELOG.md b/CHANGELOG.md index 4a762f30553..95d857b6c33 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,11 @@ - [#4179](https://github.com/firecracker-microvm/firecracker/pull/4179): Fixed a bug reporting a non-zero exit code on successful shutdown when starting Firecracker with `--no-api`. +- [#4271](https://github.com/firecracker-microvm/firecracker/pull/4271): Fixed + a bug where Firecracker would log "RunWithApiError error: MicroVMStopped + without an error: GenericError" when exiting after encountering an emulation + error. It now correctly prints "RunWithApiError error: MicroVMStopped *with* an + error: GenericError". - [#4270](https://github.com/firecracker-microvm/firecracker/pull/4270): Fixed a bug introduced in #4047 that limited the `--level` option of logger to Pascal-cased values (e.g. accepting "Info", but not "info"). It now diff --git a/src/firecracker/src/api_server_adapter.rs b/src/firecracker/src/api_server_adapter.rs index b2cad171f3c..f529bb60038 100644 --- a/src/firecracker/src/api_server_adapter.rs +++ b/src/firecracker/src/api_server_adapter.rs @@ -15,16 +15,17 @@ use utils::eventfd::EventFd; use vmm::logger::{error, warn, ProcessTimeReporter}; use vmm::resources::VmResources; use vmm::rpc_interface::{ - ApiRequest, ApiResponse, PrebootApiController, RuntimeApiController, VmmAction, + ApiRequest, ApiResponse, BuildMicrovmFromRequestsError, PrebootApiController, + RuntimeApiController, VmmAction, }; use vmm::vmm_config::instance_info::InstanceInfo; use vmm::{EventManager, FcExitCode, Vmm}; #[derive(Debug, thiserror::Error, displaydoc::Display)] pub enum ApiServerError { - /// MicroVMStopped without an error: {0:?} - MicroVMStoppedWithoutError(FcExitCode), - /// MicroVMStopped with an error: {0:?} + /// Failed to build MicroVM: {0}. + BuildMicroVmError(BuildMicrovmFromRequestsError), + /// MicroVM stopped with an error: {0:?} MicroVMStoppedWithError(FcExitCode), /// Failed to open the API socket at: {0}. Check that it is not already used. FailedToBindSocket(String), @@ -52,7 +53,7 @@ impl ApiServerAdapter { vm_resources: VmResources, vmm: Arc>, event_manager: &mut EventManager, - ) -> Result<(), FcExitCode> { + ) -> Result<(), ApiServerError> { let api_adapter = Arc::new(Mutex::new(Self { api_event_fd, from_api, @@ -67,7 +68,7 @@ impl ApiServerAdapter { match vmm.lock().unwrap().shutdown_exit_code() { Some(FcExitCode::Ok) => break, - Some(exit_code) => return Err(exit_code), + Some(exit_code) => return Err(ApiServerError::MicroVMStoppedWithError(exit_code)), None => continue, } } @@ -224,10 +225,10 @@ pub(crate) fn run_with_api( mmds_size_limit, metadata_json, ) - .map_err(ApiServerError::MicroVMStoppedWithError), + .map_err(ApiServerError::BuildMicroVmError), }; - let result = build_result.map(|(vm_resources, vmm)| { + let result = build_result.and_then(|(vm_resources, vmm)| { firecracker_metrics .lock() .expect("Poisoned lock") @@ -248,9 +249,5 @@ pub(crate) fn run_with_api( // shutdown-internal and returns from its function. api_thread.join().expect("Api thread should join"); - match result { - Ok(Ok(())) => Ok(()), - Ok(Err(exit_code)) => Err(ApiServerError::MicroVMStoppedWithoutError(exit_code)), - Err(exit_error) => Err(exit_error), - } + result } diff --git a/src/firecracker/src/main.rs b/src/firecracker/src/main.rs index 1aef9ce77c7..e26e7ef8eee 100644 --- a/src/firecracker/src/main.rs +++ b/src/firecracker/src/main.rs @@ -81,7 +81,6 @@ impl From for ExitCode { let exit_code = match value { MainError::ParseArguments(_) => FcExitCode::ArgParsing, MainError::InvalidLogLevel(_) => FcExitCode::BadConfiguration, - MainError::RunWithApi(ApiServerError::MicroVMStoppedWithoutError(code)) => code, MainError::RunWithApi(ApiServerError::MicroVMStoppedWithError(code)) => code, MainError::RunWithoutApiError(RunWithoutApiError::Shutdown(code)) => code, _ => FcExitCode::GenericError, diff --git a/src/vmm/src/lib.rs b/src/vmm/src/lib.rs index 3aec20fa36e..e67b31823a7 100644 --- a/src/vmm/src/lib.rs +++ b/src/vmm/src/lib.rs @@ -110,7 +110,7 @@ pub mod vstate; use std::collections::HashMap; use std::io; use std::os::unix::io::AsRawFd; -use std::sync::mpsc::{RecvTimeoutError, TryRecvError}; +use std::sync::mpsc::RecvTimeoutError; use std::sync::{Arc, Barrier, Mutex}; use std::time::Duration; @@ -903,23 +903,27 @@ impl MutEventSubscriber for Vmm { // Exit event handling should never do anything more than call 'self.stop()'. let _ = self.vcpus_exit_evt.read(); - let mut exit_code = None; - // Query each vcpu for their exit_code. - for handle in &self.vcpus_handles { - match handle.response_receiver().try_recv() { - Ok(VcpuResponse::Exited(status)) => { - exit_code = Some(status); - // Just use the first encountered exit-code. - break; - } - Ok(_response) => {} // Don't care about these, we are exiting. - Err(TryRecvError::Empty) => {} // Nothing pending in channel - Err(err) => { - panic!("Error while looking for VCPU exit status: {}", err); + let exit_code = 'exit_code: { + // Query each vcpu for their exit_code. + for handle in &self.vcpus_handles { + // Drain all vcpu responses that are pending from this vcpu until we find an + // exit status. + for response in handle.response_receiver().try_iter() { + if let VcpuResponse::Exited(status) = response { + // It could be that some vcpus exited successfully while others + // errored out. Thus make sure that error exits from one vcpu always + // takes precedence over "ok" exits + if status != FcExitCode::Ok { + break 'exit_code status; + } + } } } - } - self.stop(exit_code.unwrap_or(FcExitCode::Ok)); + + // No CPUs exited with error status code, report "Ok" + FcExitCode::Ok + }; + self.stop(exit_code); } else { error!("Spurious EventManager event for handler: Vmm"); } diff --git a/src/vmm/src/rpc_interface.rs b/src/vmm/src/rpc_interface.rs index d7ee989d4ff..9b52bfe6fd9 100644 --- a/src/vmm/src/rpc_interface.rs +++ b/src/vmm/src/rpc_interface.rs @@ -20,7 +20,7 @@ use super::{ }; use crate::builder::StartMicrovmError; use crate::cpu_config::templates::{CustomCpuTemplate, GuestConfigError}; -use crate::logger::{error, info, warn, LoggerConfig, *}; +use crate::logger::{info, warn, LoggerConfig, *}; use crate::mmds::data_store::{self, Mmds}; use crate::persist::{CreateSnapshotError, RestoreFromSnapshotError, VmInfo}; use crate::resources::VmmConfig; @@ -42,7 +42,7 @@ use crate::vmm_config::net::{ use crate::vmm_config::snapshot::{CreateSnapshotParams, LoadSnapshotParams, SnapshotType}; use crate::vmm_config::vsock::{VsockConfigError, VsockDeviceConfig}; use crate::vmm_config::{self, RateLimiterUpdate}; -use crate::{EventManager, FcExitCode}; +use crate::EventManager; /// This enum represents the public interface of the VMM. Each action contains various /// bits of information (ids, paths, etc.). @@ -247,7 +247,7 @@ pub struct PrebootApiController<'a> { boot_path: bool, // Some PrebootApiRequest errors are irrecoverable and Firecracker // should cleanly teardown if they occur. - fatal_error: Option, + fatal_error: Option, } // TODO Remove when `EventManager` implements `std::fmt::Debug`. @@ -287,6 +287,17 @@ pub type ApiRequest = Box; /// Shorthand type for a response containing a boxed Result. pub type ApiResponse = Box>; +/// Error type for `PrebootApiController::build_microvm_from_requests`. +#[derive(Debug, thiserror::Error, displaydoc::Display, derive_more::From)] +pub enum BuildMicrovmFromRequestsError { + /// Populating MMDS from file failed: {0:?}. + Mmds(data_store::Error), + /// Loading snapshot failed. + Restore, + /// Resuming MicroVM after loading snapshot failed. + Resume, +} + impl<'a> PrebootApiController<'a> { /// Constructor for the PrebootApiController. pub fn new( @@ -320,7 +331,7 @@ impl<'a> PrebootApiController<'a> { boot_timer_enabled: bool, mmds_size_limit: usize, metadata_json: Option<&str>, - ) -> Result<(VmResources, Arc>), FcExitCode> { + ) -> Result<(VmResources, Arc>), BuildMicrovmFromRequestsError> { let mut vm_resources = VmResources::default(); // Silence false clippy warning. Clippy suggests using // VmResources { boot_timer: boot_timer_enabled, ..Default::default() }; but this will @@ -333,16 +344,9 @@ impl<'a> PrebootApiController<'a> { // Init the data store from file, if present. if let Some(data) = metadata_json { - vm_resources - .locked_mmds_or_default() - .put_data( - serde_json::from_str(data) - .expect("MMDS error: metadata provided not valid json"), - ) - .map_err(|err| { - error!("Populating MMDS from file failed: {:?}", err); - crate::FcExitCode::GenericError - })?; + vm_resources.locked_mmds_or_default().put_data( + serde_json::from_str(data).expect("MMDS error: metadata provided not valid json"), + )?; info!("Successfully added metadata to mmds from file"); } @@ -376,8 +380,8 @@ impl<'a> PrebootApiController<'a> { to_api.send(Box::new(res)).expect("one-shot channel closed"); // If any fatal errors were encountered, break the loop. - if let Some(exit_code) = preboot_controller.fatal_error { - return Err(exit_code); + if let Some(preboot_error) = preboot_controller.fatal_error { + return Err(preboot_error); } } @@ -577,7 +581,7 @@ impl<'a> PrebootApiController<'a> { ) .map_err(|err| { // If restore fails, we consider the process is too dirty to recover. - self.fatal_error = Some(FcExitCode::BadConfiguration); + self.fatal_error = Some(BuildMicrovmFromRequestsError::Restore); err })?; // Resume VM @@ -587,7 +591,7 @@ impl<'a> PrebootApiController<'a> { .resume_vm() .map_err(|err| { // If resume fails, we consider the process is too dirty to recover. - self.fatal_error = Some(FcExitCode::BadConfiguration); + self.fatal_error = Some(BuildMicrovmFromRequestsError::Resume); err })?; }