Skip to content

Commit

Permalink
fix(rebuild): set final rebuild stats
Browse files Browse the repository at this point in the history
When rebuild completes, right before the nexus completes the rebuild callback it's possible
that we get a stats request and in such case we have no stats to give.
To address this, then the rebuild backend completes it now sets the final rebuild stats in
the states, which will allow the stats to be retrieved.

Signed-off-by: Tiago Castro <[email protected]>
  • Loading branch information
tiagolobocastro committed Mar 20, 2023
1 parent 4063f73 commit a14c7e6
Show file tree
Hide file tree
Showing 4 changed files with 38 additions and 3 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -37,3 +37,5 @@ ansible-hosts
/test/python/venv/*
/package.json
/rust-toolchain.toml
/test/python/registration_pb2.py
/test/python/registration_pb2_grpc.py
22 changes: 20 additions & 2 deletions io-engine/src/rebuild/rebuild_job.rs
Original file line number Diff line number Diff line change
Expand Up @@ -188,7 +188,18 @@ impl RebuildJob {
pub async fn stats(&self) -> RebuildStats {
let (s, r) = oneshot::channel::<RebuildStats>();
self.comms.send(RebuildJobRequest::Stats(s)).await.ok();
r.await.unwrap_or_default()
match r.await {
Ok(stats) => stats,
Err(_) => match self.final_stats() {
Some(stats) => stats,
_ => {
tracing::error!(
rebuild.target = self.dst_uri,
"Rebuild backend terminated without setting final rebuild stats");
RebuildStats::default()
}
},
}
}

/// Get the last error.
Expand Down Expand Up @@ -226,6 +237,11 @@ impl RebuildJob {
self.start_time
}

/// Get the final rebuild statistics.
fn final_stats(&self) -> Option<RebuildStats> {
self.states.read().final_stats().clone()
}

/// Get the rebuild job instances container, we ensure that this can only
/// ever be called on a properly allocated thread
fn get_instances<'a>() -> parking_lot::MutexGuard<'a, RebuildJobInstances> {
Expand Down Expand Up @@ -268,11 +284,13 @@ impl RebuildJob {

fn wake_up(&self) {
let sender = self.comms.send_clone();
let dst_uri = self.dst_uri.clone();
Reactors::master().send_future(async move {
if let Err(error) = sender.send(RebuildJobRequest::WakeUp).await {
error!(
?error,
"Failed to wake up rebuild backend, it has been dropped"
rebuild.target = dst_uri,
"Failed to wake up rebuild backend, it has been dropped",
);
}
});
Expand Down
6 changes: 5 additions & 1 deletion io-engine/src/rebuild/rebuild_job_backend.rs
Original file line number Diff line number Diff line change
Expand Up @@ -517,7 +517,11 @@ impl RebuildJobBackend {

impl Drop for RebuildJobBackend {
fn drop(&mut self) {
tracing::warn!(
let stats = self.stats();
self.states.write().set_final_stats(stats);

tracing::info!(
rebuild.target = self.dst_uri,
"RebuildJobBackend being dropped with done({})",
self.state().done()
);
Expand Down
11 changes: 11 additions & 0 deletions io-engine/src/rebuild/rebuild_state.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
use super::{RebuildError, RebuildOperation};
use crate::rebuild::RebuildStats;

/// Allowed states for a rebuild job.
#[derive(Debug, PartialEq, Copy, Clone)]
Expand Down Expand Up @@ -55,6 +56,7 @@ pub(super) struct RebuildStates {
pub(super) pending: Option<RebuildState>,
/// Last rebuild error, if any.
pub(super) error: Option<RebuildError>,
final_stats: Option<RebuildStats>,
}

impl std::fmt::Display for RebuildStates {
Expand All @@ -70,6 +72,15 @@ impl Default for RebuildState {
}

impl RebuildStates {
/// Get the final rebuild statistics.
pub(super) fn final_stats(&self) -> &Option<RebuildStats> {
&self.final_stats
}
/// Set the final rebuild statistics.
pub(super) fn set_final_stats(&mut self, stats: RebuildStats) {
self.final_stats = Some(stats);
}

/// Set's the next pending state
/// if one is already set then override only if flag is set
pub(super) fn set_pending(
Expand Down

0 comments on commit a14c7e6

Please sign in to comment.