Skip to content

Commit

Permalink
feat: count unhealthy times
Browse files Browse the repository at this point in the history
  • Loading branch information
kristof-mattei committed Mar 14, 2024
1 parent e816578 commit c866ebc
Show file tree
Hide file tree
Showing 7 changed files with 115 additions and 39 deletions.
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ coverage = []
[dependencies]
backtrace = "0.3.69"
color-eyre = "0.6.2"
hashbrown = "0.14.3"
hashbrown = { version = "0.14.3", features = ["serde"] }
hex = "0.4.3"
http = "1.1.0"
http-body-util = "0.1.0"
Expand Down
63 changes: 48 additions & 15 deletions src/container.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,18 @@ use std::fmt;
use std::marker::PhantomData;
use std::str::FromStr;

use hashbrown::HashSet;
use serde::de::{MapAccess, SeqAccess, Visitor};
use serde::{Deserialize, Deserializer, Serialize};

fn deserialize_names<'de, D>(deserializer: D) -> Result<Vec<String>, D::Error>
fn deserialize_names<'de, D>(deserializer: D) -> Result<HashSet<String>, D::Error>
where
D: Deserializer<'de>,
{
struct SeqVisitor();

impl<'de> Visitor<'de> for SeqVisitor {
type Value = Vec<String>;
type Value = HashSet<String>;

fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
formatter.write_str("a nonempty sequence of items")
Expand All @@ -22,16 +23,18 @@ where
where
M: SeqAccess<'de>,
{
let mut buffer = access.size_hint().map_or_else(Vec::new, Vec::with_capacity);
let mut buffer = access
.size_hint()
.map_or_else(HashSet::new, HashSet::with_capacity);

while let Some(mut value) = access.next_element::<String>()? {
// Docker container name starts with a '/'. I don't know why. But it's useless.
if value.starts_with('/') {
let split = value.split_off(1);

buffer.push(split);
buffer.insert(split);
} else {
buffer.push(value);
buffer.insert(value);
}
}

Expand Down Expand Up @@ -87,15 +90,26 @@ pub struct Container {
pub id: String,
#[serde(deserialize_with = "deserialize_names")]
#[serde(rename(deserialize = "Names"), default)]
pub names: Vec<String>,
pub names: HashSet<String>,
pub state: String,
#[serde(deserialize_with = "deserialize_timeout")]
#[serde(rename(deserialize = "Labels"), default)]
pub timeout: Option<u32>,
}

impl Container {
pub fn get_name(&self) -> Option<String> {
self.names.iter().cloned().reduce(|mut p, n| {
p.push_str(&n);
p
})
}
}

#[cfg(test)]
mod tests {
use hashbrown::HashSet;

use crate::container::Container;

#[test]
Expand All @@ -110,13 +124,13 @@ mod tests {
&[
Container {
id: "582036c7a5e8719bbbc9476e4216bfaf4fd318b61723f41f2e8fe3b60d8182ae".into(),
names: vec!["photoprism".into()],
names: HashSet::from_iter(["photoprism".into()]),
state: "running".into(),
timeout: None,
},
Container {
id: "281ea0c72e2e4a41fd2f81df945da9dfbfbc7ea0fe5e59c3d2a8234552e367cf".into(),
names: vec!["whoogle-search".into()],
names: HashSet::from_iter(["whoogle-search".into()]),
state: "running".into(),
timeout: None,
}
Expand All @@ -136,7 +150,26 @@ mod tests {
assert_eq!(
&[Container {
id: "582036c7a5e8719bbbc9476e4216bfaf4fd318b61723f41f2e8fe3b60d8182ae".into(),
names: vec!["photoprism-1".into(), "photoprism-2".into()],
names: HashSet::from_iter(["photoprism-1".into(), "photoprism-2".into()]),
state: "running".into(),
timeout: None,
}][..],
deserialized.unwrap()
);
}

#[test]
fn test_deserialize_duplicate_names() {
let input = r#"[{"Id":"582036c7a5e8719bbbc9476e4216bfaf4fd318b61723f41f2e8fe3b60d8182ae","Names":["/photoprism","/photoprism"],"State":"running"}]"#;

let deserialized: Result<Vec<Container>, _> = serde_json::from_reader(input.as_bytes());

assert!(deserialized.is_ok());

assert_eq!(
&[Container {
id: "582036c7a5e8719bbbc9476e4216bfaf4fd318b61723f41f2e8fe3b60d8182ae".into(),
names: HashSet::from_iter(["photoprism".into()]),
state: "running".into(),
timeout: None,
}][..],
Expand All @@ -155,7 +188,7 @@ mod tests {
assert_eq!(
&[Container {
id: "582036c7a5e8719bbbc9476e4216bfaf4fd318b61723f41f2e8fe3b60d8182ae".into(),
names: vec!["photoprism".into()],
names: HashSet::from_iter(["photoprism".into()]),
state: "running".into(),
timeout: Some(12),
}][..],
Expand All @@ -174,7 +207,7 @@ mod tests {
assert_eq!(
&[Container {
id: "582036c7a5e8719bbbc9476e4216bfaf4fd318b61723f41f2e8fe3b60d8182ae".into(),
names: vec!["photoprism".into()],
names: HashSet::from_iter(["photoprism".into()]),
state: "running".into(),
timeout: None,
}][..],
Expand All @@ -193,7 +226,7 @@ mod tests {
assert_eq!(
&[Container {
id: "582036c7a5e8719bbbc9476e4216bfaf4fd318b61723f41f2e8fe3b60d8182ae".into(),
names: vec!["photoprism".into()],
names: HashSet::from_iter(["photoprism".into()]),
state: "running".into(),
timeout: None,
}][..],
Expand All @@ -212,7 +245,7 @@ mod tests {
assert_eq!(
&[Container {
id: "582036c7a5e8719bbbc9476e4216bfaf4fd318b61723f41f2e8fe3b60d8182ae".into(),
names: vec![],
names: HashSet::from_iter([]),
state: "running".into(),
timeout: None,
}][..],
Expand All @@ -231,7 +264,7 @@ mod tests {
assert_eq!(
&[Container {
id: "582036c7a5e8719bbbc9476e4216bfaf4fd318b61723f41f2e8fe3b60d8182ae".into(),
names: vec![],
names: HashSet::from_iter([]),
state: "running".into(),
timeout: None,
}][..],
Expand All @@ -250,7 +283,7 @@ mod tests {
assert_eq!(
&[Container {
id: "582036c7a5e8719bbbc9476e4216bfaf4fd318b61723f41f2e8fe3b60d8182ae".into(),
names: vec!["photoprism-1".into(), "photoprism-2".into()],
names: HashSet::from_iter(["photoprism-1".into(), "photoprism-2".into()]),
state: "running".into(),
timeout: None,
}][..],
Expand Down
29 changes: 18 additions & 11 deletions src/docker.rs
Original file line number Diff line number Diff line change
Expand Up @@ -98,19 +98,24 @@ impl Docker {
}
}

pub async fn check_container_health(&self, app_config: &AppConfig, container_info: Container) {
pub async fn check_container_health(
&self,
app_config: &AppConfig,
container_info: &Container,
times: usize,
) {
let container_short_id = &container_info.id[0..12];

match &container_info.names[..] {
[] => {
match container_info.get_name() {
None => {
event!(Level::ERROR, "Container name of {} is null, which implies container does not exist - don't restart.", container_short_id);
},
container_names => {
Some(container_names) => {
if container_info.state == "restarting" {
event!(
Level::INFO,
"Container {} ({}) found to be restarting - don't restart.",
container_names.join(", "),
container_names,
container_short_id
);
} else {
Expand All @@ -119,30 +124,32 @@ impl Docker {
.unwrap_or(app_config.autoheal_default_stop_timeout);

event!(Level::INFO,
"Container {} ({}) found to be unhealthy - Restarting container now with {}s timeout.",
container_names.join(", "),
container_short_id, timeout
"Container {} ({}) found to be unhealthy {} times. Restarting container now with {}s timeout.",
container_names,
container_short_id,
times,
timeout
);

match self.restart_container(container_short_id, timeout).await {
Ok(()) => {
notify_webhook_success(
app_config,
container_short_id,
&container_names.join(", "),
&container_names,
);
},
Err(e) => {
event!(Level::INFO,
error = ?e,
"Restarting container {} ({}) failed.",
container_names.join(", "),
container_names,
container_short_id
);

notify_webhook_failure(
app_config,
container_names.join(", "),
container_names,
container_short_id.to_owned(),
e,
);
Expand Down
2 changes: 1 addition & 1 deletion src/encoding.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ pub fn url_encode(filter: &serde_json::Value) -> String {
#[cfg(test)]
mod tests {
use crate::encoding::url_encode;
use crate::filters::build;
use crate::unhealthy_filters::build;

#[test]
fn test_build_decode_all() {
Expand Down
55 changes: 45 additions & 10 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ use app_config::AppConfig;
use docker::Docker;
use docker_config::DockerConfig;
use handlers::set_up_handlers;
use hashbrown::HashMap;
use tokio::time::sleep;
use tracing::metadata::LevelFilter;
use tracing::{event, Level};
Expand All @@ -17,10 +18,10 @@ mod docker;
mod docker_config;
mod encoding;
mod env;
mod filters;
mod handlers;
mod helpers;
mod http_client;
mod unhealthy_filters;
mod webhook;

fn main() -> Result<Infallible, color_eyre::Report> {
Expand Down Expand Up @@ -54,7 +55,7 @@ async fn healer() -> Result<Infallible, color_eyre::Report> {

let docker = Docker::new(
DockerConfig::build()?,
&filters::build(app_config.autoheal_container_label.as_deref()),
&unhealthy_filters::build(app_config.autoheal_container_label.as_deref()),
);

// TODO define failure mode
Expand All @@ -69,9 +70,16 @@ async fn healer() -> Result<Infallible, color_eyre::Report> {
sleep(Duration::from_secs(app_config.autoheal_start_period)).await;
}

let mut history_unhealthy = HashMap::<String, (Option<String>, usize)>::new();

loop {
match docker.get_containers().await {
Ok(containers) => {
let mut current_unhealthy: HashMap<String, Option<String>> = containers
.iter()
.map(|x| (x.id.to_string(), x.get_name()))
.collect::<HashMap<_, _>>();

for container in containers {
if container
.names
Expand All @@ -81,22 +89,49 @@ async fn healer() -> Result<Infallible, color_eyre::Report> {
event!(
Level::INFO,
"Container {} ({}) is unhealthy, but it is excluded",
container.names.join(", "),
container
.get_name()
.as_deref()
.unwrap_or("<UNNAMED CONTAINER>"),
&container.id[0..12],
);

continue;
}

docker.check_container_health(&app_config, container).await;
docker
.check_container_health(
&app_config,
&container,
history_unhealthy
.get(&container.id)
.map_or(1, |(_, t)| *t + 1),
)
.await;
}

history_unhealthy = history_unhealthy
.into_iter()
.filter_map(|(key, (names, times))| {
if let Some(new_name) = current_unhealthy.remove(&key) {
// still unhealthy
// take the new name
Some((key, (new_name, times + 1)))
} else {
// healthy
event!(
Level::INFO,
"Container {} ({}) returned to healthy state.",
names.as_deref().unwrap_or("<UNNAMED CONTAINER>"),
key
);
None
}
})
.collect();
},
Err(e) => {
return Err(wrap_and_report!(
Level::ERROR,
e,
"Failed to fetch container info"
));
Err(err) => {
event!(Level::ERROR, ?err, "Failed to fetch container info");
},
}

Expand Down
2 changes: 1 addition & 1 deletion src/filters.rs → src/unhealthy_filters.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ pub fn build(autoheal_container_label_filter: Option<&str>) -> serde_json::Value
mod tests {
use serde_json::json;

use crate::filters::build;
use crate::unhealthy_filters::build;

#[test]
fn test_build_filters_all() {
Expand Down

0 comments on commit c866ebc

Please sign in to comment.