From 63bbf5a229abbaf084a4d7ee628f1bb76f1ee78b Mon Sep 17 00:00:00 2001 From: Ricardo Pchevuzinske Katz Date: Tue, 6 Apr 2021 16:18:58 -0300 Subject: [PATCH 1/2] Add initial terraform manifests for monitoring --- infra/gcp/monitoring/k8s.io/00-provider.tf | 23 ++++++ .../monitoring/k8s.io/10-thockin-k8s-io.tf | 75 +++++++++++++++++++ infra/gcp/monitoring/k8s.io/versions.tf | 8 ++ 3 files changed, 106 insertions(+) create mode 100644 infra/gcp/monitoring/k8s.io/00-provider.tf create mode 100644 infra/gcp/monitoring/k8s.io/10-thockin-k8s-io.tf create mode 100644 infra/gcp/monitoring/k8s.io/versions.tf diff --git a/infra/gcp/monitoring/k8s.io/00-provider.tf b/infra/gcp/monitoring/k8s.io/00-provider.tf new file mode 100644 index 00000000000..e6dff39ba95 --- /dev/null +++ b/infra/gcp/monitoring/k8s.io/00-provider.tf @@ -0,0 +1,23 @@ + +/* +This file defines: +- Required provider versions +- Storage backend details +*/ + +terraform { + + backend "gcs" { + bucket = "k8s-infra-clusters-terraform" + prefix = "monitoring/state" // path prefix for the state objects inside the bucket + } + + required_providers { + google = { + source = "hashicorp/google" + version = "~> 3.63.0" + } + } +} + + diff --git a/infra/gcp/monitoring/k8s.io/10-thockin-k8s-io.tf b/infra/gcp/monitoring/k8s.io/10-thockin-k8s-io.tf new file mode 100644 index 00000000000..98edf8edbc3 --- /dev/null +++ b/infra/gcp/monitoring/k8s.io/10-thockin-k8s-io.tf @@ -0,0 +1,75 @@ +locals { + project_id = "kubernetes-public" + monitored_domain = "thockin-test1.k8s.io" +} + +// Needed to import the notification channel +provider "google" { + project = local.project_id +} + +// Manual step: Create a StackDriver alert channel pointing to a channel in Slack +// The data source below looks up that existing channel (it is not created here) +data "google_monitoring_notification_channel" "alertchannel" { + display_name = "Kubernetes.io Cert Alert" +} + + +// We can turn this into a module and then standardize the resource display names +resource 
"google_monitoring_uptime_check_config" "uptime_check" { + display_name = "${local.monitored_domain} https" + timeout = "5s" + period = "300s" + + http_check { + path = "/" + port = "443" + use_ssl = true + validate_ssl = true + } + + monitored_resource { + type = "uptime_url" + labels = { + project_id = local.project_id + // Host to be verified (1) + host = local.monitored_domain + } + } +} + + +resource "google_monitoring_alert_policy" "cert_expiration_alert" { + combiner = "OR" + display_name = "${local.monitored_domain} certificate monitor" + enabled = true + notification_channels = [data.google_monitoring_notification_channel.alertchannel.name] + project = local.project_id + + conditions { + display_name = "${local.monitored_domain} expiration days is below the defined threshold" + + condition_threshold { + comparison = "COMPARISON_LT" + + // The condition must keep failing for 5 minutes before alerting; may be increased or reduced + duration = "300s" + + // resource.label.host must match the host of the uptime check defined above (1) + filter = "metric.type=\"monitoring.googleapis.com/uptime_check/time_until_ssl_cert_expires\" resource.type=\"uptime_url\" resource.label.\"host\"=\"${local.monitored_domain}\" metric.label.\"checker_location\"=\"usa-iowa\"" + + // Alert when the certificate expires in fewer than this number of days + threshold_value = 15 + + aggregations { + alignment_period = "300s" + per_series_aligner = "ALIGN_MEAN" + } + + trigger { + count = 1 + } + } + } +} + diff --git a/infra/gcp/monitoring/k8s.io/versions.tf b/infra/gcp/monitoring/k8s.io/versions.tf new file mode 100644 index 00000000000..07b1984ba39 --- /dev/null +++ b/infra/gcp/monitoring/k8s.io/versions.tf @@ -0,0 +1,8 @@ +/* +This file defines: +- Required Terraform version +*/ + +terraform { + required_version = "~> 0.14.0" +} From d910d904ff4702eab31fa5f0c4f81be763982b20 Mon Sep 17 00:00:00 2001 From: Ricardo Pchevuzinske Katz Date: Wed, 7 Apr 2021 16:02:41 -0300 Subject: [PATCH 2/2] Change terraform version 
and channel searcher --- infra/gcp/monitoring/k8s.io/10-thockin-k8s-io.tf | 6 ++++-- infra/gcp/monitoring/k8s.io/versions.tf | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/infra/gcp/monitoring/k8s.io/10-thockin-k8s-io.tf b/infra/gcp/monitoring/k8s.io/10-thockin-k8s-io.tf index 98edf8edbc3..2feb4578392 100644 --- a/infra/gcp/monitoring/k8s.io/10-thockin-k8s-io.tf +++ b/infra/gcp/monitoring/k8s.io/10-thockin-k8s-io.tf @@ -11,10 +11,12 @@ provider "google" { // Manual step: Create a StackDriver alert channel pointing to a channel in Slack // The data source below looks up that existing channel (it is not created here) data "google_monitoring_notification_channel" "alertchannel" { - display_name = "Kubernetes.io Cert Alert" + type = "slack" + labels = { + "channel_name" = "#k8s-infra-alerts" + } } - // We can turn this into a module and then standardize the resource display names resource "google_monitoring_uptime_check_config" "uptime_check" { display_name = "${local.monitored_domain} https" diff --git a/infra/gcp/monitoring/k8s.io/versions.tf b/infra/gcp/monitoring/k8s.io/versions.tf index 07b1984ba39..969a33f8dff 100644 --- a/infra/gcp/monitoring/k8s.io/versions.tf +++ b/infra/gcp/monitoring/k8s.io/versions.tf @@ -4,5 +4,5 @@ This file defines: */ terraform { - required_version = "~> 0.14.0" + required_version = ">= 0.13.0" }