From 16f305b9f4ef20bd198af73b4dc8a15fec13ef88 Mon Sep 17 00:00:00 2001 From: ibizaman Date: Thu, 26 Dec 2024 01:41:08 +0100 Subject: [PATCH] add alert when certs are close to expiring --- CHANGELOG.md | 5 + flake.nix | 6 +- modules/blocks/monitoring/dashboards/SSL.json | 143 ++++++++++++ modules/blocks/monitoring/rules.json | 129 ++++++++++- modules/blocks/ssl.nix | 46 +++- patches/prometheusnodecertexporter.nix | 219 ++++++++++++++++++ test/modules/nginx.nix | 2 +- 7 files changed, 541 insertions(+), 9 deletions(-) create mode 100644 modules/blocks/monitoring/dashboards/SSL.json create mode 100644 patches/prometheusnodecertexporter.nix diff --git a/CHANGELOG.md b/CHANGELOG.md index 52cfaae2..24cdcc46 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,11 @@ Template: # Upcoming Release +## New Features + +- Add dashboard for SSL certificates validity + and alert when they did not renew on time. + # v0.2.7 ## New Features diff --git a/flake.nix b/flake.nix index 2d98e587..bdfb8897 100644 --- a/flake.nix +++ b/flake.nix @@ -14,12 +14,16 @@ outputs = { nixpkgs, nix-flake-tests, flake-utils, nmdsrc, ... }: flake-utils.lib.eachDefaultSystem (system: let originPkgs = nixpkgs.legacyPackages.${system}; - patches = [ + patches = originPkgs.lib.optionals (system == "x86_64-linux") [ # Leaving commented out for an example. 
# (originPkgs.fetchpatch { # url = "https://github.com/NixOS/nixpkgs/pull/317107.patch"; # hash = "sha256-hoLrqV7XtR1hP/m0rV9hjYUBtrSjay0qcPUYlKKuVWk="; # }) + + # Remove when this PR is merged: + # https://github.com/NixOS/nixpkgs/pull/368325 + ./patches/prometheusnodecertexporter.nix ]; patchedNixpkgs = originPkgs.applyPatches { name = "nixpkgs-patched"; diff --git a/modules/blocks/monitoring/dashboards/SSL.json b/modules/blocks/monitoring/dashboards/SSL.json new file mode 100644 index 00000000..9c82733a --- /dev/null +++ b/modules/blocks/monitoring/dashboards/SSL.json @@ -0,0 +1,143 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": 16, + "links": [], + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "df80f9f5-97d7-4112-91d8-72f523a02b09" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "line+area" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, + { + "color": "transparent", + "value": 604808 + } + ] + }, + "unit": "s" + }, + 
"overrides": [] + }, + "gridPos": { + "h": 12, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 3, + "options": { + "legend": { + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Last *", + "sortDesc": false + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "df80f9f5-97d7-4112-91d8-72f523a02b09" + }, + "editorMode": "code", + "expr": "ssl_certificate_expiry_seconds", + "legendFormat": "{{exported_hostname}}: {{subject}} {{path}}", + "range": true, + "refId": "A" + } + ], + "title": "Certificate Remaining Validity", + "type": "timeseries" + } + ], + "preload": false, + "schemaVersion": 40, + "tags": [], + "templating": { + "list": [] + }, + "time": { + "from": "now-6h", + "to": "now" + }, + "timepicker": {}, + "timezone": "browser", + "title": "SSL Certificates", + "uid": "ae818js0bvw8wb", + "version": 8, + "weekStart": "" +} diff --git a/modules/blocks/monitoring/rules.json b/modules/blocks/monitoring/rules.json index 192fbc60..f4482b8e 100644 --- a/modules/blocks/monitoring/rules.json +++ b/modules/blocks/monitoring/rules.json @@ -123,7 +123,134 @@ "summary": "The error budget for a service for the last 1 hour is under 99%" }, "labels": { - "": "", + "role": "sysadmin" + }, + "isPaused": false + }, + { + "uid": "ee817l3a88s1sd", + "title": "Certificate Did Not Renew", + "condition": "C", + "data": [ + { + "refId": "A", + "relativeTimeRange": { + "from": 1800, + "to": 0 + }, + "datasourceUid": "df80f9f5-97d7-4112-91d8-72f523a02b09", + "model": { + "adhocFilters": [], + "datasource": { + "type": "prometheus", + "uid": "df80f9f5-97d7-4112-91d8-72f523a02b09" + }, + "editorMode": "code", + "expr": "ssl_certificate_expiry_seconds", + "interval": "", + "intervalMs": 15000, + "legendFormat": "{{exported_hostname}}: {{subject}} {{path}}", + "maxDataPoints": 43200, + "range": true, + "refId": "A" + 
} + }, + { + "refId": "B", + "relativeTimeRange": { + "from": 0, + "to": 0 + }, + "datasourceUid": "__expr__", + "model": { + "conditions": [ + { + "evaluator": { + "params": [], + "type": "gt" + }, + "operator": { + "type": "and" + }, + "query": { + "params": [ + "B" + ] + }, + "reducer": { + "params": [], + "type": "last" + }, + "type": "query" + } + ], + "datasource": { + "type": "__expr__", + "uid": "__expr__" + }, + "expression": "A", + "intervalMs": 1000, + "maxDataPoints": 43200, + "reducer": "last", + "refId": "B", + "type": "reduce" + } + }, + { + "refId": "C", + "relativeTimeRange": { + "from": 0, + "to": 0 + }, + "datasourceUid": "__expr__", + "model": { + "conditions": [ + { + "evaluator": { + "params": [ + 604800 + ], + "type": "lt" + }, + "operator": { + "type": "and" + }, + "query": { + "params": [ + "C" + ] + }, + "reducer": { + "params": [], + "type": "last" + }, + "type": "query" + } + ], + "datasource": { + "type": "__expr__", + "uid": "__expr__" + }, + "expression": "B", + "intervalMs": 1000, + "maxDataPoints": 43200, + "refId": "C", + "type": "threshold" + } + } + ], + "dashboardUid": "ae818js0bvw8wb", + "panelId": 3, + "noDataState": "NoData", + "execErrState": "Error", + "for": "20m", + "annotations": { + "__dashboardUid__": "ae818js0bvw8wb", + "__panelId__": "3", + "description": "The expiry date of the certificate is less than 1 week from now.", + "summary": "Certificate did not renew on time." 
+ }, + "labels": { "role": "sysadmin" }, "isPaused": false diff --git a/modules/blocks/ssl.nix b/modules/blocks/ssl.nix index 0ad0f848..0b77b829 100644 --- a/modules/blocks/ssl.nix +++ b/modules/blocks/ssl.nix @@ -4,6 +4,9 @@ let cfg = config.shb.certs; contracts = pkgs.callPackage ../contracts {}; + + inherit (builtins) dirOf; + inherit (lib) flatten mapAttrsToList unique; in { options.shb.certs = { @@ -362,7 +365,7 @@ in cat /etc/static/ssl/certs/ca-bundle.crt > /etc/ssl/certs/ca-bundle.crt cat /etc/static/ssl/certs/ca-bundle.crt > /etc/ssl/certs/ca-certificates.crt - for file in ${lib.concatStringsSep " " (lib.mapAttrsToList (_name: caCfg: caCfg.paths.cert) cfg.cas.selfsigned)}; do + for file in ${lib.concatStringsSep " " (mapAttrsToList (_name: caCfg: caCfg.paths.cert) cfg.cas.selfsigned)}; do cat "$file" >> /etc/ssl/certs/ca-bundle.crt cat "$file" >> /etc/ssl/certs/ca-certificates.crt done @@ -431,7 +434,7 @@ in } # Config for Let's Encrypt cert. { - users.users = lib.mkMerge (lib.mapAttrsToList (name: certCfg: { + users.users = lib.mkMerge (mapAttrsToList (name: certCfg: { ${certCfg.makeAvailableToUser}.extraGroups = lib.mkIf (!(isNull certCfg.makeAvailableToUser)) [ config.security.acme.defaults.group ]; @@ -447,7 +450,7 @@ in server = lib.mkIf certCfg.stagingServer "https://acme-staging-v02.api.letsencrypt.org/directory"; }; }) certCfg.extraDomains; - in lib.mkMerge (lib.flatten (lib.mapAttrsToList (name: certCfg: + in lib.mkMerge (flatten (mapAttrsToList (name: certCfg: [{ "${name}" = { extraDomainNames = [ certCfg.domain ] ++ certCfg.extraDomains; @@ -470,7 +473,7 @@ in enableACME = true; }; }) extraDomains; - in lib.mkMerge (lib.flatten (lib.mapAttrsToList (name: certCfg: + in lib.mkMerge (flatten (mapAttrsToList (name: certCfg: lib.optionals (certCfg.dnsProvider == null) ( [{ virtualHosts."${name}" = { @@ -482,7 +485,7 @@ in )) cfg.certs.letsencrypt)); systemd.services = let - extraDomainsCfg = certCfg: lib.flatten (map (name: + extraDomainsCfg = 
certCfg: flatten (map (name: lib.optionals (certCfg.additionalEnvironment != {} && certCfg.dnsProvider == null) [{ "acme-${name}".environment = certCfg.additionalEnvironment; }] @@ -493,7 +496,7 @@ in }; }] ) certCfg.extraDomains); - in lib.mkMerge (lib.flatten (lib.mapAttrsToList (name: certCfg: + in lib.mkMerge (flatten (mapAttrsToList (name: certCfg: lib.optionals (certCfg.additionalEnvironment != {} && certCfg.dnsProvider == null) [{ "acme-${certCfg.domain}".environment = certCfg.additionalEnvironment; }] @@ -505,6 +508,37 @@ in }] ++ lib.optionals (certCfg.dnsProvider == null) (extraDomainsCfg certCfg) ) cfg.certs.letsencrypt)); + + services.prometheus.exporters.node-cert = { + enable = true; + listenAddress = "127.0.0.1"; + user = "acme"; + paths = let + pathCfg = name: certCfg: + let + mainDomainPaths = map dirOf [ certCfg.paths.cert certCfg.paths.key ]; + # Not sure this will work for all cases. + mainPath = dirOf (dirOf certCfg.paths.cert); + extraDomainsPath = map (x: "${mainPath}/${x}") certCfg.extraDomains; + in + mainDomainPaths ++ extraDomainsPath; + in + unique (flatten (mapAttrsToList pathCfg cfg.certs.letsencrypt)); + }; + + services.prometheus.scrapeConfigs = let + scrapeCfg = name: certCfg: [{ + job_name = "node-cert-${name}"; + static_configs = [{ + targets = ["127.0.0.1:${toString config.services.prometheus.exporters.node-cert.port}"]; + labels = { + "hostname" = config.networking.hostName; + "domain" = certCfg.domain; + }; + }]; + }]; + in + flatten (mapAttrsToList scrapeCfg cfg.certs.letsencrypt); } ]; } diff --git a/patches/prometheusnodecertexporter.nix b/patches/prometheusnodecertexporter.nix new file mode 100644 index 00000000..03e20046 --- /dev/null +++ b/patches/prometheusnodecertexporter.nix @@ -0,0 +1,219 @@ +index f805920c5b87a..b67f41c4fb12c 100644 +--- a/nixos/modules/services/monitoring/prometheus/exporters.nix ++++ b/nixos/modules/services/monitoring/prometheus/exporters.nix +@@ -66,6 +66,7 @@ let + "nginx" + "nginxlog" + "node" 
++ "node-cert" + "nut" + "nvidia-gpu" + "pgbouncer" +diff --git a/nixos/modules/services/monitoring/prometheus/exporters/node-cert.nix b/nixos/modules/services/monitoring/prometheus/exporters/node-cert.nix +new file mode 100644 +index 0000000000000..d8b2004e8e857 +--- /dev/null ++++ b/nixos/modules/services/monitoring/prometheus/exporters/node-cert.nix +@@ -0,0 +1,70 @@ ++{ ++ config, ++ lib, ++ pkgs, ++ ... ++}: ++ ++let ++ cfg = config.services.prometheus.exporters.node-cert; ++ inherit (lib) mkOption types concatStringsSep; ++in ++{ ++ port = 9141; ++ ++ extraOpts = { ++ paths = mkOption { ++ type = types.listOf types.str; ++ description = '' ++ List of paths to search for SSL certificates. ++ ''; ++ }; ++ ++ excludePaths = mkOption { ++ type = types.listOf types.str; ++ description = '' ++ List of paths to exclude from searching for SSL certificates. ++ ''; ++ default = [ ]; ++ }; ++ ++ includeGlobs = mkOption { ++ type = types.listOf types.str; ++ description = '' ++ List files matching a pattern to include. Uses Go glob pattern. ++ ''; ++ default = [ ]; ++ }; ++ ++ excludeGlobs = mkOption { ++ type = types.listOf types.str; ++ description = '' ++ List files matching a pattern to exclude. Uses Go glob pattern. ++ ''; ++ default = [ ]; ++ }; ++ ++ user = mkOption { ++ type = types.str; ++ description = '' ++ User owning the certs. 
++ ''; ++ default = "acme"; ++ }; ++ }; ++ ++ serviceOpts = { ++ serviceConfig = { ++ User = cfg.user; ++ ExecStart = '' ++ ${lib.getExe pkgs.prometheus-node-cert-exporter} \ ++ --listen ${toString cfg.listenAddress}:${toString cfg.port} \ ++ --path ${concatStringsSep "," cfg.paths} \ ++ --exclude-path "${concatStringsSep "," cfg.excludePaths}" \ ++ --include-glob "${concatStringsSep "," cfg.includeGlobs}" \ ++ --exclude-glob "${concatStringsSep "," cfg.excludeGlobs}" \ ++ ${concatStringsSep " \\\n " cfg.extraFlags} ++ ''; ++ }; ++ }; ++} +diff --git a/nixos/tests/prometheus-exporters.nix b/nixos/tests/prometheus-exporters.nix +index c15a3fd20b021..f59d61e69b92e 100644 +--- a/nixos/tests/prometheus-exporters.nix ++++ b/nixos/tests/prometheus-exporters.nix +@@ -1002,6 +1002,49 @@ let + ''; + }; + ++ node-cert = { ++ nodeName = "node_cert"; ++ exporterConfig = { ++ enable = true; ++ paths = ["/run/certs"]; ++ }; ++ exporterTest = '' ++ wait_for_unit("prometheus-node-cert-exporter.service") ++ wait_for_open_port(9141) ++ wait_until_succeeds( ++ "curl -sSf http://localhost:9141/metrics | grep 'ssl_certificate_expiry_seconds{.\\+path=\"/run/certs/node-cert\\.cert\".\\+}'" ++ ) ++ ''; ++ ++ metricProvider = { ++ system.activationScripts.cert.text = '' ++ mkdir -p /run/certs ++ cd /run/certs ++ ++ cat >ca.template <