Skip to content
This repository was archived by the owner on May 5, 2024. It is now read-only.

Commit da57bb2

Browse files
authored
Merge pull request #947 from Truxnell/refactor-prom-rules
Refactor prom rules
2 parents 4ef5650 + 297962a commit da57bb2

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

44 files changed

+176
-96
lines changed

.github/scripts/validate-kustomize.sh

+67
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
#!/usr/bin/env bash
2+
3+
# This script validates the Flux custom resources and the kustomize overlays
4+
# using kubeval (the Flux OpenAPI schema download below is currently disabled).
5+
# This script is meant to be run locally and in CI before the changes
6+
# are merged on the main branch that's synced by Flux.
7+
8+
# Copyright 2020 The Flux authors. All rights reserved.
9+
#
10+
# Licensed under the Apache License, Version 2.0 (the "License");
11+
# you may not use this file except in compliance with the License.
12+
# You may obtain a copy of the License at
13+
#
14+
# http://www.apache.org/licenses/LICENSE-2.0
15+
#
16+
# Unless required by applicable law or agreed to in writing, software
17+
# distributed under the License is distributed on an "AS IS" BASIS,
18+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
19+
# See the License for the specific language governing permissions and
20+
# limitations under the License.
21+
22+
# This script is meant to be run locally and in CI to validate the Kubernetes
23+
# manifests (including Flux custom resources) before changes are merged into
24+
# the branch synced by Flux in-cluster.
25+
26+
# Prerequisites
27+
# - yq v4.6
28+
# - kustomize v4.1
29+
# - kubeval v0.15.x
30+
31+
set -o errexit
32+
33+
echo "INFO - Downloading Flux OpenAPI schemas"
34+
mkdir -p /tmp/flux-crd-schemas/master-standalone-strict
35+
#curl -sL https://github.com/fluxcd/flux2/releases/latest/download/crd-schemas.tar.gz | tar zxf - -C /tmp/flux-crd-schemas/master-standalone-strict
36+
37+
# mirror kustomize-controller build options
38+
kustomize_flags="--load-restrictor=LoadRestrictionsNone --reorder=legacy"
39+
kustomize_config="kustomization.yaml"
40+
41+
42+
# find . -type f -name '*.yaml' -print0 | while IFS= read -r -d $'\0' file;
43+
# do
44+
# echo "INFO - Validating $file"
45+
# yq -e 'true' "$file" > /dev/null
46+
# done
47+
48+
echo "INFO - Validating clusters"
49+
find ./k8s/clusters -type f -name '*.yaml' -maxdepth 1 -print0 | while IFS= read -r -d $'\0' file;
50+
do
51+
kubeval "${file}" --strict --ignore-missing-schemas --additional-schema-locations=file:///tmp/flux-crd-schemas
52+
if [[ ${PIPESTATUS[0]} != 0 ]]; then
53+
exit 1
54+
fi
55+
done
56+
57+
echo "INFO - Validating kustomize overlays"
58+
find . -type f -name $kustomize_config -print0 | while IFS= read -r -d $'\0' file;
59+
do
60+
echo "INFO - Validating kustomization ${file/%$kustomize_config}"
61+
# Secrets are ignored with --skip-kinds due to using SOPS with FluxCD
62+
# shellcheck disable=SC2086
63+
kustomize build "${file/%$kustomize_config}" $kustomize_flags | kubeval --ignore-missing-schemas --strict --additional-schema-locations=file:///tmp/flux-crd-schemas --skip-kinds Secret
64+
if [[ ${PIPESTATUS[0]} != 0 ]]; then
65+
exit 1
66+
fi
67+
done

.pre-commit-config.yaml

+5
Original file line numberDiff line numberDiff line change
@@ -53,3 +53,8 @@ repos:
5353
hooks:
5454
- id: fix-smartquotes
5555
- id: fix-ligatures
56+
57+
- repo: https://github.com/zricethezav/gitleaks
58+
rev: v8.5.3
59+
hooks:
60+
- id: gitleaks

k8s/clusters/hegira/flux/orchestration/kustomization.yaml

-1
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@ kind: Kustomization
44
resources:
55
- ./cert-manager.yaml
66
- ./rook-ceph.yaml
7-
# # - ./dev.yaml
87
- ./downloads.yaml
98
- ./databases.yaml
109
- ./flux-system.yaml

k8s/clusters/hegira/flux/orchestration/rook-ceph.yaml

-17
Original file line numberDiff line numberDiff line change
@@ -96,20 +96,3 @@ spec:
9696
sourceRef:
9797
kind: GitRepository
9898
name: home-cluster
99-
---
100-
apiVersion: kustomize.toolkit.fluxcd.io/v1beta2
101-
kind: Kustomization
102-
metadata:
103-
name: rook-ceph-monitoring
104-
namespace: flux-system
105-
spec:
106-
dependsOn:
107-
- name: rook-ceph-namespace
108-
- name: rook-ceph-operator
109-
interval: 5m
110-
path: "./k8s/manifests/rook-ceph/monitoring"
111-
prune: true
112-
wait: true
113-
sourceRef:
114-
kind: GitRepository
115-
name: home-cluster

k8s/manifests/cert-manager/cert-manager/config/kustomization.yaml

-1
Original file line numberDiff line numberDiff line change
@@ -4,4 +4,3 @@ kind: Kustomization
44
resources:
55
- ./secret.sops.yaml
66
- ./clusterissuer.yaml
7-
- ./prometheusrule.yaml

k8s/manifests/databases/postgresql/kustomization.yaml

-1
Original file line numberDiff line numberDiff line change
@@ -4,4 +4,3 @@ kind: Kustomization
44
resources:
55
- ./config-pvc.yaml
66
- ./helmrelease.yaml
7-
- ./monitoring

k8s/manifests/flux-system/add-ons/monitoring/kustomization.yaml

-6
This file was deleted.

k8s/manifests/flux-system/add-ons/monitoring/prometheusrules.yaml

-20
This file was deleted.

k8s/manifests/kasten-io/k10/k10-config/kustomization.yaml

-1
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,5 @@ apiVersion: kustomize.config.k8s.io/v1beta1
33
kind: Kustomization
44
resources:
55
- blueprints
6-
- monitoring
76
- policies
87
- profiles

k8s/manifests/kasten-io/k10/k10-config/monitoring/kustomization.yaml

-6
This file was deleted.

k8s/manifests/kasten-io/k10/k10-config/monitoring/service-monitor.yaml

-23
This file was deleted.

k8s/manifests/kube-system/cilium/kustomization.yaml

-1
Original file line numberDiff line numberDiff line change
@@ -3,4 +3,3 @@ apiVersion: kustomize.config.k8s.io/v1beta1
33
kind: Kustomization
44
resources:
55
- helmrelease.yaml
6-
- servicemonitor.yaml

k8s/manifests/kube-system/descheduler/kustomization.yaml

-1
Original file line numberDiff line numberDiff line change
@@ -3,4 +3,3 @@ apiVersion: kustomize.config.k8s.io/v1beta1
33
kind: Kustomization
44
resources:
55
- helmrelease.yaml
6-
- monitoring

k8s/manifests/media/plex/helmrelease.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ spec:
5454
external-dns.alpha.kubernetes.io/cloudflare-proxied: "true"
5555
hajimari.io/enable: "true"
5656
hajimari.io/icon: plex
57-
hajimari.io/appName: plex
57+
hajimari.io/appName: plex # TODO #942 fix hajimari ingress to /web/index (base is 401)
5858
hosts:
5959
- host: "plex.${CLUSTER_DOMAIN}"
6060
paths:

k8s/manifests/media/plex/kustomization.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
apiVersion: kustomize.config.k8s.io/v1beta1
33
kind: Kustomization
44
resources:
5-
- ./pvc.yaml
5+
- ./config-pvc.yaml
66
- ./helmrelease.yaml
77
# - ./probe.yaml
88
- ./endpoint-monitor.yaml

k8s/manifests/system-monitoring/blackbox-exporter/kustomization.yaml

-1
Original file line numberDiff line numberDiff line change
@@ -3,4 +3,3 @@ apiVersion: kustomize.config.k8s.io/v1beta1
33
kind: Kustomization
44
resources:
55
- helmrelease.yaml
6-
- prometheusrule.yaml

k8s/manifests/rook-ceph/monitoring/kustomization.yaml renamed to k8s/manifests/system-monitoring/kube-prometheus-stack/prometheus-rules/cert-manager/kustomization.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -2,4 +2,4 @@
22
apiVersion: kustomize.config.k8s.io/v1beta1
33
kind: Kustomization
44
resources:
5-
- prometheusrule.yaml
5+
- cert-manager.yaml
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
---
2+
apiVersion: kustomize.config.k8s.io/v1beta1
3+
kind: Kustomization
4+
resources:
5+
- postgresql.yaml
6+
- redis.yaml

k8s/manifests/flux-system/add-ons/monitoring/podmonitor.yaml renamed to k8s/manifests/system-monitoring/kube-prometheus-stack/prometheus-rules/flux-system/flux-system.yaml

+20
Original file line numberDiff line numberDiff line change
@@ -78,3 +78,23 @@ spec:
7878

7979
podMetricsEndpoints:
8080
- port: http-prom
81+
---
82+
apiVersion: monitoring.coreos.com/v1
83+
kind: PrometheusRule
84+
metadata:
85+
name: flux-alert-rules
86+
namespace: flux-system
87+
labels:
88+
prometheus: flux-rules
89+
role: alert-rules
90+
spec:
91+
groups:
92+
- name: GitOpsToolkit
93+
rules:
94+
- alert: ReconciliationFailure
95+
expr: max(gotk_reconcile_condition{status="False",type="Ready"}) by (namespace, name, kind) + on(namespace, name, kind) (max(gotk_reconcile_condition{status="Deleted"}) by (namespace, name, kind)) * 2 == 1
96+
for: 10m
97+
labels:
98+
severity: page
99+
annotations:
100+
summary: "{{ $labels.kind }} {{ $labels.namespace }}/{{ $labels.name }} reconciliation has been failing for more than ten minutes."
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
apiVersion: kustomize.config.k8s.io/v1beta1
3+
kind: Kustomization
4+
resources:
5+
- flux-system.yaml

k8s/manifests/kasten-io/k10/k10-config/monitoring/prometheus-rule.yaml renamed to k8s/manifests/system-monitoring/kube-prometheus-stack/prometheus-rules/kasten-io/k10.yaml

+23
Original file line numberDiff line numberDiff line change
@@ -18,3 +18,26 @@ spec:
1818
for: 1m
1919
labels:
2020
severity: critical
21+
---
22+
apiVersion: monitoring.coreos.com/v1
23+
kind: ServiceMonitor
24+
metadata:
25+
name: k10
26+
namespace: kasten-io
27+
spec:
28+
namespaceSelector:
29+
matchNames:
30+
- kasten-io
31+
selector:
32+
matchLabels:
33+
app: prometheus
34+
endpoints:
35+
- port: http
36+
scheme: http
37+
path: /k10/prometheus/federate
38+
honorLabels: true
39+
interval: 15s
40+
params:
41+
"match[]":
42+
- '{__name__=~"jobs.*"}'
43+
- '{__name__=~"catalog.*"}'

k8s/manifests/databases/postgresql/monitoring/kustomization.yaml renamed to k8s/manifests/system-monitoring/kube-prometheus-stack/prometheus-rules/kasten-io/kustomization.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -2,4 +2,4 @@
22
apiVersion: kustomize.config.k8s.io/v1beta1
33
kind: Kustomization
44
resources:
5-
- alert.yaml
5+
- k10.yaml

k8s/manifests/system-monitoring/kube-prometheus-stack/prometheus-rules/kustomization.yaml

+8-2
Original file line numberDiff line numberDiff line change
@@ -2,5 +2,11 @@
22
apiVersion: kustomize.config.k8s.io/v1beta1
33
kind: Kustomization
44
resources:
5-
- node-exporter.yaml
6-
- upsc.yaml
5+
- cert-manager
6+
- databases
7+
- kasten-io
8+
- network-system
9+
- rook-ceph
10+
- services
11+
- flux-system
12+
- system-monitoring
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
---
2+
apiVersion: kustomize.config.k8s.io/v1beta1
3+
kind: Kustomization
4+
resources:
5+
- cilium.yaml
6+
- descheduler.yaml

k8s/manifests/kube-system/descheduler/monitoring/kustomization.yaml renamed to k8s/manifests/system-monitoring/kube-prometheus-stack/prometheus-rules/rook-ceph/kustomization.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -2,4 +2,4 @@
22
apiVersion: kustomize.config.k8s.io/v1beta1
33
kind: Kustomization
44
resources:
5-
- metrics.yaml
5+
- rook-ceph.yaml

k8s/manifests/databases/redis/monitoring/kustomization.yaml renamed to k8s/manifests/system-monitoring/kube-prometheus-stack/prometheus-rules/services/kustomization.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -2,4 +2,4 @@
22
apiVersion: kustomize.config.k8s.io/v1beta1
33
kind: Kustomization
44
resources:
5-
- alerts.yaml
5+
- minio.yaml
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
---
2+
apiVersion: monitoring.coreos.com/v1
3+
kind: PrometheusRule
4+
metadata:
5+
name: minio-rules
6+
namespace: cert-manager
7+
spec:
8+
groups:
9+
- name: minio.rules
10+
rules:
11+
- alert: MinioS3Errors
12+
expr: |
13+
increase(minio_s3_requests_errors_total[5m]) > 5
14+
for: 0m
15+
labels:
16+
severity: critical
17+
annotations:
18+
description: "Minio is experiencing increased S3 errors. Operations
19+
may not occur as expected, and service may be impacted"
20+
summary: "Minio is experiencing elevated S3 errors."
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
---
2+
apiVersion: kustomize.config.k8s.io/v1beta1
3+
kind: Kustomization
4+
resources:
5+
- node-exporter.yaml
6+
- blackbox-exporter.yaml
7+
- loki.yaml
8+
- prom-smartctl.yaml
9+
- thanos.yaml

k8s/manifests/system-monitoring/prometheus-smartctl/servicemonitor.yaml renamed to k8s/manifests/system-monitoring/kube-prometheus-stack/prometheus-rules/system-monitoring/prom-smartctl.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -25,4 +25,4 @@ spec:
2525
selector:
2626
matchLabels:
2727
app.kubernetes.io/name: prometheus-smartctl
28-
# FIXME - needs alerts!
28+
# FIXME #943 - needs alerts!

k8s/manifests/system-monitoring/loki/kustomization.yaml

-1
Original file line numberDiff line numberDiff line change
@@ -3,4 +3,3 @@ apiVersion: kustomize.config.k8s.io/v1beta1
33
kind: Kustomization
44
resources:
55
- helmrelease.yaml
6-
- prometheus-rule.yaml

k8s/manifests/system-monitoring/prometheus-smartctl/kustomization.yaml

-1
Original file line numberDiff line numberDiff line change
@@ -4,4 +4,3 @@ kind: Kustomization
44
resources:
55
- ./daemonset.yaml
66
- ./service.yaml
7-
- ./servicemonitor.yaml

k8s/manifests/system-monitoring/thanos/kustomization.yaml

-1
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,5 @@ apiVersion: kustomize.config.k8s.io/v1beta1
33
kind: Kustomization
44
resources:
55
- dashboard
6-
- monitoring
76
- secret.sops.yaml
87
- helmrelease.yaml

k8s/manifests/system-monitoring/thanos/monitoring/kustomization.yaml

-5
This file was deleted.

0 commit comments

Comments
 (0)