Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .github/trusted_registries.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@ ghcr.io:
aquasecurity: ALL_IMAGES
kyverno: ALL_IMAGES
teutonet: ALL_IMAGES
jimmidyson:
configmap-reload: ALL_TAGS
quay.io:
cilium: ALL_IMAGES
jetstack: ALL_IMAGES
Expand Down
21 changes: 21 additions & 0 deletions charts/base-cluster/README.md.gotmpl
Original file line number Diff line number Diff line change
Expand Up @@ -315,4 +315,25 @@ upgrade, they will be recreated in version 6.
This also makes kyverno HA, so be aware that kyverno will need more resources in
your cluster.

### 6.x.x -> 7.0.0

This release allows the user to use the predefined k8s ClusterRoles
(`admin`, `edit`, `view`, ...).

Custom roles named `admin`, `edit`, `view`, ... might clash with these
predefined ClusterRoles and therefore need to be adjusted.

### 7.x.x -> 8.0.0

This release migrates the now unsupported `loki-stack` to the normal `loki` helm
chart.

This is a breaking change because, apart from a new storage engine, the deployment
also moves from the `loki` namespace to `monitoring` to keep in line with every
other monitoring deployment, which in turn also deletes the `loki` namespace.

This also replaces `promtail` and the `otel-collector` with `alloy`; using the
telemetry helpers from
<https://github.com/teutonet/teutonet-helm-charts/blob/main/charts/common/templates/_telemetry.tpl>
makes this a drop-in change.

{{ .Files.Get "values.md" }}
7 changes: 4 additions & 3 deletions charts/base-cluster/templates/ingress/nginx.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,8 @@ spec:
serviceMonitor:
enabled: {{ .Values.monitoring.prometheus.enabled }}
additionalLabels: {{- toYaml .Values.monitoring.labels | nindent 12 }}
{{- if .Values.monitoring.tracing.enabled }}
{{- $telemetryConf := include "common.telemetry.conf" (dict "protocol" "otlp") | fromYaml }}
{{- if and $telemetryConf.enabled .Values.monitoring.prometheus.enabled }}
opentelemetry:
enabled: true
{{- if and .Values.global.imageRegistry false }}
Expand All @@ -40,10 +41,10 @@ spec:
use-gzip: true
enable-brotli: true
enable-underscores-in-headers: true
{{- if .Values.monitoring.tracing.enabled }}
{{- if $telemetryConf.enabled }}
enable-opentelemetry: true
opentelemetry-operation-name: ingress
otlp-collector-host: open-telemetry-collector-opentelemetry-collector.monitoring
otlp-collector-host: {{ $telemetryConf.host }}
{{- end }}
service:
annotations:
Expand Down
14 changes: 8 additions & 6 deletions charts/base-cluster/templates/kyverno/kyverno.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -62,25 +62,27 @@ spec:
# this only works in version 3
admissionController:
replicas: 3
{{- if and .Values.monitoring.tracing.enabled .Values.monitoring.prometheus.enabled }}
{{- $telemetryConf := include "common.telemetry.conf" (dict "protocol" "jaeger" "serviceProtocol" "grpc") | fromYaml -}}
{{- $telemetryEnabled := and $telemetryConf.enabled .Values.monitoring.prometheus.enabled -}}
{{- if $telemetryEnabled }}
tracing: &tracingConfig
enabled: true
address: open-telemetry-collector-opentelemetry-collector.monitoring
port: 14250 # jaeger-grpc
address: {{ $telemetryConf.host }}
port: {{ $telemetryConf.port }}
{{- end }}
backgroundController:
replicas: 2
{{- if and .Values.monitoring.tracing.enabled .Values.monitoring.prometheus.enabled }}
{{- if $telemetryEnabled }}
tracing: *tracingConfig
{{- end }}
reportsController:
replicas: 2
{{- if and .Values.monitoring.tracing.enabled .Values.monitoring.prometheus.enabled }}
{{- if $telemetryEnabled }}
tracing: *tracingConfig
{{- end }}
cleanupController:
replicas: 2
{{- if and .Values.monitoring.tracing.enabled .Values.monitoring.prometheus.enabled }}
{{- if $telemetryEnabled }}
tracing: *tracingConfig
{{- end }}
podDisruptionBudget:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ spec:
namespaces:
- kube-system
- default
{{ $lokiPromtail := dict "resources" (dict "namespaces" (list "loki") "kinds" (list "Pod") "names" (list "loki-promtail-*")) -}}
{{ $alloy := dict "resources" (dict "namespaces" (list "monitoring") "kinds" (list "Pod") "names" (list "alloy-*")) -}}
{{- $syncEtcdSecret := dict "resources" (dict "namespaces" (list "monitoring") "kinds" (list "Pod") "names" (list "sync-etcd-secret-*")) -}}
{{- $nodeExporter := dict "resources" (dict "namespaces" (list "monitoring") "kinds" (list "Pod") "names" (list "kube-prometheus-stack-prometheus-node-exporter-*")) -}}
{{- $nfsServerProvisioner := dict "resources" (dict "namespaces" (list "nfs-server-provisioner") "kinds" (list "Pod") "names" (list "nfs-server-provisioner-0")) -}}
Expand All @@ -46,8 +46,8 @@ spec:
{{- $disallowHostPorts := list -}}

{{- if .Values.monitoring.loki.enabled -}}
{{- $disallowHostPath = append $disallowHostPath $lokiPromtail -}}
{{- $runAsNonRoot = append $runAsNonRoot $lokiPromtail -}}
{{- $disallowHostPath = append $disallowHostPath $alloy -}}
{{- $runAsNonRoot = append $runAsNonRoot $alloy -}}
{{- end -}}
{{- if .Values.monitoring.prometheus.enabled -}}
{{- $disallowHostPath = append $disallowHostPath $syncEtcdSecret -}}
Expand Down
259 changes: 259 additions & 0 deletions charts/base-cluster/templates/monitoring/alloy.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,259 @@
{{- /*
Flux HelmRelease that installs the `alloy` chart from the `grafana` repo into
the `monitoring` namespace. Rendered only when Prometheus monitoring is enabled
and at least one of tracing or Loki is enabled.
*/ -}}
{{- if and .Values.monitoring.prometheus.enabled (or .Values.monitoring.tracing.enabled .Values.monitoring.loki.enabled) -}}
apiVersion: helm.toolkit.fluxcd.io/v2
kind: HelmRelease
metadata:
name: alloy
namespace: monitoring
labels: {{- include "common.labels.standard" $ | nindent 4 }}
app.kubernetes.io/component: alloy
app.kubernetes.io/part-of: monitoring
spec:
chart:
spec: {{- include "base-cluster.helm.chartSpec" (dict "repo" "grafana" "chart" "alloy" "context" $) | nindent 6 }}
interval: 1h
driftDetection:
mode: enabled
# CRDs are skipped on both install and upgrade; the chart value `crds.create`
# further down in this file is set to false as well.
install:
timeout: 10m0s
crds: Skip
upgrade:
timeout: 10m0s
crds: Skip
# NOTE(review): presumably ordered after kube-prometheus-stack because the
# serviceMonitor enabled below needs its CRDs — confirm.
dependsOn:
- name: kube-prometheus-stack
namespace: monitoring
values:
# Only override the image registry when a global registry is configured.
{{- if .Values.global.imageRegistry }}
global:
image:
registry: {{ $.Values.global.imageRegistry }}
{{- end }}
alloy:
enableReporting: false
# Resource settings are read from the `monitoring.loki.promtail` values key.
resources: {{- include "common.resources" .Values.monitoring.loki.promtail | nindent 8 }}
# The varlog mount is only requested when Loki is enabled; the log pipeline
# in the config below reads files under /var/log/pods.
{{- if .Values.monitoring.loki.enabled }}
mounts:
varlog: true
{{- end }}
securityContext:
seccompProfile:
type: RuntimeDefault
# Alloy configuration, assembled from the Loki and tracing sections below.
configMap:
content: |
{{- /*
Log pipeline, rendered only when Loki is enabled:
discovery.kubernetes -> discovery.relabel -> local.file_match ->
loki.source.file -> loki.process (CRI stage) -> loki.write.
These template comments are stripped at render time, so the generated Alloy
configuration text is unchanged.
*/}}
{{- if .Values.monitoring.loki.enabled }}
discovery.kubernetes "pods" {
role = "pod"
}

discovery.relabel "pods" {
targets = discovery.kubernetes.pods.targets

rule {
{{- /* Matches the controller name with an optional trailing 8-10 char hex suffix; presumably this drops a ReplicaSet hash — verify. */}}
source_labels = ["__meta_kubernetes_pod_controller_name"]
regex = "([0-9a-z-.]+?)(-[0-9a-f]{8,10})?"
target_label = "__tmp_controller_name"
}

rule {
{{- /* `app` is taken from the first non-empty source label; the regex skips leading `;` characters and captures up to the next `;`. */}}
source_labels = ["__meta_kubernetes_pod_label_app_kubernetes_io_name", "__meta_kubernetes_pod_label_app", "__tmp_controller_name", "__meta_kubernetes_pod_name"]
regex = "^;*([^;]+)(;.*)?$"
target_label = "app"
}

rule {
source_labels = ["__meta_kubernetes_pod_label_app_kubernetes_io_instance", "__meta_kubernetes_pod_label_instance"]
regex = "^;*([^;]+)(;.*)?$"
target_label = "instance"
}

rule {
source_labels = ["__meta_kubernetes_pod_label_app_kubernetes_io_component", "__meta_kubernetes_pod_label_component"]
regex = "^;*([^;]+)(;.*)?$"
target_label = "component"
}

rule {
source_labels = ["__meta_kubernetes_pod_node_name"]
target_label = "node_name"
}

rule {
source_labels = ["__meta_kubernetes_namespace"]
target_label = "namespace"
}

rule {
{{- /* `job` is built from the `namespace` and `app` labels set above, joined with "/". */}}
source_labels = ["namespace", "app"]
separator = "/"
target_label = "job"
}

rule {
source_labels = ["__meta_kubernetes_pod_name"]
target_label = "pod"
}

rule {
source_labels = ["__meta_kubernetes_pod_container_name"]
target_label = "container"
}

rule {
{{- /* Sets the log file glob under /var/log/pods from the pod UID and container name. */}}
source_labels = ["__meta_kubernetes_pod_uid", "__meta_kubernetes_pod_container_name"]
separator = "/"
target_label = "__path__"
replacement = "/var/log/pods/*$1/*.log"
}

rule {
{{- /* Overrides the glob when the kubernetes.io/config.hash annotation is present, using the hash instead of the UID; presumably for static pods — verify. */}}
source_labels = ["__meta_kubernetes_pod_annotationpresent_kubernetes_io_config_hash", "__meta_kubernetes_pod_annotation_kubernetes_io_config_hash", "__meta_kubernetes_pod_container_name"]
separator = "/"
regex = "true/(.*)"
target_label = "__path__"
replacement = "/var/log/pods/*$1/*.log"
}
}

local.file_match "pods" {
path_targets = discovery.relabel.pods.output
}

loki.source.file "pods" {
targets = local.file_match.pods.targets
forward_to = [loki.process.pods.receiver]
}

loki.process "pods" {
forward_to = [loki.write.default.receiver]

stage.cri { }
}

loki.write "default" {
endpoint {
url = "http://loki:3100/loki/api/v1/push"
}
external_labels = {}
}
{{- end }}

{{- /*
Trace pipeline, rendered only when tracing is enabled:
OTLP, Jaeger and Zipkin receivers -> otelcol.processor.k8sattributes ->
otelcol.processor.batch -> otelcol.exporter.otlp "tempo".
These template comments are stripped at render time, so the generated Alloy
configuration text is unchanged.
*/}}
{{- if .Values.monitoring.tracing.enabled }}
otelcol.receiver.otlp "default" {
grpc { }

http { }

output {
traces = [otelcol.processor.k8sattributes.default.input]
}
}

otelcol.receiver.jaeger "default" {
protocols {
grpc { }

thrift_http { }

thrift_compact {
max_packet_size = "63KiB488B"
}
}

output {
traces = [otelcol.processor.k8sattributes.default.input]
}
}

otelcol.receiver.zipkin "default" {
output {
traces = [otelcol.processor.k8sattributes.default.input]
}
}

otelcol.processor.k8sattributes "default" {
{{- /* Adds the listed Kubernetes metadata to traces; pods are associated via the k8s.pod.ip or k8s.pod.uid resource attribute, or via the connection. */}}
auth_type = "serviceAccount"

extract {
metadata = ["k8s.namespace.name", "k8s.deployment.name", "k8s.statefulset.name", "k8s.daemonset.name", "k8s.cronjob.name", "k8s.job.name", "k8s.node.name", "k8s.pod.name", "k8s.pod.uid", "k8s.pod.start_time"]
}

pod_association {
source {
from = "resource_attribute"
name = "k8s.pod.ip"
}
}

pod_association {
source {
from = "resource_attribute"
name = "k8s.pod.uid"
}
}

pod_association {
source {
from = "connection"
}
}

output {
traces = [otelcol.processor.batch.default.input]
}
}

otelcol.processor.batch "default" {
output {
traces = [otelcol.exporter.otlp.tempo.input]
}
}

otelcol.exporter.otlp "tempo" {
{{- /* Exports to grafana-tempo-distributor on port 4317 with `insecure = true` in the TLS block. */}}
client {
endpoint = "grafana-tempo-distributor:4317"

tls {
insecure = true
}
}
}
{{- end }}
{{- /*
Additional ports for the trace receivers (Jaeger, OTLP, Zipkin). This list is
outside the tracing `if` above, so it is rendered even when tracing is disabled.
*/}}
extraPorts:
- name: jaeger-compact
port: 6831
protocol: UDP
targetPort: 6831
- name: jaeger-grpc
port: 14250
protocol: TCP
targetPort: 14250
- name: jaeger-thrift
port: 14268
protocol: TCP
targetPort: 14268
# NOTE(review): port 8888 is not referenced anywhere in the Alloy config in
# this file — confirm it is still needed.
- name: metrics
port: 8888
protocol: TCP
targetPort: 8888
- name: otlp
port: 4317
appProtocol: grpc
protocol: TCP
targetPort: 4317
- name: otlp-http
port: 4318
protocol: TCP
targetPort: 4318
# NOTE(review): `http/protobuf` is set as appProtocol for zipkin while
# otlp-http has none — confirm this is intended.
- name: zipkin
port: 9411
appProtocol: http/protobuf
protocol: TCP
targetPort: 9411
# CRD creation by the chart is disabled, matching `crds: Skip` on
# install/upgrade of this release.
crds:
create: false
controller:
priorityClassName: monitoring-components
# Unconditionally enabled here: the whole release is already gated on
# monitoring.prometheus.enabled by the outer `if`.
serviceMonitor:
enabled: true
additionalLabels: {{- toYaml .Values.monitoring.labels | nindent 10 }}
{{- end -}}
Original file line number Diff line number Diff line change
Expand Up @@ -65,11 +65,12 @@ prometheusSpec:
- __address__
target_label: cluster
replacement: {{ .Values.global.clusterName }}
{{- if .Values.monitoring.tracing.enabled }}
{{- $telemetryConf := include "common.telemetry.conf" (dict "protocol" "otlp") | fromYaml }}
{{- if $telemetryConf.enabled }}
tracingConfig:
clientType: grpc
samplingFraction: "0.1"
insecure: true
endpoint: open-telemetry-collector-opentelemetry-collector.monitoring:4317
endpoint: {{ printf "%s:%d" $telemetryConf.host $telemetryConf.port }}
{{- end }}
{{- end -}}
Loading