diff --git a/.env.mnemonic.alfajores.enc b/.env.mnemonic.alfajores.enc index f2bca4330e7..2d12c36bdbb 100644 Binary files a/.env.mnemonic.alfajores.enc and b/.env.mnemonic.alfajores.enc differ diff --git a/.env.mnemonic.baklava.enc b/.env.mnemonic.baklava.enc index 196363bbde6..50f1142f374 100644 Binary files a/.env.mnemonic.baklava.enc and b/.env.mnemonic.baklava.enc differ diff --git a/packages/celotool/src/lib/prometheus.ts b/packages/celotool/src/lib/prometheus.ts index fcd82926ecc..f16acd7639f 100644 --- a/packages/celotool/src/lib/prometheus.ts +++ b/packages/celotool/src/lib/prometheus.ts @@ -7,7 +7,6 @@ import { fetchEnv, fetchEnvOrFallback, getDynamicEnvVarValue, - isProduction, } from './env-utils' import { installGenericHelmChart, @@ -33,7 +32,7 @@ const kubeServiceAccountName = releaseName // Container registry with latest tags: https://console.cloud.google.com/gcr/images/stackdriver-prometheus/GLOBAL/stackdriver-prometheus-sidecar?gcrImageListsize=30 const sidecarImageTag = '0.8.2' // Prometheus container registry with latest tags: https://hub.docker.com/r/prom/prometheus/tags -const prometheusImageTag = 'v2.25.0' +const prometheusImageTag = 'v2.27.1' const grafanaHelmChartPath = '../helm-charts/grafana' const grafanaReleaseName = 'grafana' @@ -111,6 +110,30 @@ async function helmParameters(context?: string, clusterConfig?: BaseClusterConfi '__name__!~"workqueue_.+"', '__name__!~"nginx_.+"', '__name__!~"etcd_.+"', + '__name__!~"erlang_.+"', + '__name__!~"container_tasks_state"', + '__name__!~"storage_.+"', + '__name__!~"container_memory_[^w].*"', + '__name__!~"rest_client_.+"', + '__name__!~"container_fs_.+"', + '__name__!~"container_file_.+"', + '__name__!~"container_spec_.+"', + '__name__!~"container_start_.+"', + '__name__!~"container_last_.+"', + '__name__!~"kube_pod_[^cs].+"', + '__name__!~"kube_pod_container_[^r].+"', + '__name__!~"kube_pod_container_status_waiting_reason"', + '__name__!~"kube_pod_container_status_terminated_reason"', + '__name__!~"kube_pod_container_status_last_terminated_reason"', + '__name__!~"container_network_.+"', + '__name__!~"container_cpu_user_seconds_total"', + '__name__!~"container_cpu_load_average_10s"', + '__name__!~"container_cpu_system_seconds_total"', + '__name__!~"container_sockets"', + '__name__!~"container_processes"', + '__name__!~"container_threads"', + '__name__!~"container_threads_max"', + '__name__!~"kube_node_status_condition"', ] const usingGCP = !clusterConfig || clusterConfig.cloudProvider === CloudProvider.GCP @@ -153,15 +176,52 @@ async function helmParameters(context?: string, clusterConfig?: BaseClusterConfi ] if (fetchEnvOrFallback(envVar.PROMETHEUS_REMOTE_WRITE_URL, '') !== '') { + const droppedRemoteWriteSeries = [ + 'apiserver_.+', + 'etcd_.+', + 'nginx_.+', + 'erlang_.+', + 'kubelet_[^v].+', + 'container_tasks_state', + 'storage_.+', + 'container_memory_[^w].*', + 'rest_client_.+', + 'container_fs_.+', + 'container_file_.+', + 'container_spec_.+', + 'container_start_.+', + 'container_last_.+', + 'kube_pod_container_status_waiting_reason', + 'kube_pod_container_status_terminated_reason', + 'kube_pod_status_phase', + 'container_network_.+', + 'container_cpu_user_seconds_total', + 'container_cpu_load_average_10s', + 'container_cpu_system_seconds_total', + 'container_sockets', + 'container_processes', + 'container_threads', + 'container_threads_max', + 'kube_node_status_condition', + 'kube_pod_container_status_last_terminated_reason', + 'kube_pod_container_[^r].+', + 'kube_pod_[^cs].+', + 'workqueue_.+', + 'kube_secret_.+', + ] params.push( - `--set remote_write[0].url=${fetchEnv(envVar.PROMETHEUS_REMOTE_WRITE_URL)}`, - `--set remote_write[0].basic_auth.username=${fetchEnv( + `--set remote_write[0].url='${fetchEnv(envVar.PROMETHEUS_REMOTE_WRITE_URL)}'`, + `--set remote_write[0].basic_auth.username='${fetchEnv( envVar.PROMETHEUS_REMOTE_WRITE_USERNAME - )}`, - `--set remote_write[0].basic_auth.password=${fetchEnv( + )}'`, + `--set remote_write[0].basic_auth.password='${fetchEnv( envVar.PROMETHEUS_REMOTE_WRITE_PASSWORD - )}`, - `--set enable_alerts="${isProduction()}"` + )}'`, + `--set remote_write[0].write_relabel_configs[0].source_labels='[__name__]'`, + `--set remote_write[0].write_relabel_configs[0].regex='(${droppedRemoteWriteSeries.join( + '|' + )})'`, + `--set remote_write[0].write_relabel_configs[0].action='drop'` ) } diff --git a/packages/helm-charts/prometheus-stackdriver/templates/configmap.yaml b/packages/helm-charts/prometheus-stackdriver/templates/configmap.yaml index 88d0af0f1d8..9f8635d9dcf 100644 --- a/packages/helm-charts/prometheus-stackdriver/templates/configmap.yaml +++ b/packages/helm-charts/prometheus-stackdriver/templates/configmap.yaml @@ -27,10 +27,8 @@ data: # Label the metrics with a custom label if using multiple prometheus for same environments external_labels: cluster_name: {{ .Values.cluster }} - enable_alerts: {{ .Values.enable_alerts }} {{- with .Values.remote_write }} - remote_write: {{ toYaml . | indent 6 }} {{- end }} diff --git a/packages/helm-charts/prometheus-stackdriver/values.yaml b/packages/helm-charts/prometheus-stackdriver/values.yaml index 0e8cb0d52c6..526fec837c4 100644 --- a/packages/helm-charts/prometheus-stackdriver/values.yaml +++ b/packages/helm-charts/prometheus-stackdriver/values.yaml @@ -34,8 +34,8 @@ serviceAccount: # If not set and create is true, a name is generated using the fullname template name: "" -remote_write: -- url: https://prometheus-us-central1.grafana.net/api/prom/push - basic_auth: - username: 51505 - password: eyIxJ... +remote_write: [] +# - url: https://prometheus-us-central1.grafana.net/api/prom/push +# basic_auth: +# username: 51505 +# password: ey...