diff --git a/00-namespace.yml b/00-namespace.yml deleted file mode 100644 index a6cf001d..00000000 --- a/00-namespace.yml +++ /dev/null @@ -1,5 +0,0 @@ ---- -apiVersion: v1 -kind: Namespace -metadata: - name: kafka diff --git a/01-test-namespace.yml b/01-test-namespace.yml deleted file mode 100644 index fbb6e0ef..00000000 --- a/01-test-namespace.yml +++ /dev/null @@ -1,5 +0,0 @@ ---- -apiVersion: v1 -kind: Namespace -metadata: - name: test-kafka diff --git a/alertmanager/Kustomization b/alertmanager/Kustomization new file mode 100644 index 00000000..324049ed --- /dev/null +++ b/alertmanager/Kustomization @@ -0,0 +1,5 @@ +resources: +- alert-config.yml +- alert-template-config.yml +- alertmanager.yml +- alert-svc.yml diff --git a/alertmanager/alert-config.yml b/alertmanager/alert-config.yml new file mode 100644 index 00000000..99806f1f --- /dev/null +++ b/alertmanager/alert-config.yml @@ -0,0 +1,65 @@ +--- +kind: ConfigMap +apiVersion: v1 +metadata: + name: alertmanager + namespace: monitoring +data: + config.yml: |- + global: + # ResolveTimeout is the time after which an alert is declared resolved + # if it has not been updated. + resolve_timeout: 5m + # The smarthost and SMTP sender used for mail notifications. + smtp_smarthost: 'smtp.gmail.com:587' + smtp_from: 'foo@bar.com' + smtp_auth_username: 'foo@bar.com' + smtp_auth_password: 'barfoo' + # The API URL to use for Slack notifications. + slack_api_url: 'https://hooks.slack.com/services/some/api/token' + # # The directory from which notification templates are read. + templates: + - '/etc/alertmanager-templates/*.tmpl' + # The root route on which each incoming alert enters. + route: + # The labels by which incoming alerts are grouped together. For example, + # multiple alerts coming in for cluster=A and alertname=LatencyHigh would + # be batched into a single group. 
+ group_by: ['alertname', 'cluster', 'service'] + # When a new group of alerts is created by an incoming alert, wait at + # least 'group_wait' to send the initial notification. + # This ensures that multiple alerts for the same group that start + # firing shortly after one another are batched together on the first + # notification. + group_wait: 30s + # When the first notification was sent, wait 'group_interval' to send a batch + # of new alerts that started firing for that group. + group_interval: 5m + # If an alert has successfully been sent, wait 'repeat_interval' to + # resend it. + #repeat_interval: 1m + repeat_interval: 15m + # A default receiver + # If an alert isn't caught by a route, send it to default. + receiver: default + # All the above attributes are inherited by all child routes and can + # be overwritten on each. + # The child route trees. + routes: + # Send severity=slack alerts to slack. + - match: + severity: slack + receiver: slack_alert + # - match: + # severity: email + # receiver: email_alert + receivers: + - name: 'default' + slack_configs: + - channel: '#alertmanager-test' + text: '{{ template "slack.devops.text" . 
}}' + send_resolved: true + - name: 'slack_alert' + slack_configs: + - channel: '#alertmanager-test' + send_resolved: true diff --git a/alertmanager/alert-svc.yml b/alertmanager/alert-svc.yml new file mode 100644 index 00000000..a002f60c --- /dev/null +++ b/alertmanager/alert-svc.yml @@ -0,0 +1,20 @@ +--- +apiVersion: v1 +kind: Service +metadata: + annotations: + prometheus.io/scrape: 'true' + prometheus.io/path: '/metrics' + labels: + name: alertmanager + name: alertmanager + namespace: monitoring +spec: + selector: + app: alertmanager + type: NodePort + ports: + - name: alertmanager + protocol: TCP + port: 9093 + targetPort: 9093 diff --git a/alertmanager/alert-template-config.yml b/alertmanager/alert-template-config.yml new file mode 100644 index 00000000..74451d30 --- /dev/null +++ b/alertmanager/alert-template-config.yml @@ -0,0 +1,177 @@ +apiVersion: v1 +data: + default.tmpl: | + {{ define "__alertmanager" }}AlertManager{{ end }} + {{ define "__alertmanagerURL" }}{{ .ExternalURL }}/#/alerts?receiver={{ .Receiver }}{{ end }} + {{ define "__subject" }}[{{ .Status | toUpper }}{{ if eq .Status "firing" }}:{{ .Alerts.Firing | len }}{{ end }}] {{ .GroupLabels.SortedPairs.Values | join " " }} {{ if gt (len .CommonLabels) (len .GroupLabels) }}({{ with .CommonLabels.Remove .GroupLabels.Names }}{{ .Values | join " " }}{{ end }}){{ end }}{{ end }} + {{ define "__description" }}{{ end }} + {{ define "__text_alert_list" }}{{ range . }}Labels: + {{ range .Labels.SortedPairs }} - {{ .Name }} = {{ .Value }} + {{ end }}Annotations: + {{ range .Annotations.SortedPairs }} - {{ .Name }} = {{ .Value }} + {{ end }}Source: {{ .GeneratorURL }} + {{ end }}{{ end }} + {{ define "slack.default.title" }}{{ template "__subject" . }}{{ end }} + {{ define "slack.default.username" }}{{ template "__alertmanager" . }}{{ end }} + {{ define "slack.default.fallback" }}{{ template "slack.default.title" . }} | {{ template "slack.default.titlelink" . 
}}{{ end }} + {{ define "slack.default.pretext" }}{{ end }} + {{ define "slack.default.titlelink" }}{{ template "__alertmanagerURL" . }}{{ end }} + {{ define "slack.default.iconemoji" }}{{ end }} + {{ define "slack.default.iconurl" }}{{ end }} + {{ define "slack.default.text" }}{{ end }} + {{ define "hipchat.default.from" }}{{ template "__alertmanager" . }}{{ end }} + {{ define "hipchat.default.message" }}{{ template "__subject" . }}{{ end }} + {{ define "pagerduty.default.description" }}{{ template "__subject" . }}{{ end }} + {{ define "pagerduty.default.client" }}{{ template "__alertmanager" . }}{{ end }} + {{ define "pagerduty.default.clientURL" }}{{ template "__alertmanagerURL" . }}{{ end }} + {{ define "pagerduty.default.instances" }}{{ template "__text_alert_list" . }}{{ end }} + {{ define "opsgenie.default.message" }}{{ template "__subject" . }}{{ end }} + {{ define "opsgenie.default.description" }}{{ .CommonAnnotations.SortedPairs.Values | join " " }} + {{ if gt (len .Alerts.Firing) 0 -}} + Alerts Firing: + {{ template "__text_alert_list" .Alerts.Firing }} + {{- end }} + {{ if gt (len .Alerts.Resolved) 0 -}} + Alerts Resolved: + {{ template "__text_alert_list" .Alerts.Resolved }} + {{- end }} + {{- end }} + {{ define "opsgenie.default.source" }}{{ template "__alertmanagerURL" . }}{{ end }} + {{ define "victorops.default.message" }}{{ template "__subject" . }} | {{ template "__alertmanagerURL" . }}{{ end }} + {{ define "victorops.default.from" }}{{ template "__alertmanager" . }}{{ end }} + {{ define "email.default.subject" }}{{ template "__subject" . }}{{ end }} + {{ define "email.default.html" }} + + + +
+ + +| + |
+
+
+
+
|
+ + |
You're using Prometheus, an open-source systems monitoring and alerting toolkit originally built at SoundCloud. For more information, check out the Grafana and Prometheus projects.
", + "editable": true, + "error": false, + "id": 9, + "links": [], + "mode": "html", + "span": 3, + "style": {}, + "title": "", + "transparent": true, + "type": "text" + }], + "title": "New row" + }, { + "collapse": false, + "editable": true, + "height": 227, + "panels": [{ + "aliasColors": { + "prometheus": "#C15C17", + "{instance=\"localhost:9090\",job=\"prometheus\"}": "#C15C17" + }, + "bars": false, + "datasource": "${DS_PROMETHEUS}", + "editable": true, + "error": false, + "fill": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, + "id": 3, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 9, + "stack": false, + "steppedLine": false, + "targets": [{ + "expr": "rate(prometheus_local_storage_ingested_samples_total[5m])", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{job}}", + "metric": "", + "refId": "A", + "step": 2 + }], + "timeFrom": null, + "timeShift": null, + "title": "Samples ingested (rate-5m)", + "tooltip": { + "shared": true, + "value_type": "cumulative", + "ordering": "alphabetical", + "msResolution": false + }, + "type": "graph", + "yaxes": [{ + "show": true, + "min": null, + "max": null, + "logBase": 1, + "format": "short" + }, { + "show": true, + "min": null, + "max": null, + "logBase": 1, + "format": "short" + }], + "xaxis": { + "show": true + } + }, { + "content": "#### Samples Ingested\nThis graph displays the count of samples ingested by the Prometheus server, as measured over the last 5 minutes, per time series in the range vector. 
When troubleshooting an issue on IRC or Github, this is often the first stat requested by the Prometheus team. ", + "editable": true, + "error": false, + "id": 8, + "links": [], + "mode": "markdown", + "span": 2.995914043583536, + "style": {}, + "title": "", + "transparent": true, + "type": "text" + }], + "title": "New row" + }, { + "collapse": false, + "editable": true, + "height": "250px", + "panels": [{ + "aliasColors": { + "prometheus": "#F9BA8F", + "{instance=\"localhost:9090\",interval=\"5s\",job=\"prometheus\"}": "#F9BA8F" + }, + "bars": false, + "datasource": "${DS_PROMETHEUS}", + "editable": true, + "error": false, + "fill": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, + "id": 2, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 5, + "stack": false, + "steppedLine": false, + "targets": [{ + "expr": "rate(prometheus_target_interval_length_seconds_count[5m])", + "intervalFactor": 2, + "legendFormat": "{{job}}", + "refId": "A", + "step": 2 + }], + "timeFrom": null, + "timeShift": null, + "title": "Target Scrapes (last 5m)", + "tooltip": { + "shared": true, + "value_type": "cumulative", + "ordering": "alphabetical", + "msResolution": false + }, + "type": "graph", + "yaxes": [{ + "show": true, + "min": null, + "max": null, + "logBase": 1, + "format": "short" + }, { + "show": true, + "min": null, + "max": null, + "logBase": 1, + "format": "short" + }], + "xaxis": { + "show": true + } + }, { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PROMETHEUS}", + "editable": true, + "error": false, + "fill": 1, + "grid": { + "threshold1": null, + 
"threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, + "id": 14, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [{ + "expr": "prometheus_target_interval_length_seconds{quantile!=\"0.01\", quantile!=\"0.05\"}", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{quantile}} ({{interval}})", + "metric": "", + "refId": "A", + "step": 2 + }], + "timeFrom": null, + "timeShift": null, + "title": "Scrape Duration", + "tooltip": { + "shared": true, + "value_type": "cumulative", + "ordering": "alphabetical", + "msResolution": false + }, + "type": "graph", + "yaxes": [{ + "show": true, + "min": null, + "max": null, + "logBase": 1, + "format": "short" + }, { + "show": true, + "min": null, + "max": null, + "logBase": 1, + "format": "short" + }], + "xaxis": { + "show": true + } + }, { + "content": "#### Scrapes\nPrometheus scrapes metrics from instrumented jobs, either directly or via an intermediary push gateway for short-lived jobs. Target scrapes will show how frequently targets are scraped, as measured over the last 5 minutes, per time series in the range vector. Scrape Duration will show how long the scrapes are taking, with percentiles available as series. 
", + "editable": true, + "error": false, + "id": 11, + "links": [], + "mode": "markdown", + "span": 3, + "style": {}, + "title": "", + "transparent": true, + "type": "text" + }], + "title": "New row" + }, { + "collapse": false, + "editable": true, + "height": "250px", + "panels": [{ + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PROMETHEUS}", + "decimals": null, + "editable": true, + "error": false, + "fill": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, + "id": 12, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "hideEmpty": true, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 9, + "stack": false, + "steppedLine": false, + "targets": [{ + "expr": "prometheus_evaluator_duration_seconds{quantile!=\"0.01\", quantile!=\"0.05\"}", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{quantile}}", + "refId": "A", + "step": 2 + }], + "timeFrom": null, + "timeShift": null, + "title": "Rule Eval Duration", + "tooltip": { + "shared": true, + "value_type": "cumulative", + "ordering": "alphabetical", + "msResolution": false + }, + "type": "graph", + "yaxes": [{ + "show": true, + "min": null, + "max": null, + "logBase": 1, + "format": "percentunit", + "label": "" + }, { + "show": true, + "min": null, + "max": null, + "logBase": 1, + "format": "short" + }], + "xaxis": { + "show": true + } + }, { + "content": "#### Rule Evaluation Duration\nThis graph panel plots the duration for all evaluations to execute. 
The 50th percentile, 90th percentile and 99th percentile are shown as three separate series to help identify outliers that may be skewing the data.", + "editable": true, + "error": false, + "id": 15, + "links": [], + "mode": "markdown", + "span": 3, + "style": {}, + "title": "", + "transparent": true, + "type": "text" + }], + "title": "New row" + }], + "time": { + "from": "now-5m", + "to": "now" + }, + "timepicker": { + "now": true, + "refresh_intervals": ["5s", "10s", "30s", "1m", "5m", "15m", "30m", "1h", "2h", "1d"], + "time_options": ["5m", "15m", "1h", "6h", "12h", "24h", "2d", "7d", "30d"] + }, + "templating": { + "list": [] + }, + "annotations": { + "list": [] + }, + "refresh": false, + "schemaVersion": 12, + "version": 0, + "links": [{ + "icon": "info", + "tags": [], + "targetBlank": true, + "title": "Grafana Docs", + "tooltip": "", + "type": "link", + "url": "http://www.grafana.org/docs" + }, { + "icon": "info", + "tags": [], + "targetBlank": true, + "title": "Prometheus Docs", + "type": "link", + "url": "http://prometheus.io/docs/introduction/overview/" + }], + "gnetId": 2, + "description": "The official, pre-built Prometheus Stats Dashboard." 
+ } + grafana-net-737-dashboard.json: | + { + "__inputs": [{ + "name": "DS_PROMETHEUS", + "label": "prometheus", + "description": "", + "type": "datasource", + "pluginId": "prometheus", + "pluginName": "Prometheus" + }], + "__requires": [{ + "type": "panel", + "id": "singlestat", + "name": "Singlestat", + "version": "" + }, { + "type": "panel", + "id": "graph", + "name": "Graph", + "version": "" + }, { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "3.1.0" + }, { + "type": "datasource", + "id": "prometheus", + "name": "Prometheus", + "version": "1.0.0" + }], + "id": null, + "title": "Kubernetes Pod Resources", + "description": "Shows resource usage of Kubernetes pods.", + "tags": [ + "kubernetes" + ], + "style": "dark", + "timezone": "browser", + "editable": true, + "hideControls": false, + "sharedCrosshair": false, + "rows": [{ + "collapse": false, + "editable": true, + "height": "250px", + "panels": [{ + "cacheTimeout": null, + "colorBackground": false, + "colorValue": true, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": "${DS_PROMETHEUS}", + "editable": true, + "error": false, + "format": "percent", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": true, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "height": "180px", + "id": 4, + "interval": null, + "isNew": true, + "links": [], + "mappingType": 1, + "mappingTypes": [{ + "name": "value to text", + "value": 1 + }, { + "name": "range to text", + "value": 2 + }], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [{ + "from": "null", + "text": "N/A", + "to": "null" + }], + "span": 4, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "targets": [{ + "expr": "sum 
(container_memory_working_set_bytes{id=\"/\",instance=~\"^$instance$\"}) / sum (machine_memory_bytes{instance=~\"^$instance$\"}) * 100", + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 2 + }], + "thresholds": "65, 90", + "timeFrom": "1m", + "timeShift": null, + "title": "Memory Working Set", + "transparent": false, + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [{ + "op": "=", + "text": "N/A", + "value": "null" + }], + "valueName": "current" + }, { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": true, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": "${DS_PROMETHEUS}", + "decimals": 2, + "editable": true, + "error": false, + "format": "percent", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": true, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "height": "180px", + "id": 6, + "interval": null, + "isNew": true, + "links": [], + "mappingType": 1, + "mappingTypes": [{ + "name": "value to text", + "value": 1 + }, { + "name": "range to text", + "value": 2 + }], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [{ + "from": "null", + "text": "N/A", + "to": "null" + }], + "span": 4, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "targets": [{ + "expr": "sum(rate(container_cpu_usage_seconds_total{id=\"/\",instance=~\"^$instance$\"}[1m])) / sum (machine_cpu_cores{instance=~\"^$instance$\"}) * 100", + "interval": "10s", + "intervalFactor": 1, + "refId": "A", + "step": 10 + }], + "thresholds": "65, 90", + "timeFrom": "1m", + "timeShift": null, + "title": "Cpu Usage", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [{ + "op": "=", + "text": "N/A", + "value": "null" + }], + 
"valueName": "current" + }, { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": true, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": "${DS_PROMETHEUS}", + "decimals": 2, + "editable": true, + "error": false, + "format": "percent", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": true, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "height": "180px", + "id": 7, + "interval": null, + "isNew": true, + "links": [], + "mappingType": 1, + "mappingTypes": [{ + "name": "value to text", + "value": 1 + }, { + "name": "range to text", + "value": 2 + }], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [{ + "from": "null", + "text": "N/A", + "to": "null" + }], + "span": 4, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "targets": [{ + "expr": "sum(container_fs_usage_bytes{id=\"/\",instance=~\"^$instance$\"}) / sum(container_fs_limit_bytes{id=\"/\",instance=~\"^$instance$\"}) * 100", + "interval": "10s", + "intervalFactor": 1, + "legendFormat": "", + "metric": "", + "refId": "A", + "step": 10 + }], + "thresholds": "65, 90", + "timeFrom": "1m", + "timeShift": null, + "title": "Filesystem Usage", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [{ + "op": "=", + "text": "N/A", + "value": "null" + }], + "valueName": "current" + }, { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": "${DS_PROMETHEUS}", + "decimals": 2, + "editable": true, + "error": false, + "format": "bytes", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": 
true + }, + "height": "1px", + "hideTimeOverride": true, + "id": 9, + "interval": null, + "isNew": true, + "links": [], + "mappingType": 1, + "mappingTypes": [{ + "name": "value to text", + "value": 1 + }, { + "name": "range to text", + "value": 2 + }], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "20%", + "prefix": "", + "prefixFontSize": "20%", + "rangeMaps": [{ + "from": "null", + "text": "N/A", + "to": "null" + }], + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "targets": [{ + "expr": "sum(container_memory_working_set_bytes{id=\"/\",instance=~\"^$instance$\"})", + "interval": "10s", + "intervalFactor": 1, + "refId": "A", + "step": 10 + }], + "thresholds": "", + "timeFrom": "1m", + "title": "Used", + "type": "singlestat", + "valueFontSize": "50%", + "valueMaps": [{ + "op": "=", + "text": "N/A", + "value": "null" + }], + "valueName": "current" + }, { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": "${DS_PROMETHEUS}", + "decimals": 2, + "editable": true, + "error": false, + "format": "bytes", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "height": "1px", + "hideTimeOverride": true, + "id": 10, + "interval": null, + "isNew": true, + "links": [], + "mappingType": 1, + "mappingTypes": [{ + "name": "value to text", + "value": 1 + }, { + "name": "range to text", + "value": 2 + }], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [{ + "from": "null", + "text": "N/A", + "to": "null" + }], + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 
189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "targets": [{ + "expr": "sum (machine_memory_bytes{instance=~\"^$instance$\"})", + "interval": "10s", + "intervalFactor": 1, + "refId": "A", + "step": 10 + }], + "thresholds": "", + "timeFrom": "1m", + "title": "Total", + "type": "singlestat", + "valueFontSize": "50%", + "valueMaps": [{ + "op": "=", + "text": "N/A", + "value": "null" + }], + "valueName": "current" + }, { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": "${DS_PROMETHEUS}", + "decimals": 2, + "editable": true, + "error": false, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "height": "1px", + "hideTimeOverride": true, + "id": 11, + "interval": null, + "isNew": true, + "links": [], + "mappingType": 1, + "mappingTypes": [{ + "name": "value to text", + "value": 1 + }, { + "name": "range to text", + "value": 2 + }], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": " cores", + "postfixFontSize": "30%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [{ + "from": "null", + "text": "N/A", + "to": "null" + }], + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "targets": [{ + "expr": "sum (rate (container_cpu_usage_seconds_total{id=\"/\",instance=~\"^$instance$\"}[1m]))", + "interval": "10s", + "intervalFactor": 1, + "refId": "A", + "step": 10 + }], + "thresholds": "", + "timeFrom": "1m", + "timeShift": null, + "title": "Used", + "type": "singlestat", + "valueFontSize": "50%", + "valueMaps": [{ + "op": "=", + "text": "N/A", + "value": "null" + }], + "valueName": "current" + }, { + "cacheTimeout": null, + "colorBackground": false, + 
"colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": "${DS_PROMETHEUS}", + "decimals": 2, + "editable": true, + "error": false, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "height": "1px", + "hideTimeOverride": true, + "id": 12, + "interval": null, + "isNew": true, + "links": [], + "mappingType": 1, + "mappingTypes": [{ + "name": "value to text", + "value": 1 + }, { + "name": "range to text", + "value": 2 + }], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": " cores", + "postfixFontSize": "30%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [{ + "from": "null", + "text": "N/A", + "to": "null" + }], + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "targets": [{ + "expr": "sum (machine_cpu_cores{instance=~\"^$instance$\"})", + "interval": "10s", + "intervalFactor": 1, + "refId": "A", + "step": 10 + }], + "thresholds": "", + "timeFrom": "1m", + "title": "Total", + "type": "singlestat", + "valueFontSize": "50%", + "valueMaps": [{ + "op": "=", + "text": "N/A", + "value": "null" + }], + "valueName": "current" + }, { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": "${DS_PROMETHEUS}", + "decimals": 2, + "editable": true, + "error": false, + "format": "bytes", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "height": "1px", + "hideTimeOverride": true, + "id": 13, + "interval": null, + "isNew": true, + "links": [], + "mappingType": 1, + "mappingTypes": [{ + "name": "value to text", + "value": 1 + }, { + "name": 
"range to text", + "value": 2 + }], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [{ + "from": "null", + "text": "N/A", + "to": "null" + }], + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "targets": [{ + "expr": "sum(container_fs_usage_bytes{id=\"/\",instance=~\"^$instance$\"})", + "interval": "10s", + "intervalFactor": 1, + "refId": "A", + "step": 10 + }], + "thresholds": "", + "timeFrom": "1m", + "title": "Used", + "type": "singlestat", + "valueFontSize": "50%", + "valueMaps": [{ + "op": "=", + "text": "N/A", + "value": "null" + }], + "valueName": "current" + }, { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": "${DS_PROMETHEUS}", + "decimals": 2, + "editable": true, + "error": false, + "format": "bytes", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "height": "1px", + "hideTimeOverride": true, + "id": 14, + "interval": null, + "isNew": true, + "links": [], + "mappingType": 1, + "mappingTypes": [{ + "name": "value to text", + "value": 1 + }, { + "name": "range to text", + "value": 2 + }], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [{ + "from": "null", + "text": "N/A", + "to": "null" + }], + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "targets": [{ + "expr": "sum (container_fs_limit_bytes{id=\"/\",instance=~\"^$instance$\"})", + "interval": "10s", + "intervalFactor": 1, + 
"refId": "A", + "step": 10 + }], + "thresholds": "", + "timeFrom": "1m", + "title": "Total", + "type": "singlestat", + "valueFontSize": "50%", + "valueMaps": [{ + "op": "=", + "text": "N/A", + "value": "null" + }], + "valueName": "current" + }, { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PROMETHEUS}", + "decimals": 2, + "editable": true, + "error": false, + "fill": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)", + "thresholdLine": false + }, + "height": "200px", + "id": 32, + "isNew": true, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": 200, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [{ + "expr": "sum(rate(container_network_receive_bytes_total{instance=~\"^$instance$\",namespace=~\"^$namespace$\"}[1m]))", + "interval": "", + "intervalFactor": 2, + "legendFormat": "receive", + "metric": "network", + "refId": "A", + "step": 240 + }, { + "expr": "- sum(rate(container_network_transmit_bytes_total{instance=~\"^$instance$\",namespace=~\"^$namespace$\"}[1m]))", + "interval": "", + "intervalFactor": 2, + "legendFormat": "transmit", + "metric": "network", + "refId": "B", + "step": 240 + }], + "timeFrom": null, + "timeShift": null, + "title": "Network", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "transparent": false, + "type": "graph", + "xaxis": { + "show": true + }, + "yaxes": [{ + "format": "Bps", + "label": "transmit / receive", + "logBase": 1, + "max": null, + "min": null, + "show": 
true + }, { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + }] + }], + "showTitle": true, + "title": "all pods" + }, { + "collapse": false, + "editable": true, + "height": "250px", + "panels": [{ + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PROMETHEUS}", + "decimals": 3, + "editable": true, + "error": false, + "fill": 0, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, + "height": "", + "id": 17, + "isNew": true, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [{ + "expr": "sum(rate(container_cpu_usage_seconds_total{image!=\"\",name=~\"^k8s_.*\",instance=~\"^$instance$\",namespace=~\"^$namespace$\"}[1m])) by (pod_name)", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{ pod_name }}", + "metric": "container_cpu", + "refId": "A", + "step": 240 + }], + "timeFrom": null, + "timeShift": null, + "title": "Cpu Usage", + "tooltip": { + "msResolution": true, + "shared": false, + "sort": 2, + "value_type": "cumulative" + }, + "transparent": false, + "type": "graph", + "xaxis": { + "show": true + }, + "yaxes": [{ + "format": "none", + "label": "cores", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + }] + }, { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PROMETHEUS}", + 
"decimals": 2, + "editable": true, + "error": false, + "fill": 0, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, + "id": 33, + "isNew": true, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [{ + "expr": "sum (container_memory_working_set_bytes{image!=\"\",name=~\"^k8s_.*\",instance=~\"^$instance$\",namespace=~\"^$namespace$\"}) by (pod_name)", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{ pod_name }}", + "metric": "", + "refId": "A", + "step": 240 + }], + "timeFrom": null, + "timeShift": null, + "title": "Memory Working Set", + "tooltip": { + "msResolution": false, + "shared": false, + "sort": 2, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "show": true + }, + "yaxes": [{ + "format": "bytes", + "label": "used", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + }] + }, { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PROMETHEUS}", + "decimals": 2, + "editable": true, + "error": false, + "fill": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, + "id": 16, + "isNew": true, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": 
false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": 200, + "sort": "avg", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [{ + "expr": "sum (rate (container_network_receive_bytes_total{image!=\"\",name=~\"^k8s_.*\",instance=~\"^$instance$\",namespace=~\"^$namespace$\"}[1m])) by (pod_name)", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{ pod_name }} < in", + "metric": "network", + "refId": "A", + "step": 240 + }, { + "expr": "- sum (rate (container_network_transmit_bytes_total{image!=\"\",name=~\"^k8s_.*\",instance=~\"^$instance$\",namespace=~\"^$namespace$\"}[1m])) by (pod_name)", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{ pod_name }} > out", + "metric": "network", + "refId": "B", + "step": 240 + }], + "timeFrom": null, + "timeShift": null, + "title": "Network", + "tooltip": { + "msResolution": false, + "shared": false, + "sort": 2, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "show": true + }, + "yaxes": [{ + "format": "Bps", + "label": "transmit / receive", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + }] + }, { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PROMETHEUS}", + "decimals": 2, + "editable": true, + "error": false, + "fill": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, + "id": 34, + "isNew": true, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "rightSide": true, 
+ "show": true, + "sideWidth": 200, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [{ + "expr": "sum(container_fs_usage_bytes{image!=\"\",name=~\"^k8s_.*\",instance=~\"^$instance$\",namespace=~\"^$namespace$\"}) by (pod_name)", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{ pod_name }}", + "metric": "network", + "refId": "A", + "step": 240 + }], + "timeFrom": null, + "timeShift": null, + "title": "Filesystem", + "tooltip": { + "msResolution": false, + "shared": false, + "sort": 2, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "show": true + }, + "yaxes": [{ + "format": "bytes", + "label": "used", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + }] + }], + "showTitle": true, + "title": "each pod" + }], + "time": { + "from": "now-3d", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "templating": { + "list": [{ + "allValue": ".*", + "current": {}, + "datasource": "${DS_PROMETHEUS}", + "hide": 0, + "includeAll": true, + "label": "Instance", + "multi": false, + "name": "instance", + "options": [], + "query": "label_values(instance)", + "refresh": 1, + "regex": "", + "type": "query" + }, { + "current": {}, + "datasource": "${DS_PROMETHEUS}", + "hide": 0, + "includeAll": true, + "label": "Namespace", + "multi": true, + "name": "namespace", + "options": [], + "query": "label_values(namespace)", + "refresh": 1, + "regex": "", + 
"type": "query" + }] + }, + "annotations": { + "list": [] + }, + "refresh": false, + "schemaVersion": 12, + "version": 8, + "links": [], + "gnetId": 737 + } + prometheus-datasource.json: | + { + "name": "prometheus", + "type": "prometheus", + "url": "http://prometheus:9090", + "access": "proxy", + "basicAuth": false + } +kind: ConfigMap +metadata: + creationTimestamp: null + name: grafana-import-dashboards + namespace: monitoring diff --git a/grafana/grafana-deploy.yml b/grafana/grafana-deploy.yml new file mode 100644 index 00000000..c7993d80 --- /dev/null +++ b/grafana/grafana-deploy.yml @@ -0,0 +1,66 @@ +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: grafana-core + namespace: monitoring + labels: + app: grafana + component: core +spec: + replicas: 1 + selector: + matchLabels: + app: grafana + template: + metadata: + labels: + app: grafana + component: core + spec: + containers: + - image: grafana/grafana:4.2.0 + name: grafana-core + imagePullPolicy: IfNotPresent + # env: + resources: + # keep request = limit to keep this container in guaranteed class + limits: + cpu: 100m + memory: 100Mi + requests: + cpu: 100m + memory: 100Mi + env: + # The following env variables set up basic auth twith the default admin user and admin password. 
+ - name: GF_AUTH_BASIC_ENABLED + value: "true" + - name: GF_SECURITY_ADMIN_USER + valueFrom: + secretKeyRef: + name: grafana + key: admin-username + - name: GF_SECURITY_ADMIN_PASSWORD + valueFrom: + secretKeyRef: + name: grafana + key: admin-password + - name: GF_AUTH_ANONYMOUS_ENABLED + value: "false" + # - name: GF_AUTH_ANONYMOUS_ORG_ROLE + # value: Admin + # does not really work, because of template variables in exported dashboards: + # - name: GF_DASHBOARDS_JSON_ENABLED + # value: "true" + readinessProbe: + httpGet: + path: /login + port: 3000 + # initialDelaySeconds: 30 + # timeoutSeconds: 1 + volumeMounts: + - name: grafana-persistent-storage + mountPath: /var/lib/grafana + volumes: + - name: grafana-persistent-storage + emptyDir: {} diff --git a/grafana/grafana-secret.yml b/grafana/grafana-secret.yml new file mode 100644 index 00000000..9023b292 --- /dev/null +++ b/grafana/grafana-secret.yml @@ -0,0 +1,10 @@ +--- +apiVersion: v1 +kind: Secret +data: + admin-password: YWRtaW4= + admin-username: YWRtaW4= +metadata: + name: grafana + namespace: monitoring +type: Opaque diff --git a/grafana/grafana-svc.yml b/grafana/grafana-svc.yml new file mode 100644 index 00000000..ac1e0d7a --- /dev/null +++ b/grafana/grafana-svc.yml @@ -0,0 +1,16 @@ +--- +apiVersion: v1 +kind: Service +metadata: + name: grafana + namespace: monitoring + labels: + app: grafana + component: core +spec: + type: NodePort + ports: + - port: 3000 + selector: + app: grafana + component: core diff --git a/grafana/import-dashboard-job.yml b/grafana/import-dashboard-job.yml new file mode 100644 index 00000000..5bcfdf73 --- /dev/null +++ b/grafana/import-dashboard-job.yml @@ -0,0 +1,80 @@ +--- +apiVersion: batch/v1 +kind: Job +metadata: + name: grafana-import-dashboards + namespace: monitoring + labels: + app: grafana + component: import-dashboards +spec: + template: + metadata: + name: grafana-import-dashboards + labels: + app: grafana + component: import-dashboards + spec: + serviceAccountName: 
prometheus-k8s + initContainers: + - name: wait-for-grafana + image: giantswarm/tiny-tools + args: + - /bin/sh + - -c + - > + set -x; + while [ $(curl -Lsw '%{http_code}' "http://grafana:3000" -o /dev/null) -ne 200 ]; do + echo '.' + sleep 15; + done + containers: + - name: grafana-import-dashboards + image: giantswarm/tiny-tools + command: ["/bin/sh", "-c"] + workingDir: /opt/grafana-import-dashboards + args: + - > + for file in *-datasource.json ; do + if [ -e "$file" ] ; then + echo "importing $file" && + curl --silent --fail --show-error \ + --request POST http://${GF_ADMIN_USER}:${GF_ADMIN_PASSWORD}@grafana:3000/api/datasources \ + --header "Content-Type: application/json" \ + --data-binary "@$file" ; + echo "" ; + fi + done ; + for file in *-dashboard.json ; do + if [ -e "$file" ] ; then + echo "importing $file" && + ( echo '{"dashboard":'; \ + cat "$file"; \ + echo ',"overwrite":true,"inputs":[{"name":"DS_PROMETHEUS","type":"datasource","pluginId":"prometheus","value":"prometheus"}]}' ) \ + | jq -c '.' 
\ + | curl --silent --fail --show-error \ + --request POST http://${GF_ADMIN_USER}:${GF_ADMIN_PASSWORD}@grafana:3000/api/dashboards/import \ + --header "Content-Type: application/json" \ + --data-binary "@-" ; + echo "" ; + fi + done + env: + - name: GF_ADMIN_USER + valueFrom: + secretKeyRef: + name: grafana + key: admin-username + - name: GF_ADMIN_PASSWORD + valueFrom: + secretKeyRef: + name: grafana + key: admin-password + volumeMounts: + - name: config-volume + mountPath: /opt/grafana-import-dashboards + restartPolicy: Never + volumes: + - name: config-volume + configMap: + name: grafana-import-dashboards diff --git a/kafka/Kustomization b/kafka/Kustomization index 0590b6b5..27a9d9fe 100644 --- a/kafka/Kustomization +++ b/kafka/Kustomization @@ -1,5 +1,5 @@ resources: -- 10broker-config.yml -- 20dns.yml -- 30bootstrap-service.yml -- 50kafka.yml +- broker-config.yml +- kafka-headless.yml +- bootstrap-svc.yml +- kafka.yml diff --git a/kafka/30bootstrap-service.yml b/kafka/bootstrap-svc.yml similarity index 88% rename from kafka/30bootstrap-service.yml rename to kafka/bootstrap-svc.yml index 7c2a3376..ab0b8e84 100644 --- a/kafka/30bootstrap-service.yml +++ b/kafka/bootstrap-svc.yml @@ -5,6 +5,7 @@ metadata: name: bootstrap namespace: kafka spec: + type: ClusterIP ports: - port: 9092 selector: diff --git a/kafka/10broker-config.yml b/kafka/broker-config.yml similarity index 100% rename from kafka/10broker-config.yml rename to kafka/broker-config.yml diff --git a/kafka/20dns.yml b/kafka/kafka-headless.yml similarity index 100% rename from kafka/20dns.yml rename to kafka/kafka-headless.yml diff --git a/kafka/50kafka.yml b/kafka/kafka.yml similarity index 99% rename from kafka/50kafka.yml rename to kafka/kafka.yml index a001f1d5..0a15d729 100644 --- a/kafka/50kafka.yml +++ b/kafka/kafka.yml @@ -8,7 +8,7 @@ spec: matchLabels: app: kafka serviceName: "kafka" - replicas: 3 + replicas: 7 updateStrategy: type: RollingUpdate podManagementPolicy: Parallel diff --git 
a/consumers-prometheus/kminion-service.yaml b/kminions/kminion-service.yaml similarity index 75% rename from consumers-prometheus/kminion-service.yaml rename to kminions/kminion-service.yaml index 04bb8d9f..f0abb551 100644 --- a/consumers-prometheus/kminion-service.yaml +++ b/kminions/kminion-service.yaml @@ -6,7 +6,10 @@ metadata: labels: &labels app: kminion spec: + type: NodePort selector: *labels ports: - name: http port: 8080 + targetPort: 8080 + nodePort: 30077 diff --git a/consumers-prometheus/kminion.yaml b/kminions/kminion.yaml similarity index 100% rename from consumers-prometheus/kminion.yaml rename to kminions/kminion.yaml diff --git a/consumers-prometheus/kustomization.yaml b/kminions/kustomization.yaml similarity index 100% rename from consumers-prometheus/kustomization.yaml rename to kminions/kustomization.yaml diff --git a/linkedin-burrow/burrow-config.yml b/linkedin-burrow/burrow-config.yml index 5fe6dda1..f1d41ceb 100644 --- a/linkedin-burrow/burrow-config.yml +++ b/linkedin-burrow/burrow-config.yml @@ -9,20 +9,17 @@ data: servers=[ "zookeeper:2181" ] timeout=6 root-path="/burrow" - [cluster.local] class-name="kafka" - servers=[ "kafka-0.broker:9092", "kafka-1.broker:9092", "kafka-2.broker:9092" ] + servers=[ "bootstrap:9092" ] topic-refresh=60 offset-refresh=30 - [consumer.local] class-name="kafka" cluster="local" - servers=[ "kafka-0.broker:9092", "kafka-1.broker:9092", "kafka-2.broker:9092" ] + servers=[ "bootstrap:9092" ] group-blacklist="" group-whitelist="" - [consumer.local_zk] class-name="kafka_zk" cluster="local" @@ -31,6 +28,5 @@ data: zookeeper-timeout=30 group-blacklist="" group-whitelist="" - [httpserver.default] address=":8000" diff --git a/linkedin-burrow/burrow-service.yml b/linkedin-burrow/burrow-service.yml index 15eac06a..13c5d6a0 100644 --- a/linkedin-burrow/burrow-service.yml +++ b/linkedin-burrow/burrow-service.yml @@ -4,15 +4,19 @@ metadata: name: burrow namespace: kafka spec: + type: NodePort selector: app: burrow ports: - 
name: web protocol: TCP port: 80 + nodePort: 30031 - name: api protocol: TCP port: 8000 + nodePort: 30032 - name: prometheus protocol: TCP port: 8080 + nodePort: 30033 diff --git a/avro-tools/avro-tools-config.yml b/maintenance/avro-tools/avro-tools-config.yml similarity index 100% rename from avro-tools/avro-tools-config.yml rename to maintenance/avro-tools/avro-tools-config.yml diff --git a/avro-tools/rest-service.yml b/maintenance/avro-tools/rest-service.yml similarity index 100% rename from avro-tools/rest-service.yml rename to maintenance/avro-tools/rest-service.yml diff --git a/avro-tools/rest.yml b/maintenance/avro-tools/rest.yml similarity index 100% rename from avro-tools/rest.yml rename to maintenance/avro-tools/rest.yml diff --git a/avro-tools/schemas-service.yml b/maintenance/avro-tools/schemas-service.yml similarity index 100% rename from avro-tools/schemas-service.yml rename to maintenance/avro-tools/schemas-service.yml diff --git a/avro-tools/schemas.yml b/maintenance/avro-tools/schemas.yml similarity index 100% rename from avro-tools/schemas.yml rename to maintenance/avro-tools/schemas.yml diff --git a/avro-tools/test/70rest-test1.yml b/maintenance/avro-tools/test/70rest-test1.yml similarity index 100% rename from avro-tools/test/70rest-test1.yml rename to maintenance/avro-tools/test/70rest-test1.yml diff --git a/avro-tools/test/rest-curl.yml b/maintenance/avro-tools/test/rest-curl.yml similarity index 100% rename from avro-tools/test/rest-curl.yml rename to maintenance/avro-tools/test/rest-curl.yml diff --git a/monitoring/manifests-all.yaml b/monitoring/manifests-all.yaml new file mode 100644 index 00000000..dfe6d388 --- /dev/null +++ b/monitoring/manifests-all.yaml @@ -0,0 +1,488 @@ +--- +apiVersion: v1 +data: + prometheus.yaml: | + global: + scrape_interval: 10s + scrape_timeout: 10s + evaluation_interval: 10s + rule_files: + - "/etc/prometheus-rules/*.rules" + scrape_configs: + - job_name: 'jmx-kafka' + static_configs: + - targets: 
['54.241.141.202:32401'] + # https://github.com/prometheus/prometheus/blob/master/documentation/examples/prometheus-kubernetes.yml#L37 + - job_name: 'kubernetes-nodes' + tls_config: + ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token + kubernetes_sd_configs: + - role: node + relabel_configs: + - source_labels: [__address__] + regex: '(.*):10250' + replacement: '${1}:10255' + target_label: __address__ + + # https://github.com/prometheus/prometheus/blob/master/documentation/examples/prometheus-kubernetes.yml#L79 + - job_name: 'kubernetes-endpoints' + kubernetes_sd_configs: + - role: endpoints + relabel_configs: + - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape] + action: keep + regex: true + - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scheme] + action: replace + target_label: __scheme__ + regex: (https?) + - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_path] + action: replace + target_label: __metrics_path__ + regex: (.+) + - source_labels: [__address__, __meta_kubernetes_service_annotation_prometheus_io_port] + action: replace + target_label: __address__ + regex: (.+)(?::\d+);(\d+) + replacement: $1:$2 + - action: labelmap + regex: __meta_kubernetes_service_label_(.+) + - source_labels: [__meta_kubernetes_namespace] + action: replace + target_label: kubernetes_namespace + - source_labels: [__meta_kubernetes_service_name] + action: replace + target_label: kubernetes_name + + # https://github.com/prometheus/prometheus/blob/master/documentation/examples/prometheus-kubernetes.yml#L119 + - job_name: 'kubernetes-services' + metrics_path: /probe + params: + module: [http_2xx] + kubernetes_sd_configs: + - role: service + relabel_configs: + - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_probe] + action: keep + regex: true + - source_labels: [__address__] + target_label: __param_target + - 
target_label: __address__ + replacement: blackbox + - source_labels: [__param_target] + target_label: instance + - action: labelmap + regex: __meta_kubernetes_service_label_(.+) + - source_labels: [__meta_kubernetes_namespace] + target_label: kubernetes_namespace + - source_labels: [__meta_kubernetes_service_name] + target_label: kubernetes_name + + # https://github.com/prometheus/prometheus/blob/master/documentation/examples/prometheus-kubernetes.yml#L156 + - job_name: 'kubernetes-pods' + kubernetes_sd_configs: + - role: pod + relabel_configs: + - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape] + action: keep + regex: true + - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path] + action: replace + target_label: __metrics_path__ + regex: (.+) + - source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port] + action: replace + regex: (.+):(?:\d+);(\d+) + replacement: ${1}:${2} + target_label: __address__ + - action: labelmap + regex: __meta_kubernetes_pod_label_(.+) + - source_labels: [__meta_kubernetes_namespace] + action: replace + target_label: kubernetes_namespace + - source_labels: [__meta_kubernetes_pod_name] + action: replace + target_label: kubernetes_pod_name + - source_labels: [__meta_kubernetes_pod_container_port_number] + action: keep + regex: 9\d{3} + + - job_name: 'kubernetes-cadvisor' + scheme: https + tls_config: + ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token + kubernetes_sd_configs: + - role: node + relabel_configs: + - action: labelmap + - action: labelmap + regex: __meta_kubernetes_node_label_(.+) + - target_label: __address__ + replacement: kubernetes.default.svc:443 + - source_labels: [__meta_kubernetes_node_name] + regex: (.+) + target_label: __metrics_path__ + replacement: /api/v1/nodes/${1}/proxy/metrics/cadvisor + +kind: ConfigMap +metadata: + creationTimestamp: null + name: prometheus-core + 
namespace: monitoring +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: prometheus-core + namespace: monitoring + labels: + app: prometheus + component: core +spec: + replicas: 1 + selector: + matchLabels: + app: prometheus + template: + metadata: + name: prometheus-main + labels: + app: prometheus + component: core + spec: + serviceAccountName: prometheus-k8s + containers: + - name: prometheus + image: prom/prometheus:v1.7.0 + args: + - '-storage.local.retention=12h' + - '-storage.local.memory-chunks=500000' + - '-config.file=/etc/prometheus/prometheus.yaml' + - '-alertmanager.url=http://alertmanager:9093/' + ports: + - name: webui + containerPort: 9090 + resources: + requests: + cpu: 100m + memory: 500M + limits: + cpu: 100m + memory: 500M + volumeMounts: + - name: config-volume + mountPath: /etc/prometheus + - name: rules-volume + mountPath: /etc/prometheus-rules + volumes: + - name: config-volume + configMap: + name: prometheus-core + - name: rules-volume + configMap: + name: prometheus-rules +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: kube-state-metrics + namespace: monitoring + labels: + app: kube-state-metrics +spec: + replicas: 1 + selector: + matchLabels: + app: kube-state-metrics + template: + metadata: + labels: + app: kube-state-metrics + spec: + serviceAccountName: kube-state-metrics + containers: + - name: kube-state-metrics + image: gcr.io/google_containers/kube-state-metrics:v0.5.0 + ports: + - containerPort: 8080 +--- +# --- +# apiVersion: rbac.authorization.k8s.io/v1beta1 +# kind: ClusterRoleBinding +# metadata: +# name: kube-state-metrics +# roleRef: +# apiGroup: rbac.authorization.k8s.io +# kind: ClusterRole +# name: kube-state-metrics +# subjects: +# - kind: ServiceAccount +# name: kube-state-metrics +# namespace: monitoring +# --- +# apiVersion: rbac.authorization.k8s.io/v1beta1 +# kind: ClusterRole +# metadata: +# name: kube-state-metrics +# rules: +# - apiGroups: [""] +# resources: +# - nodes +# - pods +# - 
services +# - resourcequotas +# - replicationcontrollers +# - limitranges +# verbs: ["list", "watch"] +# - apiGroups: ["apps"] +# resources: +# - daemonsets +# - deployments +# - replicasets +# verbs: ["list", "watch"] +# --- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: kube-state-metrics + namespace: monitoring +--- +apiVersion: v1 +kind: Service +metadata: + annotations: + prometheus.io/scrape: 'true' + name: kube-state-metrics + namespace: monitoring + labels: + app: kube-state-metrics +spec: + ports: + - name: kube-state-metrics + port: 8080 + protocol: TCP + selector: + app: kube-state-metrics + +--- +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: node-directory-size-metrics + namespace: monitoring + labels: + app: node-directory-size-metrics + annotations: + description: | + This `DaemonSet` provides metrics in Prometheus format about disk usage on the nodes. + The container `read-du` reads in sizes of all directories below /mnt and writes that to `/tmp/metrics`. It only reports directories larger than `100M` for now. + The other container `caddy` just hands out the contents of that file on request via `http` on `/metrics` at port `9102` which are the defaults for Prometheus. + These are scheduled on every node in the Kubernetes cluster. + To choose directories from the node to check, just mount them on the `read-du` container below `/mnt`. +spec: + selector: + matchLabels: + app: node-directory-size-metrics + template: + metadata: + labels: + app: node-directory-size-metrics + annotations: + prometheus.io/scrape: 'true' + prometheus.io/port: '9102' + description: | + This `Pod` provides metrics in Prometheus format about disk usage on the node. + The container `read-du` reads in sizes of all directories below /mnt and writes that to `/tmp/metrics`. It only reports directories larger than `100M` for now. 
+ The other container `caddy` just hands out the contents of that file on request on `/metrics` at port `9102` which are the defaults for Prometheus. + This `Pod` is scheduled on every node in the Kubernetes cluster. + To choose directories from the node to check just mount them on `read-du` below `/mnt`. + spec: + containers: + - name: read-du + image: giantswarm/tiny-tools + imagePullPolicy: Always + # FIXME threshold via env var + # The + command: + - fish + - --command + - | + touch /tmp/metrics-temp + while true + for directory in (du --bytes --separate-dirs --threshold=100M /mnt) + echo $directory | read size path + echo "node_directory_size_bytes{path=\"$path\"} $size" \ + >> /tmp/metrics-temp + end + mv /tmp/metrics-temp /tmp/metrics + sleep 300 + end + volumeMounts: + - name: host-fs-var + mountPath: /mnt/var + readOnly: true + - name: metrics + mountPath: /tmp + - name: caddy + image: dockermuenster/caddy:0.9.3 + command: + - "caddy" + - "-port=9102" + - "-root=/var/www" + ports: + - containerPort: 9102 + volumeMounts: + - name: metrics + mountPath: /var/www + volumes: + - name: host-fs-var + hostPath: + path: /var + - name: metrics + emptyDir: + medium: Memory +--- +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: prometheus-node-exporter + namespace: monitoring + labels: + app: prometheus + component: node-exporter +spec: + selector: + matchLabels: + app: prometheus + template: + metadata: + name: prometheus-node-exporter + labels: + app: prometheus + component: node-exporter + spec: + containers: + - image: prom/node-exporter:v0.14.0 + name: prometheus-node-exporter + ports: + - name: prom-node-exp + #^ must be an IANA_SVC_NAME (at most 15 characters, ..) 
+ containerPort: 9100 + hostPort: 9100 + hostNetwork: true + hostPID: true +--- +apiVersion: v1 +kind: Service +metadata: + annotations: + prometheus.io/scrape: 'true' + name: prometheus-node-exporter + namespace: monitoring + labels: + app: prometheus + component: node-exporter +spec: + clusterIP: None + ports: + - name: prometheus-node-exporter + port: 9100 + protocol: TCP + selector: + app: prometheus + component: node-exporter + type: ClusterIP +--- +apiVersion: v1 +data: + cpu-usage.rules: | + ALERT NodeCPUUsage + IF (100 - (avg by (instance) (irate(node_cpu{name="node-exporter",mode="idle"}[5m])) * 100)) > 75 + FOR 2m + LABELS { + severity="page" + } + ANNOTATIONS { + SUMMARY = "{{$labels.instance}}: High CPU usage detected", + DESCRIPTION = "{{$labels.instance}}: CPU usage is above 75% (current value is: {{ $value }})" + } + instance-availability.rules: | + ALERT InstanceDown + IF up == 0 + FOR 1m + LABELS { severity = "page" } + ANNOTATIONS { + summary = "Instance {{ $labels.instance }} down", + description = "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 1 minute.", + } + low-disk-space.rules: | + ALERT NodeLowRootDisk + IF ((node_filesystem_size{mountpoint="/root-disk"} - node_filesystem_free{mountpoint="/root-disk"} ) / node_filesystem_size{mountpoint="/root-disk"} * 100) > 75 + FOR 2m + LABELS { + severity="page" + } + ANNOTATIONS { + SUMMARY = "{{$labels.instance}}: Low root disk space", + DESCRIPTION = "{{$labels.instance}}: Root disk usage is above 75% (current value is: {{ $value }})" + } + + ALERT NodeLowDataDisk + IF ((node_filesystem_size{mountpoint="/data-disk"} - node_filesystem_free{mountpoint="/data-disk"} ) / node_filesystem_size{mountpoint="/data-disk"} * 100) > 75 + FOR 2m + LABELS { + severity="page" + } + ANNOTATIONS { + SUMMARY = "{{$labels.instance}}: Low data disk space", + DESCRIPTION = "{{$labels.instance}}: Data disk usage is above 75% (current value is: {{ $value }})" + } + mem-usage.rules: | + ALERT 
NodeSwapUsage + IF (((node_memory_SwapTotal-node_memory_SwapFree)/node_memory_SwapTotal)*100) > 75 + FOR 2m + LABELS { + severity="page" + } + ANNOTATIONS { + SUMMARY = "{{$labels.instance}}: Swap usage detected", + DESCRIPTION = "{{$labels.instance}}: Swap usage is above 75% (current value is: {{ $value }})" + } + + ALERT NodeMemoryUsage + IF (((node_memory_MemTotal-node_memory_MemAvailable)/(node_memory_MemTotal)*100)) > 75 + FOR 2m + LABELS { + severity="page" + } + ANNOTATIONS { + SUMMARY = "{{$labels.instance}}: High memory usage detected", + DESCRIPTION = "{{$labels.instance}}: Memory usage is above 75% (current value is: {{ $value }})" + } +kind: ConfigMap +metadata: + creationTimestamp: null + name: prometheus-rules + namespace: monitoring +--- +apiVersion: v1 +kind: Service +metadata: + name: prometheus + namespace: monitoring + labels: + app: prometheus + component: core + annotations: + prometheus.io/scrape: 'true' +spec: + type: NodePort + ports: + - port: 9090 + protocol: TCP + name: webui + selector: + app: prometheus + component: core diff --git a/native/distroless.yaml b/native/distroless.yaml deleted file mode 100644 index e00b5205..00000000 --- a/native/distroless.yaml +++ /dev/null @@ -1,9 +0,0 @@ -# The more specific removes are to make sure that there was a shell that we're removing -- op: remove - path: /spec/template/spec/containers/0/readinessProbe/exec -- op: remove - path: /spec/template/spec/containers/0/readinessProbe -- op: remove - path: /spec/template/spec/containers/0/lifecycle/preStop/exec -- op: remove - path: /spec/template/spec/containers/0/lifecycle/preStop diff --git a/native/kustomization.yaml b/native/kustomization.yaml deleted file mode 100644 index 6a489db7..00000000 --- a/native/kustomization.yaml +++ /dev/null @@ -1,17 +0,0 @@ -bases: -- ../nonroot -patchesStrategicMerge: -- native-image-zookeeper.yaml -patchesJson6902: -- target: - group: apps - version: v1 - kind: StatefulSet - name: pzoo - path: distroless.yaml 
-- target: - group: apps - version: v1 - kind: StatefulSet - name: zoo - path: distroless.yaml \ No newline at end of file diff --git a/native/native-image-zookeeper.yaml b/native/native-image-zookeeper.yaml deleted file mode 100644 index 5d38e65d..00000000 --- a/native/native-image-zookeeper.yaml +++ /dev/null @@ -1,29 +0,0 @@ -apiVersion: apps/v1 -kind: StatefulSet -metadata: - name: pzoo -spec: - template: - spec: - containers: - - name: zookeeper - image: solsson/kafka:native-zookeeper-server-start@sha256:ba3a0632240b8906a3b5bb6441e98ad9d9de73cb716b156ca68f1b435c819e8b - resources: - requests: - cpu: 10m - memory: 25Mi ---- -apiVersion: apps/v1 -kind: StatefulSet -metadata: - name: zoo -spec: - template: - spec: - containers: - - name: zookeeper - image: solsson/kafka:native-zookeeper-server-start@sha256:ba3a0632240b8906a3b5bb6441e98ad9d9de73cb716b156ca68f1b435c819e8b - resources: - requests: - cpu: 10m - memory: 25Mi diff --git a/nonroot/entrypoint-from-image.yaml b/nonroot/entrypoint-from-image.yaml deleted file mode 100644 index fe110c35..00000000 --- a/nonroot/entrypoint-from-image.yaml +++ /dev/null @@ -1,2 +0,0 @@ -- op: remove - path: /spec/template/spec/containers/0/command diff --git a/nonroot/fsgroup-65534.yaml b/nonroot/fsgroup-65534.yaml deleted file mode 100644 index 5ebd6cba..00000000 --- a/nonroot/fsgroup-65534.yaml +++ /dev/null @@ -1,4 +0,0 @@ -- op: add - path: /spec/template/spec/securityContext - value: - fsGroup: 65534 diff --git a/nonroot/kustomization.yaml b/nonroot/kustomization.yaml deleted file mode 100644 index a3526dfb..00000000 --- a/nonroot/kustomization.yaml +++ /dev/null @@ -1,44 +0,0 @@ -bases: -- ../rbac-namespace-default -- ../kafka -- ../zookeeper -patchesStrategicMerge: -- nonroot-image-kafka.yaml -- nonroot-image-zookeeper.yaml -patchesJson6902: -- target: - group: apps - version: v1 - kind: StatefulSet - name: kafka - path: fsgroup-65534.yaml -- target: - group: apps - version: v1 - kind: StatefulSet - name: pzoo - path: 
fsgroup-65534.yaml -- target: - group: apps - version: v1 - kind: StatefulSet - name: zoo - path: fsgroup-65534.yaml -- target: - group: apps - version: v1 - kind: StatefulSet - name: kafka - path: entrypoint-from-image.yaml -- target: - group: apps - version: v1 - kind: StatefulSet - name: pzoo - path: entrypoint-from-image.yaml -- target: - group: apps - version: v1 - kind: StatefulSet - name: zoo - path: entrypoint-from-image.yaml diff --git a/nonroot/nonroot-image-kafka.yaml b/nonroot/nonroot-image-kafka.yaml deleted file mode 100644 index 8d124f4b..00000000 --- a/nonroot/nonroot-image-kafka.yaml +++ /dev/null @@ -1,15 +0,0 @@ -apiVersion: apps/v1 -kind: StatefulSet -metadata: - name: kafka -spec: - template: - spec: - initContainers: - - name: init-config - image: solsson/kafka:initutils-nonroot@sha256:8988aca5b34feabe8d7d4e368f74b2ede398f692c7e99a38b262a938d475812c - containers: - - name: broker - image: solsson/kafka:2.5.1-kafka-server-start@sha256:e4d34530e500eb9724e7778595f4cb244df228336090bb7ed5f7dd7433d4d41d - args: - - /etc/kafka/server.properties.$(POD_NAME) diff --git a/nonroot/nonroot-image-zookeeper.yaml b/nonroot/nonroot-image-zookeeper.yaml deleted file mode 100644 index 880a33d2..00000000 --- a/nonroot/nonroot-image-zookeeper.yaml +++ /dev/null @@ -1,31 +0,0 @@ -apiVersion: apps/v1 -kind: StatefulSet -metadata: - name: pzoo -spec: - template: - spec: - initContainers: - - name: init-config - image: solsson/kafka:initutils-nonroot@sha256:8988aca5b34feabe8d7d4e368f74b2ede398f692c7e99a38b262a938d475812c - containers: - - name: zookeeper - image: solsson/kafka:2.5.1-zookeeper-server-start@sha256:b3af82c547b8188fa303520901eee6a526c6e34d87cfd78c1569a3a2c96ad5cd - args: - - /etc/kafka/zookeeper.properties.scale-$(REPLICAS).$(POD_NAME) ---- -apiVersion: apps/v1 -kind: StatefulSet -metadata: - name: zoo -spec: - template: - spec: - initContainers: - - name: init-config - image: 
solsson/kafka:initutils-nonroot@sha256:8988aca5b34feabe8d7d4e368f74b2ede398f692c7e99a38b262a938d475812c - containers: - - name: zookeeper - image: solsson/kafka:2.5.1-zookeeper-server-start@sha256:b3af82c547b8188fa303520901eee6a526c6e34d87cfd78c1569a3a2c96ad5cd - args: - - /etc/kafka/zookeeper.properties.scale-$(REPLICAS).$(POD_NAME) diff --git a/outside-services/outside-0.yml b/outside-services/outside-0.yml deleted file mode 100644 index 7bc12bd7..00000000 --- a/outside-services/outside-0.yml +++ /dev/null @@ -1,15 +0,0 @@ -kind: Service -apiVersion: v1 -metadata: - name: outside-0 - namespace: kafka -spec: - selector: - app: kafka - kafka-broker-id: "0" - ports: - - protocol: TCP - targetPort: 9094 - port: 32400 - nodePort: 32400 - type: NodePort \ No newline at end of file diff --git a/outside-services/outside-1.yml b/outside-services/outside-1.yml index 1642ee02..a0660851 100644 --- a/outside-services/outside-1.yml +++ b/outside-services/outside-1.yml @@ -9,7 +9,7 @@ spec: kafka-broker-id: "1" ports: - protocol: TCP - targetPort: 9094 + targetPort: 5556 port: 32401 nodePort: 32401 - type: NodePort \ No newline at end of file + type: NodePort diff --git a/pixy/Kustomization b/pixy/Kustomization deleted file mode 100644 index 17eaa10f..00000000 --- a/pixy/Kustomization +++ /dev/null @@ -1,3 +0,0 @@ -resources: -- pixy-service.yml -- pixy.yml diff --git a/pixy/pixy.yml b/pixy/pixy.yml deleted file mode 100644 index 190195b4..00000000 --- a/pixy/pixy.yml +++ /dev/null @@ -1,30 +0,0 @@ -apiVersion: apps/v1 -kind: Deployment -metadata: - name: pixy - namespace: kafka - labels: - app: pixy -spec: - replicas: 2 - selector: - matchLabels: - app: pixy - template: - metadata: - labels: - app: pixy - spec: - containers: - - name: pixy - image: mailgun/kafka-pixy:0.17.0@sha256:0b5f4795c0b0d80729fa7415ec70ae4d411e152c6149656dddf01b18184792e0 - ports: - - containerPort: 80 - command: - - kafka-pixy - - -kafkaPeers - - bootstrap.kafka:9092 - - -zookeeperPeers - - 
zookeeper.kafka:2181 - - -tcpAddr - - 0.0.0.0:80 diff --git a/prometheus/50-kafka-jmx-exporter-patch.yml b/prometheus/kafka-jmx-exporter-patch.yml similarity index 100% rename from prometheus/50-kafka-jmx-exporter-patch.yml rename to prometheus/kafka-jmx-exporter-patch.yml diff --git a/pixy/pixy-service.yml b/prometheus/kafka-jmx-exporter-svc.yml similarity index 51% rename from pixy/pixy-service.yml rename to prometheus/kafka-jmx-exporter-svc.yml index c15b7379..024e53a0 100644 --- a/pixy/pixy-service.yml +++ b/prometheus/kafka-jmx-exporter-svc.yml @@ -1,12 +1,14 @@ -kind: Service apiVersion: v1 +kind: Service metadata: - name: pixy + name: meteric namespace: kafka spec: - selector: - app: pixy ports: - - name: web + - name: http + port: 5556 + nodePort: 30001 protocol: TCP - port: 80 + type: NodePort + selector: + app: kafka diff --git a/prometheus/10-metrics-config.yml b/prometheus/kafka-jmx-metrics-config.yml similarity index 97% rename from prometheus/10-metrics-config.yml rename to prometheus/kafka-jmx-metrics-config.yml index 345e1929..1c89d3cd 100644 --- a/prometheus/10-metrics-config.yml +++ b/prometheus/kafka-jmx-metrics-config.yml @@ -7,6 +7,7 @@ data: jmx-kafka-prometheus.yml: |+ lowercaseOutputName: true + #hostPort: 127.0.0.1: 5555 jmxUrl: service:jmx:rmi:///jndi/rmi://127.0.0.1:5555/jmxrmi ssl: false whitelistObjectNames: ["kafka.server:*","kafka.controller:*","java.lang:*"] @@ -42,4 +43,4 @@ data: name: "zookeeper_$4_$5" labels: replicaId: "$2" - memberType: "$3" \ No newline at end of file + memberType: "$3" diff --git a/prometheus/kube-state-deployment.yml b/prometheus/kube-state-deployment.yml new file mode 100644 index 00000000..09df9c74 --- /dev/null +++ b/prometheus/kube-state-deployment.yml @@ -0,0 +1,24 @@ +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: kube-state-metrics + namespace: monitoring + labels: + app: kube-state-metrics +spec: + replicas: 1 + selector: + matchLabels: + app: kube-state-metrics + template: + metadata: 
+ labels: + app: kube-state-metrics + spec: + serviceAccountName: kube-state-metrics + containers: + - name: kube-state-metrics + image: gcr.io/google_containers/kube-state-metrics:v0.5.0 + ports: + - containerPort: 8080 diff --git a/prometheus/kube-state-svc.yml b/prometheus/kube-state-svc.yml new file mode 100644 index 00000000..40559e07 --- /dev/null +++ b/prometheus/kube-state-svc.yml @@ -0,0 +1,23 @@ +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: kube-state-metrics + namespace: monitoring +--- +apiVersion: v1 +kind: Service +metadata: + annotations: + prometheus.io/scrape: 'true' + name: kube-state-metrics + namespace: monitoring + labels: + app: kube-state-metrics +spec: + ports: + - name: kube-state-metrics + port: 8080 + protocol: TCP + selector: + app: kube-state-metrics diff --git a/prometheus/kustomization.yaml b/prometheus/kustomization.yml similarity index 82% rename from prometheus/kustomization.yaml rename to prometheus/kustomization.yml index 81c00fed..21eac282 100644 --- a/prometheus/kustomization.yaml +++ b/prometheus/kustomization.yml @@ -4,6 +4,6 @@ bases: #- ../kafka #- ../variants/scale-1 resources: -- 10-metrics-config.yml +- kafka-jmx-metrics-config.yml patchesStrategicMerge: -- 50-kafka-jmx-exporter-patch.yml +- kafka-jmx-exporter-patch.yml diff --git a/prometheus/node-dir-exporter.yml b/prometheus/node-dir-exporter.yml new file mode 100644 index 00000000..e8240ecd --- /dev/null +++ b/prometheus/node-dir-exporter.yml @@ -0,0 +1,77 @@ +--- +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: node-directory-size-metrics + namespace: monitoring + labels: + app: node-directory-size-metrics + annotations: + description: | + This `DaemonSet` provides metrics in Prometheus format about disk usage on the nodes. + The container `read-du` reads in sizes of all directories below /mnt and writes that to `/tmp/metrics`. It only reports directories larger than `100M` for now.
+ The other container `caddy` just hands out the contents of that file on request via `http` on `/metrics` at port `9102` which are the defaults for Prometheus. + These are scheduled on every node in the Kubernetes cluster. + To choose directories from the node to check, just mount them on the `read-du` container below `/mnt`. +spec: + selector: + matchLabels: + app: node-directory-size-metrics + template: + metadata: + labels: + app: node-directory-size-metrics + annotations: + prometheus.io/scrape: 'true' + prometheus.io/port: '9102' + description: | + This `Pod` provides metrics in Prometheus format about disk usage on the node. + The container `read-du` reads in sizes of all directories below /mnt and writes that to `/tmp/metrics`. It only reports directories larger than `100M` for now. + The other container `caddy` just hands out the contents of that file on request on `/metrics` at port `9102` which are the defaults for Prometheus. + This `Pod` is scheduled on every node in the Kubernetes cluster. + To choose directories from the node to check just mount them on `read-du` below `/mnt`.
+ spec: + containers: + - name: read-du + image: giantswarm/tiny-tools + imagePullPolicy: Always + # FIXME threshold via env var + # The + command: + - fish + - --command + - | + touch /tmp/metrics-temp + while true + for directory in (du --bytes --separate-dirs --threshold=100M /mnt) + echo $directory | read size path + echo "node_directory_size_bytes{path=\"$path\"} $size" \ + >> /tmp/metrics-temp + end + mv /tmp/metrics-temp /tmp/metrics + sleep 300 + end + volumeMounts: + - name: host-fs-var + mountPath: /mnt/var + readOnly: true + - name: metrics + mountPath: /tmp + - name: caddy + image: dockermuenster/caddy:0.9.3 + command: + - "caddy" + - "-port=9102" + - "-root=/var/www" + ports: + - containerPort: 9102 + volumeMounts: + - name: metrics + mountPath: /var/www + volumes: + - name: host-fs-var + hostPath: + path: /var + - name: metrics + emptyDir: + medium: Memory diff --git a/prometheus/node-exporter-svc.yml b/prometheus/node-exporter-svc.yml new file mode 100644 index 00000000..dd4b5243 --- /dev/null +++ b/prometheus/node-exporter-svc.yml @@ -0,0 +1,21 @@ +--- +apiVersion: v1 +kind: Service +metadata: + annotations: + prometheus.io/scrape: 'true' + name: prometheus-node-exporter + namespace: monitoring + labels: + app: prometheus + component: node-exporter +spec: + clusterIP: None + ports: + - name: prometheus-node-exporter + port: 9100 + protocol: TCP + selector: + app: prometheus + component: node-exporter + type: ClusterIP diff --git a/prometheus/node-exporter.yml b/prometheus/node-exporter.yml new file mode 100644 index 00000000..b9d30584 --- /dev/null +++ b/prometheus/node-exporter.yml @@ -0,0 +1,30 @@ +--- +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: prometheus-node-exporter + namespace: monitoring + labels: + app: prometheus + component: node-exporter +spec: + selector: + matchLabels: + app: prometheus + template: + metadata: + name: prometheus-node-exporter + labels: + app: prometheus + component: node-exporter + spec: + containers: + - 
image: prom/node-exporter:v0.14.0 + name: prometheus-node-exporter + ports: + - name: prom-node-exp + #^ must be an IANA_SVC_NAME (at most 15 characters, ..) + containerPort: 9100 + hostPort: 9100 + hostNetwork: true + hostPID: true diff --git a/prometheus/prometheus-config.yml b/prometheus/prometheus-config.yml new file mode 100644 index 00000000..958a2098 --- /dev/null +++ b/prometheus/prometheus-config.yml @@ -0,0 +1,128 @@ +--- +apiVersion: v1 +data: + prometheus.yaml: | + global: + scrape_interval: 10s + scrape_timeout: 10s + evaluation_interval: 10s + rule_files: + - "/etc/prometheus-rules/*.rules" + scrape_configs: + - job_name: 'jmx-kafka' + static_configs: + - targets: ['54.241.141.202:32401'] + # https://github.com/prometheus/prometheus/blob/master/documentation/examples/prometheus-kubernetes.yml#L37 + - job_name: 'kubernetes-nodes' + tls_config: + ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token + kubernetes_sd_configs: + - role: node + relabel_configs: + - source_labels: [__address__] + regex: '(.*):10250' + replacement: '${1}:10255' + target_label: __address__ + # https://github.com/prometheus/prometheus/blob/master/documentation/examples/prometheus-kubernetes.yml#L79 + - job_name: 'kubernetes-endpoints' + kubernetes_sd_configs: + - role: endpoints + relabel_configs: + - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape] + action: keep + regex: true + - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scheme] + action: replace + target_label: __scheme__ + regex: (https?) 
+ - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_path] + action: replace + target_label: __metrics_path__ + regex: (.+) + - source_labels: [__address__, __meta_kubernetes_service_annotation_prometheus_io_port] + action: replace + target_label: __address__ + regex: (.+)(?::\d+);(\d+) + replacement: $1:$2 + - action: labelmap + regex: __meta_kubernetes_service_label_(.+) + - source_labels: [__meta_kubernetes_namespace] + action: replace + target_label: kubernetes_namespace + - source_labels: [__meta_kubernetes_service_name] + action: replace + target_label: kubernetes_name + # https://github.com/prometheus/prometheus/blob/master/documentation/examples/prometheus-kubernetes.yml#L119 + - job_name: 'kubernetes-services' + metrics_path: /probe + params: + module: [http_2xx] + kubernetes_sd_configs: + - role: service + relabel_configs: + - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_probe] + action: keep + regex: true + - source_labels: [__address__] + target_label: __param_target + - target_label: __address__ + replacement: blackbox + - source_labels: [__param_target] + target_label: instance + - action: labelmap + regex: __meta_kubernetes_service_label_(.+) + - source_labels: [__meta_kubernetes_namespace] + target_label: kubernetes_namespace + - source_labels: [__meta_kubernetes_service_name] + target_label: kubernetes_name + # https://github.com/prometheus/prometheus/blob/master/documentation/examples/prometheus-kubernetes.yml#L156 + - job_name: 'kubernetes-pods' + kubernetes_sd_configs: + - role: pod + relabel_configs: + - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape] + action: keep + regex: true + - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path] + action: replace + target_label: __metrics_path__ + regex: (.+) + - source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port] + action: replace + regex: (.+):(?:\d+);(\d+) + replacement: ${1}:${2} + 
target_label: __address__ + - action: labelmap + regex: __meta_kubernetes_pod_label_(.+) + - source_labels: [__meta_kubernetes_namespace] + action: replace + target_label: kubernetes_namespace + - source_labels: [__meta_kubernetes_pod_name] + action: replace + target_label: kubernetes_pod_name + - source_labels: [__meta_kubernetes_pod_container_port_number] + action: keep + regex: 9\d{3} + - job_name: 'kubernetes-cadvisor' + scheme: https + tls_config: + ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token + kubernetes_sd_configs: + - role: node + relabel_configs: + - action: labelmap + - action: labelmap + regex: __meta_kubernetes_node_label_(.+) + - target_label: __address__ + replacement: kubernetes.default.svc:443 + - source_labels: [__meta_kubernetes_node_name] + regex: (.+) + target_label: __metrics_path__ + replacement: /api/v1/nodes/${1}/proxy/metrics/cadvisor +kind: ConfigMap +metadata: + creationTimestamp: null + name: prometheus-core + namespace: monitoring diff --git a/prometheus/prometheus-deployment.yml b/prometheus/prometheus-deployment.yml new file mode 100644 index 00000000..e91bf454 --- /dev/null +++ b/prometheus/prometheus-deployment.yml @@ -0,0 +1,52 @@ +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: prometheus-core + namespace: monitoring + labels: + app: prometheus + component: core +spec: + replicas: 1 + selector: + matchLabels: + app: prometheus + template: + metadata: + name: prometheus-main + labels: + app: prometheus + component: core + spec: + serviceAccountName: prometheus-k8s + containers: + - name: prometheus + image: prom/prometheus:v1.7.0 + args: + - '-storage.local.retention=12h' + - '-storage.local.memory-chunks=500000' + - '-config.file=/etc/prometheus/prometheus.yaml' + - '-alertmanager.url=http://alertmanager:9093/' + ports: + - name: webui + containerPort: 9090 + resources: + requests: + cpu: 100m + memory: 500M + limits: + cpu: 
100m + memory: 500M + volumeMounts: + - name: config-volume + mountPath: /etc/prometheus + - name: rules-volume + mountPath: /etc/prometheus-rules + volumes: + - name: config-volume + configMap: + name: prometheus-core + - name: rules-volume + configMap: + name: prometheus-rules diff --git a/prometheus/prometheus-svc.yml b/prometheus/prometheus-svc.yml new file mode 100644 index 00000000..c8f9d09c --- /dev/null +++ b/prometheus/prometheus-svc.yml @@ -0,0 +1,21 @@ +--- +apiVersion: v1 +kind: Service +metadata: + name: prometheus + namespace: monitoring + labels: + app: prometheus + component: core + annotations: + prometheus.io/scrape: 'true' +spec: + type: NodePort + ports: + - port: 9090 + protocol: TCP + nodePort: 32334 + name: webui + selector: + app: prometheus + component: core diff --git a/prometheus/rules.yml b/prometheus/rules.yml new file mode 100644 index 00000000..7e336caf --- /dev/null +++ b/prometheus/rules.yml @@ -0,0 +1,71 @@ +--- +apiVersion: v1 +data: + cpu-usage.rules: | + ALERT NodeCPUUsage + IF (100 - (avg by (instance) (irate(node_cpu{name="node-exporter",mode="idle"}[5m])) * 100)) > 75 + FOR 2m + LABELS { + severity="page" + } + ANNOTATIONS { + SUMMARY = "{{$labels.instance}}: High CPU usage detected", + DESCRIPTION = "{{$labels.instance}}: CPU usage is above 75% (current value is: {{ $value }})" + } + instance-availability.rules: | + ALERT InstanceDown + IF up == 0 + FOR 1m + LABELS { severity = "page" } + ANNOTATIONS { + summary = "Instance {{ $labels.instance }} down", + description = "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 1 minute.", + } + low-disk-space.rules: | + ALERT NodeLowRootDisk + IF ((node_filesystem_size{mountpoint="/root-disk"} - node_filesystem_free{mountpoint="/root-disk"} ) / node_filesystem_size{mountpoint="/root-disk"} * 100) > 75 + FOR 2m + LABELS { + severity="page" + } + ANNOTATIONS { + SUMMARY = "{{$labels.instance}}: Low root disk space", + DESCRIPTION = "{{$labels.instance}}: Root
disk usage is above 75% (current value is: {{ $value }})" + } + ALERT NodeLowDataDisk + IF ((node_filesystem_size{mountpoint="/data-disk"} - node_filesystem_free{mountpoint="/data-disk"} ) / node_filesystem_size{mountpoint="/data-disk"} * 100) > 75 + FOR 2m + LABELS { + severity="page" + } + ANNOTATIONS { + SUMMARY = "{{$labels.instance}}: Low data disk space", + DESCRIPTION = "{{$labels.instance}}: Data disk usage is above 75% (current value is: {{ $value }})" + } + mem-usage.rules: | + ALERT NodeSwapUsage + IF (((node_memory_SwapTotal-node_memory_SwapFree)/node_memory_SwapTotal)*100) > 75 + FOR 2m + LABELS { + severity="page" + } + ANNOTATIONS { + SUMMARY = "{{$labels.instance}}: Swap usage detected", + DESCRIPTION = "{{$labels.instance}}: Swap usage is above 75% (current value is: {{ $value }})" + } + ALERT NodeMemoryUsage + IF (((node_memory_MemTotal-node_memory_MemAvailable)/(node_memory_MemTotal)*100)) > 75 + FOR 2m + LABELS { + severity="page" + } + ANNOTATIONS { + SUMMARY = "{{$labels.instance}}: High memory usage detected", + DESCRIPTION = "{{$labels.instance}}: Memory usage is above 75% (current value is: {{ $value }})" + } + +kind: ConfigMap +metadata: + creationTimestamp: null + name: prometheus-rules + namespace: monitoring diff --git a/rbac-namespace-default/Kustomization b/rbac-namespace-default/Kustomization index c2155fb0..6cb5aba7 100644 --- a/rbac-namespace-default/Kustomization +++ b/rbac-namespace-default/Kustomization @@ -1,3 +1,5 @@ resources: - node-reader.yml - pod-labler.yml +- prometheus.yml +- namespace.yml diff --git a/rbac-namespace-default/namespace.yml b/rbac-namespace-default/namespace.yml new file mode 100644 index 00000000..58a46786 --- /dev/null +++ b/rbac-namespace-default/namespace.yml @@ -0,0 +1,12 @@ +--- +apiVersion: v1 +kind: Namespace +metadata: + name: kafka + +--- +apiVersion: v1 +kind: Namespace +metadata: + name: monitoring + diff --git a/variants/prometheus-operator-example/k8s-cluster-rbac.yaml
b/rbac-namespace-default/prometheus.yml similarity index 54% rename from variants/prometheus-operator-example/k8s-cluster-rbac.yaml rename to rbac-namespace-default/prometheus.yml index 3f57d21d..4748e38d 100644 --- a/variants/prometheus-operator-example/k8s-cluster-rbac.yaml +++ b/rbac-namespace-default/prometheus.yml @@ -1,30 +1,39 @@ -# Allows the "k8s" prometheus from Prometheus Operator contrib to do service discovery in the kafka namespace --- apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRole +kind: ClusterRoleBinding metadata: + name: prometheus +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: prometheus +subjects: +- kind: ServiceAccount name: prometheus-k8s + namespace: monitoring +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: prometheus rules: -- apiGroups: - - "" +- apiGroups: [""] resources: + - nodes + - nodes/proxy - services - endpoints - pods - verbs: - - get - - list - - watch + verbs: ["get", "list", "watch"] +- apiGroups: [""] + resources: + - configmaps + verbs: ["get"] +- nonResourceURLs: ["/metrics"] + verbs: ["get"] --- -apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRoleBinding +apiVersion: v1 +kind: ServiceAccount metadata: name: prometheus-k8s -roleRef: - apiGroup: rbac.authorization.k8s.io - kind: ClusterRole - name: prometheus-k8s -subjects: -- kind: ServiceAccount - name: prometheus-k8s namespace: monitoring diff --git a/variants/aks-managed/aks-storageclass-broker-managed.yaml b/variants/aks-managed/aks-storageclass-broker-managed.yaml deleted file mode 100644 index 50ae160b..00000000 --- a/variants/aks-managed/aks-storageclass-broker-managed.yaml +++ /dev/null @@ -1,9 +0,0 @@ -apiVersion: storage.k8s.io/v1 -kind: StorageClass -metadata: - name: kafka-broker -provisioner: kubernetes.io/azure-disk -reclaimPolicy: Retain -parameters: - kind: "Managed" - storageaccounttype: Premium_LRS diff --git 
a/variants/aks-managed/aks-storageclass-zookeeper-managed.yaml b/variants/aks-managed/aks-storageclass-zookeeper-managed.yaml deleted file mode 100644 index 2c4e0ff1..00000000 --- a/variants/aks-managed/aks-storageclass-zookeeper-managed.yaml +++ /dev/null @@ -1,9 +0,0 @@ -apiVersion: storage.k8s.io/v1 -kind: StorageClass -metadata: - name: kafka-zookeeper -provisioner: kubernetes.io/azure-disk -reclaimPolicy: Retain -parameters: - kind: "Managed" - storageaccounttype: Premium_LRS diff --git a/variants/aks-managed/kustomization.yaml b/variants/aks-managed/kustomization.yaml deleted file mode 100644 index aca693de..00000000 --- a/variants/aks-managed/kustomization.yaml +++ /dev/null @@ -1,7 +0,0 @@ -bases: -- ../scale-3-5 -resources: -- aks-storageclass-broker-managed.yaml -- aks-storageclass-zookeeper-managed.yaml -patchesStrategicMerge: -- volume-claims.yaml diff --git a/variants/aks-managed/volume-claims.yaml b/variants/aks-managed/volume-claims.yaml deleted file mode 100644 index fb20db89..00000000 --- a/variants/aks-managed/volume-claims.yaml +++ /dev/null @@ -1,32 +0,0 @@ ---- -apiVersion: apps/v1 -kind: StatefulSet -metadata: - name: kafka - namespace: kafka -spec: - volumeClaimTemplates: - - metadata: - name: data - spec: - accessModes: [ "ReadWriteOnce" ] - storageClassName: kafka-broker - resources: - requests: - storage: 10Gi ---- -apiVersion: apps/v1 -kind: StatefulSet -metadata: - name: pzoo - namespace: kafka -spec: - volumeClaimTemplates: - - metadata: - name: data - spec: - accessModes: [ "ReadWriteOnce" ] - storageClassName: kafka-zookeeper - resources: - requests: - storage: 1Gi diff --git a/variants/aws-gp2/aws-storageclass-broker-gp2.yaml b/variants/aws-gp2/aws-storageclass-broker-gp2.yaml deleted file mode 100644 index 16289b51..00000000 --- a/variants/aws-gp2/aws-storageclass-broker-gp2.yaml +++ /dev/null @@ -1,11 +0,0 @@ -kind: StorageClass -apiVersion: storage.k8s.io/v1 -metadata: - name: kafka-broker - labels: - k8s-addon: 
storage-aws.addons.k8s.io -provisioner: kubernetes.io/aws-ebs -reclaimPolicy: Retain -allowVolumeExpansion: true -parameters: - type: gp2 diff --git a/variants/aws-gp2/aws-storageclass-zookeeper-gp2.yaml b/variants/aws-gp2/aws-storageclass-zookeeper-gp2.yaml deleted file mode 100644 index 1e651df1..00000000 --- a/variants/aws-gp2/aws-storageclass-zookeeper-gp2.yaml +++ /dev/null @@ -1,11 +0,0 @@ -kind: StorageClass -apiVersion: storage.k8s.io/v1 -metadata: - name: kafka-zookeeper - labels: - k8s-addon: storage-aws.addons.k8s.io -provisioner: kubernetes.io/aws-ebs -reclaimPolicy: Retain -allowVolumeExpansion: true -parameters: - type: gp2 diff --git a/variants/aws-gp2/kustomization.yaml b/variants/aws-gp2/kustomization.yaml deleted file mode 100644 index ea9f8aac..00000000 --- a/variants/aws-gp2/kustomization.yaml +++ /dev/null @@ -1,7 +0,0 @@ -bases: -- ../scale-3-5 -resources: -- aws-storageclass-broker-gp2.yaml -- aws-storageclass-zookeeper-gp2.yaml -patchesStrategicMerge: -- volume-claims.yaml diff --git a/variants/aws-gp2/volume-claims.yaml b/variants/aws-gp2/volume-claims.yaml deleted file mode 100644 index fb20db89..00000000 --- a/variants/aws-gp2/volume-claims.yaml +++ /dev/null @@ -1,32 +0,0 @@ ---- -apiVersion: apps/v1 -kind: StatefulSet -metadata: - name: kafka - namespace: kafka -spec: - volumeClaimTemplates: - - metadata: - name: data - spec: - accessModes: [ "ReadWriteOnce" ] - storageClassName: kafka-broker - resources: - requests: - storage: 10Gi ---- -apiVersion: apps/v1 -kind: StatefulSet -metadata: - name: pzoo - namespace: kafka -spec: - volumeClaimTemplates: - - metadata: - name: data - spec: - accessModes: [ "ReadWriteOnce" ] - storageClassName: kafka-zookeeper - resources: - requests: - storage: 1Gi diff --git a/variants/dev-small/jmx-disable.yaml b/variants/dev-small/jmx-disable.yaml deleted file mode 100644 index fb7dd092..00000000 --- a/variants/dev-small/jmx-disable.yaml +++ /dev/null @@ -1,13 +0,0 @@ -apiVersion: apps/v1 -kind: 
StatefulSet -metadata: - name: kafka - namespace: kafka -spec: - template: - spec: - containers: - - name: broker - env: - - name: JMX_PORT - value: "" diff --git a/variants/dev-small/kustomization.yaml b/variants/dev-small/kustomization.yaml deleted file mode 100644 index a22670a3..00000000 --- a/variants/dev-small/kustomization.yaml +++ /dev/null @@ -1,20 +0,0 @@ -# With kubectl -n kafka port-forward kafka-0 9094 -bases: -- ../scale-1 -patchesStrategicMerge: -- jmx-disable.yaml -patchesJson6902: -- target: - group: apps - version: v1 - kind: StatefulSet - name: kafka - namespace: kafka - path: listener-localhost.json -- target: - group: apps - version: v1 - kind: StatefulSet - name: kafka - namespace: kafka - path: num-partitions-1.json diff --git a/variants/dev-small/listener-localhost.json b/variants/dev-small/listener-localhost.json deleted file mode 100644 index 496b1664..00000000 --- a/variants/dev-small/listener-localhost.json +++ /dev/null @@ -1,4 +0,0 @@ -[ - {"op": "add", "path": "/spec/template/spec/containers/0/args/1", "value": "--override"}, - {"op": "add", "path": "/spec/template/spec/containers/0/args/2", "value": "advertised.listeners=PLAINTEXT://:9092,OUTSIDE://localhost:9094"} -] diff --git a/variants/dev-small/num-partitions-1.json b/variants/dev-small/num-partitions-1.json deleted file mode 100644 index b8211f7f..00000000 --- a/variants/dev-small/num-partitions-1.json +++ /dev/null @@ -1,4 +0,0 @@ -[ - {"op": "add", "path": "/spec/template/spec/containers/0/args/-", "value": "--override"}, - {"op": "add", "path": "/spec/template/spec/containers/0/args/-", "value": "num.partitions=1"} -] diff --git a/variants/gke-regional/gke-storageclass-broker-pd.yaml b/variants/gke-regional/gke-storageclass-broker-pd.yaml deleted file mode 100644 index 25c7ed5b..00000000 --- a/variants/gke-regional/gke-storageclass-broker-pd.yaml +++ /dev/null @@ -1,9 +0,0 @@ -kind: StorageClass -apiVersion: storage.k8s.io/v1 -metadata: - name: kafka-broker -provisioner: 
kubernetes.io/gce-pd -reclaimPolicy: Retain -allowVolumeExpansion: true -parameters: - type: pd-standard diff --git a/variants/gke-regional/gke-storageclass-zookeeper-regional-euwest1cd-ssh.yaml b/variants/gke-regional/gke-storageclass-zookeeper-regional-euwest1cd-ssh.yaml deleted file mode 100644 index 22d9602f..00000000 --- a/variants/gke-regional/gke-storageclass-zookeeper-regional-euwest1cd-ssh.yaml +++ /dev/null @@ -1,11 +0,0 @@ -kind: StorageClass -apiVersion: storage.k8s.io/v1 -metadata: - name: kafka-zookeeper-regional -provisioner: kubernetes.io/gce-pd -reclaimPolicy: Retain -allowVolumeExpansion: true -parameters: - type: pd-ssd - replication-type: regional-pd - zones: europe-west1-c, europe-west1-d diff --git a/variants/gke-regional/gke-storageclass-zookeeper-ssd.yaml b/variants/gke-regional/gke-storageclass-zookeeper-ssd.yaml deleted file mode 100644 index 2223d7b2..00000000 --- a/variants/gke-regional/gke-storageclass-zookeeper-ssd.yaml +++ /dev/null @@ -1,9 +0,0 @@ -kind: StorageClass -apiVersion: storage.k8s.io/v1 -metadata: - name: kafka-zookeeper -provisioner: kubernetes.io/gce-pd -reclaimPolicy: Retain -allowVolumeExpansion: true -parameters: - type: pd-ssd diff --git a/variants/gke-regional/kustomization.yaml b/variants/gke-regional/kustomization.yaml deleted file mode 100644 index 44acb293..00000000 --- a/variants/gke-regional/kustomization.yaml +++ /dev/null @@ -1,8 +0,0 @@ -bases: -- ../scale-3-5 -resources: -- gke-storageclass-broker-pd.yaml -- gke-storageclass-zookeeper-ssd.yaml -- gke-storageclass-zookeeper-regional-euwest1cd-ssh.yaml -patchesStrategicMerge: -- volume-claims.yaml diff --git a/variants/gke-regional/volume-claims.yaml b/variants/gke-regional/volume-claims.yaml deleted file mode 100644 index 42357194..00000000 --- a/variants/gke-regional/volume-claims.yaml +++ /dev/null @@ -1,48 +0,0 @@ ---- -apiVersion: apps/v1 -kind: StatefulSet -metadata: - name: kafka - namespace: kafka -spec: - volumeClaimTemplates: - - metadata: - name: 
data - spec: - accessModes: [ "ReadWriteOnce" ] - storageClassName: kafka-broker - resources: - requests: - storage: 10Gi ---- -apiVersion: apps/v1 -kind: StatefulSet -metadata: - name: pzoo - namespace: kafka -spec: - volumeClaimTemplates: - - metadata: - name: data - spec: - accessModes: [ "ReadWriteOnce" ] - storageClassName: kafka-zookeeper - resources: - requests: - storage: 1Gi ---- -apiVersion: apps/v1 -kind: StatefulSet -metadata: - name: zoo - namespace: kafka -spec: - volumeClaimTemplates: - - metadata: - name: data - spec: - accessModes: [ "ReadWriteOnce" ] - storageClassName: kafka-zookeeper-regional - resources: - requests: - storage: 1Gi diff --git a/variants/prometheus-operator-example/additional-scrape-configs.yaml b/variants/prometheus-operator-example/additional-scrape-configs.yaml deleted file mode 100644 index 8c0541a4..00000000 --- a/variants/prometheus-operator-example/additional-scrape-configs.yaml +++ /dev/null @@ -1,11 +0,0 @@ - -apiVersion: monitoring.coreos.com/v1 -kind: Prometheus -metadata: - name: k8s - namespace: monitoring -spec: - additionalScrapeConfigs: - name: additional-scrape-configs - # See https://github.com/prometheus/prometheus/pull/4131, and upon disagreement see https://github.com/prometheus/prometheus/issues/4484 - key: pods-discovery-by-prometheus-io-annotations.yaml diff --git a/variants/prometheus-operator-example/alertmanager-main-scale-1.yaml b/variants/prometheus-operator-example/alertmanager-main-scale-1.yaml deleted file mode 100644 index 38da8bdf..00000000 --- a/variants/prometheus-operator-example/alertmanager-main-scale-1.yaml +++ /dev/null @@ -1,7 +0,0 @@ -apiVersion: monitoring.coreos.com/v1 -kind: Alertmanager -metadata: - name: main - namespace: monitoring -spec: - replicas: 1 diff --git a/variants/prometheus-operator-example/k8s-kafka-rbac.yaml b/variants/prometheus-operator-example/k8s-kafka-rbac.yaml deleted file mode 100644 index 311961ce..00000000 --- 
a/variants/prometheus-operator-example/k8s-kafka-rbac.yaml +++ /dev/null @@ -1,32 +0,0 @@ -# Allows the "k8s" prometheus from Prometheus Operator contrib to do service discovery in the kafka namespace ---- -apiVersion: rbac.authorization.k8s.io/v1 -kind: Role -metadata: - name: prometheus-k8s - namespace: kafka -rules: -- apiGroups: - - "" - resources: - - services - - endpoints - - pods - verbs: - - get - - list - - watch ---- -apiVersion: rbac.authorization.k8s.io/v1 -kind: RoleBinding -metadata: - name: prometheus-k8s - namespace: kafka -roleRef: - apiGroup: rbac.authorization.k8s.io - kind: Role - name: prometheus-k8s -subjects: -- kind: ServiceAccount - name: prometheus-k8s - namespace: monitoring diff --git a/variants/prometheus-operator-example/k8s-kafka-servicemonitor.yaml b/variants/prometheus-operator-example/k8s-kafka-servicemonitor.yaml deleted file mode 100644 index 35b5b416..00000000 --- a/variants/prometheus-operator-example/k8s-kafka-servicemonitor.yaml +++ /dev/null @@ -1,38 +0,0 @@ ---- -apiVersion: v1 -kind: Service -metadata: - name: broker-monitoring - namespace: kafka - labels: - app: kafka -spec: - publishNotReadyAddresses: true - ports: - - name: fromjmx - port: 5556 - selector: - app: kafka ---- -apiVersion: monitoring.coreos.com/v1 -kind: ServiceMonitor -metadata: - name: kafka - namespace: monitoring - labels: - k8s-app: kafka -spec: - namespaceSelector: - matchNames: - - kafka - selector: - matchLabels: - app: kafka - endpoints: - # https://github.com/coreos/prometheus-operator/blob/master/Documentation/api.md#endpoint - - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token - interval: 120s - scrapeTimeout: 119s - port: fromjmx - scheme: http - path: /metrics diff --git a/variants/prometheus-operator-example/k8s-minion-servicemonitor.yaml b/variants/prometheus-operator-example/k8s-minion-servicemonitor.yaml deleted file mode 100644 index 79bbfa22..00000000 --- 
a/variants/prometheus-operator-example/k8s-minion-servicemonitor.yaml +++ /dev/null @@ -1,22 +0,0 @@ -apiVersion: monitoring.coreos.com/v1 -kind: ServiceMonitor -metadata: - name: kafka-metrics-minion - namespace: monitoring - labels: - k8s-app: kafka-metrics-minion -spec: - namespaceSelector: - matchNames: - - kafka - selector: - matchLabels: - app: kafka-minion - type: openmetrics - endpoints: - - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token - interval: 30s - scrapeTimeout: 30s - port: http - scheme: http - path: /metrics diff --git a/variants/prometheus-operator-example/kustomization.yaml b/variants/prometheus-operator-example/kustomization.yaml deleted file mode 100644 index 3fe25c81..00000000 --- a/variants/prometheus-operator-example/kustomization.yaml +++ /dev/null @@ -1,30 +0,0 @@ -bases: -# With prometheus-operator only you must add your own Prometheus and Alertmanager resources -#- github.com/coreos/prometheus-operator?ref=de9a6e1 -- github.com/coreos/kube-prometheus?ref=3a64636 -- ../../consumers-prometheus -# The ../../prometheus base must be edited to point to the chosen kafka base -# Actually to apply the sidecar with apply -k it has to be included with the kafka variant; can't be its own kustomization because you'll get -# either "failed to find an object with apps_v1_StatefulSet|kafka to apply the patch" or "id 'apps_v1_StatefulSet|kafka|~P|zoo|~S' already used" -#- ../../prometheus -resources: -- k8s-kafka-rbac.yaml -# or, to scrape all namespaces -#- k8s-cluster-rbac.yaml -# with base ../../prometheus -#- k8s-kafka-servicemonitor.yaml -# with base ../../consumers-prometheus -- k8s-minion-servicemonitor.yaml -patchesStrategicMerge: -- prometheus-k8s-scale-1.yaml -- prometheus-k8s-2.9.2.yaml -- alertmanager-main-scale-1.yaml -- prometheus-k8s-nodeport.yaml -- additional-scrape-configs.yaml -generatorOptions: - disableNameSuffixHash: true -secretGenerator: -- name: additional-scrape-configs - namespace: monitoring - files: - - 
scrape-configs/pods-discovery-by-prometheus-io-annotations.yaml diff --git a/variants/prometheus-operator-example/prometheus-k8s-2.9.2.yaml b/variants/prometheus-operator-example/prometheus-k8s-2.9.2.yaml deleted file mode 100644 index ad928e9b..00000000 --- a/variants/prometheus-operator-example/prometheus-k8s-2.9.2.yaml +++ /dev/null @@ -1,8 +0,0 @@ -apiVersion: monitoring.coreos.com/v1 -kind: Prometheus -metadata: - name: k8s - namespace: monitoring -spec: - baseImage: quay.io/prometheus/prometheus - version: v2.7.2 diff --git a/variants/prometheus-operator-example/prometheus-k8s-nodeport.yaml b/variants/prometheus-operator-example/prometheus-k8s-nodeport.yaml deleted file mode 100644 index 3ef4ab1b..00000000 --- a/variants/prometheus-operator-example/prometheus-k8s-nodeport.yaml +++ /dev/null @@ -1,10 +0,0 @@ -apiVersion: v1 -kind: Service -metadata: - name: prometheus-k8s - namespace: monitoring -spec: - type: NodePort - ports: - - port: 9090 - nodePort: 32490 diff --git a/variants/prometheus-operator-example/prometheus-k8s-scale-1.yaml b/variants/prometheus-operator-example/prometheus-k8s-scale-1.yaml deleted file mode 100644 index 939ddb73..00000000 --- a/variants/prometheus-operator-example/prometheus-k8s-scale-1.yaml +++ /dev/null @@ -1,7 +0,0 @@ -apiVersion: monitoring.coreos.com/v1 -kind: Prometheus -metadata: - name: k8s - namespace: monitoring -spec: - replicas: 1 diff --git a/variants/prometheus-operator-example/scrape-configs/pods-discovery-by-prometheus-io-annotations.yaml b/variants/prometheus-operator-example/scrape-configs/pods-discovery-by-prometheus-io-annotations.yaml deleted file mode 100644 index 4d812001..00000000 --- a/variants/prometheus-operator-example/scrape-configs/pods-discovery-by-prometheus-io-annotations.yaml +++ /dev/null @@ -1,35 +0,0 @@ -# Example scrape config for pods -# -# The relabeling allows the actual pod scrape endpoint to be configured via the -# following annotations: -# -# * `prometheus.io/scrape`: Only scrape pods 
that have a value of `true` -# * `prometheus.io/path`: If the metrics path is not `/metrics` override this. -# * `prometheus.io/port`: Scrape the pod on the indicated port instead of the -# pod's declared ports (default is a port-free target if none are declared). -- job_name: 'kubernetes-pods' - - kubernetes_sd_configs: - - role: pod - - relabel_configs: - - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape] - action: keep - regex: true - - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path] - action: replace - target_label: __metrics_path__ - regex: (.+) - - source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port] - action: replace - regex: ([^:]+)(?::\d+)?;(\d+) - replacement: $1:$2 - target_label: __address__ - - action: labelmap - regex: __meta_kubernetes_pod_label_(.+) - - source_labels: [__meta_kubernetes_namespace] - action: replace - target_label: kubernetes_namespace - - source_labels: [__meta_kubernetes_pod_name] - action: replace - target_label: kubernetes_pod_name diff --git a/variants/scale-1-ephemeral/kafka-scale1-overrides.json b/variants/scale-1-ephemeral/kafka-scale1-overrides.json deleted file mode 100644 index 13187671..00000000 --- a/variants/scale-1-ephemeral/kafka-scale1-overrides.json +++ /dev/null @@ -1,10 +0,0 @@ -[ - {"op": "add", "path": "/spec/template/spec/containers/0/args/-", "value": "--override"}, - {"op": "add", "path": "/spec/template/spec/containers/0/args/-", "value": "default.replication.factor=1"}, - {"op": "add", "path": "/spec/template/spec/containers/0/args/-", "value": "--override"}, - {"op": "add", "path": "/spec/template/spec/containers/0/args/-", "value": "min.insync.replicas=1"}, - {"op": "add", "path": "/spec/template/spec/containers/0/args/-", "value": "--override"}, - {"op": "add", "path": "/spec/template/spec/containers/0/args/-", "value": "offsets.topic.replication.factor=1"}, - {"op": "add", "path": "/spec/template/spec/containers/0/args/-", "value": 
"--override"}, - {"op": "add", "path": "/spec/template/spec/containers/0/args/-", "value": "offsets.topic.num.partitions=1"} -] \ No newline at end of file diff --git a/variants/scale-1-ephemeral/kafka.yaml b/variants/scale-1-ephemeral/kafka.yaml deleted file mode 100644 index 436fa722..00000000 --- a/variants/scale-1-ephemeral/kafka.yaml +++ /dev/null @@ -1,12 +0,0 @@ -apiVersion: apps/v1 -kind: StatefulSet -metadata: - name: kafka - namespace: kafka -spec: - template: - spec: - volumes: - - name: data - emptyDir: {} - volumeClaimTemplates: [] diff --git a/variants/scale-1-ephemeral/kustomization.yaml b/variants/scale-1-ephemeral/kustomization.yaml deleted file mode 100644 index 0cef8d2c..00000000 --- a/variants/scale-1-ephemeral/kustomization.yaml +++ /dev/null @@ -1,5 +0,0 @@ -bases: -- ../scale-1 -patchesStrategicMerge: -- kafka.yaml -- zookeeper.yaml diff --git a/variants/scale-1-ephemeral/zookeeper.yaml b/variants/scale-1-ephemeral/zookeeper.yaml deleted file mode 100644 index c2810703..00000000 --- a/variants/scale-1-ephemeral/zookeeper.yaml +++ /dev/null @@ -1,32 +0,0 @@ ---- -apiVersion: v1 -kind: Service -metadata: - name: pzoo - namespace: kafka ---- -apiVersion: apps/v1 -kind: StatefulSet -metadata: - name: pzoo - namespace: kafka -spec: - template: - spec: - volumes: - - name: data - emptyDir: {} - volumeClaimTemplates: [] ---- -apiVersion: apps/v1 -kind: StatefulSet -metadata: - name: zoo - namespace: kafka -spec: - template: - spec: - volumes: - - name: data - emptyDir: {} - volumeClaimTemplates: [] diff --git a/variants/scale-1/kafka-scale1-overrides.json b/variants/scale-1/kafka-scale1-overrides.json deleted file mode 100644 index b4dcceca..00000000 --- a/variants/scale-1/kafka-scale1-overrides.json +++ /dev/null @@ -1,12 +0,0 @@ -[ - {"op": "add", "path": "/spec/template/spec/containers/0/args/-", "value": "--override"}, - {"op": "add", "path": "/spec/template/spec/containers/0/args/-", "value": 
"zookeeper.connect=zoo-0.zoo.$(POD_NAMESPACE).svc.cluster.local:2181" }, - {"op": "add", "path": "/spec/template/spec/containers/0/args/-", "value": "--override"}, - {"op": "add", "path": "/spec/template/spec/containers/0/args/-", "value": "default.replication.factor=1"}, - {"op": "add", "path": "/spec/template/spec/containers/0/args/-", "value": "--override"}, - {"op": "add", "path": "/spec/template/spec/containers/0/args/-", "value": "min.insync.replicas=1"}, - {"op": "add", "path": "/spec/template/spec/containers/0/args/-", "value": "--override"}, - {"op": "add", "path": "/spec/template/spec/containers/0/args/-", "value": "offsets.topic.replication.factor=1"}, - {"op": "add", "path": "/spec/template/spec/containers/0/args/-", "value": "--override"}, - {"op": "add", "path": "/spec/template/spec/containers/0/args/-", "value": "offsets.topic.num.partitions=1"} -] \ No newline at end of file diff --git a/variants/scale-1/kafka.yaml b/variants/scale-1/kafka.yaml deleted file mode 100644 index 18d01c80..00000000 --- a/variants/scale-1/kafka.yaml +++ /dev/null @@ -1,7 +0,0 @@ -apiVersion: apps/v1 -kind: StatefulSet -metadata: - name: kafka - namespace: kafka -spec: - replicas: 1 diff --git a/variants/scale-1/kustomization.yaml b/variants/scale-1/kustomization.yaml deleted file mode 100644 index ab741e0b..00000000 --- a/variants/scale-1/kustomization.yaml +++ /dev/null @@ -1,13 +0,0 @@ -bases: -- ../../native -patchesStrategicMerge: -- kafka.yaml -- zookeeper.yaml -patchesJson6902: -- target: - group: apps - version: v1 - kind: StatefulSet - name: kafka - namespace: kafka - path: kafka-scale1-overrides.json diff --git a/variants/scale-1/zookeeper.yaml b/variants/scale-1/zookeeper.yaml deleted file mode 100644 index ae027b0b..00000000 --- a/variants/scale-1/zookeeper.yaml +++ /dev/null @@ -1,38 +0,0 @@ ---- -apiVersion: v1 -kind: Service -metadata: - name: pzoo - namespace: kafka ---- -apiVersion: apps/v1 -kind: StatefulSet -metadata: - name: pzoo - namespace: kafka 
-spec: - replicas: 0 ---- -apiVersion: apps/v1 -kind: StatefulSet -metadata: - name: zoo - namespace: kafka -spec: - replicas: 1 - template: - spec: - initContainers: - - name: init-config - env: - - name: PZOO_REPLICAS - value: '0' - - name: REPLICAS - value: '1' - - name: ID_OFFSET - value: '1' - containers: - - name: zookeeper - env: - - name: REPLICAS - value: '1' diff --git a/variants/scale-2/kafka-scale2-overrides.json b/variants/scale-2/kafka-scale2-overrides.json deleted file mode 100644 index a18753cb..00000000 --- a/variants/scale-2/kafka-scale2-overrides.json +++ /dev/null @@ -1,10 +0,0 @@ -[ - {"op": "add", "path": "/spec/template/spec/containers/0/args/-", "value": "--override"}, - {"op": "add", "path": "/spec/template/spec/containers/0/args/-", "value": "default.replication.factor=2"}, - {"op": "add", "path": "/spec/template/spec/containers/0/args/-", "value": "--override"}, - {"op": "add", "path": "/spec/template/spec/containers/0/args/-", "value": "min.insync.replicas=2"}, - {"op": "add", "path": "/spec/template/spec/containers/0/args/-", "value": "--override"}, - {"op": "add", "path": "/spec/template/spec/containers/0/args/-", "value": "offsets.topic.replication.factor=2"}, - {"op": "add", "path": "/spec/template/spec/containers/0/args/-", "value": "--override"}, - {"op": "add", "path": "/spec/template/spec/containers/0/args/-", "value": "offsets.topic.num.partitions=2"} -] diff --git a/variants/scale-2/kafka.yaml b/variants/scale-2/kafka.yaml deleted file mode 100644 index c80e25a9..00000000 --- a/variants/scale-2/kafka.yaml +++ /dev/null @@ -1,7 +0,0 @@ -apiVersion: apps/v1 -kind: StatefulSet -metadata: - name: kafka - namespace: kafka -spec: - replicas: 2 diff --git a/variants/scale-2/kustomization.yaml b/variants/scale-2/kustomization.yaml deleted file mode 100644 index 89e86df5..00000000 --- a/variants/scale-2/kustomization.yaml +++ /dev/null @@ -1,13 +0,0 @@ -bases: -- ../../nonroot -patchesStrategicMerge: -- kafka.yaml -- zookeeper.yaml 
-patchesJson6902: -- target: - group: apps - version: v1 - kind: StatefulSet - name: kafka - namespace: kafka - path: kafka-scale2-overrides.json diff --git a/variants/scale-2/zookeeper.yaml b/variants/scale-2/zookeeper.yaml deleted file mode 100644 index 96c8b189..00000000 --- a/variants/scale-2/zookeeper.yaml +++ /dev/null @@ -1,33 +0,0 @@ ---- -apiVersion: apps/v1 -kind: StatefulSet -metadata: - name: pzoo - namespace: kafka -spec: - replicas: 0 ---- -apiVersion: apps/v1 -kind: StatefulSet -metadata: - name: zoo - namespace: kafka -spec: - replicas: 2 - template: - spec: - initContainers: - - name: init-config - env: - # There's no validation on these numbers adding up to a coherent zk config, so watch out - - name: PZOO_REPLICAS - value: '0' - - name: REPLICAS - value: '2' - - name: ID_OFFSET - value: '1' - containers: - - name: zookeeper - env: - - name: REPLICAS - value: '2' diff --git a/variants/scale-3-3/kafka-zookeeper-connect-only-zoo.json b/variants/scale-3-3/kafka-zookeeper-connect-only-zoo.json deleted file mode 100644 index 605bf743..00000000 --- a/variants/scale-3-3/kafka-zookeeper-connect-only-zoo.json +++ /dev/null @@ -1,6 +0,0 @@ -[ - {"op": "add", "path": "/spec/template/spec/containers/0/args/-", "value": "--override"}, - {"op": "add", "path": "/spec/template/spec/containers/0/args/-", "value": - "zookeeper.connect=zoo-0.zoo.$(POD_NAMESPACE).svc.cluster.local:2181,zoo-1.zoo.$(POD_NAMESPACE).svc.cluster.local:2181,zoo-2.zoo.$(POD_NAMESPACE).svc.cluster.local:2181" - } -] \ No newline at end of file diff --git a/variants/scale-3-3/kustomization.yaml b/variants/scale-3-3/kustomization.yaml deleted file mode 100644 index 11b82724..00000000 --- a/variants/scale-3-3/kustomization.yaml +++ /dev/null @@ -1,12 +0,0 @@ -bases: -- ../scale-3-5 -patchesStrategicMerge: -- ./only-zoo-3.yaml -patchesJson6902: -- target: - group: apps - version: v1 - kind: StatefulSet - name: kafka - namespace: kafka - path: kafka-zookeeper-connect-only-zoo.json diff --git 
a/variants/scale-3-3/only-zoo-3.yaml b/variants/scale-3-3/only-zoo-3.yaml deleted file mode 100644 index bced2320..00000000 --- a/variants/scale-3-3/only-zoo-3.yaml +++ /dev/null @@ -1,33 +0,0 @@ ---- -apiVersion: apps/v1 -kind: StatefulSet -metadata: - name: pzoo - namespace: kafka -spec: - replicas: 0 ---- -apiVersion: apps/v1 -kind: StatefulSet -metadata: - name: zoo - namespace: kafka -spec: - replicas: 3 - template: - spec: - initContainers: - - name: init-config - env: - # There's no validation on these numbers adding up to a coherent zk config, so watch out - - name: PZOO_REPLICAS - value: '0' - - name: REPLICAS - value: '3' - - name: ID_OFFSET - value: '1' - containers: - - name: zookeeper - env: - - name: REPLICAS - value: '3' diff --git a/variants/scale-3-5-nopzoo/kafka-zookeeper-connect-only-zoo.json b/variants/scale-3-5-nopzoo/kafka-zookeeper-connect-only-zoo.json deleted file mode 100644 index 37c9f1fc..00000000 --- a/variants/scale-3-5-nopzoo/kafka-zookeeper-connect-only-zoo.json +++ /dev/null @@ -1,4 +0,0 @@ -[ - {"op": "add", "path": "/spec/template/spec/containers/0/args/-", "value": "--override"}, - {"op": "add", "path": "/spec/template/spec/containers/0/args/-", "value": "zookeeper.connect=zoo:2181"} -] \ No newline at end of file diff --git a/variants/scale-3-5-nopzoo/kustomization.yaml b/variants/scale-3-5-nopzoo/kustomization.yaml deleted file mode 100644 index 5c7ec29d..00000000 --- a/variants/scale-3-5-nopzoo/kustomization.yaml +++ /dev/null @@ -1,12 +0,0 @@ -bases: -- ../scale-3-5 -patchesStrategicMerge: -- ./only-zoo-5.yaml -patchesJson6902: -- target: - group: apps - version: v1 - kind: StatefulSet - name: kafka - namespace: kafka - path: kafka-zookeeper-connect-only-zoo.json diff --git a/variants/scale-3-5-nopzoo/only-zoo-5.yaml b/variants/scale-3-5-nopzoo/only-zoo-5.yaml deleted file mode 100644 index ed59e8de..00000000 --- a/variants/scale-3-5-nopzoo/only-zoo-5.yaml +++ /dev/null @@ -1,33 +0,0 @@ ---- -apiVersion: apps/v1 -kind: 
StatefulSet -metadata: - name: pzoo - namespace: kafka -spec: - replicas: 0 ---- -apiVersion: apps/v1 -kind: StatefulSet -metadata: - name: zoo - namespace: kafka -spec: - replicas: 5 - template: - spec: - initContainers: - - name: init-config - env: - # There's no validation on these numbers adding up to a coherent zk config, so watch out - - name: PZOO_REPLICAS - value: '0' - - name: REPLICAS - value: '5' - - name: ID_OFFSET - value: '1' - containers: - - name: zookeeper - env: - - name: REPLICAS - value: '5' diff --git a/variants/scale-3-5/kustomization.yaml b/variants/scale-3-5/kustomization.yaml deleted file mode 100644 index 80c534f7..00000000 --- a/variants/scale-3-5/kustomization.yaml +++ /dev/null @@ -1,2 +0,0 @@ -bases: -- ../../nonroot diff --git a/variants/scale-6-9/kafka-6.yaml b/variants/scale-6-9/kafka-6.yaml deleted file mode 100644 index a7e319a7..00000000 --- a/variants/scale-6-9/kafka-6.yaml +++ /dev/null @@ -1,6 +0,0 @@ -apiVersion: apps/v1 -kind: StatefulSet -metadata: - name: kafka -spec: - replicas: 6 diff --git a/variants/scale-6-9/kafka-zookeeper-connect-only-zoo.json b/variants/scale-6-9/kafka-zookeeper-connect-only-zoo.json deleted file mode 100644 index 06d3bb1a..00000000 --- a/variants/scale-6-9/kafka-zookeeper-connect-only-zoo.json +++ /dev/null @@ -1,6 +0,0 @@ -[ - {"op": "add", "path": "/spec/template/spec/containers/0/args/-", "value": "--override"}, - {"op": "add", "path": "/spec/template/spec/containers/0/args/-", "value": - "zookeeper.connect=zoo-0.zoo.$(POD_NAMESPACE).svc.cluster.local:2181,zoo-1.zoo.$(POD_NAMESPACE).svc.cluster.local:2181,zoo-2.zoo.$(POD_NAMESPACE).svc.cluster.local:2181,zoo-3.zoo.$(POD_NAMESPACE).svc.cluster.local:2181,zoo-4.zoo.$(POD_NAMESPACE).svc.cluster.local:2181,zoo-5.zoo.$(POD_NAMESPACE).svc.cluster.local:2181,zoo-6.zoo.$(POD_NAMESPACE).svc.cluster.local:2181,zoo-7.zoo.$(POD_NAMESPACE).svc.cluster.local:2181,zoo-8.zoo.$(POD_NAMESPACE).svc.cluster.local:2181" - } -] \ No newline at end of file diff --git 
a/variants/scale-6-9/kustomization.yaml b/variants/scale-6-9/kustomization.yaml deleted file mode 100644 index bd1afdd4..00000000 --- a/variants/scale-6-9/kustomization.yaml +++ /dev/null @@ -1,26 +0,0 @@ -bases: -- ../../native -patchesStrategicMerge: -- zoo-9.yaml -- kafka-6.yaml -patchesJson6902: -- target: - group: apps - version: v1 - kind: StatefulSet - name: kafka - path: kafka-zookeeper-connect-only-zoo.json -# The nonroot image is distroless and doesn't support a shell that the prestop hook needs -- target: - group: apps - version: v1 - kind: StatefulSet - name: zoo - path: lifecycle-remove.json -# The nonroot image is distroless and has neither shell nor the nc command -- target: - group: apps - version: v1 - kind: StatefulSet - name: zoo - path: zoo-readiness-without-shell.yaml diff --git a/variants/scale-6-9/lifecycle-remove.json b/variants/scale-6-9/lifecycle-remove.json deleted file mode 100644 index d6ed5370..00000000 --- a/variants/scale-6-9/lifecycle-remove.json +++ /dev/null @@ -1,4 +0,0 @@ -[ - {"op": "remove", "path": "/spec/template/spec/containers/0/lifecycle"} -] - \ No newline at end of file diff --git a/variants/scale-6-9/zoo-9.yaml b/variants/scale-6-9/zoo-9.yaml deleted file mode 100644 index e2babff0..00000000 --- a/variants/scale-6-9/zoo-9.yaml +++ /dev/null @@ -1,33 +0,0 @@ ---- -apiVersion: apps/v1 -kind: StatefulSet -metadata: - name: pzoo - namespace: kafka -spec: - replicas: 0 ---- -apiVersion: apps/v1 -kind: StatefulSet -metadata: - name: zoo - namespace: kafka -spec: - replicas: 9 - template: - spec: - initContainers: - - name: init-config - env: - # There's no validation on these numbers adding up to a coherent zk config, so watch out - - name: PZOO_REPLICAS - value: '0' - - name: REPLICAS - value: '9' - - name: ID_OFFSET - value: '1' - containers: - - name: zookeeper - env: - - name: REPLICAS - value: '9' diff --git a/variants/scale-6-9/zoo-readiness-without-shell.yaml b/variants/scale-6-9/zoo-readiness-without-shell.yaml 
deleted file mode 100644 index 72fe2284..00000000 --- a/variants/scale-6-9/zoo-readiness-without-shell.yaml +++ /dev/null @@ -1,6 +0,0 @@ -- path: /spec/template/spec/containers/0/readinessProbe - # op: replace - # value: - # tcpSocket: - # port: 2181 - op: remove diff --git a/zookeeper/21zoo-service.yml b/zookeeper/21zoo-service.yml deleted file mode 100644 index 53beaeb7..00000000 --- a/zookeeper/21zoo-service.yml +++ /dev/null @@ -1,15 +0,0 @@ -apiVersion: v1 -kind: Service -metadata: - name: zoo - namespace: kafka -spec: - ports: - - port: 2888 - name: peer - - port: 3888 - name: leader-election - clusterIP: None - selector: - app: zookeeper - storage: persistent-regional diff --git a/zookeeper/51zoo.yml b/zookeeper/51zoo.yml deleted file mode 100644 index 52e930f7..00000000 --- a/zookeeper/51zoo.yml +++ /dev/null @@ -1,102 +0,0 @@ -apiVersion: apps/v1 -kind: StatefulSet -metadata: - name: zoo - namespace: kafka -spec: - selector: - matchLabels: - app: zookeeper - storage: persistent-regional - serviceName: "zoo" - replicas: 2 - updateStrategy: - type: RollingUpdate - podManagementPolicy: Parallel - template: - metadata: - labels: - app: zookeeper - storage: persistent-regional - annotations: - spec: - terminationGracePeriodSeconds: 10 - initContainers: - - name: init-config - image: solsson/kafka:initutils@sha256:8988aca5b34feabe8d7d4e368f74b2ede398f692c7e99a38b262a938d475812c - command: ['/bin/bash', '/etc/kafka-configmap/init.sh'] - env: - - name: POD_NAME - valueFrom: - fieldRef: - fieldPath: metadata.name - - name: POD_NAMESPACE - valueFrom: - fieldRef: - fieldPath: metadata.namespace - - name: ID_OFFSET - value: "4" - volumeMounts: - - name: configmap - mountPath: /etc/kafka-configmap - - name: config - mountPath: /etc/kafka - - name: data - mountPath: /var/lib/zookeeper - containers: - - name: zookeeper - image: solsson/kafka:2.5.1@sha256:5c52620bd8e1bcd47805eb8ca285843168e1684aa27f1ae11ce330c3e12f6b0c - env: - - name: POD_NAME - valueFrom: - fieldRef: - 
fieldPath: metadata.name - - name: KAFKA_LOG4J_OPTS - value: -Dlog4j.configuration=file:/etc/kafka/log4j.properties - command: - - ./bin/zookeeper-server-start.sh - - /etc/kafka/zookeeper.properties.scale-5.$(POD_NAME) - lifecycle: - preStop: - exec: - command: ["sh", "-ce", "kill -s TERM 1; while $(kill -0 1 2>/dev/null); do sleep 1; done"] - ports: - - containerPort: 2181 - name: client - - containerPort: 2888 - name: peer - - containerPort: 3888 - name: leader-election - resources: - requests: - cpu: 10m - memory: 100Mi - limits: - memory: 120Mi - readinessProbe: - exec: - command: - - /bin/sh - - -c - - '[ "imok" = "$(echo ruok | nc -w 1 -q 1 127.0.0.1 2181)" ]' - timeoutSeconds: 2 - periodSeconds: 30 - volumeMounts: - - name: config - mountPath: /etc/kafka - - name: data - mountPath: /var/lib/zookeeper - volumes: - - name: configmap - configMap: - name: zookeeper-config - - name: config - emptyDir: {} - volumeClaimTemplates: - - metadata: - name: data - spec: - accessModes: [ "ReadWriteOnce" ] - resources: - requests: - storage: 1Gi diff --git a/zookeeper/Kustomization b/zookeeper/Kustomization index 978b228a..7a8093d2 100644 --- a/zookeeper/Kustomization +++ b/zookeeper/Kustomization @@ -1,7 +1,5 @@ resources: -- 10zookeeper-config.yml -- 20pzoo-service.yml -- 21zoo-service.yml -- 30service.yml -- 50pzoo.yml -- 51zoo.yml +- zoo-config.yml +- pzoo-svc.yml +- zoo-headless.yml +- pzoo.yml diff --git a/zookeeper/20pzoo-service.yml b/zookeeper/pzoo-svc.yml similarity index 100% rename from zookeeper/20pzoo-service.yml rename to zookeeper/pzoo-svc.yml diff --git a/zookeeper/50pzoo.yml b/zookeeper/pzoo.yml similarity index 99% rename from zookeeper/50pzoo.yml rename to zookeeper/pzoo.yml index cdaae36b..e79a973e 100644 --- a/zookeeper/50pzoo.yml +++ b/zookeeper/pzoo.yml @@ -9,7 +9,7 @@ spec: app: zookeeper storage: persistent serviceName: "pzoo" - replicas: 3 + replicas: 5 updateStrategy: type: RollingUpdate podManagementPolicy: Parallel diff --git 
a/zookeeper/10zookeeper-config.yml b/zookeeper/zoo-config.yml similarity index 86% rename from zookeeper/10zookeeper-config.yml rename to zookeeper/zoo-config.yml index 57728474..a48eb1d6 100644 --- a/zookeeper/10zookeeper-config.yml +++ b/zookeeper/zoo-config.yml @@ -19,15 +19,15 @@ data: for N in $(seq $PZOO_REPLICAS); do echo "server.$N=pzoo-$(( $N - 1 )).pzoo.$POD_NAMESPACE.svc.cluster.local:2888:3888:participant" >> /etc/kafka/zookeeper.properties; done for N in $(seq $(( $REPLICAS - $PZOO_REPLICAS ))); do echo "server.$(( $PZOO_REPLICAS + $N ))=zoo-$(( $N - 1 )).zoo.$POD_NAMESPACE.svc.cluster.local:2888:3888:participant" >> /etc/kafka/zookeeper.properties; done } - ln -s /etc/kafka/zookeeper.properties /etc/kafka/zookeeper.properties.scale-$REPLICAS.$POD_NAME + ln -s /etc/kafka/zookeeper.properties /etc/kafka/zookeeper.properties.scale-5.$POD_NAME zookeeper.properties: | - 4lw.commands.whitelist=ruok + 4lw.commands.whitelist=stat,ruok,conf,isro,mntr tickTime=2000 dataDir=/var/lib/zookeeper/data dataLogDir=/var/lib/zookeeper/log clientPort=2181 - maxClientCnxns=3 + maxClientCnxns=0 initLimit=5 syncLimit=2 tcpKeepAlive=true @@ -35,8 +35,11 @@ data: server.1=pzoo-0.pzoo:2888:3888:participant server.2=pzoo-1.pzoo:2888:3888:participant server.3=pzoo-2.pzoo:2888:3888:participant - server.4=zoo-0.zoo:2888:3888:participant - server.5=zoo-1.zoo:2888:3888:participant + server.4=pzoo-3.pzoo:2888:3888:participant + server.5=pzoo-4.pzoo:2888:3888:participant + admin.enableServer=false + autopurge.snapRetainCount=3 + autopurge.purgeInterval=24 log4j.properties: |- log4j.rootLogger=INFO, stdout diff --git a/zookeeper/30service.yml b/zookeeper/zoo-headless.yml similarity index 100% rename from zookeeper/30service.yml rename to zookeeper/zoo-headless.yml