diff --git a/docker/k8s/pmm-client/Dockerfile b/docker/k8s/pmm-client/Dockerfile
new file mode 100644
index 00000000000..1c026865bb8
--- /dev/null
+++ b/docker/k8s/pmm-client/Dockerfile
@@ -0,0 +1,22 @@
+FROM vitess/base AS base
+
+FROM debian:stretch-slim
+
+# pmm-client release to install; override with --build-arg PMM_VERSION=x.y.z
+ARG PMM_VERSION=1.6.1
+
+# Copy CA certs for https calls
+COPY --from=base /etc/ssl/certs/ca-certificates.crt /etc/ssl/certs/ca-certificates.crt
+
+# Install pmm-client from the Percona .deb. wget is only needed for the
+# download, so it is purged again inside the same layer.
+RUN apt-get update && \
+    apt-get upgrade -qq && \
+    apt-get install wget -qq --no-install-recommends && \
+    wget "https://www.percona.com/redir/downloads/pmm-client/${PMM_VERSION}/binary/debian/stretch/x86_64/pmm-client_${PMM_VERSION}-1.stretch_amd64.deb" && \
+    dpkg -i "pmm-client_${PMM_VERSION}-1.stretch_amd64.deb" && \
+    rm "pmm-client_${PMM_VERSION}-1.stretch_amd64.deb" && \
+    apt-get purge wget -qq && \
+    apt-get autoremove -qq && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
diff --git a/helm/vitess/README.md b/helm/vitess/README.md
index 018506672e2..fd70314c4c4 100644
--- a/helm/vitess/README.md
+++ b/helm/vitess/README.md
@@ -217,3 +217,36 @@ vttablet:
       requests:
         storage: "100Gi"
 ```
+
+### Enable PMM (Percona Monitoring and Management)
+
+```
+topology:
+  cells:
+    ...
+
+pmm:
+  enabled: true
+  pmmTag: "1.6.1"
+  client:
+    resources:
+      requests:
+        cpu: 50m
+        memory: 128Mi
+      limits:
+        cpu: 200m
+        memory: 256Mi
+  server:
+    resources:
+      limits:
+        cpu: 2
+        memory: 4Gi
+    dataVolumeClaimSpec:
+      storageClassName: "default"
+      accessModes: ["ReadWriteOnce"]
+      resources:
+        requests:
+          storage: "150Gi"
+    env:
+      metricsMemory: "3000000"
+```
\ No newline at end of file
diff --git a/helm/vitess/templates/NOTES.txt b/helm/vitess/templates/NOTES.txt
index a59ef214f5d..19e3a52941a 100644
--- a/helm/vitess/templates/NOTES.txt
+++ b/helm/vitess/templates/NOTES.txt
@@ -10,4 +10,4 @@ Then use the following URLs:
 
   vtctld: {{$proxyURL}}/services/vtctld:web/app/
   vtgate: {{$proxyURL}}/services/vtgate-{{$cell}}:web/
-
+{{ if $.Values.pmm.enabled }}  pmm: {{$proxyURL}}/services/pmm:web/{{ end }}
\ No newline at end of file
diff --git a/helm/vitess/templates/_helpers.tpl b/helm/vitess/templates/_helpers.tpl
index b6957755df7..29d6fab2409 100644
--- a/helm/vitess/templates/_helpers.tpl
+++ b/helm/vitess/templates/_helpers.tpl
@@ -57,7 +57,6 @@ x{{$replaced_label}}
 securityContext:
   runAsUser: 1000
   fsGroup: 2000
-  runAsNonRoot: true
 {{- end -}}
 
 #############################
diff --git a/helm/vitess/templates/_pmm.tpl b/helm/vitess/templates/_pmm.tpl
new file mode 100644
index 00000000000..ac24260a0ab
--- /dev/null
+++ b/helm/vitess/templates/_pmm.tpl
@@ -0,0 +1,187 @@
+###################################
+# pmm Service + StatefulSet
+###################################
+{{- define "pmm" -}}
+# set tuple values to more recognizable variables
+{{- $pmm := index . 0 -}}
+{{- $namespace := index . 1 -}}
+
+###################################
+# pmm Service
+###################################
+kind: Service
+apiVersion: v1
+metadata:
+  name: pmm
+  labels:
+    component: pmm
+    app: vitess
+spec:
+  ports:
+    - name: web
+      port: 80
+
+  selector:
+    component: pmm
+    app: vitess
+  type: ClusterIP
+---
+###################################
+# pmm StatefulSet
+###################################
+apiVersion: apps/v1beta1
+kind: StatefulSet
+metadata:
+  name: pmm
+spec:
+  serviceName: pmm
+  replicas: 1
+  updateStrategy:
+    type: RollingUpdate
+  selector:
+    matchLabels:
+      app: vitess
+      component: pmm
+  template:
+    metadata:
+      labels:
+        app: vitess
+        component: pmm
+    spec:
+      containers:
+        - name: pmm
+          image: "percona/pmm-server:{{ $pmm.pmmTag }}"
+
+          ports:
+            - name: web
+              containerPort: 80
+
+          volumeMounts:
+            - name: pmmdata
+              mountPath: /pmmdata
+
+          resources:
+{{ toYaml $pmm.server.resources | indent 12 }}
+
+          env:
+            - name: DISABLE_UPDATES
+              value: "true"
+
+            - name: DISABLE_TELEMETRY
+              value: {{ $pmm.server.env.disableTelemetry | quote }}
+
+            - name: METRICS_RESOLUTION
+              value: {{ $pmm.server.env.metricsResolution | quote }}
+
+            - name: METRICS_RETENTION
+              value: {{ $pmm.server.env.metricsRetention | quote }}
+
+            - name: QUERIES_RETENTION
+              value: {{ $pmm.server.env.queriesRetention | quote }}
+
+            - name: METRICS_MEMORY
+              value: {{ $pmm.server.env.metricsMemory | quote }}
+
+          command: ["bash"]
+          args:
+            - "-c"
+            - |
+              set -ex
+
+
+              if [ ! -f /pmmdata/vitess-init ]; then
+                # the PV hasn't been initialized, so copy over default
+                # pmm-server directories before symlinking
+                mkdir -p /pmmdata
+
+                mv /opt/prometheus/data /pmmdata/data
+                mv /opt/consul-data /pmmdata
+                mv /var/lib/mysql /pmmdata
+                mv /var/lib/grafana /pmmdata
+
+                # initialize the PV and then mark it complete
+                touch /pmmdata/vitess-init
+              else
+                # remove the default directories so we can symlink the
+                # existing PV directories
+                rm -Rf /opt/prometheus/data
+                rm -Rf /opt/consul-data
+                rm -Rf /var/lib/mysql
+                rm -Rf /var/lib/grafana
+              fi
+
+              # symlink pmm-server paths to point to our PV
+              ln -s /pmmdata/data /opt/prometheus/
+              ln -s /pmmdata/consul-data /opt/
+              ln -s /pmmdata/mysql /var/lib/
+              ln -s /pmmdata/grafana /var/lib/
+
+              # exec so the entrypoint replaces this shell and receives stop signals directly
+              exec /opt/entrypoint.sh
+
+  volumeClaimTemplates:
+    - metadata:
+        name: pmmdata
+        annotations:
+{{ toYaml $pmm.server.dataVolumeClaimAnnotations | indent 10 }}
+      spec:
+{{ toYaml $pmm.server.dataVolumeClaimSpec | indent 8 }}
+
+{{- end -}}
+
+###################################
+# sidecar container running pmm-client
+###################################
+{{- define "cont-pmm-client" -}}
+{{- $pmm := index . 0 -}}
+{{- $namespace := index . 1 -}}
+
+- name: "pmm-client"
+  image: "vitess/pmm-client:{{ $pmm.pmmTag }}"
+  imagePullPolicy: IfNotPresent
+  volumeMounts:
+    - name: vtdataroot
+      mountPath: "/vtdataroot"
+  ports:
+    - containerPort: 42001
+      name: query-data
+    - containerPort: 42002
+      name: mysql-metrics
+
+  securityContext:
+    # PMM requires root privileges
+    runAsUser: 0
+
+  resources:
+{{ toYaml $pmm.client.resources | indent 4 }}
+
+  command: ["bash"]
+  args:
+    - "-c"
+    - |
+      set -ex
+
+      mkdir -p /vtdataroot/pmm
+
+      # redirect logs to PV
+      ln -s /vtdataroot/pmm/pmm-mysql-metrics-42002.log /var/log/pmm-mysql-metrics-42002.log
+
+      # --force is used because the pod ip address may have changed
+      pmm-admin config --server pmm.{{ $namespace }} --force
+
+      # creates a systemd service
+      # TODO: remove "|| true" after https://jira.percona.com/projects/PMM/issues/PMM-1985 is resolved
+      pmm-admin add mysql:metrics --user root --socket /vtdataroot/tabletdata/mysql.sock --force || true
+
+      # keep the container alive but still responsive to stop requests
+      trap : TERM INT; sleep infinity & wait
+
+- name: pmm-client-metrics-log
+  image: busybox
+  command: ["/bin/sh"]
+  args: ["-c", "tail -n+1 -F /vtdataroot/pmm/pmm-mysql-metrics-42002.log"]
+  volumeMounts:
+    - name: vtdataroot
+      mountPath: /vtdataroot
+
+{{- end -}}
\ No newline at end of file
diff --git a/helm/vitess/templates/_vttablet.tpl b/helm/vitess/templates/_vttablet.tpl
index 259f5d32837..2cb16cedff3 100644
--- a/helm/vitess/templates/_vttablet.tpl
+++ b/helm/vitess/templates/_vttablet.tpl
@@ -2,6 +2,8 @@
 # vttablet Service
 ###################################
 {{- define "vttablet-service" -}}
+# set tuple values to more recognizable variables
+{{- $pmm := index . 0 -}}
 apiVersion: v1
 kind: Service
 metadata:
@@ -17,6 +19,12 @@ spec:
       name: web
     - port: 16002
       name: grpc
+{{ if $pmm.enabled }}
+    - port: 42001
+      name: query-data
+    - port: 42002
+      name: mysql-metrics
+{{ end }}
   clusterIP: None
   selector:
     app: vitess
@@ -37,6 +45,7 @@ spec:
 {{- $defaultVttablet := index . 5 -}}
 {{- $namespace := index . 6 -}}
 {{- $config := index . 7 -}}
+{{- $pmm := index . 8 -}}
 
 # sanitize inputs to create tablet name
 {{- $cellClean := include "clean-label" $cell.name -}}
@@ -96,6 +105,7 @@ spec:
 {{ include "cont-vttablet" (tuple $topology $cell $keyspace $shard $tablet $defaultVttablet $vitessTag $uid $namespace $config) | indent 8 }}
 {{ include "cont-mysql-errorlog" . | indent 8 }}
 {{ include "cont-mysql-slowlog" . | indent 8 }}
+{{ if $pmm.enabled }}{{ include "cont-pmm-client" (tuple $pmm $namespace) | indent 8 }}{{ end }}
 
       volumes:
         - name: vt
diff --git a/helm/vitess/templates/vitess.yaml b/helm/vitess/templates/vitess.yaml
index bb1de5de088..2e5dcdfa5a3 100644
--- a/helm/vitess/templates/vitess.yaml
+++ b/helm/vitess/templates/vitess.yaml
@@ -1,8 +1,14 @@
 # Create global resources.
--- # create a single vttablet service -{{ include "vttablet-service" $ }} +{{ include "vttablet-service" (tuple $.Values.pmm) }} --- +{{ if $.Values.pmm.enabled }} +# create the pmm service and stateful set +{{ include "pmm" (tuple $.Values.pmm $.Release.Namespace) }} +--- +{{ end }} + # create an etcd cluster for the global topology {{- $replicas := $.Values.topology.globalCell.replicas | default $.Values.etcd.replicas -}} {{- $version := $.Values.topology.globalCell.version | default $.Values.etcd.version -}} @@ -33,7 +39,7 @@ {{ range $shard := $keyspace.shards }} {{ range $tablet := $shard.tablets }} --- -{{ include "vttablet" (tuple $.Values.topology $cell $keyspace $shard $tablet $.Values.vttablet $.Release.Namespace $.Values.config) }} +{{ include "vttablet" (tuple $.Values.topology $cell $keyspace $shard $tablet $.Values.vttablet $.Release.Namespace $.Values.config $.Values.pmm) }} {{ end }} # range $tablet {{ end }} # range $shard {{ end }} # range $keyspace diff --git a/helm/vitess/values.yaml b/helm/vitess/values.yaml index 1740c0e412f..747ddd0709f 100644 --- a/helm/vitess/values.yaml +++ b/helm/vitess/values.yaml @@ -152,3 +152,58 @@ vttablet: resources: requests: storage: "10Gi" + +# Default values for pmm +pmm: + enabled: false + pmmTag: "1.6.1" + client: + resources: + requests: + cpu: 50m + memory: 128Mi + limits: + cpu: 200m + memory: 256Mi + server: + resources: + limits: + cpu: 500m + memory: 1Gi + # PVC for pmm + dataVolumeClaimAnnotations: + dataVolumeClaimSpec: + # storageClassName: "pd-ssd" + accessModes: ["ReadWriteOnce"] + resources: + requests: + storage: "10Gi" + env: + # DISABLE_TELEMETRY + # With telemetry enabled, your PMM Server sends some statistics to v.percona.com every 24 hours + disableTelemetry: true + + # METRICS_RESOLUTION (Option) + # This option sets the minimum resolution for checking metrics. 
You should set it if the latency is higher than 1 second + metricsResolution: 1s + + # METRICS_RETENTION (Option) + # This option determines how long metrics are stored at PMM Server. + # The value is passed as a combination of hours, minutes, and seconds, such as 720h0m0s. + # The minutes (a number followed by m) and seconds (a number followed by s) are optional. + metricsRetention: 720h + + # QUERIES_RETENTION + # This option determines how many days queries are stored at PMM Server + queriesRetention: 8 + + # METRICS_MEMORY (Option) -- TODO: automatically calculate based on resource limits + # NOTE: The value must be passed in kilobytes + # NOTE: Make sure to quote this value so it isn't converted into scientific notation + + # By default, Prometheus in PMM Server uses up to 768 MB of memory for storing the most recently used data chunks. + # Depending on the amount of data coming into Prometheus, you may require a higher limit to avoid throttling data ingestion, + # or allow less memory consumption if it is needed for other processes. + # The limit affects only memory reserved for data chunks. Actual RAM usage by Prometheus is higher. + # It is recommended to set this limit to roughly 2/3 of the total memory that you are planning to allow for Prometheus. + metricsMemory: "600000"