Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions docker/k8s/pmm-client/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
FROM vitess/base AS base

FROM debian:stretch-slim

# Copy CA certs for https calls
COPY --from=base /etc/ssl/certs/ca-certificates.crt /etc/ssl/certs/ca-certificates.crt

RUN apt-get update && \
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should put all these RUN commands into a script file, and just COPY and RUN that. Currently, since these are separate layers, we're still paying for the storage of all the removed files.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok, I can do that

apt-get upgrade -qq && \
apt-get install wget -qq --no-install-recommends && \
wget https://www.percona.com/redir/downloads/pmm-client/1.6.1/binary/debian/stretch/x86_64/pmm-client_1.6.1-1.stretch_amd64.deb && \
dpkg -i pmm-client_1.6.1-1.stretch_amd64.deb && \
rm pmm-client_1.6.1-1.stretch_amd64.deb && \
apt-get purge wget -qq && \
apt-get autoremove -qq && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*
33 changes: 33 additions & 0 deletions helm/vitess/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -217,3 +217,36 @@ vttablet:
requests:
storage: "100Gi"
```

### Enable PMM (Percona Monitoring and Management)

```
topology:
cells:
...

pmm:
enabled: true
pmmTag: "1.6.1"
client:
resources:
requests:
cpu: 50m
memory: 128Mi
limits:
cpu: 200m
memory: 256Mi
server:
resources:
limits:
cpu: 2
memory: 4Gi
dataVolumeClaimSpec:
storageClassName: "default"
accessModes: ["ReadWriteOnce"]
resources:
requests:
storage: "150Gi"
env:
metricsMemory: "3000000"
```
2 changes: 1 addition & 1 deletion helm/vitess/templates/NOTES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,4 +10,4 @@ Then use the following URLs:

vtctld: {{$proxyURL}}/services/vtctld:web/app/
vtgate: {{$proxyURL}}/services/vtgate-{{$cell}}:web/

{{ if $.Values.pmm.enabled }} pmm: {{$proxyURL}}/services/pmm:web/{{ end }}
1 change: 0 additions & 1 deletion helm/vitess/templates/_helpers.tpl
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,6 @@ x{{$replaced_label}}
securityContext:
runAsUser: 1000
fsGroup: 2000
runAsNonRoot: true
{{- end -}}

#############################
Expand Down
186 changes: 186 additions & 0 deletions helm/vitess/templates/_pmm.tpl
Original file line number Diff line number Diff line change
@@ -0,0 +1,186 @@
###################################
# pmm Service + StatefulSet
###################################
{{- define "pmm" -}}
# Renders the PMM (Percona Monitoring and Management) server as two manifests:
# a ClusterIP Service named "pmm" and a single-replica StatefulSet whose data
# lives on a persistent volume claim mounted at /pmmdata.
# Expects a tuple: index 0 is the pmm values section, index 1 is the namespace.
# set tuple values to more recognizable variables
{{- $pmm := index . 0 -}}
{{- $namespace := index . 1 -}}

###################################
# pmm Service
###################################
kind: Service
apiVersion: v1
metadata:
  name: pmm
  labels:
    component: pmm
    app: vitess
spec:
  ports:
    - name: web
      port: 80

  selector:
    component: pmm
    app: vitess
  type: ClusterIP
---
###################################
# pmm StatefulSet
###################################
apiVersion: apps/v1beta1
kind: StatefulSet
metadata:
  name: pmm
spec:
  serviceName: pmm
  replicas: 1
  updateStrategy:
    type: RollingUpdate
  selector:
    matchLabels:
      app: vitess
      component: pmm
  template:
    metadata:
      labels:
        app: vitess
        component: pmm
    spec:
      containers:
        - name: pmm
          image: "percona/pmm-server:{{ $pmm.pmmTag }}"

          ports:
            - name: web
              containerPort: 80

          volumeMounts:
            - name: pmmdata
              mountPath: /pmmdata

          resources:
{{ toYaml $pmm.server.resources | indent 12 }}

          # every value is piped through `quote` so the container always
          # receives environment variables as strings
          env:
            - name: DISABLE_UPDATES
              value: "true"

            - name: DISABLE_TELEMETRY
              value: {{ $pmm.server.env.disableTelemetry | quote }}

            - name: METRICS_RESOLUTION
              value: {{ $pmm.server.env.metricsResolution | quote }}

            - name: METRICS_RETENTION
              value: {{ $pmm.server.env.metricsRetention | quote }}

            - name: QUERIES_RETENTION
              value: {{ $pmm.server.env.queriesRetention | quote }}

            - name: METRICS_MEMORY
              value: {{ $pmm.server.env.metricsMemory | quote }}

          # The startup script relocates the pmm-server data directories onto
          # the persistent volume and symlinks them back into place before
          # handing over to the image's own entrypoint.
          # NOTE(review): /pmmdata/vitess-init is only written after all four
          # moves succeed; if the container dies part-way through, the next
          # start re-enters the init branch with directories already present on
          # the volume - confirm whether that case needs explicit handling.
          command: ["bash"]
          args:
            - "-c"
            - |
              set -ex

              if [ ! -f /pmmdata/vitess-init ]; then
                # the PV hasn't been initialized, so copy over default
                # pmm-server directories before symlinking
                mkdir -p /pmmdata

                mv /opt/prometheus/data /pmmdata/data
                mv /opt/consul-data /pmmdata
                mv /var/lib/mysql /pmmdata
                mv /var/lib/grafana /pmmdata

                # initialize the PV and then mark it complete
                touch /pmmdata/vitess-init
              else
                # remove the default directories so we can symlink the
                # existing PV directories
                rm -Rf /opt/prometheus/data
                rm -Rf /opt/consul-data
                rm -Rf /var/lib/mysql
                rm -Rf /var/lib/grafana
              fi

              # symlink pmm-server paths to point to our PV
              ln -s /pmmdata/data /opt/prometheus/
              ln -s /pmmdata/consul-data /opt/
              ln -s /pmmdata/mysql /var/lib/
              ln -s /pmmdata/grafana /var/lib/

              # exec so the PMM entrypoint replaces this shell as PID 1 and
              # receives the stop signal itself when the pod is terminated
              exec /opt/entrypoint.sh

  volumeClaimTemplates:
    - metadata:
        name: pmmdata
        annotations:
{{ toYaml $pmm.server.dataVolumeClaimAnnotations | indent 10 }}
      spec:
{{ toYaml $pmm.server.dataVolumeClaimSpec | indent 8 }}

{{- end -}}

###################################
# sidecar container running pmm-client
###################################
{{- define "cont-pmm-client" -}}
{{- $pmm := index . 0 -}}
{{- $namespace := index . 1 -}}

- name: "pmm-client"
  # Sidecar that points pmm-admin at the in-cluster "pmm" service and
  # registers the tablet's MySQL socket for metrics collection.
  # Tuple arguments: index 0 is the pmm values section, index 1 the namespace.
  image: "vitess/pmm-client:{{ $pmm.pmmTag }}"
  imagePullPolicy: IfNotPresent

  securityContext:
    # PMM requires root privileges
    runAsUser: 0

  ports:
    - name: query-data
      containerPort: 42001
    - name: mysql-metrics
      containerPort: 42002

  volumeMounts:
    - name: vtdataroot
      mountPath: "/vtdataroot"

  resources:
{{ toYaml $pmm.client.resources | indent 4 }}

  command:
    - "bash"
  args:
    - "-c"
    - |
      set -ex

      mkdir -p /vtdataroot/pmm

      # redirect logs to PV
      ln -s /vtdataroot/pmm/pmm-mysql-metrics-42002.log /var/log/pmm-mysql-metrics-42002.log

      # --force is used because the pod ip address may have changed
      pmm-admin config --server pmm.{{ $namespace }} --force

      # creates a systemd service
      # TODO: remove "|| true" after https://jira.percona.com/projects/PMM/issues/PMM-1985 is resolved
      pmm-admin add mysql:metrics --user root --socket /vtdataroot/tabletdata/mysql.sock --force || true

      # keep the container alive but still responsive to stop requests
      trap : TERM INT; sleep infinity & wait

- name: pmm-client-metrics-log
  # Companion container that follows the metrics log the pmm-client container
  # redirects onto the shared vtdataroot volume.
  image: busybox
  volumeMounts:
    - name: vtdataroot
      mountPath: /vtdataroot
  command:
    - "/bin/sh"
  args:
    - "-c"
    - "tail -n+1 -F /vtdataroot/pmm/pmm-mysql-metrics-42002.log"

{{- end -}}
10 changes: 10 additions & 0 deletions helm/vitess/templates/_vttablet.tpl
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
# vttablet Service
###################################
{{- define "vttablet-service" -}}
# set tuple values to more recognizable variables
{{- $pmm := index . 0 -}}
apiVersion: v1
kind: Service
metadata:
Expand All @@ -17,6 +19,12 @@ spec:
name: web
- port: 16002
name: grpc
{{ if $pmm.enabled }}
- port: 42001
name: query-data
- port: 42002
name: mysql-metrics
{{ end }}
clusterIP: None
selector:
app: vitess
Expand All @@ -37,6 +45,7 @@ spec:
{{- $defaultVttablet := index . 5 -}}
{{- $namespace := index . 6 -}}
{{- $config := index . 7 -}}
{{- $pmm := index . 8 -}}

# sanitize inputs to create tablet name
{{- $cellClean := include "clean-label" $cell.name -}}
Expand Down Expand Up @@ -96,6 +105,7 @@ spec:
{{ include "cont-vttablet" (tuple $topology $cell $keyspace $shard $tablet $defaultVttablet $vitessTag $uid $namespace $config) | indent 8 }}
{{ include "cont-mysql-errorlog" . | indent 8 }}
{{ include "cont-mysql-slowlog" . | indent 8 }}
{{ if $pmm.enabled }}{{ include "cont-pmm-client" (tuple $pmm $namespace) | indent 8 }}{{ end }}

volumes:
- name: vt
Expand Down
10 changes: 8 additions & 2 deletions helm/vitess/templates/vitess.yaml
Original file line number Diff line number Diff line change
@@ -1,8 +1,14 @@
# Create global resources.
---
# create a single vttablet service
{{ include "vttablet-service" $ }}
{{ include "vttablet-service" (tuple $.Values.pmm) }}
---
{{ if $.Values.pmm.enabled }}
# create the pmm service and stateful set
{{ include "pmm" (tuple $.Values.pmm $.Release.Namespace) }}
---
{{ end }}

# create an etcd cluster for the global topology
{{- $replicas := $.Values.topology.globalCell.replicas | default $.Values.etcd.replicas -}}
{{- $version := $.Values.topology.globalCell.version | default $.Values.etcd.version -}}
Expand Down Expand Up @@ -33,7 +39,7 @@
{{ range $shard := $keyspace.shards }}
{{ range $tablet := $shard.tablets }}
---
{{ include "vttablet" (tuple $.Values.topology $cell $keyspace $shard $tablet $.Values.vttablet $.Release.Namespace $.Values.config) }}
{{ include "vttablet" (tuple $.Values.topology $cell $keyspace $shard $tablet $.Values.vttablet $.Release.Namespace $.Values.config $.Values.pmm) }}
{{ end }} # range $tablet
{{ end }} # range $shard
{{ end }} # range $keyspace
Expand Down
55 changes: 55 additions & 0 deletions helm/vitess/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -152,3 +152,58 @@ vttablet:
resources:
requests:
storage: "10Gi"

# Default values for pmm (Percona Monitoring and Management).
# The pmm server and the pmm-client sidecars are only rendered when
# `enabled` is true.
pmm:
  enabled: false
  # image tag used for both percona/pmm-server and vitess/pmm-client
  pmmTag: "1.6.1"
  client:
    resources:
      requests:
        cpu: 50m
        memory: 128Mi
      limits:
        cpu: 200m
        memory: 256Mi
  server:
    resources:
      limits:
        cpu: 500m
        memory: 1Gi
    # PVC for pmm
    # Explicit empty map rather than a bare key, so the template renders
    # `annotations: {}` instead of `annotations: null`.
    dataVolumeClaimAnnotations: {}
    dataVolumeClaimSpec:
      # storageClassName: "pd-ssd"
      accessModes: ["ReadWriteOnce"]
      resources:
        requests:
          storage: "10Gi"
    env:
      # DISABLE_TELEMETRY
      # With telemetry enabled, your PMM Server sends some statistics to v.percona.com every 24 hours
      disableTelemetry: true

      # METRICS_RESOLUTION (Option)
      # This option sets the minimum resolution for checking metrics.
      # You should set it if the latency is higher than 1 second.
      metricsResolution: 1s

      # METRICS_RETENTION (Option)
      # This option determines how long metrics are stored at PMM Server.
      # The value is passed as a combination of hours, minutes, and seconds, such as 720h0m0s.
      # The minutes (a number followed by m) and seconds (a number followed by s) are optional.
      metricsRetention: 720h

      # QUERIES_RETENTION
      # This option determines how many days queries are stored at PMM Server
      queriesRetention: 8

      # METRICS_MEMORY (Option) -- TODO: automatically calculate based on resource limits
      # NOTE: The value must be passed in kilobytes
      # NOTE: Make sure to quote this value so it isn't converted into scientific notation

      # By default, Prometheus in PMM Server uses up to 768 MB of memory for storing the most recently used data chunks.
      # Depending on the amount of data coming into Prometheus, you may require a higher limit to avoid throttling data ingestion,
      # or allow less memory consumption if it is needed for other processes.
      # The limit affects only memory reserved for data chunks. Actual RAM usage by Prometheus is higher.
      # It is recommended to set this limit to roughly 2/3 of the total memory that you are planning to allow for Prometheus.
      metricsMemory: "600000"