diff --git a/changelog/17.0/17.0.0/summary.md b/changelog/17.0/17.0.0/summary.md index 01bde438688..b48bb8bb0e1 100644 --- a/changelog/17.0/17.0.0/summary.md +++ b/changelog/17.0/17.0.0/summary.md @@ -10,6 +10,7 @@ - **[New stats](#new-stats)** - [Detailed backup and restore stats](#detailed-backup-and-restore-stats) - [VTtablet Error count with code ](#vttablet-error-count-with-code) + - [VReplication stream status for Prometheus](#vreplication-stream-status-for-prometheus) - **[Deprecations and Deletions](#deprecations-and-deletions)** - [Deprecated Stats](#deprecated-stats) - **[VTTablet](#vttablet)** @@ -194,6 +195,24 @@ Some notes to help understand these metrics: We are introducing new error counter `QueryErrorCountsWithCode` for VTTablet. It is similar to existing [QueryErrorCounts](https://github.com/vitessio/vitess/blob/main/go/vt/vttablet/tabletserver/query_engine.go#L174) except it contains errorCode as additional dimension. We will deprecate `QueryErrorCounts` in v18. +#### VReplication stream status for Prometheus + +VReplication publishes the `VReplicationStreamState` status which reports the state of VReplication streams. For example, here's what it looks like in the local cluster example after the MoveTables step: + +``` +"VReplicationStreamState": { + "commerce2customer.1": "Running" +} +``` + +Prior to v17, this data was not available via the Prometheus backend. In v17, workflow states are also published as a Prometheus gauge with a `state` label and a value of `1.0`. For example: + +``` +# HELP vttablet_v_replication_stream_state State of vreplication workflow +# TYPE vttablet_v_replication_stream_state gauge +vttablet_v_replication_stream_state{counts="1",state="Running",workflow="commerce2customer"} 1 +``` + ## Deprecations and Deletions * The deprecated `automation` and `automationservice` protobuf definitions and associated client and server packages have been removed. diff --git a/go/stats/export.go b/go/stats/export.go index 0a335517a14..e98ef0a969c 100644 --- a/go/stats/export.go +++ b/go/stats/export.go @@ -121,6 +121,60 @@ func Publish(name string, v expvar.Var) { publish(name, v) } +// StringMapFuncWithMultiLabels is a multidimensional string map publisher. +// +// Map keys are compound names made with joining multiple strings with '.', +// and are named by corresponding key labels. +// +// Map values are any string, and are named by the value label. +// +// Since the map is returned by the function, we assume it's in the right +// format (meaning each key is of the form 'aaa.bbb.ccc' with as many elements +// as there are in Labels). +// +// Backends which need to provide a numeric value can set a constant value of 1 +// (or whatever is appropriate for the backend) for each key-value pair present +// in the map. +type StringMapFuncWithMultiLabels struct { + StringMapFunc + help string + keyLabels []string + valueLabel string +} + +// Help returns the descriptive help message. +func (s StringMapFuncWithMultiLabels) Help() string { + return s.help +} + +// KeyLabels returns the list of key labels. +func (s StringMapFuncWithMultiLabels) KeyLabels() []string { + return s.keyLabels +} + +// ValueLabel returns the value label. +func (s StringMapFuncWithMultiLabels) ValueLabel() string { + return s.valueLabel +} + +// NewStringMapFuncWithMultiLabels creates a new StringMapFuncWithMultiLabels, +// mapping to the provided function. The key labels correspond with components +// of map keys. The value label names the map values. +func NewStringMapFuncWithMultiLabels(name, help string, keyLabels []string, valueLabel string, f func() map[string]string) *StringMapFuncWithMultiLabels { + t := &StringMapFuncWithMultiLabels{ + StringMapFunc: StringMapFunc(f), + help: help, + keyLabels: keyLabels, + valueLabel: valueLabel, + } + + if name != "" { + publish(name, t) + } + + return t +} + func publish(name string, v expvar.Var) { defaultVarGroup.publish(name, v) } diff --git a/go/stats/export_test.go b/go/stats/export_test.go index 8f788090f59..faf4f4b5d80 100644 --- a/go/stats/export_test.go +++ b/go/stats/export_test.go @@ -20,6 +20,8 @@ import ( "expvar" "reflect" "testing" + + "github.com/stretchr/testify/require" ) func clear() { @@ -157,3 +159,33 @@ func TestParseCommonTags(t *testing.T) { t.Errorf("expected %v, got %v", expected2, res) } } + +func TestStringMapWithMultiLabels(t *testing.T) { + clear() + c := NewStringMapFuncWithMultiLabels("stringMap1", "help", []string{"aaa", "bbb"}, "ccc", func() map[string]string { + m := make(map[string]string) + m["c1a.c1b"] = "1" + m["c2a.c2b"] = "1" + return m + }) + + want1 := `{"c1a.c1b": "1", "c2a.c2b": "1"}` + want2 := `{"c2a.c2b": "1", "c1a.c1b": "1"}` + if s := c.String(); s != want1 && s != want2 { + t.Errorf("want %s or %s, got %s", want1, want2, s) + } + + m := c.StringMapFunc() + require.Len(t, m, 2) + require.Contains(t, m, "c1a.c1b") + require.Equal(t, m["c1a.c1b"], "1") + require.Contains(t, m, "c2a.c2b") + require.Equal(t, m["c2a.c2b"], "1") + + keyLabels := c.KeyLabels() + require.Len(t, keyLabels, 2) + require.Equal(t, keyLabels[0], "aaa") + require.Equal(t, keyLabels[1], "bbb") + + require.Equal(t, c.ValueLabel(), "ccc") +} diff --git a/go/stats/prometheusbackend/collectors.go b/go/stats/prometheusbackend/collectors.go index a1126c0d211..7469167cf74 100644 --- a/go/stats/prometheusbackend/collectors.go +++ b/go/stats/prometheusbackend/collectors.go @@ -395,3 +395,39 @@ func (c *histogramCollector) Collect(ch chan<- prometheus.Metric) { ch <- metric } } + +type stringMapFuncWithMultiLabelsCollector struct { + smf *stats.StringMapFuncWithMultiLabels + desc *prometheus.Desc +} + +func newStringMapFuncWithMultiLabelsCollector(smf *stats.StringMapFuncWithMultiLabels, name string) { + c := &stringMapFuncWithMultiLabelsCollector{ + smf: smf, + desc: prometheus.NewDesc( + name, + smf.Help(), + labelsToSnake(append(smf.KeyLabels(), smf.ValueLabel())), + nil), + } + + prometheus.MustRegister(c) +} + +// Describe implements Collector. +func (c *stringMapFuncWithMultiLabelsCollector) Describe(ch chan<- *prometheus.Desc) { + ch <- c.desc +} + +// Collect implements Collector. +func (c *stringMapFuncWithMultiLabelsCollector) Collect(ch chan<- prometheus.Metric) { + for lvs, val := range c.smf.StringMapFunc() { + labelValues := append(strings.Split(lvs, "."), val) + metric, err := prometheus.NewConstMetric(c.desc, prometheus.GaugeValue, 1.0, labelValues...) + if err != nil { + log.Errorf("Error adding metric: %s", c.desc) + } else { + ch <- metric + } + } +} diff --git a/go/stats/prometheusbackend/prometheusbackend.go b/go/stats/prometheusbackend/prometheusbackend.go index c3e76797b40..2ae88ebb665 100644 --- a/go/stats/prometheusbackend/prometheusbackend.go +++ b/go/stats/prometheusbackend/prometheusbackend.go @@ -85,6 +85,8 @@ func (be PromBackend) publishPrometheusMetric(name string, v expvar.Var) { newMultiTimingsCollector(st, be.buildPromName(name)) case *stats.Histogram: newHistogramCollector(st, be.buildPromName(name)) + case *stats.StringMapFuncWithMultiLabels: + newStringMapFuncWithMultiLabelsCollector(st, be.buildPromName(name)) case *stats.String, stats.StringFunc, stats.StringMapFunc, *stats.Rates, *stats.RatesFunc: // Silently ignore these types since they don't make sense to // export to Prometheus' data model. diff --git a/go/stats/prometheusbackend/prometheusbackend_test.go b/go/stats/prometheusbackend/prometheusbackend_test.go index 888dd630941..594265153f7 100644 --- a/go/stats/prometheusbackend/prometheusbackend_test.go +++ b/go/stats/prometheusbackend/prometheusbackend_test.go @@ -240,10 +240,33 @@ func TestPrometheusCountersFuncWithMultiLabels(t *testing.T) { checkHandlerForMetricWithMultiLabels(t, name, labels, []string{"bar", "baz"}, 1) } +func TestPrometheusStringMapFuncWithMultiLabels(t *testing.T) { + name := "blah_stringmapfuncwithmultilabels" + keyLabels := []string{"label1", "label2"} + valueLabel := "label3" + + stats.NewStringMapFuncWithMultiLabels(name, "help", keyLabels, valueLabel, func() map[string]string { + m := make(map[string]string) + m["foo.bar"] = "hello" + m["bar.baz"] = "world" + return m + }) + + allLabels := append(keyLabels, valueLabel) + + checkHandlerForMetricWithMultiLabels(t, name, allLabels, []string{"foo", "bar", "hello"}, 1) + checkHandlerForMetricWithMultiLabels(t, name, allLabels, []string{"bar", "baz", "world"}, 1) +} + func checkHandlerForMetricWithMultiLabels(t *testing.T, metric string, labels []string, labelValues []string, value int64) { response := testMetricsHandler(t) - expected := fmt.Sprintf("%s_%s{%s=\"%s\",%s=\"%s\"} %d", namespace, metric, labels[0], labelValues[0], labels[1], labelValues[1], value) + kvPairs := make([]string, 0) + for i := 0; i < len(labels); i++ { + kvPairs = append(kvPairs, fmt.Sprintf("%s=\"%s\"", labels[i], labelValues[i])) + } + + expected := fmt.Sprintf("%s_%s{%s} %d", namespace, metric, strings.Join(kvPairs, ","), value) if !strings.Contains(response.Body.String(), expected) { t.Fatalf("Expected %s got %s", expected, response.Body.String()) diff --git a/go/vt/vttablet/tabletmanager/vreplication/stats.go b/go/vt/vttablet/tabletmanager/vreplication/stats.go index 5b3f55a60f5..58c958ce356 100644 --- a/go/vt/vttablet/tabletmanager/vreplication/stats.go +++ b/go/vt/vttablet/tabletmanager/vreplication/stats.go @@ -62,18 +62,24 @@ type vrStats struct { func (st *vrStats) register() { stats.NewGaugeFunc("VReplicationStreamCount", "Number of vreplication streams", st.numControllers) stats.NewGaugeFunc("VReplicationLagSecondsMax", "Max vreplication seconds behind primary", st.maxReplicationLagSeconds) - stats.Publish("VReplicationStreamState", stats.StringMapFunc(func() map[string]string { - st.mu.Lock() - defer st.mu.Unlock() - result := make(map[string]string, len(st.controllers)) - for _, ct := range st.controllers { - state := ct.blpStats.State.Load() - if state != nil { - result[ct.workflow+"."+fmt.Sprintf("%v", ct.id)] = state.(string) + stats.NewStringMapFuncWithMultiLabels( + "VReplicationStreamState", + "State of vreplication workflow", + []string{"workflow", "counts"}, + "state", + func() map[string]string { + st.mu.Lock() + defer st.mu.Unlock() + result := make(map[string]string, len(st.controllers)) + for _, ct := range st.controllers { + state := ct.blpStats.State.Load() + if state != nil { + result[ct.workflow+"."+fmt.Sprintf("%v", ct.id)] = state.(string) + } } - } - return result - })) + return result + }, + ) stats.NewGaugesFuncWithMultiLabels( "VReplicationLagSeconds", "vreplication seconds behind primary per stream",