diff --git a/changelog/17.0/17.0.0/summary.md b/changelog/17.0/17.0.0/summary.md
index 01bde438688..b48bb8bb0e1 100644
--- a/changelog/17.0/17.0.0/summary.md
+++ b/changelog/17.0/17.0.0/summary.md
@@ -10,6 +10,7 @@
- **[New stats](#new-stats)**
- [Detailed backup and restore stats](#detailed-backup-and-restore-stats)
- [VTtablet Error count with code ](#vttablet-error-count-with-code)
+ - [VReplication stream status for Prometheus](#vreplication-stream-status-for-prometheus)
- **[Deprecations and Deletions](#deprecations-and-deletions)**
- [Deprecated Stats](#deprecated-stats)
- **[VTTablet](#vttablet)**
@@ -194,6 +195,24 @@ Some notes to help understand these metrics:
We are introducing new error counter `QueryErrorCountsWithCode` for VTTablet. It is similar to existing [QueryErrorCounts](https://github.com/vitessio/vitess/blob/main/go/vt/vttablet/tabletserver/query_engine.go#L174) except it contains errorCode as additional dimension.
We will deprecate `QueryErrorCounts` in v18.
+#### VReplication stream status for Prometheus
+
+VReplication publishes the `VReplicationStreamState` status which reports the state of VReplication streams. For example, here's what it looks like in the local cluster example after the MoveTables step:
+
+```
+"VReplicationStreamState": {
+ "commerce2customer.1": "Running"
+}
+```
+
+Prior to v17, this data was not available via the Prometheus backend. In v17, workflow states are also published as a Prometheus gauge with a `state` label and a value of `1.0`. For example:
+
+```
+# HELP vttablet_v_replication_stream_state State of vreplication workflow
+# TYPE vttablet_v_replication_stream_state gauge
+vttablet_v_replication_stream_state{counts="1",state="Running",workflow="commerce2customer"} 1
+```
+
## Deprecations and Deletions
* The deprecated `automation` and `automationservice` protobuf definitions and associated client and server packages have been removed.
diff --git a/go/stats/export.go b/go/stats/export.go
index 0a335517a14..e98ef0a969c 100644
--- a/go/stats/export.go
+++ b/go/stats/export.go
@@ -121,6 +121,60 @@ func Publish(name string, v expvar.Var) {
publish(name, v)
}
+// StringMapFuncWithMultiLabels is a multidimensional string map publisher.
+//
+// Map keys are compound names made with joining multiple strings with '.',
+// and are named by corresponding key labels.
+//
+// Map values are any string, and are named by the value label.
+//
+// Since the map is returned by the function, we assume it's in the right
+// format (meaning each key is of the form 'aaa.bbb.ccc' with as many elements
+// as there are in Labels).
+//
+// Backends which need to provide a numeric value can set a constant value of 1
+// (or whatever is appropriate for the backend) for each key-value pair present
+// in the map.
+type StringMapFuncWithMultiLabels struct {
+ StringMapFunc
+ help string
+ keyLabels []string
+ valueLabel string
+}
+
+// Help returns the descriptive help message.
+func (s StringMapFuncWithMultiLabels) Help() string {
+ return s.help
+}
+
+// KeyLabels returns the list of key labels.
+func (s StringMapFuncWithMultiLabels) KeyLabels() []string {
+ return s.keyLabels
+}
+
+// ValueLabel returns the value label.
+func (s StringMapFuncWithMultiLabels) ValueLabel() string {
+ return s.valueLabel
+}
+
+// NewStringMapFuncWithMultiLabels creates a new StringMapFuncWithMultiLabels,
+// mapping to the provided function. The key labels correspond with components
+// of map keys. The value label names the map values.
+func NewStringMapFuncWithMultiLabels(name, help string, keyLabels []string, valueLabel string, f func() map[string]string) *StringMapFuncWithMultiLabels {
+ t := &StringMapFuncWithMultiLabels{
+ StringMapFunc: StringMapFunc(f),
+ help: help,
+ keyLabels: keyLabels,
+ valueLabel: valueLabel,
+ }
+
+ if name != "" {
+ publish(name, t)
+ }
+
+ return t
+}
+
func publish(name string, v expvar.Var) {
defaultVarGroup.publish(name, v)
}
diff --git a/go/stats/export_test.go b/go/stats/export_test.go
index 8f788090f59..faf4f4b5d80 100644
--- a/go/stats/export_test.go
+++ b/go/stats/export_test.go
@@ -20,6 +20,8 @@ import (
"expvar"
"reflect"
"testing"
+
+ "github.com/stretchr/testify/require"
)
func clear() {
@@ -157,3 +159,33 @@ func TestParseCommonTags(t *testing.T) {
t.Errorf("expected %v, got %v", expected2, res)
}
}
+
+func TestStringMapWithMultiLabels(t *testing.T) {
+ clear()
+ c := NewStringMapFuncWithMultiLabels("stringMap1", "help", []string{"aaa", "bbb"}, "ccc", func() map[string]string {
+ m := make(map[string]string)
+ m["c1a.c1b"] = "1"
+ m["c2a.c2b"] = "1"
+ return m
+ })
+
+ want1 := `{"c1a.c1b": "1", "c2a.c2b": "1"}`
+ want2 := `{"c2a.c2b": "1", "c1a.c1b": "1"}`
+ if s := c.String(); s != want1 && s != want2 {
+ t.Errorf("want %s or %s, got %s", want1, want2, s)
+ }
+
+ m := c.StringMapFunc()
+ require.Len(t, m, 2)
+ require.Contains(t, m, "c1a.c1b")
+ require.Equal(t, m["c1a.c1b"], "1")
+ require.Contains(t, m, "c2a.c2b")
+ require.Equal(t, m["c2a.c2b"], "1")
+
+ keyLabels := c.KeyLabels()
+ require.Len(t, keyLabels, 2)
+ require.Equal(t, keyLabels[0], "aaa")
+ require.Equal(t, keyLabels[1], "bbb")
+
+ require.Equal(t, c.ValueLabel(), "ccc")
+}
diff --git a/go/stats/prometheusbackend/collectors.go b/go/stats/prometheusbackend/collectors.go
index a1126c0d211..7469167cf74 100644
--- a/go/stats/prometheusbackend/collectors.go
+++ b/go/stats/prometheusbackend/collectors.go
@@ -395,3 +395,39 @@ func (c *histogramCollector) Collect(ch chan<- prometheus.Metric) {
ch <- metric
}
}
+
+type stringMapFuncWithMultiLabelsCollector struct {
+ smf *stats.StringMapFuncWithMultiLabels
+ desc *prometheus.Desc
+}
+
+func newStringMapFuncWithMultiLabelsCollector(smf *stats.StringMapFuncWithMultiLabels, name string) {
+ c := &stringMapFuncWithMultiLabelsCollector{
+ smf: smf,
+ desc: prometheus.NewDesc(
+ name,
+ smf.Help(),
+ labelsToSnake(append(smf.KeyLabels(), smf.ValueLabel())),
+ nil),
+ }
+
+ prometheus.MustRegister(c)
+}
+
+// Describe implements Collector.
+func (c *stringMapFuncWithMultiLabelsCollector) Describe(ch chan<- *prometheus.Desc) {
+ ch <- c.desc
+}
+
+// Collect implements Collector.
+func (c *stringMapFuncWithMultiLabelsCollector) Collect(ch chan<- prometheus.Metric) {
+ for lvs, val := range c.smf.StringMapFunc() {
+ labelValues := append(strings.Split(lvs, "."), val)
+ metric, err := prometheus.NewConstMetric(c.desc, prometheus.GaugeValue, 1.0, labelValues...)
+ if err != nil {
+ log.Errorf("Error adding metric: %s", c.desc)
+ } else {
+ ch <- metric
+ }
+ }
+}
diff --git a/go/stats/prometheusbackend/prometheusbackend.go b/go/stats/prometheusbackend/prometheusbackend.go
index c3e76797b40..2ae88ebb665 100644
--- a/go/stats/prometheusbackend/prometheusbackend.go
+++ b/go/stats/prometheusbackend/prometheusbackend.go
@@ -85,6 +85,8 @@ func (be PromBackend) publishPrometheusMetric(name string, v expvar.Var) {
newMultiTimingsCollector(st, be.buildPromName(name))
case *stats.Histogram:
newHistogramCollector(st, be.buildPromName(name))
+ case *stats.StringMapFuncWithMultiLabels:
+ newStringMapFuncWithMultiLabelsCollector(st, be.buildPromName(name))
case *stats.String, stats.StringFunc, stats.StringMapFunc, *stats.Rates, *stats.RatesFunc:
// Silently ignore these types since they don't make sense to
// export to Prometheus' data model.
diff --git a/go/stats/prometheusbackend/prometheusbackend_test.go b/go/stats/prometheusbackend/prometheusbackend_test.go
index 888dd630941..594265153f7 100644
--- a/go/stats/prometheusbackend/prometheusbackend_test.go
+++ b/go/stats/prometheusbackend/prometheusbackend_test.go
@@ -240,10 +240,33 @@ func TestPrometheusCountersFuncWithMultiLabels(t *testing.T) {
checkHandlerForMetricWithMultiLabels(t, name, labels, []string{"bar", "baz"}, 1)
}
+func TestPrometheusStringMapFuncWithMultiLabels(t *testing.T) {
+ name := "blah_stringmapfuncwithmultilabels"
+ keyLabels := []string{"label1", "label2"}
+ valueLabel := "label3"
+
+ stats.NewStringMapFuncWithMultiLabels(name, "help", keyLabels, valueLabel, func() map[string]string {
+ m := make(map[string]string)
+ m["foo.bar"] = "hello"
+ m["bar.baz"] = "world"
+ return m
+ })
+
+ allLabels := append(keyLabels, valueLabel)
+
+ checkHandlerForMetricWithMultiLabels(t, name, allLabels, []string{"foo", "bar", "hello"}, 1)
+ checkHandlerForMetricWithMultiLabels(t, name, allLabels, []string{"bar", "baz", "world"}, 1)
+}
+
func checkHandlerForMetricWithMultiLabels(t *testing.T, metric string, labels []string, labelValues []string, value int64) {
response := testMetricsHandler(t)
- expected := fmt.Sprintf("%s_%s{%s=\"%s\",%s=\"%s\"} %d", namespace, metric, labels[0], labelValues[0], labels[1], labelValues[1], value)
+ kvPairs := make([]string, 0)
+ for i := 0; i < len(labels); i++ {
+ kvPairs = append(kvPairs, fmt.Sprintf("%s=\"%s\"", labels[i], labelValues[i]))
+ }
+
+ expected := fmt.Sprintf("%s_%s{%s} %d", namespace, metric, strings.Join(kvPairs, ","), value)
if !strings.Contains(response.Body.String(), expected) {
t.Fatalf("Expected %s got %s", expected, response.Body.String())
diff --git a/go/vt/vttablet/tabletmanager/vreplication/stats.go b/go/vt/vttablet/tabletmanager/vreplication/stats.go
index 5b3f55a60f5..58c958ce356 100644
--- a/go/vt/vttablet/tabletmanager/vreplication/stats.go
+++ b/go/vt/vttablet/tabletmanager/vreplication/stats.go
@@ -62,18 +62,24 @@ type vrStats struct {
func (st *vrStats) register() {
stats.NewGaugeFunc("VReplicationStreamCount", "Number of vreplication streams", st.numControllers)
stats.NewGaugeFunc("VReplicationLagSecondsMax", "Max vreplication seconds behind primary", st.maxReplicationLagSeconds)
- stats.Publish("VReplicationStreamState", stats.StringMapFunc(func() map[string]string {
- st.mu.Lock()
- defer st.mu.Unlock()
- result := make(map[string]string, len(st.controllers))
- for _, ct := range st.controllers {
- state := ct.blpStats.State.Load()
- if state != nil {
- result[ct.workflow+"."+fmt.Sprintf("%v", ct.id)] = state.(string)
+ stats.NewStringMapFuncWithMultiLabels(
+ "VReplicationStreamState",
+ "State of vreplication workflow",
+ []string{"workflow", "counts"},
+ "state",
+ func() map[string]string {
+ st.mu.Lock()
+ defer st.mu.Unlock()
+ result := make(map[string]string, len(st.controllers))
+ for _, ct := range st.controllers {
+ state := ct.blpStats.State.Load()
+ if state != nil {
+ result[ct.workflow+"."+fmt.Sprintf("%v", ct.id)] = state.(string)
+ }
}
- }
- return result
- }))
+ return result
+ },
+ )
stats.NewGaugesFuncWithMultiLabels(
"VReplicationLagSeconds",
"vreplication seconds behind primary per stream",