From f741175dd02c104cfd75132916e2ec74f9801b78 Mon Sep 17 00:00:00 2001 From: Shaun Verch Date: Thu, 2 Jan 2020 21:07:29 -0500 Subject: [PATCH 1/2] Add option to disable per table metrics collection If the user is creating a large number of tables, this could cause a spike in memory. This change allows the user to turn off any metrics that scale with the number of tables. Signed-off-by: Shaun Verch --- go/vt/vttablet/tabletserver/schema/engine.go | 30 ++++++++++++++++---- 1 file changed, 24 insertions(+), 6 deletions(-) diff --git a/go/vt/vttablet/tabletserver/schema/engine.go b/go/vt/vttablet/tabletserver/schema/engine.go index a10e36473cd..077d055efac 100644 --- a/go/vt/vttablet/tabletserver/schema/engine.go +++ b/go/vt/vttablet/tabletserver/schema/engine.go @@ -19,6 +19,7 @@ package schema import ( "bytes" "encoding/json" + "flag" "net/http" "sync" "time" @@ -40,6 +41,16 @@ import ( vtrpcpb "vitess.io/vitess/go/vt/proto/vtrpc" ) +var exposeStatsPerTable = flag.Bool("expose_stats_per_table", true, + "Whether to expose statistics split by table.") + +// This is used in some queries for table information, but is not directly +// related to the number of tables that we report metrics for. It might affect +// that though, because in practice we might not hit any codepaths that report +// stats for that table because we don't even load it. +// +// I don't know whether we silently don't see tables if we have more than this +// number. const maxTableCount = 10000 type notifier func(full map[string]*Table, created, altered, dropped []string) @@ -76,11 +87,18 @@ func NewEngine(checker connpool.MySQLChecker, config tabletenv.TabletConfig) *En } schemaOnce.Do(func() { _ = stats.NewGaugeDurationFunc("SchemaReloadTime", "vttablet keeps table schemas in its own memory and periodically refreshes it from MySQL. 
This config controls the reload time.", se.ticks.Interval) - _ = stats.NewGaugesFuncWithMultiLabels("TableRows", "table rows created in tabletserver", []string{"Table"}, se.getTableRows) - _ = stats.NewGaugesFuncWithMultiLabels("DataLength", "data length in tabletserver", []string{"Table"}, se.getDataLength) - _ = stats.NewGaugesFuncWithMultiLabels("IndexLength", "index length in tabletserver", []string{"Table"}, se.getIndexLength) - _ = stats.NewGaugesFuncWithMultiLabels("DataFree", "data free in tabletserver", []string{"Table"}, se.getDataFree) - _ = stats.NewGaugesFuncWithMultiLabels("MaxDataLength", "max data length in tabletserver", []string{"Table"}, se.getMaxDataLength) + + var tableLabels []string + if *exposeStatsPerTable { + tableLabels = []string{"Table"} + } else { + tableLabels = []string{} + } + _ = stats.NewGaugesFuncWithMultiLabels("TableRows", "table rows created in tabletserver", tableLabels, se.getTableRows) + _ = stats.NewGaugesFuncWithMultiLabels("DataLength", "data length in tabletserver", tableLabels, se.getDataLength) + _ = stats.NewGaugesFuncWithMultiLabels("IndexLength", "index length in tabletserver", tableLabels, se.getIndexLength) + _ = stats.NewGaugesFuncWithMultiLabels("DataFree", "data free in tabletserver", tableLabels, se.getDataFree) + _ = stats.NewGaugesFuncWithMultiLabels("MaxDataLength", "max data length in tabletserver", tableLabels, se.getMaxDataLength) http.Handle("/debug/schema", se) http.HandleFunc("/schemaz", func(w http.ResponseWriter, r *http.Request) { @@ -193,7 +211,7 @@ func (se *Engine) Open() error { return nil } -// IsOpen() checks if engine is open +// IsOpen checks if engine is open func (se *Engine) IsOpen() bool { se.mu.Lock() defer se.mu.Unlock() From 252b7cd088248c6d3802ad1177816abe6d1df70c Mon Sep 17 00:00:00 2001 From: Shaun Verch Date: Mon, 20 Jan 2020 17:43:34 -0500 Subject: [PATCH 2/2] Added test for metrics endpoint Currently, this is returning something unexpected in my local test. 
It should return Prometheus metrics, but instead it returns a JSON object. However, this is the outline for how I would test this. Signed-off-by: Shaun Verch --- .../tabletserver/schema/engine_test.go | 27 +++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/go/vt/vttablet/tabletserver/schema/engine_test.go b/go/vt/vttablet/tabletserver/schema/engine_test.go index a25a4dac0a7..88e7c2967b5 100644 --- a/go/vt/vttablet/tabletserver/schema/engine_test.go +++ b/go/vt/vttablet/tabletserver/schema/engine_test.go @@ -397,6 +397,33 @@ func TestStatsURL(t *testing.T) { request, _ := http.NewRequest("GET", "/debug/schema", nil) response := httptest.NewRecorder() se.ServeHTTP(response, request) + + // Check the status code is what we expect. + if status := response.Code; status != http.StatusOK { + t.Errorf("handler returned wrong status code: got %v want %v", + status, http.StatusOK) + } +} + +func TestPrometheusStatsURL(t *testing.T) { + db := fakesqldb.New(t) + defer db.Close() + for query, result := range schematest.Queries() { + db.AddQuery(query, result) + } + se := newEngine(10, 1*time.Second, 1*time.Second, true, db) + se.Open() + defer se.Close() + + request, _ := http.NewRequest("GET", "/metrics", nil) + response := httptest.NewRecorder() + se.ServeHTTP(response, request) + + // Check the status code is what we expect. + if status := response.Code; status != http.StatusOK { + t.Errorf("handler returned wrong status code: got %v want %v", + status, http.StatusOK) + } } type dummyChecker struct {