From b3c3528b1de1dc26b078e7cd35a95c59463e0612 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Peter=20S=CC=8Ctibrany=CC=81?= Date: Fri, 14 Aug 2020 09:08:52 +0200 Subject: [PATCH 1/9] More GRPC stats. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Peter Štibraný --- pkg/cortex/modules.go | 4 + pkg/util/grpc/stats/grpc_stats.go | 117 ++++++++++++++++++++++++++++++ 2 files changed, 121 insertions(+) create mode 100644 pkg/util/grpc/stats/grpc_stats.go diff --git a/pkg/cortex/modules.go b/pkg/cortex/modules.go index a10486102e4..3f1cd98c69f 100644 --- a/pkg/cortex/modules.go +++ b/pkg/cortex/modules.go @@ -13,6 +13,7 @@ import ( httpgrpc_server "github.com/weaveworks/common/httpgrpc/server" "github.com/weaveworks/common/instrument" "github.com/weaveworks/common/server" + "google.golang.org/grpc" "github.com/cortexproject/cortex/pkg/alertmanager" "github.com/cortexproject/cortex/pkg/api" @@ -34,6 +35,7 @@ import ( "github.com/cortexproject/cortex/pkg/ruler" "github.com/cortexproject/cortex/pkg/storegateway" "github.com/cortexproject/cortex/pkg/util" + "github.com/cortexproject/cortex/pkg/util/grpc/stats" "github.com/cortexproject/cortex/pkg/util/modules" "github.com/cortexproject/cortex/pkg/util/runtimeconfig" "github.com/cortexproject/cortex/pkg/util/services" @@ -86,6 +88,8 @@ func (t *Cortex) initAPI() (services.Service, error) { func (t *Cortex) initServer() (services.Service, error) { // Cortex handles signals on its own. DisableSignalHandling(&t.Cfg.Server) + + t.Cfg.Server.GRPCOptions = append(t.Cfg.Server.GRPCOptions, grpc.StatsHandler(stats.NewStatsHandler(prometheus.DefaultRegisterer))) serv, err := server.New(t.Cfg.Server) if err != nil { return nil, err diff --git a/pkg/util/grpc/stats/grpc_stats.go b/pkg/util/grpc/stats/grpc_stats.go new file mode 100644 index 00000000000..3c60db52305 --- /dev/null +++ b/pkg/util/grpc/stats/grpc_stats.go @@ -0,0 +1,117 @@ +package stats + +import ( + "context" + + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/promauto" + "google.golang.org/grpc/metadata" + "google.golang.org/grpc/stats" +) + +func NewStatsHandler(r prometheus.Registerer) stats.Handler { + // We don’t particularly care about small requests / responses, + // we want to know more about the big ones. + // Histogram goes linearly from 30MB to 210MB in 8 buckets. + messageSizeBuckets := prometheus.LinearBuckets(30_000_000, 30_000_000, 8) + + return &grpcStatsHandler{ + connectedClients: promauto.With(r).NewGauge(prometheus.GaugeOpts{ + Name: "cortex_grpc_connected_clients", + Help: "Number of clients connected to gRPC server", + }), + + inflightRpc: promauto.With(r).NewGaugeVec(prometheus.GaugeOpts{ + Name: "cortex_grpc_inflight_requests", + Help: "Number of inflight RPC calls", + }, []string{"method"}), + + receivedMessageSize: promauto.With(r).NewHistogramVec(prometheus.HistogramOpts{ + Name: "cortex_grpc_request_size_bytes", + Help: "Size of gRPC requests.", + Buckets: messageSizeBuckets, + }, []string{"method"}), + + sentMessageSize: promauto.With(r).NewHistogramVec(prometheus.HistogramOpts{ + Name: "cortex_grpc_response_size_bytes", + Help: "Size of gRPC responses.", + Buckets: messageSizeBuckets, + }, []string{"method"}), + } +} + +type grpcStatsHandler struct { + connectedClients prometheus.Gauge + inflightRpc *prometheus.GaugeVec + receivedMessageSize *prometheus.HistogramVec + sentMessageSize *prometheus.HistogramVec +} + +// Custom type to hide it from other packages. +type contextKey int + +const ( + contextKeyMethodName contextKey = 1 +) + +func (g *grpcStatsHandler) TagRPC(ctx context.Context, info *stats.RPCTagInfo) context.Context { + return context.WithValue(ctx, contextKeyMethodName, info.FullMethodName) +} + +func (g *grpcStatsHandler) HandleRPC(ctx context.Context, rpcStats stats.RPCStats) { + // We use full method name from context, because not all RPCStats structs have it. + fullMethodName, ok := ctx.Value(contextKeyMethodName).(string) + if !ok { + return + } + + switch s := rpcStats.(type) { + case *stats.Begin: + g.inflightRpc.WithLabelValues(fullMethodName).Inc() + case *stats.End: + g.inflightRpc.WithLabelValues(fullMethodName).Dec() + + case *stats.InHeader: + g.receivedMessageSize.WithLabelValues(fullMethodName).Observe(float64(s.WireLength)) + case *stats.InPayload: + g.receivedMessageSize.WithLabelValues(fullMethodName).Observe(float64(s.WireLength)) + case *stats.InTrailer: + g.receivedMessageSize.WithLabelValues(fullMethodName).Observe(float64(s.WireLength)) + + case *stats.OutHeader: + // OutHeader doesn't have WireLength. + g.sentMessageSize.WithLabelValues(fullMethodName).Observe(estimateSize(s.Header)) + case *stats.OutPayload: + g.sentMessageSize.WithLabelValues(fullMethodName).Observe(float64(s.WireLength)) + case *stats.OutTrailer: + // OutTrailer doesn't have valid WireLength (there is deperecated field, always set to 0). + g.sentMessageSize.WithLabelValues(fullMethodName).Observe(estimateSize(s.Trailer)) + } +} + +// This returns estimate for message size for encoding metadata. +// Doesn't take any possible compression into account. +func estimateSize(md metadata.MD) float64 { + result := 0 + for k, vs := range md { + result += len(k) + for _, v := range vs { + result += len(v) + } + } + return float64(result) +} + +func (g *grpcStatsHandler) TagConn(ctx context.Context, _ *stats.ConnTagInfo) context.Context { + return ctx +} + +func (g *grpcStatsHandler) HandleConn(_ context.Context, connStats stats.ConnStats) { + switch connStats.(type) { + case *stats.ConnBegin: + g.connectedClients.Inc() + + case *stats.ConnEnd: + g.connectedClients.Inc() + } +} From 5990dceafaf2a5a4344bf6dac5c801fd9a956bd4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Peter=20S=CC=8Ctibrany=CC=81?= Date: Fri, 14 Aug 2020 10:27:25 +0200 Subject: [PATCH 2/9] Added test for grpc stats. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Peter Štibraný --- pkg/util/grpc/stats/grpc_stats.go | 43 ++++----- pkg/util/grpc/stats/grpc_stats_test.go | 119 +++++++++++++++++++++++++ 2 files changed, 138 insertions(+), 24 deletions(-) create mode 100644 pkg/util/grpc/stats/grpc_stats_test.go diff --git a/pkg/util/grpc/stats/grpc_stats.go b/pkg/util/grpc/stats/grpc_stats.go index 3c60db52305..51314b0a627 100644 --- a/pkg/util/grpc/stats/grpc_stats.go +++ b/pkg/util/grpc/stats/grpc_stats.go @@ -2,18 +2,16 @@ package stats import ( "context" + "fmt" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promauto" - "google.golang.org/grpc/metadata" "google.golang.org/grpc/stats" ) func NewStatsHandler(r prometheus.Registerer) stats.Handler { - // We don’t particularly care about small requests / responses, - // we want to know more about the big ones. - // Histogram goes linearly from 30MB to 210MB in 8 buckets. - messageSizeBuckets := prometheus.LinearBuckets(30_000_000, 30_000_000, 8) + const MiB = 1024 * 1024 + messageSizeBuckets := []float64{1 * MiB, 2.5 * MiB, 5 * MiB, 10 * MiB, 25 * MiB, 50 * MiB, 100 * MiB, 250 * MiB} return &grpcStatsHandler{ connectedClients: promauto.With(r).NewGauge(prometheus.GaugeOpts{ @@ -26,6 +24,11 @@ func NewStatsHandler(r prometheus.Registerer) stats.Handler { Help: "Number of inflight RPC calls", }, []string{"method"}), + methodErrors: promauto.With(r).NewCounterVec(prometheus.CounterOpts{ + Name: "cortex_grpc_method_errors_total", + Help: "Number of clients connected to gRPC server", + }, []string{"method"}), + receivedMessageSize: promauto.With(r).NewHistogramVec(prometheus.HistogramOpts{ Name: "cortex_grpc_request_size_bytes", Help: "Size of gRPC requests.", @@ -45,6 +48,7 @@ type grpcStatsHandler struct { inflightRpc *prometheus.GaugeVec receivedMessageSize *prometheus.HistogramVec sentMessageSize *prometheus.HistogramVec + methodErrors *prometheus.CounterVec } // Custom type to hide it from other packages. @@ -70,36 +74,27 @@ func (g *grpcStatsHandler) HandleRPC(ctx context.Context, rpcStats stats.RPCStat g.inflightRpc.WithLabelValues(fullMethodName).Inc() case *stats.End: g.inflightRpc.WithLabelValues(fullMethodName).Dec() + if s.Error != nil { + g.methodErrors.WithLabelValues(fullMethodName).Inc() + } case *stats.InHeader: - g.receivedMessageSize.WithLabelValues(fullMethodName).Observe(float64(s.WireLength)) + // Ignored. Cortex doesn't use headers. Furthermore WireLength seems to be incorrect for large headers -- it uses + // length of last frame (16K) even for headers in megabytes. case *stats.InPayload: g.receivedMessageSize.WithLabelValues(fullMethodName).Observe(float64(s.WireLength)) case *stats.InTrailer: - g.receivedMessageSize.WithLabelValues(fullMethodName).Observe(float64(s.WireLength)) + // Ignored. Cortex doesn't use trailers. case *stats.OutHeader: - // OutHeader doesn't have WireLength. - g.sentMessageSize.WithLabelValues(fullMethodName).Observe(estimateSize(s.Header)) + // Ignored. Cortex doesn't send headers, and since OutHeader doesn't have WireLength, we could only estimate it. case *stats.OutPayload: g.sentMessageSize.WithLabelValues(fullMethodName).Observe(float64(s.WireLength)) case *stats.OutTrailer: - // OutTrailer doesn't have valid WireLength (there is deperecated field, always set to 0). - g.sentMessageSize.WithLabelValues(fullMethodName).Observe(estimateSize(s.Trailer)) - } -} - -// This returns estimate for message size for encoding metadata. -// Doesn't take any possible compression into account. -func estimateSize(md metadata.MD) float64 { - result := 0 - for k, vs := range md { - result += len(k) - for _, v := range vs { - result += len(v) - } + // Ignored, Cortex doesn't use trailers. OutTrailer doesn't have valid WireLength (there is deperecated field, always set to 0). + default: + panic(fmt.Sprintf("Unknown type: %T", rpcStats)) } - return float64(result) } func (g *grpcStatsHandler) TagConn(ctx context.Context, _ *stats.ConnTagInfo) context.Context { diff --git a/pkg/util/grpc/stats/grpc_stats_test.go b/pkg/util/grpc/stats/grpc_stats_test.go new file mode 100644 index 00000000000..09e1d921cca --- /dev/null +++ b/pkg/util/grpc/stats/grpc_stats_test.go @@ -0,0 +1,119 @@ +package stats + +import ( + "bytes" + "context" + "crypto/rand" + "net" + "testing" + + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/testutil" + "github.com/stretchr/testify/require" + "google.golang.org/grpc" + "google.golang.org/grpc/health" + "google.golang.org/grpc/health/grpc_health_v1" +) + +func TestGrpcStats(t *testing.T) { + reg := prometheus.NewRegistry() + stats := NewStatsHandler(reg) + + // Create a GRPC server used to query back the data. + serv := grpc.NewServer(grpc.StatsHandler(stats), grpc.MaxRecvMsgSize(10e6)) + defer serv.GracefulStop() + + listener, err := net.Listen("tcp", "localhost:0") + require.NoError(t, err) + + go func() { + require.NoError(t, serv.Serve(listener)) + }() + + hs := health.NewServer() + grpc_health_v1.RegisterHealthServer(serv, hs) + + closed := false + conn, err := grpc.Dial(listener.Addr().String(), grpc.WithInsecure()) + require.NoError(t, err) + defer func() { + if !closed { + require.NoError(t, conn.Close()) + } + }() + + hc := grpc_health_v1.NewHealthClient(conn) + + // First request (empty). + resp, err := hc.Check(context.Background(), &grpc_health_v1.HealthCheckRequest{}) + require.NoError(t, err) + require.Equal(t, grpc_health_v1.HealthCheckResponse_SERVING, resp.Status) + + // Second request, with large service name. This returns error, which doesn't count as "payload". + _, err = hc.Check(context.Background(), &grpc_health_v1.HealthCheckRequest{ + Service: generateString(t, 8*1024*1024), + }) + require.EqualError(t, err, "rpc error: code = NotFound desc = unknown service") + + err = testutil.GatherAndCompare(reg, bytes.NewBufferString(` + # HELP cortex_grpc_connected_clients Number of clients connected to gRPC server + # TYPE cortex_grpc_connected_clients gauge + cortex_grpc_connected_clients 1 + + # HELP cortex_grpc_method_errors_total Number of clients connected to gRPC server + # TYPE cortex_grpc_method_errors_total counter + cortex_grpc_method_errors_total{method="/grpc.health.v1.Health/Check"} 1 + + # HELP cortex_grpc_request_size_bytes Size of gRPC requests. + # TYPE cortex_grpc_request_size_bytes histogram + cortex_grpc_request_size_bytes_bucket{method="/grpc.health.v1.Health/Check",le="1.048576e+06"} 1 + cortex_grpc_request_size_bytes_bucket{method="/grpc.health.v1.Health/Check",le="2.62144e+06"} 1 + cortex_grpc_request_size_bytes_bucket{method="/grpc.health.v1.Health/Check",le="5.24288e+06"} 1 + cortex_grpc_request_size_bytes_bucket{method="/grpc.health.v1.Health/Check",le="1.048576e+07"} 2 + cortex_grpc_request_size_bytes_bucket{method="/grpc.health.v1.Health/Check",le="2.62144e+07"} 2 + cortex_grpc_request_size_bytes_bucket{method="/grpc.health.v1.Health/Check",le="5.24288e+07"} 2 + cortex_grpc_request_size_bytes_bucket{method="/grpc.health.v1.Health/Check",le="1.048576e+08"} 2 + cortex_grpc_request_size_bytes_bucket{method="/grpc.health.v1.Health/Check",le="2.62144e+08"} 2 + cortex_grpc_request_size_bytes_bucket{method="/grpc.health.v1.Health/Check",le="+Inf"} 2 + cortex_grpc_request_size_bytes_sum{method="/grpc.health.v1.Health/Check"} 8.388613e+06 + cortex_grpc_request_size_bytes_count{method="/grpc.health.v1.Health/Check"} 2 + + # HELP cortex_grpc_response_size_bytes Size of gRPC responses. + # TYPE cortex_grpc_response_size_bytes histogram + cortex_grpc_response_size_bytes_bucket{method="/grpc.health.v1.Health/Check",le="1.048576e+06"} 1 + cortex_grpc_response_size_bytes_bucket{method="/grpc.health.v1.Health/Check",le="2.62144e+06"} 1 + cortex_grpc_response_size_bytes_bucket{method="/grpc.health.v1.Health/Check",le="5.24288e+06"} 1 + cortex_grpc_response_size_bytes_bucket{method="/grpc.health.v1.Health/Check",le="1.048576e+07"} 1 + cortex_grpc_response_size_bytes_bucket{method="/grpc.health.v1.Health/Check",le="2.62144e+07"} 1 + cortex_grpc_response_size_bytes_bucket{method="/grpc.health.v1.Health/Check",le="5.24288e+07"} 1 + cortex_grpc_response_size_bytes_bucket{method="/grpc.health.v1.Health/Check",le="1.048576e+08"} 1 + cortex_grpc_response_size_bytes_bucket{method="/grpc.health.v1.Health/Check",le="2.62144e+08"} 1 + cortex_grpc_response_size_bytes_bucket{method="/grpc.health.v1.Health/Check",le="+Inf"} 1 + cortex_grpc_response_size_bytes_sum{method="/grpc.health.v1.Health/Check"} 7 + cortex_grpc_response_size_bytes_count{method="/grpc.health.v1.Health/Check"} 1 + `), "cortex_grpc_connected_clients", "cortex_grpc_request_size_bytes", "cortex_grpc_response_size_bytes", "cortex_grpc_method_errors_total") + require.NoError(t, err) + + closed = true + require.NoError(t, conn.Close()) + err = testutil.GatherAndCompare(reg, bytes.NewBufferString(` + # HELP cortex_grpc_connected_clients Number of clients connected to gRPC server + # TYPE cortex_grpc_connected_clients gauge + cortex_grpc_connected_clients 0 + `), "cortex_grpc_connected_clients") +} + +func generateString(t *testing.T, size int) string { + // Use random bytes, to avoid compression. + buf := make([]byte, size) + _, err := rand.Read(buf) + require.NoError(t, err) + for ix, b := range buf { + if b < ' ' { + b += ' ' + } + b = b & 0x7f + buf[ix] = b + } + return string(buf) +} From e0b6321987e326fff084e7a7e51c5b269bfb72ca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Peter=20S=CC=8Ctibrany=CC=81?= Date: Fri, 14 Aug 2020 11:11:31 +0200 Subject: [PATCH 3/9] Added test for streaming payload. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Peter Štibraný --- pkg/util/grpc/stats/grpc_stats_test.go | 144 +++++++++++++++++++++++-- 1 file changed, 138 insertions(+), 6 deletions(-) diff --git a/pkg/util/grpc/stats/grpc_stats_test.go b/pkg/util/grpc/stats/grpc_stats_test.go index 09e1d921cca..202e1f7a537 100644 --- a/pkg/util/grpc/stats/grpc_stats_test.go +++ b/pkg/util/grpc/stats/grpc_stats_test.go @@ -4,22 +4,26 @@ import ( "bytes" "context" "crypto/rand" + "fmt" "net" "testing" + "time" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/testutil" "github.com/stretchr/testify/require" + "github.com/weaveworks/common/httpgrpc" "google.golang.org/grpc" "google.golang.org/grpc/health" "google.golang.org/grpc/health/grpc_health_v1" + + "github.com/cortexproject/cortex/pkg/querier/frontend" ) func TestGrpcStats(t *testing.T) { reg := prometheus.NewRegistry() stats := NewStatsHandler(reg) - // Create a GRPC server used to query back the data. serv := grpc.NewServer(grpc.StatsHandler(stats), grpc.MaxRecvMsgSize(10e6)) defer serv.GracefulStop() @@ -30,8 +34,7 @@ func TestGrpcStats(t *testing.T) { require.NoError(t, serv.Serve(listener)) }() - hs := health.NewServer() - grpc_health_v1.RegisterHealthServer(serv, hs) + grpc_health_v1.RegisterHealthServer(serv, health.NewServer()) closed := false conn, err := grpc.Dial(listener.Addr().String(), grpc.WithInsecure()) @@ -51,7 +54,7 @@ func TestGrpcStats(t *testing.T) { // Second request, with large service name. This returns error, which doesn't count as "payload". _, err = hc.Check(context.Background(), &grpc_health_v1.HealthCheckRequest{ - Service: generateString(t, 8*1024*1024), + Service: generateString(8 * 1024 * 1024), }) require.EqualError(t, err, "rpc error: code = NotFound desc = unknown service") @@ -103,11 +106,140 @@ func TestGrpcStats(t *testing.T) { `), "cortex_grpc_connected_clients") } -func generateString(t *testing.T, size int) string { +func TestGrpcStatsStreaming(t *testing.T) { + reg := prometheus.NewRegistry() + stats := NewStatsHandler(reg) + + serv := grpc.NewServer(grpc.StatsHandler(stats), grpc.MaxSendMsgSize(10e6), grpc.MaxRecvMsgSize(10e6)) + defer serv.GracefulStop() + + listener, err := net.Listen("tcp", "localhost:0") + require.NoError(t, err) + + go func() { + require.NoError(t, serv.Serve(listener)) + }() + + frontend.RegisterFrontendServer(serv, &frontendServer{t: t}) + + conn, err := grpc.Dial(listener.Addr().String(), grpc.WithInsecure(), grpc.WithDefaultCallOptions(grpc.MaxCallRecvMsgSize(10e6), grpc.MaxCallSendMsgSize(10e6))) + require.NoError(t, err) + defer func() { + require.NoError(t, conn.Close()) + }() + + fc := frontend.NewFrontendClient(conn) + + s, err := fc.Process(context.Background()) + require.NoError(t, err) + + for ix := 0; ix < 5; ix++ { + req, err := s.Recv() + require.NoError(t, err) + + msg := &frontend.ProcessResponse{HttpResponse: &httpgrpc.HTTPResponse{ + Code: 200, + Headers: req.HttpRequest.Headers, + }} + fmt.Println("Client sending:", msg.Size()) + err = s.Send(msg) + require.NoError(t, err) + + err = testutil.GatherAndCompare(reg, bytes.NewBufferString(` + # HELP cortex_grpc_inflight_requests Number of inflight RPC calls + # TYPE cortex_grpc_inflight_requests gauge + cortex_grpc_inflight_requests{method="/frontend.Frontend/Process"} 1 + `), "cortex_grpc_inflight_requests") + require.NoError(t, err) + } + require.NoError(t, s.CloseSend()) + + // Wait a second to make sure server notices close. + time.Sleep(1 * time.Second) + err = testutil.GatherAndCompare(reg, bytes.NewBufferString(` + # HELP cortex_grpc_inflight_requests Number of inflight RPC calls + # TYPE cortex_grpc_inflight_requests gauge + cortex_grpc_inflight_requests{method="/frontend.Frontend/Process"} 0 + `), "cortex_grpc_inflight_requests") + require.NoError(t, err) + + err = testutil.GatherAndCompare(reg, bytes.NewBufferString(` + # HELP cortex_grpc_request_size_bytes Size of gRPC requests. + # TYPE cortex_grpc_request_size_bytes histogram + cortex_grpc_request_size_bytes_bucket{method="/frontend.Frontend/Process",le="1.048576e+06"} 1 + cortex_grpc_request_size_bytes_bucket{method="/frontend.Frontend/Process",le="2.62144e+06"} 4 + cortex_grpc_request_size_bytes_bucket{method="/frontend.Frontend/Process",le="5.24288e+06"} 5 + cortex_grpc_request_size_bytes_bucket{method="/frontend.Frontend/Process",le="1.048576e+07"} 5 + cortex_grpc_request_size_bytes_bucket{method="/frontend.Frontend/Process",le="2.62144e+07"} 5 + cortex_grpc_request_size_bytes_bucket{method="/frontend.Frontend/Process",le="5.24288e+07"} 5 + cortex_grpc_request_size_bytes_bucket{method="/frontend.Frontend/Process",le="1.048576e+08"} 5 + cortex_grpc_request_size_bytes_bucket{method="/frontend.Frontend/Process",le="2.62144e+08"} 5 + cortex_grpc_request_size_bytes_bucket{method="/frontend.Frontend/Process",le="+Inf"} 5 + cortex_grpc_request_size_bytes_sum{method="/frontend.Frontend/Process"} 8.017448e+06 + cortex_grpc_request_size_bytes_count{method="/frontend.Frontend/Process"} 5 + + # HELP cortex_grpc_response_size_bytes Size of gRPC responses. + # TYPE cortex_grpc_response_size_bytes histogram + cortex_grpc_response_size_bytes_bucket{method="/frontend.Frontend/Process",le="1.048576e+06"} 0 + cortex_grpc_response_size_bytes_bucket{method="/frontend.Frontend/Process",le="2.62144e+06"} 2 + cortex_grpc_response_size_bytes_bucket{method="/frontend.Frontend/Process",le="5.24288e+06"} 4 + cortex_grpc_response_size_bytes_bucket{method="/frontend.Frontend/Process",le="1.048576e+07"} 6 + cortex_grpc_response_size_bytes_bucket{method="/frontend.Frontend/Process",le="2.62144e+07"} 6 + cortex_grpc_response_size_bytes_bucket{method="/frontend.Frontend/Process",le="5.24288e+07"} 6 + cortex_grpc_response_size_bytes_bucket{method="/frontend.Frontend/Process",le="1.048576e+08"} 6 + cortex_grpc_response_size_bytes_bucket{method="/frontend.Frontend/Process",le="2.62144e+08"} 6 + cortex_grpc_response_size_bytes_bucket{method="/frontend.Frontend/Process",le="+Inf"} 6 + cortex_grpc_response_size_bytes_sum{method="/frontend.Frontend/Process"} 2.2234511e+07 + cortex_grpc_response_size_bytes_count{method="/frontend.Frontend/Process"} 6 + `), "cortex_grpc_request_size_bytes", "cortex_grpc_response_size_bytes") + + require.NoError(t, err) +} + +type frontendServer struct { + t *testing.T +} + +func (f frontendServer) Process(server frontend.Frontend_ProcessServer) error { + ix := 0 + for { + ix++ + + msg := &frontend.ProcessRequest{HttpRequest: &httpgrpc.HTTPRequest{ + Method: fmt.Sprintf("%d", ix), + Url: generateString(ix * 512 * 1024), + Headers: []*httpgrpc.Header{ + { + Key: generateString(100 * ix), + Values: []string{generateString(100 * ix), generateString(10000 * ix), generateString(ix * 512 * 1024)}, + }, + }, + }} + + fmt.Println("Server sending:", msg.Size()) + err := server.Send(msg) + + if err != nil { + return err + } + + _, err = server.Recv() + if err != nil { + return err + } + } +} + +func generateString(size int) string { // Use random bytes, to avoid compression. buf := make([]byte, size) _, err := rand.Read(buf) - require.NoError(t, err) + if err != nil { + // Should not happen. + panic(err) + } + + // To avoid invalid UTF-8 sequences (which protobuf complains about), we cleanup the data a bit. for ix, b := range buf { if b < ' ' { b += ' ' From 72391657ff701ce8a00152bb168c3ada178b19b3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Peter=20S=CC=8Ctibrany=CC=81?= Date: Fri, 14 Aug 2020 11:47:21 +0200 Subject: [PATCH 4/9] Removed default panic case. Fixed test for connected clients. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Peter Štibraný --- pkg/util/grpc/stats/grpc_stats.go | 13 +++++-------- pkg/util/grpc/stats/grpc_stats_test.go | 4 ++++ 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/pkg/util/grpc/stats/grpc_stats.go b/pkg/util/grpc/stats/grpc_stats.go index 51314b0a627..72814443908 100644 --- a/pkg/util/grpc/stats/grpc_stats.go +++ b/pkg/util/grpc/stats/grpc_stats.go @@ -2,7 +2,6 @@ package stats import ( "context" - "fmt" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promauto" @@ -19,7 +18,7 @@ func NewStatsHandler(r prometheus.Registerer) stats.Handler { Help: "Number of clients connected to gRPC server", }), - inflightRpc: promauto.With(r).NewGaugeVec(prometheus.GaugeOpts{ + inflightRPC: promauto.With(r).NewGaugeVec(prometheus.GaugeOpts{ Name: "cortex_grpc_inflight_requests", Help: "Number of inflight RPC calls", }, []string{"method"}), @@ -45,7 +44,7 @@ func NewStatsHandler(r prometheus.Registerer) stats.Handler { type grpcStatsHandler struct { connectedClients prometheus.Gauge - inflightRpc *prometheus.GaugeVec + inflightRPC *prometheus.GaugeVec receivedMessageSize *prometheus.HistogramVec sentMessageSize *prometheus.HistogramVec methodErrors *prometheus.CounterVec @@ -71,9 +70,9 @@ func (g *grpcStatsHandler) HandleRPC(ctx context.Context, rpcStats stats.RPCStat switch s := rpcStats.(type) { case *stats.Begin: - g.inflightRpc.WithLabelValues(fullMethodName).Inc() + g.inflightRPC.WithLabelValues(fullMethodName).Inc() case *stats.End: - g.inflightRpc.WithLabelValues(fullMethodName).Dec() + g.inflightRPC.WithLabelValues(fullMethodName).Dec() if s.Error != nil { g.methodErrors.WithLabelValues(fullMethodName).Inc() } @@ -92,8 +91,6 @@ func (g *grpcStatsHandler) HandleRPC(ctx context.Context, rpcStats stats.RPCStat g.sentMessageSize.WithLabelValues(fullMethodName).Observe(float64(s.WireLength)) case *stats.OutTrailer: // Ignored, Cortex doesn't use trailers. OutTrailer doesn't have valid WireLength (there is deperecated field, always set to 0). - default: - panic(fmt.Sprintf("Unknown type: %T", rpcStats)) } } @@ -107,6 +104,6 @@ func (g *grpcStatsHandler) HandleConn(_ context.Context, connStats stats.ConnSta g.connectedClients.Inc() case *stats.ConnEnd: - g.connectedClients.Inc() + g.connectedClients.Dec() } } diff --git a/pkg/util/grpc/stats/grpc_stats_test.go b/pkg/util/grpc/stats/grpc_stats_test.go index 202e1f7a537..187b365cd7e 100644 --- a/pkg/util/grpc/stats/grpc_stats_test.go +++ b/pkg/util/grpc/stats/grpc_stats_test.go @@ -99,11 +99,15 @@ func TestGrpcStats(t *testing.T) { closed = true require.NoError(t, conn.Close()) + + // Give server little time to update connected clients metric. + time.Sleep(1 * time.Second) err = testutil.GatherAndCompare(reg, bytes.NewBufferString(` # HELP cortex_grpc_connected_clients Number of clients connected to gRPC server # TYPE cortex_grpc_connected_clients gauge cortex_grpc_connected_clients 0 `), "cortex_grpc_connected_clients") + require.NoError(t, err) } func TestGrpcStatsStreaming(t *testing.T) { From de18b6d6dfa5f14879351d941ca8c102cc2ac2bb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Peter=20S=CC=8Ctibrany=CC=81?= Date: Fri, 14 Aug 2020 11:48:56 +0200 Subject: [PATCH 5/9] Don't use fmt.Println in tests. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Peter Štibraný --- pkg/util/grpc/stats/grpc_stats_test.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pkg/util/grpc/stats/grpc_stats_test.go b/pkg/util/grpc/stats/grpc_stats_test.go index 187b365cd7e..5abdcd52df9 100644 --- a/pkg/util/grpc/stats/grpc_stats_test.go +++ b/pkg/util/grpc/stats/grpc_stats_test.go @@ -124,7 +124,7 @@ func TestGrpcStatsStreaming(t *testing.T) { require.NoError(t, serv.Serve(listener)) }() - frontend.RegisterFrontendServer(serv, &frontendServer{t: t}) + frontend.RegisterFrontendServer(serv, &frontendServer{log: t.Log}) conn, err := grpc.Dial(listener.Addr().String(), grpc.WithInsecure(), grpc.WithDefaultCallOptions(grpc.MaxCallRecvMsgSize(10e6), grpc.MaxCallSendMsgSize(10e6))) require.NoError(t, err) @@ -145,7 +145,7 @@ func TestGrpcStatsStreaming(t *testing.T) { Code: 200, Headers: req.HttpRequest.Headers, }} - fmt.Println("Client sending:", msg.Size()) + t.Log("Client sending:", msg.Size()) err = s.Send(msg) require.NoError(t, err) @@ -201,7 +201,7 @@ func TestGrpcStatsStreaming(t *testing.T) { } type frontendServer struct { - t *testing.T + log func(args ...interface{}) } func (f frontendServer) Process(server frontend.Frontend_ProcessServer) error { @@ -220,7 +220,7 @@ func (f frontendServer) Process(server frontend.Frontend_ProcessServer) error { }, }} - fmt.Println("Server sending:", msg.Size()) + f.log("Server sending:", msg.Size()) err := server.Send(msg) if err != nil { From 8fb06b54016f35825fd907db8a98634e8265cbd0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Peter=20S=CC=8Ctibrany=CC=81?= Date: Fri, 14 Aug 2020 11:51:37 +0200 Subject: [PATCH 6/9] CHANGELOG.md MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Peter Štibraný --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 440f2a6cb69..494484dc9fe 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -105,6 +105,7 @@ * [ENHANCEMENT] Logger: added JSON logging support, configured via the `-log.format=json` CLI flag or its respective YAML config option. #2386 * [ENHANCEMENT] Added new flags `-bigtable.grpc-compression`, `-ingester.client.grpc-compression`, `-querier.frontend-client.grpc-compression` to configure compression used by gRPC. Valid values are `gzip`, `snappy`, or empty string (no compression, default). #2940 * [ENHANCEMENT] Clarify limitations of the `/api/v1/series`, `/api/v1/labels` and `/api/v1/label/{name}/values` endpoints. #2953 +* [ENHANCEMENT] Added metrics for tracking some gRPC stats (most importantly message size histograms). #3036 * [BUGFIX] Fixed a bug with `api/v1/query_range` where no responses would return null values for `result` and empty values for `resultType`. #2962 * [BUGFIX] Fixed a bug in the index intersect code causing storage to return more chunks/series than required. #2796 * [BUGFIX] Fixed the number of reported keys in the background cache queue. #2764 From 82267482b4f73cf3e96470d831886a2a5215ea0e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Peter=20S=CC=8Ctibrany=CC=81?= Date: Fri, 14 Aug 2020 11:57:47 +0200 Subject: [PATCH 7/9] Move entry to correct version. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Peter Štibraný --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 494484dc9fe..a69cce62a2b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ * [CHANGE] Experimental Delete Series: `/api/v1/admin/tsdb/delete_series` and `/api/v1/admin/tsdb/cancel_delete_request` purger APIs to return status code `204` instead of `200` for success. #2946 * [ENHANCEMENT] Add support for azure storage in China, German and US Government environments. #2988 * [ENHANCEMENT] Query-tee: added a small tolerance to floating point sample values comparison. #2994 +* [ENHANCEMENT] Added metrics for tracking some gRPC stats (most importantly message size histograms). #3036 * [BUGFIX] Query-frontend: Fixed rounding for incoming query timestamps, to be 100% Prometheus compatible. #2990 ## 1.3.0 in progress @@ -105,7 +106,6 @@ * [ENHANCEMENT] Logger: added JSON logging support, configured via the `-log.format=json` CLI flag or its respective YAML config option. #2386 * [ENHANCEMENT] Added new flags `-bigtable.grpc-compression`, `-ingester.client.grpc-compression`, `-querier.frontend-client.grpc-compression` to configure compression used by gRPC. Valid values are `gzip`, `snappy`, or empty string (no compression, default). #2940 * [ENHANCEMENT] Clarify limitations of the `/api/v1/series`, `/api/v1/labels` and `/api/v1/label/{name}/values` endpoints. #2953 -* [ENHANCEMENT] Added metrics for tracking some gRPC stats (most importantly message size histograms). #3036 * [BUGFIX] Fixed a bug with `api/v1/query_range` where no responses would return null values for `result` and empty values for `resultType`. #2962 * [BUGFIX] Fixed a bug in the index intersect code causing storage to return more chunks/series than required. #2796 * [BUGFIX] Fixed the number of reported keys in the background cache queue. #2764 From 7262e56d22d02370ffd9fb1010644a9366744026 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Peter=20S=CC=8Ctibrany=CC=81?= Date: Fri, 14 Aug 2020 13:31:43 +0200 Subject: [PATCH 8/9] Address review feedback. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Peter Štibraný --- pkg/util/grpc/stats/grpc_stats.go | 26 ++--- pkg/util/grpc/stats/grpc_stats_test.go | 154 ++++++++++++------------- 2 files changed, 90 insertions(+), 90 deletions(-) diff --git a/pkg/util/grpc/stats/grpc_stats.go b/pkg/util/grpc/stats/grpc_stats.go index 72814443908..b9e33253510 100644 --- a/pkg/util/grpc/stats/grpc_stats.go +++ b/pkg/util/grpc/stats/grpc_stats.go @@ -15,28 +15,28 @@ func NewStatsHandler(r prometheus.Registerer) stats.Handler { return &grpcStatsHandler{ connectedClients: promauto.With(r).NewGauge(prometheus.GaugeOpts{ Name: "cortex_grpc_connected_clients", - Help: "Number of clients connected to gRPC server", + Help: "Number of clients connected to gRPC server.", }), inflightRPC: promauto.With(r).NewGaugeVec(prometheus.GaugeOpts{ Name: "cortex_grpc_inflight_requests", - Help: "Number of inflight RPC calls", + Help: "Number of inflight gRPC calls.", }, []string{"method"}), methodErrors: promauto.With(r).NewCounterVec(prometheus.CounterOpts{ Name: "cortex_grpc_method_errors_total", - Help: "Number of clients connected to gRPC server", + Help: "Number of errors returned by method.", }, []string{"method"}), - receivedMessageSize: promauto.With(r).NewHistogramVec(prometheus.HistogramOpts{ - Name: "cortex_grpc_request_size_bytes", - Help: "Size of gRPC requests.", + receivedPayloadSize: promauto.With(r).NewHistogramVec(prometheus.HistogramOpts{ + Name: "cortex_grpc_received_payload_size_bytes", + Help: "Size of received gRPC messages as seen on the wire (eg. compressed, signed, encrypted).", Buckets: messageSizeBuckets, }, []string{"method"}), - sentMessageSize: promauto.With(r).NewHistogramVec(prometheus.HistogramOpts{ - Name: "cortex_grpc_response_size_bytes", - Help: "Size of gRPC responses.", + sentPayloadSize: promauto.With(r).NewHistogramVec(prometheus.HistogramOpts{ + Name: "cortex_grpc_sent_payload_size_bytes", + Help: "Size of sent gRPC messages as seen on the wire (eg. compressed, signed, encrypted).", Buckets: messageSizeBuckets, }, []string{"method"}), } @@ -45,8 +45,8 @@ func NewStatsHandler(r prometheus.Registerer) stats.Handler { type grpcStatsHandler struct { connectedClients prometheus.Gauge inflightRPC *prometheus.GaugeVec - receivedMessageSize *prometheus.HistogramVec - sentMessageSize *prometheus.HistogramVec + receivedPayloadSize *prometheus.HistogramVec + sentPayloadSize *prometheus.HistogramVec methodErrors *prometheus.CounterVec } @@ -81,14 +81,14 @@ func (g *grpcStatsHandler) HandleRPC(ctx context.Context, rpcStats stats.RPCStat // Ignored. Cortex doesn't use headers. Furthermore WireLength seems to be incorrect for large headers -- it uses // length of last frame (16K) even for headers in megabytes. case *stats.InPayload: - g.receivedMessageSize.WithLabelValues(fullMethodName).Observe(float64(s.WireLength)) + g.receivedPayloadSize.WithLabelValues(fullMethodName).Observe(float64(s.WireLength)) case *stats.InTrailer: // Ignored. Cortex doesn't use trailers. case *stats.OutHeader: // Ignored. Cortex doesn't send headers, and since OutHeader doesn't have WireLength, we could only estimate it. case *stats.OutPayload: - g.sentMessageSize.WithLabelValues(fullMethodName).Observe(float64(s.WireLength)) + g.sentPayloadSize.WithLabelValues(fullMethodName).Observe(float64(s.WireLength)) case *stats.OutTrailer: // Ignored, Cortex doesn't use trailers. OutTrailer doesn't have valid WireLength (there is deperecated field, always set to 0). } diff --git a/pkg/util/grpc/stats/grpc_stats_test.go b/pkg/util/grpc/stats/grpc_stats_test.go index 5abdcd52df9..d6087492432 100644 --- a/pkg/util/grpc/stats/grpc_stats_test.go +++ b/pkg/util/grpc/stats/grpc_stats_test.go @@ -18,6 +18,7 @@ import ( "google.golang.org/grpc/health/grpc_health_v1" "github.com/cortexproject/cortex/pkg/querier/frontend" + "github.com/cortexproject/cortex/pkg/util/test" ) func TestGrpcStats(t *testing.T) { @@ -59,55 +60,55 @@ func TestGrpcStats(t *testing.T) { require.EqualError(t, err, "rpc error: code = NotFound desc = unknown service") err = testutil.GatherAndCompare(reg, bytes.NewBufferString(` - # HELP cortex_grpc_connected_clients Number of clients connected to gRPC server - # TYPE cortex_grpc_connected_clients gauge - cortex_grpc_connected_clients 1 - - # HELP cortex_grpc_method_errors_total Number of clients connected to gRPC server - # TYPE cortex_grpc_method_errors_total counter - cortex_grpc_method_errors_total{method="/grpc.health.v1.Health/Check"} 1 - - # HELP cortex_grpc_request_size_bytes Size of gRPC requests. - # TYPE cortex_grpc_request_size_bytes histogram - cortex_grpc_request_size_bytes_bucket{method="/grpc.health.v1.Health/Check",le="1.048576e+06"} 1 - cortex_grpc_request_size_bytes_bucket{method="/grpc.health.v1.Health/Check",le="2.62144e+06"} 1 - cortex_grpc_request_size_bytes_bucket{method="/grpc.health.v1.Health/Check",le="5.24288e+06"} 1 - cortex_grpc_request_size_bytes_bucket{method="/grpc.health.v1.Health/Check",le="1.048576e+07"} 2 - cortex_grpc_request_size_bytes_bucket{method="/grpc.health.v1.Health/Check",le="2.62144e+07"} 2 - cortex_grpc_request_size_bytes_bucket{method="/grpc.health.v1.Health/Check",le="5.24288e+07"} 2 - cortex_grpc_request_size_bytes_bucket{method="/grpc.health.v1.Health/Check",le="1.048576e+08"} 2 - cortex_grpc_request_size_bytes_bucket{method="/grpc.health.v1.Health/Check",le="2.62144e+08"} 2 - cortex_grpc_request_size_bytes_bucket{method="/grpc.health.v1.Health/Check",le="+Inf"} 2 - cortex_grpc_request_size_bytes_sum{method="/grpc.health.v1.Health/Check"} 8.388613e+06 - cortex_grpc_request_size_bytes_count{method="/grpc.health.v1.Health/Check"} 2 - - # HELP cortex_grpc_response_size_bytes Size of gRPC responses. - # TYPE cortex_grpc_response_size_bytes histogram - cortex_grpc_response_size_bytes_bucket{method="/grpc.health.v1.Health/Check",le="1.048576e+06"} 1 - cortex_grpc_response_size_bytes_bucket{method="/grpc.health.v1.Health/Check",le="2.62144e+06"} 1 - cortex_grpc_response_size_bytes_bucket{method="/grpc.health.v1.Health/Check",le="5.24288e+06"} 1 - cortex_grpc_response_size_bytes_bucket{method="/grpc.health.v1.Health/Check",le="1.048576e+07"} 1 - cortex_grpc_response_size_bytes_bucket{method="/grpc.health.v1.Health/Check",le="2.62144e+07"} 1 - cortex_grpc_response_size_bytes_bucket{method="/grpc.health.v1.Health/Check",le="5.24288e+07"} 1 - cortex_grpc_response_size_bytes_bucket{method="/grpc.health.v1.Health/Check",le="1.048576e+08"} 1 - cortex_grpc_response_size_bytes_bucket{method="/grpc.health.v1.Health/Check",le="2.62144e+08"} 1 - cortex_grpc_response_size_bytes_bucket{method="/grpc.health.v1.Health/Check",le="+Inf"} 1 - cortex_grpc_response_size_bytes_sum{method="/grpc.health.v1.Health/Check"} 7 - cortex_grpc_response_size_bytes_count{method="/grpc.health.v1.Health/Check"} 1 - `), "cortex_grpc_connected_clients", "cortex_grpc_request_size_bytes", "cortex_grpc_response_size_bytes", "cortex_grpc_method_errors_total") + # HELP cortex_grpc_connected_clients Number of clients connected to gRPC server. + # TYPE cortex_grpc_connected_clients gauge + cortex_grpc_connected_clients 1 + + # HELP cortex_grpc_method_errors_total Number of errors returned by method. + # TYPE cortex_grpc_method_errors_total counter + cortex_grpc_method_errors_total{method="/grpc.health.v1.Health/Check"} 1 + + # HELP cortex_grpc_received_payload_size_bytes Size of received gRPC messages as seen on the wire (eg. compressed, signed, encrypted). + # TYPE cortex_grpc_received_payload_size_bytes histogram + cortex_grpc_received_payload_size_bytes_bucket{method="/grpc.health.v1.Health/Check",le="1.048576e+06"} 1 + cortex_grpc_received_payload_size_bytes_bucket{method="/grpc.health.v1.Health/Check",le="2.62144e+06"} 1 + cortex_grpc_received_payload_size_bytes_bucket{method="/grpc.health.v1.Health/Check",le="5.24288e+06"} 1 + cortex_grpc_received_payload_size_bytes_bucket{method="/grpc.health.v1.Health/Check",le="1.048576e+07"} 2 + cortex_grpc_received_payload_size_bytes_bucket{method="/grpc.health.v1.Health/Check",le="2.62144e+07"} 2 + cortex_grpc_received_payload_size_bytes_bucket{method="/grpc.health.v1.Health/Check",le="5.24288e+07"} 2 + cortex_grpc_received_payload_size_bytes_bucket{method="/grpc.health.v1.Health/Check",le="1.048576e+08"} 2 + cortex_grpc_received_payload_size_bytes_bucket{method="/grpc.health.v1.Health/Check",le="2.62144e+08"} 2 + cortex_grpc_received_payload_size_bytes_bucket{method="/grpc.health.v1.Health/Check",le="+Inf"} 2 + cortex_grpc_received_payload_size_bytes_sum{method="/grpc.health.v1.Health/Check"} 8.388613e+06 + cortex_grpc_received_payload_size_bytes_count{method="/grpc.health.v1.Health/Check"} 2 + + # HELP cortex_grpc_sent_payload_size_bytes Size of sent gRPC messages as seen on the wire (eg. compressed, signed, encrypted). + # TYPE cortex_grpc_sent_payload_size_bytes histogram + cortex_grpc_sent_payload_size_bytes_bucket{method="/grpc.health.v1.Health/Check",le="1.048576e+06"} 1 + cortex_grpc_sent_payload_size_bytes_bucket{method="/grpc.health.v1.Health/Check",le="2.62144e+06"} 1 + cortex_grpc_sent_payload_size_bytes_bucket{method="/grpc.health.v1.Health/Check",le="5.24288e+06"} 1 + cortex_grpc_sent_payload_size_bytes_bucket{method="/grpc.health.v1.Health/Check",le="1.048576e+07"} 1 + cortex_grpc_sent_payload_size_bytes_bucket{method="/grpc.health.v1.Health/Check",le="2.62144e+07"} 1 + cortex_grpc_sent_payload_size_bytes_bucket{method="/grpc.health.v1.Health/Check",le="5.24288e+07"} 1 + cortex_grpc_sent_payload_size_bytes_bucket{method="/grpc.health.v1.Health/Check",le="1.048576e+08"} 1 + cortex_grpc_sent_payload_size_bytes_bucket{method="/grpc.health.v1.Health/Check",le="2.62144e+08"} 1 + cortex_grpc_sent_payload_size_bytes_bucket{method="/grpc.health.v1.Health/Check",le="+Inf"} 1 + cortex_grpc_sent_payload_size_bytes_sum{method="/grpc.health.v1.Health/Check"} 7 + cortex_grpc_sent_payload_size_bytes_count{method="/grpc.health.v1.Health/Check"} 1 + `), "cortex_grpc_connected_clients", "cortex_grpc_received_payload_size_bytes", "cortex_grpc_sent_payload_size_bytes", "cortex_grpc_method_errors_total") require.NoError(t, err) closed = true require.NoError(t, conn.Close()) // Give server little time to update connected clients metric. - time.Sleep(1 * time.Second) - err = testutil.GatherAndCompare(reg, bytes.NewBufferString(` - # HELP cortex_grpc_connected_clients Number of clients connected to gRPC server - # TYPE cortex_grpc_connected_clients gauge - cortex_grpc_connected_clients 0 - `), "cortex_grpc_connected_clients") - require.NoError(t, err) + test.Poll(t, 1*time.Second, nil, func() interface{} { + return testutil.GatherAndCompare(reg, bytes.NewBufferString(` + # HELP cortex_grpc_connected_clients Number of clients connected to gRPC server. + # TYPE cortex_grpc_connected_clients gauge + cortex_grpc_connected_clients 0 + `), "cortex_grpc_connected_clients") + }) } func TestGrpcStatsStreaming(t *testing.T) { @@ -150,7 +151,7 @@ func TestGrpcStatsStreaming(t *testing.T) { require.NoError(t, err) err = testutil.GatherAndCompare(reg, bytes.NewBufferString(` - # HELP cortex_grpc_inflight_requests Number of inflight RPC calls + # HELP cortex_grpc_inflight_requests Number of inflight gRPC calls. # TYPE cortex_grpc_inflight_requests gauge cortex_grpc_inflight_requests{method="/frontend.Frontend/Process"} 1 `), "cortex_grpc_inflight_requests") @@ -158,44 +159,43 @@ func TestGrpcStatsStreaming(t *testing.T) { } require.NoError(t, s.CloseSend()) - // Wait a second to make sure server notices close. - time.Sleep(1 * time.Second) - err = testutil.GatherAndCompare(reg, bytes.NewBufferString(` - # HELP cortex_grpc_inflight_requests Number of inflight RPC calls + // Wait until server notices. + test.Poll(t, 1*time.Second, nil, func() interface{} { + return testutil.GatherAndCompare(reg, bytes.NewBufferString(` + # HELP cortex_grpc_inflight_requests Number of inflight gRPC calls. # TYPE cortex_grpc_inflight_requests gauge cortex_grpc_inflight_requests{method="/frontend.Frontend/Process"} 0 `), "cortex_grpc_inflight_requests") - require.NoError(t, err) + }) err = testutil.GatherAndCompare(reg, bytes.NewBufferString(` - # HELP cortex_grpc_request_size_bytes Size of gRPC requests. - # TYPE cortex_grpc_request_size_bytes histogram - cortex_grpc_request_size_bytes_bucket{method="/frontend.Frontend/Process",le="1.048576e+06"} 1 - cortex_grpc_request_size_bytes_bucket{method="/frontend.Frontend/Process",le="2.62144e+06"} 4 - cortex_grpc_request_size_bytes_bucket{method="/frontend.Frontend/Process",le="5.24288e+06"} 5 - cortex_grpc_request_size_bytes_bucket{method="/frontend.Frontend/Process",le="1.048576e+07"} 5 - cortex_grpc_request_size_bytes_bucket{method="/frontend.Frontend/Process",le="2.62144e+07"} 5 - cortex_grpc_request_size_bytes_bucket{method="/frontend.Frontend/Process",le="5.24288e+07"} 5 - cortex_grpc_request_size_bytes_bucket{method="/frontend.Frontend/Process",le="1.048576e+08"} 5 - cortex_grpc_request_size_bytes_bucket{method="/frontend.Frontend/Process",le="2.62144e+08"} 5 - cortex_grpc_request_size_bytes_bucket{method="/frontend.Frontend/Process",le="+Inf"} 5 - cortex_grpc_request_size_bytes_sum{method="/frontend.Frontend/Process"} 8.017448e+06 - cortex_grpc_request_size_bytes_count{method="/frontend.Frontend/Process"} 5 - - # HELP cortex_grpc_response_size_bytes Size of gRPC responses. - # TYPE cortex_grpc_response_size_bytes histogram - cortex_grpc_response_size_bytes_bucket{method="/frontend.Frontend/Process",le="1.048576e+06"} 0 - cortex_grpc_response_size_bytes_bucket{method="/frontend.Frontend/Process",le="2.62144e+06"} 2 - cortex_grpc_response_size_bytes_bucket{method="/frontend.Frontend/Process",le="5.24288e+06"} 4 - cortex_grpc_response_size_bytes_bucket{method="/frontend.Frontend/Process",le="1.048576e+07"} 6 - cortex_grpc_response_size_bytes_bucket{method="/frontend.Frontend/Process",le="2.62144e+07"} 6 - cortex_grpc_response_size_bytes_bucket{method="/frontend.Frontend/Process",le="5.24288e+07"} 6 - cortex_grpc_response_size_bytes_bucket{method="/frontend.Frontend/Process",le="1.048576e+08"} 6 - cortex_grpc_response_size_bytes_bucket{method="/frontend.Frontend/Process",le="2.62144e+08"} 6 - cortex_grpc_response_size_bytes_bucket{method="/frontend.Frontend/Process",le="+Inf"} 6 - cortex_grpc_response_size_bytes_sum{method="/frontend.Frontend/Process"} 2.2234511e+07 - cortex_grpc_response_size_bytes_count{method="/frontend.Frontend/Process"} 6 - `), "cortex_grpc_request_size_bytes", "cortex_grpc_response_size_bytes") + # HELP cortex_grpc_received_payload_size_bytes Size of received gRPC messages as seen on the wire (eg. compressed, signed, encrypted). + # TYPE cortex_grpc_received_payload_size_bytes histogram + cortex_grpc_received_payload_size_bytes_bucket{method="/frontend.Frontend/Process",le="1.048576e+06"} 1 + cortex_grpc_received_payload_size_bytes_bucket{method="/frontend.Frontend/Process",le="2.62144e+06"} 4 + cortex_grpc_received_payload_size_bytes_bucket{method="/frontend.Frontend/Process",le="5.24288e+06"} 5 + cortex_grpc_received_payload_size_bytes_bucket{method="/frontend.Frontend/Process",le="1.048576e+07"} 5 + cortex_grpc_received_payload_size_bytes_bucket{method="/frontend.Frontend/Process",le="2.62144e+07"} 5 + cortex_grpc_received_payload_size_bytes_bucket{method="/frontend.Frontend/Process",le="5.24288e+07"} 5 + cortex_grpc_received_payload_size_bytes_bucket{method="/frontend.Frontend/Process",le="1.048576e+08"} 5 + cortex_grpc_received_payload_size_bytes_bucket{method="/frontend.Frontend/Process",le="2.62144e+08"} 5 + cortex_grpc_received_payload_size_bytes_bucket{method="/frontend.Frontend/Process",le="+Inf"} 5 + cortex_grpc_received_payload_size_bytes_sum{method="/frontend.Frontend/Process"} 8.017448e+06 + cortex_grpc_received_payload_size_bytes_count{method="/frontend.Frontend/Process"} 5 + # HELP cortex_grpc_sent_payload_size_bytes Size of sent gRPC messages as seen on the wire (eg. compressed, signed, encrypted). + # TYPE cortex_grpc_sent_payload_size_bytes histogram + cortex_grpc_sent_payload_size_bytes_bucket{method="/frontend.Frontend/Process",le="1.048576e+06"} 0 + cortex_grpc_sent_payload_size_bytes_bucket{method="/frontend.Frontend/Process",le="2.62144e+06"} 2 + cortex_grpc_sent_payload_size_bytes_bucket{method="/frontend.Frontend/Process",le="5.24288e+06"} 4 + cortex_grpc_sent_payload_size_bytes_bucket{method="/frontend.Frontend/Process",le="1.048576e+07"} 6 + cortex_grpc_sent_payload_size_bytes_bucket{method="/frontend.Frontend/Process",le="2.62144e+07"} 6 + cortex_grpc_sent_payload_size_bytes_bucket{method="/frontend.Frontend/Process",le="5.24288e+07"} 6 + cortex_grpc_sent_payload_size_bytes_bucket{method="/frontend.Frontend/Process",le="1.048576e+08"} 6 + cortex_grpc_sent_payload_size_bytes_bucket{method="/frontend.Frontend/Process",le="2.62144e+08"} 6 + cortex_grpc_sent_payload_size_bytes_bucket{method="/frontend.Frontend/Process",le="+Inf"} 6 + cortex_grpc_sent_payload_size_bytes_sum{method="/frontend.Frontend/Process"} 2.2234511e+07 + cortex_grpc_sent_payload_size_bytes_count{method="/frontend.Frontend/Process"} 6 + `), "cortex_grpc_received_payload_size_bytes", "cortex_grpc_sent_payload_size_bytes") require.NoError(t, err) } From 2b85cbd3fed16ef9eca058ab281ed6891bb1a77e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Peter=20S=CC=8Ctibrany=CC=81?= Date: Fri, 14 Aug 2020 13:32:52 +0200 Subject: [PATCH 9/9] Added metric names to CHANGELOG entry. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Peter Štibraný --- CHANGELOG.md | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a69cce62a2b..da6f6db5386 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,7 +11,12 @@ * [CHANGE] Experimental Delete Series: `/api/v1/admin/tsdb/delete_series` and `/api/v1/admin/tsdb/cancel_delete_request` purger APIs to return status code `204` instead of `200` for success. #2946 * [ENHANCEMENT] Add support for azure storage in China, German and US Government environments. #2988 * [ENHANCEMENT] Query-tee: added a small tolerance to floating point sample values comparison. #2994 -* [ENHANCEMENT] Added metrics for tracking some gRPC stats (most importantly message size histograms). #3036 +* [ENHANCEMENT] Added metrics for tracking some gRPC stats (most importantly message size histograms): #3036 + * `cortex_grpc_connected_clients` + * `cortex_grpc_inflight_requests` + * `cortex_grpc_method_errors_total` + * `cortex_grpc_received_payload_size_bytes` + * `cortex_grpc_sent_payload_size_bytes` * [BUGFIX] Query-frontend: Fixed rounding for incoming query timestamps, to be 100% Prometheus compatible. #2990 ## 1.3.0 in progress