From d916e637432d27686bc3a85a2531e08a5d30422b Mon Sep 17 00:00:00 2001 From: Jacob Lisi Date: Fri, 18 Sep 2020 11:39:52 -0400 Subject: [PATCH 1/2] feat: add metric and fix querier int test flake Signed-off-by: Jacob Lisi --- integration/querier_sharding_test.go | 3 +++ pkg/querier/frontend/frontend.go | 9 ++++++++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/integration/querier_sharding_test.go b/integration/querier_sharding_test.go index b382f117fd0..7779ce25271 100644 --- a/integration/querier_sharding_test.go +++ b/integration/querier_sharding_test.go @@ -101,6 +101,9 @@ func runQuerierShardingTest(t *testing.T, sharding bool) { require.NoError(t, err) } + // Wait until both workers connect to the query frontend + require.NoError(t, queryFrontend.WaitSumMetrics(e2e.Equals(2), "cortex_query_frontend_connected_clients")) + wg := sync.WaitGroup{} // Run all queries concurrently to get better distribution of requests between queriers. diff --git a/pkg/querier/frontend/frontend.go b/pkg/querier/frontend/frontend.go index a4e8ea3e29a..54356592e20 100644 --- a/pkg/querier/frontend/frontend.go +++ b/pkg/querier/frontend/frontend.go @@ -76,6 +76,7 @@ type Frontend struct { connectedClients *atomic.Int32 // Metrics. + numClients prometheus.GaugeFunc queueDuration prometheus.Histogram queueLength *prometheus.GaugeVec } @@ -92,6 +93,7 @@ type request struct { // New creates a new frontend. func New(cfg Config, limits Limits, log log.Logger, registerer prometheus.Registerer) (*Frontend, error) { + connectedClients := atomic.NewInt32(0) f := &Frontend{ cfg: cfg, log: log, @@ -108,7 +110,12 @@ func New(cfg Config, limits Limits, log log.Logger, registerer prometheus.Regist Name: "query_frontend_queue_length", Help: "Number of queries in the queue.", }, []string{"user"}), - connectedClients: atomic.NewInt32(0), + numClients: promauto.With(registerer).NewGaugeFunc(prometheus.GaugeOpts{ + Namespace: "cortex", + Name: "query_frontend_connected_clients", + Help: "Number of worker clients currently connected to the frontend.", + }, func() float64 { return float64(connectedClients.Load()) }), + connectedClients: connectedClients, } f.cond = sync.NewCond(&f.mtx) From 482b71c34eccfb958ec61966c71cbf820adea68b Mon Sep 17 00:00:00 2001 From: Jacob Lisi Date: Fri, 18 Sep 2020 12:14:18 -0400 Subject: [PATCH 2/2] update changelog Signed-off-by: Jacob Lisi --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 71b1512d5a1..ad70291f320 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,7 @@ * `user_subring_size` limit YAML config option renamed to `ingestion_tenant_shard_size` * [CHANGE] Dropped "blank Alertmanager configuration; using fallback" message from Info to Debug level. #3205 * [FEATURE] Added support for shuffle-sharding queriers in the query-frontend. When configured (`-frontend.max-queriers-per-user` globally, or using per-user limit `max_queriers_per_user`), each user's requests will be handled by different set of queriers. #3113 +* [ENHANCEMENT] Added `cortex_query_frontend_connected_clients` metric to show the number of workers currently connected to the frontend. #3207 * [ENHANCEMENT] Shuffle sharding: improved shuffle sharding in the write path. Shuffle sharding now should be explicitly enabled via `-distributor.sharding-strategy` CLI flag (or its respective YAML config option) and guarantees stability, consistency, shuffling and balanced zone-awareness properties. #3090 * [ENHANCEMENT] Ingester: added new metric `cortex_ingester_active_series` to track active series more accurately. Also added options to control whether active series tracking is enabled (`-ingester.active-series-enabled`, defaults to false), and how often this metric is updated (`-ingester.active-series-update-period`) and max idle time for series to be considered inactive (`-ingester.active-series-idle-timeout`). #3153 * [BUGFIX] No-longer-needed ingester operations for queries triggered by queriers and rulers are now canceled. #3178