CHANGELOG.md (1 addition, 0 deletions)

@@ -8,6 +8,7 @@
  * `user_subring_size` limit YAML config option renamed to `ingestion_tenant_shard_size`
* [CHANGE] Demoted the "blank Alertmanager configuration; using fallback" message from Info to Debug level. #3205
* [FEATURE] Added support for shuffle-sharding queriers in the query-frontend. When configured (`-frontend.max-queriers-per-user` globally, or using the per-user limit `max_queriers_per_user`), each user's requests will be handled by a different set of queriers. #3113
+* [ENHANCEMENT] Added `cortex_query_frontend_connected_clients` metric to show the number of workers currently connected to the frontend. #3207
* [ENHANCEMENT] Shuffle sharding: improved shuffle sharding in the write path. Shuffle sharding must now be explicitly enabled via the `-distributor.sharding-strategy` CLI flag (or its respective YAML config option), and it guarantees the stability, consistency, shuffling, and balanced zone-awareness properties. #3090
* [ENHANCEMENT] Ingester: added new metric `cortex_ingester_active_series` to track active series more accurately. Also added options to control whether active series tracking is enabled (`-ingester.active-series-enabled`, defaults to false), how often the metric is updated (`-ingester.active-series-update-period`), and the maximum idle time after which a series is considered inactive (`-ingester.active-series-idle-timeout`). #3153
* [BUGFIX] No-longer-needed ingester operations for queries triggered by queriers and rulers are now canceled. #3178
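Once a Prometheus server scrapes the frontend, the new gauge can be consumed like any other series. Below is a minimal sketch using client_golang's HTTP API client; the Prometheus address and the sum() aggregation across frontend replicas are assumptions for illustration, not part of this change:

    package main

    import (
        "context"
        "fmt"
        "log"
        "time"

        "github.com/prometheus/client_golang/api"
        v1 "github.com/prometheus/client_golang/api/prometheus/v1"
    )

    func main() {
        // Assumed address of a Prometheus server scraping the query-frontend.
        client, err := api.NewClient(api.Config{Address: "http://prometheus:9090"})
        if err != nil {
            log.Fatal(err)
        }

        ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
        defer cancel()

        // Total querier workers currently connected, summed across frontend replicas.
        result, warnings, err := v1.NewAPI(client).Query(ctx,
            "sum(cortex_query_frontend_connected_clients)", time.Now())
        if err != nil {
            log.Fatal(err)
        }
        if len(warnings) > 0 {
            fmt.Println("warnings:", warnings)
        }
        fmt.Println(result)
    }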
integration/querier_sharding_test.go (3 additions, 0 deletions)

@@ -101,6 +101,9 @@ func runQuerierShardingTest(t *testing.T, sharding bool) {
        require.NoError(t, err)
    }

+    // Wait until both workers connect to the query frontend
+    require.NoError(t, queryFrontend.WaitSumMetrics(e2e.Equals(2), "cortex_query_frontend_connected_clients"))
+
    wg := sync.WaitGroup{}

    // Run all queries concurrently to get better distribution of requests between queriers.
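For a unit-level check outside the e2e harness, client_golang's testutil package can read a gauge directly. Here is a self-contained sketch of the same pattern the frontend uses (a GaugeFunc over an atomic counter); the wiring below is illustrative, not the PR's code:

    package main

    import (
        "fmt"

        "github.com/prometheus/client_golang/prometheus"
        "github.com/prometheus/client_golang/prometheus/promauto"
        "github.com/prometheus/client_golang/prometheus/testutil"
        "go.uber.org/atomic"
    )

    func main() {
        reg := prometheus.NewRegistry()
        connected := atomic.NewInt32(0)

        // GaugeFunc evaluates the callback at scrape time, so the metric
        // always reflects the current counter value.
        gauge := promauto.With(reg).NewGaugeFunc(prometheus.GaugeOpts{
            Namespace: "cortex",
            Name:      "query_frontend_connected_clients",
            Help:      "Number of worker clients currently connected to the frontend.",
        }, func() float64 { return float64(connected.Load()) })

        connected.Inc()
        connected.Inc()

        // testutil.ToFloat64 collects the metric and returns its value: 2.
        fmt.Println(testutil.ToFloat64(gauge))
    }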
pkg/querier/frontend/frontend.go (8 additions, 1 deletion)

@@ -76,6 +76,7 @@ type Frontend struct {
    connectedClients *atomic.Int32

    // Metrics.
+    numClients    prometheus.GaugeFunc
    queueDuration prometheus.Histogram
    queueLength   *prometheus.GaugeVec
}
@@ -92,6 +93,7 @@ type request struct {

// New creates a new frontend.
func New(cfg Config, limits Limits, log log.Logger, registerer prometheus.Registerer) (*Frontend, error) {
+    connectedClients := atomic.NewInt32(0)
    f := &Frontend{
        cfg: cfg,
        log: log,

@@ -108,7 +110,12 @@ func New(cfg Config, limits Limits, log log.Logger, registerer prometheus.Registerer) (*Frontend, error) {
            Name: "query_frontend_queue_length",
            Help: "Number of queries in the queue.",
        }, []string{"user"}),
-        connectedClients: atomic.NewInt32(0),
+        numClients: promauto.With(registerer).NewGaugeFunc(prometheus.GaugeOpts{
+            Namespace: "cortex",
+            Name:      "query_frontend_connected_clients",
+            Help:      "Number of worker clients currently connected to the frontend.",
+        }, func() float64 { return float64(connectedClients.Load()) }),
+        connectedClients: connectedClients,
    }
    f.cond = sync.NewCond(&f.mtx)
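
The gauge is read-only; the connectedClients counter it exposes is incremented and decremented where worker connections are established and torn down, which this diff does not show. A hedged sketch of that lifecycle, with handleWorker as a hypothetical stand-in for the frontend's real connection handler:

    // Hypothetical stand-in for the frontend's per-worker connection handler;
    // the actual accounting lives in the frontend's gRPC loop.
    func (f *Frontend) handleWorker() {
        f.connectedClients.Inc()       // one more worker connected
        defer f.connectedClients.Dec() // gauge drops when the worker disconnects

        // ... dispatch queued requests to this worker until it disconnects ...
    }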

Expand Down