@@ -138,12 +138,16 @@ type Compactor struct {
138138	ringSubservicesWatcher  * services.FailureWatcher 
139139
140140	// Metrics. 
141- 	compactionRunsStarted      prometheus.Counter 
142- 	compactionRunsCompleted    prometheus.Counter 
143- 	compactionRunsFailed       prometheus.Counter 
144- 	compactionRunsLastSuccess  prometheus.Gauge 
145- 	blocksMarkedForDeletion    prometheus.Counter 
146- 	garbageCollectedBlocks     prometheus.Counter 
141+ 	compactionRunsStarted           prometheus.Counter 
142+ 	compactionRunsCompleted         prometheus.Counter 
143+ 	compactionRunsFailed            prometheus.Counter 
144+ 	compactionRunsLastSuccess       prometheus.Gauge 
145+ 	compactionRunDiscoveredTenants  prometheus.Gauge 
146+ 	compactionRunSkippedTenants     prometheus.Gauge 
147+ 	compactionRunSucceededTenants   prometheus.Gauge 
148+ 	compactionRunFailedTenants      prometheus.Gauge 
149+ 	blocksMarkedForDeletion         prometheus.Counter 
150+ 	garbageCollectedBlocks          prometheus.Counter 
147151
148152	// TSDB syncer metrics 
149153	syncerMetrics  * syncerMetrics 
@@ -206,6 +210,22 @@ func newCompactor(
206210			Name : "cortex_compactor_last_successful_run_timestamp_seconds" ,
207211			Help : "Unix timestamp of the last successful compaction run." ,
208212		}),
213+ 		compactionRunDiscoveredTenants : promauto .With (registerer ).NewGauge (prometheus.GaugeOpts {
214+ 			Name : "cortex_compactor_tenants_discovered" ,
215+ 			Help : "Number of tenants discovered during the current compaction run. Reset to 0 when compactor is idle." ,
216+ 		}),
217+ 		compactionRunSkippedTenants : promauto .With (registerer ).NewGauge (prometheus.GaugeOpts {
218+ 			Name : "cortex_compactor_tenants_skipped" ,
219+ 			Help : "Number of tenants skipped during the current compaction run. Reset to 0 when compactor is idle." ,
220+ 		}),
221+ 		compactionRunSucceededTenants : promauto .With (registerer ).NewGauge (prometheus.GaugeOpts {
222+ 			Name : "cortex_compactor_tenants_processing_succeeded" ,
223+ 			Help : "Number of tenants successfully processed during the current compaction run. Reset to 0 when compactor is idle." ,
224+ 		}),
225+ 		compactionRunFailedTenants : promauto .With (registerer ).NewGauge (prometheus.GaugeOpts {
226+ 			Name : "cortex_compactor_tenants_processing_failed" ,
227+ 			Help : "Number of tenants failed processing during the current compaction run. Reset to 0 when compactor is idle." ,
228+ 		}),
209229		blocksMarkedForDeletion : promauto .With (registerer ).NewCounter (prometheus.CounterOpts {
210230			Name : "cortex_compactor_blocks_marked_for_deletion_total" ,
211231			Help : "Total number of blocks marked for deletion in compactor." ,
@@ -377,13 +397,23 @@ func (c *Compactor) compactUsersWithRetries(ctx context.Context) {
377397}
378398
379399func  (c  * Compactor ) compactUsers (ctx  context.Context ) error  {
400+ 	// Reset progress metrics once done. 
401+ 	defer  func () {
402+ 		c .compactionRunDiscoveredTenants .Set (0 )
403+ 		c .compactionRunSkippedTenants .Set (0 )
404+ 		c .compactionRunSucceededTenants .Set (0 )
405+ 		c .compactionRunFailedTenants .Set (0 )
406+ 	}()
407+ 
380408	level .Info (c .logger ).Log ("msg" , "discovering users from bucket" )
381409	users , err  :=  c .discoverUsers (ctx )
382410	if  err  !=  nil  {
383411		level .Error (c .logger ).Log ("msg" , "failed to discover users from bucket" , "err" , err )
384412		return  errors .Wrap (err , "failed to discover users from bucket" )
385413	}
414+ 
386415	level .Info (c .logger ).Log ("msg" , "discovered users from bucket" , "users" , len (users ))
416+ 	c .compactionRunDiscoveredTenants .Set (float64 (len (users )))
387417
388418	// When starting multiple compactor replicas nearly at the same time, running in a cluster with 
389419	// a large number of tenants, we may end up in a situation where the 1st user is compacted by 
@@ -403,21 +433,25 @@ func (c *Compactor) compactUsers(ctx context.Context) error {
403433
404434		// Ensure the user ID belongs to our shard. 
405435		if  owned , err  :=  c .ownUser (userID ); err  !=  nil  {
436+ 			c .compactionRunSkippedTenants .Inc ()
406437			level .Warn (c .logger ).Log ("msg" , "unable to check if user is owned by this shard" , "user" , userID , "err" , err )
407438			continue 
408439		} else  if  ! owned  {
440+ 			c .compactionRunSkippedTenants .Inc ()
409441			level .Debug (c .logger ).Log ("msg" , "skipping user because not owned by this shard" , "user" , userID )
410442			continue 
411443		}
412444
413445		level .Info (c .logger ).Log ("msg" , "starting compaction of user blocks" , "user" , userID )
414446
415447		if  err  =  c .compactUser (ctx , userID ); err  !=  nil  {
448+ 			c .compactionRunFailedTenants .Inc ()
416449			level .Error (c .logger ).Log ("msg" , "failed to compact user blocks" , "user" , userID , "err" , err )
417450			errs .Add (errors .Wrapf (err , "failed to compact user blocks (user: %s)" , userID ))
418451			continue 
419452		}
420453
454+ 		c .compactionRunSucceededTenants .Inc ()
421455		level .Info (c .logger ).Log ("msg" , "successfully compacted user blocks" , "user" , userID )
422456	}
423457
0 commit comments