@@ -96,31 +96,39 @@ func newFakeAlertStore() *fakeAlertStore {
9696	}
9797}
9898
99- func  (f  * fakeAlertStore ) GetFullState (ctx  context.Context , user  string ) (alertspb.FullStateDesc , error ) {
99+ func  (f  * fakeAlertStore ) GetFullState (_  context.Context , user  string ) (alertspb.FullStateDesc , error ) { // Returns the state stored for user, or alertspb.ErrNotFound when there is none; the context is unused.
100100	if  result , ok  :=  f .states [user ]; ok  {
101101		return  result , nil 
102102	}
103103	return  alertspb.FullStateDesc {}, alertspb .ErrNotFound 
104104}
105105
106+ func  (f  * fakeAlertStore ) SetFullState (_  context.Context , user  string , state  alertspb.FullStateDesc ) error  { // Test helper: records state for user in the fake store; the context is unused.
107+ 	f .states [user ] =  state  // Overwrites any state previously stored for this user.
108+ 	return  nil  // This fake never reports a storage failure.
109+ }
110+ 
106111func  TestStateReplication (t  * testing.T ) {
107112	tc  :=  []struct  {
108- 		name               string 
109- 		replicationFactor  int 
110- 		message            * clusterpb.Part 
111- 		results            map [string ]* clusterpb.Part 
113+ 		name                string 
114+ 		replicationFactor   int 
115+ 		message             * clusterpb.Part 
116+ 		replicationResults  map [string ]clusterpb.Part 
117+ 		storeResults        map [string ]clusterpb.Part 
112118	}{
113119		{
114- 			name :              "with a replication factor of <= 1, state is not replicated." ,
115- 			replicationFactor : 1 ,
116- 			message :           & clusterpb.Part {Key : "nflog" , Data : []byte ("OK" )},
117- 			results :           map [string ]* clusterpb.Part {},
120+ 			name :               "with a replication factor of <= 1, state is not replicated but loaded from storage." ,
121+ 			replicationFactor :  1 ,
122+ 			message :            & clusterpb.Part {Key : "nflog" , Data : []byte ("OK" )},
123+ 			replicationResults : map [string ]clusterpb.Part {},
124+ 			storeResults :       map [string ]clusterpb.Part {"user-1" : {Key : "nflog" , Data : []byte ("OK" )}},
118125		},
119126		{
120- 			name :              "with a replication factor of > 1, state is broadcasted for replication." ,
121- 			replicationFactor : 3 ,
122- 			message :           & clusterpb.Part {Key : "nflog" , Data : []byte ("OK" )},
123- 			results :           map [string ]* clusterpb.Part {"user-1" : {Key : "nflog" , Data : []byte ("OK" )}},
127+ 			name :               "with a replication factor of > 1, state is broadcasted for replication." ,
128+ 			replicationFactor :  3 ,
129+ 			message :            & clusterpb.Part {Key : "nflog" , Data : []byte ("OK" )},
130+ 			replicationResults : map [string ]clusterpb.Part {"user-1" : {Key : "nflog" , Data : []byte ("OK" )}},
131+ 			storeResults :       map [string ]clusterpb.Part {},
124132		},
125133	}
126134
@@ -129,9 +137,15 @@ func TestStateReplication(t *testing.T) {
129137			reg  :=  prometheus .NewPedanticRegistry ()
130138			replicator  :=  newFakeReplicator ()
131139			replicator .read  =  readStateResult {res : nil , err : nil }
140+ 
132141			store  :=  newFakeAlertStore ()
133- 			s  :=  newReplicatedStates ("user-1" , tt .replicationFactor , replicator , store , log .NewNopLogger (), reg )
142+ 			for  user , part  :=  range  tt .storeResults  {
143+ 				require .NoError (t , store .SetFullState (context .Background (), user , alertspb.FullStateDesc {
144+ 					State : & clusterpb.FullState {Parts : []clusterpb.Part {part }},
145+ 				}))
146+ 			}
134147
148+ 			s  :=  newReplicatedStates ("user-1" , tt .replicationFactor , replicator , store , log .NewNopLogger (), reg )
135149			require .False (t , s .Ready ())
136150			{
137151				ctx , cancel  :=  context .WithTimeout (context .Background (), 100 * time .Millisecond )
@@ -161,47 +175,32 @@ func TestStateReplication(t *testing.T) {
161175			require .Eventually (t , func () bool  {
162176				replicator .mtx .Lock ()
163177				defer  replicator .mtx .Unlock ()
164- 				return  len (replicator .results ) ==  len (tt .results )
178+ 				return  len (replicator .results ) ==  len (tt .replicationResults )
165179			}, time .Second , time .Millisecond )
166180
167181			if  tt .replicationFactor  >  1  {
182+ 				// If the replication factor is greater than 1, we expect state to be loaded from other Alertmanagers 
168183				assert .NoError (t , testutil .GatherAndCompare (reg , strings .NewReader (` 
169- # HELP alertmanager_state_fetch_replica_state_failed_total Number of times we have failed to read and merge the full state from another replica. 
170- # TYPE alertmanager_state_fetch_replica_state_failed_total counter 
171- alertmanager_state_fetch_replica_state_failed_total 0 
172- # HELP alertmanager_state_fetch_replica_state_total Number of times we have tried to read and merge the full state from another replica. 
173- # TYPE alertmanager_state_fetch_replica_state_total counter 
174- alertmanager_state_fetch_replica_state_total 1 
175- # HELP alertmanager_partial_state_merges_failed_total Number of times we have failed to merge a partial state received for a key. 
176- # TYPE alertmanager_partial_state_merges_failed_total counter 
177- alertmanager_partial_state_merges_failed_total{key="nflog"} 0 
178- # HELP alertmanager_partial_state_merges_total Number of times we have received a partial state to merge for a key. 
179- # TYPE alertmanager_partial_state_merges_total counter 
180- alertmanager_partial_state_merges_total{key="nflog"} 0 
181184# HELP alertmanager_state_initial_sync_completed_total Number of times we have completed syncing initial state for each possible outcome. 
182185# TYPE alertmanager_state_initial_sync_completed_total counter 
183186alertmanager_state_initial_sync_completed_total{outcome="failed"} 0 
184187alertmanager_state_initial_sync_completed_total{outcome="from-replica"} 1 
185188alertmanager_state_initial_sync_completed_total{outcome="from-storage"} 0 
186189alertmanager_state_initial_sync_completed_total{outcome="user-not-found"} 0 
187- # HELP alertmanager_state_initial_sync_total Number of times we have tried to sync initial state from peers or remote storage. 
188- # TYPE alertmanager_state_initial_sync_total counter 
189- alertmanager_state_initial_sync_total 1 
190- # HELP alertmanager_state_replication_failed_total Number of times we have failed to replicate a state to other alertmanagers. 
191- # TYPE alertmanager_state_replication_failed_total counter 
192- alertmanager_state_replication_failed_total{key="nflog"} 0 
193- # HELP alertmanager_state_replication_total Number of times we have tried to replicate a state to other alertmanagers. 
194- # TYPE alertmanager_state_replication_total counter 
195- alertmanager_state_replication_total{key="nflog"} 1 
196190	` ),
197- 					"alertmanager_state_fetch_replica_state_failed_total" ,
198- 					"alertmanager_state_fetch_replica_state_total" ,
199- 					"alertmanager_partial_state_merges_failed_total" ,
200- 					"alertmanager_partial_state_merges_total" ,
201191					"alertmanager_state_initial_sync_completed_total" ,
202- 					"alertmanager_state_initial_sync_total" ,
203- 					"alertmanager_state_replication_failed_total" ,
204- 					"alertmanager_state_replication_total" ,
192+ 				))
193+ 			} else  {
194+ 				// Replication factor is 1, we expect state to be loaded from storage *instead* of other Alertmanagers 
195+ 				assert .NoError (t , testutil .GatherAndCompare (reg , strings .NewReader (` 
196+ # HELP alertmanager_state_initial_sync_completed_total Number of times we have completed syncing initial state for each possible outcome. 
197+ # TYPE alertmanager_state_initial_sync_completed_total counter 
198+ alertmanager_state_initial_sync_completed_total{outcome="failed"} 0 
199+ alertmanager_state_initial_sync_completed_total{outcome="from-replica"} 0 
200+ alertmanager_state_initial_sync_completed_total{outcome="from-storage"} 1 
201+ alertmanager_state_initial_sync_completed_total{outcome="user-not-found"} 0 
202+ 	` ),
203+ 					"alertmanager_state_initial_sync_completed_total" ,
205204				))
206205
207206			}
0 commit comments