@@ -42,11 +42,11 @@ impl TestState {
4242 TestState { tq_state : TqState :: new ( log) , skipped_actions : 0 }
4343 }
4444
45- fn initial_config_event (
45+ fn initial_config_events (
4646 & self ,
4747 config : GeneratedConfiguration ,
4848 down_nodes : BTreeSet < usize > ,
49- ) -> Event {
49+ ) -> Vec < Event > {
5050 // `tq_state` doesn't create the member universe until the first event is
5151 // applied. We duplicate it here so we can create that initial config
5252 // event.
@@ -65,22 +65,28 @@ impl TestState {
6565 let coordinator =
6666 members. first ( ) . cloned ( ) . expect ( "at least one member" ) ;
6767 let last_committed_epoch = None ;
68+ let crashed_nodes: BTreeSet < _ > = down_nodes
69+ . into_iter ( )
70+ . map ( |index| member_universe[ index] . clone ( ) )
71+ . collect ( ) ;
72+ let should_abort = crashed_nodes. contains ( & coordinator) ;
6873 let config = NexusConfig :: new (
6974 epoch,
7075 last_committed_epoch,
7176 coordinator,
7277 members,
7378 threshold,
7479 ) ;
75- let crashed_nodes = down_nodes
76- . into_iter ( )
77- . map ( |index| member_universe[ index] . clone ( ) )
78- . collect ( ) ;
79- Event :: InitialSetup {
80+ let mut events = vec ! [ Event :: InitialSetup {
8081 member_universe_size: MEMBER_UNIVERSE_SIZE ,
8182 config,
8283 crashed_nodes,
84+ } ] ;
85+
86+ if should_abort {
87+ events. push ( Event :: AbortConfiguration ( epoch) ) ;
8388 }
89+ events
8490 }
8591
8692 // Execute the proptest generated actions
@@ -195,7 +201,20 @@ impl TestState {
195201 }
196202
197203 let id = selector. select ( faultable) . clone ( ) ;
198- vec ! [ Event :: CrashNode ( id) ]
204+ let latest_config = self . tq_state . nexus . latest_config ( ) ;
205+ if id == latest_config. coordinator
206+ && latest_config. op == NexusOp :: Preparing
207+ {
208+ // The `AbortConfiguration` simulates Nexus polling and timing
209+ // out or receiving an error response on node restart because the
210+ // configuration was lost.
211+ vec ! [
212+ Event :: CrashNode ( id. clone( ) ) ,
213+ Event :: AbortConfiguration ( latest_config. epoch) ,
214+ ]
215+ } else {
216+ vec ! [ Event :: CrashNode ( id. clone( ) ) ]
217+ }
199218 }
200219
201220 fn action_to_events_restart_node (
@@ -309,8 +328,7 @@ impl TestState {
309328 return events;
310329 }
311330
312- // If the coordinator has crashed then Nexus should abort.
313- // Crashing is not actually implemented yet, but it will be.
331+ // If the coordinator is currently down then Nexus should abort.
314332 if self
315333 . tq_state
316334 . faults
@@ -346,9 +364,9 @@ impl TestState {
346364 //
347365 // In a real system this request would go over the network, but would
348366 // end up at the same place.
349- let cs = coordinator
350- . get_coordinator_state ( )
351- . expect ( "coordinator is coordinating" ) ;
367+ let cs = coordinator. get_coordinator_state ( ) . unwrap_or_else ( || {
368+ panic ! ( "coordinator is coordinating: {}" , ctx . platform_id ( ) )
369+ } ) ;
352370
353371 // Put the reply on the network
354372 events. push ( Event :: SendNexusReplyOnUnderlay (
@@ -510,11 +528,18 @@ impl TestState {
510528 let nexus_config = NexusConfig :: new (
511529 epoch,
512530 last_committed_epoch,
513- coordinator,
531+ coordinator. clone ( ) ,
514532 new_members,
515533 threshold,
516534 ) ;
517- vec ! [ Event :: Reconfigure ( nexus_config) ]
535+ let mut events = vec ! [ Event :: Reconfigure ( nexus_config) ] ;
536+
537+ if self . tq_state . faults . crashed_nodes . contains ( & coordinator) {
538+ // This simulates a timeout on the reply from the coordinator which
539+ // triggers an abort.
540+ events. push ( Event :: AbortConfiguration ( epoch) ) ;
541+ }
542+ events
518543 }
519544
520545 /// At every point during the running of the test, invariants over the system
@@ -885,10 +910,12 @@ fn test_trust_quorum_protocol(input: TestInput) {
885910 let mut state = TestState :: new ( log. clone ( ) ) ;
886911
887912 // Perform the initial setup
888- let event = state
889- . initial_config_event ( input. initial_config , input. initial_down_nodes ) ;
890- event_log. record ( & event) ;
891- state. tq_state . apply_event ( event) ;
913+ let events = state
914+ . initial_config_events ( input. initial_config , input. initial_down_nodes ) ;
915+ for event in events {
916+ event_log. record ( & event) ;
917+ state. tq_state . apply_event ( event) ;
918+ }
892919
893920 // Start executing the actions
894921 state. run_actions ( input. actions , & mut event_log) ?;
0 commit comments