From 36aad05837ba4ca9175a632cb6c46bee9d313ed4 Mon Sep 17 00:00:00 2001 From: Yashvardhan Kukreja Date: Thu, 25 Sep 2025 05:52:25 -0400 Subject: [PATCH 1/7] feat(op-conductor): rollup-boost partial unhealthiness tolerance Signed-off-by: Yashvardhan Kukreja --- op-conductor/conductor/config.go | 11 ++++ op-conductor/conductor/service.go | 2 + op-conductor/flags/flags.go | 12 ++++ op-conductor/health/monitor.go | 84 ++++++++++++++++++++++++++-- op-conductor/health/monitor_test.go | 86 +++++++++++++++++++++++++++++ 5 files changed, 189 insertions(+), 6 deletions(-) diff --git a/op-conductor/conductor/config.go b/op-conductor/conductor/config.go index b41aed34a729b..90700b5a91498 100644 --- a/op-conductor/conductor/config.go +++ b/op-conductor/conductor/config.go @@ -185,6 +185,8 @@ func NewConfig(ctx *cli.Context, log log.Logger) (*Config, error) { ExecutionP2pMinPeerCount: ctx.Uint64(flags.HealthcheckExecutionP2pMinPeerCount.Name), ExecutionP2pRPCUrl: executionP2pRpcUrl, ExecutionP2pCheckApi: executionP2pCheckApi, + RollupBoostPartialHealthinessToleranceLimit: ctx.Uint64(flags.HealthCheckRollupBoostPartialHealthinessToleranceLimit.Name), + RollupBoostPartialHealthinessToleranceIntervalSeconds: ctx.Uint64(flags.HealthCheckRollupBoostPartialHealthinessToleranceIntervalSeconds.Name), }, RollupCfg: *rollupCfg, RPCEnableProxy: ctx.Bool(flags.RPCEnableProxy.Name), @@ -225,6 +227,12 @@ type HealthCheckConfig struct { // ExecutionP2pMinPeerCount is the minimum number of EL P2P peers required for the sequencer to be healthy. 
ExecutionP2pMinPeerCount uint64 + + // RollupBoostPartialHealthinessToleranceLimit is the number of rollup-boost partial unhealthiness failures to tolerate within a configurable time frame + RollupBoostPartialHealthinessToleranceLimit uint64 + + // RollupBoostPartialHealthinessToleranceIntervalSeconds is the time frame within which `RollupBoostPartialHealthinessToleranceLimit` is evaluated + RollupBoostPartialHealthinessToleranceIntervalSeconds uint64 } func (c *HealthCheckConfig) Check() error { @@ -251,5 +259,8 @@ func (c *HealthCheckConfig) Check() error { return fmt.Errorf("invalid el p2p check api") } } + if (c.RollupBoostPartialHealthinessToleranceLimit != 0 && c.RollupBoostPartialHealthinessToleranceIntervalSeconds == 0) || (c.RollupBoostPartialHealthinessToleranceLimit == 0 && c.RollupBoostPartialHealthinessToleranceIntervalSeconds != 0) { + return fmt.Errorf("only one of RollupBoostPartialHealthinessToleranceLimit or RollupBoostPartialHealthinessToleranceIntervalSeconds found to be defined.
Either define both of them or none.") + } return nil } diff --git a/op-conductor/conductor/service.go b/op-conductor/conductor/service.go index e8e2ef3d35bcc..b9f25710c6bf3 100644 --- a/op-conductor/conductor/service.go +++ b/op-conductor/conductor/service.go @@ -263,6 +263,8 @@ func (c *OpConductor) initHealthMonitor(ctx context.Context) error { rb, elP2p, c.cfg.HealthCheck.ExecutionP2pMinPeerCount, + c.cfg.HealthCheck.RollupBoostPartialHealthinessToleranceLimit, + c.cfg.HealthCheck.RollupBoostPartialHealthinessToleranceIntervalSeconds, ) c.healthUpdateCh = c.hmon.Subscribe() diff --git a/op-conductor/flags/flags.go b/op-conductor/flags/flags.go index c4a6bc32d8c45..96e6eeafd51ab 100644 --- a/op-conductor/flags/flags.go +++ b/op-conductor/flags/flags.go @@ -180,6 +180,16 @@ var ( EnvVars: opservice.PrefixEnvVar(EnvVarPrefix, "HEALTHCHECK_EXECUTION_P2P_CHECK_API"), Value: "net", } + HealthCheckRollupBoostPartialHealthinessToleranceLimit = &cli.Uint64Flag{ + Name: "healthcheck.rollup-boost-partial-healthiness-tolerance-limit", + Usage: "Sets the count of rollup-boost partial healthiness failures to occur before marking op-conducto as unhealthy. 
Default is 0 with which a single occurrence of rollup-boost partial healthiness is enough to set op-conductor as unhealthy", + EnvVars: opservice.PrefixEnvVar(EnvVarPrefix, "HEALTHCHECK_ROLLUP_BOOST_PARTIAL_HEALTHINESS_TOLERANCE_LIMIT"), + } + HealthCheckRollupBoostPartialHealthinessToleranceIntervalSeconds = &cli.Uint64Flag{ + Name: "healthcheck.rollup-boost-partial-healthiness-tolerance-interval-seconds", + Usage: "The time frame within which rollup-boost partial healthiness tolerance is evaluated", + EnvVars: opservice.PrefixEnvVar(EnvVarPrefix, "HEALTHCHECK_ROLLUP_BOOST_PARTIAL_HEALTHINESS_TOLERANCE_INTERVAL_SECONDS"), + } ) var requiredFlags = []cli.Flag{ @@ -213,6 +223,8 @@ var optionalFlags = []cli.Flag{ HealthcheckExecutionP2pMinPeerCount, HealthcheckExecutionP2pRPCUrl, HealthcheckExecutionP2pCheckApi, + HealthCheckRollupBoostPartialHealthinessToleranceLimit, + HealthCheckRollupBoostPartialHealthinessToleranceIntervalSeconds, } func init() { diff --git a/op-conductor/health/monitor.go b/op-conductor/health/monitor.go index 56091139989e0..e62683b0d8426 100644 --- a/op-conductor/health/monitor.go +++ b/op-conductor/health/monitor.go @@ -39,7 +39,7 @@ type HealthMonitor interface { // interval is the interval between health checks measured in seconds. // safeInterval is the interval between safe head progress measured in seconds. // minPeerCount is the minimum number of peers required for the sequencer to be healthy. 
-func NewSequencerHealthMonitor(log log.Logger, metrics metrics.Metricer, interval, unsafeInterval, safeInterval, minPeerCount uint64, safeEnabled bool, rollupCfg *rollup.Config, node dial.RollupClientInterface, p2p apis.P2PClient, supervisor SupervisorHealthAPI, rb client.RollupBoostClient, elP2pClient client.ElP2PClient, minElP2pPeers uint64) HealthMonitor { +func NewSequencerHealthMonitor(log log.Logger, metrics metrics.Metricer, interval, unsafeInterval, safeInterval, minPeerCount uint64, safeEnabled bool, rollupCfg *rollup.Config, node dial.RollupClientInterface, p2p apis.P2PClient, supervisor SupervisorHealthAPI, rb client.RollupBoostClient, elP2pClient client.ElP2PClient, minElP2pPeers uint64, rollupBoostToleratePartialHealthinessToleranceLimit uint64, rollupBoostToleratePartialHealthinessToleranceIntervalSeconds uint64) HealthMonitor { hm := &SequencerHealthMonitor{ log: log, metrics: metrics, @@ -64,6 +64,13 @@ func NewSequencerHealthMonitor(log log.Logger, metrics metrics.Metricer, interva elP2pClient: elP2pClient, } } + if rollupBoostToleratePartialHealthinessToleranceLimit != 0 { + var err error + hm.timeTolerantRollupBoostPartialHealthinessMgr, err = NewTimeBoundedRotatingCounter(rollupBoostToleratePartialHealthinessToleranceIntervalSeconds, rollupBoostToleratePartialHealthinessToleranceLimit) + if err != nil { + panic(fmt.Errorf("failed to setup health monitor: %w", err)) + } + } return hm } @@ -74,6 +81,63 @@ type ElP2pHealthMonitor struct { elP2pClient client.ElP2PClient } +// this is a type of counter which keeps on incrementing until its reset interval is hit +// this can be used to track time-based rate-limit, error counts, etc. 
+type timeBoundedRotatingCounter struct { + resetIntervalSeconds uint64 + maxValue uint64 + timeProviderFn func() uint64 + + mut *sync.RWMutex + temporalCache map[int64]uint64 +} + +func NewTimeBoundedRotatingCounter(resetIntervalSeconds, maxValue uint64) (*timeBoundedRotatingCounter, error) { + if resetIntervalSeconds == 0 { + panic("reset interval seconds must be more than 0") + } + return &timeBoundedRotatingCounter{ + resetIntervalSeconds: resetIntervalSeconds, + maxValue: maxValue, + mut: &sync.RWMutex{}, + temporalCache: map[int64]uint64{}, + timeProviderFn: currentTimeProvicer, + }, nil +} + +func (t *timeBoundedRotatingCounter) Increment() (uint64, error) { + // let's take `resetIntervalSeconds` as 60s + // truncatedTimestamp is current timestamp rounded off by 60s (resetIntervalSeconds) + // thereby generating a value which stays same until the next 60s helping track and incrementing the counter corresponding to it for the next 60s + currentTsSeconds := t.timeProviderFn() + truncatedTimestamp := int64(currentTsSeconds / t.resetIntervalSeconds) + t.mut.Lock() + defer t.mut.Unlock() + + // a lazy cleanup subroutine to the clean the cache when it's grown enough, preventing memory leaks + defer func() { + if len(t.temporalCache) > 1000 { + newCache := map[int64]uint64{ + truncatedTimestamp: t.temporalCache[truncatedTimestamp], + } + t.temporalCache = newCache // garbage collector should take care of the old cache + } + }() + + if t.maxValue == 0 || t.temporalCache[truncatedTimestamp] < t.maxValue { + t.temporalCache[truncatedTimestamp]++ + return t.temporalCache[truncatedTimestamp], nil + } + return 0, fmt.Errorf("counter at its max value, please wait %ds for it to be reset", (t.resetIntervalSeconds - (currentTsSeconds % t.resetIntervalSeconds))) +} + +func (t *timeBoundedRotatingCounter) CurrentValue() uint64 { + // no benefit is RLock-ing and returning this value. 
+ currentTsSeconds := time.Now().Unix() + truncatedTimestamp := currentTsSeconds / int64(t.resetIntervalSeconds) + return t.temporalCache[truncatedTimestamp] +} + // SequencerHealthMonitor monitors sequencer health. type SequencerHealthMonitor struct { log log.Logger @@ -93,11 +157,12 @@ type SequencerHealthMonitor struct { timeProviderFn func() uint64 - node dial.RollupClientInterface - p2p apis.P2PClient - supervisor SupervisorHealthAPI - rb client.RollupBoostClient - elP2p *ElP2pHealthMonitor + node dial.RollupClientInterface + p2p apis.P2PClient + supervisor SupervisorHealthAPI + rb client.RollupBoostClient + elP2p *ElP2pHealthMonitor + timeTolerantRollupBoostPartialHealthinessMgr *timeBoundedRotatingCounter } var _ HealthMonitor = (*SequencerHealthMonitor)(nil) @@ -288,8 +353,15 @@ func (hm *SequencerHealthMonitor) checkRollupBoost(ctx context.Context) error { case client.HealthStatusHealthy: return nil case client.HealthStatusPartial: + if hm.timeTolerantRollupBoostPartialHealthinessMgr != nil { + if _, err := hm.timeTolerantRollupBoostPartialHealthinessMgr.Increment(); err == nil { + hm.log.Warn("[Tolerating Failure] Rollup boost is partial failure, builder is down but fallback execution client is up", "err", ErrRollupBoostPartiallyHealthy) + return nil + } + } hm.log.Error("Rollup boost is partial failure, builder is down but fallback execution client is up", "err", ErrRollupBoostPartiallyHealthy) return ErrRollupBoostPartiallyHealthy + case client.HealthStatusUnhealthy: hm.log.Error("Rollup boost total failure, both builder and fallback execution client are down", "err", ErrRollupBoostNotHealthy) return ErrRollupBoostNotHealthy diff --git a/op-conductor/health/monitor_test.go b/op-conductor/health/monitor_test.go index 9bbfd69d53f69..7f2f99f7d3d45 100644 --- a/op-conductor/health/monitor_test.go +++ b/op-conductor/health/monitor_test.go @@ -97,6 +97,8 @@ func (s *HealthMonitorTestSuite) SetupMonitor( return monitor } +type monitorOpts 
func(*SequencerHealthMonitor) + // SetupMonitorWithRollupBoost creates a HealthMonitor that includes a RollupBoostClient func (s *HealthMonitorTestSuite) SetupMonitorWithRollupBoost( now, unsafeInterval, safeInterval uint64, @@ -104,6 +106,7 @@ func (s *HealthMonitorTestSuite) SetupMonitorWithRollupBoost( mockP2P *p2pMocks.API, mockRollupBoost *clientmocks.RollupBoostClient, elP2pClient client.ElP2PClient, + opts ...monitorOpts, ) *SequencerHealthMonitor { tp := &timeProvider{now: now} if mockP2P == nil { @@ -137,6 +140,9 @@ func (s *HealthMonitorTestSuite) SetupMonitorWithRollupBoost( elP2pClient: elP2pClient, } } + for _, opt := range opts { + opt(monitor) + } err := monitor.Start(context.Background()) s.NoError(err) return monitor @@ -442,6 +448,86 @@ func (s *HealthMonitorTestSuite) TestRollupBoostPartialStatus() { s.NoError(monitor.Stop()) } +func (s *HealthMonitorTestSuite) TestRollupBoostPartialStatusWithTolerance() { + s.T().Parallel() + now := uint64(time.Now().Unix()) + + // Setup healthy node conditions + rc := &testutils.MockRollupClient{} + ss1 := mockSyncStatus(now-1, 1, now-3, 0) + + // because 6 healthchecks are going to be expected cause 6 calls of sync status + for i := 0; i < 6; i++ { + rc.ExpectSyncStatus(ss1, nil) + } + + // Setup healthy peer count + pc := &p2pMocks.API{} + ps1 := &p2p.PeerStats{ + Connected: healthyPeerCount, + } + pc.EXPECT().PeerStats(mock.Anything).Return(ps1, nil) + + // Setup partial rollup boost status (treated as unhealthy) + rb := &clientmocks.RollupBoostClient{} + rb.EXPECT().Healthcheck(mock.Anything).Return(client.HealthStatusPartial, nil) + + toleranceLimit := uint64(2) + toleranceIntervalSeconds := uint64(3) + + timeBoundedRotatingCounter, err := NewTimeBoundedRotatingCounter(toleranceIntervalSeconds, toleranceLimit) + s.Nil(err) + + // Start monitor with all dependencies as well as tolerance of 2 rollup-boost partial unhealthiness per 3s period + monitor := s.SetupMonitorWithRollupBoost(now, 60, 60, rc, pc, rb, 
nil, func(shm *SequencerHealthMonitor) { + tp := &timeProvider{now: 1758792282} + timeBoundedRotatingCounter.timeProviderFn = tp.Now + + // pollute the cache of timeBoundRotatingCounter with 999 elements so as to later test the lazy cleanup + // note: the 1000th element will be added by the first healthchecl run + for i := 0; i < 999; i++ { + timeBoundedRotatingCounter.temporalCache[int64(i)] = uint64(1) + } + + shm.timeTolerantRollupBoostPartialHealthinessMgr = timeBoundedRotatingCounter + }) + + healthUpdateCh := monitor.Subscribe() + + // first error is tolerated (time t+1) + healthStatus := <-healthUpdateCh + s.Nil(healthStatus) + s.Len(timeBoundedRotatingCounter.temporalCache, 1000) // lazy cleanup of the cache not done yet as it's within the bounds + + // second error is tolerated as well (time t+2) + healthStatus = <-healthUpdateCh + s.Nil(healthStatus) + s.Len(timeBoundedRotatingCounter.temporalCache, 1000) // no change of the cache until the next reset + + // third error isn't tolerated (time t+3) + healthFailure := <-healthUpdateCh + s.Equal(ErrRollupBoostPartiallyHealthy, healthFailure) + + // by now, because of three healthchecks, three seconds have been simulated to pass (by the timeProviderFn) + // this should reset the time bound counter, thereby allowing partial unhealthiness failures to be tolerated again + + // first error after the reset is tolerated (time t+4) + healthStatus = <-healthUpdateCh + s.Nil(healthStatus) + s.Len(timeBoundedRotatingCounter.temporalCache, 1) // lazy cleanup of the cache done and it's left with only the current value + + // second error after the reset is tolerated as well (time t+5) + healthStatus = <-healthUpdateCh + s.Nil(healthStatus) + s.Len(timeBoundedRotatingCounter.temporalCache, 1) // no change to the cache until the next reset + + // third error after the reset isn't tolerated (time t+6) + healthFailure = <-healthUpdateCh + s.Equal(ErrRollupBoostPartiallyHealthy, healthFailure) + + s.NoError(monitor.Stop()) +} 
+ func (s *HealthMonitorTestSuite) TestRollupBoostHealthy() { s.T().Parallel() now := uint64(time.Now().Unix()) From 354428a8f59d05a3ee02aa81c9f9423f83e0570a Mon Sep 17 00:00:00 2001 From: Yashvardhan Kukreja Date: Fri, 26 Sep 2025 20:37:05 +0530 Subject: [PATCH 2/7] rlock protection to currentValue function Signed-off-by: Yashvardhan Kukreja --- op-conductor/health/monitor.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/op-conductor/health/monitor.go b/op-conductor/health/monitor.go index e62683b0d8426..19a9f4d2c9c3e 100644 --- a/op-conductor/health/monitor.go +++ b/op-conductor/health/monitor.go @@ -132,9 +132,10 @@ func (t *timeBoundedRotatingCounter) Increment() (uint64, error) { } func (t *timeBoundedRotatingCounter) CurrentValue() uint64 { - // no benefit is RLock-ing and returning this value. currentTsSeconds := time.Now().Unix() truncatedTimestamp := currentTsSeconds / int64(t.resetIntervalSeconds) + t.mut.RLock() + defer t.mut.RUnlock() return t.temporalCache[truncatedTimestamp] } From 67c1d1106129eabe70ce71596b5dfdb771602ca1 Mon Sep 17 00:00:00 2001 From: Yashvardhan Kukreja Date: Sun, 28 Sep 2025 04:59:06 +0530 Subject: [PATCH 3/7] timeboundcounter separate file and unit tests Signed-off-by: Yashvardhan Kukreja --- op-conductor/health/monitor.go | 62 +-------- op-conductor/health/timeboundcounter.go | 63 +++++++++ op-conductor/health/timeboundcounter_test.go | 136 +++++++++++++++++++ 3 files changed, 201 insertions(+), 60 deletions(-) create mode 100644 op-conductor/health/timeboundcounter.go create mode 100644 op-conductor/health/timeboundcounter_test.go diff --git a/op-conductor/health/monitor.go b/op-conductor/health/monitor.go index 19a9f4d2c9c3e..a77e8839573c4 100644 --- a/op-conductor/health/monitor.go +++ b/op-conductor/health/monitor.go @@ -50,7 +50,7 @@ func NewSequencerHealthMonitor(log log.Logger, metrics metrics.Metricer, interva safeEnabled: safeEnabled, safeInterval: safeInterval, minPeerCount: minPeerCount, - 
timeProviderFn: currentTimeProvicer, + timeProviderFn: currentTimeProvider, node: node, p2p: p2p, supervisor: supervisor, @@ -81,64 +81,6 @@ type ElP2pHealthMonitor struct { elP2pClient client.ElP2PClient } -// this is a type of counter which keeps on incrementing until its reset interval is hit -// this can be used to track time-based rate-limit, error counts, etc. -type timeBoundedRotatingCounter struct { - resetIntervalSeconds uint64 - maxValue uint64 - timeProviderFn func() uint64 - - mut *sync.RWMutex - temporalCache map[int64]uint64 -} - -func NewTimeBoundedRotatingCounter(resetIntervalSeconds, maxValue uint64) (*timeBoundedRotatingCounter, error) { - if resetIntervalSeconds == 0 { - panic("reset interval seconds must be more than 0") - } - return &timeBoundedRotatingCounter{ - resetIntervalSeconds: resetIntervalSeconds, - maxValue: maxValue, - mut: &sync.RWMutex{}, - temporalCache: map[int64]uint64{}, - timeProviderFn: currentTimeProvicer, - }, nil -} - -func (t *timeBoundedRotatingCounter) Increment() (uint64, error) { - // let's take `resetIntervalSeconds` as 60s - // truncatedTimestamp is current timestamp rounded off by 60s (resetIntervalSeconds) - // thereby generating a value which stays same until the next 60s helping track and incrementing the counter corresponding to it for the next 60s - currentTsSeconds := t.timeProviderFn() - truncatedTimestamp := int64(currentTsSeconds / t.resetIntervalSeconds) - t.mut.Lock() - defer t.mut.Unlock() - - // a lazy cleanup subroutine to the clean the cache when it's grown enough, preventing memory leaks - defer func() { - if len(t.temporalCache) > 1000 { - newCache := map[int64]uint64{ - truncatedTimestamp: t.temporalCache[truncatedTimestamp], - } - t.temporalCache = newCache // garbage collector should take care of the old cache - } - }() - - if t.maxValue == 0 || t.temporalCache[truncatedTimestamp] < t.maxValue { - t.temporalCache[truncatedTimestamp]++ - return t.temporalCache[truncatedTimestamp], nil - } - 
return 0, fmt.Errorf("counter at its max value, please wait %ds for it to be reset", (t.resetIntervalSeconds - (currentTsSeconds % t.resetIntervalSeconds))) -} - -func (t *timeBoundedRotatingCounter) CurrentValue() uint64 { - currentTsSeconds := time.Now().Unix() - truncatedTimestamp := currentTsSeconds / int64(t.resetIntervalSeconds) - t.mut.RLock() - defer t.mut.RUnlock() - return t.temporalCache[truncatedTimestamp] -} - // SequencerHealthMonitor monitors sequencer health. type SequencerHealthMonitor struct { log log.Logger @@ -379,6 +321,6 @@ func calculateTimeDiff(now, then uint64) uint64 { return now - then } -func currentTimeProvicer() uint64 { +func currentTimeProvider() uint64 { return uint64(time.Now().Unix()) } diff --git a/op-conductor/health/timeboundcounter.go b/op-conductor/health/timeboundcounter.go new file mode 100644 index 0000000000000..b570b6cc7ae79 --- /dev/null +++ b/op-conductor/health/timeboundcounter.go @@ -0,0 +1,63 @@ +package health + +import ( + "fmt" + "sync" +) + +// this is a type of counter which keeps on incrementing until its reset interval is hit +// this can be used to track time-based rate-limit, error counts, etc. 
+type timeBoundedRotatingCounter struct { + resetIntervalSeconds uint64 + maxValue uint64 + timeProviderFn func() uint64 + + mut *sync.RWMutex + temporalCache map[int64]uint64 +} + +func NewTimeBoundedRotatingCounter(resetIntervalSeconds, maxValue uint64) (*timeBoundedRotatingCounter, error) { + if resetIntervalSeconds == 0 { + return nil, fmt.Errorf("reset interval seconds must be more than 0") + } + return &timeBoundedRotatingCounter{ + resetIntervalSeconds: resetIntervalSeconds, + maxValue: maxValue, + mut: &sync.RWMutex{}, + temporalCache: map[int64]uint64{}, + timeProviderFn: currentTimeProvider, + }, nil +} + +func (t *timeBoundedRotatingCounter) Increment() (uint64, error) { + // let's take `resetIntervalSeconds` as 60s + // truncatedTimestamp is current timestamp rounded off by 60s (resetIntervalSeconds) + // thereby generating a value which stays same until the next 60s helping track and incrementing the counter corresponding to it for the next 60s + currentTsSeconds := t.timeProviderFn() + truncatedTimestamp := int64(currentTsSeconds / t.resetIntervalSeconds) + t.mut.Lock() + // a lazy cleanup subroutine to clean the cache when it's grown enough, preventing memory leaks + defer func() { + defer t.mut.Unlock() + if len(t.temporalCache) > 1000 { + newCache := map[int64]uint64{ + truncatedTimestamp: t.temporalCache[truncatedTimestamp], + } + t.temporalCache = newCache // garbage collector should take care of the old cache + } + }() + + if t.maxValue == 0 || t.temporalCache[truncatedTimestamp] < t.maxValue { + t.temporalCache[truncatedTimestamp]++ + return t.temporalCache[truncatedTimestamp], nil + } + return t.maxValue, fmt.Errorf("counter at its max value, please wait %ds for it to be reset", (t.resetIntervalSeconds - (currentTsSeconds % t.resetIntervalSeconds))) +} + +func (t *timeBoundedRotatingCounter) CurrentValue() uint64 { + currentTsSeconds := t.timeProviderFn() + truncatedTimestamp := int64(currentTsSeconds / t.resetIntervalSeconds) +
t.mut.RLock() + defer t.mut.RUnlock() + return t.temporalCache[truncatedTimestamp] +} diff --git a/op-conductor/health/timeboundcounter_test.go b/op-conductor/health/timeboundcounter_test.go new file mode 100644 index 0000000000000..a7d4a522cc1a7 --- /dev/null +++ b/op-conductor/health/timeboundcounter_test.go @@ -0,0 +1,136 @@ +package health + +import ( + "sync" + "testing" + + "github.com/stretchr/testify/require" +) + +func TestTimeBoundedRotatingCounterSetup(t *testing.T) { + t.Parallel() + t.Run("fail with 0 interval seconds value", func(t *testing.T) { + counter, err := NewTimeBoundedRotatingCounter(0, 0) + require.Error(t, err) + require.Nil(t, counter) + }) + + t.Run("succeed with 0 max value", func(t *testing.T) { + counter, err := NewTimeBoundedRotatingCounter(2, 0) + require.NoError(t, err) + require.NotNil(t, counter) + }) +} + +func TestTimeBoundedRotatingCounterIncrement(t *testing.T) { + t.Parallel() + + mockTimeProvider := &timeProvider{now: 0} // every access to .Now() will increment its value simulating a one-second time passing + + maxValue, resetInterval := uint64(2), uint64(6) + counter, err := NewTimeBoundedRotatingCounter(resetInterval, maxValue) + require.NoError(t, err) + require.NotNil(t, counter) + counter.timeProviderFn = mockTimeProvider.Now + + require.Equal(t, int(mockTimeProvider.now), 0) + require.Equal(t, uint64(0), counter.CurrentValue()) + require.Equal(t, int(mockTimeProvider.now), 1) + + newValue, err := counter.Increment() + require.NoError(t, err) + require.Equal(t, uint64(1), newValue) + require.Equal(t, int(mockTimeProvider.now), 2) + require.Equal(t, uint64(1), counter.CurrentValue()) + require.Equal(t, int(mockTimeProvider.now), 3) + + newValue, err = counter.Increment() + require.NoError(t, err) + require.Equal(t, uint64(2), newValue) + require.Equal(t, int(mockTimeProvider.now), 4) + require.Equal(t, uint64(2), counter.CurrentValue()) + require.Equal(t, int(mockTimeProvider.now), 5) + + newValue, err = 
counter.Increment() + require.Error(t, err) + require.Equal(t, uint64(2), newValue) + require.Equal(t, int(mockTimeProvider.now), 6) + require.Equal(t, uint64(0), counter.CurrentValue()) // the next second counter rotates returning 0 as the current value + require.Equal(t, int(mockTimeProvider.now), 7) + + newValue, err = counter.Increment() + require.NoError(t, err) + require.Equal(t, uint64(1), newValue) + require.Equal(t, int(mockTimeProvider.now), 8) + require.Equal(t, uint64(1), counter.CurrentValue()) + require.Equal(t, int(mockTimeProvider.now), 9) + + newValue, err = counter.Increment() + require.NoError(t, err) + require.Equal(t, uint64(2), newValue) + require.Equal(t, int(mockTimeProvider.now), 10) + require.Equal(t, uint64(2), counter.CurrentValue()) + require.Equal(t, int(mockTimeProvider.now), 11) + + newValue, err = counter.Increment() + require.Error(t, err) + require.Equal(t, uint64(2), newValue) + require.Equal(t, int(mockTimeProvider.now), 12) + require.Equal(t, uint64(0), counter.CurrentValue()) // the next second counter rotates returning 0 as the current value + require.Equal(t, int(mockTimeProvider.now), 13) + +} + +func TestTimeBoundedRotatingCounterConcurrentAccess(t *testing.T) { + mockTimeProvider := &timeProvider{now: 0} + + counter, err := NewTimeBoundedRotatingCounter(1, 9) + require.NoError(t, err) + require.NotNil(t, counter) + counter.timeProviderFn = mockTimeProvider.Now + + wg := &sync.WaitGroup{} + wg.Add(2000) + + write := func() { + defer wg.Done() + defer wg.Done() + _, err := counter.Increment() + require.NoError(t, err) // considering the max value is 9, the increment should never fail + } + read := func() { + defer wg.Done() + counter.CurrentValue() + } + require.NotPanics(t, func() { + for i := 0; i < 1000; i++ { + go write() + go read() + } + wg.Wait() + }) +} + +func TestTimeBoundedRotatingCounterLazyCleanup(t *testing.T) { + mockTimeProvider := &timeProvider{now: 0} + + // a counter with a reset interval of 2 ensuring 
every two-seconds the counter's cache would track a new key:value + // we'll trigger the 2-second increment by calling .Increment() and .CurrentValue() because both under the hood, would call .Now() of the mockTimeProvider + counter, err := NewTimeBoundedRotatingCounter(2, 9) + require.NoError(t, err) + require.NotNil(t, counter) + counter.timeProviderFn = mockTimeProvider.Now + + for i := 0; i < 1000; i++ { + _, err := counter.Increment() // trigger a 1-second time increase + require.NoError(t, err) + counter.CurrentValue() // trigger another 1-second time increase, causing the counter interval to reset ensuring next Increment would write a new key in the cache + } + + require.Equal(t, 1000, len(counter.temporalCache)) + + // 1001th increment should trigger the lazy cleanup this time + _, err = counter.Increment() + require.NoError(t, err) + require.Equal(t, 1, len(counter.temporalCache)) +} From 6b712d01fac1a18b74f89841fee1af6581a401d9 Mon Sep 17 00:00:00 2001 From: Yashvardhan Kukreja Date: Sun, 28 Sep 2025 05:02:49 +0530 Subject: [PATCH 4/7] comment about the concurrent access bad path testing Signed-off-by: Yashvardhan Kukreja --- op-conductor/health/timeboundcounter_test.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/op-conductor/health/timeboundcounter_test.go b/op-conductor/health/timeboundcounter_test.go index a7d4a522cc1a7..c7b24f6026af7 100644 --- a/op-conductor/health/timeboundcounter_test.go +++ b/op-conductor/health/timeboundcounter_test.go @@ -81,6 +81,8 @@ func TestTimeBoundedRotatingCounterIncrement(t *testing.T) { } +// To test the bad path: comment out mut.RLock() and mut.RUnlock() in the CurrentValue() method, and run this test again +// you'll see a "fatal error: concurrent map read and map write" func TestTimeBoundedRotatingCounterConcurrentAccess(t *testing.T) { mockTimeProvider := &timeProvider{now: 0} From 12ffb727b047f57fac7416f7331fd208e0ccfb54 Mon Sep 17 00:00:00 2001 From: Yashvardhan Kukreja Date: Tue, 30 Sep 2025 13:55:59 
+0530 Subject: [PATCH 5/7] nit Signed-off-by: Yashvardhan Kukreja --- op-conductor/health/timeboundcounter_test.go | 2 -- 1 file changed, 2 deletions(-) diff --git a/op-conductor/health/timeboundcounter_test.go b/op-conductor/health/timeboundcounter_test.go index c7b24f6026af7..8876fe3ccb9ea 100644 --- a/op-conductor/health/timeboundcounter_test.go +++ b/op-conductor/health/timeboundcounter_test.go @@ -23,7 +23,6 @@ func TestTimeBoundedRotatingCounterSetup(t *testing.T) { } func TestTimeBoundedRotatingCounterIncrement(t *testing.T) { - t.Parallel() mockTimeProvider := &timeProvider{now: 0} // every access to .Now() will increment its value simulating a one-second time passing @@ -95,7 +94,6 @@ func TestTimeBoundedRotatingCounterConcurrentAccess(t *testing.T) { wg.Add(2000) write := func() { - defer wg.Done() defer wg.Done() _, err := counter.Increment() require.NoError(t, err) // considering the max value is 9, the increment should never fail From 5b0dd6ef80515d0a42045a14af4152d9fb5ff7c8 Mon Sep 17 00:00:00 2001 From: Yashvardhan Kukreja Date: Wed, 1 Oct 2025 00:21:57 +0530 Subject: [PATCH 6/7] shift the max value logic to health monitor Signed-off-by: Yashvardhan Kukreja --- op-conductor/health/monitor.go | 25 ++++----- op-conductor/health/monitor_test.go | 55 +++++++++----------- op-conductor/health/timeboundcounter.go | 15 ++---- op-conductor/health/timeboundcounter_test.go | 45 +++++++--------- 4 files changed, 61 insertions(+), 79 deletions(-) diff --git a/op-conductor/health/monitor.go b/op-conductor/health/monitor.go index a77e8839573c4..6e898fa4c95b1 100644 --- a/op-conductor/health/monitor.go +++ b/op-conductor/health/monitor.go @@ -65,8 +65,9 @@ func NewSequencerHealthMonitor(log log.Logger, metrics metrics.Metricer, interva } } if rollupBoostToleratePartialHealthinessToleranceLimit != 0 { + hm.rollupBoostPartialHealthinessToleranceLimit = rollupBoostToleratePartialHealthinessToleranceLimit var err error - hm.timeTolerantRollupBoostPartialHealthinessMgr, 
err = NewTimeBoundedRotatingCounter(rollupBoostToleratePartialHealthinessToleranceIntervalSeconds, rollupBoostToleratePartialHealthinessToleranceLimit) + hm.rollupBoostPartialHealthinessToleranceCounter, err = NewTimeBoundedRotatingCounter(rollupBoostToleratePartialHealthinessToleranceIntervalSeconds) if err != nil { panic(fmt.Errorf("failed to setup health monitor: %w", err)) } @@ -100,12 +101,13 @@ type SequencerHealthMonitor struct { timeProviderFn func() uint64 - node dial.RollupClientInterface - p2p apis.P2PClient - supervisor SupervisorHealthAPI - rb client.RollupBoostClient - elP2p *ElP2pHealthMonitor - timeTolerantRollupBoostPartialHealthinessMgr *timeBoundedRotatingCounter + node dial.RollupClientInterface + p2p apis.P2PClient + supervisor SupervisorHealthAPI + rb client.RollupBoostClient + elP2p *ElP2pHealthMonitor + rollupBoostPartialHealthinessToleranceLimit uint64 + rollupBoostPartialHealthinessToleranceCounter *timeBoundedRotatingCounter } var _ HealthMonitor = (*SequencerHealthMonitor)(nil) @@ -296,11 +298,10 @@ func (hm *SequencerHealthMonitor) checkRollupBoost(ctx context.Context) error { case client.HealthStatusHealthy: return nil case client.HealthStatusPartial: - if hm.timeTolerantRollupBoostPartialHealthinessMgr != nil { - if _, err := hm.timeTolerantRollupBoostPartialHealthinessMgr.Increment(); err == nil { - hm.log.Warn("[Tolerating Failure] Rollup boost is partial failure, builder is down but fallback execution client is up", "err", ErrRollupBoostPartiallyHealthy) - return nil - } + if hm.rollupBoostPartialHealthinessToleranceCounter != nil && hm.rollupBoostPartialHealthinessToleranceCounter.CurrentValue() < hm.rollupBoostPartialHealthinessToleranceLimit { + latestValue := hm.rollupBoostPartialHealthinessToleranceCounter.Increment() + hm.log.Debug("Rollup-boost partial unhealthiness failure tolerated", "currentValue", latestValue, "limit", hm.rollupBoostPartialHealthinessToleranceLimit) + return nil } hm.log.Error("Rollup boost is partial 
failure, builder is down but fallback execution client is up", "err", ErrRollupBoostPartiallyHealthy) return ErrRollupBoostPartiallyHealthy diff --git a/op-conductor/health/monitor_test.go b/op-conductor/health/monitor_test.go index 7f2f99f7d3d45..af85b8ff4270c 100644 --- a/op-conductor/health/monitor_test.go +++ b/op-conductor/health/monitor_test.go @@ -473,57 +473,52 @@ func (s *HealthMonitorTestSuite) TestRollupBoostPartialStatusWithTolerance() { rb.EXPECT().Healthcheck(mock.Anything).Return(client.HealthStatusPartial, nil) toleranceLimit := uint64(2) - toleranceIntervalSeconds := uint64(3) + toleranceIntervalSeconds := uint64(6) - timeBoundedRotatingCounter, err := NewTimeBoundedRotatingCounter(toleranceIntervalSeconds, toleranceLimit) + timeBoundedRotatingCounter, err := NewTimeBoundedRotatingCounter(toleranceIntervalSeconds) s.Nil(err) + tp := &timeProvider{now: 1758792282} + // Start monitor with all dependencies as well as tolerance of 2 rollup-boost partial unhealthiness per 3s period monitor := s.SetupMonitorWithRollupBoost(now, 60, 60, rc, pc, rb, nil, func(shm *SequencerHealthMonitor) { - tp := &timeProvider{now: 1758792282} timeBoundedRotatingCounter.timeProviderFn = tp.Now - // pollute the cache of timeBoundRotatingCounter with 999 elements so as to later test the lazy cleanup - // note: the 1000th element will be added by the first healthchecl run + // pollute the cache of timeBoundRotatingCounter with 998 elements so as to later test the lazy cleanup + // note: the 999th and 1000th element will be added by the first healthcheck run for i := 0; i < 999; i++ { timeBoundedRotatingCounter.temporalCache[int64(i)] = uint64(1) } - shm.timeTolerantRollupBoostPartialHealthinessMgr = timeBoundedRotatingCounter + shm.rollupBoostPartialHealthinessToleranceCounter = timeBoundedRotatingCounter + shm.rollupBoostPartialHealthinessToleranceLimit = toleranceLimit }) healthUpdateCh := monitor.Subscribe() - // first error is tolerated (time t+1) - healthStatus := 
<-healthUpdateCh - s.Nil(healthStatus) - s.Len(timeBoundedRotatingCounter.temporalCache, 1000) // lazy cleanup of the cache not done yet as it's within the bounds + s.Eventually(func() bool { + return len(timeBoundedRotatingCounter.temporalCache) == 1000 + }, time.Second*3, time.Second*1) - // second error is tolerated as well (time t+2) - healthStatus = <-healthUpdateCh - s.Nil(healthStatus) - s.Len(timeBoundedRotatingCounter.temporalCache, 1000) // no change of the cache until the next reset + firstHealthStatus := <-healthUpdateCh + secondHealthStatus := <-healthUpdateCh + thirdHealthStatus := <-healthUpdateCh - // third error isn't tolerated (time t+3) - healthFailure := <-healthUpdateCh - s.Equal(ErrRollupBoostPartiallyHealthy, healthFailure) + s.Nil(firstHealthStatus) + s.Nil(secondHealthStatus) + s.Equal(ErrRollupBoostPartiallyHealthy, thirdHealthStatus) - // by now, because of three healthchecks, three seconds have been simulated to pass (by the timeProviderFn) + tp.Now() // simulate another second passing + // by now, because of three healthchecks, six seconds (CurrentValue + Increment + CurrentValue + Increment + CurrentValue + tp.Now()) have been simulated to pass (by the timeProviderFn) // this should reset the time bound counter, thereby allowing partial unhealthiness failures to be tolerated again - // first error after the reset is tolerated (time t+4) - healthStatus = <-healthUpdateCh - s.Nil(healthStatus) - s.Len(timeBoundedRotatingCounter.temporalCache, 1) // lazy cleanup of the cache done and it's left with only the current value - - // second error after the reset is tolerated as well (time t+5) - healthStatus = <-healthUpdateCh - s.Nil(healthStatus) - s.Len(timeBoundedRotatingCounter.temporalCache, 1) // no change to the cache until the next reset + fourthHealthStatus := <-healthUpdateCh + fifthHealthStatus := <-healthUpdateCh + sixthHealthStatus := <-healthUpdateCh - // third error after the reset isn't tolerated (time t+6) - healthFailure = 
<-healthUpdateCh - s.Equal(ErrRollupBoostPartiallyHealthy, healthFailure) + s.Nil(fourthHealthStatus) + s.Nil(fifthHealthStatus) + s.Equal(ErrRollupBoostPartiallyHealthy, sixthHealthStatus) s.NoError(monitor.Stop()) } diff --git a/op-conductor/health/timeboundcounter.go b/op-conductor/health/timeboundcounter.go index b570b6cc7ae79..58918ead9116d 100644 --- a/op-conductor/health/timeboundcounter.go +++ b/op-conductor/health/timeboundcounter.go @@ -5,31 +5,29 @@ import ( "sync" ) -// this is a type of counter which keeps on incrementing until its reset interval is hit +// this is a type of counter which keeps on incrementing until its reset interval is hit after which it resets to 0 // this can be used to track time-based rate-limit, error counts, etc. type timeBoundedRotatingCounter struct { resetIntervalSeconds uint64 - maxValue uint64 timeProviderFn func() uint64 mut *sync.RWMutex temporalCache map[int64]uint64 } -func NewTimeBoundedRotatingCounter(resetIntervalSeconds, maxValue uint64) (*timeBoundedRotatingCounter, error) { +func NewTimeBoundedRotatingCounter(resetIntervalSeconds uint64) (*timeBoundedRotatingCounter, error) { if resetIntervalSeconds == 0 { return nil, fmt.Errorf("reset interval seconds must be more than 0") } return &timeBoundedRotatingCounter{ resetIntervalSeconds: resetIntervalSeconds, - maxValue: maxValue, mut: &sync.RWMutex{}, temporalCache: map[int64]uint64{}, timeProviderFn: currentTimeProvider, }, nil } -func (t *timeBoundedRotatingCounter) Increment() (uint64, error) { +func (t *timeBoundedRotatingCounter) Increment() uint64 { // let's take `resetIntervalSeconds` as 60s // truncatedTimestamp is current timestamp rounded off by 60s (resetIntervalSeconds) // thereby generating a value which stays same until the next 60s helping track and incrementing the counter corresponding to it for the next 60s @@ -47,11 +45,8 @@ func (t *timeBoundedRotatingCounter) Increment() (uint64, error) { } }() - if t.maxValue == 0 || 
t.temporalCache[truncatedTimestamp] < t.maxValue { - t.temporalCache[truncatedTimestamp]++ - return t.temporalCache[truncatedTimestamp], nil - } - return t.maxValue, fmt.Errorf("counter at its max value, please wait %ds for it to be reset", (t.resetIntervalSeconds - (currentTsSeconds % t.resetIntervalSeconds))) + t.temporalCache[truncatedTimestamp]++ + return t.temporalCache[truncatedTimestamp] } func (t *timeBoundedRotatingCounter) CurrentValue() uint64 { diff --git a/op-conductor/health/timeboundcounter_test.go b/op-conductor/health/timeboundcounter_test.go index 8876fe3ccb9ea..f27c128bf0d6b 100644 --- a/op-conductor/health/timeboundcounter_test.go +++ b/op-conductor/health/timeboundcounter_test.go @@ -10,13 +10,13 @@ import ( func TestTimeBoundedRotatingCounterSetup(t *testing.T) { t.Parallel() t.Run("fail with 0 interval seconds value", func(t *testing.T) { - counter, err := NewTimeBoundedRotatingCounter(0, 0) + counter, err := NewTimeBoundedRotatingCounter(0) require.Error(t, err) require.Nil(t, counter) }) - t.Run("succeed with 0 max value", func(t *testing.T) { - counter, err := NewTimeBoundedRotatingCounter(2, 0) + t.Run("succeed with non-zero interval seconds value", func(t *testing.T) { + counter, err := NewTimeBoundedRotatingCounter(2) require.NoError(t, err) require.NotNil(t, counter) }) @@ -26,8 +26,8 @@ func TestTimeBoundedRotatingCounterIncrement(t *testing.T) { mockTimeProvider := &timeProvider{now: 0} // every access to .Now() will increment its value simulating a one-second time passing - maxValue, resetInterval := uint64(2), uint64(6) - counter, err := NewTimeBoundedRotatingCounter(resetInterval, maxValue) + resetInterval := uint64(6) + counter, err := NewTimeBoundedRotatingCounter(resetInterval) require.NoError(t, err) require.NotNil(t, counter) counter.timeProviderFn = mockTimeProvider.Now @@ -36,44 +36,38 @@ func TestTimeBoundedRotatingCounterIncrement(t *testing.T) { require.Equal(t, uint64(0), counter.CurrentValue()) require.Equal(t, 
int(mockTimeProvider.now), 1) - newValue, err := counter.Increment() - require.NoError(t, err) + newValue := counter.Increment() require.Equal(t, uint64(1), newValue) require.Equal(t, int(mockTimeProvider.now), 2) require.Equal(t, uint64(1), counter.CurrentValue()) require.Equal(t, int(mockTimeProvider.now), 3) - newValue, err = counter.Increment() - require.NoError(t, err) + newValue = counter.Increment() require.Equal(t, uint64(2), newValue) require.Equal(t, int(mockTimeProvider.now), 4) require.Equal(t, uint64(2), counter.CurrentValue()) require.Equal(t, int(mockTimeProvider.now), 5) - newValue, err = counter.Increment() - require.Error(t, err) - require.Equal(t, uint64(2), newValue) + newValue = counter.Increment() + require.Equal(t, uint64(3), newValue) require.Equal(t, int(mockTimeProvider.now), 6) require.Equal(t, uint64(0), counter.CurrentValue()) // the next second counter rotates returning 0 as the current value require.Equal(t, int(mockTimeProvider.now), 7) - newValue, err = counter.Increment() - require.NoError(t, err) + newValue = counter.Increment() require.Equal(t, uint64(1), newValue) require.Equal(t, int(mockTimeProvider.now), 8) require.Equal(t, uint64(1), counter.CurrentValue()) require.Equal(t, int(mockTimeProvider.now), 9) - newValue, err = counter.Increment() - require.NoError(t, err) + newValue = counter.Increment() require.Equal(t, uint64(2), newValue) require.Equal(t, int(mockTimeProvider.now), 10) require.Equal(t, uint64(2), counter.CurrentValue()) require.Equal(t, int(mockTimeProvider.now), 11) - newValue, err = counter.Increment() - require.Error(t, err) - require.Equal(t, uint64(2), newValue) + newValue = counter.Increment() + require.Equal(t, uint64(3), newValue) require.Equal(t, int(mockTimeProvider.now), 12) require.Equal(t, uint64(0), counter.CurrentValue()) // the next second counter rotates returning 0 as the current value require.Equal(t, int(mockTimeProvider.now), 13) @@ -85,7 +79,7 @@ func 
TestTimeBoundedRotatingCounterIncrement(t *testing.T) { func TestTimeBoundedRotatingCounterConcurrentAccess(t *testing.T) { mockTimeProvider := &timeProvider{now: 0} - counter, err := NewTimeBoundedRotatingCounter(1, 9) + counter, err := NewTimeBoundedRotatingCounter(1) require.NoError(t, err) require.NotNil(t, counter) counter.timeProviderFn = mockTimeProvider.Now @@ -95,8 +89,7 @@ func TestTimeBoundedRotatingCounterConcurrentAccess(t *testing.T) { write := func() { defer wg.Done() - _, err := counter.Increment() - require.NoError(t, err) // considering the max value is 9, the increment should never fail + counter.Increment() } read := func() { defer wg.Done() @@ -116,21 +109,19 @@ func TestTimeBoundedRotatingCounterLazyCleanup(t *testing.T) { // a counter with a reset interval of 2 ensuring every two-seconds the counter's cache would track a new key:value // we'll trigger the 2-second increment by calling .Increment() and .CurrentValue() because both under the hood, would call .Now() of the mockTimeProvider - counter, err := NewTimeBoundedRotatingCounter(2, 9) + counter, err := NewTimeBoundedRotatingCounter(2) require.NoError(t, err) require.NotNil(t, counter) counter.timeProviderFn = mockTimeProvider.Now for i := 0; i < 1000; i++ { - _, err := counter.Increment() // trigger a 1-second time increase - require.NoError(t, err) + counter.Increment() // trigger a 1-second time increase counter.CurrentValue() // trigger another 1-second time increase, causing the counter interval to reset ensuring next Increment would write a new key in the cache } require.Equal(t, 1000, len(counter.temporalCache)) // 1001th increment should trigger the lazy cleanup this time - _, err = counter.Increment() - require.NoError(t, err) + counter.Increment() require.Equal(t, 1, len(counter.temporalCache)) } From bc0cdb8a08d3bb50688028ea54ecffc0468ee52a Mon Sep 17 00:00:00 2001 From: Yashvardhan Kukreja Date: Mon, 6 Oct 2025 15:58:10 +0530 Subject: [PATCH 7/7] nit Signed-off-by: Yashvardhan 
Kukreja --- op-conductor/health/monitor.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/op-conductor/health/monitor.go b/op-conductor/health/monitor.go index 6e898fa4c95b1..dc5bd88fbd4b2 100644 --- a/op-conductor/health/monitor.go +++ b/op-conductor/health/monitor.go @@ -300,7 +300,7 @@ func (hm *SequencerHealthMonitor) checkRollupBoost(ctx context.Context) error { case client.HealthStatusPartial: if hm.rollupBoostPartialHealthinessToleranceCounter != nil && hm.rollupBoostPartialHealthinessToleranceCounter.CurrentValue() < hm.rollupBoostPartialHealthinessToleranceLimit { latestValue := hm.rollupBoostPartialHealthinessToleranceCounter.Increment() - hm.log.Debug("Rollup-boost partial unhealthiness failure tolerated", "currentValue", latestValue, "limit", hm.rollupBoostPartialHealthinessToleranceLimit) + hm.log.Debug("Rollup boost partial unhealthiness failure tolerated", "currentValue", latestValue, "limit", hm.rollupBoostPartialHealthinessToleranceLimit) return nil } hm.log.Error("Rollup boost is partial failure, builder is down but fallback execution client is up", "err", ErrRollupBoostPartiallyHealthy)