diff --git a/op-conductor/client/mocks/RollupBoostClient.go b/op-conductor/client/mocks/RollupBoostClient.go deleted file mode 100644 index 053e8c18346..00000000000 --- a/op-conductor/client/mocks/RollupBoostClient.go +++ /dev/null @@ -1,88 +0,0 @@ -// Code generated by mockery v2.x.x. DO NOT EDIT. - -package mocks - -import ( - context "context" - - client "github.com/ethereum-optimism/optimism/op-conductor/client" - mock "github.com/stretchr/testify/mock" -) - -// RollupBoostClient is an autogenerated mock type for the RollupBoostClient type -type RollupBoostClient struct { - mock.Mock -} - -// RollupBoostClient_Expecter is a helper object that allows for easy setup of method expectations -type RollupBoostClient_Expecter struct { - mock *mock.Mock -} - -// Expect returns an expecter for RollupBoostClient -func (_m *RollupBoostClient) EXPECT() *RollupBoostClient_Expecter { - return &RollupBoostClient_Expecter{mock: &_m.Mock} -} - -// Healthcheck provides a mock function with given fields: ctx -func (_m *RollupBoostClient) Healthcheck(ctx context.Context) (client.HealthStatus, error) { - ret := _m.Called(ctx) - - var r0 client.HealthStatus - if rf, ok := ret.Get(0).(func(context.Context) client.HealthStatus); ok { - r0 = rf(ctx) - } else { - r0 = ret.Get(0).(client.HealthStatus) - } - - var r1 error - if rf, ok := ret.Get(1).(func(context.Context) error); ok { - r1 = rf(ctx) - } else { - r1 = ret.Error(1) - } - - return r0, r1 -} - -// RollupBoostClient_Healthcheck_Call is a *mock.Call that shadows Run/Return methods with type explicit versions for method 'Healthcheck' -type RollupBoostClient_Healthcheck_Call struct { - *mock.Call -} - -// Healthcheck is a helper method to define mock.On call -func (_e *RollupBoostClient_Expecter) Healthcheck(ctx interface{}) *RollupBoostClient_Healthcheck_Call { - return &RollupBoostClient_Healthcheck_Call{Call: _e.mock.On("Healthcheck", ctx)} -} - -func (_c *RollupBoostClient_Healthcheck_Call) Run(run func(ctx context.Context)) *RollupBoostClient_Healthcheck_Call { - _c.Call.Run(func(args mock.Arguments) { - run(args[0].(context.Context)) - }) - return _c -} - -func (_c *RollupBoostClient_Healthcheck_Call) Return(status client.HealthStatus, err error) *RollupBoostClient_Healthcheck_Call { - _c.Call.Return(status, err) - return _c -} - -func (_c *RollupBoostClient_Healthcheck_Call) RunAndReturn(run func(context.Context) (client.HealthStatus, error)) *RollupBoostClient_Healthcheck_Call { - _c.Call.Return(run) - return _c -} - -type mockConstructorTestingTNewRollupBoostClient interface { - mock.TestingT - Cleanup(func()) -} - -// NewRollupBoostClient creates a new instance of RollupBoostClient. It also registers a testing interface on the mock and a cleanup function to assert the mocks expectations. -func NewRollupBoostClient(t mockConstructorTestingTNewRollupBoostClient) *RollupBoostClient { - mock := &RollupBoostClient{} - mock.Mock.Test(t) - - t.Cleanup(func() { mock.AssertExpectations(t) }) - - return mock -} diff --git a/op-conductor/client/mocks/RollupBoostHealthChecker.go b/op-conductor/client/mocks/RollupBoostHealthChecker.go new file mode 100644 index 00000000000..d57b6f94630 --- /dev/null +++ b/op-conductor/client/mocks/RollupBoostHealthChecker.go @@ -0,0 +1,89 @@ +// Code generated by mockery v2.x.x. DO NOT EDIT. + +package mocks + +import ( + context "context" + + client "github.com/ethereum-optimism/optimism/op-conductor/client" + mock "github.com/stretchr/testify/mock" +) + +// RollupBoostHealthChecker is an autogenerated mock type for the RollupBoostHealthChecker type +type RollupBoostHealthChecker struct { + mock.Mock +} + +// RollupBoostHealthChecker_Expecter is a helper object that allows for easy setup of method expectations +type RollupBoostHealthChecker_Expecter struct { + mock *mock.Mock +} + +// Expect returns an expecter for RollupBoostHealthChecker +func (_m *RollupBoostHealthChecker) EXPECT() *RollupBoostHealthChecker_Expecter { + return &RollupBoostHealthChecker_Expecter{mock: &_m.Mock} +} + +// Healthcheck provides a mock function with given fields: ctx +func (_m *RollupBoostHealthChecker) Healthcheck(ctx context.Context) (client.HealthStatus, error) { + ret := _m.Called(ctx) + + var r0 client.HealthStatus + if rf, ok := ret.Get(0).(func(context.Context) client.HealthStatus); ok { + r0 = rf(ctx) + } else { + r0 = ret.Get(0).(client.HealthStatus) + } + + var r1 error + if rf, ok := ret.Get(1).(func(context.Context) error); ok { + r1 = rf(ctx) + } else { + r1 = ret.Error(1) + } + + return r0, r1 +} + +// RollupBoostHealthChecker_Healthcheck_Call is a *mock.Call that shadows Run/Return methods with type explicit versions for method 'Healthcheck' +type RollupBoostHealthChecker_Healthcheck_Call struct { + *mock.Call +} + +// Healthcheck is a helper method to define mock.On call +func (_e *RollupBoostHealthChecker_Expecter) Healthcheck(ctx interface{}) *RollupBoostHealthChecker_Healthcheck_Call { + return &RollupBoostHealthChecker_Healthcheck_Call{Call: _e.mock.On("Healthcheck", ctx)} +} + +func (_c *RollupBoostHealthChecker_Healthcheck_Call) Run(run func(ctx context.Context)) *RollupBoostHealthChecker_Healthcheck_Call { + _c.Call.Run(func(args mock.Arguments) { + run(args[0].(context.Context)) + }) + return _c +} + +func (_c *RollupBoostHealthChecker_Healthcheck_Call) Return(status client.HealthStatus, err error) *RollupBoostHealthChecker_Healthcheck_Call { + _c.Call.Return(status, err) + return _c +} + +func (_c *RollupBoostHealthChecker_Healthcheck_Call) RunAndReturn(run func(context.Context) (client.HealthStatus, error)) *RollupBoostHealthChecker_Healthcheck_Call { + _c.Call.Return(run) + return _c +} + +type mockConstructorTestingTNewRollupBoostHealthChecker interface { + mock.TestingT + Cleanup(func()) +} + +// NewRollupBoostHealthChecker creates a new instance of RollupBoostHealthChecker. It also registers a testing interface on the mock and a cleanup function to assert the mocks expectations. +func NewRollupBoostHealthChecker(t mockConstructorTestingTNewRollupBoostHealthChecker) *RollupBoostHealthChecker { + mock := &RollupBoostHealthChecker{} + mock.Mock.Test(t) + + t.Cleanup(func() { mock.AssertExpectations(t) }) + + return mock +} + diff --git a/op-conductor/client/rollup_boost.go b/op-conductor/client/rollupboost.go similarity index 61% rename from op-conductor/client/rollup_boost.go rename to op-conductor/client/rollupboost.go index 32d30f46f13..2de606f5069 100644 --- a/op-conductor/client/rollup_boost.go +++ b/op-conductor/client/rollupboost.go @@ -12,6 +12,7 @@ const ( HealthzEndpoint = "/healthz" ) +// HealthStatus represents the health state of rollup-boost. type HealthStatus string const ( @@ -20,26 +21,32 @@ const ( HealthStatusUnhealthy HealthStatus = "unhealthy" ) -type RollupBoostClient interface { +// RollupBoostHealthChecker is the common interface for rollup-boost health checking. +// Both RollupBoostClient and RollupBoostNextClient implement this interface. +type RollupBoostHealthChecker interface { Healthcheck(ctx context.Context) (HealthStatus, error) } -type rollupBoostClient struct { +// RollupBoostClient uses HTTP status codes to determine rollup-boost health. +type RollupBoostClient struct { baseURL string httpClient *http.Client } -func NewRollupBoostClient(baseURL string, httpClient *http.Client) RollupBoostClient { +// NewRollupBoostClient creates a client that interprets HTTP status codes for health. +func NewRollupBoostClient(baseURL string, httpClient *http.Client) *RollupBoostClient { if httpClient == nil { httpClient = http.DefaultClient } - return &rollupBoostClient{ + return &RollupBoostClient{ baseURL: baseURL, httpClient: httpClient, } } -func (c *rollupBoostClient) Healthcheck(ctx context.Context) (HealthStatus, error) { +// Healthcheck returns health status based on HTTP status codes: +// 200 OK = Healthy, 206 Partial Content = Partial, 503 Service Unavailable = Unhealthy +func (c *RollupBoostClient) Healthcheck(ctx context.Context) (HealthStatus, error) { req, err := http.NewRequestWithContext(ctx, http.MethodGet, c.baseURL+HealthzEndpoint, nil) if err != nil { return "", fmt.Errorf("failed to create request: %w", err) @@ -65,3 +72,6 @@ func (c *rollupBoostClient) Healthcheck(ctx context.Context) (HealthStatus, erro return "", fmt.Errorf("unexpected status code: %d", resp.StatusCode) } } + +// Ensure RollupBoostClient implements RollupBoostHealthChecker +var _ RollupBoostHealthChecker = (*RollupBoostClient)(nil) diff --git a/op-conductor/client/rollupboost_next.go b/op-conductor/client/rollupboost_next.go new file mode 100644 index 00000000000..c68da339ba3 --- /dev/null +++ b/op-conductor/client/rollupboost_next.go @@ -0,0 +1,79 @@ +package client + +import ( + "context" + "encoding/json" + "fmt" + "io" + "net/http" +) + +// JSON API health status values returned by rollup-boost +const ( + jsonHealthStatusHealthy = "Healthy" + jsonHealthStatusPartial = "PartialContent" + jsonHealthStatusUnhealthy = "ServiceUnavailable" +) + +// RollupBoostNextClient retrieves rollup-boost health using the JSON-based healthcheck endpoint. +type RollupBoostNextClient struct { + url string + httpClient *http.Client +} + +// RollupBoostNextHealthResponse captures the JSON payload returned by the rollup-boost health endpoint. +type RollupBoostNextHealthResponse struct { + Version string `json:"version"` + RollupBoostHealth string `json:"rollup_boost_health"` +} + +// NewRollupBoostNextClient constructs a client for querying the rollup-boost health endpoint. +// The url parameter should be the full URL including path (e.g., "http://localhost:8080/healthz"). +func NewRollupBoostNextClient(url string, httpClient *http.Client) *RollupBoostNextClient { + if httpClient == nil { + httpClient = http.DefaultClient + } + return &RollupBoostNextClient{ + url: url, + httpClient: httpClient, + } +} + +// Healthcheck fetches the rollup-boost health endpoint and interprets the JSON payload. +func (c *RollupBoostNextClient) Healthcheck(ctx context.Context) (HealthStatus, error) { + req, err := http.NewRequestWithContext(ctx, http.MethodGet, c.url, nil) + if err != nil { + return "", fmt.Errorf("failed to create request: %w", err) + } + + resp, err := c.httpClient.Do(req) + if err != nil { + return "", fmt.Errorf("failed to make request: %w", err) + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + return "", fmt.Errorf("unexpected status code: %d", resp.StatusCode) + } + + var payload RollupBoostNextHealthResponse + // Limit response size to 1 MiB to prevent memory exhaustion from malicious servers + if err := json.NewDecoder(io.LimitReader(resp.Body, 1<<20)).Decode(&payload); err != nil { + return "", fmt.Errorf("failed to decode response: %w", err) + } + + // Map JSON API values to internal constants + switch payload.RollupBoostHealth { + case jsonHealthStatusHealthy: + return HealthStatusHealthy, nil + case jsonHealthStatusPartial: + return HealthStatusPartial, nil + case jsonHealthStatusUnhealthy: + return HealthStatusUnhealthy, nil + default: + return "", fmt.Errorf("unexpected rollup_boost_health: %q", payload.RollupBoostHealth) + } +} + +// Ensure RollupBoostNextClient implements RollupBoostHealthChecker +var _ RollupBoostHealthChecker = (*RollupBoostNextClient)(nil) diff --git a/op-conductor/client/rollupboost_next_test.go b/op-conductor/client/rollupboost_next_test.go new file mode 100644 index 00000000000..e33f2947748 --- /dev/null +++ b/op-conductor/client/rollupboost_next_test.go @@ -0,0 +1,106 @@ +package client + +import ( + "context" + "encoding/json" + "net/http" + "net/http/httptest" + "testing" + + "github.com/stretchr/testify/require" +) + +func TestRollupBoostNextHealthcheck(t *testing.T) { + testCases := []struct { + name string + response interface{} + statusCode int + wantStatus HealthStatus + wantErr string + }{ + { + name: "healthy", + response: RollupBoostNextHealthResponse{ + Version: "1.0.0", + RollupBoostHealth: "Healthy", // JSON API value + }, + statusCode: http.StatusOK, + wantStatus: HealthStatusHealthy, + }, + { + name: "partial", + response: RollupBoostNextHealthResponse{ + Version: "1.0.0", + RollupBoostHealth: "PartialContent", // JSON API value + }, + statusCode: http.StatusOK, + wantStatus: HealthStatusPartial, + }, + { + name: "unhealthy", + response: RollupBoostNextHealthResponse{ + Version: "1.0.0", + RollupBoostHealth: "ServiceUnavailable", // JSON API value + }, + statusCode: http.StatusOK, + wantStatus: HealthStatusUnhealthy, + }, + { + name: "unexpected status code", + response: RollupBoostNextHealthResponse{ + Version: "1.0.0", + RollupBoostHealth: "Healthy", // JSON API value + }, + statusCode: http.StatusAccepted, + wantErr: "unexpected status code: 202", + }, + { + name: "malformed json", + response: "{not-json", + statusCode: http.StatusOK, + wantErr: "failed to decode response", + }, + { + name: "unknown health", + response: RollupBoostNextHealthResponse{ + Version: "1.0.0", + RollupBoostHealth: "Unknown", + }, + statusCode: http.StatusOK, + wantErr: `unexpected rollup_boost_health: "Unknown"`, + }, + } + + for _, tc := range testCases { + tc := tc + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + require.Equal(t, HealthzEndpoint, r.URL.Path) + w.WriteHeader(tc.statusCode) + + switch v := tc.response.(type) { + case string: + _, _ = w.Write([]byte(v)) + default: + require.NoError(t, json.NewEncoder(w).Encode(v)) + } + })) + defer server.Close() + + // Pass full URL including path + client := NewRollupBoostNextClient(server.URL+HealthzEndpoint, server.Client()) + status, err := client.Healthcheck(context.Background()) + + if tc.wantErr != "" { + require.Error(t, err) + require.Contains(t, err.Error(), tc.wantErr) + return + } + + require.NoError(t, err) + require.Equal(t, tc.wantStatus, status) + }) + } +} diff --git a/op-conductor/client/rollup_boost_test.go b/op-conductor/client/rollupboost_test.go similarity index 100% rename from op-conductor/client/rollup_boost_test.go rename to op-conductor/client/rollupboost_test.go diff --git a/op-conductor/conductor/config.go b/op-conductor/conductor/config.go index 90700b5a914..f7b45b6e6cc 100644 --- a/op-conductor/conductor/config.go +++ b/op-conductor/conductor/config.go @@ -66,12 +66,23 @@ type Config struct { // SupervisorRPC is the HTTP provider URL for supervisor. SupervisorRPC string - // RollupBoostEnabled is true if the rollup boost is enabled. + // RollupBoostEnabled enables the rollup-boost healthcheck (HTTP status codes). + // When enabled, healthchecks are performed against ExecutionRPC + "/healthz". + // The client internally appends the /healthz path to ExecutionRPC. RollupBoostEnabled bool - // RollupBoostHealthcheckTimeout is the timeout for rollup boost healthcheck. + // RollupBoostHealthcheckTimeout is the timeout for rollup-boost healthchecks (applies to both standard and next). RollupBoostHealthcheckTimeout time.Duration + // RollupBoostNextEnabled enables the next rollup-boost healthcheck (JSON-based). + // Requires RollupBoostNextHealthcheckURL to be set. + RollupBoostNextEnabled bool + + // RollupBoostNextHealthcheckURL is the full URL (including path) for the rollup-boost health endpoint. + // Must include the complete path (e.g., "http://localhost:8080/healthz"). + // Required when RollupBoostNextEnabled is true. + RollupBoostNextHealthcheckURL string + // Paused is true if the conductor should start in a paused state. Paused bool @@ -117,6 +128,12 @@ func (c *Config) Check() error { if c.ExecutionRPC == "" { return fmt.Errorf("missing geth RPC") } + if c.RollupBoostEnabled && c.RollupBoostNextEnabled { + return fmt.Errorf("only one of rollup-boost or rollup-boost next healthchecks can be enabled") + } + if c.RollupBoostNextEnabled && c.RollupBoostNextHealthcheckURL == "" { + return fmt.Errorf("missing rollup-boost next healthcheck URL") + } if err := c.HealthCheck.Check(); err != nil { return errors.Wrap(err, "invalid health check config") } @@ -154,7 +171,6 @@ func NewConfig(ctx *cli.Context, log log.Logger) (*Config, error) { if executionP2pCheckApi == "" { executionP2pCheckApi = "net" } - return &Config{ ConsensusAddr: ctx.String(flags.ConsensusAddr.Name), ConsensusPort: ctx.Int(flags.ConsensusPort.Name), @@ -174,6 +190,8 @@ func NewConfig(ctx *cli.Context, log log.Logger) (*Config, error) { SupervisorRPC: ctx.String(flags.SupervisorRPC.Name), RollupBoostEnabled: ctx.Bool(flags.RollupBoostEnabled.Name), RollupBoostHealthcheckTimeout: ctx.Duration(flags.RollupBoostHealthcheckTimeout.Name), + RollupBoostNextEnabled: ctx.Bool(flags.RollupBoostNextEnabled.Name), + RollupBoostNextHealthcheckURL: ctx.String(flags.RollupBoostNextHealthcheckURL.Name), Paused: ctx.Bool(flags.Paused.Name), HealthCheck: HealthCheckConfig{ Interval: ctx.Uint64(flags.HealthCheckInterval.Name), diff --git a/op-conductor/conductor/config_test.go b/op-conductor/conductor/config_test.go new file mode 100644 index 00000000000..a5744c3863a --- /dev/null +++ b/op-conductor/conductor/config_test.go @@ -0,0 +1,25 @@ +package conductor + +import ( + "testing" + + "github.com/stretchr/testify/require" +) + +func TestConfigCheckRollupBoostAndNextMutuallyExclusive(t *testing.T) { + cfg := &Config{ + ConsensusAddr: "127.0.0.1", + ConsensusPort: 9000, + RaftServerID: "server-1", + RaftStorageDir: "/tmp/op-conductor", + NodeRPC: "http://node.example", + ExecutionRPC: "http://exec.example", + RollupBoostEnabled: true, + RollupBoostNextEnabled: true, + RollupBoostNextHealthcheckURL: "http://rollupboost.example", + } + + err := cfg.Check() + require.Error(t, err) + require.Contains(t, err.Error(), "only one of rollup-boost or rollup-boost next healthchecks can be enabled") +} diff --git a/op-conductor/conductor/service.go b/op-conductor/conductor/service.go index 33a23fea720..244a3ea5ecb 100644 --- a/op-conductor/conductor/service.go +++ b/op-conductor/conductor/service.go @@ -213,9 +213,14 @@ func (c *OpConductor) initHealthMonitor(ctx context.Context) error { } node := sources.NewRollupClient(nc) - var rb client.RollupBoostClient + // Create rollup-boost health checker (either standard or next, mutually exclusive) + var rollupBoostHealthChecker client.RollupBoostHealthChecker if c.cfg.RollupBoostEnabled { - rb = client.NewRollupBoostClient(c.cfg.ExecutionRPC, &http.Client{ + rollupBoostHealthChecker = client.NewRollupBoostClient(c.cfg.ExecutionRPC, &http.Client{ + Timeout: c.cfg.RollupBoostHealthcheckTimeout, + }) + } else if c.cfg.RollupBoostNextEnabled { + rollupBoostHealthChecker = client.NewRollupBoostNextClient(c.cfg.RollupBoostNextHealthcheckURL, &http.Client{ Timeout: c.cfg.RollupBoostHealthcheckTimeout, }) } @@ -260,7 +265,7 @@ func (c *OpConductor) initHealthMonitor(ctx context.Context) error { node, p2p, supervisor, - rb, + rollupBoostHealthChecker, elP2p, c.cfg.HealthCheck.ExecutionP2pMinPeerCount, c.cfg.HealthCheck.RollupBoostPartialHealthinessToleranceLimit, @@ -957,8 +962,8 @@ func (oc *OpConductor) shouldWaitForHealthRecovery() bool { return false } - // Don't wait if rollup boost is enabled and partially healthy - transfer leadership instead - if oc.cfg.RollupBoostEnabled && errors.Is(oc.hcerr, health.ErrRollupBoostPartiallyHealthy) { + // Don't wait if rollup boost healthcheck is enabled and partially healthy - transfer leadership instead + if (oc.cfg.RollupBoostEnabled || oc.cfg.RollupBoostNextEnabled) && errors.Is(oc.hcerr, health.ErrRollupBoostPartiallyHealthy) { return false } diff --git a/op-conductor/flags/flags.go b/op-conductor/flags/flags.go index 96e6eeafd51..2e4bfb04ac9 100644 --- a/op-conductor/flags/flags.go +++ b/op-conductor/flags/flags.go @@ -98,16 +98,27 @@ var ( } RollupBoostEnabled = &cli.BoolFlag{ Name: "rollup-boost.enabled", - Usage: "Should be set to true if execution.rpc points to a rollup boost instance, false otherwise. If true, rollup boost specific healthchecks will be performed against the rollup boost instance.", + Usage: "Enable the rollup-boost healthcheck that uses HTTP status codes (200/206/503). Healthchecks are performed against execution.rpc + '/healthz' (path appended automatically). Mutually exclusive with rollup-boost.next-enabled.", EnvVars: opservice.PrefixEnvVar(EnvVarPrefix, "ROLLUP_BOOST_ENABLED"), Value: false, } RollupBoostHealthcheckTimeout = &cli.DurationFlag{ Name: "rollup-boost.healthcheck-timeout", - Usage: "Timeout for rollup boost healthcheck", + Usage: "Timeout for rollup-boost healthchecks (applies to both standard and next)", EnvVars: opservice.PrefixEnvVar(EnvVarPrefix, "ROLLUP_BOOST_HEALTHCHECK_TIMEOUT"), Value: 5 * time.Second, } + RollupBoostNextEnabled = &cli.BoolFlag{ + Name: "rollup-boost.next-enabled", + Usage: "Enable rollup-boost healthcheck using JSON response parsing. Requires rollup-boost.next-healthcheck-url. Mutually exclusive with rollup-boost.enabled.", + EnvVars: opservice.PrefixEnvVar(EnvVarPrefix, "ROLLUP_BOOST_NEXT_ENABLED"), + Value: false, + } + RollupBoostNextHealthcheckURL = &cli.StringFlag{ + Name: "rollup-boost.next-healthcheck-url", + Usage: "Full URL including path for the rollup-boost health endpoint (e.g., 'http://localhost:8080/healthz'). Required when rollup-boost.next-enabled is true.", + EnvVars: opservice.PrefixEnvVar(EnvVarPrefix, "ROLLUP_BOOST_NEXT_HEALTHCHECK_URL"), + } HealthCheckInterval = &cli.Uint64Flag{ Name: "healthcheck.interval", Usage: "Interval between health checks", @@ -219,6 +230,8 @@ var optionalFlags = []cli.Flag{ SupervisorRPC, RollupBoostEnabled, RollupBoostHealthcheckTimeout, + RollupBoostNextEnabled, + RollupBoostNextHealthcheckURL, HealthcheckExecutionP2pEnabled, HealthcheckExecutionP2pMinPeerCount, HealthcheckExecutionP2pRPCUrl, diff --git a/op-conductor/health/monitor.go b/op-conductor/health/monitor.go index dc5bd88fbd4..67e568182b8 100644 --- a/op-conductor/health/monitor.go +++ b/op-conductor/health/monitor.go @@ -39,22 +39,23 @@ type HealthMonitor interface { // interval is the interval between health checks measured in seconds. // safeInterval is the interval between safe head progress measured in seconds. // minPeerCount is the minimum number of peers required for the sequencer to be healthy. -func NewSequencerHealthMonitor(log log.Logger, metrics metrics.Metricer, interval, unsafeInterval, safeInterval, minPeerCount uint64, safeEnabled bool, rollupCfg *rollup.Config, node dial.RollupClientInterface, p2p apis.P2PClient, supervisor SupervisorHealthAPI, rb client.RollupBoostClient, elP2pClient client.ElP2PClient, minElP2pPeers uint64, rollupBoostToleratePartialHealthinessToleranceLimit uint64, rollupBoostToleratePartialHealthinessToleranceIntervalSeconds uint64) HealthMonitor { +// rollupBoostHealthChecker is an optional health checker for rollup-boost (either standard or next client). +func NewSequencerHealthMonitor(log log.Logger, metrics metrics.Metricer, interval, unsafeInterval, safeInterval, minPeerCount uint64, safeEnabled bool, rollupCfg *rollup.Config, node dial.RollupClientInterface, p2p apis.P2PClient, supervisor SupervisorHealthAPI, rollupBoostHealthChecker client.RollupBoostHealthChecker, elP2pClient client.ElP2PClient, minElP2pPeers uint64, rollupBoostToleratePartialHealthinessToleranceLimit uint64, rollupBoostToleratePartialHealthinessToleranceIntervalSeconds uint64) HealthMonitor { hm := &SequencerHealthMonitor{ - log: log, - metrics: metrics, - interval: interval, - healthUpdateCh: make(chan error), - rollupCfg: rollupCfg, - unsafeInterval: unsafeInterval, - safeEnabled: safeEnabled, - safeInterval: safeInterval, - minPeerCount: minPeerCount, - timeProviderFn: currentTimeProvider, - node: node, - p2p: p2p, - supervisor: supervisor, - rb: rb, + log: log, + metrics: metrics, + interval: interval, + healthUpdateCh: make(chan error), + rollupCfg: rollupCfg, + unsafeInterval: unsafeInterval, + safeEnabled: safeEnabled, + safeInterval: safeInterval, + minPeerCount: minPeerCount, + timeProviderFn: currentTimeProvider, + node: node, + p2p: p2p, + supervisor: supervisor, + rollupBoostHealthChecker: rollupBoostHealthChecker, } if elP2pClient != nil { @@ -104,7 +105,7 @@ type SequencerHealthMonitor struct { node dial.RollupClientInterface p2p apis.P2PClient supervisor SupervisorHealthAPI - rb client.RollupBoostClient + rollupBoostHealthChecker client.RollupBoostHealthChecker elP2p *ElP2pHealthMonitor rollupBoostPartialHealthinessToleranceLimit uint64 rollupBoostPartialHealthinessToleranceCounter *timeBoundedRotatingCounter @@ -282,18 +283,22 @@ func (hm *SequencerHealthMonitor) checkNodePeerCount(ctx context.Context) error } func (hm *SequencerHealthMonitor) checkRollupBoost(ctx context.Context) error { - // Skip the check if rollup boost client is not configured - if hm.rb == nil { - hm.log.Info("rollup boost client is not configured, skipping health check") + // Skip the check if rollup boost health checker is not configured + if hm.rollupBoostHealthChecker == nil { + hm.log.Debug("rollup-boost health checker is not configured, skipping health check") return nil } - status, err := hm.rb.Healthcheck(ctx) + status, err := hm.rollupBoostHealthChecker.Healthcheck(ctx) if err != nil { - hm.log.Error("health monitor failed to get rollup boost status", "err", err) + hm.log.Error("health monitor failed to get rollup-boost status", "err", err) return ErrRollupBoostConnectionDown } + return hm.handleRollupBoostStatus(status) +} + +func (hm *SequencerHealthMonitor) handleRollupBoostStatus(status client.HealthStatus) error { switch status { case client.HealthStatusHealthy: return nil diff --git a/op-conductor/health/monitor_test.go b/op-conductor/health/monitor_test.go index af85b8ff427..a99047f58fe 100644 --- a/op-conductor/health/monitor_test.go +++ b/op-conductor/health/monitor_test.go @@ -99,12 +99,12 @@ func (s *HealthMonitorTestSuite) SetupMonitor( type monitorOpts func(*SequencerHealthMonitor) -// SetupMonitorWithRollupBoost creates a HealthMonitor that includes a RollupBoostClient +// SetupMonitorWithRollupBoost creates a HealthMonitor that includes a RollupBoostHealthChecker func (s *HealthMonitorTestSuite) SetupMonitorWithRollupBoost( now, unsafeInterval, safeInterval uint64, mockRollupClient *testutils.MockRollupClient, mockP2P *p2pMocks.API, - mockRollupBoost *clientmocks.RollupBoostClient, + mockRollupBoostHealthChecker *clientmocks.RollupBoostHealthChecker, elP2pClient client.ElP2PClient, opts ...monitorOpts, ) *SequencerHealthMonitor { @@ -130,8 +130,8 @@ func (s *HealthMonitorTestSuite) SetupMonitorWithRollupBoost( node: mockRollupClient, p2p: mockP2P, } - if mockRollupBoost != nil { - monitor.rb = mockRollupBoost + if mockRollupBoostHealthChecker != nil { + monitor.rollupBoostHealthChecker = mockRollupBoostHealthChecker } if elP2pClient != nil { monitor.elP2p = &ElP2pHealthMonitor{ @@ -359,26 +359,21 @@ func (s *HealthMonitorTestSuite) TestRollupBoostConnectionDown() { s.T().Parallel() now := uint64(time.Now().Unix()) - // Setup healthy node conditions rc := &testutils.MockRollupClient{} ss1 := mockSyncStatus(now-1, 1, now-3, 0) rc.ExpectSyncStatus(ss1, nil) - // Setup healthy peer count pc := &p2pMocks.API{} ps1 := &p2p.PeerStats{ Connected: healthyPeerCount, } pc.EXPECT().PeerStats(mock.Anything).Return(ps1, nil) - // Setup rollup boost connection failure - rb := &clientmocks.RollupBoostClient{} - rb.EXPECT().Healthcheck(mock.Anything).Return(client.HealthStatus(""), errors.New("connection refused")) + rbChecker := &clientmocks.RollupBoostHealthChecker{} + rbChecker.EXPECT().Healthcheck(mock.Anything).Return(client.HealthStatus(""), errors.New("connection refused")) - // Start monitor with all dependencies - monitor := s.SetupMonitorWithRollupBoost(now, 60, 60, rc, pc, rb, nil) + monitor := s.SetupMonitorWithRollupBoost(now, 60, 60, rc, pc, rbChecker, nil) - // Check for connection down error healthUpdateCh := monitor.Subscribe() healthFailure := <-healthUpdateCh s.Equal(ErrRollupBoostConnectionDown, healthFailure) @@ -390,26 +385,21 @@ func (s *HealthMonitorTestSuite) TestRollupBoostNotHealthy() { s.T().Parallel() now := uint64(time.Now().Unix()) - // Setup healthy node conditions rc := &testutils.MockRollupClient{} ss1 := mockSyncStatus(now-1, 1, now-3, 0) rc.ExpectSyncStatus(ss1, nil) - // Setup healthy peer count pc := &p2pMocks.API{} ps1 := &p2p.PeerStats{ Connected: healthyPeerCount, } pc.EXPECT().PeerStats(mock.Anything).Return(ps1, nil) - // Setup unhealthy rollup boost - rb := &clientmocks.RollupBoostClient{} - rb.EXPECT().Healthcheck(mock.Anything).Return(client.HealthStatusUnhealthy, nil) + rbChecker := &clientmocks.RollupBoostHealthChecker{} + rbChecker.EXPECT().Healthcheck(mock.Anything).Return(client.HealthStatusUnhealthy, nil) - // Start monitor with all dependencies - monitor := s.SetupMonitorWithRollupBoost(now, 60, 60, rc, pc, rb, nil) + monitor := s.SetupMonitorWithRollupBoost(now, 60, 60, rc, pc, rbChecker, nil) - // Check for unhealthy status healthUpdateCh := monitor.Subscribe() healthFailure := <-healthUpdateCh s.Equal(ErrRollupBoostNotHealthy, healthFailure) @@ -421,26 +411,21 @@ func (s *HealthMonitorTestSuite) TestRollupBoostPartialStatus() { s.T().Parallel() now := uint64(time.Now().Unix()) - // Setup healthy node conditions rc := &testutils.MockRollupClient{} ss1 := mockSyncStatus(now-1, 1, now-3, 0) rc.ExpectSyncStatus(ss1, nil) - // Setup healthy peer count pc := &p2pMocks.API{} ps1 := &p2p.PeerStats{ Connected: healthyPeerCount, } pc.EXPECT().PeerStats(mock.Anything).Return(ps1, nil) - // Setup partial rollup boost status (treated as unhealthy) - rb := &clientmocks.RollupBoostClient{} - rb.EXPECT().Healthcheck(mock.Anything).Return(client.HealthStatusPartial, nil) + rbChecker := &clientmocks.RollupBoostHealthChecker{} + rbChecker.EXPECT().Healthcheck(mock.Anything).Return(client.HealthStatusPartial, nil) - // Start monitor with all dependencies - monitor := s.SetupMonitorWithRollupBoost(now, 60, 60, rc, pc, rb, nil) + monitor := s.SetupMonitorWithRollupBoost(now, 60, 60, rc, pc, rbChecker, nil) - // Check for unhealthy status healthUpdateCh := monitor.Subscribe() healthFailure := <-healthUpdateCh s.Equal(ErrRollupBoostPartiallyHealthy, healthFailure) @@ -452,25 +437,21 @@ func (s *HealthMonitorTestSuite) TestRollupBoostPartialStatusWithTolerance() { s.T().Parallel() now := uint64(time.Now().Unix()) - // Setup healthy node conditions rc := &testutils.MockRollupClient{} ss1 := mockSyncStatus(now-1, 1, now-3, 0) - // because 6 healthchecks are going to be expected cause 6 calls of sync status for i := 0; i < 6; i++ { rc.ExpectSyncStatus(ss1, nil) } - // Setup healthy peer count pc := &p2pMocks.API{} ps1 := &p2p.PeerStats{ Connected: healthyPeerCount, } pc.EXPECT().PeerStats(mock.Anything).Return(ps1, nil) - // Setup partial rollup boost status (treated as unhealthy) - rb := &clientmocks.RollupBoostClient{} - rb.EXPECT().Healthcheck(mock.Anything).Return(client.HealthStatusPartial, nil) + rbChecker := &clientmocks.RollupBoostHealthChecker{} + rbChecker.EXPECT().Healthcheck(mock.Anything).Return(client.HealthStatusPartial, nil) toleranceLimit := uint64(2) toleranceIntervalSeconds := uint64(6) @@ -480,12 +461,9 @@ func (s *HealthMonitorTestSuite) TestRollupBoostPartialStatusWithTolerance() { tp := &timeProvider{now: 1758792282} - // Start monitor with all dependencies as well as tolerance of 2 rollup-boost partial unhealthiness per 3s period - monitor := s.SetupMonitorWithRollupBoost(now, 60, 60, rc, pc, rb, nil, func(shm *SequencerHealthMonitor) { + monitor := s.SetupMonitorWithRollupBoost(now, 60, 60, rc, pc, rbChecker, nil, func(shm *SequencerHealthMonitor) { timeBoundedRotatingCounter.timeProviderFn = tp.Now - // pollute the cache of timeBoundRotatingCounter with 998 elements so as to later test the lazy cleanup - // note: the 999th and 1000th element will be added by the first healthcheck run for i := 0; i < 999; i++ { timeBoundedRotatingCounter.temporalCache[int64(i)] = uint64(1) } @@ -508,9 +486,7 @@ func (s *HealthMonitorTestSuite) TestRollupBoostPartialStatusWithTolerance() { s.Nil(secondHealthStatus) s.Equal(ErrRollupBoostPartiallyHealthy, thirdHealthStatus) - tp.Now() // simulate another second passing - // by now, because of three healthchecks, six seconds (CurrentValue + Increment + CurrentValue + Increment + CurrentValue + tp.Now()) have been simulated to pass (by the timeProviderFn) - // this should reset the time bound counter, thereby allowing partial unhealthiness failures to be tolerated again + tp.Now() fourthHealthStatus := <-healthUpdateCh fifthHealthStatus := <-healthUpdateCh @@ -528,7 +504,6 @@ func (s *HealthMonitorTestSuite) TestRollupBoostHealthy() { now := uint64(time.Now().Unix()) numSecondsToWait := interval + 1 - // Setup healthy node conditions rc := &testutils.MockRollupClient{} ss1 := mockSyncStatus(now-1, 1, now-3, 0) @@ -536,22 +511,17 @@ func (s *HealthMonitorTestSuite) TestRollupBoostHealthy() { rc.ExpectSyncStatus(ss1, nil) } - // Setup healthy peer count pc := &p2pMocks.API{} ps1 := &p2p.PeerStats{ Connected: healthyPeerCount, } pc.EXPECT().PeerStats(mock.Anything).Return(ps1, nil) - // Setup healthy rollup boost - rb := &clientmocks.RollupBoostClient{} - // // Wait for longer than healthcheck interval before returning healthy status, to verify nothing breaks if rb is slow to respond - rb.EXPECT().Healthcheck(mock.Anything).After(time.Duration(numSecondsToWait)*time.Second).Return(client.HealthStatusHealthy, nil) + rbChecker := &clientmocks.RollupBoostHealthChecker{} + rbChecker.EXPECT().Healthcheck(mock.Anything).After(time.Duration(numSecondsToWait)*time.Second).Return(client.HealthStatusHealthy, nil) - // Start monitor with all dependencies - monitor := s.SetupMonitorWithRollupBoost(now, 60, 60, rc, pc, rb, nil) + monitor := s.SetupMonitorWithRollupBoost(now, 60, 60, rc, pc, rbChecker, nil) - // Should report healthy status healthUpdateCh := monitor.Subscribe() healthStatus := <-healthUpdateCh s.Nil(healthStatus) @@ -563,43 +533,22 @@ func (s *HealthMonitorTestSuite) TestRollupBoostNilClient() { s.T().Parallel() now := uint64(time.Now().Unix()) - // Setup healthy node conditions rc := &testutils.MockRollupClient{} ss1 := mockSyncStatus(now-1, 1, now-3, 0) rc.ExpectSyncStatus(ss1, nil) - // Setup healthy peer count pc := &p2pMocks.API{} ps1 := &p2p.PeerStats{ Connected: healthyPeerCount, } pc.EXPECT().PeerStats(mock.Anything).Return(ps1, nil) - // Explicitly create a monitor with all other components but nil rollup boost client - tp := &timeProvider{now: now} - monitor := &SequencerHealthMonitor{ - log: s.log, - interval: s.interval, - metrics: &metrics.NoopMetricsImpl{}, - healthUpdateCh: make(chan error), - rollupCfg: s.rollupCfg, - unsafeInterval: 60, - safeInterval: 60, - safeEnabled: true, - minPeerCount: s.minPeerCount, - timeProviderFn: tp.Now, - node: rc, - p2p: pc, - rb: nil, // Explicitly set to nil - } - - err := monitor.Start(context.Background()) - s.NoError(err) + // No rollup boost health checker configured + monitor := s.SetupMonitorWithRollupBoost(now, 60, 60, rc, pc, nil, nil) - // Health check should succeed even with nil rb healthUpdateCh := monitor.Subscribe() healthStatus := <-healthUpdateCh - s.Nil(healthStatus, "Health check should succeed with nil rollup boost client") + s.Nil(healthStatus, "Health check should succeed with nil rollup boost health checker") s.NoError(monitor.Stop()) } @@ -609,7 +558,6 @@ func (s *HealthMonitorTestSuite) TestElP2pHealthy() { now := uint64(time.Now().Unix()) numSecondsToWait := interval + 1 - // Setup healthy node conditions rc := &testutils.MockRollupClient{} ss1 := mockSyncStatus(now-1, 1, now-3, 0) @@ -617,26 +565,20 @@ func (s *HealthMonitorTestSuite) TestElP2pHealthy() { rc.ExpectSyncStatus(ss1, nil) } - // Setup healthy rollup boost - rb := &clientmocks.RollupBoostClient{} - // // Wait for longer than healthcheck interval before returning healthy status, to verify nothing breaks if rb is slow to respond - rb.EXPECT().Healthcheck(mock.Anything).After(time.Duration(numSecondsToWait)*time.Second).Return(client.HealthStatusHealthy, nil) + rbChecker := &clientmocks.RollupBoostHealthChecker{} + rbChecker.EXPECT().Healthcheck(mock.Anything).After(time.Duration(numSecondsToWait)*time.Second).Return(client.HealthStatusHealthy, nil) - // Setup healthy peer count pc := &p2pMocks.API{} ps1 := &p2p.PeerStats{ Connected: healthyPeerCount, } pc.EXPECT().PeerStats(mock.Anything).Return(ps1, nil) - // Setup healthy el p2p elP2pClient := &clientmocks.ElP2PClient{} elP2pClient.EXPECT().PeerCount(mock.Anything).Return(healthyElP2pPeerCount, nil) - // Start monitor with all dependencies - monitor := s.SetupMonitorWithRollupBoost(now, 60, 60, rc, pc, rb, elP2pClient) + monitor := s.SetupMonitorWithRollupBoost(now, 60, 60, rc, pc, rbChecker, elP2pClient) - // Should report healthy status healthUpdateCh := monitor.Subscribe() healthStatus := <-healthUpdateCh s.Nil(healthStatus) @@ -649,7 +591,6 @@ func (s *HealthMonitorTestSuite) TestElP2pHealthyNilClient() { now := uint64(time.Now().Unix()) numSecondsToWait := interval + 1 - // Setup healthy node conditions rc := &testutils.MockRollupClient{} ss1 := mockSyncStatus(now-1, 1, now-3, 0) @@ -657,17 +598,14 @@ func (s *HealthMonitorTestSuite) TestElP2pHealthyNilClient() { rc.ExpectSyncStatus(ss1, nil) } - // Setup healthy peer count pc := &p2pMocks.API{} ps1 := &p2p.PeerStats{ Connected: healthyPeerCount, } pc.EXPECT().PeerStats(mock.Anything).Return(ps1, nil) - // Start monitor with all dependencies monitor := s.SetupMonitorWithRollupBoost(now, 60, 60, rc, pc, nil, nil) - // Should report healthy status healthUpdateCh := monitor.Subscribe() healthStatus := <-healthUpdateCh s.Nil(healthStatus)