Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
0178776
feat(reexecution/c): decouple metrics server and collector
RodrigoVillar Oct 12, 2025
4ad3f2b
Merge branch 'master' into rodrigo/decouple-reexecution-metrics
RodrigoVillar Oct 13, 2025
fc83c89
Merge branch 'master' into rodrigo/decouple-reexecution-metrics
RodrigoVillar Oct 13, 2025
848c6ad
docs: improve collectRegistry
RodrigoVillar Oct 13, 2025
be761ac
chore: set default to empty string
RodrigoVillar Oct 13, 2025
ca0b993
docs: benchmark script
RodrigoVillar Oct 13, 2025
a7cb056
chore: simplify metricsMode
RodrigoVillar Oct 13, 2025
d36d4ca
Merge branch 'master' into rodrigo/decouple-reexecution-metrics
RodrigoVillar Oct 13, 2025
c7f3185
chore: unexport metricsMode
RodrigoVillar Oct 13, 2025
9e319d0
chore: clean up
RodrigoVillar Oct 13, 2025
2cc0a79
chore: self-review nits
RodrigoVillar Oct 14, 2025
feb1681
feat(reexecute/c): explicit metrics port
RodrigoVillar Oct 14, 2025
c7c83bb
Merge branch 'master' into rodrigo/support-explicit-metrics-port
RodrigoVillar Oct 20, 2025
a5d4392
chore: nits
RodrigoVillar Oct 20, 2025
4ff8126
Merge branch 'master' into rodrigo/support-explicit-metrics-port
RodrigoVillar Oct 20, 2025
c67da7f
chore: nits
RodrigoVillar Oct 20, 2025
f5e2e14
chore: unexport consts
RodrigoVillar Oct 20, 2025
d871c75
chore: README
RodrigoVillar Oct 20, 2025
540d60f
chore: nit
RodrigoVillar Oct 20, 2025
bca6d48
chore: nit
RodrigoVillar Oct 20, 2025
e381c2c
Merge branch 'master' into rodrigo/support-explicit-metrics-port
RodrigoVillar Oct 20, 2025
e5283df
Merge branch 'master' into rodrigo/support-explicit-metrics-port
RodrigoVillar Oct 28, 2025
ac02cbf
docs: func name
RodrigoVillar Oct 28, 2025
4654206
chore: unify server flags
RodrigoVillar Oct 28, 2025
d18bbb8
chore: nit
RodrigoVillar Oct 28, 2025
df721f1
fix: quote
RodrigoVillar Oct 28, 2025
527b652
feat: implicitly enable metrics server if collector is enabled
RodrigoVillar Oct 28, 2025
f2fc794
chore: nits
RodrigoVillar Oct 28, 2025
f156a76
docs: README
RodrigoVillar Oct 28, 2025
5bb04be
chore: nit
RodrigoVillar Oct 28, 2025
d0b0083
docs: flag usage
RodrigoVillar Oct 28, 2025
6796474
Merge branch 'master' into rodrigo/support-explicit-metrics-port
RodrigoVillar Oct 28, 2025
830e8c1
chore: nits
RodrigoVillar Oct 28, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion .github/actions/c-chain-reexecution-benchmark/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,6 @@ runs:
LABELS=${{ env.LABELS }} \
BENCHMARK_OUTPUT_FILE=${{ env.BENCHMARK_OUTPUT_FILE }} \
RUNNER_NAME=${{ inputs.runner_name }} \
METRICS_SERVER_ENABLED=true \
METRICS_COLLECTOR_ENABLED=true
prometheus_url: ${{ inputs.prometheus-url }}
prometheus_push_url: ${{ inputs.prometheus-push-url }}
Expand Down
8 changes: 4 additions & 4 deletions Taskfile.yml
Original file line number Diff line number Diff line change
Expand Up @@ -207,7 +207,7 @@ tasks:
END_BLOCK: '{{.END_BLOCK}}'
LABELS: '{{.LABELS | default ""}}'
BENCHMARK_OUTPUT_FILE: '{{.BENCHMARK_OUTPUT_FILE | default ""}}'
METRICS_SERVER_ENABLED: '{{.METRICS_SERVER_ENABLED | default "false"}}'
METRICS_SERVER_PORT: '{{.METRICS_SERVER_PORT}}'
METRICS_COLLECTOR_ENABLED: '{{.METRICS_COLLECTOR_ENABLED | default "false"}}'
cmd: |
CURRENT_STATE_DIR={{.CURRENT_STATE_DIR}} \
Expand All @@ -218,7 +218,7 @@ tasks:
END_BLOCK={{.END_BLOCK}} \
LABELS={{.LABELS}} \
BENCHMARK_OUTPUT_FILE={{.BENCHMARK_OUTPUT_FILE}} \
METRICS_SERVER_ENABLED={{.METRICS_SERVER_ENABLED}} \
METRICS_SERVER_PORT={{.METRICS_SERVER_PORT}} \
METRICS_COLLECTOR_ENABLED={{.METRICS_COLLECTOR_ENABLED}} \
bash -x ./scripts/benchmark_cchain_range.sh

Expand All @@ -234,7 +234,7 @@ tasks:
END_BLOCK: '{{.END_BLOCK | default "250000"}}'
LABELS: '{{.LABELS | default ""}}'
BENCHMARK_OUTPUT_FILE: '{{.BENCHMARK_OUTPUT_FILE | default ""}}'
METRICS_SERVER_ENABLED: '{{.METRICS_SERVER_ENABLED | default "false"}}'
METRICS_SERVER_PORT: '{{.METRICS_SERVER_PORT}}'
METRICS_COLLECTOR_ENABLED: '{{.METRICS_COLLECTOR_ENABLED | default "false"}}'
cmds:
- task: import-cchain-reexecute-range
Expand All @@ -252,7 +252,7 @@ tasks:
END_BLOCK: '{{.END_BLOCK}}'
LABELS: '{{.LABELS}}'
BENCHMARK_OUTPUT_FILE: '{{.BENCHMARK_OUTPUT_FILE}}'
METRICS_SERVER_ENABLED: '{{.METRICS_SERVER_ENABLED}}'
METRICS_SERVER_PORT: '{{.METRICS_SERVER_PORT}}'
METRICS_COLLECTOR_ENABLED: '{{.METRICS_COLLECTOR_ENABLED}}'

test-bootstrap-monitor-e2e:
Expand Down
4 changes: 2 additions & 2 deletions scripts/benchmark_cchain_range.sh
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ set -euo pipefail
# END_BLOCK: The ending block height (inclusive).
# LABELS (optional): Comma-separated key=value pairs for metric labels.
# BENCHMARK_OUTPUT_FILE (optional): If set, benchmark output is also written to this file.
# METRICS_SERVER_ENABLED (optional): If set, enables the metrics server.
# METRICS_SERVER_PORT (optional): If set, starts the metrics server and sets the port it listens on.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would like to see METRICS_SERVER_PORT be in addition to METRICS_SERVER_ENABLED rather than its replacement. The default port should remain zero - dynamic - and that detail shouldn't be required knowledge for those who don't want to set a specific port.

# METRICS_COLLECTOR_ENABLED (optional): If set, enables the metrics collector.

: "${BLOCK_DIR:?BLOCK_DIR must be set}"
Expand All @@ -27,7 +27,7 @@ cmd="go test -timeout=0 -v -benchtime=1x -bench=BenchmarkReexecuteRange -run=^$
--start-block=\"${START_BLOCK}\" \
--end-block=\"${END_BLOCK}\" \
${LABELS:+--labels=\"${LABELS}\"} \
${METRICS_SERVER_ENABLED:+--metrics-server-enabled=\"${METRICS_SERVER_ENABLED}\"} \
${METRICS_SERVER_PORT:+--metrics-server-port=\"${METRICS_SERVER_PORT}\"} \
${METRICS_COLLECTOR_ENABLED:+--metrics-collector-enabled=\"${METRICS_COLLECTOR_ENABLED}\"}"

if [ -n "${BENCHMARK_OUTPUT_FILE:-}" ]; then
Expand Down
23 changes: 17 additions & 6 deletions tests/prometheus_server.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ package tests
import (
"context"
"errors"
"fmt"
"net"
"net/http"
"time"
Expand All @@ -14,7 +15,10 @@ import (
"github.com/prometheus/client_golang/prometheus/promhttp"
)

const defaultPrometheusListenAddr = "127.0.0.1:0"
const (
localhostAddr = "127.0.0.1"
defaultMetricsPort = 0
)

// PrometheusServer is an HTTP server that serves Prometheus metrics from the provided
// gatherer.
Expand All @@ -28,25 +32,32 @@ type PrometheusServer struct {
// NewPrometheusServer creates and starts a Prometheus server with the provided gatherer.
// It delegates to NewPrometheusServerWithPort using defaultMetricsPort, so the server
// listens on 127.0.0.1 with that default port and serves /ext/metrics.
func NewPrometheusServer(gatherer prometheus.Gatherer) (*PrometheusServer, error) {
return NewPrometheusServerWithPort(gatherer, defaultMetricsPort)
}

// NewPrometheusServerWithPort creates and starts a Prometheus server with the provided gatherer
// listening on 127.0.0.1:port and serving /ext/metrics.
func NewPrometheusServerWithPort(gatherer prometheus.Gatherer, port uint64) (*PrometheusServer, error) {
Comment on lines +38 to +40
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Consider using functional options pattern here, but think this is fine since it's just adding one extra constructor.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would prefer the additional constructor for the following reasons:

  1. NewXWithY constructors are idiomatic in Go [ref]
  2. The cost of having to create a new Options struct type along with getters and setters outweighs the benefits of having a single constructor IMO (similarly, passing in a Config struct type seems overkill for a single parameter (port))

server := &PrometheusServer{
gatherer: gatherer,
}

if err := server.start(); err != nil {
serverAddress := fmt.Sprintf("%s:%d", localhostAddr, port)
if err := server.start(serverAddress); err != nil {
return nil, err
}

return server, nil
}

// start the Prometheus server on a dynamic port.
func (s *PrometheusServer) start() error {
// start the Prometheus server on address.
func (s *PrometheusServer) start(address string) error {
mux := http.NewServeMux()
mux.Handle("/ext/metrics", promhttp.HandlerFor(s.gatherer, promhttp.HandlerOpts{}))

listener, err := net.Listen("tcp", defaultPrometheusListenAddr)
listener, err := net.Listen("tcp", address)
if err != nil {
return err
return fmt.Errorf("failed to listen on %s: %w", address, err)
}

s.server = http.Server{
Expand Down
8 changes: 4 additions & 4 deletions tests/reexecute/c/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -44,12 +44,12 @@ export AWS_REGION=us-east-2

If running locally, metrics collection can be customized via the following parameters:

- `METRICS_SERVER_ENABLED`: starts a Prometheus server exporting VM metrics.
- `METRICS_COLLECTOR_ENABLED`: starts a Prometheus collector (if enabled, then `METRICS_SERVER_ENABLED` must be enabled as well).
- `METRICS_SERVER_PORT`: if set, starts a Prometheus server exporting VM metrics and sets the port the server listens on.
- `METRICS_COLLECTOR_ENABLED`: starts a Prometheus collector. If `METRICS_SERVER_PORT` is not set, enabling the collector implicitly sets `METRICS_SERVER_PORT` to `0`.

When utilizing the metrics collector feature, follow the instructions in the e2e [README](../../e2e/README.md#monitoring) to set the required Prometheus environment variables.

Running the re-execution test in CI will always set `METRICS_SERVER_ENABLED=true` and `METRICS_COLLECTOR_ENABLED=true`.
Running the re-execution test in CI will always set `METRICS_COLLECTOR_ENABLED=true`.

## Quick Start

Expand Down Expand Up @@ -237,7 +237,7 @@ The `CONFIG` parameter currently only supports pre-defined configs and not passi

The C-Chain benchmarks export VM metrics to the same Grafana instance as AvalancheGo CI: https://grafana-poc.avax-dev.network/.

To export metrics for a local run, simply set the Taskfile variables `METRICS_SERVER_ENABLED=true` and `METRICS_COLLECTOR_ENABLED=true` either via environment variable or passing it at the command line.
To export metrics for a local run, simply set the Taskfile variable `METRICS_COLLECTOR_ENABLED=true` either via environment variable or passing it at the command line.

You can view granular C-Chain processing metrics with the label attached to this job (job="c-chain-reexecution") [here](https://grafana-poc.avax-dev.network/d/Gl1I20mnk/c-chain?orgId=1&from=now-5m&to=now&timezone=browser&var-datasource=P1809F7CD0C75ACF3&var-filter=job%7C%3D%7Cc-chain-reexecution&var-chain=C&refresh=10s).

Expand Down
29 changes: 19 additions & 10 deletions tests/reexecute/c/vm_reexecute_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ var (
executionTimeout time.Duration
labelsArg string

metricsServerEnabledArg bool
metricsServerPort *uint64
metricsCollectorEnabledArg bool

networkUUID string = uuid.NewString()
Expand Down Expand Up @@ -104,7 +104,16 @@ func TestMain(m *testing.M) {
flag.IntVar(&chanSizeArg, "chan-size", 100, "Size of the channel to use for block processing.")
flag.DurationVar(&executionTimeout, "execution-timeout", 0, "Benchmark execution timeout. After this timeout has elapsed, terminate the benchmark without error. If 0, no timeout is applied.")

flag.BoolVar(&metricsServerEnabledArg, "metrics-server-enabled", false, "Whether to enable the metrics server.")
// metricsServerPort stays nil unless this flag is passed, which lets the rest of the
// test distinguish "flag not set" from an explicitly requested port of 0.
flag.Func("metrics-server-port", "Starts a metrics server and sets the port it will listen to", func(s string) error {
	// Parse with a 16-bit size so values outside the valid TCP port range are
	// rejected during flag parsing instead of failing later when listening.
	port, err := strconv.ParseUint(s, 10, 16)
	if err != nil {
		return err
	}

	metricsServerPort = &port
	return nil
})
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Isn't there a way to simply check if a flag is set without using a Func like this?

Copy link
Contributor Author

@RodrigoVillar RodrigoVillar Oct 28, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we keep metricsServerPort as a uint64 value, then there's no way to differentiate a client setting METRICS_SERVER_PORT=0 and a client declining to start the metrics server (in both cases, metricsServerPort will be 0).

Edit: as to the usage of flag.Func(), this is necessary as we need to initialize metricsServerPort. Using the following code:

flag.Uint64Var(metricsServerPort, "metrics-server-port", 0, "Starts a metrics server and sets the port it will listen to")

results in a panic as flag.Uint64Var() attempts to write to a nil pointer.

// Enabling the collector without metrics-server-port implicitly starts the metrics
// server on port 0, so the help text no longer refers to a separate enable flag.
flag.BoolVar(&metricsCollectorEnabledArg, "metrics-collector-enabled", false, "Whether to enable the metrics collector (if true and metrics-server-port is not set, the metrics server is started on port 0).")
flag.StringVar(&labelsArg, "labels", "", "Comma separated KV list of metric labels to attach to all exported metrics. Ex. \"owner=tim,runner=snoopy\"")

Expand All @@ -119,9 +128,8 @@ func TestMain(m *testing.M) {

flag.Parse()

if metricsCollectorEnabledArg && !metricsServerEnabledArg {
fmt.Fprint(os.Stderr, "metrics collector is enabled but metrics server is disabled.\n")
os.Exit(1)
if metricsCollectorEnabledArg && metricsServerPort == nil {
metricsServerPort = new(uint64)
}

customLabels, err := parseCustomLabels(labelsArg)
Expand Down Expand Up @@ -157,7 +165,7 @@ func BenchmarkReexecuteRange(b *testing.B) {
startBlockArg,
endBlockArg,
chanSizeArg,
metricsServerEnabledArg,
metricsServerPort,
metricsCollectorEnabledArg,
)
})
Expand All @@ -171,7 +179,7 @@ func benchmarkReexecuteRange(
startBlock uint64,
endBlock uint64,
chanSize int,
metricsServerEnabled bool,
metricsPort *uint64,
metricsCollectorEnabled bool,
) {
r := require.New(b)
Expand All @@ -191,8 +199,8 @@ func benchmarkReexecuteRange(

log := tests.NewDefaultLogger("c-chain-reexecution")

if metricsServerEnabled {
serverAddr := startServer(b, log, prefixGatherer)
if metricsPort != nil {
serverAddr := startServer(b, log, prefixGatherer, *metricsPort)

if metricsCollectorEnabled {
startCollector(b, log, "c-chain-reexecution", labels, serverAddr)
Expand Down Expand Up @@ -565,10 +573,11 @@ func startServer(
tb testing.TB,
log logging.Logger,
gatherer prometheus.Gatherer,
port uint64,
) string {
r := require.New(tb)

server, err := tests.NewPrometheusServer(gatherer)
server, err := tests.NewPrometheusServerWithPort(gatherer, port)
r.NoError(err)

log.Info("metrics endpoint available",
Expand Down