From f7016beb05af0b5036d859000cc15edafa1442ca Mon Sep 17 00:00:00 2001 From: Alex Gherghisan Date: Mon, 16 Feb 2026 17:44:17 +0000 Subject: [PATCH] chore: better agent utilisation in proving test --- spartan/environments/prove-n-tps-real.env | 5 +- .../values/prover-resources-prod-hi-tps.yaml | 82 +++++++++++++++++++ .../src/spartan/n_tps_prove.test.ts | 14 ++++ .../end-to-end/src/spartan/utils/index.ts | 1 + .../end-to-end/src/spartan/utils/k8s.ts | 8 ++ 5 files changed, 108 insertions(+), 2 deletions(-) create mode 100644 spartan/terraform/deploy-aztec-infra/values/prover-resources-prod-hi-tps.yaml diff --git a/spartan/environments/prove-n-tps-real.env b/spartan/environments/prove-n-tps-real.env index 9b8989671922..129abf2e7750 100644 --- a/spartan/environments/prove-n-tps-real.env +++ b/spartan/environments/prove-n-tps-real.env @@ -7,6 +7,7 @@ AZTEC_SLOT_DURATION=72 AZTEC_PROOF_SUBMISSION_EPOCHS=1 AZTEC_LAG_IN_EPOCHS_FOR_VALIDATOR_SET=1 AZTEC_LAG_IN_EPOCHS_FOR_RANDAO=1 +AZTEC_MANA_TARGET=1000000000 # 1B mana CREATE_ETH_DEVNET=true DESTROY_NAMESPACE=true @@ -30,8 +31,8 @@ REAL_VERIFIER=true RPC_REPLICAS=1 RPC_INGRESS_ENABLED=false -PROVER_REPLICAS=200 -PROVER_RESOURCE_PROFILE="prod" +PROVER_REPLICAS=4 +PROVER_RESOURCE_PROFILE="prod-hi-tps" PROVER_PUBLISHER_MNEMONIC_START_INDEX=8000 PROVER_AGENT_POLL_INTERVAL_MS=10000 PUBLISHERS_PER_PROVER=1 diff --git a/spartan/terraform/deploy-aztec-infra/values/prover-resources-prod-hi-tps.yaml b/spartan/terraform/deploy-aztec-infra/values/prover-resources-prod-hi-tps.yaml new file mode 100644 index 000000000000..ffd0347086cc --- /dev/null +++ b/spartan/terraform/deploy-aztec-infra/values/prover-resources-prod-hi-tps.yaml @@ -0,0 +1,82 @@ +node: + node: + resources: + requests: + cpu: "7.5" + memory: "55Gi" + + nodeJsOptions: + - "--max-old-space-size=61440" + + nodeSelector: + local-ssd: "false" + node-type: "network" + cores: "8" + hi-mem: "true" + + persistence: + enabled: true + statefulSet: + volumeClaimTemplates: + - 
metadata: + name: data + spec: + accessModes: [ReadWriteOnce] + resources: + requests: + storage: 16Gi + +broker: + replicaCount: 1 + + node: + resources: + requests: + cpu: "7.5" + memory: "55Gi" + + nodeJsOptions: + - "--max-old-space-size=61440" + + nodeSelector: + local-ssd: "false" + node-type: "network" + cores: "8" + hi-mem: "true" + + persistence: + enabled: true + statefulSet: + volumeClaimTemplates: + - metadata: + name: data + spec: + accessModes: [ReadWriteOnce] + resources: + requests: + storage: 64Gi +agent: + replicaCount: 4 + + node: + env: + # the pod will be scheduled on a 32-core VM + HARDWARE_CONCURRENCY: "32" + resources: + requests: + memory: "115Gi" + cpu: "31" + + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: cloud.google.com/gke-spot + operator: Exists + + tolerations: + - key: "cloud.google.com/gke-spot" + operator: "Equal" + value: "true" + effect: "NoSchedule" diff --git a/yarn-project/end-to-end/src/spartan/n_tps_prove.test.ts b/yarn-project/end-to-end/src/spartan/n_tps_prove.test.ts index a98c2a43fd08..5c1750c41b24 100644 --- a/yarn-project/end-to-end/src/spartan/n_tps_prove.test.ts +++ b/yarn-project/end-to-end/src/spartan/n_tps_prove.test.ts @@ -31,6 +31,7 @@ import { type WorkerWalletWrapper, createWorkerWalletClient } from './setup_test import { ProvingMetrics } from './tx_metrics.js'; import { getExternalIP, + scaleProverAgents, setupEnvironment, startPortForwardForEthereum, startPortForwardForPrometeheus, @@ -44,6 +45,8 @@ if (!Number.isFinite(TARGET_TPS)) { throw new Error('Invalid TPS: ' + process.env.TPS); } +const TARGET_PROVER_AGENTS = parseInt(process.env.TARGET_PROVER_AGENTS ?? 
'200'); + const epochDurationSlots = config.AZTEC_EPOCH_DURATION; const slotDurationSeconds = config.AZTEC_SLOT_DURATION; const epochDurationSeconds = epochDurationSlots * slotDurationSeconds; @@ -357,6 +360,9 @@ describe(`prove ${TARGET_TPS}TPS test`, () => { ); await sleep(secondsToWait * 1000); } + + // scale to 10 agents in order to be able to prove the current epoch which contains up to 10 account contracts and the benchmark contract + await scaleProverAgents(config.NAMESPACE, 10, logger); }); it(`sends ${TARGET_TPS} TPS for a full epoch and waits for proof`, async () => { @@ -371,10 +377,18 @@ describe(`prove ${TARGET_TPS}TPS test`, () => { const msPerTx = 1000 / TARGET_TPS; logger.info(`Will send ${txsToSend} transactions at ${TARGET_TPS} TPS over ${epochDurationSeconds} seconds`); + const scaleUpAtTx = Math.max(0, txsToSend - Math.ceil(TARGET_TPS * 8 * slotDurationSeconds)); const sentTxs: TxHash[] = []; const sendStartTime = performance.now(); for (let i = 0; i < txsToSend; i++) { + if (i === scaleUpAtTx) { + logger.info(`Scaling prover agents to ${TARGET_PROVER_AGENTS} (8 slots before end of tx sending)`); + void scaleProverAgents(config.NAMESPACE, TARGET_PROVER_AGENTS, logger).catch(err => + logger.error(`Failed to scale prover agents: ${err}`), + ); + } + const loopStart = performance.now(); // look for a wallet with an available tx diff --git a/yarn-project/end-to-end/src/spartan/utils/index.ts b/yarn-project/end-to-end/src/spartan/utils/index.ts index 5d945f48e758..b4ecc612825f 100644 --- a/yarn-project/end-to-end/src/spartan/utils/index.ts +++ b/yarn-project/end-to-end/src/spartan/utils/index.ts @@ -25,6 +25,7 @@ export { getRPCEndpoint, getEthereumEndpoint, createResilientPrometheusConnection, + scaleProverAgents, } from './k8s.js'; // Chaos Mesh diff --git a/yarn-project/end-to-end/src/spartan/utils/k8s.ts b/yarn-project/end-to-end/src/spartan/utils/k8s.ts index 70088963fbea..e9329839b7bb 100644 --- 
a/yarn-project/end-to-end/src/spartan/utils/k8s.ts
+++ b/yarn-project/end-to-end/src/spartan/utils/k8s.ts
@@ -522,6 +522,14 @@ export function createResilientPrometheusConnection(
   return { connect, runAlertCheck };
 }
 
+/** Runs `kubectl scale` to set all prover-agent labelled Deployments in `namespace` to `replicas` replicas. */
+export async function scaleProverAgents(namespace: string, replicas: number, log: Logger): Promise<void> {
+  const label = 'app.kubernetes.io/component=prover-agent';
+  const command = `kubectl scale deployment -l ${label} -n ${namespace} --replicas=${replicas} --timeout=2m`;
+  log.info(`Scaling prover agents to ${replicas}: ${command}`);
+  await execAsync(command);
+}
+
 export function getChartDir(spartanDir: string, chartName: string) {
   return path.join(spartanDir.trim(), chartName);
 }