From 37f5d62bc426d43a62cceae6acc1763534de83eb Mon Sep 17 00:00:00 2001 From: ludamad Date: Wed, 18 Feb 2026 14:44:53 +0000 Subject: [PATCH 1/7] type: description From 8ba9689a38a39769f69ccebb060e9651eb473d8b Mon Sep 17 00:00:00 2001 From: ludamad Date: Wed, 18 Feb 2026 15:02:01 +0000 Subject: [PATCH 2/7] chore: add tcmalloc benchmarking setup for bb benches Install tcmalloc at benchmark build time and default to using it via LD_PRELOAD for native benchmark execution. Set ALLOCATOR=default to use the system allocator instead. --- barretenberg/cpp/bootstrap.sh | 1 + barretenberg/cpp/scripts/run_bench.sh | 9 ++++++++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/barretenberg/cpp/bootstrap.sh b/barretenberg/cpp/bootstrap.sh index da73553be6e5..bd93f776c26d 100755 --- a/barretenberg/cpp/bootstrap.sh +++ b/barretenberg/cpp/bootstrap.sh @@ -412,6 +412,7 @@ function test { function build_bench { set -eu + sudo apt-get update && sudo apt-get install -y libgoogle-perftools-dev if ! cache_download barretenberg-benchmarks-$hash.zst; then # Run builds in parallel with different targets per preset parallel --line-buffered denoise ::: \ diff --git a/barretenberg/cpp/scripts/run_bench.sh b/barretenberg/cpp/scripts/run_bench.sh index 64df1e05eadd..cbc5f06bc637 100755 --- a/barretenberg/cpp/scripts/run_bench.sh +++ b/barretenberg/cpp/scripts/run_bench.sh @@ -14,13 +14,20 @@ filter=$4 export GTEST_COLOR=1 export HARDWARE_CONCURRENCY=${CPUS:-8} +# Use tcmalloc by default for benchmarks. Set ALLOCATOR=default to use the system allocator. 
+libarch=$(uname -m) +BENCH_PRELOAD="/usr/lib/${libarch}-linux-gnu/libtcmalloc.so" +if [ "${ALLOCATOR:-}" = "default" ]; then + BENCH_PRELOAD="" +fi + mkdir -p bench-out/$(dirname $name) export MEMUSAGE_OUT="bench-out/$name-peak-memory-mb.txt" case $arch in native) - memusage $bin --benchmark_out=./bench-out/$name.json --benchmark_filter=$filter + LD_PRELOAD="${BENCH_PRELOAD}" memusage $bin --benchmark_out=./bench-out/$name.json --benchmark_filter=$filter ;; wasm) memusage ./scripts/wasmtime.sh $bin --benchmark_out=./bench-out/$name.json --benchmark_filter=$filter From 4e229aee43b615778aa53a60a1d8ab1408ec8ceb Mon Sep 17 00:00:00 2001 From: ludamad Date: Wed, 18 Feb 2026 16:06:14 +0000 Subject: [PATCH 3/7] fix: use libtcmalloc-minimal4t64 runtime package instead of -dev The -dev package has unmet dependencies (libunwind-dev) in the CI image. We only need the runtime .so for LD_PRELOAD. --- barretenberg/cpp/bootstrap.sh | 2 +- barretenberg/cpp/scripts/run_bench.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/barretenberg/cpp/bootstrap.sh b/barretenberg/cpp/bootstrap.sh index bd93f776c26d..171ae995fbcd 100755 --- a/barretenberg/cpp/bootstrap.sh +++ b/barretenberg/cpp/bootstrap.sh @@ -412,7 +412,7 @@ function test { function build_bench { set -eu - sudo apt-get update && sudo apt-get install -y libgoogle-perftools-dev + sudo apt-get update && sudo apt-get install -y libtcmalloc-minimal4t64 if ! cache_download barretenberg-benchmarks-$hash.zst; then # Run builds in parallel with different targets per preset parallel --line-buffered denoise ::: \ diff --git a/barretenberg/cpp/scripts/run_bench.sh b/barretenberg/cpp/scripts/run_bench.sh index cbc5f06bc637..6729554605bb 100755 --- a/barretenberg/cpp/scripts/run_bench.sh +++ b/barretenberg/cpp/scripts/run_bench.sh @@ -16,7 +16,7 @@ export HARDWARE_CONCURRENCY=${CPUS:-8} # Use tcmalloc by default for benchmarks. Set ALLOCATOR=default to use the system allocator. 
libarch=$(uname -m) -BENCH_PRELOAD="/usr/lib/${libarch}-linux-gnu/libtcmalloc.so" +BENCH_PRELOAD="/usr/lib/${libarch}-linux-gnu/libtcmalloc_minimal.so.4" if [ "${ALLOCATOR:-}" = "default" ]; then BENCH_PRELOAD="" fi From 0095c470e72edbff24c8f2fb6cf525b67e8063a9 Mon Sep 17 00:00:00 2001 From: ludamad Date: Wed, 18 Feb 2026 18:02:51 +0000 Subject: [PATCH 4/7] chore: install tcmalloc in Docker image, revert default usage Install libtcmalloc-minimal4t64 in the build image so it's available for opt-in benchmarking via LD_PRELOAD. Remove the default tcmalloc usage from run_bench.sh and the apt-get from build_bench. --- barretenberg/cpp/bootstrap.sh | 1 - barretenberg/cpp/scripts/run_bench.sh | 9 +-------- build-images/src/Dockerfile | 2 ++ 3 files changed, 3 insertions(+), 9 deletions(-) diff --git a/barretenberg/cpp/bootstrap.sh b/barretenberg/cpp/bootstrap.sh index 171ae995fbcd..da73553be6e5 100755 --- a/barretenberg/cpp/bootstrap.sh +++ b/barretenberg/cpp/bootstrap.sh @@ -412,7 +412,6 @@ function test { function build_bench { set -eu - sudo apt-get update && sudo apt-get install -y libtcmalloc-minimal4t64 if ! cache_download barretenberg-benchmarks-$hash.zst; then # Run builds in parallel with different targets per preset parallel --line-buffered denoise ::: \ diff --git a/barretenberg/cpp/scripts/run_bench.sh b/barretenberg/cpp/scripts/run_bench.sh index 6729554605bb..64df1e05eadd 100755 --- a/barretenberg/cpp/scripts/run_bench.sh +++ b/barretenberg/cpp/scripts/run_bench.sh @@ -14,20 +14,13 @@ filter=$4 export GTEST_COLOR=1 export HARDWARE_CONCURRENCY=${CPUS:-8} -# Use tcmalloc by default for benchmarks. Set ALLOCATOR=default to use the system allocator. 
-libarch=$(uname -m) -BENCH_PRELOAD="/usr/lib/${libarch}-linux-gnu/libtcmalloc_minimal.so.4" -if [ "${ALLOCATOR:-}" = "default" ]; then - BENCH_PRELOAD="" -fi - mkdir -p bench-out/$(dirname $name) export MEMUSAGE_OUT="bench-out/$name-peak-memory-mb.txt" case $arch in native) - LD_PRELOAD="${BENCH_PRELOAD}" memusage $bin --benchmark_out=./bench-out/$name.json --benchmark_filter=$filter + memusage $bin --benchmark_out=./bench-out/$name.json --benchmark_filter=$filter ;; wasm) memusage ./scripts/wasmtime.sh $bin --benchmark_out=./bench-out/$name.json --benchmark_filter=$filter diff --git a/build-images/src/Dockerfile b/build-images/src/Dockerfile index 5d97d07f0c4d..1dec168b15fa 100644 --- a/build-images/src/Dockerfile +++ b/build-images/src/Dockerfile @@ -107,6 +107,8 @@ RUN apt update && \ # Python (clang bindings for wasm bindgen.) python3 \ python3-clang \ + # Alternative memory allocator (use via LD_PRELOAD for benchmarking) + libtcmalloc-minimal4t64 \ # Unminimize ubuntu installation. unminimize \ && apt-get -y autoremove \ From 1d1357edfc2d687945624688ac120cb40d0f7d68 Mon Sep 17 00:00:00 2001 From: ludamad Date: Wed, 18 Feb 2026 18:04:11 +0000 Subject: [PATCH 5/7] chore: default to tcmalloc for native benchmarks Use LD_PRELOAD with tcmalloc_minimal from the Docker image by default in run_bench.sh. Set ALLOCATOR=default to use the system allocator. --- barretenberg/cpp/scripts/run_bench.sh | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/barretenberg/cpp/scripts/run_bench.sh b/barretenberg/cpp/scripts/run_bench.sh index 64df1e05eadd..d2a4d738b2f7 100755 --- a/barretenberg/cpp/scripts/run_bench.sh +++ b/barretenberg/cpp/scripts/run_bench.sh @@ -14,13 +14,19 @@ filter=$4 export GTEST_COLOR=1 export HARDWARE_CONCURRENCY=${CPUS:-8} +# Use tcmalloc by default for native benchmarks. Set ALLOCATOR=default to disable. 
+BENCH_PRELOAD="" +if [ "${ALLOCATOR:-tcmalloc}" = "tcmalloc" ]; then + BENCH_PRELOAD="/usr/lib/$(uname -m)-linux-gnu/libtcmalloc_minimal.so.4" +fi + mkdir -p bench-out/$(dirname $name) export MEMUSAGE_OUT="bench-out/$name-peak-memory-mb.txt" case $arch in native) - memusage $bin --benchmark_out=./bench-out/$name.json --benchmark_filter=$filter + LD_PRELOAD="${BENCH_PRELOAD}" memusage $bin --benchmark_out=./bench-out/$name.json --benchmark_filter=$filter ;; wasm) memusage ./scripts/wasmtime.sh $bin --benchmark_out=./bench-out/$name.json --benchmark_filter=$filter From a8c40c5e0c564e4dde3e1cc1166203574d500332 Mon Sep 17 00:00:00 2001 From: ludamad Date: Wed, 18 Feb 2026 18:59:40 +0000 Subject: [PATCH 6/7] chore: make tcmalloc opt-in via ALLOCATOR=tcmalloc --- barretenberg/cpp/scripts/run_bench.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/barretenberg/cpp/scripts/run_bench.sh b/barretenberg/cpp/scripts/run_bench.sh index d2a4d738b2f7..ef876466a554 100755 --- a/barretenberg/cpp/scripts/run_bench.sh +++ b/barretenberg/cpp/scripts/run_bench.sh @@ -14,9 +14,9 @@ filter=$4 export GTEST_COLOR=1 export HARDWARE_CONCURRENCY=${CPUS:-8} -# Use tcmalloc by default for native benchmarks. Set ALLOCATOR=default to disable. +# Set ALLOCATOR=tcmalloc to use tcmalloc via LD_PRELOAD (installed in build image). BENCH_PRELOAD="" -if [ "${ALLOCATOR:-tcmalloc}" = "tcmalloc" ]; then +if [ "${ALLOCATOR:-}" = "tcmalloc" ]; then BENCH_PRELOAD="/usr/lib/$(uname -m)-linux-gnu/libtcmalloc_minimal.so.4" fi From 4eabbdb58b86b93cd6c691c3b696ddf064cdbdf7 Mon Sep 17 00:00:00 2001 From: ludamad Date: Wed, 18 Feb 2026 19:03:58 +0000 Subject: [PATCH 7/7] chore: tcmalloc in release image, opt-in for benchmarks Install tcmalloc in the release image with LD_PRELOAD always set for production. For benchmarks, set ALLOCATOR=tcmalloc to install and use it via LD_PRELOAD. 
--- barretenberg/cpp/scripts/run_bench.sh | 3 ++- build-images/src/Dockerfile | 2 -- release-image/Dockerfile.base | 5 ++++- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/barretenberg/cpp/scripts/run_bench.sh b/barretenberg/cpp/scripts/run_bench.sh index ef876466a554..205f26109569 100755 --- a/barretenberg/cpp/scripts/run_bench.sh +++ b/barretenberg/cpp/scripts/run_bench.sh @@ -14,9 +14,10 @@ filter=$4 export GTEST_COLOR=1 export HARDWARE_CONCURRENCY=${CPUS:-8} -# Set ALLOCATOR=tcmalloc to use tcmalloc via LD_PRELOAD (installed in build image). +# Set ALLOCATOR=tcmalloc to use tcmalloc via LD_PRELOAD. BENCH_PRELOAD="" if [ "${ALLOCATOR:-}" = "tcmalloc" ]; then + sudo apt-get update -qq && sudo apt-get install -y -qq libtcmalloc-minimal4t64 BENCH_PRELOAD="/usr/lib/$(uname -m)-linux-gnu/libtcmalloc_minimal.so.4" fi diff --git a/build-images/src/Dockerfile b/build-images/src/Dockerfile index 1dec168b15fa..5d97d07f0c4d 100644 --- a/build-images/src/Dockerfile +++ b/build-images/src/Dockerfile @@ -107,8 +107,6 @@ RUN apt update && \ # Python (clang bindings for wasm bindgen.) python3 \ python3-clang \ - # Alternative memory allocator (use via LD_PRELOAD for benchmarking) - libtcmalloc-minimal4t64 \ # Unminimize ubuntu installation. 
unminimize \ && apt-get -y autoremove \ diff --git a/release-image/Dockerfile.base b/release-image/Dockerfile.base index 8e61fcb0babe..6ffdf0dee01e 100644 --- a/release-image/Dockerfile.base +++ b/release-image/Dockerfile.base @@ -23,7 +23,8 @@ RUN apt update && apt install -y \ netcat-openbsd \ parallel \ curl \ - gnupg && \ + gnupg \ + libtcmalloc-minimal4t64 && \ curl -fsSL https://deb.nodesource.com/gpgkey/nodesource-repo.gpg.key | gpg --dearmor -o /etc/apt/keyrings/nodesource.gpg && \ echo "deb [signed-by=/etc/apt/keyrings/nodesource.gpg] https://deb.nodesource.com/node_$NODE_MAJOR.x nodistro main" | tee /etc/apt/sources.list.d/nodesource.list && \ apt update && apt install nodejs && \ @@ -33,6 +34,8 @@ COPY --from=build /opt/foundry/bin/anvil /opt/foundry/bin/anvil COPY --from=build /opt/foundry/bin/forge /opt/foundry/bin/forge COPY --from=build /opt/foundry/bin/cast /opt/foundry/bin/cast ENV PATH="/opt/foundry/bin:$PATH" FOUNDRY_DISABLE_NIGHTLY_WARNING="1" +# Use tcmalloc for reduced memory fragmentation and improved allocation performance. Preload by bare soname (no directory) so the dynamic linker resolves it through its normal library search path instead of a hard-coded x86_64 multiarch directory. +ENV LD_PRELOAD="libtcmalloc_minimal.so.4" # Copy in production dependencies. COPY --from=build /usr/src/yarn-project/node_modules /usr/src/yarn-project/node_modules # We install a symlink to yarn-project's node_modules at a location that all portalled packages can find as they