AztecProtocol · AztecBot · Oct 29, 2025 · Oct 28, 2025 · Oct 28, 2025 · Oct 28, 2025
diff --git a/barretenberg/cpp/CMakeLists.txt b/barretenberg/cpp/CMakeLists.txt
@@ -35,6 +35,7 @@ option(ENABLE_STACKTRACES "Enable stack traces on assertion" OFF)
 option(ENABLE_TRACY "Enable low-medium overhead profiling for memory and performance with tracy" OFF)
 option(ENABLE_PIC "Builds with position independent code" OFF)
 option(SYNTAX_ONLY "only check syntax (-fsyntax-only)" OFF)
+option(ENABLE_WASM_BENCH "Enable BB_BENCH benchmarking support in WASM builds (dev only, not for releases)" OFF)
 option(AVM "enable building of vm2 module and bb-avm" ON)
 
 if(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64" OR CMAKE_SYSTEM_PROCESSOR MATCHES "arm64")
@@ -97,6 +98,10 @@ if(CMAKE_SYSTEM_PROCESSOR MATCHES "wasm32")
     set(OMP_MULTITHREADING OFF)
     set(ENABLE_PAR_ALGOS 0)
     add_compile_definitions(_WASI_EMULATED_PROCESS_CLOCKS=1)
+    if(ENABLE_WASM_BENCH) # opt-in, dev-only option; this hunk only consults it inside the wasm32 branch
+        message(STATUS "Enabling WASM benchmarking support (ENABLE_WASM_BENCH)")
+        add_compile_definitions(ENABLE_WASM_BENCH) # directory-scoped define that sources can test via defined(ENABLE_WASM_BENCH)
+    endif()
 endif()
 
 set(CMAKE_C_STANDARD 11)

diff --git a/barretenberg/cpp/bootstrap.sh b/barretenberg/cpp/bootstrap.sh
@@ -45,11 +45,15 @@ function inject_version {
 function build_preset() {
   local preset=$1
   shift
-  local avm_transpiler_flag=""
+  local cmake_args=() # extra configure-time -D flags; an array keeps each flag a single argv word
   if [ "${AVM_TRANSPILER:-1}" -eq 0 ]; then
-    avm_transpiler_flag="-DAVM_TRANSPILER_LIB="
+    cmake_args+=(-DAVM_TRANSPILER_LIB=)
   fi
-  cmake --fresh --preset "$preset" $avm_transpiler_flag
+  # Auto-enable ENABLE_WASM_BENCH for the wasm-threads preset whenever REF_NAME fails `semver check` (non-release builds)
+  if [[ "$preset" == "wasm-threads" ]] && ! semver check "$REF_NAME"; then
+    cmake_args+=(-DENABLE_WASM_BENCH=ON)
+  fi
+  cmake --fresh --preset "$preset" ${cmake_args[@]+"${cmake_args[@]}"} # guard: a bare "${arr[@]}" on an empty array trips `set -u` on bash < 4.4
   cmake --build --preset "$preset" "$@"
 }
 

diff --git a/barretenberg/cpp/scripts/test_chonk_standalone_vks_havent_changed.sh b/barretenberg/cpp/scripts/test_chonk_standalone_vks_havent_changed.sh
@@ -13,8 +13,8 @@ cd ..
 # - Generate a hash for versioning: sha256sum bb-chonk-inputs.tar.gz
 # - Upload the compressed results: aws s3 cp bb-chonk-inputs.tar.gz s3://aztec-ci-artifacts/protocol/bb-chonk-inputs-[hash(0:8)].tar.gz
 # Note: In case of the "Test suite failed to run ... Unexpected token 'with' " error, need to run: docker pull aztecprotocol/build:3.0
-pinned_short_hash="bd98634a"
-pinned_chonk_inputs_url="https://aztec-ci-artifacts.s3.us-east-2.amazonaws.com/protocol/bb-chonk-inputs-${pinned_short_hash}.tar.gz"
+pinned_short_hash="abdb6bae"
+pinned_chonk_inputs_url="https://aztec-ci-artifacts.s3.us-east-2.amazonaws.com/protocol/bb-civc-inputs-${pinned_short_hash}.tar.gz" # NOTE(review): fetches bb-civc-inputs-*, but the upload instructions above write bb-chonk-inputs-* — confirm which object name the pinned artifact really has
 
 function compress_and_upload {
     # 1) Compress the results

diff --git a/barretenberg/cpp/scripts/wasmtime.sh b/barretenberg/cpp/scripts/wasmtime.sh
@@ -9,6 +9,7 @@ exec wasmtime run \
   ${HARDWARE_CONCURRENCY:+--env HARDWARE_CONCURRENCY} \
   --env HOME \
   ${MAIN_ARGS:+--env MAIN_ARGS} \
+  ${BB_BENCH:+--env BB_BENCH} \
   --dir=$HOME/.bb-crs \
   --dir=. \
   "$@"
diff --git a/barretenberg/cpp/src/barretenberg/bb/cli.cpp b/barretenberg/cpp/src/barretenberg/bb/cli.cpp
@@ -559,12 +559,13 @@ int parse_and_run_cli_command(int argc, char* argv[])
     debug_logging = flags.debug;
     verbose_logging = debug_logging || flags.verbose;
     slow_low_memory = flags.slow_low_memory;
-#ifndef __wasm__
+#if !defined(__wasm__) || defined(ENABLE_WASM_BENCH)
     if (!flags.storage_budget.empty()) {
         storage_budget = parse_size_string(flags.storage_budget);
     }
     if (print_bench || !bench_out.empty() || !bench_out_hierarchical.empty()) {
         bb::detail::use_bb_bench = true;
+        vinfo("BB_BENCH enabled via --print_bench or --bench_out");
     }
 #endif
 
@@ -659,9 +660,11 @@ int parse_and_run_cli_command(int argc, char* argv[])
                         "<ivc-inputs.msgpack> (default ./ivc-inputs.msgpack)");
                 }
                 api.prove(flags, ivc_inputs_path, output_path);
-#ifndef __wasm__
+#if !defined(__wasm__) || defined(ENABLE_WASM_BENCH)
                 if (print_bench) {
+                    vinfo("Printing BB_BENCH results...");
                     bb::detail::GLOBAL_BENCH_STATS.print_aggregate_counts_hierarchical(std::cout);
+                    std::cout << std::flush;
                 }
                 if (!bench_out.empty()) {
                     std::ofstream file(bench_out);
@@ -687,7 +690,7 @@ int parse_and_run_cli_command(int argc, char* argv[])
             UltraHonkAPI api;
             if (prove->parsed()) {
                 api.prove(flags, bytecode_path, witness_path, vk_path, output_path);
-#ifndef __wasm__
+#if !defined(__wasm__) || defined(ENABLE_WASM_BENCH)
                 if (print_bench) {
                     bb::detail::GLOBAL_BENCH_STATS.print_aggregate_counts_hierarchical(std::cout);
                 }

diff --git a/barretenberg/cpp/src/barretenberg/benchmark/relations_bench/relations.bench.cpp b/barretenberg/cpp/src/barretenberg/benchmark/relations_bench/relations.bench.cpp
@@ -49,7 +49,7 @@ template <typename Flavor, typename Relation> void execute_relation_for_univaria
 }
 
 // Ultra relations (Sumcheck prover work)
-BENCHMARK(execute_relation_for_univariates<UltraFlavor, UltraArithmeticRelation<Fr>>);
+BENCHMARK(execute_relation_for_univariates<UltraFlavor, ArithmeticRelation<Fr>>);
 BENCHMARK(execute_relation_for_univariates<UltraFlavor, DeltaRangeConstraintRelation<Fr>>);
 BENCHMARK(execute_relation_for_univariates<UltraFlavor, EllipticRelation<Fr>>);
 BENCHMARK(execute_relation_for_univariates<UltraFlavor, MemoryRelation<Fr>>);
@@ -64,7 +64,7 @@ BENCHMARK(execute_relation_for_univariates<MegaFlavor, Poseidon2ExternalRelation
 BENCHMARK(execute_relation_for_univariates<MegaFlavor, Poseidon2InternalRelation<Fr>>);
 
 // Ultra relations (verifier work)
-BENCHMARK(execute_relation_for_values<UltraFlavor, UltraArithmeticRelation<Fr>>);
+BENCHMARK(execute_relation_for_values<UltraFlavor, ArithmeticRelation<Fr>>);
 BENCHMARK(execute_relation_for_values<UltraFlavor, DeltaRangeConstraintRelation<Fr>>);
 BENCHMARK(execute_relation_for_values<UltraFlavor, EllipticRelation<Fr>>);
 BENCHMARK(execute_relation_for_values<UltraFlavor, MemoryRelation<Fr>>);

diff --git a/...etenberg/cpp/src/barretenberg/boomerang_value_detection/graph_description_goblin.test.cpp b/...etenberg/cpp/src/barretenberg/boomerang_value_detection/graph_description_goblin.test.cpp
@@ -99,10 +99,12 @@ TEST_F(BoomerangGoblinRecursiveVerifierTests, graph_description_basic)
         ASSERT_TRUE(verified);
     }
     auto translator_pairing_points = output.points_accumulator;
-    translator_pairing_points.P0.x.fix_witness();
-    translator_pairing_points.P0.y.fix_witness();
-    translator_pairing_points.P1.x.fix_witness();
-    translator_pairing_points.P1.y.fix_witness();
+    // BIGGROUP_AUDITTODO: It seems suspicious that we have to fix these witnesses here to make this test pass. Seems to
+    // defeat the purpose of the test.
+    translator_pairing_points.P0.x().fix_witness();
+    translator_pairing_points.P0.y().fix_witness();
+    translator_pairing_points.P1.x().fix_witness();
+    translator_pairing_points.P1.y().fix_witness();
     info("Recursive Verifier: num gates = ", builder.num_gates());
     auto graph = cdg::StaticAnalyzer(builder, false);
     auto variables_in_one_gate = graph.get_variables_in_one_gate();

diff --git a/...arretenberg/boomerang_value_detection/graph_description_ultra_recursive_verifier.test.cpp b/...arretenberg/boomerang_value_detection/graph_description_ultra_recursive_verifier.test.cpp
@@ -118,10 +118,12 @@ template <typename RecursiveFlavor> class BoomerangRecursiveVerifierTest : publi
         StdlibProof stdlib_inner_proof(outer_circuit, inner_proof);
         VerifierOutput output = verifier.template verify_proof<DefaultIO<OuterBuilder>>(stdlib_inner_proof);
         PairingObject pairing_points = output.points_accumulator;
-        pairing_points.P0.x.fix_witness();
-        pairing_points.P0.y.fix_witness();
-        pairing_points.P1.x.fix_witness();
-        pairing_points.P1.y.fix_witness();
+        // BIGGROUP_AUDITTODO: It seems suspicious that we have to fix these witnesses here to make this test pass.
+        // Seems to defeat the purpose of the test.
+        pairing_points.P0.x().fix_witness();
+        pairing_points.P0.y().fix_witness();
+        pairing_points.P1.x().fix_witness();
+        pairing_points.P1.y().fix_witness();
         if constexpr (HasIPAAccumulator<OuterFlavor>) {
             output.ipa_claim.set_public();
             outer_circuit.ipa_proof = output.ipa_proof.get_value();

diff --git a/barretenberg/cpp/src/barretenberg/circuit_checker/ultra_circuit_checker.hpp b/barretenberg/cpp/src/barretenberg/circuit_checker/ultra_circuit_checker.hpp
@@ -18,7 +18,7 @@ namespace bb {
 class UltraCircuitChecker {
   public:
     using FF = bb::fr;
-    using Arithmetic = UltraArithmeticRelation<FF>;
+    using Arithmetic = ArithmeticRelation<FF>;
     using Elliptic = EllipticRelation<FF>;
     using Memory = MemoryRelation<FF>;
     using NonNativeField = NonNativeFieldRelation<FF>;

diff --git a/barretenberg/cpp/src/barretenberg/common/bb_bench.cpp b/barretenberg/cpp/src/barretenberg/common/bb_bench.cpp
@@ -1,7 +1,7 @@
 #include "barretenberg/common/assert.hpp"
 #include <cstdint>
 #include <sys/types.h>
-#ifndef __wasm__
+#if !defined(__wasm__) || defined(ENABLE_WASM_BENCH)
 #include "barretenberg/serialize/msgpack_impl.hpp"
 #include "bb_bench.hpp"
 #include <algorithm>
@@ -182,7 +182,7 @@ void AggregateEntry::add_thread_time_sample(const TimeAndCount& stats)
     // Account for aggregate time and count
     time += stats.time;
     count += stats.count;
-    time_max = std::max(static_cast<size_t>(stats.time), time_max);
+    time_max = std::max(stats.time, time_max);
     // Use Welford's method to be able to track the variance
     num_threads++;
     double delta = static_cast<double>(stats.time) - time_mean;
@@ -300,12 +300,12 @@ void GlobalBenchStatsContainer::print_aggregate_counts(std::ostream& os, size_t
 // Serializable structure for a single benchmark entry (msgpack-compatible)
 struct SerializableEntry {
     std::string parent;
-    std::size_t time;
-    std::size_t time_max;
+    uint64_t time; // NOTE(review): fixed-width presumably so the packed value stays 64-bit where size_t is 32-bit (e.g. wasm32) — confirm
+    uint64_t time_max;
     double time_mean;
     double time_stddev;
-    std::size_t count;
-    std::size_t num_threads;
+    uint64_t count;
+    uint64_t num_threads; // NOTE(review): AggregateEntry::num_threads is still declared size_t in bb_bench.hpp — confirm the mismatch is intended
 
     MSGPACK_FIELDS(parent, time, time_max, time_mean, time_stddev, count, num_threads);
 };
@@ -578,7 +578,7 @@ void GlobalBenchStatsContainer::print_aggregate_counts_hierarchical(std::ostream
     uint64_t total_time = 0;
     for (const auto& [_, parent_map] : aggregated) {
         if (auto it = parent_map.find(""); it != parent_map.end()) {
-            total_time = std::max(static_cast<size_t>(total_time), it->second.time_max);
+            total_time = std::max(total_time, it->second.time_max);
         }
     }
 

diff --git a/barretenberg/cpp/src/barretenberg/common/bb_bench.hpp b/barretenberg/cpp/src/barretenberg/common/bb_bench.hpp
@@ -58,11 +58,11 @@ struct AggregateEntry {
     // For convenience, even though redundant with map store
     OperationKey key;
     OperationKey parent;
-    std::size_t time = 0;
-    std::size_t count = 0;
+    uint64_t time = 0;
+    uint64_t count = 0;
     size_t num_threads = 0;
     double time_mean = 0;
-    std::size_t time_max = 0;
+    uint64_t time_max = 0;
     double time_stddev = 0;
 
     // Welford's algorithm state
@@ -106,19 +106,19 @@ extern GlobalBenchStatsContainer GLOBAL_BENCH_STATS;
 // but doesn't provide recursive parent-child relationships through the entire call stack.
 struct TimeStats {
     TimeStatsEntry* parent = nullptr;
-    std::size_t count = 0;
-    std::size_t time = 0;
+    uint64_t count = 0;
+    uint64_t time = 0;
     // Used if the parent changes from last call - chains to handle multiple parent contexts
     std::unique_ptr<TimeStats> next;
 
     TimeStats() = default;
-    TimeStats(TimeStatsEntry* parent_ptr, std::size_t count_val, std::size_t time_val)
+    TimeStats(TimeStatsEntry* parent_ptr, uint64_t count_val, uint64_t time_val)
         : parent(parent_ptr)
         , count(count_val)
         , time(time_val)
     {}
 
-    void track(TimeStatsEntry* current_parent, std::size_t time_val)
+    void track(TimeStatsEntry* current_parent, uint64_t time_val)
     {
         // Try to track with current stats if parent matches
         // Check if 'next' already handles this parent to avoid creating duplicates
@@ -138,7 +138,7 @@ struct TimeStats {
 
   private:
     // Returns true if successfully tracked (parent matches), false otherwise
-    bool raw_track(TimeStatsEntry* expected_parent, std::size_t time_val)
+    bool raw_track(TimeStatsEntry* expected_parent, uint64_t time_val)
     {
         if (parent != expected_parent) {
             return false;
@@ -181,7 +181,7 @@ template <OperationLabel Op> struct ThreadBenchStats {
 struct BenchReporter {
     TimeStatsEntry* parent;
     TimeStatsEntry* stats;
-    std::size_t time;
+    uint64_t time;
     BenchReporter(TimeStatsEntry* entry);
     ~BenchReporter();
 };
@@ -196,7 +196,7 @@ struct BenchReporter {
 #define BB_BENCH_ONLY_NAME(name) (void)0
 #define BB_BENCH_ENABLE_NESTING() (void)0
 #define BB_BENCH_ONLY() (void)0
-#elif defined __wasm__
+#elif defined __wasm__ && !defined ENABLE_WASM_BENCH
 #define BB_TRACY() (void)0
 #define BB_TRACY_NAME(name) (void)0
 #define BB_BENCH_TRACY() (void)0

diff --git a/barretenberg/cpp/src/barretenberg/dsl/acir_format/acir_format.test.cpp b/barretenberg/cpp/src/barretenberg/dsl/acir_format/acir_format.test.cpp
@@ -4,6 +4,7 @@
 
 #include "acir_format.hpp"
 #include "acir_format_mocks.hpp"
+#include "acir_to_constraint_buf.hpp"
 #include "barretenberg/common/streams.hpp"
 #include "barretenberg/op_queue/ecc_op_queue.hpp"
 
@@ -341,3 +342,82 @@ TEST_F(AcirFormatTests, TestBigAdd)
 
     EXPECT_TRUE(CircuitChecker::check(builder));
 }
+
+// Serializes a uint256_t into a 32-byte vector, most significant byte first (big-endian).
+std::vector<uint8_t> to_bytes_be(uint256_t value)
+{
+    std::vector<uint8_t> out(32, 0);
+    for (auto slot = out.rbegin(); slot != out.rend(); ++slot) {
+        *slot = static_cast<uint8_t>(value & 0xFF);
+        value >>= 8;
+    }
+    return out;
+}
+
+/**
+ * @brief Test for bug fix where expressions with distinct witnesses requiring more than one width-4 gate
+ *        were incorrectly processed when they initially appeared to fit in width-3 gates
+ *
+ * @details This test verifies the fix for a bug in handle_arithmetic where an expression with:
+ *  - 1 mul term using witnesses (w0 * w1)
+ *  - 3 additional linear terms using distinct witnesses (w2, w3, w4)
+ *
+ *  Such expressions have ≤3 linear combinations and ≤1 mul term, appearing to fit in a
+ *  poly_triple (width-3) gate. However, with all 5 witnesses distinct, serialize_arithmetic_gate
+ *  correctly returns all zeros, indicating it cannot fit in a width-3 gate.
+ *
+ *  The bug: old code would check if poly_triple was all zeros, and if so, directly add to
+ *  quad_constraints via serialize_mul_quad_gate. But it did this inside the initial
+ *  might_fit_in_polytriple check, so it would never properly go through the mul_quad processing
+ *  logic that handles the general case with >4 witnesses.
+ *
+ *  The fix: now uses a needs_to_be_parsed_as_mul_quad flag that is set when poly_triple fails,
+ *  and processes through the proper mul_quad logic path, which splits into multiple gates.
+ *
+ *  Expression: w0 * w1 + w2 + w3 + w4 - 9 = 0 (i.e. q_c = -9)
+ *  With witnesses: w0=0, w1=1, w2=2, w3=3, w4=4
+ *  Evaluation: 0*1 + 2 + 3 + 4 = 9, and q_c = -9, so the constraint is satisfied: 9 - 9 = 0
+ */
+TEST_F(AcirFormatTests, TestArithmeticGateWithDistinctWitnessesRegression)
+{
+    // Create an ACIR expression: w0 * w1 + w2 + w3 + w4 - 9 = 0
+    // This has 1 mul term and 3 linear terms with all 5 distinct witnesses (requires multiple width-4 gates)
+    Acir::Expression expr{ .mul_terms = { std::make_tuple(
+                               to_bytes_be(1), Acir::Witness{ .value = 0 }, Acir::Witness{ .value = 1 }) },
+                           .linear_combinations = { std::make_tuple(to_bytes_be(1), Acir::Witness{ .value = 2 }),
+                                                    std::make_tuple(to_bytes_be(1), Acir::Witness{ .value = 3 }),
+                                                    std::make_tuple(to_bytes_be(1), Acir::Witness{ .value = 4 }) },
+                           .q_c = to_bytes_be(static_cast<uint256_t>(fr(-9))) };
+
+    Acir::Opcode::AssertZero assert_zero{ .value = expr };
+
+    // Create an ACIR circuit with this opcode
+    Acir::Circuit circuit{
+        .current_witness_index = 5,
+        .opcodes = { Acir::Opcode{ .value = assert_zero } },
+        .return_values = {},
+    };
+
+    Acir::Program program{ .functions = { circuit } };
+
+    // Serialize the program to bytes
+    auto program_bytes = program.bincodeSerialize();
+
+    // Process through circuit_buf_to_acir_format (this calls handle_arithmetic internally)
+    AcirFormat constraint_system = circuit_buf_to_acir_format(std::move(program_bytes));
+
+    // The key assertion: the opcode must be recorded exactly once in big_quad_constraints and nowhere else;
+    // five distinct witnesses cannot be placed in a single poly_triple (width-3) or mul_quad (width-4) gate.
+    EXPECT_EQ(constraint_system.poly_triple_constraints.size(), 0);
+    EXPECT_EQ(constraint_system.quad_constraints.size(), 0);
+    EXPECT_EQ(constraint_system.big_quad_constraints.size(), 1);
+
+    // Now build the circuit from a satisfying witness assignment and run the circuit checker on it
+    // We need: w0 * w1 + w2 + w3 + w4 = 9
+    // Use values: w0=0, w1=1, w2=2, w3=3, w4=4, so 0*1 + 2 + 3 + 4 = 9
+    WitnessVector witness{ 0, 1, 2, 3, 4 };
+    AcirProgram acir_program{ constraint_system, witness };
+    auto builder = create_circuit(acir_program);
+
+    EXPECT_TRUE(CircuitChecker::check(builder));
+}
diff --git a/barretenberg/cpp/src/barretenberg/dsl/acir_format/acir_to_constraint_buf.cpp b/barretenberg/cpp/src/barretenberg/dsl/acir_format/acir_to_constraint_buf.cpp
@@ -459,7 +459,9 @@ std::pair<uint32_t, uint32_t> is_assert_equal(Acir::Opcode::AssertZero const& ar
 void handle_arithmetic(Acir::Opcode::AssertZero const& arg, AcirFormat& af, size_t opcode_index)
 {
     // If the expression fits in a polytriple, we use it.
-    if (arg.value.linear_combinations.size() <= 3 && arg.value.mul_terms.size() <= 1) {
+    bool might_fit_in_polytriple = arg.value.linear_combinations.size() <= 3 && arg.value.mul_terms.size() <= 1;
+    bool needs_to_be_parsed_as_mul_quad = !might_fit_in_polytriple;
+    if (might_fit_in_polytriple) {
         poly_triple pt = serialize_arithmetic_gate(arg.value);
 
         auto assert_equal = is_assert_equal(arg, pt, af);
@@ -502,15 +504,14 @@ void handle_arithmetic(Acir::Opcode::AssertZero const& arg, AcirFormat& af, size
         // gate. This is the case if the linear terms are all distinct witness from the multiplication term. In that
         // case, the serialize_arithmetic_gate() function will return a poly_triple with all 0's, and we use a width-4
         // gate instead. We could probably always use a width-4 gate in fact.
-        if (pt == poly_triple{ 0, 0, 0, 0, 0, 0, 0, 0 }) {
-            af.quad_constraints.push_back(serialize_mul_quad_gate(arg.value));
-            af.original_opcode_indices.quad_constraints.push_back(opcode_index);
-
-        } else {
+        if (pt != poly_triple{ 0, 0, 0, 0, 0, 0, 0, 0 }) {
             af.poly_triple_constraints.push_back(pt);
             af.original_opcode_indices.poly_triple_constraints.push_back(opcode_index);
+        } else {
+            needs_to_be_parsed_as_mul_quad = true;
         }
-    } else {
+    }
+    if (needs_to_be_parsed_as_mul_quad) {
         std::vector<mul_quad_<fr>> mul_quads;
         // We try to use a single mul_quad gate to represent the expression.
         if (arg.value.mul_terms.size() <= 1) {