diff --git a/barretenberg/cpp/src/barretenberg/benchmark/ultra_bench/ultra_honk_rounds.bench.cpp b/barretenberg/cpp/src/barretenberg/benchmark/ultra_bench/ultra_honk_rounds.bench.cpp deleted file mode 100644 index 646c5f8a179a..000000000000 --- a/barretenberg/cpp/src/barretenberg/benchmark/ultra_bench/ultra_honk_rounds.bench.cpp +++ /dev/null @@ -1,95 +0,0 @@ -#include - -#include "barretenberg/benchmark/ultra_bench/mock_circuits.hpp" -#include "barretenberg/common/google_bb_bench.hpp" -#include "barretenberg/stdlib_circuit_builders/ultra_circuit_builder.hpp" -#include "barretenberg/ultra_honk/decider_prover.hpp" -#include "barretenberg/ultra_honk/oink_prover.hpp" -#include "barretenberg/ultra_honk/ultra_prover.hpp" - -using namespace benchmark; -using namespace bb; - -// The rounds to measure -enum { - PREAMBLE, - WIRE_COMMITMENTS, - SORTED_LIST_ACCUMULATOR, - LOG_DERIVATIVE_INVERSE, - GRAND_PRODUCT_COMPUTATION, - GENERATE_ALPHAS, - RELATION_CHECK -}; - -/** - * @details Benchmark Goblin ultrahonk by performing all the rounds, but only measuring one. - * Note: As a result the very short rounds take a long time for statistical significance, so recommended to set their - * iterations to 1. - * @param state - The google benchmark state. - * @param prover - The Goblin ultrahonk prover. - * @param index - The pass to measure. - **/ -BB_PROFILE void test_round_inner(State& state, MegaProver& prover, size_t index) noexcept -{ - auto time_if_index = [&](size_t target_index, auto&& func) -> void { - GOOGLE_BB_BENCH_REPORTER(state); - if (index == target_index) { - state.ResumeTiming(); - } - - func(); - if (index == target_index) { - state.PauseTiming(); - } else { - // We don't actually want to write to user-defined counters - GOOGLE_BB_BENCH_REPORTER_CANCEL(); - } - }; - // why is this mega if the name of file is ultra - auto verification_key = std::make_shared(prover.prover_instance->get_precomputed()); - OinkProver oink_prover(prover.prover_instance, verification_key, prover.transcript); - time_if_index(PREAMBLE, [&] { oink_prover.execute_preamble_round(); }); - time_if_index(WIRE_COMMITMENTS, [&] { oink_prover.execute_wire_commitments_round(); }); - time_if_index(SORTED_LIST_ACCUMULATOR, [&] { oink_prover.execute_sorted_list_accumulator_round(); }); - time_if_index(LOG_DERIVATIVE_INVERSE, [&] { oink_prover.execute_log_derivative_inverse_round(); }); - time_if_index(GRAND_PRODUCT_COMPUTATION, [&] { oink_prover.execute_grand_product_computation_round(); }); - time_if_index(GENERATE_ALPHAS, [&] { prover.prover_instance->alphas = oink_prover.generate_alphas_round(); }); - - prover.generate_gate_challenges(); - - DeciderProver_ decider_prover(prover.prover_instance, prover.transcript); - time_if_index(RELATION_CHECK, [&] { decider_prover.execute_relation_check_rounds(); }); -} -BB_PROFILE static void test_round(State& state, size_t index) noexcept -{ - auto log2_num_gates = static_cast(state.range(0)); - bb::srs::init_file_crs_factory(bb::srs::bb_crs_path()); - - // TODO(https://github.com/AztecProtocol/barretenberg/issues/761) benchmark both sparse and dense circuits - auto prover = bb::mock_circuits::get_prover( - &bb::mock_circuits::generate_basic_arithmetic_circuit, log2_num_gates); - for (auto _ : state) { - state.PauseTiming(); - test_round_inner(state, prover, index); - state.ResumeTiming(); - // NOTE: google bench is very finnicky, must end in ResumeTiming() for correctness - } -} -#define ROUND_BENCHMARK(round) \ - static void ROUND_##round(State& state) noexcept \ - { \ - test_round(state, round); \ - } \ - BENCHMARK(ROUND_##round)->DenseRange(12, 19)->Unit(kMillisecond) - -// Fast rounds take a long time to benchmark because of how we compute statistical significance. -// Limit to one iteration so we don't spend a lot of time redoing full proofs just to measure this part. -ROUND_BENCHMARK(PREAMBLE)->Iterations(1); -ROUND_BENCHMARK(WIRE_COMMITMENTS)->Iterations(1); -ROUND_BENCHMARK(SORTED_LIST_ACCUMULATOR)->Iterations(1); -ROUND_BENCHMARK(LOG_DERIVATIVE_INVERSE)->Iterations(1); -ROUND_BENCHMARK(GRAND_PRODUCT_COMPUTATION)->Iterations(1); -ROUND_BENCHMARK(GENERATE_ALPHAS)->Iterations(1); -ROUND_BENCHMARK(RELATION_CHECK); - -BENCHMARK_MAIN(); diff --git a/barretenberg/cpp/src/barretenberg/commitment_schemes/commitment_key.hpp b/barretenberg/cpp/src/barretenberg/commitment_schemes/commitment_key.hpp index b37165fd4ae2..9630b1b33988 100644 --- a/barretenberg/cpp/src/barretenberg/commitment_schemes/commitment_key.hpp +++ b/barretenberg/cpp/src/barretenberg/commitment_schemes/commitment_key.hpp @@ -14,6 +14,7 @@ */ #include "barretenberg/common/bb_bench.hpp" +#include "barretenberg/common/ref_span.hpp" #include "barretenberg/constants.hpp" #include "barretenberg/ecc/batched_affine_addition/batched_affine_addition.hpp" #include "barretenberg/ecc/scalar_multiplication/scalar_multiplication.hpp" @@ -25,6 +26,8 @@ #include "barretenberg/srs/global_crs.hpp" #include +#include +#include #include #include @@ -103,6 +106,82 @@ template class CommitmentKey { Commitment point(r); return point; }; + /** + * @brief Batch commitment to multiple polynomials + * @details Uses batch_multi_scalar_mul for more efficient processing when committing to multiple polynomials + * + * @param polynomials vector of polynomial spans to commit to + * @return std::vector vector of commitments, one for each polynomial + */ + std::vector batch_commit(RefSpan> polynomials, + size_t max_batch_size = std::numeric_limits::max()) const + { + BB_BENCH_NAME("CommitmentKey::batch_commit"); + + // We can only commit max_batch_size at a time + // This is to prevent excessive memory usage in the pippenger algorithm + // First batch, create the commitments vector + std::vector commitments; + + max_batch_size = 1; + for (size_t i = 0; i < polynomials.size();) { + // Note: have to be careful how we compute this to not overlow e.g. max_batch_size + 1 would + size_t batch_size = std::min(max_batch_size, polynomials.size() - i); + size_t batch_end = i + batch_size; + + // Prepare spans for batch MSM + std::vector> points_spans; + // Note, we need to const_cast unfortunately as pippenger takes non-const spans + // as it converts back and forth from montgomery form + std::vector> scalar_spans; + + for (auto& polynomial : polynomials.subspan(i, batch_end - i)) { + std::span point_table = srs->get_monomial_points().subspan(polynomial.start_index()); + size_t consumed_srs = polynomial.start_index() + polynomial.size(); + if (consumed_srs > srs->get_monomial_size()) { + throw_or_abort(format("Attempting to commit to a polynomial that needs ", + consumed_srs, + " points with an SRS of size ", + srs->get_monomial_size())); + } + scalar_spans.emplace_back(polynomial.coeffs()); + points_spans.emplace_back(point_table); + } + + // Perform batch MSM + auto results = scalar_multiplication::MSM::batch_multi_scalar_mul(points_spans, scalar_spans, false); + for (const auto& result : results) { + commitments.emplace_back(result); + } + i += batch_size; + } + return commitments; + }; + + // helper builder struct for constructing a batch to commit at once + struct CommitBatch { + CommitmentKey* key; + RefVector> wires; + std::vector labels; + void commit_and_send_to_verifier(auto transcript, size_t max_batch_size = std::numeric_limits::max()) + { + std::vector commitments = key->batch_commit(wires, max_batch_size); + for (size_t i = 0; i < commitments.size(); ++i) { + transcript->send_to_verifier(labels[i], commitments[i]); + } + } + + void add_to_batch(Polynomial& poly, const std::string& label, bool mask) + { + if (mask) { + poly.mask(); + } + wires.push_back(poly); + labels.push_back(label); + } + }; + + CommitBatch start_batch() { return CommitBatch{ this, {}, {} }; } /** * @brief Efficiently commit to a polynomial whose nonzero elements are arranged in discrete blocks @@ -253,7 +332,7 @@ template class CommitmentKey { return result; } - enum class CommitType { Default, Structured, Sparse, StructuredNonZeroComplement }; + enum class CommitType { Default, StructuredNonZeroComplement }; Commitment commit_with_type(PolynomialSpan poly, CommitType type, @@ -261,9 +340,6 @@ template class CommitmentKey { size_t final_active_wire_idx = 0) { switch (type) { - case CommitType::Structured: - case CommitType::Sparse: - return commit(poly); case CommitType::StructuredNonZeroComplement: return commit_structured_with_nonzero_complement(poly, active_ranges, final_active_wire_idx); case CommitType::Default: diff --git a/barretenberg/cpp/src/barretenberg/common/assert.hpp b/barretenberg/cpp/src/barretenberg/common/assert.hpp index 46c0ac193109..6c422e629fab 100644 --- a/barretenberg/cpp/src/barretenberg/common/assert.hpp +++ b/barretenberg/cpp/src/barretenberg/common/assert.hpp @@ -1,5 +1,7 @@ #pragma once +#include "barretenberg/common/compiler_hints.hpp" +#include "barretenberg/common/throw_or_abort.hpp" #include #include @@ -59,7 +61,7 @@ struct AssertGuard { #else #define ASSERT_IN_CONSTEXPR(expression, ...) \ do { \ - if (!(expression)) { \ + if (!(BB_LIKELY(expression))) { \ info("Assertion failed: (" #expression ")"); \ __VA_OPT__(info("Reason : ", __VA_ARGS__);) \ bb::assert_failure(""); \ @@ -68,7 +70,7 @@ struct AssertGuard { #define ASSERT(expression, ...) \ do { \ - if (!(expression)) { \ + if (!(BB_LIKELY(expression))) { \ std::ostringstream oss; \ oss << "Assertion failed: (" #expression ")"; \ __VA_OPT__(oss << " | Reason: " << __VA_ARGS__;) \ @@ -80,7 +82,7 @@ struct AssertGuard { do { \ auto _actual = (actual); \ auto _expected = (expected); \ - if (!(_actual == _expected)) { \ + if (!(BB_LIKELY(_actual == _expected))) { \ std::ostringstream oss; \ oss << "Assertion failed: (" #actual " == " #expected ")\n"; \ oss << " Actual : " << _actual << "\n"; \ @@ -94,7 +96,7 @@ struct AssertGuard { do { \ auto _actual = (actual); \ auto _expected = (expected); \ - if (!(_actual != _expected)) { \ + if (!(BB_LIKELY(_actual != _expected))) { \ std::ostringstream oss; \ oss << "Assertion failed: (" #actual " != " #expected ")\n"; \ oss << " Actual : " << _actual << "\n"; \ @@ -108,7 +110,7 @@ struct AssertGuard { do { \ auto _left = (left); \ auto _right = (right); \ - if (!(_left > _right)) { \ + if (!(BB_LIKELY(_left > _right))) { \ std::ostringstream oss; \ oss << "Assertion failed: (" #left " > " #right ")\n"; \ oss << " Left : " << _left << "\n"; \ @@ -122,7 +124,7 @@ struct AssertGuard { do { \ auto _left = (left); \ auto _right = (right); \ - if (!(_left >= _right)) { \ + if (!(BB_LIKELY(_left >= _right))) { \ std::ostringstream oss; \ oss << "Assertion failed: (" #left " >= " #right ")\n"; \ oss << " Left : " << _left << "\n"; \ @@ -136,7 +138,7 @@ struct AssertGuard { do { \ auto _left = (left); \ auto _right = (right); \ - if (!(_left < _right)) { \ + if (!(BB_LIKELY(_left < _right))) { \ std::ostringstream oss; \ oss << "Assertion failed: (" #left " < " #right ")\n"; \ oss << " Left : " << _left << "\n"; \ @@ -150,7 +152,7 @@ struct AssertGuard { do { \ auto _left = (left); \ auto _right = (right); \ - if (!(_left <= _right)) { \ + if (!(BB_LIKELY(_left <= _right))) { \ std::ostringstream oss; \ oss << "Assertion failed: (" #left " <= " #right ")\n"; \ oss << " Left : " << _left << "\n"; \ diff --git a/barretenberg/cpp/src/barretenberg/common/bb_bench.hpp b/barretenberg/cpp/src/barretenberg/common/bb_bench.hpp index 9884492871fa..27e7da6e10c8 100644 --- a/barretenberg/cpp/src/barretenberg/common/bb_bench.hpp +++ b/barretenberg/cpp/src/barretenberg/common/bb_bench.hpp @@ -23,6 +23,7 @@ extern bool use_bb_bench; // Compile-time string // See e.g. https://www.reddit.com/r/cpp_questions/comments/pumi9r/does_c20_not_support_string_literals_as_template/ template struct OperationLabel { + constexpr static std::size_t size() { return N; } // NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays) constexpr OperationLabel(const char (&str)[N]) { @@ -35,6 +36,14 @@ template struct OperationLabel { char value[N]; }; +template constexpr auto concat() +{ + // NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays) + char result_cstr[op1.size() + op2.size() - 1] = {}; + std::copy(op1.value, op1.value + op1.size() - 1, result_cstr); + std::copy(op2.value, op2.value + op2.size(), result_cstr + op1.size() - 1); + return OperationLabel{ result_cstr }; +} struct TimeStats; struct TimeStatsEntry; using OperationKey = std::string_view; diff --git a/barretenberg/cpp/src/barretenberg/common/parallel_for_mutex_pool.cpp b/barretenberg/cpp/src/barretenberg/common/parallel_for_mutex_pool.cpp index 27fc905ebcdb..d3801dcbd363 100644 --- a/barretenberg/cpp/src/barretenberg/common/parallel_for_mutex_pool.cpp +++ b/barretenberg/cpp/src/barretenberg/common/parallel_for_mutex_pool.cpp @@ -41,7 +41,7 @@ class ThreadPool { do_iterations(); { - BB_BENCH_NAME("spinning main thread"); + // BB_BENCH_NAME("spinning main thread"); std::unique_lock lock(tasks_mutex); complete_condition_.wait(lock, [this] { return complete_ == num_iterations_; }); } @@ -72,7 +72,7 @@ class ThreadPool { } iteration = iteration_++; } - BB_BENCH_NAME("do_iterations()"); + // BB_BENCH_NAME("do_iterations()"); task_(iteration); { std::unique_lock lock(tasks_mutex); diff --git a/barretenberg/cpp/src/barretenberg/common/ref_span.hpp b/barretenberg/cpp/src/barretenberg/common/ref_span.hpp index a08f85760376..74d5da476584 100644 --- a/barretenberg/cpp/src/barretenberg/common/ref_span.hpp +++ b/barretenberg/cpp/src/barretenberg/common/ref_span.hpp @@ -27,7 +27,7 @@ template class RefSpan { {} // Constructor from an array of pointers and size - RefSpan(T** ptr_array, std::size_t size) + RefSpan(T* const* ptr_array, std::size_t size) : storage(ptr_array) , array_size(size) {} @@ -57,6 +57,18 @@ template class RefSpan { // Get size of the RefSpan constexpr std::size_t size() const { return array_size; } + RefSpan subspan(std::size_t offset, std::size_t count) + { + // NOTE: like std::span, assumes the caller ensures offset and count are within bounds. + return RefSpan(storage + offset, count); + } + + RefSpan subspan(std::size_t offset) + { + // NOTE: like std::span, assumes the caller ensures offset and count are within bounds. + return RefSpan(storage + offset, array_size - offset); + } + // Iterator implementation class iterator { public: diff --git a/barretenberg/cpp/src/barretenberg/common/ref_vector.hpp b/barretenberg/cpp/src/barretenberg/common/ref_vector.hpp index 759e10fa7517..579dc8a7667c 100644 --- a/barretenberg/cpp/src/barretenberg/common/ref_vector.hpp +++ b/barretenberg/cpp/src/barretenberg/common/ref_vector.hpp @@ -97,7 +97,7 @@ template class RefVector { std::size_t size() const { return storage.size(); } - void push_back(T& element) { storage.push_back(element); } + void push_back(T& element) { storage.push_back(&element); } iterator begin() const { return iterator(this, 0); } iterator end() const { return iterator(this, storage.size()); } diff --git a/barretenberg/cpp/src/barretenberg/common/thread.hpp b/barretenberg/cpp/src/barretenberg/common/thread.hpp index d1e644eefe6a..4ec5afc398c3 100644 --- a/barretenberg/cpp/src/barretenberg/common/thread.hpp +++ b/barretenberg/cpp/src/barretenberg/common/thread.hpp @@ -10,6 +10,8 @@ namespace bb { +// Useful for programatically benching different thread counts +void set_hardware_concurrency(size_t num_cores); inline size_t get_num_cpus() { return env_hardware_concurrency(); diff --git a/barretenberg/cpp/src/barretenberg/ecc/scalar_multiplication/scalar_multiplication.cpp b/barretenberg/cpp/src/barretenberg/ecc/scalar_multiplication/scalar_multiplication.cpp index 804d32c34f37..6f62fdeb4a53 100644 --- a/barretenberg/cpp/src/barretenberg/ecc/scalar_multiplication/scalar_multiplication.cpp +++ b/barretenberg/cpp/src/barretenberg/ecc/scalar_multiplication/scalar_multiplication.cpp @@ -115,7 +115,7 @@ void MSM::transform_scalar_and_get_nonzero_scalar_indices(std::span std::vector::ThreadWorkUnits> MSM::get_work_units( - std::vector>& scalars, std::vector>& msm_scalar_indices) noexcept + std::span> scalars, std::vector>& msm_scalar_indices) noexcept { const size_t num_msms = scalars.size(); @@ -599,9 +599,10 @@ void MSM::consume_point_schedule(std::span point_schedule } // We do some branchless programming here to minimize instruction pipeline flushes - // TODO(@zac-williamson, cc @ludamad) check these ternary operators are not branching! - // We are iterating through our points and can come across the following scenarios: - // 1: The next 2 points in `point_schedule` belong to the *same* bucket + // TODO(@zac-williamson, cc @ludamad) check these ternary operators are not branching! -> (ludamad: they don't, + // but its not clear that the conditional move is fundamentally less expensive) + // We are iterating through our points and + // can come across the following scenarios: 1: The next 2 points in `point_schedule` belong to the *same* bucket // (happy path - can put both points into affine_addition_scratch_space) // 2: The next 2 points have different bucket destinations AND point_schedule[point_it].bucket contains a point // (happyish path - we can put points[lhs_schedule] and buckets[lhs_bucket] into @@ -761,8 +762,8 @@ void MSM::consume_point_schedule(std::span point_schedule */ template std::vector MSM::batch_multi_scalar_mul( - std::vector>& points, - std::vector>& scalars, + std::span> points, + std::span> scalars, bool handle_edge_cases) noexcept { BB_ASSERT_EQ(points.size(), scalars.size()); diff --git a/barretenberg/cpp/src/barretenberg/ecc/scalar_multiplication/scalar_multiplication.hpp b/barretenberg/cpp/src/barretenberg/ecc/scalar_multiplication/scalar_multiplication.hpp index 206a3cfdffa4..0003f6e2eeed 100644 --- a/barretenberg/cpp/src/barretenberg/ecc/scalar_multiplication/scalar_multiplication.hpp +++ b/barretenberg/cpp/src/barretenberg/ecc/scalar_multiplication/scalar_multiplication.hpp @@ -94,7 +94,7 @@ template class MSM { static void transform_scalar_and_get_nonzero_scalar_indices(std::span scalars, std::vector& consolidated_indices) noexcept; - static std::vector get_work_units(std::vector>& scalars, + static std::vector get_work_units(std::span> scalars, std::vector>& msm_scalar_indices) noexcept; static uint32_t get_scalar_slice(const ScalarField& scalar, size_t round, size_t normal_slice_size) noexcept; static size_t get_optimal_log_num_buckets(const size_t num_points) noexcept; @@ -122,8 +122,8 @@ template class MSM { size_t num_input_points_processed, size_t num_queued_affine_points) noexcept; - static std::vector batch_multi_scalar_mul(std::vector>& points, - std::vector>& scalars, + static std::vector batch_multi_scalar_mul(std::span> points, + std::span> scalars, bool handle_edge_cases = true) noexcept; static AffineElement msm(std::span points, PolynomialSpan _scalars, diff --git a/barretenberg/cpp/src/barretenberg/ecc/scalar_multiplication/scalar_multiplication.test.cpp b/barretenberg/cpp/src/barretenberg/ecc/scalar_multiplication/scalar_multiplication.test.cpp index fda09738c114..a818ffd834f8 100644 --- a/barretenberg/cpp/src/barretenberg/ecc/scalar_multiplication/scalar_multiplication.test.cpp +++ b/barretenberg/cpp/src/barretenberg/ecc/scalar_multiplication/scalar_multiplication.test.cpp @@ -1,5 +1,6 @@ #include "scalar_multiplication.hpp" #include "barretenberg/api/file_io.hpp" +#include "barretenberg/common/thread.hpp" #include "barretenberg/ecc/curves/bn254/bn254.hpp" #include "barretenberg/ecc/curves/grumpkin/grumpkin.hpp" #include "barretenberg/ecc/curves/types.hpp" @@ -356,6 +357,7 @@ TYPED_TEST(ScalarMultiplicationTest, PippengerLowMemory) TYPED_TEST(ScalarMultiplicationTest, BatchMultiScalarMul) { + BB_BENCH_NAME("BatchMultiScalarMul"); SCALAR_MULTIPLICATION_TYPE_ALIASES using AffineElement = typename Curve::AffineElement; @@ -479,6 +481,90 @@ TYPED_TEST(ScalarMultiplicationTest, MSMEmptyPolynomial) EXPECT_EQ(result, Curve::Group::affine_point_at_infinity); } +// Helper function to generate scalars with specified sparsity +template +std::vector generate_sparse_scalars(size_t num_scalars, double sparsity_rate, auto& rng) +{ + std::vector scalars(num_scalars); + for (size_t i = 0; i < num_scalars; ++i) { + // Generate random value to determine if this scalar should be zero + double rand_val = static_cast(rng.get_random_uint32()) / static_cast(UINT32_MAX); + if (rand_val < sparsity_rate) { + scalars[i] = 0; + } else { + scalars[i] = ScalarField::random_element(&rng); + } + } + return scalars; +} + +// Test different MSM strategies with detailed benchmarking +// NOTE this requres BB_BENCH=1 to be set before the test command +TYPED_TEST(ScalarMultiplicationTest, BenchBatchMsm) +{ +#ifndef __wasm__ + if (!bb::detail::use_bb_bench) { +#else + { +#endif + std::cout + << "Skipping BatchMultiScalarMulStrategyComparison as BB_BENCH=1 is not passed (OR we are in wasm).\n"; + return; + } + SCALAR_MULTIPLICATION_TYPE_ALIASES + + using AffineElement = typename Curve::AffineElement; + + const size_t num_msms = 3; + const size_t msm_max_size = 1 << 17; + const double max_sparsity = 0.1; + + // Generate test data with varying sparsity + std::vector> all_points; + std::vector> all_scalars; + std::vector all_commitments; + std::vector> scalar_storage; + + for (size_t i = 0; i < num_msms; ++i) { + // Generate random sizes and density of 0s + const size_t size = engine.get_random_uint64() % msm_max_size; + const double sparsity = engine.get_random_uint8() / 255.0 * max_sparsity; + auto scalars = generate_sparse_scalars(size, sparsity, engine); + scalar_storage.push_back(std::move(scalars)); + + std::span points(&TestFixture::generators[i], size); + all_points.push_back(points); + all_scalars.push_back(scalar_storage.back()); + all_commitments.push_back(TestFixture::naive_msm(all_scalars.back(), all_points.back())); + } + auto func = [&](size_t num_threads) { + set_hardware_concurrency(num_threads); + // Strategy 1: Individual MSMs + { + BB_BENCH_NAME((bb::detail::concat())); + for (size_t i = 0; i < num_msms; ++i) { + std::vector> single_points = { all_points[i] }; + std::vector> single_scalars = { all_scalars[i] }; + auto result = scalar_multiplication::MSM::batch_multi_scalar_mul(single_points, single_scalars); + EXPECT_EQ(result[0], all_commitments[i]); + } + } + // Strategy 2: Batch + { + BB_BENCH_NAME((bb::detail::concat())); + auto result = scalar_multiplication::MSM::batch_multi_scalar_mul(all_points, all_scalars); + EXPECT_EQ(result, all_commitments); + } + }; + // call lambda with template param + func.template operator()<"1 thread ">(1); + func.template operator()<"2 threads ">(2); + func.template operator()<"4 threads ">(4); + func.template operator()<"8 threads ">(8); + func.template operator()<"16 threads ">(16); + func.template operator()<"32 threads ">(32); +} + TEST(ScalarMultiplication, SmallInputsExplicit) { uint256_t x0(0x68df84429941826a, 0xeb08934ed806781c, 0xc14b6a2e4f796a73, 0x08dc1a9a11a3c8db); diff --git a/barretenberg/cpp/src/barretenberg/eccvm/eccvm_prover.cpp b/barretenberg/cpp/src/barretenberg/eccvm/eccvm_prover.cpp index 7e82657b5fa4..0ddf669d6bf1 100644 --- a/barretenberg/cpp/src/barretenberg/eccvm/eccvm_prover.cpp +++ b/barretenberg/cpp/src/barretenberg/eccvm/eccvm_prover.cpp @@ -64,25 +64,11 @@ void ECCVMProver::execute_wire_commitments_round() const size_t circuit_size = key->circuit_size; unmasked_witness_size = circuit_size - NUM_DISABLED_ROWS_IN_SUMCHECK; - CommitmentKey::CommitType commit_type = - (circuit_size > key->real_size) ? CommitmentKey::CommitType::Structured : CommitmentKey::CommitType::Default; - - // Commit to wires whose length is bounded by the real size of the ECCVM - for (const auto& [wire, label] : zip_view(key->polynomials.get_wires_without_accumulators(), - commitment_labels.get_wires_without_accumulators())) { - // TODO(https://github.com/AztecProtocol/barretenberg/issues/1240) Structured Polynomials in - // ECCVM/Translator/MegaZK - const size_t start = circuit_size == wire.size() ? 0 : 1; - std::vector> active_ranges{ { start, key->real_size + start }, - { unmasked_witness_size, circuit_size } }; - commit_to_witness_polynomial(wire, label, commit_type, active_ranges); - } - - // The accumulators are populated until the 2^{CONST_ECCVM_LOG_N}, therefore we commit to a full-sized polynomial - for (const auto& [wire, label] : - zip_view(key->polynomials.get_accumulators(), commitment_labels.get_accumulators())) { - commit_to_witness_polynomial(wire, label); + auto batch = key->commitment_key.start_batch(); + for (const auto& [wire, label] : zip_view(key->polynomials.get_wires(), commitment_labels.get_wires())) { + batch.add_to_batch(wire, label, /* mask for zk? */ true); } + batch.commit_and_send_to_verifier(transcript); } /** diff --git a/barretenberg/cpp/src/barretenberg/env/hardware_concurrency.cpp b/barretenberg/cpp/src/barretenberg/env/hardware_concurrency.cpp index 5fe706d527f2..c5049fa5caa7 100644 --- a/barretenberg/cpp/src/barretenberg/env/hardware_concurrency.cpp +++ b/barretenberg/cpp/src/barretenberg/env/hardware_concurrency.cpp @@ -9,27 +9,42 @@ #include #endif -extern "C" { - -#ifdef NO_MULTITHREADING -uint32_t env_hardware_concurrency() +static uint32_t& _get_num_cores() { - return 1; +#ifdef NO_MULTITHREADING + static uint32_t cores = 1; +#else + static const char* val = std::getenv("HARDWARE_CONCURRENCY"); + static uint32_t cores = + val != nullptr ? static_cast(std::stoul(val)) : std::thread::hardware_concurrency(); +#endif + return cores; } + +namespace bb { +// only for testing purposes currently +void set_hardware_concurrency([[maybe_unused]] size_t num_cores) +{ +#ifdef NO_MULTITHREADING + throw_or_abort("Cannot set hardware concurrency when multithreading is disabled."); #else + _get_num_cores() = static_cast(num_cores); +#endif +} +} // namespace bb + +extern "C" { + uint32_t env_hardware_concurrency() { #ifndef __wasm__ try { #endif - static auto val = std::getenv("HARDWARE_CONCURRENCY"); - static const uint32_t cores = val ? (uint32_t)std::stoul(val) : std::thread::hardware_concurrency(); - return cores; + return _get_num_cores(); #ifndef __wasm__ } catch (std::exception const&) { throw std::runtime_error("HARDWARE_CONCURRENCY invalid."); } #endif } -#endif -} \ No newline at end of file +} diff --git a/barretenberg/cpp/src/barretenberg/translator_vm/translator_prover.cpp b/barretenberg/cpp/src/barretenberg/translator_vm/translator_prover.cpp index 87c425dcf699..ce42f4bcc195 100644 --- a/barretenberg/cpp/src/barretenberg/translator_vm/translator_prover.cpp +++ b/barretenberg/cpp/src/barretenberg/translator_vm/translator_prover.cpp @@ -74,19 +74,21 @@ void TranslatorProver::commit_to_witness_polynomial(Polynomial& polynomial, cons */ void TranslatorProver::execute_wire_and_sorted_constraints_commitments_round() { - + BB_BENCH_NAME("TranslatorProver::execute_wire_and_sorted_constraints_commitments_round"); + auto batch = key->proving_key->commitment_key.start_batch(); for (const auto& [wire, label] : zip_view(key->proving_key->polynomials.get_wires(), commitment_labels.get_wires())) { - commit_to_witness_polynomial(wire, label); + batch.add_to_batch(wire, label, /*mask for zk?*/ false); } // The ordered range constraints are of full circuit size. for (const auto& [ordered_range_constraint, label] : zip_view(key->proving_key->polynomials.get_ordered_range_constraints(), commitment_labels.get_ordered_range_constraints())) { - commit_to_witness_polynomial(ordered_range_constraint, label); + batch.add_to_batch(ordered_range_constraint, label, /*mask for zk?*/ false); } + batch.commit_and_send_to_verifier(transcript); } /** diff --git a/barretenberg/cpp/src/barretenberg/ultra_honk/mega_honk.test.cpp b/barretenberg/cpp/src/barretenberg/ultra_honk/mega_honk.test.cpp index 2bb44fcfb710..2d091752567c 100644 --- a/barretenberg/cpp/src/barretenberg/ultra_honk/mega_honk.test.cpp +++ b/barretenberg/cpp/src/barretenberg/ultra_honk/mega_honk.test.cpp @@ -170,8 +170,6 @@ TYPED_TEST(MegaHonkTests, BasicStructured) // In MegaZKFlavor, we mask witness polynomials by placing random values at the indices `dyadic_circuit_size`-i for // i=1,2,3. This mechanism does not work with structured polynomials yet. - // TODO(https://github.com/AztecProtocol/barretenberg/issues/1240) Structured Polynomials in - // ECCVM/Translator/MegaZK if constexpr (std::is_same_v) { GTEST_SKIP() << "Skipping 'BasicStructured' test for MegaZKFlavor."; } @@ -209,8 +207,6 @@ TYPED_TEST(MegaHonkTests, DynamicVirtualSizeIncrease) // In MegaZKFlavor, we mask witness polynomials by placing random values at the indices `dyadic_circuit_size`-i for // i=1,2,3. This mechanism does not work with structured polynomials yet. - // TODO(https://github.com/AztecProtocol/barretenberg/issues/1240) Structured Polynomials in - // ECCVM/Translator/MegaZK if constexpr (std::is_same_v) { GTEST_SKIP() << "Skipping 'DynamicVirtualSizeIncrease' test for MegaZKFlavor."; } @@ -474,8 +470,6 @@ TYPED_TEST(MegaHonkTests, PolySwap) using Flavor = TypeParam; // In MegaZKFlavor, we mask witness polynomials by placing random values at the indices `dyadic_circuit_size`-i, for // i=1,2,3. This mechanism does not work with structured polynomials yet. - // TODO(https://github.com/AztecProtocol/barretenberg/issues/1240) Structured Polynomials in - // ECCVM/Translator/MegaZK if constexpr (std::is_same_v) { GTEST_SKIP() << "Skipping 'PolySwap' test for MegaZKFlavor."; } diff --git a/barretenberg/cpp/src/barretenberg/ultra_honk/oink_prover.cpp b/barretenberg/cpp/src/barretenberg/ultra_honk/oink_prover.cpp index a4343cdd6fe9..c7bf1b0257f1 100644 --- a/barretenberg/cpp/src/barretenberg/ultra_honk/oink_prover.cpp +++ b/barretenberg/cpp/src/barretenberg/ultra_honk/oink_prover.cpp @@ -81,26 +81,27 @@ template void OinkProver::execute_wire_commit BB_BENCH_NAME("OinkProver::execute_wire_commitments_round"); // Commit to the first three wire polynomials // We only commit to the fourth wire polynomial after adding memory recordss - { - auto commit_type = (prover_instance->get_is_structured()) ? CommitmentKey::CommitType::Structured - : CommitmentKey::CommitType::Default; + auto batch = prover_instance->commitment_key.start_batch(); + // Commit to the first three wire polynomials + // We only commit to the fourth wire polynomial after adding memory records - commit_to_witness_polynomial(prover_instance->polynomials.w_l, commitment_labels.w_l, commit_type); - commit_to_witness_polynomial(prover_instance->polynomials.w_r, commitment_labels.w_r, commit_type); - commit_to_witness_polynomial(prover_instance->polynomials.w_o, commitment_labels.w_o, commit_type); - } + batch.add_to_batch(prover_instance->polynomials.w_l, commitment_labels.w_l, /*mask?*/ Flavor::HasZK); + batch.add_to_batch(prover_instance->polynomials.w_r, commitment_labels.w_r, /*mask?*/ Flavor::HasZK); + batch.add_to_batch(prover_instance->polynomials.w_o, commitment_labels.w_o, /*mask?*/ Flavor::HasZK); if constexpr (IsMegaFlavor) { // Commit to Goblin ECC op wires. - // To avoid possible issues with the current work on the merge protocol, they are not - // masked in MegaZKFlavor + // Note even with zk, we do not mask here. The masking for these is done differently. + // It is necessary that "random" ops are added to the op_queue, which is then used to populate these ecc op + // wires. This is more holistic and obviates the need to extend with random values. + bool mask_ecc_op_polys = false; // Flavor::HasZK + for (auto [polynomial, label] : zip_view(prover_instance->polynomials.get_ecc_op_wires(), commitment_labels.get_ecc_op_wires())) { { BB_BENCH_NAME("COMMIT::ecc_op_wires"); - transcript->send_to_verifier(domain_separator + label, - prover_instance->commitment_key.commit(polynomial)); + batch.add_to_batch(polynomial, domain_separator + label, mask_ecc_op_polys); }; } @@ -109,10 +110,11 @@ template void OinkProver::execute_wire_commit zip_view(prover_instance->polynomials.get_databus_entities(), commitment_labels.get_databus_entities())) { { BB_BENCH_NAME("COMMIT::databus"); - commit_to_witness_polynomial(polynomial, label); + batch.add_to_batch(polynomial, label, /*mask?*/ Flavor::HasZK); } } } + batch.commit_and_send_to_verifier(transcript); } /** @@ -137,23 +139,14 @@ template void OinkProver::execute_sorted_list eta_three); // Commit to lookup argument polynomials and the finalized (i.e. with memory records) fourth wire polynomial - { - BB_BENCH_NAME("COMMIT::lookup_counts_tags"); - commit_to_witness_polynomial(prover_instance->polynomials.lookup_read_counts, - commitment_labels.lookup_read_counts, - CommitmentKey::CommitType::Sparse); - - commit_to_witness_polynomial(prover_instance->polynomials.lookup_read_tags, - commitment_labels.lookup_read_tags, - CommitmentKey::CommitType::Sparse); - } - { - BB_BENCH_NAME("COMMIT::wires"); - auto commit_type = (prover_instance->get_is_structured()) ? CommitmentKey::CommitType::Structured - : CommitmentKey::CommitType::Default; - commit_to_witness_polynomial( - prover_instance->polynomials.w_4, domain_separator + commitment_labels.w_4, commit_type); - } + auto batch = prover_instance->commitment_key.start_batch(); + batch.add_to_batch( + prover_instance->polynomials.lookup_read_counts, commitment_labels.lookup_read_counts, /*mask?*/ Flavor::HasZK); + batch.add_to_batch( + prover_instance->polynomials.lookup_read_tags, commitment_labels.lookup_read_tags, /*mask?*/ Flavor::HasZK); + batch.add_to_batch( + prover_instance->polynomials.w_4, domain_separator + commitment_labels.w_4, /*mask?*/ Flavor::HasZK); + batch.commit_and_send_to_verifier(transcript); } /** @@ -171,23 +164,19 @@ template void OinkProver::execute_log_derivat WitnessComputation::compute_logderivative_inverses( prover_instance->polynomials, prover_instance->dyadic_size(), prover_instance->relation_parameters); - { - BB_BENCH_NAME("COMMIT::lookup_inverses"); - commit_to_witness_polynomial(prover_instance->polynomials.lookup_inverses, - commitment_labels.lookup_inverses, - CommitmentKey::CommitType::Sparse); - } + auto batch = prover_instance->commitment_key.start_batch(); + batch.add_to_batch(prover_instance->polynomials.lookup_inverses, + commitment_labels.lookup_inverses, + /*mask?*/ Flavor::HasZK); // If Mega, commit to the databus inverse polynomials and send if constexpr (IsMegaFlavor) { for (auto [polynomial, label] : zip_view(prover_instance->polynomials.get_databus_inverses(), commitment_labels.get_databus_inverses())) { - { - BB_BENCH_NAME("COMMIT::databus_inverses"); - commit_to_witness_polynomial(polynomial, label, CommitmentKey::CommitType::Sparse); - } + batch.add_to_batch(polynomial, label, /*mask?*/ Flavor::HasZK); }; } + batch.commit_and_send_to_verifier(transcript); } /** diff --git a/barretenberg/cpp/src/barretenberg/vm2/constraining/prover.cpp b/barretenberg/cpp/src/barretenberg/vm2/constraining/prover.cpp index cbacafaeda5e..fb2681dc6031 100644 --- a/barretenberg/cpp/src/barretenberg/vm2/constraining/prover.cpp +++ b/barretenberg/cpp/src/barretenberg/vm2/constraining/prover.cpp @@ -12,9 +12,16 @@ #include "barretenberg/vm2/common/constants.hpp" #include "barretenberg/vm2/constraining/polynomials.hpp" #include "barretenberg/vm2/tooling/stats.hpp" +#include namespace bb::avm2 { +// TODO(AD): @facundo - tune this value +// The number of polynomials to compute MSMs for at once. it could be computed heuristically based on a max memory size +// (maybe that would be the env var?) +const size_t AVM_MAX_MSM_BATCH_SIZE = + getenv("AVM_MAX_MSM_BATCH_SIZE") != nullptr ? std::stoul(getenv("AVM_MAX_MSM_BATCH_SIZE")) : 4; + using Flavor = AvmFlavor; using FF = Flavor::FF; @@ -72,14 +79,16 @@ void AvmProver::execute_public_inputs_round() */ void AvmProver::execute_wire_commitments_round() { + BB_BENCH_NAME("AvmProver::execute_wire_commitments_round"); // Commit to all polynomials (apart from logderivative inverse polynomials, which are committed to in the later // logderivative phase) auto wire_polys = prover_polynomials.get_wires(); const auto& labels = prover_polynomials.get_wires_labels(); + auto batch = commitment_key.start_batch(); for (size_t idx = 0; idx < wire_polys.size(); ++idx) { - auto comm = commitment_key.commit(wire_polys[idx]); - transcript->send_to_verifier(labels[idx], comm); + batch.add_to_batch(wire_polys[idx], labels[idx], /*mask for zk?*/ false); } + batch.commit_and_send_to_verifier(transcript, AVM_MAX_MSM_BATCH_SIZE); } void AvmProver::execute_log_derivative_inverse_round() @@ -109,13 +118,16 @@ void AvmProver::execute_log_derivative_inverse_round() void AvmProver::execute_log_derivative_inverse_commitments_round() { + BB_BENCH_NAME("AvmProver::execute_log_derivative_inverse_commitments_round"); + auto batch = commitment_key.start_batch(); // Commit to all logderivative inverse polynomials and send to verifier for (auto [derived_poly, commitment, label] : zip_view(prover_polynomials.get_derived(), witness_commitments.get_derived(), prover_polynomials.get_derived_labels())) { - commitment = commitment_key.commit(derived_poly); - transcript->send_to_verifier(label, commitment); + + batch.add_to_batch(derived_poly, label, /*mask for zk?*/ false); } + batch.commit_and_send_to_verifier(transcript, AVM_MAX_MSM_BATCH_SIZE); } /** diff --git a/barretenberg/docs/scripts/build_docs.sh b/barretenberg/docs/scripts/build_docs.sh index 5b90d0657f5e..d67f5433282d 100755 --- a/barretenberg/docs/scripts/build_docs.sh +++ b/barretenberg/docs/scripts/build_docs.sh @@ -20,7 +20,7 @@ mkdir -p ../docs/static/api/ cp -R docs/build/* ../docs/static/api/ # NOTE(AD): hack - but was blocked and couldn't figure out why we had two examples for something called 'if' with different casing. -rm ../docs/static/api/if-example.html +rm -f ../docs/static/api/if-example.html echo "Doxygen documentation successfully built and copied to Docusaurus!" echo "You can now build and serve the Docusaurus site to view the integrated documentation." diff --git a/ci3/aws_request_instance_type b/ci3/aws_request_instance_type index 61cb0857f2f5..0429ff912e25 100755 --- a/ci3/aws_request_instance_type +++ b/ci3/aws_request_instance_type @@ -36,7 +36,7 @@ launch_spec=$(cat <