From 6fb3dea410b6f816d42c5d5340c338d7b78d5ff8 Mon Sep 17 00:00:00 2001 From: notnotraju Date: Wed, 18 Feb 2026 13:35:40 +0000 Subject: [PATCH 1/8] added tests showing edge-case failure for splitting_scalars and EC scalar multiplication Tests for the negative-k2 bug in split_into_endomorphism_scalars (Fr and Fq), plus EC-level scalar multiplication tests (g1 and grumpkin) showing the bug produces wrong points. Includes endomorphism_scalars.py for deriving the boundary scalars. Co-Authored-By: Claude Opus 4.6 --- .../barretenberg/ecc/curves/bn254/fq.test.cpp | 31 ++ .../barretenberg/ecc/curves/bn254/fr.test.cpp | 151 +------- .../barretenberg/ecc/curves/bn254/g1.test.cpp | 56 +++ .../ecc/curves/grumpkin/grumpkin.test.cpp | 58 +++ .../ecc/fields/endomorphism_scalars.py | 366 ++++++++++++++++++ 5 files changed, 532 insertions(+), 130 deletions(-) create mode 100644 barretenberg/cpp/src/barretenberg/ecc/fields/endomorphism_scalars.py diff --git a/barretenberg/cpp/src/barretenberg/ecc/curves/bn254/fq.test.cpp b/barretenberg/cpp/src/barretenberg/ecc/curves/bn254/fq.test.cpp index 9ea7717545f4..171685751fd5 100644 --- a/barretenberg/cpp/src/barretenberg/ecc/curves/bn254/fq.test.cpp +++ b/barretenberg/cpp/src/barretenberg/ecc/curves/bn254/fq.test.cpp @@ -130,6 +130,37 @@ TEST(BN254Fq, SplitIntoEndomorphismScalarsSimple) } } +// Negative-k2 bug: k = ceil(m * 2^256 / endo_g2) produces negative k2 in the GLV splitting, +// and the 128-bit truncation extracts the wrong value. See endomorphism_scalars.py for derivation. 
+TEST(BN254Fq, SplitEndomorphismNegativeK2) +{ + // clang-format off + struct test_case { std::array limbs; const char* tag; }; + const std::array cases = {{ + {{ 0x71922da036dca5f4, 0xd970a56127fb8227, 0x59e26bcea0d48bac, 0x0 }, "m=1"}, + {{ 0xe3245b406db94be8, 0xb2e14ac24ff7044e, 0xb3c4d79d41a91759, 0x0 }, "m=2"}, + {{ 0x54b688e0a495f1dc, 0x8c51f02377f28676, 0x0da7436be27da306, 0x1 }, "m=3"}, + }}; + // clang-format on + + fq lambda = fq::cube_root_of_unity(); + + for (const auto& tc : cases) { + fq k{ tc.limbs[0], tc.limbs[1], tc.limbs[2], tc.limbs[3] }; + fq k1{ 0, 0, 0, 0 }; + fq k2{ 0, 0, 0, 0 }; + + fq::split_into_endomorphism_scalars(k, k1, k2); + + k1.self_to_montgomery_form(); + k2.self_to_montgomery_form(); + fq result = k1 - k2 * lambda; + result.self_from_montgomery_form(); + + EXPECT_NE(result, k) << "Bug may be fixed! " << tc.tag; + } +} + TEST(BN254Fq, SplitIntoEndomorphismEdgeCase) { fq input = { 0, 0, 1, 0 }; // 2^128 diff --git a/barretenberg/cpp/src/barretenberg/ecc/curves/bn254/fr.test.cpp b/barretenberg/cpp/src/barretenberg/ecc/curves/bn254/fr.test.cpp index 0e11abf144c7..4f81411669c4 100644 --- a/barretenberg/cpp/src/barretenberg/ecc/curves/bn254/fr.test.cpp +++ b/barretenberg/cpp/src/barretenberg/ecc/curves/bn254/fr.test.cpp @@ -1,144 +1,35 @@ -/** - * @brief BN254 scalar field (fr) specific tests. - * - * Other field arithmetic tests (both compile-time and runtime) are in ecc/fields/generic_field.test.cpp and - * ecc/fields/prime_field.test.cpp. 
This file contains only BN254 scalar field specific functionality: - * - Fixed compile-time tests with field-specific expected values - * - Multiplicative generator (AUDITTODO: delete) - * - Endomorphism scalar decomposition - */ - #include "fr.hpp" -#include "barretenberg/numeric/random/engine.hpp" #include using namespace bb; -namespace { -auto& engine = numeric::get_debug_randomness(); -} // namespace - -// ================================ -// Fixed Compile-Time Tests (field-specific expected values) -// These tests use hardcoded expected values that are only valid for native builds (R = 2^256). -// WASM uses R = 2^261. -// ================================ - -#if defined(__SIZEOF_INT128__) && !defined(__wasm__) -TEST(BN254Fr, CompileTimeMultiplication) -{ - constexpr fr a{ 0x20565a572c565a66, 0x7bccd0f01f5f7bff, 0x63ec2beaad64711f, 0x624953caaf44a814 }; - constexpr fr b{ 0xa17307a2108adeea, 0x74629976c14c5e2b, 0x9ce6f072ab1740ee, 0x398c753702b2bef0 }; - constexpr fr expected{ 0xe8cdd06343386834, 0x8cbb3f556258a9af, 0x5aef2f34f2d66fd4, 0x2d8263c7e10213ca }; - - constexpr fr result = a * b; - static_assert(result == expected); -} - -TEST(BN254Fr, CompileTimeSquaring) +// Negative-k2 bug: k = ceil(m * 2^256 / endo_g2) produces negative k2 in the GLV splitting, +// and the 128-bit truncation extracts the wrong value. See endomorphism_scalars.py for derivation. 
+TEST(BN254Fr, SplitEndomorphismNegativeK2) { - constexpr fr a{ 0x20565a572c565a66, 0x7bccd0f01f5f7bff, 0x63ec2beaad64711f, 0x624953caaf44a814 }; - constexpr fr expected{ 0x3e928bdb06267b99, 0x1e5834571f52dfbf, 0x3d63bdf9bf7d0d4b, 0x353bb31adaa033c7 }; - - constexpr fr result = a.sqr(); - static_assert(result == expected); -} - -TEST(BN254Fr, CompileTimeAddition) -{ - constexpr fr a{ 0x20565a572c565a66, 0x7bccd0f01f5f7bff, 0x63ec2beaad64711f, 0x624953caaf44a814 }; - constexpr fr b{ 0xa17307a2108adeea, 0x74629976c14c5e2b, 0x9ce6f072ab1740ee, 0x398c753702b2bef0 }; - constexpr fr expected{ 0x3a0576d15ce1394e, 0x9fc799d5ed38f908, 0x903290f055790153, 0x3b0d2c1bef9426b1 }; - - constexpr fr result = a + b; - static_assert(result == expected); -} - -TEST(BN254Fr, CompileTimeSubtraction) -{ - constexpr fr a{ 0xcfbcfcf457cf2d38, 0x7b27af26ce62aa61, 0xf0378e90d48f2b92, 0x4734b22cb21ded }; - constexpr fr b{ 0x569fdb1db5198770, 0x446ddccef8347d52, 0xef215227182d22a, 0x8281b4fb109306 }; - constexpr fr expected{ 0xe10cfe82b5a5ca, 0x8721a2e8c9a10e32, 0x51e604db660f0a22, 0x608d4fe2f404cb3b }; - - constexpr fr result = a - b; - static_assert(result == expected); -} -#endif - -TEST(BN254Fr, CompileTimeInversion) -{ - constexpr fr a{ 0x20565a572c565a66, 0x7bccd0f01f5f7bff, 0x63ec2beaad64711f, 0x624953caaf44a814 }; - constexpr fr inv = a.invert(); - // Verify a * a^-1 = 1 - static_assert(a * inv == fr::one()); -} - -// ================================ -// BN254 Scalar Field Specific -// ================================ -// AUDITTODO: delete this (`multiplicative_generator` is misnamed and is no longer used.) 
-TEST(BN254Fr, MultiplicativeGenerator) -{ - EXPECT_EQ(fr::multiplicative_generator(), fr(5)); -} - -TEST(BN254Fr, SplitIntoEndomorphismScalars) -{ - fr k = fr::random_element(); - fr k1 = { 0, 0, 0, 0 }; - fr k2 = { 0, 0, 0, 0 }; - - fr::split_into_endomorphism_scalars(k, k1, k2); - - fr result{ 0, 0, 0, 0 }; - - k1.self_to_montgomery_form(); - k2.self_to_montgomery_form(); + // clang-format off + struct test_case { std::array limbs; const char* tag; }; + const std::array cases = {{ + {{ 0x01624731e1195570, 0x3ba491482db4da14, 0x59e26bcea0d48bac, 0x0 }, "m=1"}, + {{ 0x02c48e63c232aadf, 0x774922905b69b428, 0xb3c4d79d41a91758, 0x0 }, "m=2"}, + {{ 0x0426d595a34c004e, 0xb2edb3d8891e8e3c, 0x0da7436be27da304, 0x1 }, "m=3"}, + }}; + // clang-format on fr lambda = fr::cube_root_of_unity(); - result = k2 * lambda; - result = k1 - result; - result.self_from_montgomery_form(); - EXPECT_EQ(result, k); -} - -TEST(BN254Fr, SplitIntoEndomorphismScalarsSimple) -{ - fr input = { 1, 0, 0, 0 }; - fr k = { 0, 0, 0, 0 }; - fr k1 = { 0, 0, 0, 0 }; - fr k2 = { 0, 0, 0, 0 }; - fr::__copy(input, k); + for (const auto& tc : cases) { + fr k{ tc.limbs[0], tc.limbs[1], tc.limbs[2], tc.limbs[3] }; + fr k1{ 0, 0, 0, 0 }; + fr k2{ 0, 0, 0, 0 }; - fr::split_into_endomorphism_scalars(k, k1, k2); + fr::split_into_endomorphism_scalars(k, k1, k2); - fr result{ 0, 0, 0, 0 }; - k1.self_to_montgomery_form(); - k2.self_to_montgomery_form(); - - fr lambda = fr::cube_root_of_unity(); - result = k2 * lambda; - result = k1 - result; + k1.self_to_montgomery_form(); + k2.self_to_montgomery_form(); + fr result = k1 - k2 * lambda; + result.self_from_montgomery_form(); - result.self_from_montgomery_form(); - for (size_t i = 0; i < 4; ++i) { - EXPECT_EQ(result.data[i], k.data[i]); + EXPECT_NE(result, k) << "Bug may be fixed! 
" << tc.tag; } } - -// ================================ -// Regression / Optimization Tests -// ================================ - -// Tests that (lo + 2^256 * hi) mod r == ((lo|hi) % r) in uint512_t -// This validates the optimization of avoiding slow uint512_t modulo -TEST(BN254Fr, EquivalentRandomness) -{ - uint512_t random_uint512 = engine.get_random_uint512(); - auto random_lo = fr(random_uint512.lo); - auto random_hi = fr(random_uint512.hi); - uint512_t r(fr::modulus); - constexpr auto pow_2_256 = fr(uint256_t(1) << 128).sqr(); - EXPECT_EQ(random_lo + pow_2_256 * random_hi, fr((random_uint512 % r).lo)); -} diff --git a/barretenberg/cpp/src/barretenberg/ecc/curves/bn254/g1.test.cpp b/barretenberg/cpp/src/barretenberg/ecc/curves/bn254/g1.test.cpp index dec5d260d9d4..d1b3ca1e60ab 100644 --- a/barretenberg/cpp/src/barretenberg/ecc/curves/bn254/g1.test.cpp +++ b/barretenberg/cpp/src/barretenberg/ecc/curves/bn254/g1.test.cpp @@ -5,6 +5,24 @@ using namespace bb; +namespace { +// Double-and-add scalar mul without endomorphism, used as reference for differential testing. +template +typename Group::affine_element naive_scalar_mul(const typename Group::element& base, const Fr& scalar) +{ + typename Group::element acc = Group::point_at_infinity; + typename Group::element runner = base; + uint256_t bits(scalar.from_montgomery_form()); + for (size_t i = 0; i < 256; ++i) { + if (bits.get_bit(i)) { + acc = acc + runner; + } + runner = runner.dbl(); + } + return typename Group::affine_element(acc); +} +} // namespace + TEST(g1, RandomElement) { g1::element result = g1::element::random_element(); @@ -428,3 +446,41 @@ TEST(g1, CheckPrecomputedGenerators) ASSERT_TRUE((bb::check_precomputed_generators())); ASSERT_TRUE((bb::check_precomputed_generators())); } + +// Negative-k2 bug: boundary scalars k = ceil(m * 2^256 / endo_g2) (from endomorphism_scalars.py) +// each start a band ~2^123-2^126 wide where operator* gives wrong results vs naive double-and-add. 
+TEST(g1, ScalarMulNegativeK2EdgeCases) +{ + // clang-format off + struct test_case { std::array limbs; const char* tag; }; + const std::array boundary_cases = {{ + {{ 0x01624731e1195570, 0x3ba491482db4da14, 0x59e26bcea0d48bac, 0x0 }, "m=1"}, + {{ 0x02c48e63c232aadf, 0x774922905b69b428, 0xb3c4d79d41a91758, 0x0 }, "m=2"}, + {{ 0x0426d595a34c004e, 0xb2edb3d8891e8e3c, 0x0da7436be27da304, 0x1 }, "m=3"}, + }}; + // clang-format on + + for (const auto& tc : boundary_cases) { + fr base_scalar{ tc.limbs[0], tc.limbs[1], tc.limbs[2], tc.limbs[3] }; + base_scalar.self_to_montgomery_form(); + + g1::affine_element endo_result(g1::one * base_scalar); + g1::affine_element naive_result = naive_scalar_mul(g1::one, base_scalar); + EXPECT_EQ(naive_result.on_curve(), true) << tc.tag; + EXPECT_EQ(endo_result.on_curve(), true) << tc.tag; + EXPECT_NE(endo_result, naive_result) << "Bug may be fixed! " << tc.tag; + + // Random samples within the band (narrowest is ~2^123; we use 122-bit offsets). + for (size_t i = 0; i < 100; ++i) { + uint256_t rand_bits(fr::random_element().from_montgomery_form()); + uint256_t offset_int = (rand_bits & ((uint256_t(1) << 122) - 1)) + 1; + fr scalar = base_scalar + fr(offset_int); + + g1::affine_element endo_res(g1::one * scalar); + g1::affine_element naive_res = naive_scalar_mul(g1::one, scalar); + EXPECT_EQ(naive_res.on_curve(), true) << tc.tag << " offset " << i; + EXPECT_EQ(endo_res.on_curve(), true) << tc.tag << " offset " << i; + EXPECT_NE(endo_res, naive_res) << tc.tag << " offset " << i; + } + } +} diff --git a/barretenberg/cpp/src/barretenberg/ecc/curves/grumpkin/grumpkin.test.cpp b/barretenberg/cpp/src/barretenberg/ecc/curves/grumpkin/grumpkin.test.cpp index 86993e30a8ee..61bb30127c06 100644 --- a/barretenberg/cpp/src/barretenberg/ecc/curves/grumpkin/grumpkin.test.cpp +++ b/barretenberg/cpp/src/barretenberg/ecc/curves/grumpkin/grumpkin.test.cpp @@ -5,6 +5,24 @@ using namespace bb; +namespace { +// Double-and-add scalar mul without endomorphism, 
used as reference for differential testing. +template +typename Group::affine_element naive_scalar_mul(const typename Group::element& base, const Fr& scalar) +{ + typename Group::element acc = Group::point_at_infinity; + typename Group::element runner = base; + uint256_t bits(scalar.from_montgomery_form()); + for (size_t i = 0; i < 256; ++i) { + if (bits.get_bit(i)) { + acc = acc + runner; + } + runner = runner.dbl(); + } + return typename Group::affine_element(acc); +} +} // namespace + TEST(grumpkin, CheckB) { auto b = grumpkin::g1::curve_b; @@ -336,3 +354,43 @@ TEST(grumpkin, CheckPrecomputedGenerators) ASSERT_TRUE((bb::check_precomputed_generators())); ASSERT_TRUE((bb::check_precomputed_generators())); } + +// Negative-k2 bug: boundary scalars k = ceil(m * 2^256 / endo_g2) (from endomorphism_scalars.py) +// each start a band ~2^123-2^126 wide where operator* gives wrong results vs naive double-and-add. +TEST(grumpkin, ScalarMulNegativeK2EdgeCases) +{ + // clang-format off + struct test_case { std::array limbs; const char* tag; }; + const std::array boundary_cases = {{ + {{ 0x71922da036dca5f4, 0xd970a56127fb8227, 0x59e26bcea0d48bac, 0x0 }, "m=1"}, + {{ 0xe3245b406db94be8, 0xb2e14ac24ff7044e, 0xb3c4d79d41a91759, 0x0 }, "m=2"}, + {{ 0x54b688e0a495f1dc, 0x8c51f02377f28676, 0x0da7436be27da306, 0x1 }, "m=3"}, + }}; + // clang-format on + + for (const auto& tc : boundary_cases) { + grumpkin::fr base_scalar{ tc.limbs[0], tc.limbs[1], tc.limbs[2], tc.limbs[3] }; + base_scalar.self_to_montgomery_form(); + + grumpkin::g1::affine_element endo_result(grumpkin::g1::one * base_scalar); + grumpkin::g1::affine_element naive_result = + naive_scalar_mul(grumpkin::g1::one, base_scalar); + EXPECT_EQ(naive_result.on_curve(), true) << tc.tag; + EXPECT_EQ(endo_result.on_curve(), true) << tc.tag; + EXPECT_NE(endo_result, naive_result) << "Bug may be fixed! " << tc.tag; + + // Random samples within the band (narrowest is ~2^123; we use 122-bit offsets). 
+ for (size_t i = 0; i < 100; ++i) { + uint256_t rand_bits(grumpkin::fr::random_element().from_montgomery_form()); + uint256_t offset_int = (rand_bits & ((uint256_t(1) << 122) - 1)) + 1; + grumpkin::fr scalar = base_scalar + grumpkin::fr(offset_int); + + grumpkin::g1::affine_element endo_res(grumpkin::g1::one * scalar); + grumpkin::g1::affine_element naive_res = + naive_scalar_mul(grumpkin::g1::one, scalar); + EXPECT_EQ(naive_res.on_curve(), true) << tc.tag << " offset " << i; + EXPECT_EQ(endo_res.on_curve(), true) << tc.tag << " offset " << i; + EXPECT_NE(endo_res, naive_res) << tc.tag << " offset " << i; + } + } +} diff --git a/barretenberg/cpp/src/barretenberg/ecc/fields/endomorphism_scalars.py b/barretenberg/cpp/src/barretenberg/ecc/fields/endomorphism_scalars.py new file mode 100644 index 000000000000..896826e70dc9 --- /dev/null +++ b/barretenberg/cpp/src/barretenberg/ecc/fields/endomorphism_scalars.py @@ -0,0 +1,366 @@ +#!/usr/bin/env python3 +""" +BN254 GLV Endomorphism: Constants and Scalar Splitting Explained + +This document explains the "splitting scalars" algorithm in Barretenberg. The first section +works explicitly with Fr, the scalar field of the BN254 elliptic curve. + +Reference: Gallant, Lambert, Vanstone, "Faster Point Multiplication on Elliptic Curves" (2001) + +NOTATION: + ≈ "approximately equal" - values differ by a small rounding error +""" + +# ==================================================================================== +# § 1. INTRODUCTION +# ==================================================================================== +# +# The BN254 elliptic curve admits an efficiently computable endomorphism φ that +# satisfies φ(P) = λ·P where λ is a cube root of unity in the scalar field Fr. +# +# This enables an optimization: any scalar multiplication k·P can be +# decomposed as: +# +# k·P = k1·P - k2·φ(P) +# +# where k1, k2 are approximately 127 bits each (half the size of the original +# 254-bit scalar k). 
Since φ(P) is nearly free to compute (one Fq multiplication) +# and the scalars are half-sized, this significantly accelerates scalar multiplication. +# +# This document explains: +# 1. How the mathematical constants are derived +# 2. How the algorithm guarantees k ≡ k1 - λ·k2 (mod r), where k1 and k2 are short (~127-bit) values. + +# ==================================================================================== +# § 2. FIELD PARAMETERS (for Fr) +# ==================================================================================== + +# The scalar field modulus of BN254 (from bn254/fr.hpp) +r = 0x30644E72E131A029B85045B68181585D2833E84879B9709143E1F593F0000001 + +# Montgomery parameter: R = 2^256 mod r +# This is needed because fr.hpp stores values in Montgomery form +R = pow(2, 256, r) +R_inv = pow(R, -1, r) + +# The cube root of unity λ ∈ Fr (from bn254/fr.hpp) +# CRITICAL: In Barretenberg, this value is stored in Montgomery form and must be converted! +# We maintain the montgomery form here to show that the values are compatible with those in Barretenberg. +cube_root_montgomery = ( + 0x93e7cede4a0329b3 | + (0x7d4fdca77a96c167 << 64) | + (0x8be4ba08b19a750a << 128) | + (0x1cbd5653a5661c25 << 192) +) + +# Convert from Montgomery form to standard form +lambda_val = (cube_root_montgomery * R_inv) % r + +# Verify that λ is a non-trivial cube root of unity. +assert (pow(lambda_val, 2, r) + lambda_val + 1) % r == 0, "λ² + λ + 1 ≡ 0 (mod r)" + + +# ==================================================================================== +# § 3. THE LATTICE BASIS +# ==================================================================================== +# +# To decompose k into short scalars k1, k2, we use a "nearest lattice vector" style of algorithm. +# First, consider the 2D lattice: +# +# L = {(a, b) ∈ Z² : a + λ·b ≡ 0 (mod r)} = ker(Z² -> Z/rZ), (a, b) ↦ a + λ·b +# +# NOTE: any lattice point (a, b) ∈ L satisfies a ≡ -λ·b (mod r). 
+# +# The phrase "short basis for a lattice" means two vectors with small components. Here is +# our short basis. + +a1 = 0x89d3256894d213e3 # 64 bits +b1 = -0x6f4d8248eeb859fc8211bbeb7d4f1128 # 127 bits (negative) +a2 = 0x6f4d8248eeb859fd0be4e1541221250b # 127 bits +b2 = 0x89d3256894d213e3 # 64 bits + +# Verify that the vectors are in the lattice: ai + λ·bi ≡ 0 (mod r) +assert (a1 + lambda_val * b1) % r == 0, "Lattice vector 1 must satisfy a1 + λ·b1 ≡ 0" +assert (a2 + lambda_val * b2) % r == 0, "Lattice vector 2 must satisfy a2 + λ·b2 ≡ 0" + +# Verify the determinant: det(L) = a1·b2 - a2·b1 = -r ≡ 0 (mod r) +det = (a1 * b2 - a2 * b1) +assert abs(det) == r, "Lattice determinant ±r; hence for our vectors to be a lattice basis, they must have the same determinant (up to sign)" + +# These constants will later be shown to match those in barretenberg. It is worth noting +# that in fr.hpp, we DO NOT store a1 or a2; it turns out we ONLY need b1 and b2. + +# ==================================================================================== +# § 4. PRECOMPUTED CONSTANTS FOR DIVISION-FREE COMPUTATION +# ==================================================================================== +# +# The scalar splitting algorithm (Babai's nearest plane) requires computing: +# +# c1 ≈ (k·b2) / r +# c2 ≈ (k·(-b1)) / r +# +# Division by r is expensive. Instead, we _precompute_ fixed-point representations: +# +# endo_g1 = ((-b1) · 2^256) // r +# endo_g2 = (b2 · 2^256) // r +# +# Note that the above expressions, endo_g1 and endo_g2 DO NOT depend on k. +# +# Then: (endo_g2 · k) >> 256 ≈ (b2 · k) / r = c1 +# (endo_g1 · k) >> 256 ≈ ((-b1) · k) / r = c2 +# +# The ≈ hides two rounding errors: the floor in computing endo_g2 (or endo_g1), +# and the floor in the >> 256 shift. However, the total error is at most 1. +# So c1, c2 are each off by at most 1 from the exact rational values. 
+ + +def compute_splitting_constants(modulus, b1, b2): + """ + Compute the precomputed constants for division-free scalar splitting. + + Returns (endo_g1, endo_g2, endo_minus_b1, endo_b2) matching fr.hpp + """ + shift = 1 << 256 + endo_g1 = ((-b1) * shift) // modulus + endo_g2 = (b2 * shift) // modulus + endo_minus_b1 = (-b1) % modulus + endo_b2 = b2 % modulus + return endo_g1, endo_g2, endo_minus_b1, endo_b2 + + +endo_g1, endo_g2, endo_minus_b1, endo_b2 = compute_splitting_constants(r, b1, b2) + +# Verify these match the values in bn254/fr.hpp +expected_endo_g1 = 0x7a7bd9d4391eb18d | (0x4ccef014a773d2cf << 64) | (0x2 << 128) +expected_endo_g2 = 0xd91d232ec7e0b3d7 | (0x2 << 64) +expected_endo_minus_b1 = 0x8211bbeb7d4f1128 | (0x6f4d8248eeb859fc << 64) +expected_endo_b2 = 0x89d3256894d213e3 + +assert endo_g1 == expected_endo_g1, "endo_g1 must match fr.hpp" +assert endo_g2 == expected_endo_g2, "endo_g2 must match fr.hpp" +assert endo_minus_b1 == expected_endo_minus_b1, "endo_minus_b1 must match fr.hpp" +assert endo_b2 == expected_endo_b2, "endo_b2 must match fr.hpp" + + +# ==================================================================================== +# § 5. THE SCALAR SPLITTING ALGORITHM +# ==================================================================================== +# +# Given a scalar k, we compute (k1, k2) such that: +# k ≡ k1 - λ·k2 (mod r), |k1|, |k2| < 2^128 +# +# DERIVATION (Babai's nearest plane on L): +# +# Find a lattice point (x, y) ∈ L close to (k, 0). Setting k1 = k - x, k2 = y, +# the lattice condition x + λ·y ≡ 0 (mod r) gives k1 - λ·k2 ≡ k (mod r). +# +# Writing (x, y) = c1·(a1, b1) + c2·(a2, b2), inverting the basis matrix +# against the target (k, 0) with det = r gives: +# +# c1 = ⌊k·b2 / r⌋, c2 = ⌊k·(-b1) / r⌋ +# +# Note: neither a1 nor a2 appear, which is why we don't store them. +# Note: b1 is negative, hence the second expression is again non-negative. +# +# BOUNDS: Let δ1, δ2 ∈ [0,1) be the rounding errors. 
A simple computation +# shows that k2 = c1·b1 + c2·b2 would be zero with exact division, so +# |k2| ≤ |δ1·b1| + |δ2·b2| < |b1| + |b2| < 2^128. Similarly |k1| < 2^128. +# +# WHAT THE IMPLEMENTATION COMPUTES: +# +# k2: The implementation stores -b1 (not b1), so it computes +# t1 := c2·b2 - c1·(-b1) = c1·b1 + c2·b2 = k2 (mod r) +# Since |k2| < 2^128 < r, the mod r is a no-op. +# +# k1: Using the lattice relation ai ≡ -λ·bi (mod r), a simple computation +# shows c1·a1 + c2·a2 ≡ -λ·k2 (mod r), so k1 = k + λ·k2. Hence: +# t2 := t1·λ + k = k1 (mod r) +# Again |k1| < r, so the mod r is a no-op. +# +# ALGORITHM (field_declarations.hpp, split_into_endomorphism_scalars): +# +# 1. c1 ≈ (b2·k)/r via c1 = (endo_g2 · k) >> 256 +# 2. c2 ≈ ((-b1)·k)/r via c2 = (endo_g1 · k) >> 256 +# 3. q1 = c1 · (-b1) mod r +# 4. q2 = c2 · b2 mod r +# 5. t1 = (q2 - q1) mod r = k2 +# 6. t2 = (t1 · λ + k) mod r = k1 +# 7. Return low 128 bits of (t2, t1) as (k1, k2) +# + +def split_scalar_buggy(k, modulus, beta, endo_g1, endo_g2, endo_minus_b1, endo_b2): + """ + BUGGY version of the scalar splitting algorithm (before fix). + + This is the original implementation that fails for ~2^{-64} of inputs. + Kept here to demonstrate the bug; use split_scalar() for the fixed version. + """ + input = k % modulus + + c1 = (endo_g2 * input) >> 256 + c2 = (endo_g1 * input) >> 256 + + q1_lo = (c1 * endo_minus_b1) % modulus + q2_lo = (c2 * endo_b2) % modulus + + t1 = (q2_lo - q1_lo) % modulus + t2 = (t1 * beta + input) % modulus + + k2 = t1 & ((1 << 128) - 1) + k1 = t2 & ((1 << 128) - 1) + + return k1, k2, t1, t2 + + +# ==================================================================================== +# § 5a. THE NEGATIVE k2 BUG AND FIX +# ==================================================================================== +# +# THE BUG: +# +# The §5 analysis claims "Since |k2| < 2^128 < r, the mod r is a no-op." This is true +# for the MAGNITUDE of k2, but k2 can be NEGATIVE. 
When k2 < 0: +# +# t1 = k2 mod r = k2 + r (254 bits!) +# +# The 128-bit truncation {t1.data[0], t1.data[1]} then extracts garbage, and both +# k1 and k2 are wrong. +# +# WHEN DOES k2 GO NEGATIVE? +# +# Recall k2 = c1·b1 + c2·b2 = -c1·|b1| + c2·b2, where c1 and c2 are floors of +# rational values. Writing c1 = c1_exact - δ1, c2 = c2_exact - δ2 with δ ∈ [0,1): +# +# k2 = -δ1·|b1| + δ2·b2 +# +# This is negative when δ1·|b1| > δ2·b2. Since |b1|/b2 ≈ 2^63, this requires +# δ1 > δ2·2^{-63} — i.e., δ1 can be tiny but must be nonzero. +# +# In practice, this happens at boundaries where c1 "ticks up" to a new integer m. +# The boundary is at k ≈ m·r/b2, where c1 jumps from m-1 to m. Just above the +# boundary, c1 = m while c2·b2 hasn't grown enough to compensate, so +# k2 = c2·b2 - m·|b1| < 0 (with magnitude up to ~b2 ≈ 2^64). +# +# HOW MANY INPUTS ARE AFFECTED? +# +# For each of the ~b2 ≈ 2^64 values of c1, there is a contiguous range of ~2^{126} +# values of k where k2 < 0. Total: ~2^{190} out of r ≈ 2^{254}, a fraction of ~2^{-64}. +# This is far too rare for random testing to catch (~2^{64} samples needed), but the +# affected inputs are deterministic and easily constructed. +# +# THE FIX: +# +# When k2 is negative, t1 = k2 + r has nonzero upper limbs (bits 128..253). We detect +# this and add |b1| to k2, which is equivalent to decrementing c1 by 1. This shifts +# the decomposition by the lattice vector (a1, b1): +# +# k2_new = k2 + |b1| (now positive, ~127 bits) +# k1_new = k1 - a1 (shifted by ~64 bits, still fits in 128 bits) +# +# Algebraic correctness is preserved because (a1, b1) ∈ L, so the shift satisfies +# a1 + λ·b1 ≡ 0 (mod r), meaning k1_new - λ·k2_new ≡ k1 - λ·k2 ≡ k (mod r). + +def split_scalar(k, modulus, beta, endo_g1, endo_g2, endo_minus_b1, endo_b2): + """ + Split scalar k into (k1, k2) such that k ≡ k1 - λ·k2 (mod r). + + Implements split_into_endomorphism_scalars() in field_declarations.hpp + (with the negative-k2 fix applied). 
+ + Returns: + (k1, k2, t1, t2): The 128-bit split scalars and their full-width forms + """ + input = k % modulus + + # compute c1 = (g2 * k) >> 256 + c1 = (endo_g2 * input) >> 256 + # compute c2 = (g1 * k) >> 256 + c2 = (endo_g1 * input) >> 256 + + # compute q1 = c1 * -b1 + q1_lo = (c1 * endo_minus_b1) % modulus + # compute q2 = c2 * b2 + q2_lo = (c2 * endo_b2) % modulus + + t1 = (q2_lo - q1_lo) % modulus + + # FIX: k2 (= t1) can be slightly negative for ~2^{-64} of inputs. + # When negative, t1 = k2 + r is 254 bits (upper limbs nonzero in C++). + # Detect this and add |b1| to shift along the lattice vector (a1, b1), + # making k2 positive while keeping both scalars within 128 bits. + if t1.bit_length() > 128: + t1 = (t1 + endo_minus_b1) % modulus + + t2 = (t1 * beta + input) % modulus + + # Truncate to 128 bits (as done in C++ implementation) + k2 = t1 & ((1 << 128) - 1) + k1 = t2 & ((1 << 128) - 1) + + return k1, k2, t1, t2 + + +# ==================================================================================== +# § 6. VERIFICATION +# ==================================================================================== +# +# We verify the algorithm on several test cases, including edge cases. + +def verify_split(k, k1, k2, t1, t2, lambda_val, modulus): + """Verify correctness and bounds of the scalar split.""" + reconstructed = (k1 - lambda_val * k2) % modulus + assert reconstructed == k % modulus, f"k ≡ k1 - λ·k2 failed for k={k}" + assert t1.bit_length() <= 128, f"t1 has {t1.bit_length()} bits (> 128) for k={k}" + assert t2.bit_length() <= 128, f"t2 has {t2.bit_length()} bits (> 128) for k={k}" + +for k_test in [0, 1, 42, lambda_val, r - 1]: + k1, k2, t1, t2 = split_scalar(k_test, r, lambda_val, endo_g1, endo_g2, endo_minus_b1, endo_b2) + verify_split(k_test, k1, k2, t1, t2, lambda_val, r) + + +# § 6a. Verify the fix on concrete negative-k2 trigger inputs. 
+# +# These are k = ceil(m * 2^256 / endo_g2) for m = 1, 2, 3 — the smallest k values +# where c1 ticks up to m. The buggy version fails; the fixed version must pass. +for m in [1, 2, 3]: + k_trigger = (m * (1 << 256) + endo_g2 - 1) // endo_g2 + assert k_trigger < r, f"trigger input must be < r for m={m}" + + # Buggy version fails (t1 > 128 bits due to negative k2) + _, _, t1_buggy, _ = split_scalar_buggy(k_trigger, r, lambda_val, endo_g1, endo_g2, endo_minus_b1, endo_b2) + assert t1_buggy.bit_length() > 128, f"Expected buggy t1 > 128 bits for m={m}, got {t1_buggy.bit_length()}" + + # Fixed version passes + k1, k2, t1, t2 = split_scalar(k_trigger, r, lambda_val, endo_g1, endo_g2, endo_minus_b1, endo_b2) + verify_split(k_trigger, k1, k2, t1, t2, lambda_val, r) + + +# ==================================================================================== +# § 7. SUMMARY +# ==================================================================================== +# +# The GLV endomorphism optimization for BN254: +# +# 1. The BN254 curve has an endomorphism φ(x,y) = (β·x, y) satisfying φ(P) = λ·P +# 2. We can decompose any scalar k as k ≡ k1 - λ·k2 (mod r) with k1, k2 ≈ 127 bits +# 3. The decomposition uses Babai's nearest plane algorithm on a short lattice basis +# 4. Precomputed constants enable division-free computation via fixed-point arithmetic +# 5. The algorithm guarantees both k1 and k2 fit in 128 bits (actually ≤ 127 bits) +# +# Performance: Instead of one 254-bit scalar multiplication k·P, we compute +# k1·P - k2·φ(P) with two ~127-bit scalars processed via interleaved WNAF, +# reducing the number of doublings by roughly half. 
+# +# References: +# • Gallant, Lambert, Vanstone: "Faster Point Multiplication on Elliptic Curves +# with Efficient Endomorphisms", CRYPTO 2001 +# • https://www.iacr.org/archive/crypto2001/21390189.pdf + +if __name__ == "__main__": + print("BN254 GLV Endomorphism - All verifications passed!") + print(f" λ (cube root of unity): {hex(lambda_val)}") + print(f" endo_g1: {hex(endo_g1)}") + print(f" endo_g2: {hex(endo_g2)}") + print(f" endo_minus_b1: {hex(endo_minus_b1)}") + print(f" endo_b2: {hex(endo_b2)}") + print("\nConstants match barretenberg/cpp/src/barretenberg/ecc/curves/bn254/fr.hpp") From ce106a217ed28fa5b247498c9503daec0792cbd9 Mon Sep 17 00:00:00 2001 From: notnotraju Date: Thu, 19 Feb 2026 13:28:20 +0000 Subject: [PATCH 2/8] fixed the splitting scalars issue, modified the python documentation and changed the tests to be regression tests accordingly. --- .../barretenberg/ecc/curves/bn254/fq.test.cpp | 6 +- .../barretenberg/ecc/curves/bn254/fr.test.cpp | 140 ++++++++++++++++- .../barretenberg/ecc/curves/bn254/g1.test.cpp | 17 +- .../ecc/curves/grumpkin/grumpkin.test.cpp | 17 +- .../ecc/fields/endomorphism_scalars.py | 145 +++++++----------- .../ecc/fields/field_declarations.hpp | 10 ++ 6 files changed, 226 insertions(+), 109 deletions(-) diff --git a/barretenberg/cpp/src/barretenberg/ecc/curves/bn254/fq.test.cpp b/barretenberg/cpp/src/barretenberg/ecc/curves/bn254/fq.test.cpp index 171685751fd5..d14d202c6663 100644 --- a/barretenberg/cpp/src/barretenberg/ecc/curves/bn254/fq.test.cpp +++ b/barretenberg/cpp/src/barretenberg/ecc/curves/bn254/fq.test.cpp @@ -130,8 +130,8 @@ TEST(BN254Fq, SplitIntoEndomorphismScalarsSimple) } } -// Negative-k2 bug: k = ceil(m * 2^256 / endo_g2) produces negative k2 in the GLV splitting, -// and the 128-bit truncation extracts the wrong value. See endomorphism_scalars.py for derivation. 
+// Regression: k = ceil(m * 2^256 / endo_g2), for m an integer, previously produced negative k2 in the GLV +// splitting, causing 128-bit truncation to extract wrong values. See endomorphism_scalars.py. TEST(BN254Fq, SplitEndomorphismNegativeK2) { // clang-format off @@ -157,7 +157,7 @@ TEST(BN254Fq, SplitEndomorphismNegativeK2) fq result = k1 - k2 * lambda; result.self_from_montgomery_form(); - EXPECT_NE(result, k) << "Bug may be fixed! " << tc.tag; + EXPECT_EQ(result, k) << tc.tag; } } diff --git a/barretenberg/cpp/src/barretenberg/ecc/curves/bn254/fr.test.cpp b/barretenberg/cpp/src/barretenberg/ecc/curves/bn254/fr.test.cpp index 4f81411669c4..1ac3c3021df8 100644 --- a/barretenberg/cpp/src/barretenberg/ecc/curves/bn254/fr.test.cpp +++ b/barretenberg/cpp/src/barretenberg/ecc/curves/bn254/fr.test.cpp @@ -1,10 +1,128 @@ +/** + * @brief BN254 scalar field (fr) specific tests. + * + * Other field arithmetic tests (both compile-time and runtime) are in ecc/fields/generic_field.test.cpp and + * ecc/fields/prime_field.test.cpp. This file contains only BN254 scalar field specific functionality: + * - Fixed compile-time tests with field-specific expected values + * - Endomorphism scalar decomposition + */ + #include "fr.hpp" +#include "barretenberg/numeric/random/engine.hpp" #include using namespace bb; -// Negative-k2 bug: k = ceil(m * 2^256 / endo_g2) produces negative k2 in the GLV splitting, -// and the 128-bit truncation extracts the wrong value. See endomorphism_scalars.py for derivation. +namespace { +auto& engine = numeric::get_debug_randomness(); +} // namespace + +// ================================ +// Fixed Compile-Time Tests (field-specific expected values) +// These tests use hardcoded expected values that are only valid for native builds (R = 2^256). +// WASM uses R = 2^261. 
+// ================================ + +#if defined(__SIZEOF_INT128__) && !defined(__wasm__) +TEST(BN254Fr, CompileTimeMultiplication) +{ + constexpr fr a{ 0x20565a572c565a66, 0x7bccd0f01f5f7bff, 0x63ec2beaad64711f, 0x624953caaf44a814 }; + constexpr fr b{ 0xa17307a2108adeea, 0x74629976c14c5e2b, 0x9ce6f072ab1740ee, 0x398c753702b2bef0 }; + constexpr fr expected{ 0xe8cdd06343386834, 0x8cbb3f556258a9af, 0x5aef2f34f2d66fd4, 0x2d8263c7e10213ca }; + + constexpr fr result = a * b; + static_assert(result == expected); +} + +TEST(BN254Fr, CompileTimeSquaring) +{ + constexpr fr a{ 0x20565a572c565a66, 0x7bccd0f01f5f7bff, 0x63ec2beaad64711f, 0x624953caaf44a814 }; + constexpr fr expected{ 0x3e928bdb06267b99, 0x1e5834571f52dfbf, 0x3d63bdf9bf7d0d4b, 0x353bb31adaa033c7 }; + + constexpr fr result = a.sqr(); + static_assert(result == expected); +} + +TEST(BN254Fr, CompileTimeAddition) +{ + constexpr fr a{ 0x20565a572c565a66, 0x7bccd0f01f5f7bff, 0x63ec2beaad64711f, 0x624953caaf44a814 }; + constexpr fr b{ 0xa17307a2108adeea, 0x74629976c14c5e2b, 0x9ce6f072ab1740ee, 0x398c753702b2bef0 }; + constexpr fr expected{ 0x3a0576d15ce1394e, 0x9fc799d5ed38f908, 0x903290f055790153, 0x3b0d2c1bef9426b1 }; + + constexpr fr result = a + b; + static_assert(result == expected); +} + +TEST(BN254Fr, CompileTimeSubtraction) +{ + constexpr fr a{ 0xcfbcfcf457cf2d38, 0x7b27af26ce62aa61, 0xf0378e90d48f2b92, 0x4734b22cb21ded }; + constexpr fr b{ 0x569fdb1db5198770, 0x446ddccef8347d52, 0xef215227182d22a, 0x8281b4fb109306 }; + constexpr fr expected{ 0xe10cfe82b5a5ca, 0x8721a2e8c9a10e32, 0x51e604db660f0a22, 0x608d4fe2f404cb3b }; + + constexpr fr result = a - b; + static_assert(result == expected); +} +#endif + +TEST(BN254Fr, CompileTimeInversion) +{ + constexpr fr a{ 0x20565a572c565a66, 0x7bccd0f01f5f7bff, 0x63ec2beaad64711f, 0x624953caaf44a814 }; + constexpr fr inv = a.invert(); + // Verify a * a^-1 = 1 + static_assert(a * inv == fr::one()); +} + +// ================================ +// BN254 Scalar Field Specific 
+// ================================ + +TEST(BN254Fr, SplitIntoEndomorphismScalars) +{ + fr k = fr::random_element(); + fr k1 = { 0, 0, 0, 0 }; + fr k2 = { 0, 0, 0, 0 }; + + fr::split_into_endomorphism_scalars(k, k1, k2); + + fr result{ 0, 0, 0, 0 }; + + k1.self_to_montgomery_form(); + k2.self_to_montgomery_form(); + + fr lambda = fr::cube_root_of_unity(); + result = k2 * lambda; + result = k1 - result; + + result.self_from_montgomery_form(); + EXPECT_EQ(result, k); +} + +TEST(BN254Fr, SplitIntoEndomorphismScalarsSimple) +{ + fr input = { 1, 0, 0, 0 }; + fr k = { 0, 0, 0, 0 }; + fr k1 = { 0, 0, 0, 0 }; + fr k2 = { 0, 0, 0, 0 }; + fr::__copy(input, k); + + fr::split_into_endomorphism_scalars(k, k1, k2); + + fr result{ 0, 0, 0, 0 }; + k1.self_to_montgomery_form(); + k2.self_to_montgomery_form(); + + fr lambda = fr::cube_root_of_unity(); + result = k2 * lambda; + result = k1 - result; + + result.self_from_montgomery_form(); + for (size_t i = 0; i < 4; ++i) { + EXPECT_EQ(result.data[i], k.data[i]); + } +} + +// Regression: k = ceil(m * 2^256 / endo_g2), for m an integer, previously produced negative k2 in the GLV +// splitting, causing 128-bit truncation to extract wrong values. TEST(BN254Fr, SplitEndomorphismNegativeK2) { // clang-format off @@ -30,6 +148,22 @@ TEST(BN254Fr, SplitEndomorphismNegativeK2) fr result = k1 - k2 * lambda; result.self_from_montgomery_form(); - EXPECT_NE(result, k) << "Bug may be fixed! 
" << tc.tag; + EXPECT_EQ(result, k) << tc.tag; } } + +// ================================ +// Regression / Optimization Tests +// ================================ + +// Tests that (lo + 2^256 * hi) mod r == ((lo|hi) % r) in uint512_t +// This validates the optimization of avoiding slow uint512_t modulo +TEST(BN254Fr, EquivalentRandomness) +{ + uint512_t random_uint512 = engine.get_random_uint512(); + auto random_lo = fr(random_uint512.lo); + auto random_hi = fr(random_uint512.hi); + uint512_t r(fr::modulus); + constexpr auto pow_2_256 = fr(uint256_t(1) << 128).sqr(); + EXPECT_EQ(random_lo + pow_2_256 * random_hi, fr((random_uint512 % r).lo)); +} diff --git a/barretenberg/cpp/src/barretenberg/ecc/curves/bn254/g1.test.cpp b/barretenberg/cpp/src/barretenberg/ecc/curves/bn254/g1.test.cpp index d1b3ca1e60ab..ea3651b8a83c 100644 --- a/barretenberg/cpp/src/barretenberg/ecc/curves/bn254/g1.test.cpp +++ b/barretenberg/cpp/src/barretenberg/ecc/curves/bn254/g1.test.cpp @@ -12,7 +12,7 @@ typename Group::affine_element naive_scalar_mul(const typename Group::element& b { typename Group::element acc = Group::point_at_infinity; typename Group::element runner = base; - uint256_t bits(scalar.from_montgomery_form()); + uint256_t bits(scalar); for (size_t i = 0; i < 256; ++i) { if (bits.get_bit(i)) { acc = acc + runner; @@ -447,9 +447,10 @@ TEST(g1, CheckPrecomputedGenerators) ASSERT_TRUE((bb::check_precomputed_generators())); } -// Negative-k2 bug: boundary scalars k = ceil(m * 2^256 / endo_g2) (from endomorphism_scalars.py) -// each start a band ~2^123-2^126 wide where operator* gives wrong results vs naive double-and-add. -TEST(g1, ScalarMulNegativeK2EdgeCases) +// Regression: boundary scalars k = ceil(m * 2^256 / endo_g2) (from endomorphism_scalars.py) +// previously triggered the negative-k2 bug in split_into_endomorphism_scalars, producing wrong +// scalar multiplication results. We test boundaries and random samples within each band. 
+TEST(g1, ScalarMulNegativeK2Regression) { // clang-format off struct test_case { std::array limbs; const char* tag; }; @@ -468,11 +469,11 @@ TEST(g1, ScalarMulNegativeK2EdgeCases) g1::affine_element naive_result = naive_scalar_mul(g1::one, base_scalar); EXPECT_EQ(naive_result.on_curve(), true) << tc.tag; EXPECT_EQ(endo_result.on_curve(), true) << tc.tag; - EXPECT_NE(endo_result, naive_result) << "Bug may be fixed! " << tc.tag; + EXPECT_EQ(endo_result, naive_result) << tc.tag; - // Random samples within the band (narrowest is ~2^123; we use 122-bit offsets). + // Random samples within the formerly-buggy band (~2^123-2^126 wide; 122-bit offsets). for (size_t i = 0; i < 100; ++i) { - uint256_t rand_bits(fr::random_element().from_montgomery_form()); + uint256_t rand_bits(fr::random_element()); uint256_t offset_int = (rand_bits & ((uint256_t(1) << 122) - 1)) + 1; fr scalar = base_scalar + fr(offset_int); @@ -480,7 +481,7 @@ TEST(g1, ScalarMulNegativeK2EdgeCases) g1::affine_element naive_res = naive_scalar_mul(g1::one, scalar); EXPECT_EQ(naive_res.on_curve(), true) << tc.tag << " offset " << i; EXPECT_EQ(endo_res.on_curve(), true) << tc.tag << " offset " << i; - EXPECT_NE(endo_res, naive_res) << tc.tag << " offset " << i; + EXPECT_EQ(endo_res, naive_res) << tc.tag << " offset " << i; } } } diff --git a/barretenberg/cpp/src/barretenberg/ecc/curves/grumpkin/grumpkin.test.cpp b/barretenberg/cpp/src/barretenberg/ecc/curves/grumpkin/grumpkin.test.cpp index 61bb30127c06..9cb34c57740a 100644 --- a/barretenberg/cpp/src/barretenberg/ecc/curves/grumpkin/grumpkin.test.cpp +++ b/barretenberg/cpp/src/barretenberg/ecc/curves/grumpkin/grumpkin.test.cpp @@ -12,7 +12,7 @@ typename Group::affine_element naive_scalar_mul(const typename Group::element& b { typename Group::element acc = Group::point_at_infinity; typename Group::element runner = base; - uint256_t bits(scalar.from_montgomery_form()); + uint256_t bits(scalar); for (size_t i = 0; i < 256; ++i) { if (bits.get_bit(i)) { acc = acc 
+ runner; @@ -355,9 +355,10 @@ TEST(grumpkin, CheckPrecomputedGenerators) ASSERT_TRUE((bb::check_precomputed_generators())); } -// Negative-k2 bug: boundary scalars k = ceil(m * 2^256 / endo_g2) (from endomorphism_scalars.py) -// each start a band ~2^123-2^126 wide where operator* gives wrong results vs naive double-and-add. -TEST(grumpkin, ScalarMulNegativeK2EdgeCases) +// Regression: boundary scalars k = ceil(m * 2^256 / endo_g2) (from endomorphism_scalars.py) +// previously triggered the negative-k2 bug in split_into_endomorphism_scalars, producing wrong +// scalar multiplication results. We test boundaries and random samples within each band. +TEST(grumpkin, ScalarMulNegativeK2Regression) { // clang-format off struct test_case { std::array limbs; const char* tag; }; @@ -377,11 +378,11 @@ TEST(grumpkin, ScalarMulNegativeK2EdgeCases) naive_scalar_mul(grumpkin::g1::one, base_scalar); EXPECT_EQ(naive_result.on_curve(), true) << tc.tag; EXPECT_EQ(endo_result.on_curve(), true) << tc.tag; - EXPECT_NE(endo_result, naive_result) << "Bug may be fixed! " << tc.tag; + EXPECT_EQ(endo_result, naive_result) << tc.tag; - // Random samples within the band (narrowest is ~2^123; we use 122-bit offsets). + // Random samples within the formerly-buggy band (~2^123-2^126 wide; 122-bit offsets). 
for (size_t i = 0; i < 100; ++i) { - uint256_t rand_bits(grumpkin::fr::random_element().from_montgomery_form()); + uint256_t rand_bits(grumpkin::fr::random_element()); uint256_t offset_int = (rand_bits & ((uint256_t(1) << 122) - 1)) + 1; grumpkin::fr scalar = base_scalar + grumpkin::fr(offset_int); @@ -390,7 +391,7 @@ TEST(grumpkin, ScalarMulNegativeK2EdgeCases) naive_scalar_mul(grumpkin::g1::one, scalar); EXPECT_EQ(naive_res.on_curve(), true) << tc.tag << " offset " << i; EXPECT_EQ(endo_res.on_curve(), true) << tc.tag << " offset " << i; - EXPECT_NE(endo_res, naive_res) << tc.tag << " offset " << i; + EXPECT_EQ(endo_res, naive_res) << tc.tag << " offset " << i; } } } diff --git a/barretenberg/cpp/src/barretenberg/ecc/fields/endomorphism_scalars.py b/barretenberg/cpp/src/barretenberg/ecc/fields/endomorphism_scalars.py index 896826e70dc9..2831e3085eb0 100644 --- a/barretenberg/cpp/src/barretenberg/ecc/fields/endomorphism_scalars.py +++ b/barretenberg/cpp/src/barretenberg/ecc/fields/endomorphism_scalars.py @@ -79,6 +79,8 @@ a2 = 0x6f4d8248eeb859fd0be4e1541221250b # 127 bits b2 = 0x89d3256894d213e3 # 64 bits +# NOTE: a remarkable feature of this short basis is that a1 == b2, and indeed -b1 is rather close to a2. + # Verify that the vectors are in the lattice: ai + λ·bi ≡ 0 (mod r) assert (a1 + lambda_val * b1) % r == 0, "Lattice vector 1 must satisfy a1 + λ·b1 ≡ 0" assert (a2 + lambda_val * b2) % r == 0, "Lattice vector 2 must satisfy a2 + λ·b2 ≡ 0" @@ -161,111 +163,68 @@ def compute_splitting_constants(modulus, b1, b2): # # Note: neither a1 nor a2 appear, which is why we don't store them. # Note: b1 is negative, hence the second expression is again non-negative. +# Note: we can think of (k1, k2) as the "error term" to a lattice approximation of (k, 0). # -# BOUNDS: Let δ1, δ2 ∈ [0,1) be the rounding errors. A simple computation +# APPROXIMATE BOUNDS: Let δ1, δ2 ∈ [0,1) be the rounding errors. 
A simple computation # shows that k2 = c1·b1 + c2·b2 would be zero with exact division, so # |k2| ≤ |δ1·b1| + |δ2·b2| < |b1| + |b2| < 2^128. Similarly |k1| < 2^128. # -# WHAT THE IMPLEMENTATION COMPUTES: +# WHAT THE NAIVE IMPLEMENTATION COMPUTES: # # k2: The implementation stores -b1 (not b1), so it computes -# t1 := c2·b2 - c1·(-b1) = c1·b1 + c2·b2 = k2 (mod r) -# Since |k2| < 2^128 < r, the mod r is a no-op. +# c2·b2 - c1·(-b1) = c1·b1 + c2·b2 = k2 (mod r) # # k1: Using the lattice relation ai ≡ -λ·bi (mod r), a simple computation # shows c1·a1 + c2·a2 ≡ -λ·k2 (mod r), so k1 = k + λ·k2. Hence: -# t2 := t1·λ + k = k1 (mod r) -# Again |k1| < r, so the mod r is a no-op. +# k2·λ + k = k1 (mod r) # -# ALGORITHM (field_declarations.hpp, split_into_endomorphism_scalars): +# SUBTLETY — k2 CAN BE NEGATIVE: # -# 1. c1 ≈ (b2·k)/r via c1 = (endo_g2 · k) >> 256 -# 2. c2 ≈ ((-b1)·k)/r via c2 = (endo_g1 · k) >> 256 -# 3. q1 = c1 · (-b1) mod r -# 4. q2 = c2 · b2 mod r -# 5. t1 = (q2 - q1) mod r = k2 -# 6. t2 = (t1 · λ + k) mod r = k1 -# 7. Return low 128 bits of (t2, t1) as (k1, k2) +# The bound |k2| < 2^128 guarantees the MAGNITUDE fits in 128 bits, but k2 +# can be negative. When k2 < 0, the modular reduction gives t1 = k2 + r, +# which is ~254 bits. The 128-bit truncation then extracts garbage. # - -def split_scalar_buggy(k, modulus, beta, endo_g1, endo_g2, endo_minus_b1, endo_b2): - """ - BUGGY version of the scalar splitting algorithm (before fix). - - This is the original implementation that fails for ~2^{-64} of inputs. - Kept here to demonstrate the bug; use split_scalar() for the fixed version. 
- """ - input = k % modulus - - c1 = (endo_g2 * input) >> 256 - c2 = (endo_g1 * input) >> 256 - - q1_lo = (c1 * endo_minus_b1) % modulus - q2_lo = (c2 * endo_b2) % modulus - - t1 = (q2_lo - q1_lo) % modulus - t2 = (t1 * beta + input) % modulus - - k2 = t1 & ((1 << 128) - 1) - k1 = t2 & ((1 << 128) - 1) - - return k1, k2, t1, t2 - - -# ==================================================================================== -# § 5a. THE NEGATIVE k2 BUG AND FIX -# ==================================================================================== -# -# THE BUG: -# -# The §5 analysis claims "Since |k2| < 2^128 < r, the mod r is a no-op." This is true -# for the MAGNITUDE of k2, but k2 can be NEGATIVE. When k2 < 0: -# -# t1 = k2 mod r = k2 + r (254 bits!) -# -# The 128-bit truncation {t1.data[0], t1.data[1]} then extracts garbage, and both -# k1 and k2 are wrong. -# -# WHEN DOES k2 GO NEGATIVE? -# -# Recall k2 = c1·b1 + c2·b2 = -c1·|b1| + c2·b2, where c1 and c2 are floors of -# rational values. Writing c1 = c1_exact - δ1, c2 = c2_exact - δ2 with δ ∈ [0,1): +# Recall k2 = -c1·|b1| + c2·b2, where c1 and c2 are floors of rational +# values. Writing c1 = c1_exact - δ1, c2 = c2_exact - δ2 with δ ∈ [0,1): # # k2 = -δ1·|b1| + δ2·b2 # -# This is negative when δ1·|b1| > δ2·b2. Since |b1|/b2 ≈ 2^63, this requires +# This is negative when δ1·|b1| > δ2·b2. Since |b1|/b2 ≈ 2^63, this needs # δ1 > δ2·2^{-63} — i.e., δ1 can be tiny but must be nonzero. # -# In practice, this happens at boundaries where c1 "ticks up" to a new integer m. -# The boundary is at k ≈ m·r/b2, where c1 jumps from m-1 to m. Just above the -# boundary, c1 = m while c2·b2 hasn't grown enough to compensate, so -# k2 = c2·b2 - m·|b1| < 0 (with magnitude up to ~b2 ≈ 2^64). +# This happens at boundaries where c1 "ticks up" to a new integer m: at +# k ≈ ceil(m · 2^256 / endo_g2), c1 jumps to m while c2·b2 hasn't grown +# enough to compensate, so k2 = c2·b2 - m·|b1| < 0. # -# HOW MANY INPUTS ARE AFFECTED? 
+# Frequency: for each of the ~b2 ≈ 2^64 values of c1, there is a contiguous +# range of ~2^{126} affected k values. Total: ~2^{190} / 2^{254} ≈ 2^{-64} +# fraction. Far too rare for random testing, but easily constructed. # -# For each of the ~b2 ≈ 2^64 values of c1, there is a contiguous range of ~2^{126} -# values of k where k2 < 0. Total: ~2^{190} out of r ≈ 2^{254}, a fraction of ~2^{-64}. -# This is far too rare for random testing to catch (~2^{64} samples needed), but the -# affected inputs are deterministic and easily constructed. +# FIX: When t1 has bits above position 128 (in C++: t1.data[2] or t1.data[3] +# nonzero), we add |b1| to t1. This is equivalent to decrementing c1 by 1, +# shifting the decomposition by the lattice vector (a1, b1). In other words +# we change our "close lattice vector": # -# THE FIX: +# k2_new = k2 + |b1| (now positive, ~127 bits) +# k1_new = k1 - a1 (shifted down by ~64 bits) # -# When k2 is negative, t1 = k2 + r has nonzero upper limbs (bits 128..253). We detect -# this and add |b1| to k2, which is equivalent to decrementing c1 by 1. This shifts -# the decomposition by the lattice vector (a1, b1): +# ALGORITHM (field_declarations.hpp, split_into_endomorphism_scalars): # -# k2_new = k2 + |b1| (now positive, ~127 bits) -# k1_new = k1 - a1 (shifted by ~64 bits, still fits in 128 bits) +# 1. c1 ≈ (b2·k)/r via c1 = (endo_g2 · k) >> 256 +# 2. c2 ≈ ((-b1)·k)/r via c2 = (endo_g1 · k) >> 256 +# 3. q1 = c1 · (-b1) (low 256 bits of 512-bit product) +# 4. q2 = c2 · b2 (low 256 bits of 512-bit product) +# 5. t1 = (q2 - q1).reduce_once() = k2 (mod r) +# 6. if t1 > 128 bits: t1 = (t1 + endo_minus_b1).reduce_once() [negative k2 fix] +# 7. t2 = (t1 · λ + k).reduce_once() = k1 (mod r) +# 8. Return low 128 bits of (t2, t1) as (k1, k2) # -# Algebraic correctness is preserved because (a1, b1) ∈ L, so the shift satisfies -# a1 + λ·b1 ≡ 0 (mod r), meaning k1_new - λ·k2_new ≡ k1 - λ·k2 ≡ k (mod r). 
def split_scalar(k, modulus, beta, endo_g1, endo_g2, endo_minus_b1, endo_b2): """ Split scalar k into (k1, k2) such that k ≡ k1 - λ·k2 (mod r). - Implements split_into_endomorphism_scalars() in field_declarations.hpp - (with the negative-k2 fix applied). + Implements split_into_endomorphism_scalars() in field_declarations.hpp. Returns: (k1, k2, t1, t2): The 128-bit split scalars and their full-width forms @@ -284,10 +243,10 @@ def split_scalar(k, modulus, beta, endo_g1, endo_g2, endo_minus_b1, endo_b2): t1 = (q2_lo - q1_lo) % modulus - # FIX: k2 (= t1) can be slightly negative for ~2^{-64} of inputs. + # Negative-k2 fix: k2 (= t1) can be slightly negative for ~2^{-64} of inputs. # When negative, t1 = k2 + r is 254 bits (upper limbs nonzero in C++). - # Detect this and add |b1| to shift along the lattice vector (a1, b1), - # making k2 positive while keeping both scalars within 128 bits. + # Adding |b1| shifts along the lattice vector (a1, b1), making k2 positive. + # In C++: if (t1.data[2] != 0 || t1.data[3] != 0) if t1.bit_length() > 128: t1 = (t1 + endo_minus_b1) % modulus @@ -318,19 +277,29 @@ def verify_split(k, k1, k2, t1, t2, lambda_val, modulus): verify_split(k_test, k1, k2, t1, t2, lambda_val, r) -# § 6a. Verify the fix on concrete negative-k2 trigger inputs. +# § 6a. Verify the negative-k2 fix on concrete trigger inputs. # # These are k = ceil(m * 2^256 / endo_g2) for m = 1, 2, 3 — the smallest k values -# where c1 ticks up to m. The buggy version fails; the fixed version must pass. +# where c1 ticks up to m. Without the fix, t1 would be > 128 bits (negative k2 +# wraps around mod r to ~254 bits). The fix brings t1 back within 128 bits. 
for m in [1, 2, 3]: k_trigger = (m * (1 << 256) + endo_g2 - 1) // endo_g2 assert k_trigger < r, f"trigger input must be < r for m={m}" - # Buggy version fails (t1 > 128 bits due to negative k2) - _, _, t1_buggy, _ = split_scalar_buggy(k_trigger, r, lambda_val, endo_g1, endo_g2, endo_minus_b1, endo_b2) - assert t1_buggy.bit_length() > 128, f"Expected buggy t1 > 128 bits for m={m}, got {t1_buggy.bit_length()}" - - # Fixed version passes + # Show that the raw (pre-fix) t1 would be > 128 bits for these inputs: + # compute t1_raw without the fix to demonstrate the problem + inp = k_trigger % r + c1_raw = (endo_g2 * inp) >> 256 + c2_raw = (endo_g1 * inp) >> 256 + q1_raw = (c1_raw * endo_minus_b1) % r + q2_raw = (c2_raw * endo_b2) % r + t1_raw = (q2_raw - q1_raw) % r + assert t1_raw.bit_length() > 128, ( + f"Expected raw t1 > 128 bits for m={m}, got {t1_raw.bit_length()} — " + f"this input should trigger the negative-k2 case" + ) + + # The actual algorithm (with fix) must produce valid 128-bit scalars k1, k2, t1, t2 = split_scalar(k_trigger, r, lambda_val, endo_g1, endo_g2, endo_minus_b1, endo_b2) verify_split(k_trigger, k1, k2, t1, t2, lambda_val, r) @@ -346,6 +315,8 @@ def verify_split(k, k1, k2, t1, t2, lambda_val, modulus): # 3. The decomposition uses Babai's nearest plane algorithm on a short lattice basis # 4. Precomputed constants enable division-free computation via fixed-point arithmetic # 5. The algorithm guarantees both k1 and k2 fit in 128 bits (actually ≤ 127 bits) +# 6. 
For ~2^{-64} of inputs, k2 is slightly negative; the fix detects this (upper +# limbs nonzero) and shifts along a lattice vector to make k2 positive # # Performance: Instead of one 254-bit scalar multiplication k·P, we compute # k1·P - k2·φ(P) with two ~127-bit scalars processed via interleaved WNAF, diff --git a/barretenberg/cpp/src/barretenberg/ecc/fields/field_declarations.hpp b/barretenberg/cpp/src/barretenberg/ecc/fields/field_declarations.hpp index e396578a9053..66872b24ddd7 100644 --- a/barretenberg/cpp/src/barretenberg/ecc/fields/field_declarations.hpp +++ b/barretenberg/cpp/src/barretenberg/ecc/fields/field_declarations.hpp @@ -496,6 +496,16 @@ template struct alignas(32) field { field q2_lo{ q2.data[0], q2.data[1], q2.data[2], q2.data[3] }; field t1 = (q2_lo - q1_lo).reduce_once(); + + // k2 (= t1) can be slightly negative for ~2^{-64} of inputs. + // When negative, t1 = k2 + r is 254 bits (upper limbs nonzero). + // Fix: decrement c1 by 1, equivalent to adding |b1| to k2. + // This shifts k2 by +|b1| (~127 bits, now positive) and k1 by -a1 (~64 bits), + // keeping both within 128 bits. See endomorphism_scalars.py for more details. 
+ if (t1.data[2] != 0 || t1.data[3] != 0) { + t1 = (t1 + endo_minus_b1).reduce_once(); + } + field beta = cube_root_of_unity(); field t2 = (t1 * beta + input).reduce_once(); return { From 188e09295f13813a26fba8afd75a7f49f8f4973a Mon Sep 17 00:00:00 2001 From: notnotraju Date: Fri, 20 Feb 2026 10:25:55 +0000 Subject: [PATCH 3/8] added biggroup_goblin regression test --- .../biggroup/biggroup_goblin.test.cpp | 59 +++++++++++++++++++ 1 file changed, 59 insertions(+) diff --git a/barretenberg/cpp/src/barretenberg/stdlib/primitives/biggroup/biggroup_goblin.test.cpp b/barretenberg/cpp/src/barretenberg/stdlib/primitives/biggroup/biggroup_goblin.test.cpp index 30e86b9cbe6a..0814ef69b5a6 100644 --- a/barretenberg/cpp/src/barretenberg/stdlib/primitives/biggroup/biggroup_goblin.test.cpp +++ b/barretenberg/cpp/src/barretenberg/stdlib/primitives/biggroup/biggroup_goblin.test.cpp @@ -154,6 +154,60 @@ template class stdlib_biggroup_goblin : public testing::Test { EXPECT_CIRCUIT_CORRECTNESS(builder); } + /** + * @brief Regression test: negative-k2 edge-case scalar through the stdlib biggroup path. + * @details The naive GLV endomorphism splitting can produce a negative k2 for ~2^{-64} of inputs. + * Before the fix in split_into_endomorphism_scalars, this caused the op queue to store garbage + * z1/z2 values (254-bit instead of 128-bit). The stdlib biggroup `batch_mul` adds the constraint + * `scalar.assert_equal(z_1 - z_2 * beta)`, which catches the mismatch at the Mega circuit level. + * + * See ecc/fields/endomorphism_scalars.py for an analysis. + */ + static void test_endomorphism_negative_k2_regression() + { + // clang-format off + // Boundary scalars k = ceil(m * 2^256 / endo_g2) from endomorphism_scalars.py. + // These are the smallest scalars where c1 ticks up, making k2 negative. 
+ const std::array, 3> boundary_cases = {{ + {{ 0x01624731e1195570, 0x3ba491482db4da14, 0x59e26bcea0d48bac, 0x0 }}, // m=1 + {{ 0x02c48e63c232aadf, 0x774922905b69b428, 0xb3c4d79d41a91758, 0x0 }}, // m=2 + {{ 0x0426d595a34c004e, 0xb2edb3d8891e8e3c, 0x0da7436be27da304, 0x1 }}, // m=3 + }}; + // clang-format on + + for (const auto& limbs : boundary_cases) { + fr base_scalar(uint256_t{ limbs[0], limbs[1], limbs[2], limbs[3] }); + + // The negative-k2 band extends ~2^{123}-2^{126} above each boundary scalar. + // A random 122-bit positive perturbation lands inside this band, where the + // original (unfixed) k2 is still negative. We therefore test two scalars: the original boundary case and a + // 122-bit perturbation. + uint256_t rand_bits(fr::random_element()); + uint256_t offset = rand_bits & ((uint256_t(1) << 122) - 1); + std::array scalars = { base_scalar, base_scalar + fr(offset) }; + + // Test via batch_mul + for (const auto& scalar : scalars) { + Builder builder; + element_ct pt = element_ct::from_witness(&builder, affine_element::one()); + scalar_ct sc = scalar_ct::from_witness(&builder, scalar); + element_ct result = element_ct::batch_mul({ pt }, { sc }); + (void)result; + EXPECT_CIRCUIT_CORRECTNESS(builder); + } + + // Test via operator* (delegates to batch_mul) + for (const auto& scalar : scalars) { + Builder builder; + element_ct pt = element_ct::from_witness(&builder, affine_element::one()); + scalar_ct sc = scalar_ct::from_witness(&builder, scalar); + element_ct result = pt * sc; + (void)result; + EXPECT_CIRCUIT_CORRECTNESS(builder); + } + } + } + /** * @brief Check goblin-style negate works as intended, including with points at infinity */ @@ -199,3 +253,8 @@ TYPED_TEST(stdlib_biggroup_goblin, neg) { TestFixture::test_goblin_style_neg(); } + +TYPED_TEST(stdlib_biggroup_goblin, endomorphism_negative_k2_regression) +{ + TestFixture::test_endomorphism_negative_k2_regression(); +} From 2042e8432349c333615a3520960cff1bae7aaf9b Mon Sep 17 00:00:00 2001 From: 
notnotraju Date: Mon, 23 Feb 2026 13:05:25 +0000 Subject: [PATCH 4/8] refactored the literate python program, added information about fq and secpk1 (to at least generate parameters) --- .../ecc/fields/endomorphism_scalars.py | 647 ++++++++++++++---- 1 file changed, 510 insertions(+), 137 deletions(-) diff --git a/barretenberg/cpp/src/barretenberg/ecc/fields/endomorphism_scalars.py b/barretenberg/cpp/src/barretenberg/ecc/fields/endomorphism_scalars.py index 2831e3085eb0..5f0152d2f5e3 100644 --- a/barretenberg/cpp/src/barretenberg/ecc/fields/endomorphism_scalars.py +++ b/barretenberg/cpp/src/barretenberg/ecc/fields/endomorphism_scalars.py @@ -1,38 +1,137 @@ #!/usr/bin/env python3 """ -BN254 GLV Endomorphism: Constants and Scalar Splitting Explained +GLV Endomorphism: Constants and Scalar Splitting for Multiple Curves -This document explains the "splitting scalars" algorithm in Barretenberg. The first section -works explicitly with Fr, the scalar field of the BN254 elliptic curve. +This document explains the "splitting scalars" algorithm in Barretenberg for all curves +that admit an efficient endomorphism. 
We cover: + + Part 0 (§0): Preliminaries — the GLV lattice and how to find a short basis + Part I (§1–§5): BN254 Fr — the scalar field of BN254 (254-bit, uses 2^256 shift) + Part II (§6–§9): BN254 Fq — the base field of BN254 (254-bit, uses 2^256 shift) + Part III (§10–§14): secp256k1 Fr — the scalar field of secp256k1 (256-bit, uses 2^384 shift) Reference: Gallant, Lambert, Vanstone, "Faster Point Multiplication on Elliptic Curves" (2001) NOTATION: - ≈ "approximately equal" - values differ by a small rounding error + p a prime modulus (generic; each Part instantiates it) + λ a non-trivial cube root of unity in F_p (λ³ = 1, λ ≠ 1) + ≈ "approximately equal" — values differ by a small rounding error """ + +# ╔══════════════════════════════════════════════════════════════════════════════╗ +# ║ PART 0: Preliminaries — The GLV Lattice ║ +# ╚══════════════════════════════════════════════════════════════════════════════╝ + # ==================================================================================== -# § 1. INTRODUCTION +# § 0. THE GLV LATTICE AND SHORT BASIS # ==================================================================================== # -# The BN254 elliptic curve admits an efficiently computable endomorphism φ that -# satisfies φ(P) = λ·P where λ is a cube root of unity in the scalar field Fr. +# Let p be a prime and λ ∈ F_p a non-trivial cube root of unity (so λ² + λ + 1 ≡ 0). +# The GLV lattice is: +# +# L = { (a, b) ∈ Z² : a + λ·b ≡ 0 (mod p) } +# +# Equivalently, +# +# L is a full-rank sublattice of Z² with determinant ±p (since the map +# Z² → Z/pZ, (a,b) ↦ a + λb is surjective, its kernel has index p). # -# This enables an optimization: any scalar multiplication k·P can be -# decomposed as: +# SCALAR SPLITTING. Given a scalar k, find a lattice point (x, y) close to (k, 0). +# Then k1 = k − x and k2 = y satisfy k ≡ k1 − λ·k2 (mod p), with both k1, k2 small. 
+# Writing (x, y) = c1·(a1,b1) + c2·(a2,b2) and inverting the 2×2 basis matrix against +# (k, 0) with det = p gives: # -# k·P = k1·P - k2·φ(P) +# c1 = ⌊k·b2 / p⌋, c2 = ⌊k·(−b1) / p⌋ # -# where k1, k2 are approximately 127 bits each (half the size of the original -# 254-bit scalar k). Since φ(P) is nearly free to compute (one Fq multiplication) -# and the scalars are half-sized, this significantly accelerates scalar multiplication. +# Note: only b1 and b2 appear; a1, a2 are not needed for the splitting, which is why +# Barretenberg stores only b1 and b2 in the .hpp parameter files. Any lattice element +# satisfies a ≡ -λ·b (mod p). This relation is verified for every basis we compute, +# and allows us to recover k1 from k2 without needing a1 or a2: since +# c1·a1 + c2·a2 ≡ -λ·(c1·b1 + c2·b2) ≡ -λ·k2, we get k1 = k + λ·k2. # -# This document explains: -# 1. How the mathematical constants are derived -# 2. How the algorithm guarantees k ≡ k1 - λ·k2 (mod r), where k1 and k2 are short (~127-bit) values. +# FINDING A SHORT BASIS. We run the Euclidean algorithm on (λ, p). The successive +# remainders r_i satisfy |r_i| ≈ p / (product of quotients), so they shrink from p +# down through √p and below. We stop at the first remainder r_j < √p and read off +# two short lattice vectors from the Bézout coefficients at steps j−1 and j. +# +# The resulting vector sizes depend on the specific λ and p: +# +# • BN254 (Fr and Fq): the curve is constructed from a 63-bit parameter x, and the +# lattice vectors are a1 = b2 = 2x+1 (64 bits), |b1| = 6x²+2x (127 bits). +# This asymmetric 64/127-bit pattern is a consequence of the BN parametrisation. +# +# • secp256k1 Fr: no small generating parameter; the lattice vectors are all in the +# generic ~126–129-bit range (roughly √p for a 256-bit prime). +# + +from math import isqrt + +def find_short_lattice_basis(lambda_val, modulus): + """ + Find a short basis for the lattice L = {(a,b) : a + λ·b ≡ 0 (mod p)}. 
+ + Returns (a1, b1, a2, b2) such that: + - (a1, b1) and (a2, b2) are a basis for L + - a1 + λ·b1 ≡ 0 (mod p), a2 + λ·b2 ≡ 0 (mod p) + - det = a1·b2 - a2·b1 = ±p + """ + # √p is the target vector length. The lattice L has determinant p, so by + # Minkowski's theorem its shortest vector has length ≤ √p. We want both + # basis vectors near that length so that the GLV subscalars k1, k2 are + # each ~ √p ≈ 2^{n/2} + approx_sqrt = isqrt(modulus) + + # Extended Euclidean algorithm on (λ, p). + # + # Bézout invariant (maintained at every step): + # remainder ≡ coeff · λ (mod p) + # + # This holds initially (remainder = λ, coeff = 1) and is preserved by + # the update new_remainder = prev_remainder - quot·remainder, new_coeff = + # prev_coeff - quot·coeff. It follows that (-remainder, coeff) is always + # a lattice vector: -remainder + λ·coeff ≡ -coeff·λ + coeff·λ ≡ 0. + remainder, prev_remainder = lambda_val, modulus + coeff, prev_coeff = 1, 0 + + # Run until the remainder first drops below √p. + while remainder >= approx_sqrt: + quot = prev_remainder // remainder + prev_remainder, remainder = remainder, prev_remainder - quot * remainder + prev_coeff, coeff = coeff, prev_coeff - quot * coeff + + # At this point: + # vec_before = (-prev_remainder, prev_coeff) — last step above √p + # vec_cross = (-remainder, coeff) — first step below √p + vec_before = (-prev_remainder, prev_coeff) + vec_cross = (-remainder, coeff) + + # One more EEA step gives an independent candidate vector. + quot = prev_remainder // remainder + r_after = prev_remainder - quot * remainder + s_after = prev_coeff - quot * coeff + vec_after = (-r_after, s_after) + + # First basis vector: vec_cross (shortest, by construction). + # Second basis vector: shorter of vec_before and vec_after. + a1, b1 = vec_cross + a2, b2 = vec_after if (r_after**2 + s_after**2 < prev_remainder**2 + prev_coeff**2) else vec_before + + # Normalise signs so that a1, a2 are positive. 
+ if a1 < 0 or a1.bit_length() >= 128: + a1, b1 = -a1, -b1 + if a2 < 0 or a2.bit_length() >= 128: + a2, b2 = -a2, -b2 + + return a1, b1, a2, b2 + + +# ╔══════════════════════════════════════════════════════════════════════════════╗ +# ║ PART I: BN254 Fr (Scalar Field) ║ +# ╚══════════════════════════════════════════════════════════════════════════════╝ # ==================================================================================== -# § 2. FIELD PARAMETERS (for Fr) +# § 1. BN254 Fr — FIELD PARAMETERS # ==================================================================================== # The scalar field modulus of BN254 (from bn254/fr.hpp) @@ -61,18 +160,10 @@ # ==================================================================================== -# § 3. THE LATTICE BASIS +# § 2. BN254 Fr — LATTICE BASIS # ==================================================================================== # -# To decompose k into short scalars k1, k2, we use a "nearest lattice vector" style of algorithm. -# First, consider the 2D lattice: -# -# L = {(a, b) ∈ Z² : a + λ·b ≡ 0 (mod r)} = ker(Z² -> Z/rZ), (a, b) ↦ a + λ·b -# -# NOTE: any lattice point (a, b) ∈ L satisfies a ≡ -λ·b (mod r). -# -# The phrase "short basis for a lattice" means two vectors with small components. Here is -# our short basis. +# Short basis for the GLV lattice L (see §0). a1 = 0x89d3256894d213e3 # 64 bits b1 = -0x6f4d8248eeb859fc8211bbeb7d4f1128 # 127 bits (negative) @@ -89,31 +180,19 @@ det = (a1 * b2 - a2 * b1) assert abs(det) == r, "Lattice determinant ±r; hence for our vectors to be a lattice basis, they must have the same determinant (up to sign)" -# These constants will later be shown to match those in barretenberg. It is worth noting -# that in fr.hpp, we DO NOT store a1 or a2; it turns out we ONLY need b1 and b2. +# Note: fr.hpp does NOT store a1 or a2 — only b1 and b2 are needed (see §0). # ==================================================================================== -# § 4. 
PRECOMPUTED CONSTANTS FOR DIVISION-FREE COMPUTATION +# § 3. BN254 Fr — PRECOMPUTED CONSTANTS (256-bit shift) # ==================================================================================== # -# The scalar splitting algorithm (Babai's nearest plane) requires computing: -# -# c1 ≈ (k·b2) / r -# c2 ≈ (k·(-b1)) / r -# -# Division by r is expensive. Instead, we _precompute_ fixed-point representations: -# -# endo_g1 = ((-b1) · 2^256) // r -# endo_g2 = (b2 · 2^256) // r +# Fixed-point approximations for division-free Babai rounding (see §0): # -# Note that the above expressions, endo_g1 and endo_g2 DO NOT depend on k. +# endo_g1 = ⌊(-b1) · 2^256 / r⌋ +# endo_g2 = ⌊b2 · 2^256 / r⌋ # -# Then: (endo_g2 · k) >> 256 ≈ (b2 · k) / r = c1 -# (endo_g1 · k) >> 256 ≈ ((-b1) · k) / r = c2 -# -# The ≈ hides two rounding errors: the floor in computing endo_g2 (or endo_g1), -# and the floor in the >> 256 shift. However, the total error is at most 1. -# So c1, c2 are each off by at most 1 from the exact rational values. +# Then c1 = (endo_g2 · k) >> 256 ≈ k·b2/r and c2 = (endo_g1 · k) >> 256 ≈ k·(-b1)/r, +# each off by at most 1 from the exact rational value. def compute_splitting_constants(modulus, b1, b2): @@ -145,79 +224,30 @@ def compute_splitting_constants(modulus, b1, b2): # ==================================================================================== -# § 5. THE SCALAR SPLITTING ALGORITHM +# § 4. BN254 Fr — THE 256-BIT SPLITTING ALGORITHM # ==================================================================================== # -# Given a scalar k, we compute (k1, k2) such that: -# k ≡ k1 - λ·k2 (mod r), |k1|, |k2| < 2^128 -# -# DERIVATION (Babai's nearest plane on L): -# -# Find a lattice point (x, y) ∈ L close to (k, 0). Setting k1 = k - x, k2 = y, -# the lattice condition x + λ·y ≡ 0 (mod r) gives k1 - λ·k2 ≡ k (mod r). 
-# -# Writing (x, y) = c1·(a1, b1) + c2·(a2, b2), inverting the basis matrix -# against the target (k, 0) with det = r gives: -# -# c1 = ⌊k·b2 / r⌋, c2 = ⌊k·(-b1) / r⌋ -# -# Note: neither a1 nor a2 appear, which is why we don't store them. -# Note: b1 is negative, hence the second expression is again non-negative. -# Note: we can think of (k1, k2) as the "error term" to a lattice approximation of (k, 0). -# -# APPROXIMATE BOUNDS: Let δ1, δ2 ∈ [0,1) be the rounding errors. A simple computation -# shows that k2 = c1·b1 + c2·b2 would be zero with exact division, so -# |k2| ≤ |δ1·b1| + |δ2·b2| < |b1| + |b2| < 2^128. Similarly |k1| < 2^128. -# -# WHAT THE NAIVE IMPLEMENTATION COMPUTES: -# -# k2: The implementation stores -b1 (not b1), so it computes -# c2·b2 - c1·(-b1) = c1·b1 + c2·b2 = k2 (mod r) -# -# k1: Using the lattice relation ai ≡ -λ·bi (mod r), a simple computation -# shows c1·a1 + c2·a2 ≡ -λ·k2 (mod r), so k1 = k + λ·k2. Hence: -# k2·λ + k = k1 (mod r) +# Computes (k1, k2) with k ≡ k1 - λ·k2 (mod r) and |k1|, |k2| < 2^128. +# See §0 for the derivation (Babai's nearest plane). # # SUBTLETY — k2 CAN BE NEGATIVE: # -# The bound |k2| < 2^128 guarantees the MAGNITUDE fits in 128 bits, but k2 -# can be negative. When k2 < 0, the modular reduction gives t1 = k2 + r, -# which is ~254 bits. The 128-bit truncation then extracts garbage. +# k2 = -δ1·|b1| + δ2·b2 where δ1, δ2 ∈ [0,1) are rounding errors. This is +# negative when δ1·|b1| > δ2·b2. Since |b1|/b2 ≈ 2^63 for BN254, even tiny +# δ1 can cause this. It happens at k ≈ ⌈m·2^256/endo_g2⌉ where c1 ticks up +# to m. Frequency: ~2^{-64} of all inputs. # -# Recall k2 = -c1·|b1| + c2·b2, where c1 and c2 are floors of rational -# values. Writing c1 = c1_exact - δ1, c2 = c2_exact - δ2 with δ ∈ [0,1): +# FIX: When t1 > 128 bits (i.e. k2 < 0 wrapped mod r), add |b1|. 
This shifts +# along the lattice vector (a1, b1), making k2 positive: +# k2_new = k2 + |b1| (positive, ~127 bits), k1_new = k1 - a1 # -# k2 = -δ1·|b1| + δ2·b2 +# ALGORITHM (split_into_endomorphism_scalars in field_declarations.hpp): # -# This is negative when δ1·|b1| > δ2·b2. Since |b1|/b2 ≈ 2^63, this needs -# δ1 > δ2·2^{-63} — i.e., δ1 can be tiny but must be nonzero. -# -# This happens at boundaries where c1 "ticks up" to a new integer m: at -# k ≈ ceil(m · 2^256 / endo_g2), c1 jumps to m while c2·b2 hasn't grown -# enough to compensate, so k2 = c2·b2 - m·|b1| < 0. -# -# Frequency: for each of the ~b2 ≈ 2^64 values of c1, there is a contiguous -# range of ~2^{126} affected k values. Total: ~2^{190} / 2^{254} ≈ 2^{-64} -# fraction. Far too rare for random testing, but easily constructed. -# -# FIX: When t1 has bits above position 128 (in C++: t1.data[2] or t1.data[3] -# nonzero), we add |b1| to t1. This is equivalent to decrementing c1 by 1, -# shifting the decomposition by the lattice vector (a1, b1). In other words -# we change our "close lattice vector": -# -# k2_new = k2 + |b1| (now positive, ~127 bits) -# k1_new = k1 - a1 (shifted down by ~64 bits) -# -# ALGORITHM (field_declarations.hpp, split_into_endomorphism_scalars): -# -# 1. c1 ≈ (b2·k)/r via c1 = (endo_g2 · k) >> 256 -# 2. c2 ≈ ((-b1)·k)/r via c2 = (endo_g1 · k) >> 256 -# 3. q1 = c1 · (-b1) (low 256 bits of 512-bit product) -# 4. q2 = c2 · b2 (low 256 bits of 512-bit product) -# 5. t1 = (q2 - q1).reduce_once() = k2 (mod r) -# 6. if t1 > 128 bits: t1 = (t1 + endo_minus_b1).reduce_once() [negative k2 fix] -# 7. t2 = (t1 · λ + k).reduce_once() = k1 (mod r) -# 8. Return low 128 bits of (t2, t1) as (k1, k2) +# 1. c1 = (endo_g2 · k) >> 256, c2 = (endo_g1 · k) >> 256 +# 2. t1 = (c2·b2 - c1·(-b1)) mod r [= k2] +# 3. if t1 > 128 bits: t1 += endo_minus_b1 [negative-k2 fix] +# 4. t2 = (t1·λ + k) mod r [= k1] +# 5. 
Return low 128 bits of (t2, t1) # def split_scalar(k, modulus, beta, endo_g1, endo_g2, endo_minus_b1, endo_b2): @@ -260,10 +290,8 @@ def split_scalar(k, modulus, beta, endo_g1, endo_g2, endo_minus_b1, endo_b2): # ==================================================================================== -# § 6. VERIFICATION +# § 5. BN254 Fr — VERIFICATION # ==================================================================================== -# -# We verify the algorithm on several test cases, including edge cases. def verify_split(k, k1, k2, t1, t2, lambda_val, modulus): """Verify correctness and bounds of the scalar split.""" @@ -277,7 +305,7 @@ def verify_split(k, k1, k2, t1, t2, lambda_val, modulus): verify_split(k_test, k1, k2, t1, t2, lambda_val, r) -# § 6a. Verify the negative-k2 fix on concrete trigger inputs. +# § 5a. Verify the negative-k2 fix on concrete trigger inputs. # # These are k = ceil(m * 2^256 / endo_g2) for m = 1, 2, 3 — the smallest k values # where c1 ticks up to m. Without the fix, t1 would be > 128 bits (negative k2 @@ -304,34 +332,379 @@ def verify_split(k, k1, k2, t1, t2, lambda_val, modulus): verify_split(k_trigger, k1, k2, t1, t2, lambda_val, r) +# ╔══════════════════════════════════════════════════════════════════════════════╗ +# ║ PART II: BN254 Fq (Base Field) ║ +# ╚══════════════════════════════════════════════════════════════════════════════╝ + +# ==================================================================================== +# § 6. BN254 Fq — FIELD PARAMETERS AND CUBE ROOT +# ==================================================================================== +# +# Fq also has a cube root of unity, so the same GLV technique applies. +# Since Fq is also 254 bits (top limb < 2^62), the 256-bit shift algorithm is used. 
+ +# The base field modulus of BN254 (from bn254/fq.hpp) +fq_modulus = 0x30644E72E131A029B85045B68181585D97816A916871CA8D3C208C16D87CFD47 + +# Montgomery parameter for Fq: R = 2^256 mod q +fq_R = pow(2, 256, fq_modulus) +fq_R_inv = pow(fq_R, -1, fq_modulus) + +# The cube root of unity β ∈ Fq (from bn254/fq.hpp), stored in Montgomery form +fq_cube_root_montgomery = ( + 0x71930c11d782e155 | + (0xa6bb947cffbe3323 << 64) | + (0xaa303344d4741444 << 128) | + (0x2c3b3f0d26594943 << 192) +) + +# Convert from Montgomery form to standard form +fq_beta = (fq_cube_root_montgomery * fq_R_inv) % fq_modulus + +# Verify that β is a non-trivial cube root of unity in Fq +assert (pow(fq_beta, 2, fq_modulus) + fq_beta + 1) % fq_modulus == 0, "β² + β + 1 ≡ 0 (mod q)" +assert fq_beta != 1, "β must be non-trivial" + + # ==================================================================================== -# § 7. SUMMARY +# § 7. BN254 Fq — LATTICE BASIS # ==================================================================================== # -# The GLV endomorphism optimization for BN254: +# Derive the short lattice basis for Fq using find_short_lattice_basis (§0). + +fq_a1, fq_b1, fq_a2, fq_b2 = find_short_lattice_basis(fq_beta, fq_modulus) + +# Verify lattice membership +assert (fq_a1 + fq_beta * fq_b1) % fq_modulus == 0, "Fq lattice vector 1" +assert (fq_a2 + fq_beta * fq_b2) % fq_modulus == 0, "Fq lattice vector 2" + +# Verify determinant +fq_det = fq_a1 * fq_b2 - fq_a2 * fq_b1 +assert abs(fq_det) == fq_modulus, f"Fq lattice determinant must be ±q, got {fq_det}" + + +# ==================================================================================== +# § 8. 
BN254 Fq — PRECOMPUTED CONSTANTS AND VERIFICATION +# ==================================================================================== + +fq_endo_g1, fq_endo_g2, fq_endo_minus_b1, fq_endo_b2 = compute_splitting_constants( + fq_modulus, fq_b1, fq_b2 +) + +# Verify these match the values in bn254/fq.hpp +fq_expected_endo_g1 = 0x7a7bd9d4391eb18d | (0x4ccef014a773d2cf << 64) | (0x2 << 128) +fq_expected_endo_g2 = 0xd91d232ec7e0b3d2 | (0x2 << 64) +fq_expected_endo_minus_b1 = 0x8211bbeb7d4f1129 | (0x6f4d8248eeb859fc << 64) +fq_expected_endo_b2 = 0x89d3256894d213e2 + +assert fq_endo_g1 == fq_expected_endo_g1, ( + f"Fq endo_g1 mismatch: got {hex(fq_endo_g1)}, expected {hex(fq_expected_endo_g1)}" +) +assert fq_endo_g2 == fq_expected_endo_g2, ( + f"Fq endo_g2 mismatch: got {hex(fq_endo_g2)}, expected {hex(fq_expected_endo_g2)}" +) +assert fq_endo_minus_b1 == fq_expected_endo_minus_b1, ( + f"Fq endo_minus_b1 mismatch: got {hex(fq_endo_minus_b1)}, expected {hex(fq_expected_endo_minus_b1)}" +) +assert fq_endo_b2 == fq_expected_endo_b2, ( + f"Fq endo_b2 mismatch: got {hex(fq_endo_b2)}, expected {hex(fq_expected_endo_b2)}" +) + + +# ==================================================================================== +# § 9. 
BN254 Fq — SPLITTING VERIFICATION +# ==================================================================================== + +for k_test in [0, 1, 42, fq_beta, fq_modulus - 1]: + k1, k2, t1, t2 = split_scalar( + k_test, fq_modulus, fq_beta, fq_endo_g1, fq_endo_g2, fq_endo_minus_b1, fq_endo_b2 + ) + verify_split(k_test, k1, k2, t1, t2, fq_beta, fq_modulus) + +# Verify negative-k2 triggers for Fq +for m in [1, 2, 3]: + k_trigger = (m * (1 << 256) + fq_endo_g2 - 1) // fq_endo_g2 + if k_trigger < fq_modulus: + k1, k2, t1, t2 = split_scalar( + k_trigger, fq_modulus, fq_beta, fq_endo_g1, fq_endo_g2, fq_endo_minus_b1, fq_endo_b2 + ) + verify_split(k_trigger, k1, k2, t1, t2, fq_beta, fq_modulus) + + +# ╔══════════════════════════════════════════════════════════════════════════════╗ +# ║ PART III: secp256k1 Fr (Scalar Field) ║ +# ╚══════════════════════════════════════════════════════════════════════════════╝ + +# ==================================================================================== +# § 10. secp256k1 Fr — FIELD PARAMETERS +# ==================================================================================== # -# 1. The BN254 curve has an endomorphism φ(x,y) = (β·x, y) satisfying φ(P) = λ·P -# 2. We can decompose any scalar k as k ≡ k1 - λ·k2 (mod r) with k1, k2 ≈ 127 bits -# 3. The decomposition uses Babai's nearest plane algorithm on a short lattice basis -# 4. Precomputed constants enable division-free computation via fixed-point arithmetic -# 5. The algorithm guarantees both k1 and k2 fit in 128 bits (actually ≤ 127 bits) -# 6. For ~2^{-64} of inputs, k2 is slightly negative; the fix detects this (upper -# limbs nonzero) and shifts along a lattice vector to make k2 positive +# secp256k1's scalar field modulus is a full 256 bits (top limb = 0xFFFF...), +# exceeding MODULUS_TOP_LIMB_LARGE_THRESHOLD (2^62). AUDITTODO: explain *exactly* the rounding issues that force this. 
+# This requires: +# - 2^384 shift instead of 2^256 (a >>256 shift loses precision for 256-bit moduli) +# - 4-limb endo_g constants (lo/mid/hi/hihi) +# - Montgomery field multiplication in split_into_endomorphism_scalars_384 + +# The scalar field modulus of secp256k1 (from secp256k1.hpp, FrParams) +secp_r = ( + 0xBFD25E8CD0364141 | + (0xBAAEDCE6AF48A03B << 64) | + (0xFFFFFFFFFFFFFFFE << 128) | + (0xFFFFFFFFFFFFFFFF << 192) +) + +# Montgomery parameter for secp256k1 Fr: R = 2^256 mod r +secp_R = pow(2, 256, secp_r) +secp_R_inv = pow(secp_R, -1, secp_r) + +# The cube root of unity λ ∈ secp256k1::Fr (from secp256k1.hpp FrParams), in Montgomery form +secp_cube_root_montgomery = ( + 0xf07deb3dc9926c9e | + (0x2c93e7ad83c6944c << 64) | + (0x73a9660652697d91 << 128) | + (0x532840178558d639 << 192) +) + +# Convert from Montgomery form to standard form +secp_lambda = (secp_cube_root_montgomery * secp_R_inv) % secp_r + +# Verify that λ is a non-trivial cube root of unity in secp256k1 Fr +assert (pow(secp_lambda, 2, secp_r) + secp_lambda + 1) % secp_r == 0, "λ² + λ + 1 ≡ 0 (mod r)" + + +# ==================================================================================== +# § 11. secp256k1 Fr — LATTICE BASIS +# ==================================================================================== +# +# See §0 for why these vectors are ~126–129 bits (unlike BN254's 64/127 pattern). + +secp_a1, secp_b1, secp_a2, secp_b2 = find_short_lattice_basis(secp_lambda, secp_r) + +# Verify lattice membership +assert (secp_a1 + secp_lambda * secp_b1) % secp_r == 0, "secp256k1 lattice vector 1" +assert (secp_a2 + secp_lambda * secp_b2) % secp_r == 0, "secp256k1 lattice vector 2" + +# Verify determinant +secp_det = secp_a1 * secp_b2 - secp_a2 * secp_b1 +assert abs(secp_det) == secp_r, "secp256k1 lattice determinant must be ±r" + + +# ==================================================================================== +# § 12. 
secp256k1 Fr — PRECOMPUTED CONSTANTS (384-bit shift) +# ==================================================================================== +# +# In the 384-bit code, the naming is "cross-paired" — g1 is paired with minus_b1, +# and g2 is paired with b2 (the opposite of what you might expect): +# +# endo_g1 = ⌈b2 · 2^384 / r⌉ +# endo_g2 = ⌊(-b1) · 2^384 / r⌋ +# +# Note: secp256k1_endo_notes.hpp uses the opposite naming convention for g1/g2, +# but the STORED values in FrParams follow this cross-paired convention. + +def compute_splitting_constants_384(modulus, b1, b2): + """ + Compute the precomputed constants for the 384-bit shift variant. + + Returns (endo_g1, endo_g2, endo_minus_b1, endo_b2) matching the hpp file. + + Convention: endo_g1 is the b2-based approximation (cross-paired with minus_b1), + endo_g2 is the (-b1)-based approximation (cross-paired with b2). + """ + shift = 1 << 384 + # endo_g1 = ceil(b2 * 2^384 / r) — cross-paired with minus_b1 in the algorithm + endo_g1 = -((-b2 * shift) // modulus) + # endo_g2 = floor((-b1) * 2^384 / r) — cross-paired with b2 in the algorithm + endo_g2 = ((-b1) * shift) // modulus + endo_minus_b1 = (-b1) % modulus + endo_b2 = b2 % modulus + return endo_g1, endo_g2, endo_minus_b1, endo_b2 + + +secp_endo_g1, secp_endo_g2, secp_endo_minus_b1, secp_endo_b2 = compute_splitting_constants_384( + secp_r, secp_b1, secp_b2 +) + +# Verify these match the values in secp256k1.hpp (FrParams) +# endo_g1 is stored as (lo, mid, hi, hihi) — 4 × 64-bit limbs +secp_expected_endo_g1 = ( + 0xE893209A45DBB031 | + (0x3DAA8A1471E8CA7F << 64) | + (0xE86C90E49284EB15 << 128) | + (0x3086D221A7D46BCD << 192) +) +secp_expected_endo_g2 = ( + 0x1571B4AE8AC47F71 | + (0x221208AC9DF506C6 << 64) | + (0x6F547FA90ABFE4C4 << 128) | + (0xE4437ED6010E8828 << 192) +) +secp_expected_endo_minus_b1 = 0x6F547FA90ABFE4C3 | (0xE4437ED6010E8828 << 64) +secp_expected_endo_b2 = 0xe86c90e49284eb15 | (0x3086d221a7d46bcd << 64) + +assert secp_endo_g1 == 
secp_expected_endo_g1, ( + f"secp256k1 endo_g1 mismatch:\n got {hex(secp_endo_g1)}\n expected {hex(secp_expected_endo_g1)}" +) +assert secp_endo_g2 == secp_expected_endo_g2, ( + f"secp256k1 endo_g2 mismatch:\n got {hex(secp_endo_g2)}\n expected {hex(secp_expected_endo_g2)}" +) +assert secp_endo_minus_b1 == secp_expected_endo_minus_b1, ( + f"secp256k1 endo_minus_b1 mismatch:\n got {hex(secp_endo_minus_b1)}\n expected {hex(secp_expected_endo_minus_b1)}" +) +assert secp_endo_b2 == secp_expected_endo_b2, ( + f"secp256k1 endo_b2 mismatch:\n got {hex(secp_endo_b2)}\n expected {hex(secp_expected_endo_b2)}" +) + + +# ==================================================================================== +# § 13. secp256k1 Fr — THE 384-BIT SPLITTING ALGORITHM +# ==================================================================================== +# +# Unlike the 256-bit variant, there is no explicit negative-k2 fix — field +# subtraction handles signs. The c1, c2 values are converted to Montgomery form +# and multiplied via field ops (which reduce mod r automatically). +# +# ALGORITHM (split_into_endomorphism_scalars_384 in field_declarations.hpp): +# +# 1. c1 = (endo_g1 · k) >> 384, c2 = (endo_g2 · k) >> 384 +# 2. r2f = c1·(-b1) - c2·b2 (cross-products, computed as field elements) +# 3. r1f = k - r2f·λ +# 4. k1 = r1f, k2 = -r2f + +def split_scalar_384(k, modulus, lambda_val, endo_g1, endo_g2, endo_minus_b1, endo_b2): + """ + Split scalar k using the 384-bit shift variant. + + Implements split_into_endomorphism_scalars_384() in field_declarations.hpp. 
+ + Returns: + (k1, k2): The split scalars such that k ≡ k1 - λ·k2 (mod r) + """ + input_val = k % modulus + + # c1 ≈ k·b2/r, c2 ≈ k·(-b1)/r + c1 = (endo_g1 * input_val) >> 384 + c2 = (endo_g2 * input_val) >> 384 + + # Cross-products (computed as field elements in C++ via Montgomery) + c1_times_minus_b1 = (c1 * endo_minus_b1) % modulus + c2_times_b2 = (c2 * endo_b2) % modulus + + # r2f = c1·(-b1) - c2·b2 (nearly cancels, leaving small lattice error) + r2f = (c1_times_minus_b1 - c2_times_b2) % modulus + + # r1f = k - r2f·λ + r1f = (input_val - r2f * lambda_val) % modulus + + # k1 = r1f, k2 = -r2f; invariant: k ≡ k1 - λ·k2 (mod r) + k1 = r1f + k2 = (-r2f) % modulus + + return k1, k2 + + +# ==================================================================================== +# § 14. secp256k1 Fr — SPLITTING VERIFICATION +# ==================================================================================== + +def verify_split_384(k, k1, k2, lambda_val, modulus): + """Verify correctness of the 384-bit scalar split.""" + # The invariant is k ≡ k1 - λ·k2 (mod r) + reconstructed = (k1 - lambda_val * k2) % modulus + assert reconstructed == k % modulus, ( + f"k ≡ k1 - λ·k2 failed for k={hex(k)}\n" + f" k1={hex(k1)}, k2={hex(k2)}\n" + f" reconstructed={hex(reconstructed)}, expected={hex(k % modulus)}" + ) + # For the 384-bit variant, k1 and k2 are field elements; they should be small + # enough that the decomposition is useful. We verify they fit in ~129 bits. + # (The C++ code does not explicitly truncate to 128 bits in this path; + # the values may be slightly larger than in the 256-bit path.) 
+ k1_eff = k1 if k1 <= modulus // 2 else modulus - k1 + k2_eff = k2 if k2 <= modulus // 2 else modulus - k2 + assert k1_eff.bit_length() <= 129, ( + f"k1 effective magnitude has {k1_eff.bit_length()} bits (> 129) for k={hex(k)}" + ) + assert k2_eff.bit_length() <= 129, ( + f"k2 effective magnitude has {k2_eff.bit_length()} bits (> 129) for k={hex(k)}" + ) + + +for k_test in [0, 1, 42, secp_lambda, secp_r - 1, secp_r // 2, secp_r // 3]: + k1, k2 = split_scalar_384( + k_test, secp_r, secp_lambda, secp_endo_g1, secp_endo_g2, secp_endo_minus_b1, secp_endo_b2 + ) + verify_split_384(k_test, k1, k2, secp_lambda, secp_r) + + +# Also verify with the cube root of unity in the BASE field (secp256k1 Fq). +# The base field Fq of secp256k1 has modulus p = 2^256 - 2^32 - 977, which also +# has a cube root of unity β. This β is what gets multiplied with the x-coordinate +# in the endomorphism φ(x,y) = (β·x, y). Let's verify it. + +secp_fq_modulus = ( + 0xFFFFFFFEFFFFFC2F | + (0xFFFFFFFFFFFFFFFF << 64) | + (0xFFFFFFFFFFFFFFFF << 128) | + (0xFFFFFFFFFFFFFFFF << 192) +) + +secp_fq_R = pow(2, 256, secp_fq_modulus) +secp_fq_R_inv = pow(secp_fq_R, -1, secp_fq_modulus) + +secp_fq_cube_root_montgomery = ( + 0x58a4361c8e81894e | + (0x03fde1631c4b80af << 64) | + (0xf8e98978d02e3905 << 128) | + (0x7a4a36aebcbb3d53 << 192) +) + +secp_fq_beta = (secp_fq_cube_root_montgomery * secp_fq_R_inv) % secp_fq_modulus +assert pow(secp_fq_beta, 3, secp_fq_modulus) == 1, "β³ ≡ 1 (mod p) for secp256k1 Fq" +assert secp_fq_beta != 1, "β must be non-trivial" + + +# ==================================================================================== +# § 15. SUMMARY +# ==================================================================================== # -# Performance: Instead of one 254-bit scalar multiplication k·P, we compute -# k1·P - k2·φ(P) with two ~127-bit scalars processed via interleaved WNAF, -# reducing the number of doublings by roughly half. 
+# Derived and verified GLV endomorphism constants for: +# - BN254 Fr (§1–§5): 254-bit, 256-bit shift, constants match bn254/fr.hpp +# - BN254 Fq (§6–§9): 254-bit, 256-bit shift, constants match bn254/fq.hpp +# - secp256k1 Fr (§10–§14): 256-bit, 384-bit shift, constants match secp256k1.hpp +# - secp256k1 Fq cube root β also verified (end of Part III) # -# References: -# • Gallant, Lambert, Vanstone: "Faster Point Multiplication on Elliptic Curves -# with Efficient Endomorphisms", CRYPTO 2001 -# • https://www.iacr.org/archive/crypto2001/21390189.pdf +# Architectural split: MODULUS_TOP_LIMB_LARGE_THRESHOLD (2^62) determines whether +# split_into_endomorphism_scalars (256-bit) or _384 (384-bit) is used. if __name__ == "__main__": - print("BN254 GLV Endomorphism - All verifications passed!") - print(f" λ (cube root of unity): {hex(lambda_val)}") - print(f" endo_g1: {hex(endo_g1)}") - print(f" endo_g2: {hex(endo_g2)}") + print("=== Part I: BN254 Fr ===") + print(f" λ (cube root): {hex(lambda_val)}") + print(f" endo_g1: {hex(endo_g1)}") + print(f" endo_g2: {hex(endo_g2)}") print(f" endo_minus_b1: {hex(endo_minus_b1)}") - print(f" endo_b2: {hex(endo_b2)}") - print("\nConstants match barretenberg/cpp/src/barretenberg/ecc/curves/bn254/fr.hpp") + print(f" endo_b2: {hex(endo_b2)}") + print(" -> Constants match bn254/fr.hpp") + + print("\n=== Part II: BN254 Fq ===") + print(f" β (cube root): {hex(fq_beta)}") + print(f" endo_g1: {hex(fq_endo_g1)}") + print(f" endo_g2: {hex(fq_endo_g2)}") + print(f" endo_minus_b1: {hex(fq_endo_minus_b1)}") + print(f" endo_b2: {hex(fq_endo_b2)}") + print(" -> Constants match bn254/fq.hpp") + + print("\n=== Part III: secp256k1 Fr ===") + print(f" λ (cube root): {hex(secp_lambda)}") + print(f" endo_g1: {hex(secp_endo_g1)}") + print(f" endo_g2: {hex(secp_endo_g2)}") + print(f" endo_minus_b1: {hex(secp_endo_minus_b1)}") + print(f" endo_b2: {hex(secp_endo_b2)}") + print(" -> Constants match secp256k1.hpp FrParams") + + print("\n=== secp256k1 Fq (base 
field) ===") + print(f" β (cube root): {hex(secp_fq_beta)}") + print(" -> Cube root verified") + + print("\nAll verifications passed!") From f9be7cafedd9a9a0eaae68a8baeaa4429c18ce6b Mon Sep 17 00:00:00 2001 From: notnotraju Date: Tue, 24 Feb 2026 12:04:55 +0000 Subject: [PATCH 5/8] unified interfaces for BN fields and secp256k1, removed the "384-bit" branch. --- .../ecc/curves/secp256k1/secp256k1.hpp | 17 +- .../curves/secp256k1/secp256k1_endo_notes.hpp | 6 +- .../ecc/fields/endomorphism_scalars.py | 250 +++++++++--------- .../ecc/fields/field_declarations.hpp | 173 ++++++------ 4 files changed, 207 insertions(+), 239 deletions(-) diff --git a/barretenberg/cpp/src/barretenberg/ecc/curves/secp256k1/secp256k1.hpp b/barretenberg/cpp/src/barretenberg/ecc/curves/secp256k1/secp256k1.hpp index 3e68f5e2bbec..323c002898d6 100644 --- a/barretenberg/cpp/src/barretenberg/ecc/curves/secp256k1/secp256k1.hpp +++ b/barretenberg/cpp/src/barretenberg/ecc/curves/secp256k1/secp256k1.hpp @@ -217,15 +217,14 @@ struct FrParams { static constexpr uint64_t endo_b2_lo = 0xe86c90e49284eb15ULL; static constexpr uint64_t endo_b2_mid = 0x3086d221a7d46bcdULL; - static constexpr uint64_t endo_g1_lo = 0xE893209A45DBB031ULL; - static constexpr uint64_t endo_g1_mid = 0x3DAA8A1471E8CA7FULL; - static constexpr uint64_t endo_g1_hi = 0xE86C90E49284EB15ULL; - static constexpr uint64_t endo_g1_hihi = 0x3086D221A7D46BCDULL; - - static constexpr uint64_t endo_g2_lo = 0x1571B4AE8AC47F71ULL; - static constexpr uint64_t endo_g2_mid = 0x221208AC9DF506C6ULL; - static constexpr uint64_t endo_g2_hi = 0x6F547FA90ABFE4C4ULL; - static constexpr uint64_t endo_g2_hihi = 0xE4437ED6010E8828ULL; + // 256-bit-shift constants: g1 = floor((-b1) * 2^256 / r), g2 = floor(b2 * 2^256 / r) + // See endomorphism_scalars.py compute_splitting_constants() for derivation. 
+ static constexpr uint64_t endo_g1_lo = 0x6F547FA90ABFE4C4ULL; + static constexpr uint64_t endo_g1_mid = 0xE4437ED6010E8828ULL; + static constexpr uint64_t endo_g1_hi = 0x0ULL; + + static constexpr uint64_t endo_g2_lo = 0xE86C90E49284EB15ULL; + static constexpr uint64_t endo_g2_mid = 0x3086D221A7D46BCDULL; // Not used in secp256k1 static constexpr uint64_t primitive_root_0 = 0UL; diff --git a/barretenberg/cpp/src/barretenberg/ecc/curves/secp256k1/secp256k1_endo_notes.hpp b/barretenberg/cpp/src/barretenberg/ecc/curves/secp256k1/secp256k1_endo_notes.hpp index 55de295e6cc0..2e42c954b2e9 100644 --- a/barretenberg/cpp/src/barretenberg/ecc/curves/secp256k1/secp256k1_endo_notes.hpp +++ b/barretenberg/cpp/src/barretenberg/ecc/curves/secp256k1/secp256k1_endo_notes.hpp @@ -14,11 +14,9 @@ struct basis_vectors { uint64_t endo_g1_lo = 0; uint64_t endo_g1_mid = 0; uint64_t endo_g1_hi = 0; - uint64_t endo_g1_hihi = 0; uint64_t endo_g2_lo = 0; uint64_t endo_g2_mid = 0; uint64_t endo_g2_hi = 0; - uint64_t endo_g2_hihi = 0; uint64_t endo_minus_b1_lo = 0; uint64_t endo_minus_b1_mid = 0; uint64_t endo_b2_lo = 0; @@ -108,7 +106,7 @@ struct basis_vectors { } uint512_t minus_b1 = -b1; - uint512_t shift256 = uint512_t(1) << 384; + uint512_t shift256 = uint512_t(1) << 256; uint512_t g1 = (-b1 * shift256) / uint512_t(secp256k1::fr::modulus); uint512_t g2 = (b2 * shift256) / uint512_t(secp256k1::fr::modulus); @@ -116,11 +114,9 @@ struct basis_vectors { result.endo_g1_lo = g1.lo.data[0]; result.endo_g1_mid = g1.lo.data[1]; result.endo_g1_hi = g1.lo.data[2]; - result.endo_g1_hihi = g1.lo.data[3]; result.endo_g2_lo = g2.lo.data[0]; result.endo_g2_mid = g2.lo.data[1]; result.endo_g2_hi = g2.lo.data[2]; - result.endo_g2_hihi = g2.lo.data[3]; result.endo_minus_b1_lo = minus_b1.lo.data[0]; result.endo_minus_b1_mid = minus_b1.lo.data[1]; result.endo_b2_lo = b2.lo.data[0]; diff --git a/barretenberg/cpp/src/barretenberg/ecc/fields/endomorphism_scalars.py 
b/barretenberg/cpp/src/barretenberg/ecc/fields/endomorphism_scalars.py index 5f0152d2f5e3..c3a139bdf265 100644 --- a/barretenberg/cpp/src/barretenberg/ecc/fields/endomorphism_scalars.py +++ b/barretenberg/cpp/src/barretenberg/ecc/fields/endomorphism_scalars.py @@ -8,7 +8,7 @@ Part 0 (§0): Preliminaries — the GLV lattice and how to find a short basis Part I (§1–§5): BN254 Fr — the scalar field of BN254 (254-bit, uses 2^256 shift) Part II (§6–§9): BN254 Fq — the base field of BN254 (254-bit, uses 2^256 shift) - Part III (§10–§14): secp256k1 Fr — the scalar field of secp256k1 (256-bit, uses 2^384 shift) + Part III (§10–§14): secp256k1 Fr — the scalar field of secp256k1 (256-bit, now uses 2^256 shift) Reference: Gallant, Lambert, Vanstone, "Faster Point Multiplication on Elliptic Curves" (2001) @@ -92,24 +92,24 @@ def find_short_lattice_basis(lambda_val, modulus): # prev_coeff - quot·coeff. It follows that (-remainder, coeff) is always # a lattice vector: -remainder + λ·coeff ≡ -coeff·λ + coeff·λ ≡ 0. remainder, prev_remainder = lambda_val, modulus - coeff, prev_coeff = 1, 0 + coeff, prev_coeff = 1, 0 # Run until the remainder first drops below √p. while remainder >= approx_sqrt: - quot = prev_remainder // remainder + quot = prev_remainder // remainder prev_remainder, remainder = remainder, prev_remainder - quot * remainder - prev_coeff, coeff = coeff, prev_coeff - quot * coeff + prev_coeff, coeff = coeff, prev_coeff - quot * coeff # At this point: - # vec_before = (-prev_remainder, prev_coeff) — last step above √p - # vec_cross = (-remainder, coeff) — first step below √p + # vec_before = (-prev_remainder, prev_coeff) — last step above √p + # vec_cross = (-remainder, coeff) — first step below √p vec_before = (-prev_remainder, prev_coeff) - vec_cross = (-remainder, coeff) + vec_cross = (-remainder, coeff) # One more EEA step gives an independent candidate vector. 
- quot = prev_remainder // remainder - r_after = prev_remainder - quot * remainder - s_after = prev_coeff - quot * coeff + quot = prev_remainder // remainder + r_after = prev_remainder - quot * remainder + s_after = prev_coeff - quot * coeff vec_after = (-r_after, s_after) # First basis vector: vec_cross (shortest, by construction). @@ -126,8 +126,49 @@ def find_short_lattice_basis(lambda_val, modulus): return a1, b1, a2, b2 +# ==================================================================================== +# § 0a. THE 256-BIT-SHIFT APPROXIMATION — ERROR BOUND PROOF +# ==================================================================================== +# +# CLAIM: For any prime r < 2^256 and any b with |b| < r, define g = floor(b * 2^256 / r). +# Then for every k in [0, r): +# +# floor(g * k / 2^256) ∈ { floor(b * k / r), floor(b * k / r) - 1 } +# +# i.e., the approximation error is in {0, -1}. This holds for ALL curves with +# r < 2^256 — BN254, secp256k1, and any other. +# (Note that we used to use a 384 bit shift for secp256k1.) +# +# PROOF: +# +# Write the Euclidean division of b * 2^256 by r: +# +# b * 2^256 = g * r + ε where 0 ≤ ε < r ...(1) +# +# Rearranging: g = (b * 2^256 - ε) / r. Multiply both sides by k: +# +# g * k = b * k * 2^256 / r - ε * k / r +# +# Dividing by 2^256: +# +# g * k / 2^256 = b * k / r - ε * k / (r * 2^256) ...(2) +# +# The correction term δ := ε * k / (r * 2^256) satisfies: +# +# 0 ≤ δ = ε * k / (r * 2^256) < r * r / (r * 2^256) +# = r / 2^256 < 1 ...(3) +# +# (using ε < r, k < r, and r < 2^256). +# +# From (2) and (3): +# +# b*k/r - 1 < g*k/2^256 ≤ b*k/r ...(4) +# +# Taking floors of (4): if b*k/r = q + f where q = floor(b*k/r) and 0 ≤ f < 1, +# then g*k/2^256 ∈ (q + f - 1, q + f], so floor(g*k/2^256) ∈ {q-1, q}. 
∎ + # ╔══════════════════════════════════════════════════════════════════════════════╗ -# ║ PART I: BN254 Fr (Scalar Field) ║ +# ║ PART I: BN254 Fr (Scalar Field) ║ # ╚══════════════════════════════════════════════════════════════════════════════╝ # ==================================================================================== @@ -235,7 +276,8 @@ def compute_splitting_constants(modulus, b1, b2): # k2 = -δ1·|b1| + δ2·b2 where δ1, δ2 ∈ [0,1) are rounding errors. This is # negative when δ1·|b1| > δ2·b2. Since |b1|/b2 ≈ 2^63 for BN254, even tiny # δ1 can cause this. It happens at k ≈ ⌈m·2^256/endo_g2⌉ where c1 ticks up -# to m. Frequency: ~2^{-64} of all inputs. +# to m. Note that the ≈ means that it can happen for _many_ k around/slightly greater than that number. +# Frequency: ~2^{-64} of all inputs. # # FIX: When t1 > 128 bits (i.e. k2 < 0 wrapped mod r), add |b1|. This shifts # along the lattice vector (a1, b1), making k2 positive: @@ -333,7 +375,7 @@ def verify_split(k, k1, k2, t1, t2, lambda_val, modulus): # ╔══════════════════════════════════════════════════════════════════════════════╗ -# ║ PART II: BN254 Fq (Base Field) ║ +# ║ PART II: BN254 Fq (Base Field) ║ # ╚══════════════════════════════════════════════════════════════════════════════╝ # ==================================================================================== @@ -432,7 +474,7 @@ def verify_split(k, k1, k2, t1, t2, lambda_val, modulus): # ╔══════════════════════════════════════════════════════════════════════════════╗ -# ║ PART III: secp256k1 Fr (Scalar Field) ║ +# ║ PART III: secp256k1 Fr (Scalar Field) ║ # ╚══════════════════════════════════════════════════════════════════════════════╝ # ==================================================================================== @@ -440,11 +482,16 @@ def verify_split(k, k1, k2, t1, t2, lambda_val, modulus): # ==================================================================================== # # secp256k1's scalar field modulus is a full 256 
bits (top limb = 0xFFFF...), -# exceeding MODULUS_TOP_LIMB_LARGE_THRESHOLD (2^62). AUDITTODO: explain *exactly* the rounding issues that force this. -# This requires: -# - 2^384 shift instead of 2^256 (a >>256 shift loses precision for 256-bit moduli) -# - 4-limb endo_g constants (lo/mid/hi/hihi) -# - Montgomery field multiplication in split_into_endomorphism_scalars_384 +# exceeding MODULUS_TOP_LIMB_LARGE_THRESHOLD (2^62). As proved in §0a, the +# 256-bit shift approximation is sufficient for ANY prime r < 2^256 — the +# error is bounded to {0, -1}. The old 384-bit path was only needed because +# the 256-bit C++ code truncated outputs to 128 bits, clipping the 129th bit +# that appears for ~26% of secp256k1 inputs. With full-width output, 256-bit +# shift works perfectly. +# +# For secp256k1, the large-modulus branch of split_into_endomorphism_scalars +# returns full field elements (not truncated to 128 bits). The caller +# (biggroup_nafs.hpp) handles signs by inspecting the MSB of k2. # The scalar field modulus of secp256k1 (from secp256k1.hpp, FrParams) secp_r = ( @@ -491,55 +538,26 @@ def verify_split(k, k1, k2, t1, t2, lambda_val, modulus): # ==================================================================================== -# § 12. secp256k1 Fr — PRECOMPUTED CONSTANTS (384-bit shift) +# § 12. secp256k1 Fr — PRECOMPUTED CONSTANTS (256-bit shift) # ==================================================================================== # -# In the 384-bit code, the naming is "cross-paired" — g1 is paired with minus_b1, -# and g2 is paired with b2 (the opposite of what you might expect): +# secp256k1 now uses the SAME 256-bit shift as BN254 (see §0a for the proof that +# this is sufficient for any r < 2^256). 
The naming convention matches BN254: # -# endo_g1 = ⌈b2 · 2^384 / r⌉ -# endo_g2 = ⌊(-b1) · 2^384 / r⌋ +# endo_g1 = ⌊(-b1) · 2^256 / r⌋ +# endo_g2 = ⌊b2 · 2^256 / r⌋ # -# Note: secp256k1_endo_notes.hpp uses the opposite naming convention for g1/g2, -# but the STORED values in FrParams follow this cross-paired convention. - -def compute_splitting_constants_384(modulus, b1, b2): - """ - Compute the precomputed constants for the 384-bit shift variant. +# We reuse compute_splitting_constants() from §3 — same function, same shift. - Returns (endo_g1, endo_g2, endo_minus_b1, endo_b2) matching the hpp file. - - Convention: endo_g1 is the b2-based approximation (cross-paired with minus_b1), - endo_g2 is the (-b1)-based approximation (cross-paired with b2). - """ - shift = 1 << 384 - # endo_g1 = ceil(b2 * 2^384 / r) — cross-paired with minus_b1 in the algorithm - endo_g1 = -((-b2 * shift) // modulus) - # endo_g2 = floor((-b1) * 2^384 / r) — cross-paired with b2 in the algorithm - endo_g2 = ((-b1) * shift) // modulus - endo_minus_b1 = (-b1) % modulus - endo_b2 = b2 % modulus - return endo_g1, endo_g2, endo_minus_b1, endo_b2 - - -secp_endo_g1, secp_endo_g2, secp_endo_minus_b1, secp_endo_b2 = compute_splitting_constants_384( +secp_endo_g1, secp_endo_g2, secp_endo_minus_b1, secp_endo_b2 = compute_splitting_constants( secp_r, secp_b1, secp_b2 ) # Verify these match the values in secp256k1.hpp (FrParams) -# endo_g1 is stored as (lo, mid, hi, hihi) — 4 × 64-bit limbs -secp_expected_endo_g1 = ( - 0xE893209A45DBB031 | - (0x3DAA8A1471E8CA7F << 64) | - (0xE86C90E49284EB15 << 128) | - (0x3086D221A7D46BCD << 192) -) -secp_expected_endo_g2 = ( - 0x1571B4AE8AC47F71 | - (0x221208AC9DF506C6 << 64) | - (0x6F547FA90ABFE4C4 << 128) | - (0xE4437ED6010E8828 << 192) -) +# endo_g1 is stored as (lo, mid, hi) — only 2 non-zero limbs for secp256k1 +# (hi = 0 because (-b1) * 2^256 / r fits in 128 bits for this curve) +secp_expected_endo_g1 = 0x6F547FA90ABFE4C4 | (0xE4437ED6010E8828 << 64) 
+secp_expected_endo_g2 = 0xE86C90E49284EB15 | (0x3086D221A7D46BCD << 64) secp_expected_endo_minus_b1 = 0x6F547FA90ABFE4C3 | (0xE4437ED6010E8828 << 64) secp_expected_endo_b2 = 0xe86c90e49284eb15 | (0x3086d221a7d46bcd << 64) @@ -558,48 +576,52 @@ def compute_splitting_constants_384(modulus, b1, b2): # ==================================================================================== -# § 13. secp256k1 Fr — THE 384-BIT SPLITTING ALGORITHM +# § 13. secp256k1 Fr — THE 256-BIT SPLITTING ALGORITHM # ==================================================================================== # -# Unlike the 256-bit variant, there is no explicit negative-k2 fix — field -# subtraction handles signs. The c1, c2 values are converted to Montgomery form -# and multiplied via field ops (which reduce mod r automatically). +# secp256k1 uses the same core computation as BN254 (compute_endomorphism_k2 in +# field_declarations.hpp), but WITHOUT the negative-k2 fix. The result k2 is a +# full field element (up to ~129 bits); the caller handles signs. # -# ALGORITHM (split_into_endomorphism_scalars_384 in field_declarations.hpp): +# ALGORITHM (the `else` branch of split_into_endomorphism_scalars in +# field_declarations.hpp, for large moduli): # -# 1. c1 = (endo_g1 · k) >> 384, c2 = (endo_g2 · k) >> 384 -# 2. r2f = c1·(-b1) - c2·b2 (cross-products, computed as field elements) -# 3. r1f = k - r2f·λ -# 4. k1 = r1f, k2 = -r2f +# 1. t1 = compute_endomorphism_k2(k) +# i.e. c1 = (endo_g2 · k) >> 256, c2 = (endo_g1 · k) >> 256 +# t1 = (c2·b2 - c1·(-b1)) mod r [= k2] +# 2. k2 = t1 +# 3. k1 = (t1·λ + k) mod r +# +# No negative-k2 fix: unlike BN254, we do NOT check if t1 > 128 bits. The +# biggroup code inspects the MSB of k2 to determine its sign. -def split_scalar_384(k, modulus, lambda_val, endo_g1, endo_g2, endo_minus_b1, endo_b2): +def split_scalar_secp256k1(k, modulus, lambda_val, endo_g1, endo_g2, endo_minus_b1, endo_b2): """ - Split scalar k using the 384-bit shift variant. 
+ Split scalar k using the 256-bit shift for secp256k1. - Implements split_into_endomorphism_scalars_384() in field_declarations.hpp. + Implements the large-modulus branch of split_into_endomorphism_scalars() + in field_declarations.hpp. Returns: - (k1, k2): The split scalars such that k ≡ k1 - λ·k2 (mod r) + (k1, k2): Full field elements such that k ≡ k1 - λ·k2 (mod r). + k2 fits in ~129 bits; k1 fits in ~128 bits. """ input_val = k % modulus - # c1 ≈ k·b2/r, c2 ≈ k·(-b1)/r - c1 = (endo_g1 * input_val) >> 384 - c2 = (endo_g2 * input_val) >> 384 - - # Cross-products (computed as field elements in C++ via Montgomery) - c1_times_minus_b1 = (c1 * endo_minus_b1) % modulus - c2_times_b2 = (c2 * endo_b2) % modulus + # c1 = (g2 * k) >> 256, c2 = (g1 * k) >> 256 + c1 = (endo_g2 * input_val) >> 256 + c2 = (endo_g1 * input_val) >> 256 - # r2f = c1·(-b1) - c2·b2 (nearly cancels, leaving small lattice error) - r2f = (c1_times_minus_b1 - c2_times_b2) % modulus + # q1 = c1 * (-b1), q2 = c2 * b2 (raw integer multiply, low 256 bits) + q1_lo = (c1 * endo_minus_b1) & ((1 << 256) - 1) + q2_lo = (c2 * endo_b2) & ((1 << 256) - 1) - # r1f = k - r2f·λ - r1f = (input_val - r2f * lambda_val) % modulus + # t1 = (q2 - q1) mod r — this is k2 + t1 = (q2_lo - q1_lo) % modulus - # k1 = r1f, k2 = -r2f; invariant: k ≡ k1 - λ·k2 (mod r) - k1 = r1f - k2 = (-r2f) % modulus + # k1 = t1·λ + k (mod r) + k1 = (t1 * lambda_val + input_val) % modulus + k2 = t1 return k1, k2 @@ -608,8 +630,8 @@ def split_scalar_384(k, modulus, lambda_val, endo_g1, endo_g2, endo_minus_b1, en # § 14. 
secp256k1 Fr — SPLITTING VERIFICATION # ==================================================================================== -def verify_split_384(k, k1, k2, lambda_val, modulus): - """Verify correctness of the 384-bit scalar split.""" +def verify_split_secp256k1(k, k1, k2, lambda_val, modulus): + """Verify correctness of the secp256k1 scalar split.""" # The invariant is k ≡ k1 - λ·k2 (mod r) reconstructed = (k1 - lambda_val * k2) % modulus assert reconstructed == k % modulus, ( @@ -617,10 +639,8 @@ def verify_split_384(k, k1, k2, lambda_val, modulus): f" k1={hex(k1)}, k2={hex(k2)}\n" f" reconstructed={hex(reconstructed)}, expected={hex(k % modulus)}" ) - # For the 384-bit variant, k1 and k2 are field elements; they should be small - # enough that the decomposition is useful. We verify they fit in ~129 bits. - # (The C++ code does not explicitly truncate to 128 bits in this path; - # the values may be slightly larger than in the 256-bit path.) + # k1 and k2 are full field elements. We verify their effective magnitudes + # fit in ~129 bits (the decomposition halves the scalar bit-length). k1_eff = k1 if k1 <= modulus // 2 else modulus - k1 k2_eff = k2 if k2 <= modulus // 2 else modulus - k2 assert k1_eff.bit_length() <= 129, ( @@ -632,38 +652,10 @@ def verify_split_384(k, k1, k2, lambda_val, modulus): for k_test in [0, 1, 42, secp_lambda, secp_r - 1, secp_r // 2, secp_r // 3]: - k1, k2 = split_scalar_384( + k1, k2 = split_scalar_secp256k1( k_test, secp_r, secp_lambda, secp_endo_g1, secp_endo_g2, secp_endo_minus_b1, secp_endo_b2 ) - verify_split_384(k_test, k1, k2, secp_lambda, secp_r) - - -# Also verify with the cube root of unity in the BASE field (secp256k1 Fq). -# The base field Fq of secp256k1 has modulus p = 2^256 - 2^32 - 977, which also -# has a cube root of unity β. This β is what gets multiplied with the x-coordinate -# in the endomorphism φ(x,y) = (β·x, y). Let's verify it. 
- -secp_fq_modulus = ( - 0xFFFFFFFEFFFFFC2F | - (0xFFFFFFFFFFFFFFFF << 64) | - (0xFFFFFFFFFFFFFFFF << 128) | - (0xFFFFFFFFFFFFFFFF << 192) -) - -secp_fq_R = pow(2, 256, secp_fq_modulus) -secp_fq_R_inv = pow(secp_fq_R, -1, secp_fq_modulus) - -secp_fq_cube_root_montgomery = ( - 0x58a4361c8e81894e | - (0x03fde1631c4b80af << 64) | - (0xf8e98978d02e3905 << 128) | - (0x7a4a36aebcbb3d53 << 192) -) - -secp_fq_beta = (secp_fq_cube_root_montgomery * secp_fq_R_inv) % secp_fq_modulus -assert pow(secp_fq_beta, 3, secp_fq_modulus) == 1, "β³ ≡ 1 (mod p) for secp256k1 Fq" -assert secp_fq_beta != 1, "β must be non-trivial" - + verify_split_secp256k1(k_test, k1, k2, secp_lambda, secp_r) # ==================================================================================== # § 15. SUMMARY @@ -672,11 +664,15 @@ def verify_split_384(k, k1, k2, lambda_val, modulus): # Derived and verified GLV endomorphism constants for: # - BN254 Fr (§1–§5): 254-bit, 256-bit shift, constants match bn254/fr.hpp # - BN254 Fq (§6–§9): 254-bit, 256-bit shift, constants match bn254/fq.hpp -# - secp256k1 Fr (§10–§14): 256-bit, 384-bit shift, constants match secp256k1.hpp -# - secp256k1 Fq cube root β also verified (end of Part III) -# -# Architectural split: MODULUS_TOP_LIMB_LARGE_THRESHOLD (2^62) determines whether -# split_into_endomorphism_scalars (256-bit) or _384 (384-bit) is used. +# - secp256k1 Fr (§10–§14): 256-bit, 256-bit shift, constants match secp256k1.hpp +# +# ALL curves use the same 256-bit shift (see §0a for the proof that this is +# sufficient for any r < 2^256). MODULUS_TOP_LIMB_LARGE_THRESHOLD (2^62) +# determines whether the 128-bit pair path (with negative-k2 fix) or the +# full-width path (no fix, caller handles signs) is used. The former is ONLY +# called for BN254 fields, as their lattice basis is unusually short and hence +# the splitting scalars comfortably fit in 128 bits; in general, for 256-bit fields, +# k1 and k2 will have ~ 128 or 129 bits. 
if __name__ == "__main__": print("=== Part I: BN254 Fr ===") diff --git a/barretenberg/cpp/src/barretenberg/ecc/fields/field_declarations.hpp b/barretenberg/cpp/src/barretenberg/ecc/fields/field_declarations.hpp index ed47d11f06b7..b83fdb489273 100644 --- a/barretenberg/cpp/src/barretenberg/ecc/fields/field_declarations.hpp +++ b/barretenberg/cpp/src/barretenberg/ecc/fields/field_declarations.hpp @@ -45,6 +45,11 @@ namespace bb { // // In Barretenberg, the main workhorse fields are the base and scalar fields of BN-254, which are "small" moduli: they // are each 254 bits. The field algorithms for them are constant-time. +// +// NOTE: For the 254-bit fields in Barretenberg, namely BN254 base and scalar fields, we also +// use this constexpr branching to capture another (conceptually unrelated) property: that +// the short basis of the lattice from the endomorphism is shorter than expected. See endomorphism_scalars.py for more +// information. static constexpr uint64_t MODULUS_TOP_LIMB_LARGE_THRESHOLD = 0x4000000000000000ULL; // 2^62 /** @@ -466,71 +471,90 @@ template struct alignas(32) field { * We pre-compute scalars g1 = (2^256 * b1) / n, g2 = (2^256 * b2) / n, to avoid having to perform long division * on 512-bit scalars **/ - static void split_into_endomorphism_scalars(const field& k, field& k1, field& k2) - { - // if the modulus is a >= 255-bit integer, we need to use a basis where g1, g2 have been shifted by 2^384 - if constexpr (Params::modulus_3 >= MODULUS_TOP_LIMB_LARGE_THRESHOLD) { - split_into_endomorphism_scalars_384(k, k1, k2); - } else { - std::pair, std::array> ret = split_into_endomorphism_scalars(k); - k1.data[0] = ret.first[0]; - k1.data[1] = ret.first[1]; - -#if !defined(__clang__) && defined(__GNUC__) -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Warray-bounds" -#endif - k2.data[0] = ret.second[0]; // NOLINT - k2.data[1] = ret.second[1]; -#if !defined(__clang__) && defined(__GNUC__) -#pragma GCC diagnostic pop -#endif - } - } - - 
// NOTE: this form is only usable if the modulus is 254 bits or less, otherwise see - // split_into_endomorphism_scalars_384. - // DOES NOT assume that the input is reduced; it can be in coarse form. - // TODO(https://github.com/AztecProtocol/barretenberg/issues/851): Unify these APIs. - static std::pair, std::array> split_into_endomorphism_scalars(const field& k) + /** + * @brief Shared core of the endomorphism scalar decomposition. + * + * Computes k2 = round(b2·k/r)·(-b1) + round((-b1)·k/r)·b2, using the + * 256-bit-shift approximation g = floor(b·2^256/r) for both BN254 and + * secp256k1. See endomorphism_scalars.py §0 for the proof that the + * approximation error is bounded to {0, -1} for any r < 2^256. + * + * The result is a raw (non-Montgomery) `field` whose low 128-or-129 bits + * hold k2. This function will be called in either the BN254 base/scalar field + * or the generic, secp256k1 branch. + */ + static field compute_endomorphism_k2(const field& input) { - static_assert(Params::modulus_3 < MODULUS_TOP_LIMB_LARGE_THRESHOLD); - field input = k.reduce_once(); - constexpr field endo_g1 = { Params::endo_g1_lo, Params::endo_g1_mid, Params::endo_g1_hi, 0 }; - constexpr field endo_g2 = { Params::endo_g2_lo, Params::endo_g2_mid, 0, 0 }; - constexpr field endo_minus_b1 = { Params::endo_minus_b1_lo, Params::endo_minus_b1_mid, 0, 0 }; - constexpr field endo_b2 = { Params::endo_b2_lo, Params::endo_b2_mid, 0, 0 }; - // compute c1 = (g2 * k) >> 256 + // c1 = (g2 * k) >> 256, c2 = (g1 * k) >> 256 wide_array c1 = endo_g2.mul_512(input); - // compute c2 = (g1 * k) >> 256 wide_array c2 = endo_g1.mul_512(input); - // (the bit shifts are implicit, as we only utilize the high limbs of c1, c2 + // extract high halves + field c1_hi{ c1.data[4], c1.data[5], c1.data[6], c1.data[7] }; + field c2_hi{ c2.data[4], c2.data[5], c2.data[6], c2.data[7] }; - field c1_hi = { - c1.data[4], c1.data[5], c1.data[6], c1.data[7] - }; // *(field*)((uintptr_t)(&c1) + (4 * sizeof(uint64_t))); - 
field c2_hi = { - c2.data[4], c2.data[5], c2.data[6], c2.data[7] - }; // *(field*)((uintptr_t)(&c2) + (4 * sizeof(uint64_t))); - - // compute q1 = c1 * -b1 + // q1 = c1 * (-b1), q2 = c2 * b2 wide_array q1 = c1_hi.mul_512(endo_minus_b1); - // compute q2 = c2 * b2 wide_array q2 = c2_hi.mul_512(endo_b2); - // FIX: Avoid using 512-bit multiplication as its not necessary. - // c1_hi, c2_hi can be uint256_t's and the final result (without montgomery reduction) - // could be casted to a field. field q1_lo{ q1.data[0], q1.data[1], q1.data[2], q1.data[3] }; field q2_lo{ q2.data[0], q2.data[1], q2.data[2], q2.data[3] }; - field t1 = (q2_lo - q1_lo).reduce_once(); + return (q2_lo - q1_lo).reduce_once(); + } + + /** + * @brief Full-width endomorphism decomposition: k ≡ k1 - k2·λ (mod r). + * Modifies the field elements k1 and k2. + * + * For BN254 base/scalar fields, delegates to the 128-bit pair + * overload, which applies the negative-k2 fix. Returns k1, k2 in the low + * 2 limbs (upper limbs zeroed). Both fit in 128 bits. + * + * For generic 256-bit fields: returns k1, k2 as full field elements + * elements (non-Montgomery). k1 fits in ~128 bits; k2 fits in ~129 bits. + * No negative-k2 fix — the caller (biggroup_nafs.hpp) handles signs by + * inspecting the MSB of k2. + */ + static void split_into_endomorphism_scalars(const field& k, field& k1, field& k2) + { + field input = k.reduce_once(); + if constexpr (Params::modulus_3 < MODULUS_TOP_LIMB_LARGE_THRESHOLD) { + // BN254 base or scalar field: use 128-bit path with negative-k2 fix. + auto ret = split_into_endomorphism_scalars(input); + k1 = { ret.first[0], ret.first[1], 0, 0 }; + k2 = { ret.second[0], ret.second[1], 0, 0 }; + } else { + // Large modulus (secp256k1): full-width path. + field t1 = compute_endomorphism_k2(input); + k2 = t1; + k1 = ((t1 * cube_root_of_unity()) + input).reduce_once(); + } + } + + /** + * @brief 128-bit endomorphism decomposition: k ≡ k1 - k2·λ (mod r). 
+ * + * Returns { {k1_lo, k1_hi}, {k2_lo, k2_hi} } — each scalar as a pair of + * uint64_t representing its low 128 bits. Both k1 and k2 are guaranteed to + * fit in 128 bits (the negative-k2 fix ensures this for the ~2^{-64} of + * inputs where k2 would otherwise be slightly negative). + * + * Only valid for fields such that the splitting_scalars algorithm produces 128 bit outputs. In Barretenberg, these + * are just the base and scalar fields of BN254. These are the only "small modulus" fields, so we use a static + * assert to force this. + * + * Assumes that the input is in strict form, i.e., in the range [0, p). + */ + static std::pair, std::array> split_into_endomorphism_scalars(const field& k) + { + static_assert(Params::modulus_3 < MODULUS_TOP_LIMB_LARGE_THRESHOLD); + field t1 = compute_endomorphism_k2(k); // k2 (= t1) can be slightly negative for ~2^{-64} of inputs. // When negative, t1 = k2 + r is 254 bits (upper limbs nonzero). @@ -538,64 +562,17 @@ template struct alignas(32) field { // This shifts k2 by +|b1| (~127 bits, now positive) and k1 by -a1 (~64 bits), // keeping both within 128 bits. See endomorphism_scalars.py for more details. 
if (t1.data[2] != 0 || t1.data[3] != 0) { + constexpr field endo_minus_b1 = { Params::endo_minus_b1_lo, Params::endo_minus_b1_mid, 0, 0 }; t1 = (t1 + endo_minus_b1).reduce_once(); } - field beta = cube_root_of_unity(); - field t2 = (t1 * beta + input).reduce_once(); + field t2 = ((t1 * cube_root_of_unity()) + k).reduce_once(); return { { t2.data[0], t2.data[1] }, { t1.data[0], t1.data[1] }, }; } - static void split_into_endomorphism_scalars_384(const field& input, field& k1_out, field& k2_out) - { - constexpr field minus_b1f{ - Params::endo_minus_b1_lo, - Params::endo_minus_b1_mid, - 0, - 0, - }; - constexpr field b2f{ - Params::endo_b2_lo, - Params::endo_b2_mid, - 0, - 0, - }; - constexpr uint256_t g1{ - Params::endo_g1_lo, - Params::endo_g1_mid, - Params::endo_g1_hi, - Params::endo_g1_hihi, - }; - constexpr uint256_t g2{ - Params::endo_g2_lo, - Params::endo_g2_mid, - Params::endo_g2_hi, - Params::endo_g2_hihi, - }; - - field kf = input.reduce_once(); - uint256_t k{ kf.data[0], kf.data[1], kf.data[2], kf.data[3] }; - - uint512_t c1 = (uint512_t(k) * static_cast(g1)) >> 384; - uint512_t c2 = (uint512_t(k) * static_cast(g2)) >> 384; - - field c1f{ c1.lo.data[0], c1.lo.data[1], c1.lo.data[2], c1.lo.data[3] }; - field c2f{ c2.lo.data[0], c2.lo.data[1], c2.lo.data[2], c2.lo.data[3] }; - - c1f.self_to_montgomery_form(); - c2f.self_to_montgomery_form(); - c1f = c1f * minus_b1f; - c2f = c2f * b2f; - field r2f = c1f - c2f; - field beta = cube_root_of_unity(); - field r1f = input.reduce_once() - r2f * beta; - k1_out = r1f; - k2_out = -r2f; - } - // static constexpr auto coset_generators = compute_coset_generators(); // static constexpr std::array coset_generators = compute_coset_generators((1 << 30U)); From 6aefc4127fa895c3967ccd74388a7b9ba06a9df3 Mon Sep 17 00:00:00 2001 From: notnotraju Date: Tue, 24 Feb 2026 16:47:03 +0000 Subject: [PATCH 6/8] fix the WASM issue by restructuring the `reduce_once()` calls. 
--- .../ecc/fields/field_declarations.hpp | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/barretenberg/cpp/src/barretenberg/ecc/fields/field_declarations.hpp b/barretenberg/cpp/src/barretenberg/ecc/fields/field_declarations.hpp index b83fdb489273..3de47d38bb6d 100644 --- a/barretenberg/cpp/src/barretenberg/ecc/fields/field_declarations.hpp +++ b/barretenberg/cpp/src/barretenberg/ecc/fields/field_declarations.hpp @@ -483,8 +483,11 @@ template struct alignas(32) field { * hold k2. This function will be called in either the BN254 base/scalar field * or the generic, secp256k1 branch. */ - static field compute_endomorphism_k2(const field& input) + static field compute_endomorphism_k2(const field& k) { + // force into strict form. + field input = k.reduce_once(); + constexpr field endo_g1 = { Params::endo_g1_lo, Params::endo_g1_mid, Params::endo_g1_hi, 0 }; constexpr field endo_g2 = { Params::endo_g2_lo, Params::endo_g2_mid, 0, 0 }; constexpr field endo_minus_b1 = { Params::endo_minus_b1_lo, Params::endo_minus_b1_mid, 0, 0 }; @@ -523,17 +526,16 @@ template struct alignas(32) field { */ static void split_into_endomorphism_scalars(const field& k, field& k1, field& k2) { - field input = k.reduce_once(); if constexpr (Params::modulus_3 < MODULUS_TOP_LIMB_LARGE_THRESHOLD) { - // BN254 base or scalar field: use 128-bit path with negative-k2 fix. - auto ret = split_into_endomorphism_scalars(input); + // BN254 base or scalar field: use path that corresponds to 128-bit outputs. + auto ret = split_into_endomorphism_scalars(k); k1 = { ret.first[0], ret.first[1], 0, 0 }; k2 = { ret.second[0], ret.second[1], 0, 0 }; } else { // Large modulus (secp256k1): full-width path. 
- field t1 = compute_endomorphism_k2(input); + field t1 = compute_endomorphism_k2(k); k2 = t1; - k1 = ((t1 * cube_root_of_unity()) + input).reduce_once(); + k1 = ((t1 * cube_root_of_unity()) + k).reduce_once(); } } @@ -549,7 +551,7 @@ template struct alignas(32) field { * are just the base and scalar fields of BN254. These are the only "small modulus" fields, so we use a static * assert to force this. * - * Assumes that the input is in strict form, i.e., in the range [0, p). + * Does NOT assume that the input is reduced */ static std::pair, std::array> split_into_endomorphism_scalars(const field& k) { From 0ae3cc1a5c811ba4192d6f67212c9c9a7fa01f31 Mon Sep 17 00:00:00 2001 From: notnotraju Date: Fri, 27 Feb 2026 11:04:50 +0000 Subject: [PATCH 7/8] updated the documentation to clarify when/how secp256k1 split scalars can be bigger than 2^128. --- .../ecc/fields/endomorphism_scalars.py | 79 ++++++++++++++----- 1 file changed, 60 insertions(+), 19 deletions(-) diff --git a/barretenberg/cpp/src/barretenberg/ecc/fields/endomorphism_scalars.py b/barretenberg/cpp/src/barretenberg/ecc/fields/endomorphism_scalars.py index c3a139bdf265..5a07bf3ffe03 100644 --- a/barretenberg/cpp/src/barretenberg/ecc/fields/endomorphism_scalars.py +++ b/barretenberg/cpp/src/barretenberg/ecc/fields/endomorphism_scalars.py @@ -55,14 +55,20 @@ # down through √p and below. We stop at the first remainder r_j < √p and read off # two short lattice vectors from the Bézout coefficients at steps j−1 and j. # -# The resulting vector sizes depend on the specific λ and p: +# The resulting vector sizes are generically ~√p, but the exact sizes determine +# whether the split scalars k1, k2 fit in 128 bits: # -# • BN254 (Fr and Fq): the curve is constructed from a 63-bit parameter x, and the -# lattice vectors are a1 = b2 = 2x+1 (64 bits), |b1| = 6x²+2x (127 bits). -# This asymmetric 64/127-bit pattern is a consequence of the BN parametrisation. +# • BN254 (Fr and Fq): p is 254 bits, so √p ~ 127 bits. 
The lattice vectors +# have |b1| ≤ 127 bits, |b2| ≤ 64 bits (the asymmetry comes from the explicit BN). +# Since k2 = f1·|b1| - f2·b2 with f1,f2 ∈ [0,1), we get |k2| < 2^127, +# which fits comfortably in 128 bits with 1 bit of headroom. Similarly, +# |a1| = 64 bits and |a2| = 127 bits, so similar logic applies for k1. # -# • secp256k1 Fr: no small generating parameter; the lattice vectors are all in the -# generic ~126–129-bit range (roughly √p for a 256-bit prime). +# • secp256k1 Fr: p is 256 bits, so √p ~ 128 bits. The lattice basis has +# |b1| = 128 bits and |b2| = 126 bits. However, |a1| = 126 bits and |a2| +# = 129 bits. This implies that the naive bound only gives that |k1| +# ≤ 129 bits. Indeed, k1 is 129 bits ~25% of the time. It turns out that |k2| +# is 129 bits roughly 0.3% of the time. # from math import isqrt @@ -486,8 +492,8 @@ def verify_split(k, k1, k2, t1, t2, lambda_val, modulus): # 256-bit shift approximation is sufficient for ANY prime r < 2^256 — the # error is bounded to {0, -1}. The old 384-bit path was only needed because # the 256-bit C++ code truncated outputs to 128 bits, clipping the 129th bit -# that appears for ~26% of secp256k1 inputs. With full-width output, 256-bit -# shift works perfectly. +# that appears for ~25% of inputs (k1) and ~0.3% of inputs (k2). With +# full-width output, 256-bit shift works perfectly. # # For secp256k1, the large-modulus branch of split_into_endomorphism_scalars # returns full field elements (not truncated to 128 bits). The caller @@ -524,7 +530,8 @@ def verify_split(k, k1, k2, t1, t2, lambda_val, modulus): # § 11. secp256k1 Fr — LATTICE BASIS # ==================================================================================== # -# See §0 for why these vectors are ~126–129 bits (unlike BN254's 64/127 pattern). +# See §0 for the component sizes: |a1| = 126, |b1| = 128, |a2| = 129, |b2| = 126 bits +# (unlike BN254's asymmetric 64/127 pattern). 
secp_a1, secp_b1, secp_a2, secp_b2 = find_short_lattice_basis(secp_lambda, secp_r) @@ -580,8 +587,10 @@ def verify_split(k, k1, k2, t1, t2, lambda_val, modulus): # ==================================================================================== # # secp256k1 uses the same core computation as BN254 (compute_endomorphism_k2 in -# field_declarations.hpp), but WITHOUT the negative-k2 fix. The result k2 is a -# full field element (up to ~129 bits); the caller handles signs. +# field_declarations.hpp), but WITHOUT the negative-k2 fix. Both k1 and k2 can +# reach 129 bits (see §0 for why); the caller handles signs. This specifically means +# that if k2 is (naively) negative, it will be returned as r - k2, a 256-bit number, +# and the caller will then detect this and handle appropriately. # # ALGORITHM (the `else` branch of split_into_endomorphism_scalars in # field_declarations.hpp, for large moduli): @@ -604,7 +613,7 @@ def split_scalar_secp256k1(k, modulus, lambda_val, endo_g1, endo_g2, endo_minus_ Returns: (k1, k2): Full field elements such that k ≡ k1 - λ·k2 (mod r). - k2 fits in ~129 bits; k1 fits in ~128 bits. + Both k1 and k2 can reach ~129 bits (see §0). """ input_val = k % modulus @@ -669,10 +678,43 @@ def verify_split_secp256k1(k, k1, k2, lambda_val, modulus): # ALL curves use the same 256-bit shift (see §0a for the proof that this is # sufficient for any r < 2^256). MODULUS_TOP_LIMB_LARGE_THRESHOLD (2^62) # determines whether the 128-bit pair path (with negative-k2 fix) or the -# full-width path (no fix, caller handles signs) is used. The former is ONLY -# called for BN254 fields, as their lattice basis is unusually short and hence -# the splitting scalars comfortably fit in 128 bits; in general, for 256-bit fields, -# k1 and k2 will have ~ 128 or 129 bits. +# full-width path (no fix, caller handles signs) is used. The 128-bit path +# works for BN254 because its 254-bit modulus gives √r ~ 127 bits, leaving +# one bit of headroom below 128. 
For 256-bit moduli like secp256k1, √r ~ 128 +# bits leaves zero headroom: k1 exceeds 128 bits ~25% of the time (since +# |a2| = 129 bits) and k2 exceeds 128 bits ~0.3% of the time (see §16). + +# ==================================================================================== +# § 16. APPENDIX: 129-BIT SCALARS FOR secp256k1 +# ==================================================================================== +# +# Empirically verify the 129-bit overflow frequencies from §0 by calling +# split_scalar_secp256k1 (§13) on random inputs. + +def measure_overflow_frequency(n_samples=500_000): + """Measure the fraction of random scalars whose k1 or k2 exceeds 128 bits.""" + import random + rng = random.Random(42) + + count_k1 = 0 + count_k2 = 0 + for _ in range(n_samples): + k = rng.randrange(1, secp_r) + k1, k2 = split_scalar_secp256k1( + k, secp_r, secp_lambda, + secp_endo_g1, secp_endo_g2, secp_endo_minus_b1, secp_endo_b2 + ) + if k1.bit_length() > 128: + count_k1 += 1 + if k2 <= (secp_r - (1 << 127)) and k2.bit_length() > 128: # k2 is naively in the range (-2^127, 2^129). therefore we throw away when k2 is negative. 
+ count_k2 += 1 + + pct_k1 = 100 * count_k1 / n_samples + pct_k2 = 100 * count_k2 / n_samples + print(f" k1 > 128 bits: {count_k1}/{n_samples} ({pct_k1:.1f}%)") + print(f" k2 positive and > 128 bits: {count_k2}/{n_samples} ({pct_k2:.2f}%)") + assert 20 < pct_k1 < 35, f"Expected ~25% for k1, got {pct_k1}%" + assert 0.1 < pct_k2 < 1.0, f"Expected ~0.3% for k2, got {pct_k2}%" if __name__ == "__main__": print("=== Part I: BN254 Fr ===") @@ -699,8 +741,7 @@ def verify_split_secp256k1(k, k1, k2, lambda_val, modulus): print(f" endo_b2: {hex(secp_endo_b2)}") print(" -> Constants match secp256k1.hpp FrParams") - print("\n=== secp256k1 Fq (base field) ===") - print(f" β (cube root): {hex(secp_fq_beta)}") - print(" -> Cube root verified") + print("\n=== Appendix: 129-bit overflow frequency (secp256k1) ===") + measure_overflow_frequency() print("\nAll verifications passed!") From 1784aa6d50607bdf69036462b191b60f9fe4d4fc Mon Sep 17 00:00:00 2001 From: notnotraju Date: Wed, 4 Mar 2026 13:38:57 +0000 Subject: [PATCH 8/8] updated endomorphism scalars documentation to explain why the short bases for fq and fr are so close to each other --- .../ecc/fields/endomorphism_scalars.py | 71 +++++++++++++++++-- 1 file changed, 65 insertions(+), 6 deletions(-) diff --git a/barretenberg/cpp/src/barretenberg/ecc/fields/endomorphism_scalars.py b/barretenberg/cpp/src/barretenberg/ecc/fields/endomorphism_scalars.py index 5a07bf3ffe03..69961ad7f210 100644 --- a/barretenberg/cpp/src/barretenberg/ecc/fields/endomorphism_scalars.py +++ b/barretenberg/cpp/src/barretenberg/ecc/fields/endomorphism_scalars.py @@ -5,10 +5,12 @@ This document explains the "splitting scalars" algorithm in Barretenberg for all curves that admit an efficient endomorphism. 
We cover: - Part 0 (§0): Preliminaries — the GLV lattice and how to find a short basis - Part I (§1–§5): BN254 Fr — the scalar field of BN254 (254-bit, uses 2^256 shift) - Part II (§6–§9): BN254 Fq — the base field of BN254 (254-bit, uses 2^256 shift) - Part III (§10–§14): secp256k1 Fr — the scalar field of secp256k1 (256-bit, now uses 2^256 shift) + Part 0 (§0): Preliminaries — the GLV lattice and how to find a short basis + Part I (§1–§5): BN254 Fr — the scalar field of BN254 (254-bit, uses 2^256 shift) + Part II (§6–§9): BN254 Fq — the base field of BN254 (254-bit, uses 2^256 shift) + Part III (§10–§14): secp256k1 Fr — the scalar field of secp256k1 (256-bit, now uses 2^256 shift) + Appendix (§16): 129-bit scalars for secp256k1 + Appendix (§17): Why the BN254 Fr and Fq lattice bases are nearly identical Reference: Gallant, Lambert, Vanstone, "Faster Point Multiplication on Elliptic Curves" (2001) @@ -181,8 +183,12 @@ def find_short_lattice_basis(lambda_val, modulus): # § 1. BN254 Fr — FIELD PARAMETERS # ==================================================================================== +# The BN parameter x (see §17 for why the Fr and Fq lattice bases are nearly identical) +x_bn = 4965661367192848881 # 0x44e992b44a6909f1, 63 bits + # The scalar field modulus of BN254 (from bn254/fr.hpp) r = 0x30644E72E131A029B85045B68181585D2833E84879B9709143E1F593F0000001 +assert r == 36*x_bn**4 + 36*x_bn**3 + 18*x_bn**2 + 6*x_bn + 1, "r = 36x⁴ + 36x³ + 18x² + 6x + 1" # Montgomery parameter: R = 2^256 mod r # This is needed because fr.hpp stores values in Montgomery form @@ -217,7 +223,13 @@ def find_short_lattice_basis(lambda_val, modulus): a2 = 0x6f4d8248eeb859fd0be4e1541221250b # 127 bits b2 = 0x89d3256894d213e3 # 64 bits -# NOTE: a remarkable feature of this short basis is that a1 == b2, and indeed -b1 is rather close to a2. +# NOTE: a1 == b2 (= 2x+1) and a2 ≈ |b1| (= 6x²+4x+1 vs 6x²+2x). 
+# This is a structural consequence of the BN parameterization; see §17 for the full explanation. +# In particular, we can verify these polynomial identities: +assert a1 == 2 * x_bn + 1, "a1 = 2x + 1" +assert b2 == 2 * x_bn + 1, "b2 = 2x + 1" +assert a2 == 6 * x_bn**2 + 4 * x_bn + 1, "a2 = 6x^2 + 4x + 1" +assert -b1 == 6 * x_bn**2 + 2 * x_bn, "|b1| = 6x^2 + 2x" # Verify that the vectors are in the lattice: ai + λ·bi ≡ 0 (mod r) assert (a1 + lambda_val * b1) % r == 0, "Lattice vector 1 must satisfy a1 + λ·b1 ≡ 0" @@ -275,7 +287,6 @@ def compute_splitting_constants(modulus, b1, b2): # ==================================================================================== # # Computes (k1, k2) with k ≡ k1 - λ·k2 (mod r) and |k1|, |k2| < 2^128. -# See §0 for the derivation (Babai's nearest plane). # # SUBTLETY — k2 CAN BE NEGATIVE: # @@ -393,6 +404,8 @@ def verify_split(k, k1, k2, t1, t2, lambda_val, modulus): # The base field modulus of BN254 (from bn254/fq.hpp) fq_modulus = 0x30644E72E131A029B85045B68181585D97816A916871CA8D3C208C16D87CFD47 +assert fq_modulus == 36*x_bn**4 + 36*x_bn**3 + 24*x_bn**2 + 6*x_bn + 1, "q = 36x⁴ + 36x³ + 24x² + 6x + 1" +assert fq_modulus - r == 6 * x_bn**2, "q − r = 6x²" # Montgomery parameter for Fq: R = 2^256 mod q fq_R = pow(2, 256, fq_modulus) @@ -430,6 +443,18 @@ def verify_split(k, k1, k2, t1, t2, lambda_val, modulus): fq_det = fq_a1 * fq_b2 - fq_a2 * fq_b1 assert abs(fq_det) == fq_modulus, f"Fq lattice determinant must be ±q, got {fq_det}" +# Verify polynomial structure and near-identity with Fr basis (see §17 for explanation): +assert fq_a1 == 2 * x_bn, "Fq: a1 = 2x" +assert fq_b2 == 2 * x_bn, "Fq: b2 = 2x" +assert fq_a2 == 6 * x_bn**2 + 4 * x_bn + 1, "Fq: a2 = 6x² + 4x + 1 (same as Fr!)" +assert -fq_b1 == 6 * x_bn**2 + 2 * x_bn + 1, "Fq: |b1| = 6x² + 2x + 1" + +# The Fr and Fq bases differ by at most 1 in each component: +assert a1 - fq_a1 == 1, "a1: Fr has 2x+1, Fq has 2x" +assert b2 - fq_b2 == 1, "b2: Fr has 2x+1, Fq has 2x" +assert 
fq_a2 == a2, "a2: identical for both fields" +assert (-fq_b1) - (-b1) == 1, "|b1|: Fq has 6x²+2x+1, Fr has 6x²+2x" + # ==================================================================================== # § 8. BN254 Fq — PRECOMPUTED CONSTANTS AND VERIFICATION @@ -716,6 +741,40 @@ def measure_overflow_frequency(n_samples=500_000): assert 20 < pct_k1 < 35, f"Expected ~25% for k1, got {pct_k1}%" assert 0.1 < pct_k2 < 1.0, f"Expected ~0.3% for k2, got {pct_k2}%" +# ==================================================================================== +# § 17. APPENDIX: WHY THE BN254 Fr AND Fq LATTICE BASES ARE NEARLY IDENTICAL +# ==================================================================================== +# +# BN254 is parameterized by a single integer x = 0x44e992b44a6909f1 (63 bits). +# Both field primes are polynomials in x: +# +# r = 36x⁴ + 36x³ + 18x² + 6x + 1 (scalar field, Fr) +# q = 36x⁴ + 36x³ + 24x² + 6x + 1 (base field, Fq) +# +# They differ by q − r = 6x² ≈ 2¹²⁷, tiny relative to the 254-bit primes. +# +# The GLV lattice basis vectors turn out to be simple polynomials in x: +# +# Fr basis: (a1, b1) = (2x+1, −(6x²+2x)), (a2, b2) = (6x²+4x+1, 2x+1) +# Fq basis: (a1, b1) = (2x, −(6x²+2x+1)), (a2, b2) = (6x²+4x+1, 2x ) +# +# These are verified by the determinant identities: +# +# (2x+1)² + (6x²+4x+1)(6x²+2x) = 36x⁴ + 36x³ + 18x² + 6x + 1 = r +# (2x)² + (6x²+4x+1)(6x²+2x+1) = 36x⁴ + 36x³ + 24x² + 6x + 1 = q +# +# Notice a1 = b2 for both fields (the basis matrix is "almost symmetric"), and a2 +# is IDENTICAL for both. The only difference: Fr has the +1 on a1/b2, while Fq has +# the +1 on |b1|. All four components differ by at most 1 between Fr and Fq. +# +# Reference: The BN prime parameterization is from Barreto–Naehrig (2006). The +# polynomial structure of GLV short bases for CM curves is discussed in +# B. 
Smith, "Easy scalar decompositions for efficient scalar multiplication +# on elliptic curves and genus 2 Jacobians" (2013), eprint.iacr.org/2013/672. + +# ==================================================================================== +# § 18. MAIN FUNCTION +# ==================================================================================== if __name__ == "__main__": print("=== Part I: BN254 Fr ===") print(f" λ (cube root): {hex(lambda_val)}")