diff --git a/barretenberg/cpp/src/barretenberg/ecc/curves/secp256k1/secp256k1.hpp b/barretenberg/cpp/src/barretenberg/ecc/curves/secp256k1/secp256k1.hpp index 3e68f5e2bbec..323c002898d6 100644 --- a/barretenberg/cpp/src/barretenberg/ecc/curves/secp256k1/secp256k1.hpp +++ b/barretenberg/cpp/src/barretenberg/ecc/curves/secp256k1/secp256k1.hpp @@ -217,15 +217,14 @@ struct FrParams { static constexpr uint64_t endo_b2_lo = 0xe86c90e49284eb15ULL; static constexpr uint64_t endo_b2_mid = 0x3086d221a7d46bcdULL; - static constexpr uint64_t endo_g1_lo = 0xE893209A45DBB031ULL; - static constexpr uint64_t endo_g1_mid = 0x3DAA8A1471E8CA7FULL; - static constexpr uint64_t endo_g1_hi = 0xE86C90E49284EB15ULL; - static constexpr uint64_t endo_g1_hihi = 0x3086D221A7D46BCDULL; - - static constexpr uint64_t endo_g2_lo = 0x1571B4AE8AC47F71ULL; - static constexpr uint64_t endo_g2_mid = 0x221208AC9DF506C6ULL; - static constexpr uint64_t endo_g2_hi = 0x6F547FA90ABFE4C4ULL; - static constexpr uint64_t endo_g2_hihi = 0xE4437ED6010E8828ULL; + // 256-bit-shift constants: g1 = floor((-b1) * 2^256 / r), g2 = floor(b2 * 2^256 / r) + // See endomorphism_scalars.py compute_splitting_constants() for derivation. 
+ static constexpr uint64_t endo_g1_lo = 0x6F547FA90ABFE4C4ULL; + static constexpr uint64_t endo_g1_mid = 0xE4437ED6010E8828ULL; + static constexpr uint64_t endo_g1_hi = 0x0ULL; + + static constexpr uint64_t endo_g2_lo = 0xE86C90E49284EB15ULL; + static constexpr uint64_t endo_g2_mid = 0x3086D221A7D46BCDULL; // Not used in secp256k1 static constexpr uint64_t primitive_root_0 = 0UL; diff --git a/barretenberg/cpp/src/barretenberg/ecc/curves/secp256k1/secp256k1_endo_notes.hpp b/barretenberg/cpp/src/barretenberg/ecc/curves/secp256k1/secp256k1_endo_notes.hpp index 55de295e6cc0..2e42c954b2e9 100644 --- a/barretenberg/cpp/src/barretenberg/ecc/curves/secp256k1/secp256k1_endo_notes.hpp +++ b/barretenberg/cpp/src/barretenberg/ecc/curves/secp256k1/secp256k1_endo_notes.hpp @@ -14,11 +14,9 @@ struct basis_vectors { uint64_t endo_g1_lo = 0; uint64_t endo_g1_mid = 0; uint64_t endo_g1_hi = 0; - uint64_t endo_g1_hihi = 0; uint64_t endo_g2_lo = 0; uint64_t endo_g2_mid = 0; uint64_t endo_g2_hi = 0; - uint64_t endo_g2_hihi = 0; uint64_t endo_minus_b1_lo = 0; uint64_t endo_minus_b1_mid = 0; uint64_t endo_b2_lo = 0; @@ -108,7 +106,7 @@ struct basis_vectors { } uint512_t minus_b1 = -b1; - uint512_t shift256 = uint512_t(1) << 384; + uint512_t shift256 = uint512_t(1) << 256; uint512_t g1 = (-b1 * shift256) / uint512_t(secp256k1::fr::modulus); uint512_t g2 = (b2 * shift256) / uint512_t(secp256k1::fr::modulus); @@ -116,11 +114,9 @@ struct basis_vectors { result.endo_g1_lo = g1.lo.data[0]; result.endo_g1_mid = g1.lo.data[1]; result.endo_g1_hi = g1.lo.data[2]; - result.endo_g1_hihi = g1.lo.data[3]; result.endo_g2_lo = g2.lo.data[0]; result.endo_g2_mid = g2.lo.data[1]; result.endo_g2_hi = g2.lo.data[2]; - result.endo_g2_hihi = g2.lo.data[3]; result.endo_minus_b1_lo = minus_b1.lo.data[0]; result.endo_minus_b1_mid = minus_b1.lo.data[1]; result.endo_b2_lo = b2.lo.data[0]; diff --git a/barretenberg/cpp/src/barretenberg/ecc/fields/endomorphism_scalars.py 
b/barretenberg/cpp/src/barretenberg/ecc/fields/endomorphism_scalars.py index 5f0152d2f5e3..69961ad7f210 100644 --- a/barretenberg/cpp/src/barretenberg/ecc/fields/endomorphism_scalars.py +++ b/barretenberg/cpp/src/barretenberg/ecc/fields/endomorphism_scalars.py @@ -5,10 +5,12 @@ This document explains the "splitting scalars" algorithm in Barretenberg for all curves that admit an efficient endomorphism. We cover: - Part 0 (§0): Preliminaries — the GLV lattice and how to find a short basis - Part I (§1–§5): BN254 Fr — the scalar field of BN254 (254-bit, uses 2^256 shift) - Part II (§6–§9): BN254 Fq — the base field of BN254 (254-bit, uses 2^256 shift) - Part III (§10–§14): secp256k1 Fr — the scalar field of secp256k1 (256-bit, uses 2^384 shift) + Part 0 (§0): Preliminaries — the GLV lattice and how to find a short basis + Part I (§1–§5): BN254 Fr — the scalar field of BN254 (254-bit, uses 2^256 shift) + Part II (§6–§9): BN254 Fq — the base field of BN254 (254-bit, uses 2^256 shift) + Part III (§10–§14): secp256k1 Fr — the scalar field of secp256k1 (256-bit, now uses 2^256 shift) + Appendix (§16): 129-bit scalars for secp256k1 + Appendix (§17): Why the BN254 Fr and Fq lattice bases are nearly identical Reference: Gallant, Lambert, Vanstone, "Faster Point Multiplication on Elliptic Curves" (2001) @@ -55,14 +57,20 @@ # down through √p and below. We stop at the first remainder r_j < √p and read off # two short lattice vectors from the Bézout coefficients at steps j−1 and j. # -# The resulting vector sizes depend on the specific λ and p: +# The resulting vector sizes are generically ~√p, but the exact sizes determine +# whether the split scalars k1, k2 fit in 128 bits: # -# • BN254 (Fr and Fq): the curve is constructed from a 63-bit parameter x, and the -# lattice vectors are a1 = b2 = 2x+1 (64 bits), |b1| = 6x²+2x (127 bits). -# This asymmetric 64/127-bit pattern is a consequence of the BN parametrisation. +# • BN254 (Fr and Fq): p is 254 bits, so √p ~ 127 bits. 
The lattice vectors +# have |b1| ≤ 127 bits, |b2| ≤ 64 bits (the asymmetry comes from the explicit BN). +# Since k2 = f1·|b1| - f2·b2 with f1,f2 ∈ [0,1), we get |k2| < 2^127, +# which fits comfortably in 128 bits with 1 bit of headroom. Similarly, +# |a1| = 64 bits and |a2| = 127 bits, so similar logic applies for k1. # -# • secp256k1 Fr: no small generating parameter; the lattice vectors are all in the -# generic ~126–129-bit range (roughly √p for a 256-bit prime). +# • secp256k1 Fr: p is 256 bits, so √p ~ 128 bits. The lattice basis has +# |b1| = 128 bits and |b2| = 126 bits. However, |a1| = 126 bits and |a2| +# = 129 bits. This implies that the naive bound only gives that |k1| +# ≤ 129 bits. Indeed, k1 is 129 bits ~25% of the time. It turns out that |k2| +# is 129 bits roughly 0.3% of the time. # from math import isqrt @@ -92,24 +100,24 @@ def find_short_lattice_basis(lambda_val, modulus): # prev_coeff - quot·coeff. It follows that (-remainder, coeff) is always # a lattice vector: -remainder + λ·coeff ≡ -coeff·λ + coeff·λ ≡ 0. remainder, prev_remainder = lambda_val, modulus - coeff, prev_coeff = 1, 0 + coeff, prev_coeff = 1, 0 # Run until the remainder first drops below √p. while remainder >= approx_sqrt: - quot = prev_remainder // remainder + quot = prev_remainder // remainder prev_remainder, remainder = remainder, prev_remainder - quot * remainder - prev_coeff, coeff = coeff, prev_coeff - quot * coeff + prev_coeff, coeff = coeff, prev_coeff - quot * coeff # At this point: - # vec_before = (-prev_remainder, prev_coeff) — last step above √p - # vec_cross = (-remainder, coeff) — first step below √p + # vec_before = (-prev_remainder, prev_coeff) — last step above √p + # vec_cross = (-remainder, coeff) — first step below √p vec_before = (-prev_remainder, prev_coeff) - vec_cross = (-remainder, coeff) + vec_cross = (-remainder, coeff) # One more EEA step gives an independent candidate vector. 
- quot = prev_remainder // remainder - r_after = prev_remainder - quot * remainder - s_after = prev_coeff - quot * coeff + quot = prev_remainder // remainder + r_after = prev_remainder - quot * remainder + s_after = prev_coeff - quot * coeff vec_after = (-r_after, s_after) # First basis vector: vec_cross (shortest, by construction). @@ -126,16 +134,61 @@ def find_short_lattice_basis(lambda_val, modulus): return a1, b1, a2, b2 +# ==================================================================================== +# § 0a. THE 256-BIT-SHIFT APPROXIMATION — ERROR BOUND PROOF +# ==================================================================================== +# +# CLAIM: For any prime r < 2^256 and any b with |b| < r, define g = floor(b * 2^256 / r). +# Then for every k in [0, r): +# +# floor(g * k / 2^256) ∈ { floor(b * k / r), floor(b * k / r) - 1 } +# +# i.e., the approximation error is in {0, -1}. This holds for ALL curves with +# r < 2^256 — BN254, secp256k1, and any other. +# (Note that we used to use a 384 bit shift for secp256k1.) +# +# PROOF: +# +# Write the Euclidean division of b * 2^256 by r: +# +# b * 2^256 = g * r + ε where 0 ≤ ε < r ...(1) +# +# Rearranging: g = (b * 2^256 - ε) / r. Multiply both sides by k: +# +# g * k = b * k * 2^256 / r - ε * k / r +# +# Dividing by 2^256: +# +# g * k / 2^256 = b * k / r - ε * k / (r * 2^256) ...(2) +# +# The correction term δ := ε * k / (r * 2^256) satisfies: +# +# 0 ≤ δ = ε * k / (r * 2^256) < r * r / (r * 2^256) +# = r / 2^256 < 1 ...(3) +# +# (using ε < r, k < r, and r < 2^256). +# +# From (2) and (3): +# +# b*k/r - 1 < g*k/2^256 ≤ b*k/r ...(4) +# +# Taking floors of (4): if b*k/r = q + f where q = floor(b*k/r) and 0 ≤ f < 1, +# then g*k/2^256 ∈ (q + f - 1, q + f], so floor(g*k/2^256) ∈ {q-1, q}. 
∎ + # ╔══════════════════════════════════════════════════════════════════════════════╗ -# ║ PART I: BN254 Fr (Scalar Field) ║ +# ║ PART I: BN254 Fr (Scalar Field) ║ # ╚══════════════════════════════════════════════════════════════════════════════╝ # ==================================================================================== # § 1. BN254 Fr — FIELD PARAMETERS # ==================================================================================== +# The BN parameter x (see §17 for why the Fr and Fq lattice bases are nearly identical) +x_bn = 4965661367192848881 # 0x44e992b44a6909f1, 63 bits + # The scalar field modulus of BN254 (from bn254/fr.hpp) r = 0x30644E72E131A029B85045B68181585D2833E84879B9709143E1F593F0000001 +assert r == 36*x_bn**4 + 36*x_bn**3 + 18*x_bn**2 + 6*x_bn + 1, "r = 36x⁴ + 36x³ + 18x² + 6x + 1" # Montgomery parameter: R = 2^256 mod r # This is needed because fr.hpp stores values in Montgomery form @@ -170,7 +223,13 @@ def find_short_lattice_basis(lambda_val, modulus): a2 = 0x6f4d8248eeb859fd0be4e1541221250b # 127 bits b2 = 0x89d3256894d213e3 # 64 bits -# NOTE: a remarkable feature of this short basis is that a1 == b2, and indeed -b1 is rather close to a2. +# NOTE: a1 == b2 (= 2x+1) and a2 ≈ |b1| (= 6x²+4x+1 vs 6x²+2x). +# This is a structural consequence of the BN parameterization; see §17 for the full explanation. 
+# In particular, we can verify these polynomial identities: +assert a1 == 2 * x_bn + 1, "a1 = 2x + 1" +assert b2 == 2 * x_bn + 1, "b2 = 2x + 1" +assert a2 == 6 * x_bn**2 + 4 * x_bn + 1, "a2 = 6x^2 + 4x + 1" +assert -b1 == 6 * x_bn**2 + 2 * x_bn, "|b1| = 6x^2 + 2x" # Verify that the vectors are in the lattice: ai + λ·bi ≡ 0 (mod r) assert (a1 + lambda_val * b1) % r == 0, "Lattice vector 1 must satisfy a1 + λ·b1 ≡ 0" @@ -228,14 +287,14 @@ def compute_splitting_constants(modulus, b1, b2): # ==================================================================================== # # Computes (k1, k2) with k ≡ k1 - λ·k2 (mod r) and |k1|, |k2| < 2^128. -# See §0 for the derivation (Babai's nearest plane). # # SUBTLETY — k2 CAN BE NEGATIVE: # # k2 = -δ1·|b1| + δ2·b2 where δ1, δ2 ∈ [0,1) are rounding errors. This is # negative when δ1·|b1| > δ2·b2. Since |b1|/b2 ≈ 2^63 for BN254, even tiny # δ1 can cause this. It happens at k ≈ ⌈m·2^256/endo_g2⌉ where c1 ticks up -# to m. Frequency: ~2^{-64} of all inputs. +# to m. Note that the ≈ means that it can happen for _many_ k around/slightly greater than that number. +# Frequency: ~2^{-64} of all inputs. # # FIX: When t1 > 128 bits (i.e. k2 < 0 wrapped mod r), add |b1|. 
This shifts # along the lattice vector (a1, b1), making k2 positive: @@ -333,7 +392,7 @@ def verify_split(k, k1, k2, t1, t2, lambda_val, modulus): # ╔══════════════════════════════════════════════════════════════════════════════╗ -# ║ PART II: BN254 Fq (Base Field) ║ +# ║ PART II: BN254 Fq (Base Field) ║ # ╚══════════════════════════════════════════════════════════════════════════════╝ # ==================================================================================== @@ -345,6 +404,8 @@ def verify_split(k, k1, k2, t1, t2, lambda_val, modulus): # The base field modulus of BN254 (from bn254/fq.hpp) fq_modulus = 0x30644E72E131A029B85045B68181585D97816A916871CA8D3C208C16D87CFD47 +assert fq_modulus == 36*x_bn**4 + 36*x_bn**3 + 24*x_bn**2 + 6*x_bn + 1, "q = 36x⁴ + 36x³ + 24x² + 6x + 1" +assert fq_modulus - r == 6 * x_bn**2, "q − r = 6x²" # Montgomery parameter for Fq: R = 2^256 mod q fq_R = pow(2, 256, fq_modulus) @@ -382,6 +443,18 @@ def verify_split(k, k1, k2, t1, t2, lambda_val, modulus): fq_det = fq_a1 * fq_b2 - fq_a2 * fq_b1 assert abs(fq_det) == fq_modulus, f"Fq lattice determinant must be ±q, got {fq_det}" +# Verify polynomial structure and near-identity with Fr basis (see §17 for explanation): +assert fq_a1 == 2 * x_bn, "Fq: a1 = 2x" +assert fq_b2 == 2 * x_bn, "Fq: b2 = 2x" +assert fq_a2 == 6 * x_bn**2 + 4 * x_bn + 1, "Fq: a2 = 6x² + 4x + 1 (same as Fr!)" +assert -fq_b1 == 6 * x_bn**2 + 2 * x_bn + 1, "Fq: |b1| = 6x² + 2x + 1" + +# The Fr and Fq bases differ by at most 1 in each component: +assert a1 - fq_a1 == 1, "a1: Fr has 2x+1, Fq has 2x" +assert b2 - fq_b2 == 1, "b2: Fr has 2x+1, Fq has 2x" +assert fq_a2 == a2, "a2: identical for both fields" +assert (-fq_b1) - (-b1) == 1, "|b1|: Fq has 6x²+2x+1, Fr has 6x²+2x" + # ==================================================================================== # § 8. 
BN254 Fq — PRECOMPUTED CONSTANTS AND VERIFICATION @@ -432,7 +505,7 @@ def verify_split(k, k1, k2, t1, t2, lambda_val, modulus): # ╔══════════════════════════════════════════════════════════════════════════════╗ -# ║ PART III: secp256k1 Fr (Scalar Field) ║ +# ║ PART III: secp256k1 Fr (Scalar Field) ║ # ╚══════════════════════════════════════════════════════════════════════════════╝ # ==================================================================================== @@ -440,11 +513,16 @@ def verify_split(k, k1, k2, t1, t2, lambda_val, modulus): # ==================================================================================== # # secp256k1's scalar field modulus is a full 256 bits (top limb = 0xFFFF...), -# exceeding MODULUS_TOP_LIMB_LARGE_THRESHOLD (2^62). AUDITTODO: explain *exactly* the rounding issues that force this. -# This requires: -# - 2^384 shift instead of 2^256 (a >>256 shift loses precision for 256-bit moduli) -# - 4-limb endo_g constants (lo/mid/hi/hihi) -# - Montgomery field multiplication in split_into_endomorphism_scalars_384 +# exceeding MODULUS_TOP_LIMB_LARGE_THRESHOLD (2^62). As proved in §0a, the +# 256-bit shift approximation is sufficient for ANY prime r < 2^256 — the +# error is bounded to {0, -1}. The old 384-bit path was only needed because +# the 256-bit C++ code truncated outputs to 128 bits, clipping the 129th bit +# that appears for ~25% of inputs (k1) and ~0.3% of inputs (k2). With +# full-width output, 256-bit shift works perfectly. +# +# For secp256k1, the large-modulus branch of split_into_endomorphism_scalars +# returns full field elements (not truncated to 128 bits). The caller +# (biggroup_nafs.hpp) handles signs by inspecting the MSB of k2. # The scalar field modulus of secp256k1 (from secp256k1.hpp, FrParams) secp_r = ( @@ -477,7 +555,8 @@ def verify_split(k, k1, k2, t1, t2, lambda_val, modulus): # § 11. 
secp256k1 Fr — LATTICE BASIS # ==================================================================================== # -# See §0 for why these vectors are ~126–129 bits (unlike BN254's 64/127 pattern). +# See §0 for the component sizes: |a1| = 126, |b1| = 128, |a2| = 129, |b2| = 126 bits +# (unlike BN254's asymmetric 64/127 pattern). secp_a1, secp_b1, secp_a2, secp_b2 = find_short_lattice_basis(secp_lambda, secp_r) @@ -491,55 +570,26 @@ def verify_split(k, k1, k2, t1, t2, lambda_val, modulus): # ==================================================================================== -# § 12. secp256k1 Fr — PRECOMPUTED CONSTANTS (384-bit shift) +# § 12. secp256k1 Fr — PRECOMPUTED CONSTANTS (256-bit shift) # ==================================================================================== # -# In the 384-bit code, the naming is "cross-paired" — g1 is paired with minus_b1, -# and g2 is paired with b2 (the opposite of what you might expect): +# secp256k1 now uses the SAME 256-bit shift as BN254 (see §0a for the proof that +# this is sufficient for any r < 2^256). The naming convention matches BN254: # -# endo_g1 = ⌈b2 · 2^384 / r⌉ -# endo_g2 = ⌊(-b1) · 2^384 / r⌋ +# endo_g1 = ⌊(-b1) · 2^256 / r⌋ +# endo_g2 = ⌊b2 · 2^256 / r⌋ # -# Note: secp256k1_endo_notes.hpp uses the opposite naming convention for g1/g2, -# but the STORED values in FrParams follow this cross-paired convention. - -def compute_splitting_constants_384(modulus, b1, b2): - """ - Compute the precomputed constants for the 384-bit shift variant. +# We reuse compute_splitting_constants() from §3 — same function, same shift. - Returns (endo_g1, endo_g2, endo_minus_b1, endo_b2) matching the hpp file. - - Convention: endo_g1 is the b2-based approximation (cross-paired with minus_b1), - endo_g2 is the (-b1)-based approximation (cross-paired with b2). 
- """ - shift = 1 << 384 - # endo_g1 = ceil(b2 * 2^384 / r) — cross-paired with minus_b1 in the algorithm - endo_g1 = -((-b2 * shift) // modulus) - # endo_g2 = floor((-b1) * 2^384 / r) — cross-paired with b2 in the algorithm - endo_g2 = ((-b1) * shift) // modulus - endo_minus_b1 = (-b1) % modulus - endo_b2 = b2 % modulus - return endo_g1, endo_g2, endo_minus_b1, endo_b2 - - -secp_endo_g1, secp_endo_g2, secp_endo_minus_b1, secp_endo_b2 = compute_splitting_constants_384( +secp_endo_g1, secp_endo_g2, secp_endo_minus_b1, secp_endo_b2 = compute_splitting_constants( secp_r, secp_b1, secp_b2 ) # Verify these match the values in secp256k1.hpp (FrParams) -# endo_g1 is stored as (lo, mid, hi, hihi) — 4 × 64-bit limbs -secp_expected_endo_g1 = ( - 0xE893209A45DBB031 | - (0x3DAA8A1471E8CA7F << 64) | - (0xE86C90E49284EB15 << 128) | - (0x3086D221A7D46BCD << 192) -) -secp_expected_endo_g2 = ( - 0x1571B4AE8AC47F71 | - (0x221208AC9DF506C6 << 64) | - (0x6F547FA90ABFE4C4 << 128) | - (0xE4437ED6010E8828 << 192) -) +# endo_g1 is stored as (lo, mid, hi) — only 2 non-zero limbs for secp256k1 +# (hi = 0 because (-b1) * 2^256 / r fits in 128 bits for this curve) +secp_expected_endo_g1 = 0x6F547FA90ABFE4C4 | (0xE4437ED6010E8828 << 64) +secp_expected_endo_g2 = 0xE86C90E49284EB15 | (0x3086D221A7D46BCD << 64) secp_expected_endo_minus_b1 = 0x6F547FA90ABFE4C3 | (0xE4437ED6010E8828 << 64) secp_expected_endo_b2 = 0xe86c90e49284eb15 | (0x3086d221a7d46bcd << 64) @@ -558,48 +608,54 @@ def compute_splitting_constants_384(modulus, b1, b2): # ==================================================================================== -# § 13. secp256k1 Fr — THE 384-BIT SPLITTING ALGORITHM +# § 13. secp256k1 Fr — THE 256-BIT SPLITTING ALGORITHM # ==================================================================================== # -# Unlike the 256-bit variant, there is no explicit negative-k2 fix — field -# subtraction handles signs. 
The c1, c2 values are converted to Montgomery form -# and multiplied via field ops (which reduce mod r automatically). +# secp256k1 uses the same core computation as BN254 (compute_endomorphism_k2 in +# field_declarations.hpp), but WITHOUT the negative-k2 fix. Both k1 and k2 can +# reach 129 bits (see §0 for why); the caller handles signs. This specifically means +# that if k2 is (naively) negative, it will be returned as r + k2 (i.e. r - |k2|), a 256-bit number, +# and the caller will then detect this and handle appropriately. +# +# ALGORITHM (the `else` branch of split_into_endomorphism_scalars in +# field_declarations.hpp, for large moduli): # -# ALGORITHM (split_into_endomorphism_scalars_384 in field_declarations.hpp): +# 1. t1 = compute_endomorphism_k2(k) +# i.e. c1 = (endo_g2 · k) >> 256, c2 = (endo_g1 · k) >> 256 +# t1 = (c2·b2 - c1·(-b1)) mod r [= k2] +# 2. k2 = t1 +# 3. k1 = (t1·λ + k) mod r # -# 1. c1 = (endo_g1 · k) >> 384, c2 = (endo_g2 · k) >> 384 -# 2. r2f = c1·(-b1) - c2·b2 (cross-products, computed as field elements) -# 3. r1f = k - r2f·λ -# 4. k1 = r1f, k2 = -r2f +# No negative-k2 fix: unlike BN254, we do NOT check if t1 > 128 bits. The +# biggroup code inspects the MSB of k2 to determine its sign. -def split_scalar_384(k, modulus, lambda_val, endo_g1, endo_g2, endo_minus_b1, endo_b2): +def split_scalar_secp256k1(k, modulus, lambda_val, endo_g1, endo_g2, endo_minus_b1, endo_b2): """ - Split scalar k using the 384-bit shift variant. + Split scalar k using the 256-bit shift for secp256k1. - Implements split_into_endomorphism_scalars_384() in field_declarations.hpp. + Implements the large-modulus branch of split_into_endomorphism_scalars() + in field_declarations.hpp. Returns: - (k1, k2): The split scalars such that k ≡ k1 - λ·k2 (mod r) + (k1, k2): Full field elements such that k ≡ k1 - λ·k2 (mod r). + Both k1 and k2 can reach ~129 bits (see §0). 
""" input_val = k % modulus - # c1 ≈ k·b2/r, c2 ≈ k·(-b1)/r - c1 = (endo_g1 * input_val) >> 384 - c2 = (endo_g2 * input_val) >> 384 - - # Cross-products (computed as field elements in C++ via Montgomery) - c1_times_minus_b1 = (c1 * endo_minus_b1) % modulus - c2_times_b2 = (c2 * endo_b2) % modulus + # c1 = (g2 * k) >> 256, c2 = (g1 * k) >> 256 + c1 = (endo_g2 * input_val) >> 256 + c2 = (endo_g1 * input_val) >> 256 - # r2f = c1·(-b1) - c2·b2 (nearly cancels, leaving small lattice error) - r2f = (c1_times_minus_b1 - c2_times_b2) % modulus + # q1 = c1 * (-b1), q2 = c2 * b2 (raw integer multiply, low 256 bits) + q1_lo = (c1 * endo_minus_b1) & ((1 << 256) - 1) + q2_lo = (c2 * endo_b2) & ((1 << 256) - 1) - # r1f = k - r2f·λ - r1f = (input_val - r2f * lambda_val) % modulus + # t1 = (q2 - q1) mod r — this is k2 + t1 = (q2_lo - q1_lo) % modulus - # k1 = r1f, k2 = -r2f; invariant: k ≡ k1 - λ·k2 (mod r) - k1 = r1f - k2 = (-r2f) % modulus + # k1 = t1·λ + k (mod r) + k1 = (t1 * lambda_val + input_val) % modulus + k2 = t1 return k1, k2 @@ -608,8 +664,8 @@ def split_scalar_384(k, modulus, lambda_val, endo_g1, endo_g2, endo_minus_b1, en # § 14. secp256k1 Fr — SPLITTING VERIFICATION # ==================================================================================== -def verify_split_384(k, k1, k2, lambda_val, modulus): - """Verify correctness of the 384-bit scalar split.""" +def verify_split_secp256k1(k, k1, k2, lambda_val, modulus): + """Verify correctness of the secp256k1 scalar split.""" # The invariant is k ≡ k1 - λ·k2 (mod r) reconstructed = (k1 - lambda_val * k2) % modulus assert reconstructed == k % modulus, ( @@ -617,10 +673,8 @@ def verify_split_384(k, k1, k2, lambda_val, modulus): f" k1={hex(k1)}, k2={hex(k2)}\n" f" reconstructed={hex(reconstructed)}, expected={hex(k % modulus)}" ) - # For the 384-bit variant, k1 and k2 are field elements; they should be small - # enough that the decomposition is useful. We verify they fit in ~129 bits. 
- # (The C++ code does not explicitly truncate to 128 bits in this path; - # the values may be slightly larger than in the 256-bit path.) + # k1 and k2 are full field elements. We verify their effective magnitudes + # fit in ~129 bits (the decomposition halves the scalar bit-length). k1_eff = k1 if k1 <= modulus // 2 else modulus - k1 k2_eff = k2 if k2 <= modulus // 2 else modulus - k2 assert k1_eff.bit_length() <= 129, ( @@ -632,52 +686,95 @@ def verify_split_384(k, k1, k2, lambda_val, modulus): for k_test in [0, 1, 42, secp_lambda, secp_r - 1, secp_r // 2, secp_r // 3]: - k1, k2 = split_scalar_384( + k1, k2 = split_scalar_secp256k1( k_test, secp_r, secp_lambda, secp_endo_g1, secp_endo_g2, secp_endo_minus_b1, secp_endo_b2 ) - verify_split_384(k_test, k1, k2, secp_lambda, secp_r) - + verify_split_secp256k1(k_test, k1, k2, secp_lambda, secp_r) -# Also verify with the cube root of unity in the BASE field (secp256k1 Fq). -# The base field Fq of secp256k1 has modulus p = 2^256 - 2^32 - 977, which also -# has a cube root of unity β. This β is what gets multiplied with the x-coordinate -# in the endomorphism φ(x,y) = (β·x, y). Let's verify it. - -secp_fq_modulus = ( - 0xFFFFFFFEFFFFFC2F | - (0xFFFFFFFFFFFFFFFF << 64) | - (0xFFFFFFFFFFFFFFFF << 128) | - (0xFFFFFFFFFFFFFFFF << 192) -) +# ==================================================================================== +# § 15. SUMMARY +# ==================================================================================== +# +# Derived and verified GLV endomorphism constants for: +# - BN254 Fr (§1–§5): 254-bit, 256-bit shift, constants match bn254/fr.hpp +# - BN254 Fq (§6–§9): 254-bit, 256-bit shift, constants match bn254/fq.hpp +# - secp256k1 Fr (§10–§14): 256-bit, 256-bit shift, constants match secp256k1.hpp +# +# ALL curves use the same 256-bit shift (see §0a for the proof that this is +# sufficient for any r < 2^256). 
MODULUS_TOP_LIMB_LARGE_THRESHOLD (2^62) +# determines whether the 128-bit pair path (with negative-k2 fix) or the +# full-width path (no fix, caller handles signs) is used. The 128-bit path +# works for BN254 because its 254-bit modulus gives √r ~ 127 bits, leaving +# one bit of headroom below 128. For 256-bit moduli like secp256k1, √r ~ 128 +# bits leaves zero headroom: k1 exceeds 128 bits ~25% of the time (since +# |a2| = 129 bits) and k2 exceeds 128 bits ~0.3% of the time (see §16). -secp_fq_R = pow(2, 256, secp_fq_modulus) -secp_fq_R_inv = pow(secp_fq_R, -1, secp_fq_modulus) +# ==================================================================================== +# § 16. APPENDIX: 129-BIT SCALARS FOR secp256k1 +# ==================================================================================== +# +# Empirically verify the 129-bit overflow frequencies from §0 by calling +# split_scalar_secp256k1 (§13) on random inputs. -secp_fq_cube_root_montgomery = ( - 0x58a4361c8e81894e | - (0x03fde1631c4b80af << 64) | - (0xf8e98978d02e3905 << 128) | - (0x7a4a36aebcbb3d53 << 192) -) +def measure_overflow_frequency(n_samples=500_000): + """Measure the fraction of random scalars whose k1 or k2 exceeds 128 bits.""" + import random + rng = random.Random(42) -secp_fq_beta = (secp_fq_cube_root_montgomery * secp_fq_R_inv) % secp_fq_modulus -assert pow(secp_fq_beta, 3, secp_fq_modulus) == 1, "β³ ≡ 1 (mod p) for secp256k1 Fq" -assert secp_fq_beta != 1, "β must be non-trivial" + count_k1 = 0 + count_k2 = 0 + for _ in range(n_samples): + k = rng.randrange(1, secp_r) + k1, k2 = split_scalar_secp256k1( + k, secp_r, secp_lambda, + secp_endo_g1, secp_endo_g2, secp_endo_minus_b1, secp_endo_b2 + ) + if k1.bit_length() > 128: + count_k1 += 1 + if k2 <= (secp_r - (1 << 127)) and k2.bit_length() > 128: # k2 is naively in the range (-2^127, 2^129). therefore we throw away when k2 is negative. 
+ count_k2 += 1 + pct_k1 = 100 * count_k1 / n_samples + pct_k2 = 100 * count_k2 / n_samples + print(f" k1 > 128 bits: {count_k1}/{n_samples} ({pct_k1:.1f}%)") + print(f" k2 positive and > 128 bits: {count_k2}/{n_samples} ({pct_k2:.2f}%)") + assert 20 < pct_k1 < 35, f"Expected ~25% for k1, got {pct_k1}%" + assert 0.1 < pct_k2 < 1.0, f"Expected ~0.3% for k2, got {pct_k2}%" # ==================================================================================== -# § 15. SUMMARY +# § 17. APPENDIX: WHY THE BN254 Fr AND Fq LATTICE BASES ARE NEARLY IDENTICAL # ==================================================================================== # -# Derived and verified GLV endomorphism constants for: -# - BN254 Fr (§1–§5): 254-bit, 256-bit shift, constants match bn254/fr.hpp -# - BN254 Fq (§6–§9): 254-bit, 256-bit shift, constants match bn254/fq.hpp -# - secp256k1 Fr (§10–§14): 256-bit, 384-bit shift, constants match secp256k1.hpp -# - secp256k1 Fq cube root β also verified (end of Part III) +# BN254 is parameterized by a single integer x = 0x44e992b44a6909f1 (63 bits). +# Both field primes are polynomials in x: +# +# r = 36x⁴ + 36x³ + 18x² + 6x + 1 (scalar field, Fr) +# q = 36x⁴ + 36x³ + 24x² + 6x + 1 (base field, Fq) # -# Architectural split: MODULUS_TOP_LIMB_LARGE_THRESHOLD (2^62) determines whether -# split_into_endomorphism_scalars (256-bit) or _384 (384-bit) is used. +# They differ by q − r = 6x² ≈ 2¹²⁷, tiny relative to the 254-bit primes. +# +# The GLV lattice basis vectors turn out to be simple polynomials in x: +# +# Fr basis: (a1, b1) = (2x+1, −(6x²+2x)), (a2, b2) = (6x²+4x+1, 2x+1) +# Fq basis: (a1, b1) = (2x, −(6x²+2x+1)), (a2, b2) = (6x²+4x+1, 2x ) +# +# These are verified by the determinant identities: +# +# (2x+1)² + (6x²+4x+1)(6x²+2x) = 36x⁴ + 36x³ + 18x² + 6x + 1 = r +# (2x)² + (6x²+4x+1)(6x²+2x+1) = 36x⁴ + 36x³ + 24x² + 6x + 1 = q +# +# Notice a1 = b2 for both fields (the basis matrix is "almost symmetric"), and a2 +# is IDENTICAL for both. 
The only difference: Fr has the +1 on a1/b2, while Fq has +# the +1 on |b1|. All four components differ by at most 1 between Fr and Fq. +# +# Reference: The BN prime parameterization is from Barreto–Naehrig (2006). The +# polynomial structure of GLV short bases for CM curves is discussed in +# B. Smith, "Easy scalar decompositions for efficient scalar multiplication +# on elliptic curves and genus 2 Jacobians" (2013), eprint.iacr.org/2013/672. +# ==================================================================================== +# § 18. Main function +# ==================================================================================== if __name__ == "__main__": print("=== Part I: BN254 Fr ===") print(f" λ (cube root): {hex(lambda_val)}") @@ -703,8 +800,7 @@ def verify_split_384(k, k1, k2, lambda_val, modulus): print(f" endo_b2: {hex(secp_endo_b2)}") print(" -> Constants match secp256k1.hpp FrParams") - print("\n=== secp256k1 Fq (base field) ===") - print(f" β (cube root): {hex(secp_fq_beta)}") - print(" -> Cube root verified") + print("\n=== Appendix: 129-bit overflow frequency (secp256k1) ===") + measure_overflow_frequency() print("\nAll verifications passed!") diff --git a/barretenberg/cpp/src/barretenberg/ecc/fields/field_declarations.hpp b/barretenberg/cpp/src/barretenberg/ecc/fields/field_declarations.hpp index ed47d11f06b7..3de47d38bb6d 100644 --- a/barretenberg/cpp/src/barretenberg/ecc/fields/field_declarations.hpp +++ b/barretenberg/cpp/src/barretenberg/ecc/fields/field_declarations.hpp @@ -45,6 +45,11 @@ namespace bb { // // In Barretenberg, the main workhorse fields are the base and scalar fields of BN-254, which are "small" moduli: they // are each 254 bits. The field algorithms for them are constant-time. 
+// +// NOTE: For the 254-bit fields in Barretenberg, namely BN254 base and scalar fields, we also +// use this constexpr branching to capture another (conceptually unrelated) property: that +// the short basis of the lattice from the endomorphism is shorter than expected. See endomorphism_scalars.py for more +// information. static constexpr uint64_t MODULUS_TOP_LIMB_LARGE_THRESHOLD = 0x4000000000000000ULL; // 2^62 /** @@ -466,71 +471,92 @@ template struct alignas(32) field { * We pre-compute scalars g1 = (2^256 * b1) / n, g2 = (2^256 * b2) / n, to avoid having to perform long division * on 512-bit scalars **/ - static void split_into_endomorphism_scalars(const field& k, field& k1, field& k2) - { - // if the modulus is a >= 255-bit integer, we need to use a basis where g1, g2 have been shifted by 2^384 - if constexpr (Params::modulus_3 >= MODULUS_TOP_LIMB_LARGE_THRESHOLD) { - split_into_endomorphism_scalars_384(k, k1, k2); - } else { - std::pair, std::array> ret = split_into_endomorphism_scalars(k); - k1.data[0] = ret.first[0]; - k1.data[1] = ret.first[1]; - -#if !defined(__clang__) && defined(__GNUC__) -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Warray-bounds" -#endif - k2.data[0] = ret.second[0]; // NOLINT - k2.data[1] = ret.second[1]; -#if !defined(__clang__) && defined(__GNUC__) -#pragma GCC diagnostic pop -#endif - } - } - - // NOTE: this form is only usable if the modulus is 254 bits or less, otherwise see - // split_into_endomorphism_scalars_384. - // DOES NOT assume that the input is reduced; it can be in coarse form. - // TODO(https://github.com/AztecProtocol/barretenberg/issues/851): Unify these APIs. - static std::pair, std::array> split_into_endomorphism_scalars(const field& k) + /** + * @brief Shared core of the endomorphism scalar decomposition. + * + * Computes k2 = floor((-b1)·k/r)·b2 - floor(b2·k/r)·(-b1), using the + * 256-bit-shift approximation g = floor(b·2^256/r) for both BN254 and + * secp256k1. 
See endomorphism_scalars.py §0 for the proof that the + * approximation error is bounded to {0, -1} for any r < 2^256. + * + * The result is a raw (non-Montgomery) `field` whose low 128-or-129 bits + * hold k2. This function will be called in either the BN254 base/scalar-field branch + * or the generic secp256k1 branch. + */ + static field compute_endomorphism_k2(const field& k) { - static_assert(Params::modulus_3 < MODULUS_TOP_LIMB_LARGE_THRESHOLD); + // force into strict form. field input = k.reduce_once(); constexpr field endo_g1 = { Params::endo_g1_lo, Params::endo_g1_mid, Params::endo_g1_hi, 0 }; - constexpr field endo_g2 = { Params::endo_g2_lo, Params::endo_g2_mid, 0, 0 }; - constexpr field endo_minus_b1 = { Params::endo_minus_b1_lo, Params::endo_minus_b1_mid, 0, 0 }; - constexpr field endo_b2 = { Params::endo_b2_lo, Params::endo_b2_mid, 0, 0 }; - // compute c1 = (g2 * k) >> 256 + // c1 = (g2 * k) >> 256, c2 = (g1 * k) >> 256 wide_array c1 = endo_g2.mul_512(input); - // compute c2 = (g1 * k) >> 256 wide_array c2 = endo_g1.mul_512(input); - // (the bit shifts are implicit, as we only utilize the high limbs of c1, c2 - - field c1_hi = { - c1.data[4], c1.data[5], c1.data[6], c1.data[7] - }; // *(field*)((uintptr_t)(&c1) + (4 * sizeof(uint64_t))); - field c2_hi = { - c2.data[4], c2.data[5], c2.data[6], c2.data[7] - }; // *(field*)((uintptr_t)(&c2) + (4 * sizeof(uint64_t))); + // extract high halves + field c1_hi{ c1.data[4], c1.data[5], c1.data[6], c1.data[7] }; + field c2_hi{ c2.data[4], c2.data[5], c2.data[6], c2.data[7] }; - // compute q1 = c1 * -b1 + // q1 = c1 * (-b1), q2 = c2 * b2 wide_array q1 = c1_hi.mul_512(endo_minus_b1); - // compute q2 = c2 * b2 wide_array q2 = c2_hi.mul_512(endo_b2); - // FIX: Avoid using 512-bit multiplication as its not necessary. - // c1_hi, c2_hi can be uint256_t's and the final result (without montgomery reduction) - // could be casted to a field.
field q1_lo{ q1.data[0], q1.data[1], q1.data[2], q1.data[3] }; field q2_lo{ q2.data[0], q2.data[1], q2.data[2], q2.data[3] }; - field t1 = (q2_lo - q1_lo).reduce_once(); + return (q2_lo - q1_lo).reduce_once(); + } + + /** + * @brief Full-width endomorphism decomposition: k ≡ k1 - k2·λ (mod r). + * Modifies the field elements k1 and k2. + * + * For BN254 base/scalar fields, delegates to the 128-bit pair + * overload, which applies the negative-k2 fix. Returns k1, k2 in the low + * 2 limbs (upper limbs zeroed). Both fit in 128 bits. + * + * For generic 256-bit fields: returns k1, k2 as full field + * elements (non-Montgomery). k1 fits in ~128 bits; k2 fits in ~129 bits. + * No negative-k2 fix — the caller (biggroup_nafs.hpp) handles signs by + * inspecting the MSB of k2. + */ + static void split_into_endomorphism_scalars(const field& k, field& k1, field& k2) + { + if constexpr (Params::modulus_3 < MODULUS_TOP_LIMB_LARGE_THRESHOLD) { + // BN254 base or scalar field: use path that corresponds to 128-bit outputs. + auto ret = split_into_endomorphism_scalars(k); + k1 = { ret.first[0], ret.first[1], 0, 0 }; + k2 = { ret.second[0], ret.second[1], 0, 0 }; + } else { + // Large modulus (secp256k1): full-width path. + field t1 = compute_endomorphism_k2(k); + k2 = t1; + k1 = ((t1 * cube_root_of_unity()) + k).reduce_once(); + } + } + + /** + * @brief 128-bit endomorphism decomposition: k ≡ k1 - k2·λ (mod r). + * + * Returns { {k1_lo, k1_hi}, {k2_lo, k2_hi} } — each scalar as a pair of + * uint64_t representing its low 128 bits. Both k1 and k2 are guaranteed to + * fit in 128 bits (the negative-k2 fix ensures this for the ~2^{-64} of + * inputs where k2 would otherwise be slightly negative). + * + * Only valid for fields such that the splitting_scalars algorithm produces 128 bit outputs. In Barretenberg, these + * are just the base and scalar fields of BN254. These are the only "small modulus" fields, so we use a static + * assert to force this.
+ * + * Does NOT assume that the input is reduced + */ + static std::pair, std::array> split_into_endomorphism_scalars(const field& k) + { + static_assert(Params::modulus_3 < MODULUS_TOP_LIMB_LARGE_THRESHOLD); + field t1 = compute_endomorphism_k2(k); // k2 (= t1) can be slightly negative for ~2^{-64} of inputs. // When negative, t1 = k2 + r is 254 bits (upper limbs nonzero). @@ -538,64 +564,17 @@ template struct alignas(32) field { // This shifts k2 by +|b1| (~127 bits, now positive) and k1 by -a1 (~64 bits), // keeping both within 128 bits. See endomorphism_scalars.py for more details. if (t1.data[2] != 0 || t1.data[3] != 0) { + constexpr field endo_minus_b1 = { Params::endo_minus_b1_lo, Params::endo_minus_b1_mid, 0, 0 }; t1 = (t1 + endo_minus_b1).reduce_once(); } - field beta = cube_root_of_unity(); - field t2 = (t1 * beta + input).reduce_once(); + field t2 = ((t1 * cube_root_of_unity()) + k).reduce_once(); return { { t2.data[0], t2.data[1] }, { t1.data[0], t1.data[1] }, }; } - static void split_into_endomorphism_scalars_384(const field& input, field& k1_out, field& k2_out) - { - constexpr field minus_b1f{ - Params::endo_minus_b1_lo, - Params::endo_minus_b1_mid, - 0, - 0, - }; - constexpr field b2f{ - Params::endo_b2_lo, - Params::endo_b2_mid, - 0, - 0, - }; - constexpr uint256_t g1{ - Params::endo_g1_lo, - Params::endo_g1_mid, - Params::endo_g1_hi, - Params::endo_g1_hihi, - }; - constexpr uint256_t g2{ - Params::endo_g2_lo, - Params::endo_g2_mid, - Params::endo_g2_hi, - Params::endo_g2_hihi, - }; - - field kf = input.reduce_once(); - uint256_t k{ kf.data[0], kf.data[1], kf.data[2], kf.data[3] }; - - uint512_t c1 = (uint512_t(k) * static_cast(g1)) >> 384; - uint512_t c2 = (uint512_t(k) * static_cast(g2)) >> 384; - - field c1f{ c1.lo.data[0], c1.lo.data[1], c1.lo.data[2], c1.lo.data[3] }; - field c2f{ c2.lo.data[0], c2.lo.data[1], c2.lo.data[2], c2.lo.data[3] }; - - c1f.self_to_montgomery_form(); - c2f.self_to_montgomery_form(); - c1f = c1f * minus_b1f; - c2f = 
c2f * b2f; - field r2f = c1f - c2f; - field beta = cube_root_of_unity(); - field r1f = input.reduce_once() - r2f * beta; - k1_out = r1f; - k2_out = -r2f; - } - // static constexpr auto coset_generators = compute_coset_generators(); // static constexpr std::array coset_generators = compute_coset_generators((1 << 30U));