Neptune-Crypto · aszepieniec · May 22, 2024 · May 12, 2024 · May 13, 2024 · May 13, 2024
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
@@ -40,6 +40,10 @@ jobs:
         run: cargo clippy --all-targets -- -D warnings
 
       - name: Run tests
+        # Temporary workaround for Windows path issue, see:
+        # https://github.com/nextest-rs/nextest/issues/1493#issuecomment-2106331574
+        env:
+          RUSTUP_WINDOWS_PATH_ADD_BIN: 1
         run: cargo nextest run --no-fail-fast --all-targets
 
         # doctests are special [^1] but this step does not incur a performance penalty [^2]

diff --git a/cliff.toml b/cliff.toml
@@ -58,6 +58,7 @@ commit_parsers = [
   { body = ".*security",          group = "<!-- 70 --> 🔒️ Security" },
   { message = "^revert",          group = "<!-- 80 --> ⏪️ Revert" },
   { message = "^style",           group = "<!-- 90 --> 🎨 Styling" },
+  { message = "^support",         skip = true },
 ]
 
 protect_breaking_commits = false

diff --git a/twenty-first/Cargo.toml b/twenty-first/Cargo.toml
@@ -67,15 +67,19 @@ name = "evaluation"
 harness = false
 
 [[bench]]
-name = "poly_mod_reduce"
+name = "extrapolation"
 harness = false
 
 [[bench]]
-name = "interpolation"
+name = "coset_extrapolation"
 harness = false
 
 [[bench]]
-name = "poly_div"
+name = "poly_mod_reduce"
+harness = false
+
+[[bench]]
+name = "interpolation"
 harness = false
 
 [[bench]]

diff --git a/twenty-first/benches/coset_extrapolation.rs b/twenty-first/benches/coset_extrapolation.rs
@@ -0,0 +1,37 @@
+use criterion::criterion_group;
+use criterion::criterion_main;
+use criterion::BenchmarkId;
+use criterion::Criterion;
+
+use twenty_first::math::other::random_elements;
+use twenty_first::prelude::*;
+
+criterion_main!(benches);
+criterion_group!(
+    name = benches;
+    config = Criterion::default().sample_size(10);
+    targets = coset_extrapolation<{ 1 << 18 }, { 1 << 8 }>,
+              coset_extrapolation<{ 1 << 19 }, { 1 << 8 }>,
+              coset_extrapolation<{ 1 << 20 }, { 1 << 8 }>,
+              coset_extrapolation<{ 1 << 21 }, { 1 << 8 }>,
+              coset_extrapolation<{ 1 << 22 }, { 1 << 8 }>,
+              coset_extrapolation<{ 1 << 23 }, { 1 << 8 }>,
+);
+
+/// Benchmarks `Polynomial::coset_extrapolate` on randomly sampled inputs of the given sizes.
+fn coset_extrapolation<const SIZE: usize, const NUM_POINTS: usize>(c: &mut Criterion) {
+    let log2_of_size = SIZE.ilog2();
+    let name = format!("Fast extrapolation of length-{SIZE} codeword in {NUM_POINTS} Points");
+    let mut group = c.benchmark_group(name);
+
+    // Sample all inputs up front so that only the extrapolation itself is timed.
+    let codeword = random_elements(SIZE);
+    let offset = BFieldElement::new(7);
+    let eval_points: Vec<BFieldElement> = random_elements(NUM_POINTS);
+
+    let bench_id = BenchmarkId::new("Fast Codeword Extrapolation", log2_of_size);
+    group.bench_function(bench_id, |b| {
+        b.iter(|| Polynomial::<BFieldElement>::coset_extrapolate(offset, &codeword, &eval_points))
+    });
+    group.finish();
+}
diff --git a/twenty-first/benches/evaluation.rs b/twenty-first/benches/evaluation.rs
@@ -4,6 +4,7 @@ use criterion::BenchmarkId;
 use criterion::Criterion;
 
 use twenty_first::math::other::random_elements;
+use twenty_first::math::zerofier_tree::ZerofierTree;
 use twenty_first::prelude::*;
 
 criterion_main!(benches);
@@ -36,15 +37,18 @@ fn evaluation<const SIZE: usize, const NUM_POINTS: usize>(c: &mut Criterion) {
         b.iter(|| poly.iterative_batch_evaluate(&eval_points))
     });
 
-    // `vector_batch_evaluate` exists, but is super slow. Put it here if you plan to run benchmarks
-    // during a coffee break.
+    let id = BenchmarkId::new("Zerofier Tree", log2_of_size);
+    group.bench_function(id, |b| {
+        b.iter(|| ZerofierTree::new_from_domain(&eval_points))
+    });
 
     let id = BenchmarkId::new("Divide-and-Conquer", log2_of_size);
+    let zerofier_tree = ZerofierTree::new_from_domain(&eval_points);
     group.bench_function(id, |b| {
-        b.iter(|| poly.divide_and_conquer_batch_evaluate(&eval_points))
+        b.iter(|| poly.divide_and_conquer_batch_evaluate(&zerofier_tree))
     });
 
-    let id = BenchmarkId::new("Dispatcher", log2_of_size);
+    let id = BenchmarkId::new("Entrypoint", log2_of_size);
     group.bench_function(id, |b| b.iter(|| poly.batch_evaluate(&eval_points)));
 
     group.finish();

diff --git a/twenty-first/benches/extrapolation.rs b/twenty-first/benches/extrapolation.rs
@@ -0,0 +1,95 @@
+use criterion::criterion_group;
+use criterion::criterion_main;
+use criterion::BenchmarkId;
+use criterion::Criterion;
+
+use twenty_first::math::ntt::intt;
+use twenty_first::math::other::random_elements;
+use twenty_first::math::traits::PrimitiveRootOfUnity;
+use twenty_first::math::zerofier_tree::ZerofierTree;
+use twenty_first::prelude::*;
+
+criterion_main!(benches);
+criterion_group!(
+    name = benches;
+    config = Criterion::default().sample_size(10);
+    targets = extrapolation<{ 1 << 18 }, { 1 << 6 }>,
+              extrapolation<{ 1 << 18 }, { 1 << 7 }>,
+              extrapolation<{ 1 << 18 }, { 1 << 8 }>,
+              extrapolation<{ 1 << 19 }, { 1 << 6 }>,
+              extrapolation<{ 1 << 19 }, { 1 << 7 }>,
+              extrapolation<{ 1 << 19 }, { 1 << 8 }>,
+              extrapolation<{ 1 << 20 }, { 1 << 6 }>,
+              extrapolation<{ 1 << 20 }, { 1 << 7 }>,
+              extrapolation<{ 1 << 20 }, { 1 << 8 }>,
+);
+
+/// Runs an INTT on the codeword, scales and reduces the polynomial, then batch-evaluates it.
+fn intt_then_evaluate(
+    codeword: &[BFieldElement],
+    offset: BFieldElement,
+    zerofier_tree: &ZerofierTree<BFieldElement>,
+    shift_coefficients: &[BFieldElement],
+    tail_length: usize,
+) -> Vec<BFieldElement> {
+    let omega = BFieldElement::primitive_root_of_unity(codeword.len() as u64).unwrap();
+    let mut coeffs = codeword.to_vec();
+    intt(&mut coeffs, omega, codeword.len().ilog2());
+    let reduced: Polynomial<BFieldElement> = Polynomial::new(coeffs)
+        .scale(offset.inverse())
+        .reduce_by_ntt_friendly_modulus(shift_coefficients, tail_length);
+    reduced.divide_and_conquer_batch_evaluate(zerofier_tree)
+}
+
+/// Benchmarks three strategies for extrapolating a random length-`SIZE` codeword to
+/// `NUM_POINTS` random points.
+///
+/// The zerofier tree and the preprocessing data are built outside of the timed closures;
+/// the last benchmark receives only the raw inputs, so whatever it needs is timed as well.
+fn extrapolation<const SIZE: usize, const NUM_POINTS: usize>(c: &mut Criterion) {
+    let log2_of_size = SIZE.ilog2();
+    let name = format!("Extrapolation of length-{SIZE} codeword in {NUM_POINTS} Points");
+    let mut group = c.benchmark_group(name);
+
+    // All inputs are sampled once, up front, and shared by the benchmarks below.
+    let codeword = random_elements(SIZE);
+    let offset = BFieldElement::new(7);
+    let eval_points: Vec<BFieldElement> = random_elements(NUM_POINTS);
+
+    let zerofier_tree = ZerofierTree::new_from_domain(&eval_points);
+    let modulus = zerofier_tree.zerofier();
+    let preprocessing_data =
+        Polynomial::fast_modular_coset_interpolate_preprocess(SIZE, offset, &modulus);
+    let shift_coefficients = &preprocessing_data.shift_coefficients;
+    let tail_length = preprocessing_data.tail_length;
+
+    let intt_id = BenchmarkId::new("INTT-then-Evaluate", log2_of_size);
+    group.bench_function(intt_id, |b| {
+        b.iter(|| {
+            intt_then_evaluate(&codeword, offset, &zerofier_tree, shift_coefficients, tail_length)
+        })
+    });
+
+    // We used to have another benchmark here that looped over all points using
+    // barycentric evaluation (from `fri.rs` in repo triton-vm). It was never
+    // close to being faster.
+    let fast_id = BenchmarkId::new("Fast Codeword Extrapolation", log2_of_size);
+    group.bench_function(fast_id, |b| {
+        b.iter(|| {
+            Polynomial::<BFieldElement>::fast_modular_coset_interpolate_with_zerofiers_and_ntt_friendly_multiple(
+                &codeword,
+                offset,
+                &modulus,
+                &preprocessing_data,
+            )
+            .divide_and_conquer_batch_evaluate(&zerofier_tree)
+        })
+    });
+
+    let dispatcher_id = BenchmarkId::new("Dispatcher (includes preprocessing)", log2_of_size);
+    group.bench_function(dispatcher_id, |b| {
+        b.iter(|| Polynomial::coset_extrapolate(offset, &codeword, &eval_points))
+    });
+
+    group.finish();
+}
diff --git a/twenty-first/benches/poly_div.rs b/twenty-first/benches/poly_div.rs
diff --git a/twenty-first/benches/poly_mod_reduce.rs b/twenty-first/benches/poly_mod_reduce.rs
@@ -30,12 +30,6 @@ fn poly_mod_reduce<const SIZE_LHS: usize, const SIZE_RHS: usize>(c: &mut Criteri
     let id = BenchmarkId::new("long division", log2_of_size);
     group.bench_function(id, |b| b.iter(|| lhs.clone() % rhs.clone()));
 
-    // despite its name, `.fast_divide()` is slow – ignore for big inputs
-    if SIZE_LHS < 1 << 13 && SIZE_RHS < 1 << 13 {
-        let id = BenchmarkId::new("fast division", log2_of_size);
-        group.bench_function(id, |b| b.iter(|| lhs.fast_divide(&rhs)));
-    }
-
     let id = BenchmarkId::new("fast reduce", log2_of_size);
     group.bench_function(id, |b| b.iter(|| lhs.fast_reduce(&rhs)));
 

diff --git a/twenty-first/benches/zerofier.rs b/twenty-first/benches/zerofier.rs
@@ -35,7 +35,7 @@ fn zerofier<const SIZE: usize>(c: &mut Criterion) {
     let id = BenchmarkId::new("Fast", SIZE);
     group.bench_function(id, |b| b.iter(|| Polynomial::fast_zerofier(&roots)));
 
-    let id = BenchmarkId::new("Fastest of the three", SIZE);
+    let id = BenchmarkId::new("Dispatcher", SIZE);
     group.bench_function(id, |b| b.iter(|| Polynomial::zerofier(&roots)));
 
     group.finish();

diff --git a/twenty-first/src/lib.rs b/twenty-first/src/lib.rs
@@ -67,6 +67,12 @@ pub(crate) mod tests {
         implements_usual_auto_traits::<mock::mmr::MockMmr<Tip5>>();
         implements_usual_auto_traits::<util_types::algebraic_hasher::Domain>();
         implements_usual_auto_traits::<util_types::mmr::mmr_accumulator::MmrAccumulator<Tip5>>();
+        implements_usual_auto_traits::<math::zerofier_tree::Branch<BFieldElement>>();
+        implements_usual_auto_traits::<math::zerofier_tree::Leaf<BFieldElement>>();
+        implements_usual_auto_traits::<math::zerofier_tree::ZerofierTree<BFieldElement>>();
+        implements_usual_auto_traits::<
+            math::polynomial::ModularInterpolationPreprocessingData<BFieldElement>,
+        >();
     }
 
     #[test]

diff --git a/twenty-first/src/math.rs b/twenty-first/src/math.rs
@@ -9,3 +9,4 @@ pub mod polynomial;
 pub mod tip5;
 pub mod traits;
 pub mod x_field_element;
+pub mod zerofier_tree;
diff --git a/twenty-first/src/math/b_field_element.rs b/twenty-first/src/math/b_field_element.rs
@@ -215,6 +215,9 @@ impl BFieldElement {
     /// 2^128 mod P; this is used for conversion of elements into Montgomery representation.
     const R2: u64 = 0xFFFFFFFE00000001;
 
+    /// The multiplicative inverse of -2, i.e., (-2)^-1.
+    pub const MINUS_TWO_INVERSE: Self = Self::new(9223372034707292160);
+
     #[inline]
     pub const fn new(value: u64) -> Self {
         Self(Self::montyred((value as u128) * (Self::R2 as u128)))
@@ -1300,4 +1303,9 @@ mod b_prime_field_element_test {
     fn bfe_macro_produces_same_result_as_calling_new(value: u64) {
         prop_assert_eq!(BFieldElement::new(value), bfe!(value));
     }
+
+    #[test]
+    fn const_minus_two_inverse_is_really_minus_two_inverse() {
+        assert_eq!(bfe!(-2).inverse(), BFieldElement::MINUS_TWO_INVERSE); // computed vs. constant
+    }
 }