From 2ef4894e8da18ca0145ab2af3df73f4bb303f398 Mon Sep 17 00:00:00 2001
From: Jonathan Wang <jonathanpwang@users.noreply.github.com>
Date: Fri, 10 Feb 2023 01:00:45 -0800
Subject: [PATCH 1/3] feat: update halo2-ecc to v0.3.0

* add multi-thread witness assignment support for `variable_base_msm`
  and `fixed_base_msm`
* witness generation for a batch-size-100 MSM went from 500ms to 100ms
---
 Cargo.toml                                    |   4 +-
 halo2-base/benches/inner_product.rs           |   2 +-
 halo2-base/benches/mul.rs                     |   2 +-
 halo2-base/examples/inner_product.rs          |   2 +-
 halo2-base/src/gates/builder.rs               |  40 +-
 halo2-base/src/gates/flex_gate.rs             |  61 +-
 halo2-base/src/gates/mod.rs                   |   3 +
 halo2-base/src/gates/range.rs                 |  37 +-
 halo2-base/src/gates/tests.rs                 |   8 +-
 halo2-base/src/lib.rs                         |  24 +-
 halo2-base/src/utils.rs                       |  29 +-
 halo2-ecc/Cargo.toml                          |   1 +
 halo2-ecc/benches/fixed_base_msm.rs           | 249 +++----
 halo2-ecc/benches/fp_mul.rs                   | 197 +++--
 halo2-ecc/benches/msm.rs                      | 343 +++------
 .../bn254}/bench_ec_add.config                |   0
 .../bn254}/bench_fixed_msm.config             |   0
 .../bn254}/bench_msm.config                   |   1 +
 .../bn254}/bench_pairing.config               |   0
 .../bn254}/ec_add_circuit.config              |   0
 .../bn254}/fixed_msm_circuit.config           |   0
 halo2-ecc/configs/bn254/msm_circuit.config    |   1 +
 .../bn254}/pairing_circuit.config             |   0
 .../secp256k1}/bench_ecdsa.config             |   0
 .../secp256k1}/ecdsa_circuit.config           |   0
 halo2-ecc/src/bigint/add_no_carry.rs          |  33 +-
 halo2-ecc/src/bigint/big_is_equal.rs          |  56 +-
 halo2-ecc/src/bigint/big_is_zero.rs           |  49 +-
 halo2-ecc/src/bigint/big_less_than.rs         |  12 +-
 halo2-ecc/src/bigint/carry_mod.rs             | 214 ++----
 .../src/bigint/check_carry_mod_to_zero.rs     | 138 +---
 halo2-ecc/src/bigint/check_carry_to_zero.rs   |  85 +--
 halo2-ecc/src/bigint/mod.rs                   | 183 ++---
 halo2-ecc/src/bigint/mul_no_carry.rs          |  47 +-
 halo2-ecc/src/bigint/negative.rs              |  12 +-
 .../src/bigint/scalar_mul_and_add_no_carry.rs |  49 +-
 halo2-ecc/src/bigint/scalar_mul_no_carry.rs   |  35 +-
 halo2-ecc/src/bigint/select.rs                |  50 +-
 halo2-ecc/src/bigint/select_by_indicator.rs   |  58 +-
 halo2-ecc/src/bigint/sub.rs                   |  63 +-
 halo2-ecc/src/bigint/sub_no_carry.rs          |  30 +-
 .../src/bn254/configs/msm_circuit.config      |   1 -
 halo2-ecc/src/bn254/final_exp.rs              | 105 ++-
 halo2-ecc/src/bn254/mod.rs                    |  10 +-
 halo2-ecc/src/bn254/pairing.rs                | 236 +++---
 .../src/bn254/results/msm_bench_internal.csv  |   7 -
 .../src/bn254/results/msm_bench_m2_simple.csv |   6 -
 .../results/msm_bench_m2_simple_plus.csv      |   6 -
 .../bn254/results/pairing_bench_results.txt   | 692 ------------------
 halo2-ecc/src/bn254/tests/ec_add.rs           | 317 ++------
 halo2-ecc/src/bn254/tests/fixed_base_msm.rs   | 385 +++-------
 halo2-ecc/src/bn254/tests/mod.rs              |  25 +-
 halo2-ecc/src/bn254/tests/msm.rs              | 456 ++++--------
 halo2-ecc/src/bn254/tests/pairing.rs          | 346 +++------
 halo2-ecc/src/ecc/ecdsa.rs                    |  45 +-
 halo2-ecc/src/ecc/fixed_base.rs               | 246 ++++---
 halo2-ecc/src/ecc/fixed_base_pippenger.rs     |  28 +-
 halo2-ecc/src/ecc/mod.rs                      | 511 +++++++------
 halo2-ecc/src/ecc/pippenger.rs                | 268 +++++--
 halo2-ecc/src/ecc/tests.rs                    | 191 ++---
 halo2-ecc/src/fields/fp.rs                    | 341 ++++-----
 halo2-ecc/src/fields/fp12.rs                  | 197 +++--
 halo2-ecc/src/fields/fp2.rs                   | 207 +++---
 halo2-ecc/src/fields/mod.rs                   | 227 +++---
 halo2-ecc/src/fields/tests.rs                 | 302 +++-----
 halo2-ecc/src/lib.rs                          |   7 +-
 .../src/secp256k1/results/ecdsa_bench_m1.csv  |  10 -
 .../secp256k1/results/ecdsa_bench_results.txt | 253 -------
 68 files changed, 2668 insertions(+), 4875 deletions(-)
 rename halo2-ecc/{src/bn254/configs => configs/bn254}/bench_ec_add.config (100%)
 rename halo2-ecc/{src/bn254/configs => configs/bn254}/bench_fixed_msm.config (100%)
 rename halo2-ecc/{src/bn254/configs => configs/bn254}/bench_msm.config (92%)
 rename halo2-ecc/{src/bn254/configs => configs/bn254}/bench_pairing.config (100%)
 rename halo2-ecc/{src/bn254/configs => configs/bn254}/ec_add_circuit.config (100%)
 rename halo2-ecc/{src/bn254/configs => configs/bn254}/fixed_msm_circuit.config (100%)
 create mode 100644 halo2-ecc/configs/bn254/msm_circuit.config
 rename halo2-ecc/{src/bn254/configs => configs/bn254}/pairing_circuit.config (100%)
 rename halo2-ecc/{src/secp256k1/configs => configs/secp256k1}/bench_ecdsa.config (100%)
 rename halo2-ecc/{src/secp256k1/configs => configs/secp256k1}/ecdsa_circuit.config (100%)
 delete mode 100644 halo2-ecc/src/bn254/configs/msm_circuit.config
 delete mode 100644 halo2-ecc/src/bn254/results/msm_bench_internal.csv
 delete mode 100644 halo2-ecc/src/bn254/results/msm_bench_m2_simple.csv
 delete mode 100644 halo2-ecc/src/bn254/results/msm_bench_m2_simple_plus.csv
 delete mode 100644 halo2-ecc/src/bn254/results/pairing_bench_results.txt
 delete mode 100644 halo2-ecc/src/secp256k1/results/ecdsa_bench_m1.csv
 delete mode 100644 halo2-ecc/src/secp256k1/results/ecdsa_bench_results.txt

diff --git a/Cargo.toml b/Cargo.toml
index a21fa775..9d8d2d5c 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,7 +1,7 @@
 [workspace]
 members = [
     "halo2-base",
-    # "halo2-ecc",
+    "halo2-ecc",
     "hashes/zkevm-keccak",
 ]
 
@@ -31,7 +31,7 @@ debug-assertions = false
 lto = "fat" 
 # `codegen-units = 1` can lead to WORSE performance - always bench to find best profile for your machine!
 # codegen-units = 1
-panic = "abort"
+panic = "unwind"
 incremental = false
 
 # For performance profiling
diff --git a/halo2-base/benches/inner_product.rs b/halo2-base/benches/inner_product.rs
index 5d2902ae..9454faa3 100644
--- a/halo2-base/benches/inner_product.rs
+++ b/halo2-base/benches/inner_product.rs
@@ -73,7 +73,7 @@ fn bench(c: &mut Criterion) {
                 let a = (0..5).map(|_| Fr::random(OsRng)).collect_vec();
                 let b = (0..5).map(|_| Fr::random(OsRng)).collect_vec();
                 inner_prod_bench(builder.main(0), a, b);
-                let circuit = GateCircuitBuilder::witness_gen(builder, break_points.clone());
+                let circuit = GateCircuitBuilder::prover(builder, break_points.clone());
 
                 let mut transcript = Blake2bWrite::<_, _, Challenge255<_>>::init(vec![]);
                 create_proof::<
diff --git a/halo2-base/benches/mul.rs b/halo2-base/benches/mul.rs
index 97514e47..16687e08 100644
--- a/halo2-base/benches/mul.rs
+++ b/halo2-base/benches/mul.rs
@@ -56,7 +56,7 @@ fn bench(c: &mut Criterion) {
                 let mut builder = GateThreadBuilder::new(true);
                 // do the computation
                 mul_bench(builder.main(0), inputs);
-                let circuit = GateCircuitBuilder::witness_gen(builder, break_points.clone());
+                let circuit = GateCircuitBuilder::prover(builder, break_points.clone());
 
                 let mut transcript = Blake2bWrite::<_, _, Challenge255<_>>::init(vec![]);
                 create_proof::<
diff --git a/halo2-base/examples/inner_product.rs b/halo2-base/examples/inner_product.rs
index d7976f47..8572817e 100644
--- a/halo2-base/examples/inner_product.rs
+++ b/halo2-base/examples/inner_product.rs
@@ -68,7 +68,7 @@ fn main() {
     let a = (0..5).map(|_| Fr::random(OsRng)).collect_vec();
     let b = (0..5).map(|_| Fr::random(OsRng)).collect_vec();
     inner_prod_bench(builder.main(0), a, b);
-    let circuit = GateCircuitBuilder::witness_gen(builder, break_points);
+    let circuit = GateCircuitBuilder::prover(builder, break_points);
 
     let mut transcript = Blake2bWrite::<_, _, Challenge255<_>>::init(vec![]);
     create_proof::<
diff --git a/halo2-base/src/gates/builder.rs b/halo2-base/src/gates/builder.rs
index c5601b18..c049ba28 100644
--- a/halo2-base/src/gates/builder.rs
+++ b/halo2-base/src/gates/builder.rs
@@ -13,8 +13,8 @@ use crate::{
 use serde::{Deserialize, Serialize};
 use std::{cell::RefCell, collections::HashMap};
 
-type ThreadBreakPoints = Vec<usize>;
-type MultiPhaseThreadBreakPoints = Vec<ThreadBreakPoints>;
+pub type ThreadBreakPoints = Vec<usize>;
+pub type MultiPhaseThreadBreakPoints = Vec<ThreadBreakPoints>;
 
 #[derive(Clone, Debug, Default)]
 pub struct GateThreadBuilder<F: ScalarField> {
@@ -33,12 +33,28 @@ impl<F: ScalarField> GateThreadBuilder<F> {
         Self { threads, thread_count: 1, witness_gen_only, use_unknown: false }
     }
 
+    pub fn mock() -> Self {
+        Self::new(false)
+    }
+
+    pub fn keygen() -> Self {
+        Self::new(false)
+    }
+
+    pub fn prover() -> Self {
+        Self::new(true)
+    }
+
     pub fn unknown(self, use_unknown: bool) -> Self {
         Self { use_unknown, ..self }
     }
 
     pub fn main(&mut self, phase: usize) -> &mut Context<F> {
-        self.threads[phase].first_mut().unwrap()
+        if self.threads[phase].is_empty() {
+            self.new_thread(phase)
+        } else {
+            self.threads[phase].last_mut().unwrap()
+        }
     }
 
     pub fn witness_gen_only(&self) -> bool {
@@ -141,10 +157,11 @@ impl<F: ScalarField> GateThreadBuilder<F> {
             let mut row_offset = 0;
             let mut lookup_offset = 0;
             let mut lookup_col = 0;
-            for ctx in threads {
+            for mut ctx in threads {
                 let mut basic_gate = config.basic_gates[phase]
                         .get(gate_index)
                         .unwrap_or_else(|| panic!("NOT ENOUGH ADVICE COLUMNS IN PHASE {phase}. Perhaps blinding factors were not taken into account. The max non-poisoned rows is {max_rows}"));
+                ctx.selector.resize(ctx.advice.len(), false);
 
                 for (i, (advice, q)) in ctx.advice.iter().zip(ctx.selector.into_iter()).enumerate()
                 {
@@ -214,6 +231,8 @@ impl<F: ScalarField> GateThreadBuilder<F> {
                     }
                 }
 
+                // warning: currently we assume equality constraints in thread i only involve threads <= i
+                // a possible fix might be to just rerun this loop several times
                 for (left, right) in ctx.advice_equality_constraints {
                     let (left, _) = assigned_advices[&(left.context_id, left.offset)];
                     let (right, _) = assigned_advices[&(right.context_id, right.offset)];
@@ -364,7 +383,7 @@ impl<F: ScalarField> GateCircuitBuilder<F> {
         Self { builder: RefCell::new(builder.unknown(false)), break_points: RefCell::new(vec![]) }
     }
 
-    pub fn witness_gen(
+    pub fn prover(
         builder: GateThreadBuilder<F>,
         break_points: MultiPhaseThreadBreakPoints,
     ) -> Self {
@@ -441,11 +460,11 @@ impl<F: ScalarField> RangeCircuitBuilder<F> {
         Self(GateCircuitBuilder::mock(builder))
     }
 
-    pub fn witness_gen(
+    pub fn prover(
         builder: GateThreadBuilder<F>,
         break_points: MultiPhaseThreadBreakPoints,
     ) -> Self {
-        Self(GateCircuitBuilder::witness_gen(builder, break_points))
+        Self(GateCircuitBuilder::prover(builder, break_points))
     }
 }
 
@@ -529,3 +548,10 @@ impl<F: ScalarField> Circuit<F> for RangeCircuitBuilder<F> {
         )
     }
 }
+
+#[derive(Clone, Copy, Debug)]
+pub enum CircuitBuilderStage {
+    Keygen,
+    Prover,
+    Mock,
+}
diff --git a/halo2-base/src/gates/flex_gate.rs b/halo2-base/src/gates/flex_gate.rs
index c4bbc4b4..a70de4b8 100644
--- a/halo2-base/src/gates/flex_gate.rs
+++ b/halo2-base/src/gates/flex_gate.rs
@@ -146,7 +146,7 @@ pub trait GateInstructions<F: ScalarField> {
         let a = a.into();
         let b = b.into();
         let out_val = *a.value() + b.value();
-        ctx.assign_region_last(vec![a, b, Constant(F::one()), Witness(out_val)], vec![0])
+        ctx.assign_region_last([a, b, Constant(F::one()), Witness(out_val)], [0])
     }
 
     /// Copies a, b and constrains `a + b * (-1) = out`
@@ -161,7 +161,7 @@ pub trait GateInstructions<F: ScalarField> {
         let b = b.into();
         let out_val = *a.value() - b.value();
         // slightly better to not have to compute -F::one() since F::one() is cached
-        ctx.assign_region(vec![Witness(out_val), b, Constant(F::one()), a], vec![0]);
+        ctx.assign_region([Witness(out_val), b, Constant(F::one()), a], [0]);
         ctx.get(-4)
     }
 
@@ -169,10 +169,7 @@ pub trait GateInstructions<F: ScalarField> {
     fn neg(&self, ctx: &mut Context<F>, a: impl Into<QuantumCell<F>>) -> AssignedValue<F> {
         let a = a.into();
         let out_val = -*a.value();
-        ctx.assign_region(
-            vec![a, Witness(out_val), Constant(F::one()), Constant(F::zero())],
-            vec![0],
-        );
+        ctx.assign_region([a, Witness(out_val), Constant(F::one()), Constant(F::zero())], [0]);
         ctx.get(-3)
     }
 
@@ -187,7 +184,7 @@ pub trait GateInstructions<F: ScalarField> {
         let a = a.into();
         let b = b.into();
         let out_val = *a.value() * b.value();
-        ctx.assign_region_last(vec![Constant(F::zero()), a, b, Witness(out_val)], vec![0])
+        ctx.assign_region_last([Constant(F::zero()), a, b, Witness(out_val)], [0])
     }
 
     /// a * b + c
@@ -202,7 +199,7 @@ pub trait GateInstructions<F: ScalarField> {
         let b = b.into();
         let c = c.into();
         let out_val = *a.value() * b.value() + c.value();
-        ctx.assign_region_last(vec![c, a, b, Witness(out_val)], vec![0])
+        ctx.assign_region_last([c, a, b, Witness(out_val)], [0])
     }
 
     /// (1 - a) * b = b - a * b
@@ -215,16 +212,13 @@ pub trait GateInstructions<F: ScalarField> {
         let a = a.into();
         let b = b.into();
         let out_val = (F::one() - a.value()) * b.value();
-        ctx.assign_region_smart(vec![Witness(out_val), a, b, b], vec![0], vec![(2, 3)], []);
+        ctx.assign_region_smart([Witness(out_val), a, b, b], [0], [(2, 3)], []);
         ctx.get(-4)
     }
 
     /// Constrain x is 0 or 1.
     fn assert_bit(&self, ctx: &mut Context<F>, x: AssignedValue<F>) {
-        ctx.assign_region(
-            vec![Constant(F::zero()), Existing(x), Existing(x), Existing(x)],
-            vec![0],
-        );
+        ctx.assign_region([Constant(F::zero()), Existing(x), Existing(x), Existing(x)], [0]);
     }
 
     fn div_unsafe(
@@ -238,7 +232,7 @@ pub trait GateInstructions<F: ScalarField> {
         // TODO: if really necessary, make `c` of type `Assigned<F>`
         // this would require the API using `Assigned<F>` instead of `F` everywhere, so leave as last resort
         let c = b.value().invert().unwrap() * a.value();
-        ctx.assign_region(vec![Constant(F::zero()), Witness(c), b, a], vec![0]);
+        ctx.assign_region([Constant(F::zero()), Witness(c), b, a], [0]);
         ctx.get(-3)
     }
 
@@ -387,7 +381,7 @@ pub trait GateInstructions<F: ScalarField> {
         let b = b.into();
         let not_b_val = F::one() - b.value();
         let out_val = *a.value() + b.value() - *a.value() * b.value();
-        let cells = vec![
+        let cells = [
             Witness(not_b_val),
             Constant(F::one()),
             b,
@@ -397,7 +391,7 @@ pub trait GateInstructions<F: ScalarField> {
             Witness(not_b_val),
             Witness(out_val),
         ];
-        ctx.assign_region_smart(cells, vec![0, 4], vec![(0, 6), (2, 4)], vec![]);
+        ctx.assign_region_smart(cells, [0, 4], [(0, 6), (2, 4)], []);
         ctx.last().unwrap()
     }
 
@@ -447,13 +441,13 @@ pub trait GateInstructions<F: ScalarField> {
 
         let (inv_last_bit, last_bit) = {
             ctx.assign_region(
-                vec![
+                [
                     Witness(F::one() - bits[k - 1].value()),
                     Existing(bits[k - 1]),
                     Constant(F::one()),
                     Constant(F::one()),
                 ],
-                vec![0],
+                [0],
             );
             (ctx.get(-4), ctx.get(-3))
         };
@@ -465,13 +459,13 @@ pub trait GateInstructions<F: ScalarField> {
             for old_idx in 0..(1 << idx) {
                 let inv_prod_val = (F::one() - bit.value()) * indicator[offset + old_idx].value();
                 ctx.assign_region(
-                    vec![
+                    [
                         Witness(inv_prod_val),
                         Existing(indicator[offset + old_idx]),
                         Existing(*bit),
                         Existing(indicator[offset + old_idx]),
                     ],
-                    vec![0],
+                    [0],
                 );
                 indicator.push(ctx.get(-4));
 
@@ -499,7 +493,7 @@ pub trait GateInstructions<F: ScalarField> {
             let ind_val = F::from(idx_val == i);
             let val = if idx_val == i { *idx.value() } else { F::zero() };
             ctx.assign_region_smart(
-                vec![
+                [
                     Constant(F::zero()),
                     Witness(ind_val),
                     idx,
@@ -508,9 +502,9 @@ pub trait GateInstructions<F: ScalarField> {
                     Witness(ind_val),
                     Constant(F::zero()),
                 ],
-                vec![0, 3],
-                vec![(1, 5)],
-                vec![],
+                [0, 3],
+                [(1, 5)],
+                [],
             );
             // need to use assigned idx after i > 0 so equality constraint holds
             if i == 0 {
@@ -576,7 +570,7 @@ pub trait GateInstructions<F: ScalarField> {
             (F::zero(), Assigned::Rational(F::one(), *x))
         };
 
-        let cells = vec![
+        let cells = [
             Witness(is_zero),
             Existing(a),
             WitnessFraction(inv),
@@ -586,7 +580,7 @@ pub trait GateInstructions<F: ScalarField> {
             Witness(is_zero),
             Constant(F::zero()),
         ];
-        ctx.assign_region_smart(cells, vec![0, 4], vec![(0, 6)], []);
+        ctx.assign_region_smart(cells, [0, 4], [(0, 6)], []);
         ctx.get(-2)
     }
 
@@ -843,7 +837,7 @@ impl<F: ScalarField> GateInstructions<F> for GateChip<F> {
             // | a - b | 1 | b | a |
             // | b | sel | a - b | out |
             GateStrategy::Vertical => {
-                let cells = vec![
+                let cells = [
                     Witness(diff_val),
                     Constant(F::one()),
                     b,
@@ -853,7 +847,7 @@ impl<F: ScalarField> GateInstructions<F> for GateChip<F> {
                     Witness(diff_val),
                     Witness(out_val),
                 ];
-                ctx.assign_region_smart(cells, vec![0, 4], vec![(0, 6), (2, 4)], []);
+                ctx.assign_region_smart(cells, [0, 4], [(0, 6), (2, 4)], []);
                 ctx.last().unwrap()
             }
         }
@@ -875,7 +869,7 @@ impl<F: ScalarField> GateInstructions<F> for GateChip<F> {
         let not_bc_val = F::one() - bc_val;
         let not_a_val = *a.value() - F::one();
         let out_val = bc_val + a.value() - bc_val * a.value();
-        let cells = vec![
+        let cells = [
             Witness(not_bc_val),
             b,
             c,
@@ -888,7 +882,7 @@ impl<F: ScalarField> GateInstructions<F> for GateChip<F> {
             Constant(F::one()),
             a,
         ];
-        ctx.assign_region_smart(cells, vec![0, 3, 7], vec![(4, 7), (0, 5)], []);
+        ctx.assign_region_smart(cells, [0, 3, 7], [(4, 7), (0, 5)], []);
         ctx.get(-5)
     }
 
@@ -904,21 +898,22 @@ impl<F: ScalarField> GateInstructions<F> for GateChip<F> {
             .as_ref()
             .iter()
             .flat_map(|byte| (0..8).map(|i| (*byte as u64 >> i) & 1))
+            .map(|x| Witness(F::from(x)))
             .take(range_bits)
-            .map(|x| F::from(x));
+            .collect::<Vec<_>>();
 
         let mut bit_cells = Vec::with_capacity(range_bits);
         let row_offset = ctx.advice.len();
         let acc = self.inner_product(
             ctx,
-            bits.map(Witness),
+            bits,
             self.pow_of_two[..range_bits].iter().map(|c| Constant(*c)),
         );
         ctx.constrain_equal(&a, &acc);
         debug_assert!(range_bits > 0);
         bit_cells.push(ctx.get(row_offset as isize));
         for i in 1..range_bits {
-            bit_cells.push(ctx.get((row_offset + 1 + 3 * (i - 2)) as isize));
+            bit_cells.push(ctx.get((row_offset + 1 + 3 * (i - 1)) as isize));
         }
 
         for bit_cell in &bit_cells {
diff --git a/halo2-base/src/gates/mod.rs b/halo2-base/src/gates/mod.rs
index 6bdde332..705542b1 100644
--- a/halo2-base/src/gates/mod.rs
+++ b/halo2-base/src/gates/mod.rs
@@ -4,3 +4,6 @@ pub mod range;
 
 #[cfg(test)]
 pub mod tests;
+
+pub use flex_gate::{GateChip, GateInstructions};
+pub use range::{RangeChip, RangeInstructions};
diff --git a/halo2-base/src/gates/range.rs b/halo2-base/src/gates/range.rs
index 6c41e8bb..ff5e4ae3 100644
--- a/halo2-base/src/gates/range.rs
+++ b/halo2-base/src/gates/range.rs
@@ -1,13 +1,5 @@
 use crate::{
     gates::flex_gate::{FlexGateConfig, GateInstructions, GateStrategy, MAX_PHASE},
-    utils::{
-        biguint_to_fe, bit_length, decompose_fe_to_u64_limbs, fe_to_biguint, BigPrimeField,
-        ScalarField,
-    },
-    AssignedValue,
-    QuantumCell::{self, Constant, Existing, Witness},
-};
-use crate::{
     halo2_proofs::{
         circuit::{Layouter, Value},
         plonk::{
@@ -15,8 +7,12 @@ use crate::{
         },
         poly::Rotation,
     },
-    utils::PrimeField,
-    Context,
+    utils::{
+        biguint_to_fe, bit_length, decompose_fe_to_u64_limbs, fe_to_biguint, BigPrimeField,
+        ScalarField,
+    },
+    AssignedValue, Context,
+    QuantumCell::{self, Constant, Existing, Witness},
 };
 use num_bigint::BigUint;
 use num_integer::Integer;
@@ -249,17 +245,14 @@ pub trait RangeInstructions<F: ScalarField> {
         a_num_bits: usize,
     ) -> (AssignedValue<F>, AssignedValue<F>)
     where
-        F: PrimeField,
+        F: BigPrimeField,
     {
         let a = a.into();
         let b = b.into();
         let a_val = fe_to_biguint(a.value());
         let (div, rem) = a_val.div_mod_floor(&b);
         let [div, rem] = [div, rem].map(|v| biguint_to_fe(&v));
-        ctx.assign_region(
-            vec![Witness(rem), Constant(biguint_to_fe(&b)), Witness(div), a],
-            vec![0],
-        );
+        ctx.assign_region([Witness(rem), Constant(biguint_to_fe(&b)), Witness(div), a], [0]);
         let rem = ctx.get(-4);
         let div = ctx.get(-2);
         self.check_big_less_than_safe(
@@ -301,8 +294,8 @@ pub trait RangeInstructions<F: ScalarField> {
         let x_fe = self.gate().pow_of_two()[b_num_bits];
         let [div, div_hi, div_lo, rem] = [div, div_hi, div_lo, rem].map(|v| biguint_to_fe(&v));
         ctx.assign_region(
-            vec![Witness(div_lo), Witness(div_hi), Constant(x_fe), Witness(div), Witness(rem)],
-            vec![0],
+            [Witness(div_lo), Witness(div_hi), Constant(x_fe), Witness(div), Witness(rem)],
+            [0],
         );
         let [div_lo, div_hi, div, rem] = [-5, -4, -2, -1].map(|i| ctx.get(i));
         self.range_check(ctx, div_lo, b_num_bits);
@@ -337,7 +330,7 @@ pub trait RangeInstructions<F: ScalarField> {
         };
         let two = self.gate().get_field_element(2u64);
         let h_v = (*a_v - bit_v) * two.invert().unwrap();
-        ctx.assign_region(vec![Witness(bit_v), Witness(h_v), Constant(two), Existing(a)], vec![0]);
+        ctx.assign_region([Witness(bit_v), Witness(h_v), Constant(two), Existing(a)], [0]);
 
         let half = ctx.get(-3);
         self.range_check(ctx, half, limb_bits - 1);
@@ -450,7 +443,7 @@ impl<F: ScalarField> RangeInstructions<F> for RangeChip<F> {
             RangeStrategy::Vertical => {
                 let shift_a_val = pow_of_two + a.value();
                 // | a + 2^(num_bits) - b | b | 1 | a + 2^(num_bits) | - 2^(num_bits) | 1 | a |
-                let cells = vec![
+                let cells = [
                     Witness(shift_a_val - b.value()),
                     b,
                     Constant(F::one()),
@@ -459,7 +452,7 @@ impl<F: ScalarField> RangeInstructions<F> for RangeChip<F> {
                     Constant(F::one()),
                     a,
                 ];
-                ctx.assign_region(cells, vec![0, 3]);
+                ctx.assign_region(cells, [0, 3]);
                 ctx.get(-7)
             }
         };
@@ -487,7 +480,7 @@ impl<F: ScalarField> RangeInstructions<F> for RangeChip<F> {
         let shifted_cell = match self.strategy {
             RangeStrategy::Vertical => {
                 ctx.assign_region(
-                    vec![
+                    [
                         Witness(shifted_val),
                         b,
                         Constant(F::one()),
@@ -496,7 +489,7 @@ impl<F: ScalarField> RangeInstructions<F> for RangeChip<F> {
                         Constant(F::one()),
                         a,
                     ],
-                    vec![0, 3],
+                    [0, 3],
                 );
                 ctx.get(-7)
             }
diff --git a/halo2-base/src/gates/tests.rs b/halo2-base/src/gates/tests.rs
index 01371c28..cf6a3cb6 100644
--- a/halo2-base/src/gates/tests.rs
+++ b/halo2-base/src/gates/tests.rs
@@ -36,7 +36,7 @@ fn gate_tests<F: ScalarField>(ctx: &mut Context<F>, inputs: [F; 3]) {
 fn test_gates() {
     let k = 6;
     let inputs = [10u64, 12u64, 120u64].map(Fr::from);
-    let mut builder = GateThreadBuilder::new(false);
+    let mut builder = GateThreadBuilder::mock();
     gate_tests(builder.main(0), inputs);
 
     // auto-tune circuit
@@ -51,7 +51,7 @@ fn test_gates() {
 fn test_multithread_gates() {
     let k = 6;
     let inputs = [10u64, 12u64, 120u64].map(Fr::from);
-    let mut builder = GateThreadBuilder::new(false);
+    let mut builder = GateThreadBuilder::mock();
     gate_tests(builder.main(0), inputs);
 
     let thread_ids = (0..4).map(|_| builder.get_new_thread_id()).collect::<Vec<_>>();
@@ -120,7 +120,7 @@ fn range_tests<F: BigPrimeField>(
 fn test_range_single() {
     let k = 11;
     let inputs = [100, 101].map(Fr::from);
-    let mut builder = GateThreadBuilder::new(false);
+    let mut builder = GateThreadBuilder::mock();
     range_tests(builder.main(0), 3, inputs, 8, 8);
 
     // auto-tune circuit
@@ -135,7 +135,7 @@ fn test_range_single() {
 fn test_range_multicolumn() {
     let k = 5;
     let inputs = [100, 101].map(Fr::from);
-    let mut builder = GateThreadBuilder::new(false);
+    let mut builder = GateThreadBuilder::mock();
     range_tests(builder.main(0), 3, inputs, 8, 8);
 
     // auto-tune circuit
diff --git a/halo2-base/src/lib.rs b/halo2-base/src/lib.rs
index 1bff40c8..ccf4f973 100644
--- a/halo2-base/src/lib.rs
+++ b/halo2-base/src/lib.rs
@@ -233,18 +233,22 @@ impl<F: ScalarField> Context<F> {
     ) where
         Q: Into<QuantumCell<F>>,
     {
-        for input in inputs {
-            self.assign_cell(input);
-        }
-
-        if !self.witness_gen_only {
-            let row_offset = self.selector.len();
+        if self.witness_gen_only {
+            for input in inputs {
+                self.assign_cell(input);
+            }
+        } else {
+            let row_offset = self.advice.len();
+            // note: row_offset may not equal self.selector.len() at this point if we previously used `load_constant` or `load_witness`
+            for input in inputs {
+                self.assign_cell(input);
+            }
             self.selector.resize(self.advice.len(), false);
             for offset in gate_offsets {
                 *self
                     .selector
                     .get_mut(row_offset.checked_add_signed(offset).expect("Invalid gate offset"))
-                    .expect("Gate offset out of bounds") = true;
+                    .expect("Invalid selector offset") = true;
             }
         }
     }
@@ -322,11 +326,17 @@ impl<F: ScalarField> Context<F> {
 
     pub fn load_witness(&mut self, witness: F) -> AssignedValue<F> {
         self.assign_cell(QuantumCell::Witness(witness));
+        if !self.witness_gen_only {
+            self.selector.resize(self.advice.len(), false);
+        }
         self.last().unwrap()
     }
 
     pub fn load_constant(&mut self, c: F) -> AssignedValue<F> {
         self.assign_cell(QuantumCell::Constant(c));
+        if !self.witness_gen_only {
+            self.selector.resize(self.advice.len(), false);
+        }
         self.last().unwrap()
     }
 
diff --git a/halo2-base/src/utils.rs b/halo2-base/src/utils.rs
index 5c0c0a47..6802b71c 100644
--- a/halo2-base/src/utils.rs
+++ b/halo2-base/src/utils.rs
@@ -48,13 +48,10 @@ where
     }
 }
 
-// Later: will need to separate PrimeField from ScalarField when Goldilocks is introduced
-#[cfg(feature = "halo2-axiom")]
-pub trait PrimeField = BigPrimeField;
+// Later: will need to separate BigPrimeField from ScalarField when Goldilocks is introduced
+
 #[cfg(feature = "halo2-pse")]
 pub trait BigPrimeField = FieldExt<Repr = [u8; 32]> + Hash;
-#[cfg(feature = "halo2-pse")]
-pub trait PrimeField = BigPrimeField;
 
 #[cfg(feature = "halo2-pse")]
 pub trait ScalarField = FieldExt + Hash;
@@ -105,16 +102,16 @@ pub fn log2_ceil(x: u64) -> usize {
     (u64::BITS - x.leading_zeros() - (x & (x - 1) == 0) as u32) as usize
 }
 
-pub fn modulus<F: PrimeField>() -> BigUint {
+pub fn modulus<F: BigPrimeField>() -> BigUint {
     fe_to_biguint(&-F::one()) + 1u64
 }
 
-pub fn power_of_two<F: PrimeField>(n: usize) -> F {
+pub fn power_of_two<F: BigPrimeField>(n: usize) -> F {
     biguint_to_fe(&(BigUint::one() << n))
 }
 
 /// assume `e` less than modulus of F
-pub fn biguint_to_fe<F: PrimeField>(e: &BigUint) -> F {
+pub fn biguint_to_fe<F: BigPrimeField>(e: &BigUint) -> F {
     #[cfg(feature = "halo2-axiom")]
     {
         F::from_u64_digits(&e.to_u64_digits())
@@ -130,7 +127,7 @@ pub fn biguint_to_fe<F: PrimeField>(e: &BigUint) -> F {
 }
 
 /// assume `|e|` less than modulus of F
-pub fn bigint_to_fe<F: PrimeField>(e: &BigInt) -> F {
+pub fn bigint_to_fe<F: BigPrimeField>(e: &BigInt) -> F {
     #[cfg(feature = "halo2-axiom")]
     {
         let (sign, digits) = e.to_u64_digits();
@@ -158,7 +155,7 @@ pub fn fe_to_biguint<F: ff::PrimeField>(fe: &F) -> BigUint {
     BigUint::from_bytes_le(fe.to_repr().as_ref())
 }
 
-pub fn fe_to_bigint<F: PrimeField>(fe: &F) -> BigInt {
+pub fn fe_to_bigint<F: BigPrimeField>(fe: &F) -> BigInt {
     // TODO: `F` should just have modulus as lazy_static or something
     let modulus = modulus::<F>();
     let e = fe_to_biguint(fe);
@@ -169,7 +166,7 @@ pub fn fe_to_bigint<F: PrimeField>(fe: &F) -> BigInt {
     }
 }
 
-pub fn decompose<F: PrimeField>(e: &F, number_of_limbs: usize, bit_len: usize) -> Vec<F> {
+pub fn decompose<F: BigPrimeField>(e: &F, number_of_limbs: usize, bit_len: usize) -> Vec<F> {
     if bit_len > 64 {
         decompose_biguint(&fe_to_biguint(e), number_of_limbs, bit_len)
     } else {
@@ -194,7 +191,11 @@ pub fn decompose_fe_to_u64_limbs<F: ScalarField>(
     }
 }
 
-pub fn decompose_biguint<F: PrimeField>(e: &BigUint, num_limbs: usize, bit_len: usize) -> Vec<F> {
+pub fn decompose_biguint<F: BigPrimeField>(
+    e: &BigUint,
+    num_limbs: usize,
+    bit_len: usize,
+) -> Vec<F> {
     debug_assert!(bit_len > 64 && bit_len <= 128);
     let mut e = e.iter_u64_digits();
 
@@ -224,7 +225,7 @@ pub fn decompose_biguint<F: PrimeField>(e: &BigUint, num_limbs: usize, bit_len:
         .collect()
 }
 
-pub fn decompose_bigint<F: PrimeField>(e: &BigInt, num_limbs: usize, bit_len: usize) -> Vec<F> {
+pub fn decompose_bigint<F: BigPrimeField>(e: &BigInt, num_limbs: usize, bit_len: usize) -> Vec<F> {
     if e.is_negative() {
         decompose_biguint::<F>(e.magnitude(), num_limbs, bit_len).into_iter().map(|x| -x).collect()
     } else {
@@ -232,7 +233,7 @@ pub fn decompose_bigint<F: PrimeField>(e: &BigInt, num_limbs: usize, bit_len: us
     }
 }
 
-pub fn decompose_bigint_option<F: PrimeField>(
+pub fn decompose_bigint_option<F: BigPrimeField>(
     value: Value<&BigInt>,
     number_of_limbs: usize,
     bit_len: usize,
diff --git a/halo2-ecc/Cargo.toml b/halo2-ecc/Cargo.toml
index a142200d..0d5041b2 100644
--- a/halo2-ecc/Cargo.toml
+++ b/halo2-ecc/Cargo.toml
@@ -13,6 +13,7 @@ rand = "0.8"
 rand_chacha = "0.3.1"
 serde = { version = "1.0", features = ["derive"] }
 serde_json = "1.0"
+rayon = "1.6.1"
 
 # arithmetic
 ff = "0.12"
diff --git a/halo2-ecc/benches/fixed_base_msm.rs b/halo2-ecc/benches/fixed_base_msm.rs
index 0bdf7e12..5c9589e4 100644
--- a/halo2-ecc/benches/fixed_base_msm.rs
+++ b/halo2-ecc/benches/fixed_base_msm.rs
@@ -1,166 +1,98 @@
-use criterion::{criterion_group, criterion_main};
-use criterion::{BenchmarkId, Criterion};
-
-#[allow(unused_imports)]
-use ff::PrimeField as _;
-use halo2_base::utils::modulus;
-use pprof::criterion::{Output, PProfProfiler};
-
 use ark_std::{end_timer, start_timer};
-use halo2_base::SKIP_FIRST_PASS;
-use rand_core::OsRng;
-use serde::{Deserialize, Serialize};
-use std::marker::PhantomData;
-
+use halo2_base::gates::{
+    builder::{
+        CircuitBuilderStage, GateThreadBuilder, MultiPhaseThreadBreakPoints, RangeCircuitBuilder,
+    },
+    RangeChip,
+};
 use halo2_base::halo2_proofs::{
     arithmetic::Field,
-    circuit::{Layouter, SimpleFloorPlanner, Value},
-    halo2curves::bn256::{Bn256, Fq, Fr, G1Affine},
+    halo2curves::bn256::{Bn256, Fr, G1Affine},
     plonk::*,
     poly::kzg::{
         commitment::{KZGCommitmentScheme, ParamsKZG},
         multiopen::ProverSHPLONK,
     },
-    transcript::TranscriptWriterBuffer,
-    transcript::{Blake2bWrite, Challenge255},
-};
-use halo2_base::{gates::GateInstructions, utils::PrimeField};
-use halo2_ecc::{
-    ecc::EccChip,
-    fields::fp::{FpConfig, FpStrategy},
+    transcript::{Blake2bWrite, Challenge255, TranscriptWriterBuffer},
 };
+use halo2_ecc::{bn254::FpChip, ecc::EccChip, fields::PrimeField};
+use rand::rngs::OsRng;
+use std::sync::Mutex;
 
-type FpChip<F> = FpConfig<F, Fq>;
+use criterion::{criterion_group, criterion_main};
+use criterion::{BenchmarkId, Criterion};
 
-#[derive(Serialize, Deserialize, Debug)]
+use pprof::criterion::{Output, PProfProfiler};
+// Thanks to the example provided by @jibbow in his article
+// https://www.jibbow.com/posts/criterion-flamegraphs/
+
+#[derive(Clone, Copy, Debug)]
 struct MSMCircuitParams {
-    strategy: FpStrategy,
     degree: u32,
-    num_advice: usize,
-    num_lookup_advice: usize,
-    num_fixed: usize,
     lookup_bits: usize,
     limb_bits: usize,
     num_limbs: usize,
     batch_size: usize,
-    radix: usize,
-    clump_factor: usize,
 }
 
-const BEST_100_CONFIG: MSMCircuitParams = MSMCircuitParams {
-    strategy: FpStrategy::Simple,
-    degree: 20,
-    num_advice: 10,
-    num_lookup_advice: 1,
-    num_fixed: 1,
-    lookup_bits: 19,
-    limb_bits: 88,
-    num_limbs: 3,
-    batch_size: 100,
-    radix: 0,
-    clump_factor: 4,
-};
+const BEST_100_CONFIG: MSMCircuitParams =
+    MSMCircuitParams { degree: 20, lookup_bits: 19, limb_bits: 88, num_limbs: 3, batch_size: 100 };
 
 const TEST_CONFIG: MSMCircuitParams = BEST_100_CONFIG;
 
-#[derive(Clone, Debug)]
-struct MSMConfig<F: PrimeField> {
-    fp_chip: FpChip<F>,
-    clump_factor: usize,
-}
-
-impl<F: PrimeField> MSMConfig<F> {
-    #[allow(clippy::too_many_arguments)]
-    pub fn configure(meta: &mut ConstraintSystem<F>, params: MSMCircuitParams) -> Self {
-        let fp_chip = FpChip::<F>::configure(
-            meta,
-            params.strategy,
-            &[params.num_advice],
-            &[params.num_lookup_advice],
-            params.num_fixed,
-            params.lookup_bits,
-            params.limb_bits,
-            params.num_limbs,
-            modulus::<Fq>(),
-            0,
-            params.degree as usize,
-        );
-        MSMConfig { fp_chip, clump_factor: params.clump_factor }
-    }
-}
-
-struct MSMCircuit<F: PrimeField> {
+fn fixed_base_msm_bench(
+    thread_pool: &Mutex<GateThreadBuilder<Fr>>,
+    params: MSMCircuitParams,
     bases: Vec<G1Affine>,
-    scalars: Vec<Option<Fr>>,
-    _marker: PhantomData<F>,
+    scalars: Vec<Fr>,
+) {
+    std::env::set_var("LOOKUP_BITS", params.lookup_bits.to_string());
+    let range = RangeChip::<Fr>::default(params.lookup_bits);
+    let fp_chip = FpChip::<Fr>::new(&range, params.limb_bits, params.num_limbs);
+    let ecc_chip = EccChip::new(&fp_chip);
+
+    let mut builder = thread_pool.lock().unwrap();
+    let scalars_assigned = scalars
+        .iter()
+        .map(|scalar| vec![builder.main(0).load_witness(*scalar)])
+        .collect::<Vec<_>>();
+    drop(builder);
+
+    ecc_chip.fixed_base_msm(thread_pool, &bases, scalars_assigned, Fr::NUM_BITS as usize);
 }
 
-impl Circuit<Fr> for MSMCircuit<Fr> {
-    type Config = MSMConfig<Fr>;
-    type FloorPlanner = SimpleFloorPlanner;
-
-    fn without_witnesses(&self) -> Self {
-        Self {
-            bases: self.bases.clone(),
-            scalars: vec![None; self.scalars.len()],
-            _marker: PhantomData,
+fn fixed_base_msm_circuit(
+    params: MSMCircuitParams,
+    stage: CircuitBuilderStage,
+    bases: Vec<G1Affine>,
+    scalars: Vec<Fr>,
+    break_points: Option<MultiPhaseThreadBreakPoints>,
+) -> RangeCircuitBuilder<Fr> {
+    let k = params.degree as usize;
+    let builder = match stage {
+        CircuitBuilderStage::Mock => GateThreadBuilder::mock(),
+        CircuitBuilderStage::Prover => GateThreadBuilder::prover(),
+        CircuitBuilderStage::Keygen => GateThreadBuilder::keygen(),
+    };
+    let builder = Mutex::new(builder);
+
+    let start0 = start_timer!(|| format!("Witness generation for circuit in {stage:?} stage"));
+    fixed_base_msm_bench(&builder, params, bases, scalars);
+
+    let builder = builder.into_inner().unwrap();
+    let circuit = match stage {
+        CircuitBuilderStage::Mock => {
+            builder.config(k, Some(20));
+            RangeCircuitBuilder::mock(builder)
         }
-    }
-
-    fn configure(meta: &mut ConstraintSystem<Fr>) -> Self::Config {
-        let params = TEST_CONFIG;
-
-        MSMConfig::<Fr>::configure(meta, params)
-    }
-
-    fn synthesize(
-        &self,
-        config: Self::Config,
-        mut layouter: impl Layouter<Fr>,
-    ) -> Result<(), Error> {
-        config.fp_chip.load_lookup_table(&mut layouter)?;
-
-        let mut first_pass = SKIP_FIRST_PASS;
-        layouter.assign_region(
-            || "fixed base msm",
-            |region| {
-                if first_pass {
-                    first_pass = false;
-                    return Ok(());
-                }
-
-                let mut aux = config.fp_chip.new_context(region);
-                let ctx = &mut aux;
-
-                let witness_time = start_timer!(|| "Witness generation");
-                let mut scalars_assigned = Vec::new();
-                for scalar in &self.scalars {
-                    let assignment = config
-                        .fp_chip
-                        .range
-                        .gate
-                        .assign_witnesses(ctx, vec![scalar.map_or(Value::unknown(), Value::known)]);
-                    scalars_assigned.push(assignment);
-                }
-
-                let ecc_chip = EccChip::construct(config.fp_chip.clone());
-
-                let _msm = ecc_chip.fixed_base_msm::<G1Affine>(
-                    ctx,
-                    &self.bases,
-                    &scalars_assigned,
-                    Fr::NUM_BITS as usize,
-                    0,
-                    config.clump_factor,
-                );
-
-                config.fp_chip.finalize(ctx);
-                end_timer!(witness_time);
-
-                Ok(())
-            },
-        )
-    }
+        CircuitBuilderStage::Keygen => {
+            builder.config(k, Some(20));
+            RangeCircuitBuilder::keygen(builder)
+        }
+        CircuitBuilderStage::Prover => RangeCircuitBuilder::prover(builder, break_points.unwrap()),
+    };
+    end_timer!(start0);
+    circuit
 }
 
 fn bench(c: &mut Criterion) {
@@ -168,39 +100,36 @@ fn bench(c: &mut Criterion) {
 
     let k = config.degree;
     let mut rng = OsRng;
-    let mut bases = Vec::new();
-    let mut scalars = Vec::new();
-    for _ in 0..config.batch_size {
-        let new_pt = G1Affine::random(&mut rng);
-        bases.push(new_pt);
-
-        let new_scalar = Some(Fr::random(&mut rng));
-        scalars.push(new_scalar);
-    }
-    let circuit = MSMCircuit::<Fr> { bases, scalars, _marker: PhantomData };
+    let circuit = fixed_base_msm_circuit(
+        config,
+        CircuitBuilderStage::Keygen,
+        vec![G1Affine::generator(); config.batch_size],
+        vec![Fr::zero(); config.batch_size],
+        None,
+    );
 
     let params = ParamsKZG::<Bn256>::setup(k, &mut rng);
     let vk = keygen_vk(&params, &circuit).expect("vk should not fail");
     let pk = keygen_pk(&params, vk, &circuit).expect("pk should not fail");
+    let break_points = circuit.0.break_points.take();
+    drop(circuit);
 
+    let (bases, scalars): (Vec<_>, Vec<_>) =
+        (0..config.batch_size).map(|_| (G1Affine::random(&mut rng), Fr::random(&mut rng))).unzip();
     let mut group = c.benchmark_group("plonk-prover");
     group.sample_size(10);
     group.bench_with_input(
         BenchmarkId::new("fixed base msm", k),
-        &(&params, &pk),
-        |b, &(params, pk)| {
+        &(&params, &pk, &bases, &scalars),
+        |b, &(params, pk, bases, scalars)| {
             b.iter(|| {
-                let mut bases = Vec::new();
-                let mut scalars = Vec::new();
-                for _ in 0..config.batch_size {
-                    let new_pt = G1Affine::random(&mut rng);
-                    bases.push(new_pt);
-
-                    let new_scalar = Some(Fr::random(&mut rng));
-                    scalars.push(new_scalar);
-                }
-
-                let circuit = MSMCircuit::<Fr> { bases, scalars, _marker: PhantomData };
+                let circuit = fixed_base_msm_circuit(
+                    config,
+                    CircuitBuilderStage::Prover,
+                    bases.clone(),
+                    scalars.clone(),
+                    Some(break_points.clone()),
+                );
 
                 let mut transcript = Blake2bWrite::<_, _, Challenge255<_>>::init(vec![]);
                 create_proof::<
diff --git a/halo2-ecc/benches/fp_mul.rs b/halo2-ecc/benches/fp_mul.rs
index d49162e0..c2de04ce 100644
--- a/halo2-ecc/benches/fp_mul.rs
+++ b/halo2-ecc/benches/fp_mul.rs
@@ -1,25 +1,28 @@
-use std::marker::PhantomData;
-
-use halo2_base::halo2_proofs::{
-    arithmetic::Field,
-    circuit::*,
-    halo2curves::bn256::{Bn256, Fq, Fr, G1Affine},
-    plonk::*,
-    poly::kzg::{
-        commitment::{KZGCommitmentScheme, ParamsKZG},
-        multiopen::ProverSHPLONK,
+use ark_std::{end_timer, start_timer};
+use halo2_base::{
+    gates::{
+        builder::{
+            CircuitBuilderStage, GateThreadBuilder, MultiPhaseThreadBreakPoints,
+            RangeCircuitBuilder,
+        },
+        RangeChip,
     },
-    transcript::{Blake2bWrite, Challenge255, TranscriptWriterBuffer},
+    halo2_proofs::{
+        arithmetic::Field,
+        halo2curves::bn256::{Bn256, Fq, Fr, G1Affine},
+        plonk::*,
+        poly::kzg::{
+            commitment::{KZGCommitmentScheme, ParamsKZG},
+            multiopen::ProverSHPLONK,
+        },
+        transcript::{Blake2bWrite, Challenge255, TranscriptWriterBuffer},
+    },
+    Context,
 };
+use halo2_ecc::fields::fp::FpChip;
+use halo2_ecc::fields::{FieldChip, PrimeField};
 use rand::rngs::OsRng;
 
-use halo2_base::{
-    utils::{fe_to_bigint, modulus, PrimeField},
-    SKIP_FIRST_PASS,
-};
-use halo2_ecc::fields::fp::{FpConfig, FpStrategy};
-use halo2_ecc::fields::FieldChip;
-
 use criterion::{criterion_group, criterion_main};
 use criterion::{BenchmarkId, Criterion};
 
@@ -29,106 +32,88 @@ use pprof::criterion::{Output, PProfProfiler};
 
 const K: u32 = 19;
 
-#[derive(Default)]
-struct MyCircuit<F> {
-    a: Value<Fq>,
-    b: Value<Fq>,
-    _marker: PhantomData<F>,
-}
-
-const NUM_ADVICE: usize = 2;
-const NUM_FIXED: usize = 1;
-
-impl<F: PrimeField> Circuit<F> for MyCircuit<F> {
-    type Config = FpConfig<F, Fq>;
-    type FloorPlanner = SimpleFloorPlanner;
-
-    fn without_witnesses(&self) -> Self {
-        Self::default()
+fn fp_mul_bench<F: PrimeField>(
+    ctx: &mut Context<F>,
+    lookup_bits: usize,
+    limb_bits: usize,
+    num_limbs: usize,
+    _a: Fq,
+    _b: Fq,
+) {
+    std::env::set_var("LOOKUP_BITS", lookup_bits.to_string());
+    let range = RangeChip::<F>::default(lookup_bits);
+    let chip = FpChip::<F, Fq>::new(&range, limb_bits, num_limbs);
+
+    let [a, b] = [_a, _b].map(|x| chip.load_private(ctx, FpChip::<F, Fq>::fe_to_witness(&x)));
+    for _ in 0..2857 {
+        chip.mul(ctx, &a, &b);
     }
+}
 
-    fn configure(meta: &mut ConstraintSystem<F>) -> Self::Config {
-        FpConfig::<F, _>::configure(
-            meta,
-            FpStrategy::Simple,
-            &[NUM_ADVICE],
-            &[1],
-            NUM_FIXED,
-            K as usize - 1,
-            88,
-            3,
-            modulus::<Fq>(),
-            0,
-            K as usize,
-        )
-    }
-
-    fn synthesize(&self, chip: Self::Config, mut layouter: impl Layouter<F>) -> Result<(), Error> {
-        chip.load_lookup_table(&mut layouter)?;
-
-        let mut first_pass = SKIP_FIRST_PASS;
-
-        layouter.assign_region(
-            || "fp",
-            |region| {
-                if first_pass {
-                    first_pass = false;
-                    return Ok(());
-                }
-
-                let mut aux = chip.new_context(region);
-                let ctx = &mut aux;
-
-                let a_assigned = chip.load_private(ctx, self.a.as_ref().map(fe_to_bigint));
-                let b_assigned = chip.load_private(ctx, self.b.as_ref().map(fe_to_bigint));
-
-                for _ in 0..2857 {
-                    chip.mul(ctx, &a_assigned, &b_assigned);
-                }
-
-                // IMPORTANT: this copies advice cells to enable lookup
-                // This is not optional.
-                chip.finalize(ctx);
-
-                Ok(())
-            },
-        )
-    }
+fn fp_mul_circuit(
+    stage: CircuitBuilderStage,
+    a: Fq,
+    b: Fq,
+    break_points: Option<MultiPhaseThreadBreakPoints>,
+) -> RangeCircuitBuilder<Fr> {
+    let k = K as usize;
+    let mut builder = match stage {
+        CircuitBuilderStage::Mock => GateThreadBuilder::mock(),
+        CircuitBuilderStage::Prover => GateThreadBuilder::prover(),
+        CircuitBuilderStage::Keygen => GateThreadBuilder::keygen(),
+    };
+
+    let start0 = start_timer!(|| format!("Witness generation for circuit in {stage:?} stage"));
+    fp_mul_bench(builder.main(0), k - 1, 88, 3, a, b);
+
+    let circuit = match stage {
+        CircuitBuilderStage::Mock => {
+            builder.config(k, Some(20));
+            RangeCircuitBuilder::mock(builder)
+        }
+        CircuitBuilderStage::Keygen => {
+            builder.config(k, Some(20));
+            RangeCircuitBuilder::keygen(builder)
+        }
+        CircuitBuilderStage::Prover => RangeCircuitBuilder::prover(builder, break_points.unwrap()),
+    };
+    end_timer!(start0);
+    circuit
 }
 
 fn bench(c: &mut Criterion) {
-    let a = Fq::random(OsRng);
-    let b = Fq::random(OsRng);
-
-    let circuit = MyCircuit::<Fr> { a: Value::known(a), b: Value::known(b), _marker: PhantomData };
+    let circuit = fp_mul_circuit(CircuitBuilderStage::Keygen, Fq::zero(), Fq::zero(), None);
 
     let params = ParamsKZG::<Bn256>::setup(K, OsRng);
     let vk = keygen_vk(&params, &circuit).expect("vk should not fail");
     let pk = keygen_pk(&params, vk, &circuit).expect("pk should not fail");
+    let break_points = circuit.0.break_points.take();
 
+    let a = Fq::random(OsRng);
+    let b = Fq::random(OsRng);
     let mut group = c.benchmark_group("plonk-prover");
     group.sample_size(10);
-    group.bench_with_input(BenchmarkId::new("fp mul", K), &(&params, &pk), |b, &(params, pk)| {
-        b.iter(|| {
-            let rng = OsRng;
-            let a = Fq::random(OsRng);
-            let b = Fq::random(OsRng);
-
-            let circuit =
-                MyCircuit::<Fr> { a: Value::known(a), b: Value::known(b), _marker: PhantomData };
-
-            let mut transcript = Blake2bWrite::<_, _, Challenge255<_>>::init(vec![]);
-            create_proof::<
-                KZGCommitmentScheme<Bn256>,
-                ProverSHPLONK<'_, Bn256>,
-                Challenge255<G1Affine>,
-                _,
-                Blake2bWrite<Vec<u8>, G1Affine, Challenge255<_>>,
-                _,
-            >(params, pk, &[circuit], &[&[]], rng, &mut transcript)
-            .expect("prover should not fail");
-        })
-    });
+    group.bench_with_input(
+        BenchmarkId::new("fp mul", K),
+        &(&params, &pk, a, b),
+        |bencher, &(params, pk, a, b)| {
+            bencher.iter(|| {
+                let circuit =
+                    fp_mul_circuit(CircuitBuilderStage::Prover, a, b, Some(break_points.clone()));
+
+                let mut transcript = Blake2bWrite::<_, _, Challenge255<_>>::init(vec![]);
+                create_proof::<
+                    KZGCommitmentScheme<Bn256>,
+                    ProverSHPLONK<'_, Bn256>,
+                    Challenge255<G1Affine>,
+                    _,
+                    Blake2bWrite<Vec<u8>, G1Affine, Challenge255<_>>,
+                    _,
+                >(params, pk, &[circuit], &[&[]], OsRng, &mut transcript)
+                .expect("prover should not fail");
+            })
+        },
+    );
     group.finish()
 }
 
diff --git a/halo2-ecc/benches/msm.rs b/halo2-ecc/benches/msm.rs
index 22be806e..76141425 100644
--- a/halo2-ecc/benches/msm.rs
+++ b/halo2-ecc/benches/msm.rs
@@ -1,224 +1,112 @@
-use criterion::{criterion_group, criterion_main};
-use criterion::{BenchmarkId, Criterion};
-
-use halo2_base::utils::modulus;
-use pprof::criterion::{Output, PProfProfiler};
-
 use ark_std::{end_timer, start_timer};
-use halo2_base::SKIP_FIRST_PASS;
-use rand_core::OsRng;
-use serde::{Deserialize, Serialize};
-use std::marker::PhantomData;
-
+use halo2_base::gates::{
+    builder::{
+        CircuitBuilderStage, GateThreadBuilder, MultiPhaseThreadBreakPoints, RangeCircuitBuilder,
+    },
+    RangeChip,
+};
 use halo2_base::halo2_proofs::{
     arithmetic::Field,
-    circuit::{Layouter, SimpleFloorPlanner, Value},
-    halo2curves::bn256::{Bn256, Fq, Fr, G1Affine},
+    halo2curves::bn256::{Bn256, Fr, G1Affine},
     plonk::*,
     poly::kzg::{
         commitment::{KZGCommitmentScheme, ParamsKZG},
         multiopen::ProverSHPLONK,
     },
-    transcript::TranscriptWriterBuffer,
-    transcript::{Blake2bWrite, Challenge255},
-};
-use halo2_base::{
-    gates::GateInstructions,
-    utils::{biguint_to_fe, fe_to_biguint, PrimeField},
-    QuantumCell::Witness,
-};
-use halo2_ecc::{
-    ecc::EccChip,
-    fields::fp::{FpConfig, FpStrategy},
+    transcript::{Blake2bWrite, Challenge255, TranscriptWriterBuffer},
 };
-use num_bigint::BigUint;
+use halo2_ecc::{bn254::FpChip, ecc::EccChip, fields::PrimeField};
+use rand::rngs::OsRng;
+use std::sync::Mutex;
 
-type FpChip<F> = FpConfig<F, Fq>;
+use criterion::{criterion_group, criterion_main};
+use criterion::{BenchmarkId, Criterion};
 
-#[derive(Serialize, Deserialize, Debug)]
+use pprof::criterion::{Output, PProfProfiler};
+// Thanks to the example provided by @jibbow in his article
+// https://www.jibbow.com/posts/criterion-flamegraphs/
+
+#[derive(Clone, Copy, Debug)]
 struct MSMCircuitParams {
-    strategy: FpStrategy,
     degree: u32,
-    num_advice: usize,
-    num_lookup_advice: usize,
-    num_fixed: usize,
     lookup_bits: usize,
     limb_bits: usize,
     num_limbs: usize,
     batch_size: usize,
-    window_bits: usize,
+    clump_factor: usize,
 }
 
 const BEST_100_CONFIG: MSMCircuitParams = MSMCircuitParams {
-    strategy: FpStrategy::Simple,
     degree: 19,
-    num_advice: 20,
-    num_lookup_advice: 3,
-    num_fixed: 1,
     lookup_bits: 18,
     limb_bits: 90,
     num_limbs: 3,
     batch_size: 100,
-    window_bits: 4,
+    clump_factor: 4,
 };
-
 const TEST_CONFIG: MSMCircuitParams = BEST_100_CONFIG;
 
-#[derive(Clone, Debug)]
-struct MSMConfig<F: PrimeField> {
-    fp_chip: FpChip<F>,
-    batch_size: usize,
-    window_bits: usize,
-}
-
-impl<F: PrimeField> MSMConfig<F> {
-    #[allow(clippy::too_many_arguments)]
-    pub fn configure(
-        meta: &mut ConstraintSystem<F>,
-        strategy: FpStrategy,
-        num_advice: &[usize],
-        num_lookup_advice: &[usize],
-        num_fixed: usize,
-        lookup_bits: usize,
-        limb_bits: usize,
-        num_limbs: usize,
-        p: BigUint,
-        batch_size: usize,
-        window_bits: usize,
-        context_id: usize,
-        k: usize,
-    ) -> Self {
-        let fp_chip = FpChip::<F>::configure(
-            meta,
-            strategy,
-            num_advice,
-            num_lookup_advice,
-            num_fixed,
-            lookup_bits,
-            limb_bits,
-            num_limbs,
-            p,
-            context_id,
-            k,
-        );
-        MSMConfig { fp_chip, batch_size, window_bits }
-    }
-}
-
-struct MSMCircuit<F: PrimeField> {
-    bases: Vec<Option<G1Affine>>,
-    scalars: Vec<Option<Fr>>,
-    batch_size: usize,
-    _marker: PhantomData<F>,
+fn msm_bench(
+    thread_pool: &Mutex<GateThreadBuilder<Fr>>,
+    params: MSMCircuitParams,
+    bases: Vec<G1Affine>,
+    scalars: Vec<Fr>,
+) {
+    std::env::set_var("LOOKUP_BITS", params.lookup_bits.to_string());
+    let range = RangeChip::<Fr>::default(params.lookup_bits);
+    let fp_chip = FpChip::<Fr>::new(&range, params.limb_bits, params.num_limbs);
+    let ecc_chip = EccChip::new(&fp_chip);
+
+    let mut builder = thread_pool.lock().unwrap();
+    let ctx = builder.main(0);
+    let scalars_assigned =
+        scalars.iter().map(|scalar| vec![ctx.load_witness(*scalar)]).collect::<Vec<_>>();
+    let bases_assigned =
+        bases.iter().map(|base| ecc_chip.load_private(ctx, (base.x, base.y))).collect::<Vec<_>>();
+    drop(builder);
+
+    ecc_chip.variable_base_msm_in::<G1Affine>(
+        thread_pool,
+        &bases_assigned,
+        scalars_assigned,
+        Fr::NUM_BITS as usize,
+        params.clump_factor,
+        0,
+    );
 }
 
-impl<F: PrimeField> Default for MSMCircuit<F> {
-    fn default() -> Self {
-        Self {
-            bases: vec![None; 10],
-            scalars: vec![None; 10],
-            batch_size: 10,
-            _marker: PhantomData,
+fn msm_circuit(
+    params: MSMCircuitParams,
+    stage: CircuitBuilderStage,
+    bases: Vec<G1Affine>,
+    scalars: Vec<Fr>,
+    break_points: Option<MultiPhaseThreadBreakPoints>,
+) -> RangeCircuitBuilder<Fr> {
+    let k = params.degree as usize;
+    let builder = match stage {
+        CircuitBuilderStage::Mock => GateThreadBuilder::mock(),
+        CircuitBuilderStage::Prover => GateThreadBuilder::prover(),
+        CircuitBuilderStage::Keygen => GateThreadBuilder::keygen(),
+    };
+    let builder = Mutex::new(builder);
+
+    let start0 = start_timer!(|| format!("Witness generation for circuit in {stage:?} stage"));
+    msm_bench(&builder, params, bases, scalars);
+
+    let builder = builder.into_inner().unwrap();
+    let circuit = match stage {
+        CircuitBuilderStage::Mock => {
+            builder.config(k, Some(20));
+            RangeCircuitBuilder::mock(builder)
         }
-    }
-}
-
-impl Circuit<Fr> for MSMCircuit<Fr> {
-    type Config = MSMConfig<Fr>;
-    type FloorPlanner = SimpleFloorPlanner;
-
-    fn without_witnesses(&self) -> Self {
-        Self {
-            bases: vec![None; self.batch_size],
-            scalars: vec![None; self.batch_size],
-            batch_size: self.batch_size,
-            _marker: PhantomData,
+        CircuitBuilderStage::Keygen => {
+            builder.config(k, Some(20));
+            RangeCircuitBuilder::keygen(builder)
         }
-    }
-
-    fn configure(meta: &mut ConstraintSystem<Fr>) -> Self::Config {
-        let params: MSMCircuitParams = TEST_CONFIG;
-
-        MSMConfig::<Fr>::configure(
-            meta,
-            params.strategy,
-            &[params.num_advice],
-            &[params.num_lookup_advice],
-            params.num_fixed,
-            params.lookup_bits,
-            params.limb_bits,
-            params.num_limbs,
-            modulus::<Fq>(),
-            params.batch_size,
-            params.window_bits,
-            0,
-            params.degree as usize,
-        )
-    }
-
-    fn synthesize(
-        &self,
-        config: Self::Config,
-        mut layouter: impl Layouter<Fr>,
-    ) -> Result<(), Error> {
-        assert_eq!(config.batch_size, self.scalars.len());
-        assert_eq!(config.batch_size, self.bases.len());
-
-        config.fp_chip.load_lookup_table(&mut layouter)?;
-
-        let mut first_pass = SKIP_FIRST_PASS;
-        layouter.assign_region(
-            || "MSM",
-            |region| {
-                if first_pass {
-                    first_pass = false;
-                    return Ok(());
-                }
-
-                let witness_time = start_timer!(|| "Witness Generation");
-                let mut aux = config.fp_chip.new_context(region);
-                let ctx = &mut aux;
-
-                let mut scalars_assigned = Vec::new();
-                for scalar in &self.scalars {
-                    let assignment = config.fp_chip.range.gate.assign_region_last(
-                        ctx,
-                        vec![Witness(scalar.map_or(Value::unknown(), Value::known))],
-                        vec![],
-                    );
-                    scalars_assigned.push(vec![assignment]);
-                }
-
-                let ecc_chip = EccChip::construct(config.fp_chip.clone());
-                let mut bases_assigned = Vec::new();
-                for base in &self.bases {
-                    let base_assigned = ecc_chip.load_private(
-                        ctx,
-                        (
-                            base.map(|pt| Value::known(biguint_to_fe(&fe_to_biguint(&pt.x))))
-                                .unwrap_or(Value::unknown()),
-                            base.map(|pt| Value::known(biguint_to_fe(&fe_to_biguint(&pt.y))))
-                                .unwrap_or(Value::unknown()),
-                        ),
-                    );
-                    bases_assigned.push(base_assigned);
-                }
-
-                let _msm = ecc_chip.variable_base_msm::<G1Affine>(
-                    ctx,
-                    &bases_assigned,
-                    &scalars_assigned,
-                    254,
-                    config.window_bits,
-                );
-
-                config.fp_chip.finalize(ctx);
-                end_timer!(witness_time);
-
-                Ok(())
-            },
-        )
-    }
+        CircuitBuilderStage::Prover => RangeCircuitBuilder::prover(builder, break_points.unwrap()),
+    };
+    end_timer!(start0);
+    circuit
 }
 
 fn bench(c: &mut Criterion) {
@@ -226,55 +114,50 @@ fn bench(c: &mut Criterion) {
 
     let k = config.degree;
     let mut rng = OsRng;
-    let mut bases = Vec::new();
-    let mut scalars = Vec::new();
-    for _ in 0..config.batch_size {
-        let new_pt = Some(G1Affine::random(&mut rng));
-        bases.push(new_pt);
-
-        let new_scalar = Some(Fr::random(&mut rng));
-        scalars.push(new_scalar);
-    }
-    let circuit =
-        MSMCircuit::<Fr> { bases, scalars, batch_size: config.batch_size, _marker: PhantomData };
+    let circuit = msm_circuit(
+        config,
+        CircuitBuilderStage::Keygen,
+        vec![G1Affine::generator(); config.batch_size],
+        vec![Fr::one(); config.batch_size],
+        None,
+    );
 
     let params = ParamsKZG::<Bn256>::setup(k, &mut rng);
     let vk = keygen_vk(&params, &circuit).expect("vk should not fail");
     let pk = keygen_pk(&params, vk, &circuit).expect("pk should not fail");
+    let break_points = circuit.0.break_points.take();
+    drop(circuit);
 
+    let (bases, scalars): (Vec<_>, Vec<_>) =
+        (0..config.batch_size).map(|_| (G1Affine::random(&mut rng), Fr::random(&mut rng))).unzip();
     let mut group = c.benchmark_group("plonk-prover");
     group.sample_size(10);
-    group.bench_with_input(BenchmarkId::new("msm", k), &(&params, &pk), |b, &(params, pk)| {
-        b.iter(|| {
-            let mut bases = Vec::new();
-            let mut scalars = Vec::new();
-            for _ in 0..config.batch_size {
-                let new_pt = Some(G1Affine::random(&mut rng));
-                bases.push(new_pt);
-
-                let new_scalar = Some(Fr::random(&mut rng));
-                scalars.push(new_scalar);
-            }
-
-            let circuit = MSMCircuit::<Fr> {
-                bases,
-                scalars,
-                batch_size: config.batch_size,
-                _marker: PhantomData,
-            };
+    group.bench_with_input(
+        BenchmarkId::new("msm", k),
+        &(&params, &pk, &bases, &scalars),
+        |b, &(params, pk, bases, scalars)| {
+            b.iter(|| {
+                let circuit = msm_circuit(
+                    config,
+                    CircuitBuilderStage::Prover,
+                    bases.clone(),
+                    scalars.clone(),
+                    Some(break_points.clone()),
+                );
 
-            let mut transcript = Blake2bWrite::<_, _, Challenge255<_>>::init(vec![]);
-            create_proof::<
-                KZGCommitmentScheme<Bn256>,
-                ProverSHPLONK<'_, Bn256>,
-                Challenge255<G1Affine>,
-                _,
-                Blake2bWrite<Vec<u8>, G1Affine, Challenge255<_>>,
-                _,
-            >(params, pk, &[circuit], &[&[]], &mut rng, &mut transcript)
-            .expect("prover should not fail");
-        })
-    });
+                let mut transcript = Blake2bWrite::<_, _, Challenge255<_>>::init(vec![]);
+                create_proof::<
+                    KZGCommitmentScheme<Bn256>,
+                    ProverSHPLONK<'_, Bn256>,
+                    Challenge255<G1Affine>,
+                    _,
+                    Blake2bWrite<Vec<u8>, G1Affine, Challenge255<_>>,
+                    _,
+                >(params, pk, &[circuit], &[&[]], &mut rng, &mut transcript)
+                .expect("prover should not fail");
+            })
+        },
+    );
     group.finish()
 }
 
diff --git a/halo2-ecc/src/bn254/configs/bench_ec_add.config b/halo2-ecc/configs/bn254/bench_ec_add.config
similarity index 100%
rename from halo2-ecc/src/bn254/configs/bench_ec_add.config
rename to halo2-ecc/configs/bn254/bench_ec_add.config
diff --git a/halo2-ecc/src/bn254/configs/bench_fixed_msm.config b/halo2-ecc/configs/bn254/bench_fixed_msm.config
similarity index 100%
rename from halo2-ecc/src/bn254/configs/bench_fixed_msm.config
rename to halo2-ecc/configs/bn254/bench_fixed_msm.config
diff --git a/halo2-ecc/src/bn254/configs/bench_msm.config b/halo2-ecc/configs/bn254/bench_msm.config
similarity index 92%
rename from halo2-ecc/src/bn254/configs/bench_msm.config
rename to halo2-ecc/configs/bn254/bench_msm.config
index 1d1f769c..d665c0a8 100644
--- a/halo2-ecc/src/bn254/configs/bench_msm.config
+++ b/halo2-ecc/configs/bn254/bench_msm.config
@@ -1,3 +1,4 @@
+{"strategy":"Simple","degree":16,"num_advice":170,"num_lookup_advice":23,"num_fixed":1,"lookup_bits":15,"limb_bits":88,"num_limbs":3,"batch_size":100,"window_bits":4}
 {"strategy":"Simple","degree":17,"num_advice":84,"num_lookup_advice":11,"num_fixed":1,"lookup_bits":16,"limb_bits":88,"num_limbs":3,"batch_size":100,"window_bits":4}
 {"strategy":"Simple","degree":18,"num_advice":42,"num_lookup_advice":6,"num_fixed":1,"lookup_bits":17,"limb_bits":88,"num_limbs":3,"batch_size":100,"window_bits":4}
 {"strategy":"Simple","degree":19,"num_advice":20,"num_lookup_advice":3,"num_fixed":1,"lookup_bits":18,"limb_bits":90,"num_limbs":3,"batch_size":100,"window_bits":4}
diff --git a/halo2-ecc/src/bn254/configs/bench_pairing.config b/halo2-ecc/configs/bn254/bench_pairing.config
similarity index 100%
rename from halo2-ecc/src/bn254/configs/bench_pairing.config
rename to halo2-ecc/configs/bn254/bench_pairing.config
diff --git a/halo2-ecc/src/bn254/configs/ec_add_circuit.config b/halo2-ecc/configs/bn254/ec_add_circuit.config
similarity index 100%
rename from halo2-ecc/src/bn254/configs/ec_add_circuit.config
rename to halo2-ecc/configs/bn254/ec_add_circuit.config
diff --git a/halo2-ecc/src/bn254/configs/fixed_msm_circuit.config b/halo2-ecc/configs/bn254/fixed_msm_circuit.config
similarity index 100%
rename from halo2-ecc/src/bn254/configs/fixed_msm_circuit.config
rename to halo2-ecc/configs/bn254/fixed_msm_circuit.config
diff --git a/halo2-ecc/configs/bn254/msm_circuit.config b/halo2-ecc/configs/bn254/msm_circuit.config
new file mode 100644
index 00000000..f66f6077
--- /dev/null
+++ b/halo2-ecc/configs/bn254/msm_circuit.config
@@ -0,0 +1 @@
+{"strategy":"Simple","degree":17,"num_advice":84,"num_lookup_advice":11,"num_fixed":1,"lookup_bits":16,"limb_bits":88,"num_limbs":3,"batch_size":100,"window_bits":4}
\ No newline at end of file
diff --git a/halo2-ecc/src/bn254/configs/pairing_circuit.config b/halo2-ecc/configs/bn254/pairing_circuit.config
similarity index 100%
rename from halo2-ecc/src/bn254/configs/pairing_circuit.config
rename to halo2-ecc/configs/bn254/pairing_circuit.config
diff --git a/halo2-ecc/src/secp256k1/configs/bench_ecdsa.config b/halo2-ecc/configs/secp256k1/bench_ecdsa.config
similarity index 100%
rename from halo2-ecc/src/secp256k1/configs/bench_ecdsa.config
rename to halo2-ecc/configs/secp256k1/bench_ecdsa.config
diff --git a/halo2-ecc/src/secp256k1/configs/ecdsa_circuit.config b/halo2-ecc/configs/secp256k1/ecdsa_circuit.config
similarity index 100%
rename from halo2-ecc/src/secp256k1/configs/ecdsa_circuit.config
rename to halo2-ecc/configs/secp256k1/ecdsa_circuit.config
diff --git a/halo2-ecc/src/bigint/add_no_carry.rs b/halo2-ecc/src/bigint/add_no_carry.rs
index 8cc687d4..e7d920a8 100644
--- a/halo2-ecc/src/bigint/add_no_carry.rs
+++ b/halo2-ecc/src/bigint/add_no_carry.rs
@@ -1,34 +1,35 @@
 use super::{CRTInteger, OverflowInteger};
-use halo2_base::{gates::GateInstructions, utils::PrimeField, Context, QuantumCell::Existing};
+use halo2_base::{gates::GateInstructions, utils::ScalarField, Context};
 use std::cmp::max;
 
-pub fn assign<'v, F: PrimeField>(
+pub fn assign<F: ScalarField>(
     gate: &impl GateInstructions<F>,
-    ctx: &mut Context<'_, F>,
-    a: &OverflowInteger<'v, F>,
-    b: &OverflowInteger<'v, F>,
-) -> OverflowInteger<'v, F> {
-    assert_eq!(a.limbs.len(), b.limbs.len());
+    ctx: &mut Context<F>,
+    a: &OverflowInteger<F>,
+    b: &OverflowInteger<F>,
+) -> OverflowInteger<F> {
+    debug_assert_eq!(a.limbs.len(), b.limbs.len());
 
     let out_limbs = a
         .limbs
         .iter()
         .zip(b.limbs.iter())
-        .map(|(a_limb, b_limb)| gate.add(ctx, Existing(a_limb), Existing(b_limb)))
+        .map(|(&a_limb, &b_limb)| gate.add(ctx, a_limb, b_limb))
         .collect();
 
     OverflowInteger::construct(out_limbs, max(a.max_limb_bits, b.max_limb_bits) + 1)
 }
 
-pub fn crt<'v, F: PrimeField>(
+// pass by reference to avoid cloning the BigInt in CRTInteger, unclear if this is optimal
+pub fn crt<F: ScalarField>(
     gate: &impl GateInstructions<F>,
-    ctx: &mut Context<'_, F>,
-    a: &CRTInteger<'v, F>,
-    b: &CRTInteger<'v, F>,
-) -> CRTInteger<'v, F> {
-    assert_eq!(a.truncation.limbs.len(), b.truncation.limbs.len());
+    ctx: &mut Context<F>,
+    a: &CRTInteger<F>,
+    b: &CRTInteger<F>,
+) -> CRTInteger<F> {
+    debug_assert_eq!(a.truncation.limbs.len(), b.truncation.limbs.len());
     let out_trunc = assign::<F>(gate, ctx, &a.truncation, &b.truncation);
-    let out_native = gate.add(ctx, Existing(&a.native), Existing(&b.native));
-    let out_val = a.value.as_ref().zip(b.value.as_ref()).map(|(a, b)| a + b);
+    let out_native = gate.add(ctx, a.native, b.native);
+    let out_val = &a.value + &b.value;
     CRTInteger::construct(out_trunc, out_native, out_val)
 }
diff --git a/halo2-ecc/src/bigint/big_is_equal.rs b/halo2-ecc/src/bigint/big_is_equal.rs
index f963937f..f64a3fae 100644
--- a/halo2-ecc/src/bigint/big_is_equal.rs
+++ b/halo2-ecc/src/bigint/big_is_equal.rs
@@ -1,47 +1,45 @@
 use super::{CRTInteger, OverflowInteger};
-use halo2_base::{
-    gates::GateInstructions, utils::PrimeField, AssignedValue, Context, QuantumCell::Existing,
-};
+use halo2_base::{gates::GateInstructions, utils::ScalarField, AssignedValue, Context};
 
-// given OverflowInteger<F>'s `a` and `b` of the same shape,
-// returns whether `a == b`
-pub fn assign<'v, F: PrimeField>(
+/// Given OverflowInteger<F>'s `a` and `b` of the same shape,
+/// returns whether `a == b`.
+pub fn assign<F: ScalarField>(
     gate: &impl GateInstructions<F>,
-    ctx: &mut Context<'_, F>,
-    a: &OverflowInteger<'v, F>,
-    b: &OverflowInteger<'v, F>,
-) -> AssignedValue<'v, F> {
+    ctx: &mut Context<F>,
+    a: &OverflowInteger<F>,
+    b: &OverflowInteger<F>,
+) -> AssignedValue<F> {
     let k = a.limbs.len();
-    assert_eq!(k, b.limbs.len());
-    assert_ne!(k, 0);
+    debug_assert_eq!(k, b.limbs.len());
+    debug_assert_ne!(k, 0);
 
     let mut a_limbs = a.limbs.iter();
     let mut b_limbs = b.limbs.iter();
-    let mut partial =
-        gate.is_equal(ctx, Existing(a_limbs.next().unwrap()), Existing(b_limbs.next().unwrap()));
-    for (a_limb, b_limb) in a_limbs.zip(b_limbs) {
-        let eq_limb = gate.is_equal(ctx, Existing(a_limb), Existing(b_limb));
-        partial = gate.and(ctx, Existing(&eq_limb), Existing(&partial));
+    let mut partial = gate.is_equal(ctx, *a_limbs.next().unwrap(), *b_limbs.next().unwrap());
+    for (&a_limb, &b_limb) in a_limbs.zip(b_limbs) {
+        let eq_limb = gate.is_equal(ctx, a_limb, b_limb);
+        partial = gate.and(ctx, eq_limb, partial);
     }
     partial
 }
 
-pub fn wrapper<'v, F: PrimeField>(
+pub fn wrapper<F: ScalarField>(
     gate: &impl GateInstructions<F>,
-    ctx: &mut Context<'_, F>,
-    a: &CRTInteger<'v, F>,
-    b: &CRTInteger<'v, F>,
-) -> AssignedValue<'v, F> {
+    ctx: &mut Context<F>,
+    a: &CRTInteger<F>,
+    b: &CRTInteger<F>,
+) -> AssignedValue<F> {
     assign(gate, ctx, &a.truncation, &b.truncation)
 }
 
+/// Returns whether `a == b`: the AND of limb-wise truncation equality and native-value equality.
-pub fn crt<'v, F: PrimeField>(
+pub fn crt<F: ScalarField>(
     gate: &impl GateInstructions<F>,
-    ctx: &mut Context<'_, F>,
-    a: &CRTInteger<'v, F>,
-    b: &CRTInteger<'v, F>,
-) -> AssignedValue<'v, F> {
+    ctx: &mut Context<F>,
+    a: &CRTInteger<F>,
+    b: &CRTInteger<F>,
+) -> AssignedValue<F> {
     let out_trunc = assign::<F>(gate, ctx, &a.truncation, &b.truncation);
-    let out_native = gate.is_equal(ctx, Existing(&a.native), Existing(&b.native));
-    gate.and(ctx, Existing(&out_trunc), Existing(&out_native))
+    let out_native = gate.is_equal(ctx, a.native, b.native);
+    gate.and(ctx, out_trunc, out_native)
 }
diff --git a/halo2-ecc/src/bigint/big_is_zero.rs b/halo2-ecc/src/bigint/big_is_zero.rs
index 4ab84fa3..5014d194 100644
--- a/halo2-ecc/src/bigint/big_is_zero.rs
+++ b/halo2-ecc/src/bigint/big_is_zero.rs
@@ -1,46 +1,47 @@
 use super::{CRTInteger, OverflowInteger};
-use halo2_base::{
-    gates::GateInstructions, utils::PrimeField, AssignedValue, Context, QuantumCell::Existing,
-};
+use halo2_base::{gates::GateInstructions, utils::ScalarField, AssignedValue, Context};
 
 /// assume you know that the limbs of `a` are all in [0, 2^{a.max_limb_bits})
-pub fn positive<'v, F: PrimeField>(
+pub fn positive<F: ScalarField>(
     gate: &impl GateInstructions<F>,
-    ctx: &mut Context<'v, F>,
-    a: &OverflowInteger<'v, F>,
-) -> AssignedValue<'v, F> {
+    ctx: &mut Context<F>,
+    a: &OverflowInteger<F>,
+) -> AssignedValue<F> {
     let k = a.limbs.len();
-    assert_ne!(k, 0);
+    debug_assert_ne!(k, 0);
     debug_assert!(a.max_limb_bits as u32 + k.ilog2() < F::CAPACITY);
 
-    let sum = gate.sum(ctx, a.limbs.iter().map(Existing));
-    gate.is_zero(ctx, &sum)
+    let sum = gate.sum(ctx, a.limbs.iter().copied());
+    gate.is_zero(ctx, sum)
 }
 
 // given OverflowInteger<F> `a`, returns whether `a == 0`
-pub fn assign<'v, F: PrimeField>(
+pub fn assign<F: ScalarField>(
     gate: &impl GateInstructions<F>,
-    ctx: &mut Context<'_, F>,
-    a: &OverflowInteger<'v, F>,
-) -> AssignedValue<'v, F> {
+    ctx: &mut Context<F>,
+    a: &OverflowInteger<F>,
+) -> AssignedValue<F> {
     let k = a.limbs.len();
-    assert_ne!(k, 0);
+    debug_assert_ne!(k, 0);
 
     let mut a_limbs = a.limbs.iter();
-    let mut partial = gate.is_zero(ctx, a_limbs.next().unwrap());
-    for a_limb in a_limbs {
+    let mut partial = gate.is_zero(ctx, *a_limbs.next().unwrap());
+    for &a_limb in a_limbs {
         let limb_is_zero = gate.is_zero(ctx, a_limb);
-        partial = gate.and(ctx, Existing(&limb_is_zero), Existing(&partial));
+        partial = gate.and(ctx, limb_is_zero, partial);
     }
     partial
 }
 
+/// Given CRTInteger `a`, returns whether `a == 0`: the AND of "all truncation limbs are zero"
+/// and "the native value is zero". The witness `a.value` is intentionally not asserted to be
+/// zero here, because a nonzero `a` is a legitimate input whose result is just false.
-pub fn crt<'v, F: PrimeField>(
+pub fn crt<F: ScalarField>(
     gate: &impl GateInstructions<F>,
-    ctx: &mut Context<'_, F>,
-    a: &CRTInteger<'v, F>,
-) -> AssignedValue<'v, F> {
+    ctx: &mut Context<F>,
+    a: &CRTInteger<F>,
+) -> AssignedValue<F> {
     let out_trunc = assign::<F>(gate, ctx, &a.truncation);
-    let out_native = gate.is_zero(ctx, &a.native);
-    gate.and(ctx, Existing(&out_trunc), Existing(&out_native))
+    let out_native = gate.is_zero(ctx, a.native);
+    gate.and(ctx, out_trunc, out_native)
 }
diff --git a/halo2-ecc/src/bigint/big_less_than.rs b/halo2-ecc/src/bigint/big_less_than.rs
index 52528870..276de18c 100644
--- a/halo2-ecc/src/bigint/big_less_than.rs
+++ b/halo2-ecc/src/bigint/big_less_than.rs
@@ -1,16 +1,16 @@
 use super::OverflowInteger;
-use halo2_base::{gates::RangeInstructions, utils::PrimeField, AssignedValue, Context};
+use halo2_base::{gates::RangeInstructions, utils::ScalarField, AssignedValue, Context};
 
 // given OverflowInteger<F>'s `a` and `b` of the same shape,
 // returns whether `a < b`
-pub fn assign<'a, F: PrimeField>(
+pub fn assign<F: ScalarField>(
     range: &impl RangeInstructions<F>,
-    ctx: &mut Context<'a, F>,
-    a: &OverflowInteger<'a, F>,
-    b: &OverflowInteger<'a, F>,
+    ctx: &mut Context<F>,
+    a: &OverflowInteger<F>,
+    b: &OverflowInteger<F>,
     limb_bits: usize,
     limb_base: F,
-) -> AssignedValue<'a, F> {
+) -> AssignedValue<F> {
     // a < b iff a - b has underflow
     let (_, underflow) = super::sub::assign::<F>(range, ctx, a, b, limb_bits, limb_base);
     underflow
diff --git a/halo2-ecc/src/bigint/carry_mod.rs b/halo2-ecc/src/bigint/carry_mod.rs
index 111f31d5..4b266cf3 100644
--- a/halo2-ecc/src/bigint/carry_mod.rs
+++ b/halo2-ecc/src/bigint/carry_mod.rs
@@ -1,12 +1,11 @@
 use super::{check_carry_to_zero, CRTInteger, OverflowInteger};
-use crate::halo2_proofs::circuit::Value;
 use halo2_base::{
     gates::{range::RangeStrategy, GateInstructions, RangeInstructions},
-    utils::{biguint_to_fe, decompose_bigint_option, value_to_option, PrimeField},
+    utils::{decompose_bigint, BigPrimeField},
     AssignedValue, Context,
     QuantumCell::{Constant, Existing, Witness},
 };
-use num_bigint::{BigInt, BigUint};
+use num_bigint::BigInt;
 use num_integer::Integer;
 use num_traits::{One, Signed};
 use std::{cmp::max, iter};
@@ -20,11 +19,14 @@ use std::{cmp::max, iter};
 // We constrain `a = out + modulus * quotient` and range check `out` and `quotient`
 //
 // Assumption: the leading two bits (in big endian) are 1, and `abs(a) <= 2^{n * k - 1 + F::NUM_BITS - 2}` (A weaker assumption is also enough, but this is good enough for forseeable use cases)
-pub fn crt<'a, F: PrimeField>(
+
+// This is currently optimized for limbs greater than 64 bits, so we need `F` to be a `BigPrimeField`
+// In the future we'll need a slightly different implementation for limbs that fit in 32 or 64 bits (e.g., `F` is Goldilocks)
+pub fn crt<F: BigPrimeField>(
     range: &impl RangeInstructions<F>,
     // chip: &BigIntConfig<F>,
-    ctx: &mut Context<'a, F>,
-    a: &CRTInteger<'a, F>,
+    ctx: &mut Context<F>,
+    a: &CRTInteger<F>,
     k_bits: usize, // = a.len().bits()
     modulus: &BigInt,
     mod_vec: &[F],
@@ -32,22 +34,12 @@ pub fn crt<'a, F: PrimeField>(
     limb_bits: usize,
     limb_bases: &[F],
     limb_base_big: &BigInt,
-) -> CRTInteger<'a, F> {
+) -> CRTInteger<F> {
     let n = limb_bits;
     let k = a.truncation.limbs.len();
     let trunc_len = n * k;
 
-    #[cfg(feature = "display")]
-    {
-        let key = format!("carry_mod(crt) length {k}");
-        let count = ctx.op_count.entry(key).or_insert(0);
-        *count += 1;
-
-        // safety check:
-        a.value
-            .as_ref()
-            .map(|a| assert!(a.bits() as usize <= n * k - 1 + (F::NUM_BITS as usize) - 2));
-    }
+    debug_assert!(a.value.bits() as usize <= n * k - 1 + (F::NUM_BITS as usize) - 2);
 
     // in order for CRT method to work, we need `abs(out + modulus * quotient - a) < 2^{trunc_len - 1} * native_modulus::<F>`
     // this is ensured if `0 <= out < 2^{n*k}` and
@@ -55,7 +47,7 @@ pub fn crt<'a, F: PrimeField>(
     // which is ensured if
     // `abs(modulus * quotient) < 2^{trunc_len - 1 + F::NUM_BITS - 1} <= 2^{trunc_len - 1} * native_modulus::<F> - abs(a)` given our assumption `abs(a) <= 2^{n * k - 1 + F::NUM_BITS - 2}`
     let quot_max_bits = trunc_len - 1 + (F::NUM_BITS as usize) - 1 - (modulus.bits() as usize);
-    assert!(quot_max_bits < trunc_len);
+    debug_assert!(quot_max_bits < trunc_len);
     // Let n' <= quot_max_bits - n(k-1) - 1
     // If quot[i] <= 2^n for i < k - 1 and quot[k-1] <= 2^{n'} then
     // quot < 2^{n(k-1)+1} + 2^{n' + n(k-1)} = (2+2^{n'}) 2^{n(k-1)} < 2^{n'+1} * 2^{n(k-1)} <= 2^{quot_max_bits - n(k-1)} * 2^{n(k-1)}
@@ -69,26 +61,17 @@ pub fn crt<'a, F: PrimeField>(
     // we need to find `out_vec` as a proper BigInt with k limbs
     // we need to find `quot_vec` as a proper BigInt with k limbs
 
-    // we need to constrain that `sum_i out_vec[i] * 2^{n*i} = out_native` in `F`
-    // we need to constrain that `sum_i quot_vec[i] * 2^{n*i} = quot_native` in `F`
-    let (out_val, out_vec, quot_vec) = if let Some(a_big) = value_to_option(a.value.as_ref()) {
-        let (quot_val, out_val) = a_big.div_mod_floor(modulus);
+    let (quot_val, out_val) = a.value.div_mod_floor(modulus);
 
-        debug_assert!(out_val < (BigInt::one() << (n * k)));
-        debug_assert!(quot_val.abs() < (BigInt::one() << quot_max_bits));
+    debug_assert!(out_val < (BigInt::one() << (n * k)));
+    debug_assert!(quot_val.abs() < (BigInt::one() << quot_max_bits));
 
-        (
-            Value::known(out_val.clone()),
-            // decompose_bigint_option just throws away signed limbs in index >= k
-            decompose_bigint_option::<F>(Value::known(&out_val), k, n),
-            decompose_bigint_option::<F>(Value::known(&quot_val), k, n),
-        )
-    } else {
-        (Value::unknown(), vec![Value::unknown(); k], vec![Value::unknown(); k])
-    };
+    // decompose_bigint just throws away signed limbs in index >= k
+    let out_vec = decompose_bigint::<F>(&out_val, k, n);
+    let quot_vec = decompose_bigint::<F>(&quot_val, k, n);
 
-    // let out_native = out_val.as_ref().map(|a| bigint_to_fe::<F>(a));
-    // let quot_native = quot_val.map(|a| bigint_to_fe::<F>(&a));
+    // we need to constrain that `sum_i out_vec[i] * 2^{n*i} = out_native` in `F`
+    // we need to constrain that `sum_i quot_vec[i] * 2^{n*i} = quot_native` in `F`
 
     // assert!(modulus < &(BigUint::one() << (n * k)));
     assert_eq!(mod_vec.len(), k);
@@ -107,76 +90,46 @@ pub fn crt<'a, F: PrimeField>(
     let mut quot_assigned: Vec<AssignedValue<F>> = Vec::with_capacity(k);
     let mut out_assigned: Vec<AssignedValue<F>> = Vec::with_capacity(k);
     let mut check_assigned: Vec<AssignedValue<F>> = Vec::with_capacity(k);
-    let mut tmp_assigned: Vec<AssignedValue<F>> = Vec::with_capacity(k);
 
-    // match chip.strategy {
     // strategies where we carry out school-book multiplication in some form:
     //    BigIntStrategy::Simple => {
     for (i, (a_limb, (quot_v, out_v))) in
         a.truncation.limbs.iter().zip(quot_vec.into_iter().zip(out_vec.into_iter())).enumerate()
     {
-        let (quot_cell, out_cell, check_cell) = {
-            let prod = range.gate().inner_product_left(
-                ctx,
-                quot_assigned.iter().map(|a| Existing(a)).chain(iter::once(Witness(quot_v))),
-                mod_vec[..=i].iter().rev().map(|c| Constant(*c)),
-                &mut tmp_assigned,
-            );
-            // let gate_index = prod.column();
-
-            let quot_cell = tmp_assigned.pop().unwrap();
-            let out_cell;
-            let check_cell;
-            // perform step 2: compute prod - a + out
-            let temp1 = prod.value().zip(a_limb.value()).map(|(prod, a)| *prod - a);
-            let check_val = temp1 + out_v;
-
-            // This is to take care of edge case where we switch columns to handle overlap
-            let alloc = ctx.advice_alloc.get_mut(range.gate().context_id()).unwrap();
-            if alloc.1 + 6 >= ctx.max_rows {
-                // edge case, we need to copy the last `prod` cell
-                // dbg!(*alloc);
-                alloc.1 = 0;
-                alloc.0 += 1;
-                range.gate().assign_region_last(ctx, [Existing(&prod)], []);
-            }
-            match range.strategy() {
-                RangeStrategy::Vertical => {
-                    // transpose of:
-                    // | prod | -1 | a | prod - a | 1 | out | prod - a + out
-                    // where prod is at relative row `offset`
-                    let mut assignments = range.gate().assign_region(
-                        ctx,
-                        [
-                            Constant(-F::one()),
-                            Existing(a_limb),
-                            Witness(temp1),
-                            Constant(F::one()),
-                            Witness(out_v),
-                            Witness(check_val),
-                        ],
-                        [(-1, None), (2, None)],
-                    );
-                    check_cell = assignments.pop().unwrap();
-                    out_cell = assignments.pop().unwrap();
-                }
-                RangeStrategy::PlonkPlus => {
-                    // | prod | a | out | prod - a + out |
-                    // selector columns:
-                    // | 1    | 0 | 0   |
-                    // | 0    | -1| 1   |
-                    let mut assignments = range.gate().assign_region(
-                        ctx,
-                        [Existing(a_limb), Witness(out_v), Witness(check_val)],
-                        [(-1, Some([F::zero(), -F::one(), F::one()]))],
-                    );
-                    check_cell = assignments.pop().unwrap();
-                    out_cell = assignments.pop().unwrap();
-                }
+        let (prod, new_quot_cell) = range.gate().inner_product_left_last(
+            ctx,
+            quot_assigned.iter().map(|a| Existing(*a)).chain(iter::once(Witness(quot_v))),
+            mod_vec[..=i].iter().rev().map(|c| Constant(*c)),
+        );
+        // let gate_index = prod.column();
+
+        let out_cell;
+        let check_cell;
+        // perform step 2: compute prod - a + out
+        let temp1 = *prod.value() - a_limb.value();
+        let check_val = temp1 + out_v;
+
+        match range.strategy() {
+            RangeStrategy::Vertical => {
+                // transpose of:
+                // | prod | -1 | a | prod - a | 1 | out | prod - a + out
+                // where prod is at relative row `offset`
+                ctx.assign_region(
+                    [
+                        Constant(-F::one()),
+                        Existing(*a_limb),
+                        Witness(temp1),
+                        Constant(F::one()),
+                        Witness(out_v),
+                        Witness(check_val),
+                    ],
+                    [-1, 2], // note the NEGATIVE index! this is using gate overlapping with the previous inner product call
+                );
+                check_cell = ctx.last().unwrap();
+                out_cell = ctx.get(-2);
             }
-            (quot_cell, out_cell, check_cell)
-        };
-        quot_assigned.push(quot_cell);
+        }
+        quot_assigned.push(new_quot_cell);
         out_assigned.push(out_cell);
         check_assigned.push(check_cell);
     }
@@ -186,32 +139,21 @@ pub fn crt<'a, F: PrimeField>(
     // range check limbs of `out` are in [0, 2^n) except last limb should be in [0, 2^out_last_limb_bits)
     for (out_index, out_cell) in out_assigned.iter().enumerate() {
         let limb_bits = if out_index == k - 1 { out_last_limb_bits } else { n };
-        range.range_check(ctx, out_cell, limb_bits);
+        range.range_check(ctx, *out_cell, limb_bits);
     }
 
     // range check that quot_cell in quot_assigned is in [-2^n, 2^n) except for last cell check it's in [-2^quot_last_limb_bits, 2^quot_last_limb_bits)
     for (q_index, quot_cell) in quot_assigned.iter().enumerate() {
         let limb_bits = if q_index == k - 1 { quot_last_limb_bits } else { n };
-        let limb_base = if q_index == k - 1 {
-            biguint_to_fe(&(BigUint::one() << limb_bits))
-        } else {
-            limb_bases[1]
-        };
+        let limb_base =
+            if q_index == k - 1 { range.gate().pow_of_two()[limb_bits] } else { limb_bases[1] };
 
         // compute quot_cell + 2^n and range check with n + 1 bits
-        let quot_shift = {
-            let out_val = quot_cell.value().map(|a| limb_base + a);
-            // | quot_cell | 2^n | 1 | quot_cell + 2^n |
-            range.gate().assign_region_last(
-                ctx,
-                [Existing(quot_cell), Constant(limb_base), Constant(F::one()), Witness(out_val)],
-                [(0, None)],
-            )
-        };
-        range.range_check(ctx, &quot_shift, limb_bits + 1);
+        let quot_shift = range.gate().add(ctx, *quot_cell, Constant(limb_base));
+        range.range_check(ctx, quot_shift, limb_bits + 1);
     }
 
-    let check_overflow_int = &OverflowInteger::construct(
+    let check_overflow_int = OverflowInteger::construct(
         check_assigned,
         max(max(limb_bits, a.truncation.max_limb_bits) + 1, 2 * n + k_bits),
     );
@@ -226,40 +168,30 @@ pub fn crt<'a, F: PrimeField>(
         limb_base_big,
     );
 
-    // Constrain `out_native = sum_i out_assigned[i] * 2^{n*i}` in `F`
-    let out_native_assigned = OverflowInteger::<F>::evaluate(
+    // Constrain `quot_native = sum_i quot_assigned[i] * 2^{n*i}` in `F`
+    let quot_native = OverflowInteger::<F>::evaluate(
         range.gate(),
-        /*chip,*/ ctx,
-        &out_assigned,
-        limb_bases.iter().cloned(),
+        ctx,
+        quot_assigned,
+        limb_bases.iter().copied(),
     );
 
-    // Constrain `quot_native = sum_i quot_assigned[i] * 2^{n*i}` in `F`
-    let quot_native_assigned = OverflowInteger::<F>::evaluate(
+    // Constrain `out_native = sum_i out_assigned[i] * 2^{n*i}` in `F`
+    let out_native = OverflowInteger::<F>::evaluate(
         range.gate(),
-        /*chip,*/ ctx,
-        &quot_assigned,
-        limb_bases.iter().cloned(),
+        ctx,
+        out_assigned.iter().copied(),
+        limb_bases.iter().copied(),
     );
 
-    // TODO: we can save 1 cell by connecting `out_native_assigned` computation with the following:
+    // We save 1 cell by connecting `out_native` computation with the following:
 
     // Check `out + modulus * quotient - a = 0` in native field
     // | out | modulus | quotient | a |
-    let _native_computation = range.gate().assign_region_last(
-        ctx,
-        [
-            Existing(&out_native_assigned),
-            Constant(mod_native),
-            Existing(&quot_native_assigned),
-            Existing(&a.native),
-        ],
-        [(0, None)],
+    ctx.assign_region(
+        [Constant(mod_native), Existing(quot_native), Existing(a.native)],
+        [-1], // negative index because -1 relative offset is `out_native` assigned value
     );
 
-    CRTInteger::construct(
-        OverflowInteger::construct(out_assigned, limb_bits),
-        out_native_assigned,
-        out_val,
-    )
+    CRTInteger::construct(OverflowInteger::construct(out_assigned, limb_bits), out_native, out_val)
 }
diff --git a/halo2-ecc/src/bigint/check_carry_mod_to_zero.rs b/halo2-ecc/src/bigint/check_carry_mod_to_zero.rs
index 38453da0..db6f9084 100644
--- a/halo2-ecc/src/bigint/check_carry_mod_to_zero.rs
+++ b/halo2-ecc/src/bigint/check_carry_mod_to_zero.rs
@@ -1,12 +1,11 @@
 use super::{check_carry_to_zero, CRTInteger, OverflowInteger};
-use crate::halo2_proofs::circuit::Value;
 use halo2_base::{
     gates::{GateInstructions, RangeInstructions},
-    utils::{biguint_to_fe, decompose_bigint_option, value_to_option, PrimeField},
+    utils::{decompose_bigint, BigPrimeField},
     AssignedValue, Context,
     QuantumCell::{Constant, Existing, Witness},
 };
-use num_bigint::{BigInt, BigUint};
+use num_bigint::BigInt;
 use num_integer::Integer;
 use num_traits::{One, Signed, Zero};
 use std::{cmp::max, iter};
@@ -14,11 +13,10 @@ use std::{cmp::max, iter};
 // same as carry_mod::crt but `out = 0` so no need to range check
 //
 // Assumption: the leading two bits (in big endian) are 1, and `a.max_size <= 2^{n * k - 1 + F::NUM_BITS - 2}` (A weaker assumption is also enough)
-pub fn crt<'a, F: PrimeField>(
+pub fn crt<F: BigPrimeField>(
     range: &impl RangeInstructions<F>,
-    // chip: &BigIntConfig<F>,
-    ctx: &mut Context<'a, F>,
-    a: &CRTInteger<'a, F>,
+    ctx: &mut Context<F>,
+    a: &CRTInteger<F>,
     k_bits: usize, // = a.len().bits()
     modulus: &BigInt,
     mod_vec: &[F],
@@ -31,17 +29,7 @@ pub fn crt<'a, F: PrimeField>(
     let k = a.truncation.limbs.len();
     let trunc_len = n * k;
 
-    #[cfg(feature = "display")]
-    {
-        let key = format!("check_carry_mod(crt) length {k}");
-        let count = ctx.op_count.entry(key).or_insert(0);
-        *count += 1;
-
-        // safety check:
-        a.value
-            .as_ref()
-            .map(|a| assert!(a.bits() as usize <= n * k - 1 + (F::NUM_BITS as usize) - 2));
-    }
+    debug_assert!(a.value.bits() as usize <= n * k - 1 + (F::NUM_BITS as usize) - 2);
 
     // see carry_mod.rs for explanation
     let quot_max_bits = trunc_len - 1 + (F::NUM_BITS as usize) - 1 - (modulus.bits() as usize);
@@ -53,19 +41,15 @@ pub fn crt<'a, F: PrimeField>(
     // we need to find `quot_native` as a native F element
 
     // we need to constrain that `sum_i quot_vec[i] * 2^{n*i} = quot_native` in `F`
-    let quot_vec = if let Some(a_big) = value_to_option(a.value.as_ref()) {
-        let (quot_val, _out_val) = a_big.div_mod_floor(modulus);
+    let (quot_val, _out_val) = a.value.div_mod_floor(modulus);
 
-        // only perform safety checks in display mode so we can turn them off in production
-        debug_assert_eq!(_out_val, BigInt::zero());
-        debug_assert!(quot_val.abs() < (BigInt::one() << quot_max_bits));
+    // these safety checks are debug assertions, so they are compiled out of release builds
+    debug_assert_eq!(_out_val, BigInt::zero());
+    debug_assert!(quot_val.abs() < (BigInt::one() << quot_max_bits));
 
-        decompose_bigint_option::<F>(Value::known(&quot_val), k, n)
-    } else {
-        vec![Value::unknown(); k]
-    };
+    let quot_vec = decompose_bigint::<F>(&quot_val, k, n);
 
-    //assert!(modulus < &(BigUint::one() << (n * k)));
+    debug_assert!(modulus < &(BigInt::one() << (n * k)));
 
     // We need to show `modulus * quotient - a` is:
     // - congruent to `0 (mod 2^trunc_len)`
@@ -81,43 +65,24 @@ pub fn crt<'a, F: PrimeField>(
 
     let mut quot_assigned: Vec<AssignedValue<F>> = Vec::with_capacity(k);
     let mut check_assigned: Vec<AssignedValue<F>> = Vec::with_capacity(k);
-    let mut tmp_assigned: Vec<AssignedValue<F>> = Vec::with_capacity(k);
 
     // match chip.strategy {
     //    BigIntStrategy::Simple => {
     for (i, (a_limb, quot_v)) in a.truncation.limbs.iter().zip(quot_vec.into_iter()).enumerate() {
-        let (quot_cell, check_cell) = {
-            let prod = range.gate().inner_product_left(
-                ctx,
-                quot_assigned.iter().map(Existing).chain(iter::once(Witness(quot_v))),
-                mod_vec[0..=i].iter().rev().map(|c| Constant(*c)),
-                &mut tmp_assigned,
-            );
-
-            let quot_cell = tmp_assigned.pop().unwrap();
-            // perform step 2: compute prod - a + out
-            // transpose of:
-            // | prod | -1 | a | prod - a |
-
-            // This is to take care of edge case where we switch columns to handle overlap
-            let alloc = ctx.advice_alloc.get_mut(range.gate().context_id()).unwrap();
-            if alloc.1 + 3 >= ctx.max_rows {
-                // edge case, we need to copy the last `prod` cell
-                alloc.1 = 0;
-                alloc.0 += 1;
-                range.gate().assign_region_last(ctx, vec![Existing(&prod)], vec![]);
-            }
-
-            let check_val = prod.value().zip(a_limb.value()).map(|(prod, a)| *prod - a);
-            let check_cell = range.gate().assign_region_last(
-                ctx,
-                vec![Constant(-F::one()), Existing(a_limb), Witness(check_val)],
-                vec![(-1, None)],
-            );
-
-            (quot_cell, check_cell)
-        };
-        quot_assigned.push(quot_cell);
+        let (prod, new_quot_cell) = range.gate().inner_product_left_last(
+            ctx,
+            quot_assigned.iter().map(|x| Existing(*x)).chain(iter::once(Witness(quot_v))),
+            mod_vec[0..=i].iter().rev().map(|c| Constant(*c)),
+        );
+
+        // perform step 2: compute prod - a (there is no `out` term here because out = 0)
+        // transpose of:
+        // | prod | -1 | a | prod - a |
+        let check_val = *prod.value() - a_limb.value();
+        let check_cell = ctx
+            .assign_region_last([Constant(-F::one()), Existing(*a_limb), Witness(check_val)], [-1]);
+
+        quot_assigned.push(new_quot_cell);
         check_assigned.push(check_cell);
     }
     //    }
@@ -126,35 +91,16 @@ pub fn crt<'a, F: PrimeField>(
     // range check that quot_cell in quot_assigned is in [-2^n, 2^n) except for last cell check it's in [-2^quot_last_limb_bits, 2^quot_last_limb_bits)
     for (q_index, quot_cell) in quot_assigned.iter().enumerate() {
         let limb_bits = if q_index == k - 1 { quot_last_limb_bits } else { n };
-        let limb_base = if q_index == k - 1 {
-            biguint_to_fe(&(BigUint::one() << limb_bits))
-        } else {
-            limb_bases[1]
-        };
+        let limb_base =
+            if q_index == k - 1 { range.gate().pow_of_two()[limb_bits] } else { limb_bases[1] };
 
         // compute quot_cell + 2^n and range check with n + 1 bits
-        let quot_shift = {
-            // TODO: unnecessary clone
-            let out_val = quot_cell.value().map(|a| limb_base + a);
-            // | quot_cell | 2^n | 1 | quot_cell + 2^n |
-            range.gate().assign_region_last(
-                ctx,
-                vec![
-                    Existing(quot_cell),
-                    Constant(limb_base),
-                    Constant(F::one()),
-                    Witness(out_val),
-                ],
-                vec![(0, None)],
-            )
-        };
-        range.range_check(ctx, &quot_shift, limb_bits + 1);
+        let quot_shift = range.gate().add(ctx, *quot_cell, Constant(limb_base));
+        range.range_check(ctx, quot_shift, limb_bits + 1);
     }
 
-    let check_overflow_int = &OverflowInteger::construct(
-        check_assigned,
-        max(a.truncation.max_limb_bits, 2 * n + k_bits),
-    );
+    let check_overflow_int =
+        OverflowInteger::construct(check_assigned, max(a.truncation.max_limb_bits, 2 * n + k_bits));
 
     // check that `modulus * quotient - a == 0 mod 2^{trunc_len}` after carry
     check_carry_to_zero::truncate::<F>(
@@ -167,23 +113,17 @@ pub fn crt<'a, F: PrimeField>(
     );
 
     // Constrain `quot_native = sum_i out_assigned[i] * 2^{n*i}` in `F`
-    let quot_native_assigned = OverflowInteger::<F>::evaluate(
+    let quot_native = OverflowInteger::<F>::evaluate(
         range.gate(),
-        /*chip,*/ ctx,
-        &quot_assigned,
-        limb_bases.iter().cloned(),
+        ctx,
+        quot_assigned,
+        limb_bases.iter().copied(),
     );
 
     // Check `0 + modulus * quotient - a = 0` in native field
     // | 0 | modulus | quotient | a |
-    let _native_computation = range.gate().assign_region(
-        ctx,
-        vec![
-            Constant(F::zero()),
-            Constant(mod_native),
-            Existing(&quot_native_assigned),
-            Existing(&a.native),
-        ],
-        vec![(0, None)],
+    ctx.assign_region(
+        [Constant(F::zero()), Constant(mod_native), Existing(quot_native), Existing(a.native)],
+        [0],
     );
 }
diff --git a/halo2-ecc/src/bigint/check_carry_to_zero.rs b/halo2-ecc/src/bigint/check_carry_to_zero.rs
index e718b128..fa2f5648 100644
--- a/halo2-ecc/src/bigint/check_carry_to_zero.rs
+++ b/halo2-ecc/src/bigint/check_carry_to_zero.rs
@@ -1,13 +1,11 @@
 use super::OverflowInteger;
-use crate::halo2_proofs::circuit::Value;
 use halo2_base::{
     gates::{GateInstructions, RangeInstructions},
-    utils::{bigint_to_fe, biguint_to_fe, fe_to_bigint, value_to_option, PrimeField},
+    utils::{bigint_to_fe, fe_to_bigint, BigPrimeField},
     Context,
     QuantumCell::{Constant, Existing, Witness},
 };
-use num_bigint::{BigInt, BigUint};
-use num_traits::One;
+use num_bigint::BigInt;
 
 // check that `a` carries to `0 mod 2^{a.limb_bits * a.limbs.len()}`
 // same as `assign` above except we need to provide `c_{k - 1}` witness as well
@@ -26,10 +24,10 @@ use num_traits::One;
 // a_i * 2^{n*w} + a_{i - 1} * 2^{n*(w-1)} + ... + a_{i - w} + c_{i - w - 1} = c_i * 2^{n*(w+1)}
 // which is valid as long as `(m - n + EPSILON) + n * (w+1) < native_modulus::<F>().bits() - 1`
 // so we only need to range check `c_i` every `w + 1` steps, starting with `i = w`
-pub fn truncate<'a, F: PrimeField>(
+pub fn truncate<F: BigPrimeField>(
     range: &impl RangeInstructions<F>,
-    ctx: &mut Context<'a, F>,
-    a: &OverflowInteger<'a, F>,
+    ctx: &mut Context<F>,
+    a: OverflowInteger<F>,
     limb_bits: usize,
     limb_base: F,
     limb_base_big: &BigInt,
@@ -37,27 +35,16 @@ pub fn truncate<'a, F: PrimeField>(
     let k = a.limbs.len();
     let max_limb_bits = a.max_limb_bits;
 
-    #[cfg(feature = "display")]
-    {
-        let key = format!("check_carry_to_zero(trunc) length {k}");
-        let count = ctx.op_count.entry(key).or_insert(0);
-        *count += 1;
-    }
-
-    let mut carries: Vec<Value<BigInt>> = Vec::with_capacity(k);
+    let mut carries = Vec::with_capacity(k);
 
     for a_limb in a.limbs.iter() {
-        let a_val = a_limb.value();
-        let carry = a_val.map(|a_fe| {
-            let a_val_big = fe_to_bigint(a_fe);
-            if carries.is_empty() {
-                // warning: using >> on negative integer produces undesired effect
-                a_val_big / limb_base_big
-            } else {
-                let carry_val = value_to_option(carries.last().unwrap().as_ref()).unwrap();
-                (a_val_big + carry_val) / limb_base_big
-            }
-        });
+        let a_val_big = fe_to_bigint(a_limb.value());
+        let carry = if let Some(carry_val) = carries.last() {
+            (a_val_big + carry_val) / limb_base_big
+        } else {
+            // warning: divide with `/` rather than `>>`: on a negative BigInt `>>` rounds toward -infinity, not toward zero
+            a_val_big / limb_base_big
+        };
         carries.push(carry);
     }
 
@@ -69,44 +56,30 @@ pub fn truncate<'a, F: PrimeField>(
     // `window = w + 1` valid as long as `range_bits + n * (w+1) < native_modulus::<F>().bits() - 1`
     // let window = (F::NUM_BITS as usize - 2 - range_bits) / limb_bits;
     // assert!(window > 0);
+    // In practice, we are currently always using window = 1 so the above is commented out
 
-    // TODO: maybe we can also cache these bigints
-    let shift_val = biguint_to_fe::<F>(&(BigUint::one() << range_bits));
+    let shift_val = range.gate().pow_of_two()[range_bits];
     // let num_windows = (k - 1) / window + 1; // = ((k - 1) - (window - 1) + window - 1) / window + 1;
 
     let mut previous = None;
-    for (a_limb, carry) in a.limbs.iter().zip(carries.iter()) {
-        let neg_carry_val = carry.as_ref().map(|c| bigint_to_fe::<F>(&-c));
-        let neg_carry = range
-            .gate()
-            .assign_region(
-                ctx,
-                vec![
-                    Existing(a_limb),
-                    Witness(neg_carry_val),
-                    Constant(limb_base),
-                    previous.as_ref().map(Existing).unwrap_or_else(|| Constant(F::zero())),
-                ],
-                vec![(0, None)],
-            )
-            .into_iter()
-            .nth(1)
-            .unwrap();
+    for (a_limb, carry) in a.limbs.into_iter().zip(carries.into_iter()) {
+        let neg_carry_val = bigint_to_fe(&-carry);
+        ctx.assign_region(
+            [
+                Existing(a_limb),
+                Witness(neg_carry_val),
+                Constant(limb_base),
+                previous.map(Existing).unwrap_or_else(|| Constant(F::zero())),
+            ],
+            [0],
+        );
+        let neg_carry = ctx.get(-3);
 
         // i in 0..num_windows {
         // let idx = std::cmp::min(window * i + window - 1, k - 1);
         // let carry_cell = &neg_carry_assignments[idx];
-        let shifted_carry = {
-            let shift_carry_val = Value::known(shift_val) + neg_carry.value();
-            let cells = vec![
-                Existing(&neg_carry),
-                Constant(F::one()),
-                Constant(shift_val),
-                Witness(shift_carry_val),
-            ];
-            range.gate().assign_region_last(ctx, cells, vec![(0, None)])
-        };
-        range.range_check(ctx, &shifted_carry, range_bits + 1);
+        let shifted_carry = range.gate().add(ctx, neg_carry, Constant(shift_val));
+        range.range_check(ctx, shifted_carry, range_bits + 1);
 
         previous = Some(neg_carry);
     }
diff --git a/halo2-ecc/src/bigint/mod.rs b/halo2-ecc/src/bigint/mod.rs
index 41e080d5..a8c93bd2 100644
--- a/halo2-ecc/src/bigint/mod.rs
+++ b/halo2-ecc/src/bigint/mod.rs
@@ -1,17 +1,12 @@
-use crate::halo2_proofs::{
-    circuit::{Cell, Value},
-    plonk::ConstraintSystem,
-};
+use crate::halo2_proofs::circuit::Cell;
 use halo2_base::{
-    gates::flex_gate::{FlexGateConfig, GateInstructions},
-    utils::{biguint_to_fe, decompose_biguint, fe_to_biguint, PrimeField},
+    gates::flex_gate::GateInstructions,
+    utils::{biguint_to_fe, decompose_biguint, fe_to_biguint, BigPrimeField, ScalarField},
     AssignedValue, Context,
-    QuantumCell::{Constant, Existing, Witness},
+    QuantumCell::Constant,
 };
-use itertools::Itertools;
 use num_bigint::{BigInt, BigUint};
 use num_traits::Zero;
-use std::{marker::PhantomData, rc::Rc};
 
 pub mod add_no_carry;
 pub mod big_is_equal;
@@ -45,51 +40,50 @@ impl Default for BigIntStrategy {
 }
 
 #[derive(Clone, Debug)]
-pub struct OverflowInteger<'v, F: PrimeField> {
-    pub limbs: Vec<AssignedValue<'v, F>>,
+pub struct OverflowInteger<F: ScalarField> {
+    pub limbs: Vec<AssignedValue<F>>,
     // max bits of a limb, ignoring sign
     pub max_limb_bits: usize,
     // the standard limb bit that we use for pow of two limb base - to reduce overhead we just assume this is inferred from context (e.g., the chip stores it), so we stop storing it here
     // pub limb_bits: usize,
 }
 
-impl<'v, F: PrimeField> OverflowInteger<'v, F> {
-    pub fn construct(limbs: Vec<AssignedValue<'v, F>>, max_limb_bits: usize) -> Self {
+impl<F: ScalarField> OverflowInteger<F> {
+    pub fn construct(limbs: Vec<AssignedValue<F>>, max_limb_bits: usize) -> Self {
         Self { limbs, max_limb_bits }
     }
 
     // convenience function for testing
     #[cfg(test)]
-    pub fn to_bigint(&self, limb_bits: usize) -> Value<BigInt> {
+    pub fn to_bigint(&self, limb_bits: usize) -> BigInt
+    where
+        F: BigPrimeField,
+    {
         use halo2_base::utils::fe_to_bigint;
 
-        self.limbs.iter().rev().fold(Value::known(BigInt::zero()), |acc, acell| {
-            acc.zip(acell.value()).map(|(acc, x)| (acc << limb_bits) + fe_to_bigint(x))
-        })
+        self.limbs
+            .iter()
+            .rev()
+            .fold(BigInt::zero(), |acc, acell| (acc << limb_bits) + fe_to_bigint(acell.value()))
     }
 
     pub fn evaluate(
         gate: &impl GateInstructions<F>,
-        // chip: &BigIntConfig<F>,
-        ctx: &mut Context<'_, F>,
-        limbs: &[AssignedValue<'v, F>],
+        ctx: &mut Context<F>,
+        limbs: impl IntoIterator<Item = AssignedValue<F>>,
         limb_bases: impl IntoIterator<Item = F>,
-    ) -> AssignedValue<'v, F> {
+    ) -> AssignedValue<F> {
         // Constrain `out_native = sum_i out_assigned[i] * 2^{n*i}` in `F`
-        gate.inner_product(
-            ctx,
-            limbs.iter().map(|a| Existing(a)),
-            limb_bases.into_iter().map(|c| Constant(c)),
-        )
+        gate.inner_product(ctx, limbs, limb_bases.into_iter().map(|c| Constant(c)))
     }
 }
 
 #[derive(Clone, Debug)]
-pub struct FixedOverflowInteger<F: PrimeField> {
+pub struct FixedOverflowInteger<F: ScalarField> {
     pub limbs: Vec<F>,
 }
 
-impl<F: PrimeField> FixedOverflowInteger<F> {
+impl<F: BigPrimeField> FixedOverflowInteger<F> {
     pub fn construct(limbs: Vec<F>) -> Self {
         Self { limbs }
     }
@@ -109,30 +103,25 @@ impl<F: PrimeField> FixedOverflowInteger<F> {
             .fold(BigUint::zero(), |acc, x| (acc << limb_bits) + fe_to_biguint(x))
     }
 
-    pub fn assign<'v>(
-        self,
-        gate: &impl GateInstructions<F>,
-        ctx: &mut Context<'_, F>,
-        limb_bits: usize,
-    ) -> OverflowInteger<'v, F> {
-        let assigned_limbs = gate.assign_region(ctx, self.limbs.into_iter().map(Constant), vec![]);
+    pub fn assign(self, ctx: &mut Context<F>, limb_bits: usize) -> OverflowInteger<F> {
+        let assigned_limbs = self.limbs.into_iter().map(|limb| ctx.load_constant(limb)).collect();
         OverflowInteger::construct(assigned_limbs, limb_bits)
     }
 
     /// only use case is when coeffs has only a single 1, rest are 0
-    pub fn select_by_indicator<'v>(
+    pub fn select_by_indicator(
         gate: &impl GateInstructions<F>,
-        ctx: &mut Context<'_, F>,
+        ctx: &mut Context<F>,
         a: &[Self],
-        coeffs: &[AssignedValue<'v, F>],
+        coeffs: &[AssignedValue<F>],
         limb_bits: usize,
-    ) -> OverflowInteger<'v, F> {
+    ) -> OverflowInteger<F> {
         let k = a[0].limbs.len();
 
         let out_limbs = (0..k)
             .map(|idx| {
                 let int_limbs = a.iter().map(|a| Constant(a.limbs[idx]));
-                gate.select_by_indicator(ctx, int_limbs, coeffs.iter())
+                gate.select_by_indicator(ctx, int_limbs, coeffs.iter().copied())
             })
             .collect();
 
@@ -141,7 +130,7 @@ impl<F: PrimeField> FixedOverflowInteger<F> {
 }
 
 #[derive(Clone, Debug)]
-pub struct CRTInteger<'v, F: PrimeField> {
+pub struct CRTInteger<F: ScalarField> {
     // keep track of an integer `a` using CRT as `a mod 2^t` and `a mod n`
     // where `t = truncation.limbs.len() * truncation.limb_bits`
     //       `n = modulus::<Fn>`
@@ -153,31 +142,31 @@ pub struct CRTInteger<'v, F: PrimeField> {
 
     // the IMPLICIT ASSUMPTION: `value (mod 2^t) = truncation` && `value (mod n) = native`
     // this struct should only be used if the implicit assumption above is satisfied
-    pub truncation: OverflowInteger<'v, F>,
-    pub native: AssignedValue<'v, F>,
-    pub value: Value<BigInt>,
+    pub truncation: OverflowInteger<F>,
+    pub native: AssignedValue<F>,
+    pub value: BigInt,
 }
 
-impl<'v, F: PrimeField> CRTInteger<'v, F> {
+impl<F: ScalarField> CRTInteger<F> {
     pub fn construct(
-        truncation: OverflowInteger<'v, F>,
-        native: AssignedValue<'v, F>,
-        value: Value<BigInt>,
+        truncation: OverflowInteger<F>,
+        native: AssignedValue<F>,
+        value: BigInt,
     ) -> Self {
         Self { truncation, native, value }
     }
 
-    pub fn native(&self) -> &AssignedValue<'v, F> {
+    pub fn native(&self) -> &AssignedValue<F> {
         &self.native
     }
 
-    pub fn limbs(&self) -> &[AssignedValue<'v, F>] {
+    pub fn limbs(&self) -> &[AssignedValue<F>] {
         self.truncation.limbs.as_slice()
     }
 }
 
 #[derive(Clone, Debug)]
-pub struct FixedCRTInteger<F: PrimeField> {
+pub struct FixedCRTInteger<F: ScalarField> {
     // keep track of an integer `a` using CRT as `a mod 2^t` and `a mod n`
     // where `t = truncation.limbs.len() * truncation.limb_bits`
     //       `n = modulus::<Fn>`
@@ -194,13 +183,13 @@ pub struct FixedCRTInteger<F: PrimeField> {
 }
 
 #[derive(Clone, Debug)]
-pub struct FixedAssignedCRTInteger<F: PrimeField> {
+pub struct FixedAssignedCRTInteger<F: ScalarField> {
     pub truncation: FixedOverflowInteger<F>,
     pub limb_fixed_cells: Vec<Cell>,
     pub value: BigUint,
 }
 
-impl<F: PrimeField> FixedCRTInteger<F> {
+impl<F: BigPrimeField> FixedCRTInteger<F> {
     pub fn construct(truncation: FixedOverflowInteger<F>, value: BigUint) -> Self {
         Self { truncation, value }
     }
@@ -212,90 +201,14 @@ impl<F: PrimeField> FixedCRTInteger<F> {
         Self { truncation, value }
     }
 
-    pub fn assign<'a>(
+    pub fn assign(
         self,
-        gate: &impl GateInstructions<F>,
-        ctx: &mut Context<'_, F>,
+        ctx: &mut Context<F>,
         limb_bits: usize,
         native_modulus: &BigUint,
-    ) -> CRTInteger<'a, F> {
-        let assigned_truncation = self.truncation.assign(gate, ctx, limb_bits);
-        let assigned_native = {
-            let native_cells = vec![Constant(biguint_to_fe(&(&self.value % native_modulus)))];
-            gate.assign_region_last(ctx, native_cells, vec![])
-        };
-        CRTInteger::construct(assigned_truncation, assigned_native, Value::known(self.value.into()))
-    }
-
-    pub fn assign_without_caching<'a>(
-        self,
-        gate: &impl GateInstructions<F>,
-        ctx: &mut Context<'_, F>,
-        limb_bits: usize,
-        native_modulus: &BigUint,
-    ) -> CRTInteger<'a, F> {
-        let fixed_cells = self
-            .truncation
-            .limbs
-            .iter()
-            .map(|limb| ctx.assign_fixed_without_caching(*limb))
-            .collect_vec();
-        let assigned_limbs = gate.assign_region(
-            ctx,
-            self.truncation.limbs.into_iter().map(|v| Witness(Value::known(v))),
-            vec![],
-        );
-        for (cell, acell) in fixed_cells.iter().zip(assigned_limbs.iter()) {
-            #[cfg(feature = "halo2-axiom")]
-            ctx.region.constrain_equal(cell, acell.cell());
-            #[cfg(feature = "halo2-pse")]
-            ctx.region.constrain_equal(*cell, acell.cell()).unwrap();
-        }
-        let assigned_native = {
-            let native_val = biguint_to_fe(&(&self.value % native_modulus));
-            let cell = ctx.assign_fixed_without_caching(native_val);
-            let acell =
-                gate.assign_region_last(ctx, vec![Witness(Value::known(native_val))], vec![]);
-
-            #[cfg(feature = "halo2-axiom")]
-            ctx.region.constrain_equal(&cell, acell.cell());
-            #[cfg(feature = "halo2-pse")]
-            ctx.region.constrain_equal(cell, acell.cell()).unwrap();
-
-            acell
-        };
-        CRTInteger::construct(
-            OverflowInteger::construct(assigned_limbs, limb_bits),
-            assigned_native,
-            Value::known(self.value.into()),
-        )
-    }
-}
-
-#[derive(Clone, Debug, Default)]
-#[allow(dead_code)]
-pub struct BigIntConfig<F: PrimeField> {
-    // everything is empty if strategy is `Simple` or `SimplePlus`
-    strategy: BigIntStrategy,
-    context_id: Rc<String>,
-    _marker: PhantomData<F>,
-}
-
-impl<F: PrimeField> BigIntConfig<F> {
-    pub fn configure(
-        _meta: &mut ConstraintSystem<F>,
-        strategy: BigIntStrategy,
-        _limb_bits: usize,
-        _num_limbs: usize,
-        _gate: &FlexGateConfig<F>,
-        context_id: String,
-    ) -> Self {
-        // let mut q_dot_constant = HashMap::new();
-        /*
-        match strategy {
-            _ => {}
-        }
-        */
-        Self { strategy, _marker: PhantomData, context_id: Rc::new(context_id) }
+    ) -> CRTInteger<F> {
+        let assigned_truncation = self.truncation.assign(ctx, limb_bits);
+        let assigned_native = ctx.load_constant(biguint_to_fe(&(&self.value % native_modulus)));
+        CRTInteger::construct(assigned_truncation, assigned_native, self.value.into())
     }
 }
diff --git a/halo2-ecc/src/bigint/mul_no_carry.rs b/halo2-ecc/src/bigint/mul_no_carry.rs
index 637c17e6..b6d5e745 100644
--- a/halo2-ecc/src/bigint/mul_no_carry.rs
+++ b/halo2-ecc/src/bigint/mul_no_carry.rs
@@ -1,35 +1,27 @@
 use super::{CRTInteger, OverflowInteger};
-use halo2_base::{gates::GateInstructions, utils::PrimeField, Context, QuantumCell::Existing};
+use halo2_base::{gates::GateInstructions, utils::ScalarField, Context, QuantumCell::Existing};
 
-pub fn truncate<'v, F: PrimeField>(
+pub fn truncate<F: ScalarField>(
     gate: &impl GateInstructions<F>,
-    // _chip: &BigIntConfig<F>,
-    ctx: &mut Context<'_, F>,
-    a: &OverflowInteger<'v, F>,
-    b: &OverflowInteger<'v, F>,
+    ctx: &mut Context<F>,
+    a: &OverflowInteger<F>,
+    b: &OverflowInteger<F>,
     num_limbs_log2_ceil: usize,
-) -> OverflowInteger<'v, F> {
+) -> OverflowInteger<F> {
     let k = a.limbs.len();
-    assert!(k > 0);
     assert_eq!(k, b.limbs.len());
+    debug_assert!(k > 0);
 
-    #[cfg(feature = "display")]
-    {
-        let key = format!("mul_no_carry(truncate) length {k}");
-        let count = ctx.op_count.entry(key).or_insert(0);
-        *count += 1;
-
-        assert!(
-            num_limbs_log2_ceil + a.max_limb_bits + b.max_limb_bits <= F::NUM_BITS as usize - 2
-        );
-    }
+    debug_assert!(
+        num_limbs_log2_ceil + a.max_limb_bits + b.max_limb_bits <= F::NUM_BITS as usize - 2
+    );
 
     let out_limbs = (0..k)
         .map(|i| {
             gate.inner_product(
                 ctx,
-                a.limbs[..=i].iter().map(Existing),
-                b.limbs[..=i].iter().rev().map(Existing),
+                a.limbs[..=i].iter().copied(),
+                b.limbs[..=i].iter().rev().map(|x| Existing(*x)),
             )
         })
         .collect();
@@ -37,17 +29,16 @@ pub fn truncate<'v, F: PrimeField>(
     OverflowInteger::construct(out_limbs, num_limbs_log2_ceil + a.max_limb_bits + b.max_limb_bits)
 }
 
-pub fn crt<'v, F: PrimeField>(
+pub fn crt<F: ScalarField>(
     gate: &impl GateInstructions<F>,
-    // chip: &BigIntConfig<F>,
-    ctx: &mut Context<'_, F>,
-    a: &CRTInteger<'v, F>,
-    b: &CRTInteger<'v, F>,
+    ctx: &mut Context<F>,
+    a: &CRTInteger<F>,
+    b: &CRTInteger<F>,
     num_limbs_log2_ceil: usize,
-) -> CRTInteger<'v, F> {
+) -> CRTInteger<F> {
     let out_trunc = truncate::<F>(gate, ctx, &a.truncation, &b.truncation, num_limbs_log2_ceil);
-    let out_native = gate.mul(ctx, Existing(&a.native), Existing(&b.native));
-    let out_val = a.value.as_ref() * b.value.as_ref();
+    let out_native = gate.mul(ctx, a.native, b.native);
+    let out_val = &a.value * &b.value;
 
     CRTInteger::construct(out_trunc, out_native, out_val)
 }
diff --git a/halo2-ecc/src/bigint/negative.rs b/halo2-ecc/src/bigint/negative.rs
index 60183c3f..45a7d817 100644
--- a/halo2-ecc/src/bigint/negative.rs
+++ b/halo2-ecc/src/bigint/negative.rs
@@ -1,11 +1,11 @@
 use super::OverflowInteger;
-use halo2_base::{gates::GateInstructions, utils::PrimeField, Context, QuantumCell::Existing};
+use halo2_base::{gates::GateInstructions, utils::ScalarField, Context};
 
-pub fn assign<'v, F: PrimeField>(
+pub fn assign<F: ScalarField>(
     gate: &impl GateInstructions<F>,
-    ctx: &mut Context<'_, F>,
-    a: &OverflowInteger<'v, F>,
-) -> OverflowInteger<'v, F> {
-    let out_limbs = a.limbs.iter().map(|limb| gate.neg(ctx, Existing(limb))).collect();
+    ctx: &mut Context<F>,
+    a: OverflowInteger<F>,
+) -> OverflowInteger<F> {
+    let out_limbs = a.limbs.into_iter().map(|limb| gate.neg(ctx, limb)).collect();
     OverflowInteger::construct(out_limbs, a.max_limb_bits)
 }
diff --git a/halo2-ecc/src/bigint/scalar_mul_and_add_no_carry.rs b/halo2-ecc/src/bigint/scalar_mul_and_add_no_carry.rs
index 1c64e24f..579aff01 100644
--- a/halo2-ecc/src/bigint/scalar_mul_and_add_no_carry.rs
+++ b/halo2-ecc/src/bigint/scalar_mul_and_add_no_carry.rs
@@ -1,49 +1,43 @@
 use super::{CRTInteger, OverflowInteger};
 use halo2_base::{
     gates::GateInstructions,
-    utils::{log2_ceil, PrimeField},
+    utils::{log2_ceil, ScalarField},
     Context,
-    QuantumCell::{Constant, Existing, Witness},
+    QuantumCell::Constant,
 };
 use std::cmp::max;
 
 /// compute a * c + b = b + a * c
 // this is uniquely suited for our simple gate
-pub fn assign<'v, F: PrimeField>(
+pub fn assign<F: ScalarField>(
     gate: &impl GateInstructions<F>,
-    ctx: &mut Context<'_, F>,
-    a: &OverflowInteger<'v, F>,
-    b: &OverflowInteger<'v, F>,
+    ctx: &mut Context<F>,
+    a: &OverflowInteger<F>,
+    b: &OverflowInteger<F>,
     c_f: F,
     c_log2_ceil: usize,
-) -> OverflowInteger<'v, F> {
-    assert_eq!(a.limbs.len(), b.limbs.len());
+) -> OverflowInteger<F> {
+    debug_assert_eq!(a.limbs.len(), b.limbs.len());
 
     let out_limbs = a
         .limbs
         .iter()
         .zip(b.limbs.iter())
-        .map(|(a_limb, b_limb)| {
-            let out_val = a_limb.value().zip(b_limb.value()).map(|(a, b)| c_f * a + b);
-            gate.assign_region_last(
-                ctx,
-                vec![Existing(b_limb), Existing(a_limb), Constant(c_f), Witness(out_val)],
-                vec![(0, None)],
-            )
-        })
+        .map(|(&a_limb, &b_limb)| gate.mul_add(ctx, a_limb, Constant(c_f), b_limb))
         .collect();
 
     OverflowInteger::construct(out_limbs, max(a.max_limb_bits + c_log2_ceil, b.max_limb_bits) + 1)
 }
 
-pub fn crt<'v, F: PrimeField>(
+/// compute a * c + b = b + a * c
+pub fn crt<F: ScalarField>(
     gate: &impl GateInstructions<F>,
-    ctx: &mut Context<'_, F>,
-    a: &CRTInteger<'v, F>,
-    b: &CRTInteger<'v, F>,
+    ctx: &mut Context<F>,
+    a: &CRTInteger<F>,
+    b: &CRTInteger<F>,
     c: i64,
-) -> CRTInteger<'v, F> {
-    assert_eq!(a.truncation.limbs.len(), b.truncation.limbs.len());
+) -> CRTInteger<F> {
+    debug_assert_eq!(a.truncation.limbs.len(), b.truncation.limbs.len());
 
     let (c_f, c_abs) = if c >= 0 {
         let c_abs = u64::try_from(c).unwrap();
@@ -54,14 +48,7 @@ pub fn crt<'v, F: PrimeField>(
     };
 
     let out_trunc = assign::<F>(gate, ctx, &a.truncation, &b.truncation, c_f, log2_ceil(c_abs));
-    let out_native = {
-        let out_val = b.native.value().zip(a.native.value()).map(|(b, a)| c_f * a + b);
-        gate.assign_region_last(
-            ctx,
-            vec![Existing(&b.native), Existing(&a.native), Constant(c_f), Witness(out_val)],
-            vec![(0, None)],
-        )
-    };
-    let out_val = a.value.as_ref().zip(b.value.as_ref()).map(|(a, b)| a * c + b);
+    let out_native = gate.mul_add(ctx, a.native, Constant(c_f), b.native);
+    let out_val = &a.value * c + &b.value;
     CRTInteger::construct(out_trunc, out_native, out_val)
 }
diff --git a/halo2-ecc/src/bigint/scalar_mul_no_carry.rs b/halo2-ecc/src/bigint/scalar_mul_no_carry.rs
index 4aff4b0c..60029e92 100644
--- a/halo2-ecc/src/bigint/scalar_mul_no_carry.rs
+++ b/halo2-ecc/src/bigint/scalar_mul_no_carry.rs
@@ -1,29 +1,28 @@
 use super::{CRTInteger, OverflowInteger};
 use halo2_base::{
     gates::GateInstructions,
-    utils::{log2_ceil, PrimeField},
+    utils::{log2_ceil, ScalarField},
     Context,
-    QuantumCell::{Constant, Existing},
+    QuantumCell::Constant,
 };
 
-pub fn assign<'v, F: PrimeField>(
+pub fn assign<F: ScalarField>(
     gate: &impl GateInstructions<F>,
-    ctx: &mut Context<'_, F>,
-    a: &OverflowInteger<'v, F>,
+    ctx: &mut Context<F>,
+    a: OverflowInteger<F>,
     c_f: F,
     c_log2_ceil: usize,
-) -> OverflowInteger<'v, F> {
-    let out_limbs =
-        a.limbs.iter().map(|limb| gate.mul(ctx, Existing(limb), Constant(c_f))).collect();
+) -> OverflowInteger<F> {
+    let out_limbs = a.limbs.into_iter().map(|limb| gate.mul(ctx, limb, Constant(c_f))).collect();
     OverflowInteger::construct(out_limbs, a.max_limb_bits + c_log2_ceil)
 }
 
-pub fn crt<'v, F: PrimeField>(
+pub fn crt<F: ScalarField>(
     gate: &impl GateInstructions<F>,
-    ctx: &mut Context<'_, F>,
-    a: &CRTInteger<'v, F>,
+    ctx: &mut Context<F>,
+    a: &CRTInteger<F>,
     c: i64,
-) -> CRTInteger<'v, F> {
+) -> CRTInteger<F> {
     let (c_f, c_abs) = if c >= 0 {
         let c_abs = u64::try_from(c).unwrap();
         (F::from(c_abs), c_abs)
@@ -32,15 +31,11 @@ pub fn crt<'v, F: PrimeField>(
         (-F::from(c_abs), c_abs)
     };
 
-    let out_limbs = a
-        .truncation
-        .limbs
-        .iter()
-        .map(|limb| gate.mul(ctx, Existing(limb), Constant(c_f)))
-        .collect();
+    let out_limbs =
+        a.truncation.limbs.iter().map(|limb| gate.mul(ctx, *limb, Constant(c_f))).collect();
 
-    let out_native = gate.mul(ctx, Existing(&a.native), Constant(c_f));
-    let out_val = a.value.as_ref().map(|a| a * c);
+    let out_native = gate.mul(ctx, a.native, Constant(c_f));
+    let out_val = &a.value * c;
 
     CRTInteger::construct(
         OverflowInteger::construct(out_limbs, a.truncation.max_limb_bits + log2_ceil(c_abs)),
diff --git a/halo2-ecc/src/bigint/select.rs b/halo2-ecc/src/bigint/select.rs
index aa296164..1146eeb5 100644
--- a/halo2-ecc/src/bigint/select.rs
+++ b/halo2-ecc/src/bigint/select.rs
@@ -1,41 +1,39 @@
 use super::{CRTInteger, OverflowInteger};
-use halo2_base::{
-    gates::GateInstructions, utils::PrimeField, AssignedValue, Context, QuantumCell::Existing,
-};
+use halo2_base::{gates::GateInstructions, utils::ScalarField, AssignedValue, Context};
 use std::cmp::max;
 
-pub fn assign<'v, F: PrimeField>(
+pub fn assign<F: ScalarField>(
     gate: &impl GateInstructions<F>,
-    ctx: &mut Context<'_, F>,
-    a: &OverflowInteger<'v, F>,
-    b: &OverflowInteger<'v, F>,
-    sel: &AssignedValue<'v, F>,
-) -> OverflowInteger<'v, F> {
-    assert_eq!(a.limbs.len(), b.limbs.len());
+    ctx: &mut Context<F>,
+    a: OverflowInteger<F>,
+    b: OverflowInteger<F>,
+    sel: AssignedValue<F>,
+) -> OverflowInteger<F> {
+    debug_assert_eq!(a.limbs.len(), b.limbs.len());
     let out_limbs = a
         .limbs
-        .iter()
-        .zip(b.limbs.iter())
-        .map(|(a_limb, b_limb)| gate.select(ctx, Existing(a_limb), Existing(b_limb), Existing(sel)))
+        .into_iter()
+        .zip(b.limbs.into_iter())
+        .map(|(a_limb, b_limb)| gate.select(ctx, a_limb, b_limb, sel))
         .collect();
 
     OverflowInteger::construct(out_limbs, max(a.max_limb_bits, b.max_limb_bits))
 }
 
-pub fn crt<'v, F: PrimeField>(
+pub fn crt<F: ScalarField>(
     gate: &impl GateInstructions<F>,
-    ctx: &mut Context<'_, F>,
-    a: &CRTInteger<'v, F>,
-    b: &CRTInteger<'v, F>,
-    sel: &AssignedValue<'v, F>,
-) -> CRTInteger<'v, F> {
-    assert_eq!(a.truncation.limbs.len(), b.truncation.limbs.len());
+    ctx: &mut Context<F>,
+    a: &CRTInteger<F>,
+    b: &CRTInteger<F>,
+    sel: AssignedValue<F>,
+) -> CRTInteger<F> {
+    debug_assert_eq!(a.truncation.limbs.len(), b.truncation.limbs.len());
     let out_limbs = a
         .truncation
         .limbs
         .iter()
         .zip(b.truncation.limbs.iter())
-        .map(|(a_limb, b_limb)| gate.select(ctx, Existing(a_limb), Existing(b_limb), Existing(sel)))
+        .map(|(&a_limb, &b_limb)| gate.select(ctx, a_limb, b_limb, sel))
         .collect();
 
     let out_trunc = OverflowInteger::construct(
@@ -43,13 +41,7 @@ pub fn crt<'v, F: PrimeField>(
         max(a.truncation.max_limb_bits, b.truncation.max_limb_bits),
     );
 
-    let out_native = gate.select(ctx, Existing(&a.native), Existing(&b.native), Existing(sel));
-    let out_val = a.value.as_ref().zip(b.value.as_ref()).zip(sel.value()).map(|((a, b), s)| {
-        if s.is_zero_vartime() {
-            b.clone()
-        } else {
-            a.clone()
-        }
-    });
+    let out_native = gate.select(ctx, a.native, b.native, sel);
+    let out_val = if sel.value().is_zero_vartime() { b.value.clone() } else { a.value.clone() };
     CRTInteger::construct(out_trunc, out_native, out_val)
 }
diff --git a/halo2-ecc/src/bigint/select_by_indicator.rs b/halo2-ecc/src/bigint/select_by_indicator.rs
index 87597804..30aa5ab2 100644
--- a/halo2-ecc/src/bigint/select_by_indicator.rs
+++ b/halo2-ecc/src/bigint/select_by_indicator.rs
@@ -1,25 +1,22 @@
 use super::{CRTInteger, OverflowInteger};
-use crate::halo2_proofs::circuit::Value;
-use halo2_base::{
-    gates::GateInstructions, utils::PrimeField, AssignedValue, Context, QuantumCell::Existing,
-};
+use halo2_base::{gates::GateInstructions, utils::ScalarField, AssignedValue, Context};
 use num_bigint::BigInt;
 use num_traits::Zero;
 use std::cmp::max;
 
 /// only use case is when coeffs has only a single 1, rest are 0
-pub fn assign<'v, F: PrimeField>(
+pub fn assign<F: ScalarField>(
     gate: &impl GateInstructions<F>,
-    ctx: &mut Context<'_, F>,
-    a: &[OverflowInteger<'v, F>],
-    coeffs: &[AssignedValue<'v, F>],
-) -> OverflowInteger<'v, F> {
+    ctx: &mut Context<F>,
+    a: &[OverflowInteger<F>],
+    coeffs: &[AssignedValue<F>],
+) -> OverflowInteger<F> {
     let k = a[0].limbs.len();
 
     let out_limbs = (0..k)
         .map(|idx| {
-            let int_limbs = a.iter().map(|a| Existing(&a.limbs[idx]));
-            gate.select_by_indicator(ctx, int_limbs, coeffs.iter())
+            let int_limbs = a.iter().map(|a| a.limbs[idx]);
+            gate.select_by_indicator(ctx, int_limbs, coeffs.iter().copied())
         })
         .collect();
 
@@ -29,20 +26,20 @@ pub fn assign<'v, F: PrimeField>(
 }
 
 /// only use case is when coeffs has only a single 1, rest are 0
-pub fn crt<'v, F: PrimeField>(
+pub fn crt<F: ScalarField>(
     gate: &impl GateInstructions<F>,
-    ctx: &mut Context<'_, F>,
-    a: &[CRTInteger<'v, F>],
-    coeffs: &[AssignedValue<'v, F>],
+    ctx: &mut Context<F>,
+    a: &[CRTInteger<F>],
+    coeffs: &[AssignedValue<F>],
     limb_bases: &[F],
-) -> CRTInteger<'v, F> {
+) -> CRTInteger<F> {
     assert_eq!(a.len(), coeffs.len());
     let k = a[0].truncation.limbs.len();
 
     let out_limbs = (0..k)
         .map(|idx| {
-            let int_limbs = a.iter().map(|a| Existing(&a.truncation.limbs[idx]));
-            gate.select_by_indicator(ctx, int_limbs, coeffs.iter())
+            let int_limbs = a.iter().map(|a| a.truncation.limbs[idx]);
+            gate.select_by_indicator(ctx, int_limbs, coeffs.iter().copied())
         })
         .collect();
 
@@ -50,19 +47,22 @@ pub fn crt<'v, F: PrimeField>(
 
     let out_trunc = OverflowInteger::construct(out_limbs, max_limb_bits);
     let out_native = if a.len() > k {
-        OverflowInteger::<F>::evaluate(gate, ctx, &out_trunc.limbs, limb_bases[..k].iter().cloned())
+        OverflowInteger::<F>::evaluate(
+            gate,
+            ctx,
+            out_trunc.limbs.iter().copied(),
+            limb_bases[..k].iter().copied(),
+        )
     } else {
-        let a_native = a.iter().map(|x| Existing(&x.native));
-        gate.select_by_indicator(ctx, a_native, coeffs.iter())
+        let a_native = a.iter().map(|x| x.native);
+        gate.select_by_indicator(ctx, a_native, coeffs.iter().copied())
     };
-    let out_val = a.iter().zip(coeffs.iter()).fold(Value::known(BigInt::zero()), |acc, (x, y)| {
-        acc.zip(x.value.as_ref()).zip(y.value()).map(|((a, x), y)| {
-            if y.is_zero_vartime() {
-                a
-            } else {
-                x.clone()
-            }
-        })
+    let out_val = a.iter().zip(coeffs.iter()).fold(BigInt::zero(), |acc, (x, y)| {
+        if y.value().is_zero_vartime() {
+            acc
+        } else {
+            x.value.clone()
+        }
     });
 
     CRTInteger::construct(out_trunc, out_native, out_val)
diff --git a/halo2-ecc/src/bigint/sub.rs b/halo2-ecc/src/bigint/sub.rs
index 5e987f0c..2d4d83ff 100644
--- a/halo2-ecc/src/bigint/sub.rs
+++ b/halo2-ecc/src/bigint/sub.rs
@@ -1,61 +1,54 @@
 use super::{CRTInteger, OverflowInteger};
 use halo2_base::{
     gates::{GateInstructions, RangeInstructions},
-    utils::PrimeField,
+    utils::ScalarField,
     AssignedValue, Context,
     QuantumCell::{Constant, Existing, Witness},
 };
 
 /// Should only be called on integers a, b in proper representation with all limbs having at most `limb_bits` number of bits
-pub fn assign<'a, F: PrimeField>(
+pub fn assign<F: ScalarField>(
     range: &impl RangeInstructions<F>,
-    ctx: &mut Context<'a, F>,
-    a: &OverflowInteger<'a, F>,
-    b: &OverflowInteger<'a, F>,
+    ctx: &mut Context<F>,
+    a: &OverflowInteger<F>,
+    b: &OverflowInteger<F>,
     limb_bits: usize,
     limb_base: F,
-) -> (OverflowInteger<'a, F>, AssignedValue<'a, F>) {
-    assert!(a.max_limb_bits <= limb_bits);
-    assert!(b.max_limb_bits <= limb_bits);
-    assert_eq!(a.limbs.len(), b.limbs.len());
+) -> (OverflowInteger<F>, AssignedValue<F>) {
+    debug_assert!(a.max_limb_bits <= limb_bits);
+    debug_assert!(b.max_limb_bits <= limb_bits);
+    debug_assert_eq!(a.limbs.len(), b.limbs.len());
     let k = a.limbs.len();
     let mut out_limbs = Vec::with_capacity(k);
 
     let mut borrow: Option<AssignedValue<F>> = None;
-    for (a_limb, b_limb) in a.limbs.iter().zip(b.limbs.iter()) {
+    for (&a_limb, &b_limb) in a.limbs.iter().zip(b.limbs.iter()) {
         let (bottom, lt) = match borrow {
             None => {
-                let lt = range.is_less_than(ctx, Existing(a_limb), Existing(b_limb), limb_bits);
-                (b_limb.clone(), lt)
+                let lt = range.is_less_than(ctx, a_limb, b_limb, limb_bits);
+                (b_limb, lt)
             }
             Some(borrow) => {
-                let b_plus_borrow = range.gate().add(ctx, Existing(b_limb), Existing(&borrow));
-                let lt = range.is_less_than(
-                    ctx,
-                    Existing(a_limb),
-                    Existing(&b_plus_borrow),
-                    limb_bits + 1,
-                );
+                let b_plus_borrow = range.gate().add(ctx, b_limb, borrow);
+                let lt = range.is_less_than(ctx, a_limb, b_plus_borrow, limb_bits + 1);
                 (b_plus_borrow, lt)
             }
         };
         let out_limb = {
             // | a | lt | 2^n | a + lt * 2^n | -1 | bottom | a + lt * 2^n - bottom
-            let a_with_borrow_val =
-                a_limb.value().zip(lt.value()).map(|(a, lt)| limb_base * lt + a);
-            let out_val = a_with_borrow_val.zip(bottom.value()).map(|(ac, b)| ac - b);
-            range.gate().assign_region_last(
-                ctx,
-                vec![
+            let a_with_borrow_val = limb_base * lt.value() + a_limb.value();
+            let out_val = a_with_borrow_val - bottom.value();
+            ctx.assign_region_last(
+                [
                     Existing(a_limb),
-                    Existing(&lt),
+                    Existing(lt),
                     Constant(limb_base),
                     Witness(a_with_borrow_val),
                     Constant(-F::one()),
-                    Existing(&bottom),
+                    Existing(bottom),
                     Witness(out_val),
                 ],
-                vec![(0, None), (3, None)],
+                [0, 3],
             )
         };
         out_limbs.push(out_limb);
@@ -65,17 +58,17 @@ pub fn assign<'a, F: PrimeField>(
 }
 
 // returns (a-b, underflow), where underflow is nonzero iff a < b
-pub fn crt<'a, F: PrimeField>(
+pub fn crt<F: ScalarField>(
     range: &impl RangeInstructions<F>,
-    ctx: &mut Context<'a, F>,
-    a: &CRTInteger<'a, F>,
-    b: &CRTInteger<'a, F>,
+    ctx: &mut Context<F>,
+    a: &CRTInteger<F>,
+    b: &CRTInteger<F>,
     limb_bits: usize,
     limb_base: F,
-) -> (CRTInteger<'a, F>, AssignedValue<'a, F>) {
+) -> (CRTInteger<F>, AssignedValue<F>) {
     let (out_trunc, underflow) =
         assign::<F>(range, ctx, &a.truncation, &b.truncation, limb_bits, limb_base);
-    let out_native = range.gate().sub(ctx, Existing(&a.native), Existing(&b.native));
-    let out_val = a.value.as_ref().zip(b.value.as_ref()).map(|(a, b)| a - b);
+    let out_native = range.gate().sub(ctx, a.native, b.native);
+    let out_val = &a.value - &b.value;
     (CRTInteger::construct(out_trunc, out_native, out_val), underflow)
 }
diff --git a/halo2-ecc/src/bigint/sub_no_carry.rs b/halo2-ecc/src/bigint/sub_no_carry.rs
index 2226027d..ae4bb8a3 100644
--- a/halo2-ecc/src/bigint/sub_no_carry.rs
+++ b/halo2-ecc/src/bigint/sub_no_carry.rs
@@ -1,32 +1,32 @@
 use super::{CRTInteger, OverflowInteger};
-use halo2_base::{gates::GateInstructions, utils::PrimeField, Context, QuantumCell::Existing};
+use halo2_base::{gates::GateInstructions, utils::ScalarField, Context};
 use std::cmp::max;
 
-pub fn assign<'v, F: PrimeField>(
+pub fn assign<F: ScalarField>(
     gate: &impl GateInstructions<F>,
-    ctx: &mut Context<'_, F>,
-    a: &OverflowInteger<'v, F>,
-    b: &OverflowInteger<'v, F>,
-) -> OverflowInteger<'v, F> {
-    assert_eq!(a.limbs.len(), b.limbs.len());
+    ctx: &mut Context<F>,
+    a: &OverflowInteger<F>,
+    b: &OverflowInteger<F>,
+) -> OverflowInteger<F> {
+    debug_assert_eq!(a.limbs.len(), b.limbs.len());
     let out_limbs = a
         .limbs
         .iter()
         .zip(b.limbs.iter())
-        .map(|(a_limb, b_limb)| gate.sub(ctx, Existing(a_limb), Existing(b_limb)))
+        .map(|(&a_limb, &b_limb)| gate.sub(ctx, a_limb, b_limb))
         .collect();
 
     OverflowInteger::construct(out_limbs, max(a.max_limb_bits, b.max_limb_bits) + 1)
 }
 
-pub fn crt<'v, F: PrimeField>(
+pub fn crt<F: ScalarField>(
     gate: &impl GateInstructions<F>,
-    ctx: &mut Context<'_, F>,
-    a: &CRTInteger<'v, F>,
-    b: &CRTInteger<'v, F>,
-) -> CRTInteger<'v, F> {
+    ctx: &mut Context<F>,
+    a: &CRTInteger<F>,
+    b: &CRTInteger<F>,
+) -> CRTInteger<F> {
     let out_trunc = assign::<F>(gate, ctx, &a.truncation, &b.truncation);
-    let out_native = gate.sub(ctx, Existing(&a.native), Existing(&b.native));
-    let out_val = a.value.as_ref().zip(b.value.as_ref()).map(|(a, b)| a - b);
+    let out_native = gate.sub(ctx, a.native, b.native);
+    let out_val = &a.value - &b.value;
     CRTInteger::construct(out_trunc, out_native, out_val)
 }
diff --git a/halo2-ecc/src/bn254/configs/msm_circuit.config b/halo2-ecc/src/bn254/configs/msm_circuit.config
deleted file mode 100644
index 9246e19f..00000000
--- a/halo2-ecc/src/bn254/configs/msm_circuit.config
+++ /dev/null
@@ -1 +0,0 @@
-{"strategy":"Simple","degree":20,"num_advice":10,"num_lookup_advice":2,"num_fixed":1,"lookup_bits":19,"limb_bits":88,"num_limbs":3,"batch_size":100,"window_bits":4}
\ No newline at end of file
diff --git a/halo2-ecc/src/bn254/final_exp.rs b/halo2-ecc/src/bn254/final_exp.rs
index e131f7d5..9ab45daa 100644
--- a/halo2-ecc/src/bn254/final_exp.rs
+++ b/halo2-ecc/src/bn254/final_exp.rs
@@ -5,34 +5,34 @@ use crate::halo2_proofs::{
 };
 use crate::{
     ecc::get_naf,
-    fields::{fp12::mul_no_carry_w6, FieldChip, FieldExtPoint},
+    fields::{fp12::mul_no_carry_w6, FieldChip, FieldExtPoint, PrimeField},
 };
 use halo2_base::{
     gates::GateInstructions,
-    utils::{fe_to_biguint, modulus, PrimeField},
+    utils::{fe_to_biguint, modulus},
     Context,
-    QuantumCell::{Constant, Existing},
+    QuantumCell::Constant,
 };
 use num_bigint::BigUint;
 
 const XI_0: i64 = 9;
 
-impl<'a, F: PrimeField> Fp12Chip<'a, F> {
+impl<'chip, F: PrimeField> Fp12Chip<'chip, F> {
     // computes a ** (p ** power)
     // only works for p = 3 (mod 4) and p = 1 (mod 6)
-    pub fn frobenius_map<'v>(
+    pub fn frobenius_map(
         &self,
-        ctx: &mut Context<'v, F>,
-        a: &<Self as FieldChip<F>>::FieldPoint<'v>,
+        ctx: &mut Context<F>,
+        a: &<Self as FieldChip<F>>::FieldPoint,
         power: usize,
-    ) -> <Self as FieldChip<F>>::FieldPoint<'v> {
+    ) -> <Self as FieldChip<F>>::FieldPoint {
         assert_eq!(modulus::<Fq>() % 4u64, BigUint::from(3u64));
         assert_eq!(modulus::<Fq>() % 6u64, BigUint::from(1u64));
         assert_eq!(a.coeffs.len(), 12);
         let pow = power % 12;
         let mut out_fp2 = Vec::with_capacity(6);
 
-        let fp2_chip = Fp2Chip::<F>::construct(self.fp_chip);
+        let fp2_chip = Fp2Chip::<F>::new(self.fp_chip);
         for i in 0..6 {
             let frob_coeff = FROBENIUS_COEFF_FQ12_C1[pow].pow_vartime([i as u64]);
             // possible optimization (not implemented): load `frob_coeff` as we multiply instead of loading first
@@ -68,12 +68,12 @@ impl<'a, F: PrimeField> Fp12Chip<'a, F> {
     }
 
     // exp is in little-endian
-    pub fn pow<'v>(
+    pub fn pow(
         &self,
-        ctx: &mut Context<'v, F>,
-        a: &<Self as FieldChip<F>>::FieldPoint<'v>,
+        ctx: &mut Context<F>,
+        a: &<Self as FieldChip<F>>::FieldPoint,
         exp: Vec<u64>,
-    ) -> <Self as FieldChip<F>>::FieldPoint<'v> {
+    ) -> <Self as FieldChip<F>>::FieldPoint {
         let mut res = a.clone();
         let mut is_started = false;
         let naf = get_naf(exp);
@@ -106,10 +106,10 @@ impl<'a, F: PrimeField> Fp12Chip<'a, F> {
 
     /// in = g0 + g2 w + g4 w^2 + g1 w^3 + g3 w^4 + g5 w^5 where g_i = g_i0 + g_i1 * u are elements of Fp2
     /// out = Compress(in) = [ g2, g3, g4, g5 ]
-    pub fn cyclotomic_compress<'v>(
+    pub fn cyclotomic_compress(
         &self,
-        a: &FieldExtPoint<FpPoint<'v, F>>,
-    ) -> Vec<FieldExtPoint<FpPoint<'v, F>>> {
+        a: &FieldExtPoint<FpPoint<F>>,
+    ) -> Vec<FieldExtPoint<FpPoint<F>>> {
         let g2 = FieldExtPoint::construct(vec![a.coeffs[1].clone(), a.coeffs[1 + 6].clone()]);
         let g3 = FieldExtPoint::construct(vec![a.coeffs[4].clone(), a.coeffs[4 + 6].clone()]);
         let g4 = FieldExtPoint::construct(vec![a.coeffs[2].clone(), a.coeffs[2 + 6].clone()]);
@@ -129,14 +129,14 @@ impl<'a, F: PrimeField> Fp12Chip<'a, F> {
     ///     if g2 = 0:
     ///         g1 = (2 g4 * g5)/g3
     ///         g0 = (2 g1^2 - 3 g3 * g4) * c + 1    
-    pub fn cyclotomic_decompress<'v>(
+    pub fn cyclotomic_decompress(
         &self,
-        ctx: &mut Context<'v, F>,
-        compression: Vec<FieldExtPoint<FpPoint<'v, F>>>,
-    ) -> FieldExtPoint<FpPoint<'v, F>> {
-        let [g2, g3, g4, g5]: [FieldExtPoint<FpPoint<'v, F>>; 4] = compression.try_into().unwrap();
+        ctx: &mut Context<F>,
+        compression: Vec<FieldExtPoint<FpPoint<F>>>,
+    ) -> FieldExtPoint<FpPoint<F>> {
+        let [g2, g3, g4, g5]: [FieldExtPoint<FpPoint<F>>; 4] = compression.try_into().unwrap();
 
-        let fp2_chip = Fp2Chip::<F>::construct(self.fp_chip);
+        let fp2_chip = Fp2Chip::<F>::new(self.fp_chip);
         let g5_sq = fp2_chip.mul_no_carry(ctx, &g5, &g5);
         let g5_sq_c = mul_no_carry_w6::<F, FpChip<F>, XI_0>(fp2_chip.fp_chip, ctx, &g5_sq);
 
@@ -156,7 +156,7 @@ impl<'a, F: PrimeField> Fp12Chip<'a, F> {
 
         let g2_is_zero = fp2_chip.is_zero(ctx, &g2);
         // resulting `g1` is already in "carried" format (witness is in `[0, p)`)
-        let g1 = fp2_chip.select(ctx, &g1_0, &g1_1, &g2_is_zero);
+        let g1 = fp2_chip.select(ctx, &g1_0, &g1_1, g2_is_zero);
 
         // share the computation of 2 g1^2 between the two cases
         let g1_sq = fp2_chip.mul_no_carry(ctx, &g1, &g1);
@@ -166,20 +166,16 @@ impl<'a, F: PrimeField> Fp12Chip<'a, F> {
         let g3_g4 = fp2_chip.mul_no_carry(ctx, &g3, &g4);
         let g3_g4_3 = fp2_chip.scalar_mul_no_carry(ctx, &g3_g4, 3);
         let temp = fp2_chip.add_no_carry(ctx, &g1_sq_2, &g2_g5);
-        let temp = fp2_chip.select(ctx, &g1_sq_2, &temp, &g2_is_zero);
+        let temp = fp2_chip.select(ctx, &g1_sq_2, &temp, g2_is_zero);
         let temp = fp2_chip.sub_no_carry(ctx, &temp, &g3_g4_3);
         let mut g0 = mul_no_carry_w6::<F, FpChip<F>, XI_0>(fp2_chip.fp_chip, ctx, &temp);
 
         // compute `g0 + 1`
-        g0.coeffs[0].truncation.limbs[0] = fp2_chip.range().gate.add(
-            ctx,
-            Existing(&g0.coeffs[0].truncation.limbs[0]),
-            Constant(F::one()),
-        );
-        g0.coeffs[0].native =
-            fp2_chip.range().gate.add(ctx, Existing(&g0.coeffs[0].native), Constant(F::one()));
+        g0.coeffs[0].truncation.limbs[0] =
+            fp2_chip.gate().add(ctx, g0.coeffs[0].truncation.limbs[0], Constant(F::one()));
+        g0.coeffs[0].native = fp2_chip.gate().add(ctx, g0.coeffs[0].native, Constant(F::one()));
         g0.coeffs[0].truncation.max_limb_bits += 1;
-        g0.coeffs[0].value = g0.coeffs[0].value.as_ref().map(|v| v + 1usize);
+        g0.coeffs[0].value += 1usize;
 
         // finally, carry g0
         g0 = fp2_chip.carry_mod(ctx, &g0);
@@ -217,18 +213,18 @@ impl<'a, F: PrimeField> Fp12Chip<'a, F> {
     //  A_ij = (g_i + g_j)(g_i + c g_j)
     //  B_ij = g_i g_j
 
-    pub fn cyclotomic_square<'v>(
+    pub fn cyclotomic_square(
         &self,
-        ctx: &mut Context<'v, F>,
-        compression: &[FieldExtPoint<FpPoint<'v, F>>],
-    ) -> Vec<FieldExtPoint<FpPoint<'v, F>>> {
+        ctx: &mut Context<F>,
+        compression: &[FieldExtPoint<FpPoint<F>>],
+    ) -> Vec<FieldExtPoint<FpPoint<F>>> {
         assert_eq!(compression.len(), 4);
         let g2 = &compression[0];
         let g3 = &compression[1];
         let g4 = &compression[2];
         let g5 = &compression[3];
 
-        let fp2_chip = Fp2Chip::<F>::construct(self.fp_chip);
+        let fp2_chip = Fp2Chip::<F>::new(self.fp_chip);
 
         let g2_plus_g3 = fp2_chip.add_no_carry(ctx, g2, g3);
         let cg3 = mul_no_carry_w6::<F, FpChip<F>, XI_0>(fp2_chip.fp_chip, ctx, g3);
@@ -266,12 +262,12 @@ impl<'a, F: PrimeField> Fp12Chip<'a, F> {
     }
 
     // exp is in little-endian
-    pub fn cyclotomic_pow<'v>(
+    pub fn cyclotomic_pow(
         &self,
-        ctx: &mut Context<'v, F>,
-        a: FieldExtPoint<FpPoint<'v, F>>,
+        ctx: &mut Context<F>,
+        a: FieldExtPoint<FpPoint<F>>,
         exp: Vec<u64>,
-    ) -> FieldExtPoint<FpPoint<'v, F>> {
+    ) -> FieldExtPoint<FpPoint<F>> {
         let mut compression = self.cyclotomic_compress(&a);
         let mut out = None;
         let mut is_started = false;
@@ -304,11 +300,11 @@ impl<'a, F: PrimeField> Fp12Chip<'a, F> {
 
     #[allow(non_snake_case)]
     // use equation for (p^4 - p^2 + 1)/r in Section 5 of https://eprint.iacr.org/2008/490.pdf for BN curves
-    pub fn hard_part_BN<'v>(
+    pub fn hard_part_BN(
         &self,
-        ctx: &mut Context<'v, F>,
-        m: <Self as FieldChip<F>>::FieldPoint<'v>,
-    ) -> <Self as FieldChip<F>>::FieldPoint<'v> {
+        ctx: &mut Context<F>,
+        m: <Self as FieldChip<F>>::FieldPoint,
+    ) -> <Self as FieldChip<F>>::FieldPoint {
         // x = BN_X
 
         // m^p
@@ -372,25 +368,24 @@ impl<'a, F: PrimeField> Fp12Chip<'a, F> {
     }
 
     // out = in^{ (q^6 - 1)*(q^2 + 1) }
-    pub fn easy_part<'v>(
+    pub fn easy_part(
         &self,
-        ctx: &mut Context<'v, F>,
-        a: &<Self as FieldChip<F>>::FieldPoint<'v>,
-    ) -> <Self as FieldChip<F>>::FieldPoint<'v> {
+        ctx: &mut Context<F>,
+        a: &<Self as FieldChip<F>>::FieldPoint,
+    ) -> <Self as FieldChip<F>>::FieldPoint {
         // a^{q^6} = conjugate of a
         let f1 = self.conjugate(ctx, a);
         let f2 = self.divide(ctx, &f1, a);
         let f3 = self.frobenius_map(ctx, &f2, 2);
-        let f = self.mul(ctx, &f3, &f2);
-        f
+        self.mul(ctx, &f3, &f2)
     }
 
     // out = in^{(q^12 - 1)/r}
-    pub fn final_exp<'v>(
+    pub fn final_exp(
         &self,
-        ctx: &mut Context<'v, F>,
-        a: &<Self as FieldChip<F>>::FieldPoint<'v>,
-    ) -> <Self as FieldChip<F>>::FieldPoint<'v> {
+        ctx: &mut Context<F>,
+        a: &<Self as FieldChip<F>>::FieldPoint,
+    ) -> <Self as FieldChip<F>>::FieldPoint {
         let f0 = self.easy_part(ctx, a);
         let f = self.hard_part_BN(ctx, f0);
         f
diff --git a/halo2-ecc/src/bn254/mod.rs b/halo2-ecc/src/bn254/mod.rs
index 5f5db57b..6640f729 100644
--- a/halo2-ecc/src/bn254/mod.rs
+++ b/halo2-ecc/src/bn254/mod.rs
@@ -7,11 +7,11 @@ use crate::{
 pub mod final_exp;
 pub mod pairing;
 
-type FpChip<F> = fp::FpConfig<F, Fq>;
-type FpPoint<'v, F> = CRTInteger<'v, F>;
-type FqPoint<'v, F> = FieldExtPoint<FpPoint<'v, F>>;
-type Fp2Chip<'a, F> = fp2::Fp2Chip<'a, F, FpChip<F>, Fq2>;
-type Fp12Chip<'a, F> = fp12::Fp12Chip<'a, F, FpChip<F>, Fq12, 9>;
+pub type FpChip<'range, F> = fp::FpChip<'range, F, Fq>;
+pub type FpPoint<F> = CRTInteger<F>;
+pub type FqPoint<F> = FieldExtPoint<FpPoint<F>>;
+pub type Fp2Chip<'chip, F> = fp2::Fp2Chip<'chip, F, FpChip<'chip, F>, Fq2>;
+pub type Fp12Chip<'chip, F> = fp12::Fp12Chip<'chip, F, FpChip<'chip, F>, Fq12, 9>;
 
 #[cfg(test)]
 pub(crate) mod tests;
diff --git a/halo2-ecc/src/bn254/pairing.rs b/halo2-ecc/src/bn254/pairing.rs
index 2502ea48..cc4c9a87 100644
--- a/halo2-ecc/src/bn254/pairing.rs
+++ b/halo2-ecc/src/bn254/pairing.rs
@@ -1,20 +1,14 @@
 #![allow(non_snake_case)]
 use super::{Fp12Chip, Fp2Chip, FpChip, FpPoint, FqPoint};
-use crate::halo2_proofs::{
-    circuit::Value,
-    halo2curves::bn256::{self, G1Affine, G2Affine, SIX_U_PLUS_2_NAF},
-    halo2curves::bn256::{Fq, Fq2, FROBENIUS_COEFF_FQ12_C1},
-    plonk::ConstraintSystem,
+use crate::halo2_proofs::halo2curves::bn256::{
+    G1Affine, G2Affine, FROBENIUS_COEFF_FQ12_C1, SIX_U_PLUS_2_NAF,
 };
 use crate::{
     ecc::{EcPoint, EccChip},
-    fields::{fp::FpStrategy, fp12::mul_no_carry_w6},
-    fields::{FieldChip, FieldExtPoint},
-};
-use halo2_base::{
-    utils::{biguint_to_fe, fe_to_biguint, PrimeField},
-    Context,
+    fields::fp12::mul_no_carry_w6,
+    fields::{FieldChip, FieldExtPoint, PrimeField},
 };
+use halo2_base::Context;
 use num_bigint::BigUint;
 
 const XI_0: i64 = 9;
@@ -27,12 +21,12 @@ const XI_0: i64 = 9;
 //  line_{Psi(Q0), Psi(Q1)}(P) where Psi(x,y) = (w^2 x, w^3 y)
 //  - equals w^3 (y_1 - y_2) X + w^2 (x_2 - x_1) Y + w^5 (x_1 y_2 - x_2 y_1) =: out3 * w^3 + out2 * w^2 + out5 * w^5 where out2, out3, out5 are Fp2 points
 // Output is [None, None, out2, out3, None, out5] as vector of `Option<FqPoint>`s
-pub fn sparse_line_function_unequal<'a, F: PrimeField>(
+pub fn sparse_line_function_unequal<F: PrimeField>(
     fp2_chip: &Fp2Chip<F>,
-    ctx: &mut Context<'a, F>,
-    Q: (&EcPoint<F, FqPoint<'a, F>>, &EcPoint<F, FqPoint<'a, F>>),
-    P: &EcPoint<F, FpPoint<'a, F>>,
-) -> Vec<Option<FqPoint<'a, F>>> {
+    ctx: &mut Context<F>,
+    Q: (&EcPoint<F, FqPoint<F>>, &EcPoint<F, FqPoint<F>>),
+    P: &EcPoint<F, FpPoint<F>>,
+) -> Vec<Option<FqPoint<F>>> {
     let (x_1, y_1) = (&Q.0.x, &Q.0.y);
     let (x_2, y_2) = (&Q.1.x, &Q.1.y);
     let (X, Y) = (&P.x, &P.y);
@@ -66,12 +60,12 @@ pub fn sparse_line_function_unequal<'a, F: PrimeField>(
 //  line_{Psi(Q), Psi(Q)}(P) where Psi(x,y) = (w^2 x, w^3 y)
 //  - equals (3x^3 - 2y^2)(XI_0 + u) + w^4 (-3 x^2 * Q.x) + w^3 (2 y * Q.y) =: out0 + out4 * w^4 + out3 * w^3 where out0, out3, out4 are Fp2 points
 // Output is [out0, None, None, out3, out4, None] as vector of `Option<FqPoint>`s
-pub fn sparse_line_function_equal<'a, F: PrimeField>(
+pub fn sparse_line_function_equal<F: PrimeField>(
     fp2_chip: &Fp2Chip<F>,
-    ctx: &mut Context<'a, F>,
-    Q: &EcPoint<F, FqPoint<'a, F>>,
-    P: &EcPoint<F, FpPoint<'a, F>>,
-) -> Vec<Option<FqPoint<'a, F>>> {
+    ctx: &mut Context<F>,
+    Q: &EcPoint<F, FqPoint<F>>,
+    P: &EcPoint<F, FpPoint<F>>,
+) -> Vec<Option<FqPoint<F>>> {
     let (x, y) = (&Q.x, &Q.y);
     assert_eq!(x.coeffs.len(), 2);
     assert_eq!(y.coeffs.len(), 2);
@@ -101,12 +95,12 @@ pub fn sparse_line_function_equal<'a, F: PrimeField>(
 
 // multiply Fp12 point `a` with Fp12 point `b` where `b` is len 6 vector of Fp2 points, where some are `None` to represent zero.
 // Assumes `b` is not vector of all `None`s
-pub fn sparse_fp12_multiply<'a, F: PrimeField>(
+pub fn sparse_fp12_multiply<F: PrimeField>(
     fp2_chip: &Fp2Chip<F>,
-    ctx: &mut Context<'a, F>,
-    a: &FqPoint<'a, F>,
-    b_fp2_coeffs: &Vec<Option<FqPoint<'a, F>>>,
-) -> FieldExtPoint<FpPoint<'a, F>> {
+    ctx: &mut Context<F>,
+    a: &FqPoint<F>,
+    b_fp2_coeffs: &Vec<Option<FqPoint<F>>>,
+) -> FieldExtPoint<FpPoint<F>> {
     assert_eq!(a.coeffs.len(), 12);
     assert_eq!(b_fp2_coeffs.len(), 6);
     let mut a_fp2_coeffs = Vec::with_capacity(6);
@@ -168,13 +162,13 @@ pub fn sparse_fp12_multiply<'a, F: PrimeField>(
 // - P is point in E(Fp)
 // Output:
 // - out = g * l_{Psi(Q0), Psi(Q1)}(P) as Fp12 point
-pub fn fp12_multiply_with_line_unequal<'a, F: PrimeField>(
+pub fn fp12_multiply_with_line_unequal<F: PrimeField>(
     fp2_chip: &Fp2Chip<F>,
-    ctx: &mut Context<'a, F>,
-    g: &FqPoint<'a, F>,
-    Q: (&EcPoint<F, FqPoint<'a, F>>, &EcPoint<F, FqPoint<'a, F>>),
-    P: &EcPoint<F, FpPoint<'a, F>>,
-) -> FqPoint<'a, F> {
+    ctx: &mut Context<F>,
+    g: &FqPoint<F>,
+    Q: (&EcPoint<F, FqPoint<F>>, &EcPoint<F, FqPoint<F>>),
+    P: &EcPoint<F, FpPoint<F>>,
+) -> FqPoint<F> {
     let line = sparse_line_function_unequal::<F>(fp2_chip, ctx, Q, P);
     sparse_fp12_multiply::<F>(fp2_chip, ctx, g, &line)
 }
@@ -185,13 +179,13 @@ pub fn fp12_multiply_with_line_unequal<'a, F: PrimeField>(
 // - P is point in E(Fp)
 // Output:
 // - out = g * l_{Psi(Q), Psi(Q)}(P) as Fp12 point
-pub fn fp12_multiply_with_line_equal<'a, F: PrimeField>(
+pub fn fp12_multiply_with_line_equal<F: PrimeField>(
     fp2_chip: &Fp2Chip<F>,
-    ctx: &mut Context<'a, F>,
-    g: &FqPoint<'a, F>,
-    Q: &EcPoint<F, FqPoint<'a, F>>,
-    P: &EcPoint<F, FpPoint<'a, F>>,
-) -> FqPoint<'a, F> {
+    ctx: &mut Context<F>,
+    g: &FqPoint<F>,
+    Q: &EcPoint<F, FqPoint<F>>,
+    P: &EcPoint<F, FpPoint<F>>,
+) -> FqPoint<F> {
     let line = sparse_line_function_equal::<F>(fp2_chip, ctx, Q, P);
     sparse_fp12_multiply::<F>(fp2_chip, ctx, g, &line)
 }
@@ -214,13 +208,13 @@ pub fn fp12_multiply_with_line_equal<'a, F: PrimeField>(
 //  - `0 <= loop_count < r` and `loop_count < p` (to avoid [loop_count]Q' = Frob_p(Q'))
 //  - x^3 + b = 0 has no solution in Fp2, i.e., the y-coordinate of Q cannot be 0.
 
-pub fn miller_loop_BN<'a, 'b, F: PrimeField>(
-    ecc_chip: &EccChip<F, Fp2Chip<'a, F>>,
-    ctx: &mut Context<'b, F>,
-    Q: &EcPoint<F, FqPoint<'b, F>>,
-    P: &EcPoint<F, FpPoint<'b, F>>,
+pub fn miller_loop_BN<F: PrimeField>(
+    ecc_chip: &EccChip<F, Fp2Chip<F>>,
+    ctx: &mut Context<F>,
+    Q: &EcPoint<F, FqPoint<F>>,
+    P: &EcPoint<F, FpPoint<F>>,
     pseudo_binary_encoding: &[i8],
-) -> FqPoint<'b, F> {
+) -> FqPoint<F> {
     let mut i = pseudo_binary_encoding.len() - 1;
     while pseudo_binary_encoding[i] == 0 {
         i -= 1;
@@ -257,7 +251,7 @@ pub fn miller_loop_BN<'a, 'b, F: PrimeField>(
 
     loop {
         if i != last_index - 1 {
-            let fp12_chip = Fp12Chip::<F>::construct(ecc_chip.field_chip.fp_chip);
+            let fp12_chip = Fp12Chip::<F>::new(ecc_chip.field_chip.fp_chip);
             let f_sq = fp12_chip.mul(ctx, &f, &f);
             f = fp12_multiply_with_line_equal::<F>(ecc_chip.field_chip(), ctx, &f_sq, &R, P);
         }
@@ -299,12 +293,12 @@ pub fn miller_loop_BN<'a, 'b, F: PrimeField>(
 
 // let pairs = [(a_i, b_i)], a_i in G_1, b_i in G_2
 // output is Prod_i e'(a_i, b_i), where e'(a_i, b_i) is the output of `miller_loop_BN(b_i, a_i)`
-pub fn multi_miller_loop_BN<'a, 'b, F: PrimeField>(
-    ecc_chip: &EccChip<F, Fp2Chip<'a, F>>,
-    ctx: &mut Context<'b, F>,
-    pairs: Vec<(&EcPoint<F, FpPoint<'b, F>>, &EcPoint<F, FqPoint<'b, F>>)>,
+pub fn multi_miller_loop_BN<F: PrimeField>(
+    ecc_chip: &EccChip<F, Fp2Chip<F>>,
+    ctx: &mut Context<F>,
+    pairs: Vec<(&EcPoint<F, FpPoint<F>>, &EcPoint<F, FqPoint<F>>)>,
     pseudo_binary_encoding: &[i8],
-) -> FqPoint<'b, F> {
+) -> FqPoint<F> {
     let mut i = pseudo_binary_encoding.len() - 1;
     while pseudo_binary_encoding[i] == 0 {
         i -= 1;
@@ -344,7 +338,7 @@ pub fn multi_miller_loop_BN<'a, 'b, F: PrimeField>(
 
     i -= 1;
     let mut r = pairs.iter().map(|pair| pair.1.clone()).collect::<Vec<_>>();
-    let fp12_chip = Fp12Chip::<F>::construct(ecc_chip.field_chip.fp_chip);
+    let fp12_chip = Fp12Chip::<F>::new(ecc_chip.field_chip.fp_chip);
     loop {
         if i != last_index - 1 {
             f = fp12_chip.mul(ctx, &f, &f);
@@ -401,13 +395,13 @@ pub fn multi_miller_loop_BN<'a, 'b, F: PrimeField>(
 // - coeff[1][2], coeff[1][3] as assigned cells: this is an optimization to avoid loading new constants
 // Output:
 // - (coeff[1][2] * x^p, coeff[1][3] * y^p) point in E(Fp2)
-pub fn twisted_frobenius<'a, 'b, F: PrimeField>(
-    ecc_chip: &EccChip<F, Fp2Chip<'a, F>>,
-    ctx: &mut Context<'b, F>,
-    Q: &EcPoint<F, FqPoint<'b, F>>,
-    c2: &FqPoint<'b, F>,
-    c3: &FqPoint<'b, F>,
-) -> EcPoint<F, FqPoint<'b, F>> {
+pub fn twisted_frobenius<F: PrimeField>(
+    ecc_chip: &EccChip<F, Fp2Chip<F>>,
+    ctx: &mut Context<F>,
+    Q: &EcPoint<F, FqPoint<F>>,
+    c2: &FqPoint<F>,
+    c3: &FqPoint<F>,
+) -> EcPoint<F, FqPoint<F>> {
     assert_eq!(c2.coeffs.len(), 2);
     assert_eq!(c3.coeffs.len(), 2);
 
@@ -424,13 +418,13 @@ pub fn twisted_frobenius<'a, 'b, F: PrimeField>(
 // - Q = (x, y) point in E(Fp2)
 // Output:
 // - (coeff[1][2] * x^p, coeff[1][3] * -y^p) point in E(Fp2)
-pub fn neg_twisted_frobenius<'a, 'b, F: PrimeField>(
-    ecc_chip: &EccChip<F, Fp2Chip<'a, F>>,
-    ctx: &mut Context<'b, F>,
-    Q: &EcPoint<F, FqPoint<'b, F>>,
-    c2: &FqPoint<'b, F>,
-    c3: &FqPoint<'b, F>,
-) -> EcPoint<F, FqPoint<'b, F>> {
+pub fn neg_twisted_frobenius<F: PrimeField>(
+    ecc_chip: &EccChip<F, Fp2Chip<F>>,
+    ctx: &mut Context<F>,
+    Q: &EcPoint<F, FqPoint<F>>,
+    c2: &FqPoint<F>,
+    c3: &FqPoint<F>,
+) -> EcPoint<F, FqPoint<F>> {
     assert_eq!(c2.coeffs.len(), 2);
     assert_eq!(c3.coeffs.len(), 2);
 
@@ -442,80 +436,38 @@ pub fn neg_twisted_frobenius<'a, 'b, F: PrimeField>(
 }
 
 // To avoid issues with mutably borrowing twice (not allowed in Rust), we only store fp_chip and construct g2_chip and fp12_chip in scope when needed for temporary mutable borrows
-pub struct PairingChip<'a, F: PrimeField> {
-    pub fp_chip: &'a FpChip<F>,
+pub struct PairingChip<'chip, F: PrimeField> {
+    pub fp_chip: &'chip FpChip<'chip, F>,
 }
 
-impl<'a, F: PrimeField> PairingChip<'a, F> {
-    pub fn construct(fp_chip: &'a FpChip<F>) -> Self {
+impl<'chip, F: PrimeField> PairingChip<'chip, F> {
+    pub fn new(fp_chip: &'chip FpChip<F>) -> Self {
         Self { fp_chip }
     }
 
-    pub fn configure(
-        meta: &mut ConstraintSystem<F>,
-        strategy: FpStrategy,
-        num_advice: &[usize],
-        num_lookup_advice: &[usize],
-        num_fixed: usize,
-        lookup_bits: usize,
-        limb_bits: usize,
-        num_limbs: usize,
-        context_id: usize,
-        k: usize,
-    ) -> FpChip<F> {
-        FpChip::<F>::configure(
-            meta,
-            strategy,
-            num_advice,
-            num_lookup_advice,
-            num_fixed,
-            lookup_bits,
-            limb_bits,
-            num_limbs,
-            halo2_base::utils::modulus::<Fq>(),
-            context_id,
-            k,
-        )
+    pub fn load_private_g1(&self, ctx: &mut Context<F>, point: G1Affine) -> EcPoint<F, FpPoint<F>> {
+        let g1_chip = EccChip::new(self.fp_chip);
+        g1_chip.load_private(ctx, (point.x, point.y))
     }
 
-    pub fn load_private_g1<'v>(
+    pub fn load_private_g2(
         &self,
-        ctx: &mut Context<'_, F>,
-        point: Value<G1Affine>,
-    ) -> EcPoint<F, FpPoint<'v, F>> {
-        // go from pse/pairing::bn256::Fq to forked Fq
-        let convert_fp = |x: bn256::Fq| biguint_to_fe(&fe_to_biguint(&x));
-        let g1_chip = EccChip::construct(self.fp_chip.clone());
-        g1_chip
-            .load_private(ctx, (point.map(|pt| convert_fp(pt.x)), point.map(|pt| convert_fp(pt.y))))
-    }
-
-    pub fn load_private_g2<'v>(
-        &self,
-        ctx: &mut Context<'_, F>,
-        point: Value<G2Affine>,
-    ) -> EcPoint<F, FieldExtPoint<FpPoint<'v, F>>> {
-        let fp2_chip = Fp2Chip::<F>::construct(self.fp_chip);
-        let g2_chip = EccChip::construct(fp2_chip);
-        // go from pse/pairing::bn256::Fq2 to forked public Fq2
-        let convert_fp2 = |c0: bn256::Fq, c1: bn256::Fq| Fq2 {
-            c0: biguint_to_fe(&fe_to_biguint(&c0)),
-            c1: biguint_to_fe(&fe_to_biguint(&c1)),
-        };
-        let x = point.map(|pt| convert_fp2(pt.x.c0, pt.x.c1));
-        let y = point.map(|pt| convert_fp2(pt.y.c0, pt.y.c1));
-
-        g2_chip.load_private(ctx, (x, y))
+        ctx: &mut Context<F>,
+        point: G2Affine,
+    ) -> EcPoint<F, FieldExtPoint<FpPoint<F>>> {
+        let fp2_chip = Fp2Chip::<F>::new(self.fp_chip);
+        let g2_chip = EccChip::new(&fp2_chip);
+        g2_chip.load_private(ctx, (point.x, point.y))
     }
 
-    pub fn miller_loop<'v>(
+    pub fn miller_loop(
         &self,
-        ctx: &mut Context<'v, F>,
-        Q: &EcPoint<F, FqPoint<'v, F>>,
-        P: &EcPoint<F, FpPoint<'v, F>>,
-    ) -> FqPoint<'v, F> {
-        let fp2_chip = Fp2Chip::<F>::construct(self.fp_chip);
-        let g2_chip = EccChip::construct(fp2_chip);
+        ctx: &mut Context<F>,
+        Q: &EcPoint<F, FqPoint<F>>,
+        P: &EcPoint<F, FpPoint<F>>,
+    ) -> FqPoint<F> {
+        let fp2_chip = Fp2Chip::<F>::new(self.fp_chip);
+        let g2_chip = EccChip::new(&fp2_chip);
         miller_loop_BN::<F>(
             &g2_chip,
             ctx,
@@ -525,13 +477,13 @@ impl<'a, F: PrimeField> PairingChip<'a, F> {
         )
     }
 
-    pub fn multi_miller_loop<'v>(
+    pub fn multi_miller_loop(
         &self,
-        ctx: &mut Context<'v, F>,
-        pairs: Vec<(&EcPoint<F, FpPoint<'v, F>>, &EcPoint<F, FqPoint<'v, F>>)>,
-    ) -> FqPoint<'v, F> {
-        let fp2_chip = Fp2Chip::<F>::construct(self.fp_chip);
-        let g2_chip = EccChip::construct(fp2_chip);
+        ctx: &mut Context<F>,
+        pairs: Vec<(&EcPoint<F, FpPoint<F>>, &EcPoint<F, FqPoint<F>>)>,
+    ) -> FqPoint<F> {
+        let fp2_chip = Fp2Chip::<F>::new(self.fp_chip);
+        let g2_chip = EccChip::new(&fp2_chip);
         multi_miller_loop_BN::<F>(
             &g2_chip,
             ctx,
@@ -540,20 +492,20 @@ impl<'a, F: PrimeField> PairingChip<'a, F> {
         )
     }
 
-    pub fn final_exp<'v>(&self, ctx: &mut Context<'v, F>, f: &FqPoint<'v, F>) -> FqPoint<'v, F> {
-        let fp12_chip = Fp12Chip::<F>::construct(self.fp_chip);
+    pub fn final_exp(&self, ctx: &mut Context<F>, f: &FqPoint<F>) -> FqPoint<F> {
+        let fp12_chip = Fp12Chip::<F>::new(self.fp_chip);
         fp12_chip.final_exp(ctx, f)
     }
 
     // optimal Ate pairing
-    pub fn pairing<'v>(
+    pub fn pairing(
         &self,
-        ctx: &mut Context<'v, F>,
-        Q: &EcPoint<F, FqPoint<'v, F>>,
-        P: &EcPoint<F, FpPoint<'v, F>>,
-    ) -> FqPoint<'v, F> {
+        ctx: &mut Context<F>,
+        Q: &EcPoint<F, FqPoint<F>>,
+        P: &EcPoint<F, FpPoint<F>>,
+    ) -> FqPoint<F> {
         let f0 = self.miller_loop(ctx, Q, P);
-        let fp12_chip = Fp12Chip::<F>::construct(self.fp_chip);
+        let fp12_chip = Fp12Chip::<F>::new(self.fp_chip);
         // final_exp implemented in final_exp module
         fp12_chip.final_exp(ctx, &f0)
     }
diff --git a/halo2-ecc/src/bn254/results/msm_bench_internal.csv b/halo2-ecc/src/bn254/results/msm_bench_internal.csv
deleted file mode 100644
index 173d5ce1..00000000
--- a/halo2-ecc/src/bn254/results/msm_bench_internal.csv
+++ /dev/null
@@ -1,7 +0,0 @@
-degree,num_advice,num_lookup,num_fixed,lookup_bits,limb_bits,num_limbs,batch_size,window_bits,proof_time,proof_size,verify_time
-17,89,12,1,16,88,3,100,4,20.523902161s,30976,20.769379ms
-18,45,6,1,17,88,3,100,4,14.604765528s,15808,63.163377ms
-19,22,3,1,18,90,3,100,4,9.98081942s,7936,9.676845ms
-20,11,2,1,19,90,3,100,4,10.668871495s,4352,6.639454ms
-21,6,1,1,20,88,3,100,4,13.530348447s,2496,5.640048ms
-21,21,3,1,20,88,3,400,4,35.876681956s,7712,8.85568ms
diff --git a/halo2-ecc/src/bn254/results/msm_bench_m2_simple.csv b/halo2-ecc/src/bn254/results/msm_bench_m2_simple.csv
deleted file mode 100644
index 49ab0447..00000000
--- a/halo2-ecc/src/bn254/results/msm_bench_m2_simple.csv
+++ /dev/null
@@ -1,6 +0,0 @@
-degree,num_advice,num_lookup,num_fixed,lookup_bits,limb_bits,num_limbs,batch_size,window_bits,proof_time,proof_size,verify_time
-17,96,12,1,16,88,3,100,4,56.599245791s,33056,20.015083ms
-18,48,6,1,17,88,3,100,4,58.546402708s,16736,11.798ms
-19,24,3,1,18,90,3,100,4,61.127382s,8512,6.766125ms
-20,12,2,1,19,90,3,100,4,72.688734375s,4704,5.345125ms
-21,6,1,1,20,88,3,100,4,84.217528875s,2496,3.600333ms
diff --git a/halo2-ecc/src/bn254/results/msm_bench_m2_simple_plus.csv b/halo2-ecc/src/bn254/results/msm_bench_m2_simple_plus.csv
deleted file mode 100644
index 9179220f..00000000
--- a/halo2-ecc/src/bn254/results/msm_bench_m2_simple_plus.csv
+++ /dev/null
@@ -1,6 +0,0 @@
-degree,num_advice,num_lookup,num_fixed,lookup_bits,limb_bits,num_limbs,batch_size,window_bits,proof_time,proof_size,verify_time
-17,80,12,1,16,88,3,100,4,50.274314958s,36128,17.630791ms
-18,40,6,1,17,88,3,100,4,50.396009708s,18272,10.080583ms
-19,20,3,1,18,90,3,100,4,51.876326291s,9280,6.106458ms
-20,10,2,1,19,90,3,100,4,63.421609541s,5088,4.518875ms
-21,5,1,1,20,88,3,100,4,81.70901675s,2752,4.345875ms
\ No newline at end of file
diff --git a/halo2-ecc/src/bn254/results/pairing_bench_results.txt b/halo2-ecc/src/bn254/results/pairing_bench_results.txt
deleted file mode 100644
index 09371a73..00000000
--- a/halo2-ecc/src/bn254/results/pairing_bench_results.txt
+++ /dev/null
@@ -1,692 +0,0 @@
----------------------- degree = 22 ------------------------------
-Found existing params file. Reading params...
-Time elapsed in circuit & params construction: 27.278246583s
-Time elapsed in generating vkey: 13.992930625s
-test bn254::tests::bench_pairing has been running for over 60 seconds
-Time elapsed in generating pkey: 45.861797958s
-Time elapsed in filling circuit: 243.584µs
-optimal ate pairing:
-actual f: Gt(
-    Fq12 {
-        c0: Fq6 {
-            c0: Fq2 {
-                c0: 0x23f8716681b3e55143936e24d3079b55ffe7c32514d3173641e2dfb3d8a3cf43,
-                c1: 0x0e9b64529161e90b93578425fba75c6bae408067d455b43a8d677ff96ff4b48a,
-            },
-            c1: Fq2 {
-                c0: 0x25c4d1d4420efbde924c9d584c8ee8849790b5cc8333bc367416c5ba8ae9b4a7,
-                c1: 0x1894c7927cfc56dd4f7bc27f8b0e738f83d49115289e028370a3650153e1382a,
-            },
-            c2: Fq2 {
-                c0: 0x12e2be8f7be66544d2d17c27ff5565254b007c19a106908d0c8c5ef2533527ad,
-                c1: 0x24dd9c30ab633691db6a9b8c18c55b778f4c8fe59e01ff9bbcb9f97047c1b43c,
-            },
-        },
-        c1: Fq6 {
-            c0: Fq2 {
-                c0: 0x254654440f137e53b4493944308d8bd8e7fa497e13bceaf35403b43f77cbcc46,
-                c1: 0x0bf54ea3d4fb2ff28168286aec8186cd51fac9e3385d9e13cb76aa971b585927,
-            },
-            c1: Fq2 {
-                c0: 0x29a510bde714c164057fcbd4549c2dfc86676586259026478a91b446d2e7a6cc,
-                c1: 0x0da6ab5a9bb17606af566b4442deb87cafb60c4fba72c94bc138a0dd004c6aa2,
-            },
-            c2: Fq2 {
-                c0: 0x0f06348301fa8f811bfaa3fec3539035f469c57e94f92bee40c90d0c293f65ee,
-                c1: 0x0670aebbdf131f76e53f43db7d5fa877e422b9c01c06dd6d95c9508c397b1715,
-            },
-        },
-    },
-)
-circuit f: [
-    "23f8716681b3e55143936e24d3079b55ffe7c32514d3173641e2dfb3d8a3cf43",
-    "254654440f137e53b4493944308d8bd8e7fa497e13bceaf35403b43f77cbcc46",
-    "25c4d1d4420efbde924c9d584c8ee8849790b5cc8333bc367416c5ba8ae9b4a7",
-    "29a510bde714c164057fcbd4549c2dfc86676586259026478a91b446d2e7a6cc",
-    "12e2be8f7be66544d2d17c27ff5565254b007c19a106908d0c8c5ef2533527ad",
-    "f06348301fa8f811bfaa3fec3539035f469c57e94f92bee40c90d0c293f65ee",
-    "e9b64529161e90b93578425fba75c6bae408067d455b43a8d677ff96ff4b48a",
-    "bf54ea3d4fb2ff28168286aec8186cd51fac9e3385d9e13cb76aa971b585927",
-    "1894c7927cfc56dd4f7bc27f8b0e738f83d49115289e028370a3650153e1382a",
-    "da6ab5a9bb17606af566b4442deb87cafb60c4fba72c94bc138a0dd004c6aa2",
-    "24dd9c30ab633691db6a9b8c18c55b778f4c8fe59e01ff9bbcb9f97047c1b43c",
-    "670aebbdf131f76e53f43db7d5fa877e422b9c01c06dd6d95c9508c397b1715",
-]
-Using:
-advice columns: 1
-special lookup advice columns: 0
-fixed columns: 1
-lookup bits: 21
-limb bits: 88
-num limbs: 3
-maximum rows used by an advice column: 3103660
-minimum rows used by an advice column: 3103660
-total cells used: 3103660
-cells used in special lookup column: 0
-maximum rows used by a fixed column: 123
-Proving time: 108.858797333s
-Verify time: 6.111ms
----------------------- degree = 21 ------------------------------
-Found existing params file. Reading params...
-Time elapsed in circuit & params construction: 14.360272041s
-Time elapsed in generating vkey: 13.883735625s
-Time elapsed in generating pkey: 28.111832s
-Time elapsed in filling circuit: 177.542µs
-optimal ate pairing:
-actual f: Gt(
-    Fq12 {
-        c0: Fq6 {
-            c0: Fq2 {
-                c0: 0x22ad6747989f8e27f11c763a86a2ff7ba59143261d2b51847fe89f61236b6923,
-                c1: 0x01479c3ec4201f5ac83097e7450193d946cf98ceb6f0f4a393e507c00f5c30e3,
-            },
-            c1: Fq2 {
-                c0: 0x08155e00679fae3e1eead89cbd825fcc0fb8f69d37460501fc2d67aba3a4c965,
-                c1: 0x1d442215d46a21dd8971cb8d8bd1078a4f117191b6659afacaa3ed8dd5ceea58,
-            },
-            c2: Fq2 {
-                c0: 0x29eb579a4d76f02089a837062ea2d888395ff69bd3e18da435ca8f8dad3dd8a0,
-                c1: 0x173c006e100b050f7d14d432033b4e75834f142918ec2f1b3e53eabc01aeba82,
-            },
-        },
-        c1: Fq6 {
-            c0: Fq2 {
-                c0: 0x0f50f5f331fc4cd6ffdf0be5d9155267f624f44921b1cbda440557e133e44ce4,
-                c1: 0x07bb802031009b543aeb5aab1f9ed97cc1e366fbe76c91121646aab734a09967,
-            },
-            c1: Fq2 {
-                c0: 0x1a4a1607542a0c80f8be2a58495f0de2ae09b3cb7498f8a3f2c38547c182bc67,
-                c1: 0x22c61e14a0bb8ec22e2bd50d098fd07607373942550d245ffbaf4c5c11184390,
-            },
-            c2: Fq2 {
-                c0: 0x242ad4b8a7720b04c984fa5d037197e052b46208deb39530e96987a2c7aff545,
-                c1: 0x10481d8523aba6e0874cca444df2b50124f97878ccb1ac380c003faf150a1e13,
-            },
-        },
-    },
-)
-circuit f: [
-    "22ad6747989f8e27f11c763a86a2ff7ba59143261d2b51847fe89f61236b6923",
-    "f50f5f331fc4cd6ffdf0be5d9155267f624f44921b1cbda440557e133e44ce4",
-    "8155e00679fae3e1eead89cbd825fcc0fb8f69d37460501fc2d67aba3a4c965",
-    "1a4a1607542a0c80f8be2a58495f0de2ae09b3cb7498f8a3f2c38547c182bc67",
-    "29eb579a4d76f02089a837062ea2d888395ff69bd3e18da435ca8f8dad3dd8a0",
-    "242ad4b8a7720b04c984fa5d037197e052b46208deb39530e96987a2c7aff545",
-    "1479c3ec4201f5ac83097e7450193d946cf98ceb6f0f4a393e507c00f5c30e3",
-    "7bb802031009b543aeb5aab1f9ed97cc1e366fbe76c91121646aab734a09967",
-    "1d442215d46a21dd8971cb8d8bd1078a4f117191b6659afacaa3ed8dd5ceea58",
-    "22c61e14a0bb8ec22e2bd50d098fd07607373942550d245ffbaf4c5c11184390",
-    "173c006e100b050f7d14d432033b4e75834f142918ec2f1b3e53eabc01aeba82",
-    "10481d8523aba6e0874cca444df2b50124f97878ccb1ac380c003faf150a1e13",
-]
-Using:
-advice columns: 2
-special lookup advice columns: 1
-fixed columns: 1
-lookup bits: 20
-limb bits: 88
-num limbs: 3
-maximum rows used by an advice column: 1551833
-minimum rows used by an advice column: 1551827
-total cells used: 3103660
-cells used in special lookup column: 308580
-maximum rows used by a fixed column: 122
-Proving time: 63.351649125s
-Verify time: 6.473708ms
----------------------- degree = 20 ------------------------------
-Found existing params file. Reading params...
-Time elapsed in circuit & params construction: 6.695991083s
-Time elapsed in generating vkey: 11.556116042s
-Time elapsed in generating pkey: 19.479236833s
-Time elapsed in filling circuit: 226.917µs
-optimal ate pairing:
-actual f: Gt(
-    Fq12 {
-        c0: Fq6 {
-            c0: Fq2 {
-                c0: 0x022bb48caccf55d91d55ff55d710e39558a25ba3b6930d92fec4127c265865aa,
-                c1: 0x04318ee2315b10db240f026aa76a2e4f9c1875965a46ca293cf0e276d65006a8,
-            },
-            c1: Fq2 {
-                c0: 0x0a0ca918d2d0b7111dc22357c18376922308df36a000928d4c02c5f388afbf4e,
-                c1: 0x23d5eaad802ba805dfcc6f005eef6d13ffaa64641cb4a205f7ba70b40cca8751,
-            },
-            c2: Fq2 {
-                c0: 0x14c2d1b10c7970409bf8c4bf564f17f059b81fd34ef7b86485626781d7c71b18,
-                c1: 0x038357ea89bfa04a495788198b3e3b155e3ccae084d7e7062a2fdcc4d1f4e922,
-            },
-        },
-        c1: Fq6 {
-            c0: Fq2 {
-                c0: 0x2bbb58af82344ee1bf2a42ca85eb30461bd7d218c06b753460aa691e178d0200,
-                c1: 0x2ccf4d8ef48c734f1bb67de0b4dfe6c1ea9ccc09f2fc3ec83aebc0d728ee16bc,
-            },
-            c1: Fq2 {
-                c0: 0x1a4d77be8aacdd1579871582b7d546754be44f30f35f431e5f6e7e9d2eda0914,
-                c1: 0x0f367615d4dbc13994439c14b9165fbd9ef1c7d8dd4b8a38e6340ca763f3dab0,
-            },
-            c2: Fq2 {
-                c0: 0x24fbcd5185511db58ebfccb2e67089a10adba2187c08aba4326d72a04c1bc8f9,
-                c1: 0x18295c30d104fcc7cf9acbd5c867b6bb73d29e338f9a81a8e190366c7ee5c22d,
-            },
-        },
-    },
-)
-circuit f: [
-    "22bb48caccf55d91d55ff55d710e39558a25ba3b6930d92fec4127c265865aa",
-    "2bbb58af82344ee1bf2a42ca85eb30461bd7d218c06b753460aa691e178d0200",
-    "a0ca918d2d0b7111dc22357c18376922308df36a000928d4c02c5f388afbf4e",
-    "1a4d77be8aacdd1579871582b7d546754be44f30f35f431e5f6e7e9d2eda0914",
-    "14c2d1b10c7970409bf8c4bf564f17f059b81fd34ef7b86485626781d7c71b18",
-    "24fbcd5185511db58ebfccb2e67089a10adba2187c08aba4326d72a04c1bc8f9",
-    "4318ee2315b10db240f026aa76a2e4f9c1875965a46ca293cf0e276d65006a8",
-    "2ccf4d8ef48c734f1bb67de0b4dfe6c1ea9ccc09f2fc3ec83aebc0d728ee16bc",
-    "23d5eaad802ba805dfcc6f005eef6d13ffaa64641cb4a205f7ba70b40cca8751",
-    "f367615d4dbc13994439c14b9165fbd9ef1c7d8dd4b8a38e6340ca763f3dab0",
-    "38357ea89bfa04a495788198b3e3b155e3ccae084d7e7062a2fdcc4d1f4e922",
-    "18295c30d104fcc7cf9acbd5c867b6bb73d29e338f9a81a8e190366c7ee5c22d",
-]
-Using:
-advice columns: 4
-special lookup advice columns: 1
-fixed columns: 1
-lookup bits: 19
-limb bits: 88
-num limbs: 3
-maximum rows used by an advice column: 790857
-minimum rows used by an advice column: 790848
-total cells used: 3163402
-cells used in special lookup column: 328494
-maximum rows used by a fixed column: 124
-Proving time: 40.914635041s
-Verify time: 3.633167ms
----------------------- degree = 19 ------------------------------
-Found existing params file. Reading params...
-Time elapsed in circuit & params construction: 3.455247625s
-Time elapsed in generating vkey: 8.962057125s
-Time elapsed in generating pkey: 13.224039791s
-Time elapsed in filling circuit: 232.709µs
-optimal ate pairing:
-actual f: Gt(
-    Fq12 {
-        c0: Fq6 {
-            c0: Fq2 {
-                c0: 0x2ade60c33f9a7b1acd140334d69b2186701493103dfe9f2384e352d9796bc645,
-                c1: 0x124a21c731563d40b8e118d7ecfb3cdb50daec7ce2423378a1e7579267440be2,
-            },
-            c1: Fq2 {
-                c0: 0x0f6b8a821874a1860d72530a4d7df4f25f08ff55398eaafab066e8e1a84ba470,
-                c1: 0x1e6efc75b0312943b255eea0cffb66ba481e92a4330acbe7d05e3a885082b740,
-            },
-            c2: Fq2 {
-                c0: 0x28a05f0adde4bdc662f2b960ef376dc117320332195be6d676101462371505f8,
-                c1: 0x198b381f93299dff95093ec8d74fccc25f892594fa685d76f279ebc67f13813f,
-            },
-        },
-        c1: Fq6 {
-            c0: Fq2 {
-                c0: 0x0045bbe8b6c32da3b11e8d815ded2f1b1ab50c7d68a5e3b3d1e44e21121383c4,
-                c1: 0x1be966a56fa5e2a21cbe45788431e40745255ff787adc64e035a97777791d867,
-            },
-            c1: Fq2 {
-                c0: 0x26ef726f580d504d182217ba7be760090f11ec28f4bdd2f1ac2b943e49ce120d,
-                c1: 0x14d6f5d9083460e35d2458b875867e1fee78fce756c89b49e721b6a1608a87fb,
-            },
-            c2: Fq2 {
-                c0: 0x12adfa0800a551b4a199a0f3ade37c3542fb1242edf5eafabfe7f498f48cf877,
-                c1: 0x22c04697e24856621b46040d887b21fca95c866a4d871597f374387f011c4edb,
-            },
-        },
-    },
-)
-circuit f: [
-    "2ade60c33f9a7b1acd140334d69b2186701493103dfe9f2384e352d9796bc645",
-    "45bbe8b6c32da3b11e8d815ded2f1b1ab50c7d68a5e3b3d1e44e21121383c4",
-    "f6b8a821874a1860d72530a4d7df4f25f08ff55398eaafab066e8e1a84ba470",
-    "26ef726f580d504d182217ba7be760090f11ec28f4bdd2f1ac2b943e49ce120d",
-    "28a05f0adde4bdc662f2b960ef376dc117320332195be6d676101462371505f8",
-    "12adfa0800a551b4a199a0f3ade37c3542fb1242edf5eafabfe7f498f48cf877",
-    "124a21c731563d40b8e118d7ecfb3cdb50daec7ce2423378a1e7579267440be2",
-    "1be966a56fa5e2a21cbe45788431e40745255ff787adc64e035a97777791d867",
-    "1e6efc75b0312943b255eea0cffb66ba481e92a4330acbe7d05e3a885082b740",
-    "14d6f5d9083460e35d2458b875867e1fee78fce756c89b49e721b6a1608a87fb",
-    "198b381f93299dff95093ec8d74fccc25f892594fa685d76f279ebc67f13813f",
-    "22c04697e24856621b46040d887b21fca95c866a4d871597f374387f011c4edb",
-]
-Using:
-advice columns: 7
-special lookup advice columns: 1
-fixed columns: 1
-lookup bits: 18
-limb bits: 90
-num limbs: 3
-maximum rows used by an advice column: 452273
-minimum rows used by an advice column: 452262
-total cells used: 3165880
-cells used in special lookup column: 309000
-maximum rows used by a fixed column: 121
-Proving time: 29.8487535s
-Verify time: 4.560708ms
----------------------- degree = 18 ------------------------------
-Found existing params file. Reading params...
-Time elapsed in circuit & params construction: 1.841088083s
-Time elapsed in generating vkey: 8.741611583s
-Time elapsed in generating pkey: 10.582710834s
-Time elapsed in filling circuit: 126.166µs
-optimal ate pairing:
-actual f: Gt(
-    Fq12 {
-        c0: Fq6 {
-            c0: Fq2 {
-                c0: 0x1dc9ee36016f9ea889bd56ba2b94df07e3e0daa5b9c00f3dc264e1a1efc21385,
-                c1: 0x0e919401ce3c8c939ba0cc7dc25ee919e38c2a8dff85985ba533670f0ca94410,
-            },
-            c1: Fq2 {
-                c0: 0x14a115385087b77e61873fe0db0615a8ff4f22a7b3a412e95203d36b10ad46d1,
-                c1: 0x27cf2ae2f8f8588f22d939f759ca37d0c77d5fdfc61c4194761bfc4dc22c5175,
-            },
-            c2: Fq2 {
-                c0: 0x1b530d147488378d4956ae1d570209cc2c05a8d9fbeb935b6a20b10185178092,
-                c1: 0x1fd389970c3bc02a1b57cfe114630127d34f35e8291e468205e4f999a4e4ffae,
-            },
-        },
-        c1: Fq6 {
-            c0: Fq2 {
-                c0: 0x283c2a15ce4fb8ed8fc2df9071182a0df68789dfb1576008221d910e4deba67f,
-                c1: 0x286a770c6015d6451541ce952dc70b447f3742a75529957ca4155aa7d24d6f12,
-            },
-            c1: Fq2 {
-                c0: 0x2d951b3f59ce21e12cc7167473c3c47601758101c3693b3fc23b28321b6bb5a9,
-                c1: 0x1bae636562fa5fa38856b8dc4be9bd7ed46e89f72440da50ba266786250cfba6,
-            },
-            c2: Fq2 {
-                c0: 0x2a61ed99d11015c08d4b7fef72b520834460c6754eae89b5e3d0e668d95bc5cf,
-                c1: 0x25578eadd72707995b8311db6600f6eccfd03231175779bbbbff128bb28b1684,
-            },
-        },
-    },
-)
-circuit f: [
-    "1dc9ee36016f9ea889bd56ba2b94df07e3e0daa5b9c00f3dc264e1a1efc21385",
-    "283c2a15ce4fb8ed8fc2df9071182a0df68789dfb1576008221d910e4deba67f",
-    "14a115385087b77e61873fe0db0615a8ff4f22a7b3a412e95203d36b10ad46d1",
-    "2d951b3f59ce21e12cc7167473c3c47601758101c3693b3fc23b28321b6bb5a9",
-    "1b530d147488378d4956ae1d570209cc2c05a8d9fbeb935b6a20b10185178092",
-    "2a61ed99d11015c08d4b7fef72b520834460c6754eae89b5e3d0e668d95bc5cf",
-    "e919401ce3c8c939ba0cc7dc25ee919e38c2a8dff85985ba533670f0ca94410",
-    "286a770c6015d6451541ce952dc70b447f3742a75529957ca4155aa7d24d6f12",
-    "27cf2ae2f8f8588f22d939f759ca37d0c77d5fdfc61c4194761bfc4dc22c5175",
-    "1bae636562fa5fa38856b8dc4be9bd7ed46e89f72440da50ba266786250cfba6",
-    "1fd389970c3bc02a1b57cfe114630127d34f35e8291e468205e4f999a4e4ffae",
-    "25578eadd72707995b8311db6600f6eccfd03231175779bbbbff128bb28b1684",
-]
-Using:
-advice columns: 13
-special lookup advice columns: 2
-fixed columns: 1
-lookup bits: 17
-limb bits: 88
-num limbs: 3
-maximum rows used by an advice column: 251554
-minimum rows used by an advice column: 251543
-total cells used: 3270142
-cells used in special lookup column: 364074
-maximum rows used by a fixed column: 124
-Proving time: 25.221047792s
-Verify time: 6.036083ms
----------------------- degree = 17 ------------------------------
-Found existing params file. Reading params...
-Time elapsed in circuit & params construction: 902.645375ms
-Time elapsed in generating vkey: 9.427334041s
-Time elapsed in generating pkey: 9.44115625s
-Time elapsed in filling circuit: 157.709µs
-optimal ate pairing:
-actual f: Gt(
-    Fq12 {
-        c0: Fq6 {
-            c0: Fq2 {
-                c0: 0x2f6b5fb559355078b85623bc1314b30c03eee6d39fd0fa7a18c5dd7797610748,
-                c1: 0x1677069d40821974bdf767cf3d07dc7dbd9aa24a700d847928f28b8506ae0ee2,
-            },
-            c1: Fq2 {
-                c0: 0x28f407b2820778fe3ddb941c30ddbfb4444519b3a040cdc957c9b53f26c11514,
-                c1: 0x02d10f22fc47193dd4c87e6886adab94890ac7e7dc4797185db00bede7257f0a,
-            },
-            c2: Fq2 {
-                c0: 0x23d1256f3e68b2a4e459411e579f7b9003cec25940f501f513f94dbdc258a815,
-                c1: 0x211c94870a2d8ef5aa31f8d0f3370f7b5baee369d56c14d50094061593820895,
-            },
-        },
-        c1: Fq6 {
-            c0: Fq2 {
-                c0: 0x132ecd2ac18921c2bf93fa25705efc55a028ac277381d61ddc2964d90e4b1b7d,
-                c1: 0x07cb8e1067f0249fea57384457aa428310f9f389e8206219bdec48af133e5ccf,
-            },
-            c1: Fq2 {
-                c0: 0x2e05ca4dced66f3951a3627bd7092b3e50abd7ad12380d231f27b4fe1b8c1081,
-                c1: 0x26d7f859fede048d5d7259e96644f01878ea3b010cea4c2a699506495916285b,
-            },
-            c2: Fq2 {
-                c0: 0x24137579a8c915ec245765a2d9466d9775a397ef714e233c2833ea31b935a002,
-                c1: 0x081076c36be6a1301eab412b3846e8551763b7240d637aff1c8414d5eb9534b6,
-            },
-        },
-    },
-)
-circuit f: [
-    "2f6b5fb559355078b85623bc1314b30c03eee6d39fd0fa7a18c5dd7797610748",
-    "132ecd2ac18921c2bf93fa25705efc55a028ac277381d61ddc2964d90e4b1b7d",
-    "28f407b2820778fe3ddb941c30ddbfb4444519b3a040cdc957c9b53f26c11514",
-    "2e05ca4dced66f3951a3627bd7092b3e50abd7ad12380d231f27b4fe1b8c1081",
-    "23d1256f3e68b2a4e459411e579f7b9003cec25940f501f513f94dbdc258a815",
-    "24137579a8c915ec245765a2d9466d9775a397ef714e233c2833ea31b935a002",
-    "1677069d40821974bdf767cf3d07dc7dbd9aa24a700d847928f28b8506ae0ee2",
-    "7cb8e1067f0249fea57384457aa428310f9f389e8206219bdec48af133e5ccf",
-    "2d10f22fc47193dd4c87e6886adab94890ac7e7dc4797185db00bede7257f0a",
-    "26d7f859fede048d5d7259e96644f01878ea3b010cea4c2a699506495916285b",
-    "211c94870a2d8ef5aa31f8d0f3370f7b5baee369d56c14d50094061593820895",
-    "81076c36be6a1301eab412b3846e8551763b7240d637aff1c8414d5eb9534b6",
-]
-Using:
-advice columns: 26
-special lookup advice columns: 3
-fixed columns: 1
-lookup bits: 16
-limb bits: 88
-num limbs: 3
-maximum rows used by an advice column: 127100
-minimum rows used by an advice column: 127084
-total cells used: 3304378
-cells used in special lookup column: 375486
-maximum rows used by a fixed column: 124
-Suggestions:
-Have you tried using 26 advice columns?
-Have you tried using 3 lookup columns?
-Have you tried using 1 fixed columns?
-Proving time: 21.984741416s
-Verify time: 9.293792ms
----------------------- degree = 16 ------------------------------
-Found existing params file. Reading params...
-Time elapsed in circuit & params construction: 480.439875ms
-Time elapsed in generating vkey: 9.627523708s
-test bn254::tests::bench_pairing has been running for over 60 seconds
-Time elapsed in generating pkey: 8.621027917s
-Time elapsed in filling circuit: 132.291µs
-optimal ate pairing:
-actual f: Gt(
-    Fq12 {
-        c0: Fq6 {
-            c0: Fq2 {
-                c0: 0x05fa25049d6fa380dd098e4db39cbc79ed45abb26bed7eec92affe6c56a7dfc0,
-                c1: 0x0f358196c1ec664b5dd6830ba49182f2105b80109d4ff41921c0d62c63d76c04,
-            },
-            c1: Fq2 {
-                c0: 0x04e06254d755c656aab953f287b774040901d00d2cb73217505b3b08035b3d24,
-                c1: 0x0c21194592f686402b84fed5a02784fc7cb276fb4f9bfd4402f403f528669395,
-            },
-            c2: Fq2 {
-                c0: 0x22e815ce69f4837104a94592dd4097c331f25b1622bc830196cd6f2edfd2975e,
-                c1: 0x10ca8d29eae1de72055333b9fafe49eda152f89f73538a61bc044fe764cb1e7a,
-            },
-        },
-        c1: Fq6 {
-            c0: Fq2 {
-                c0: 0x1ad38e7cceecc2cab831f9140255c1386c697ba7bdc922b6792266e1dd864ddb,
-                c1: 0x29b5248c5b7567b81792b7811def89f1a69c98bbbe8c5c85cbf3b420c48c071b,
-            },
-            c1: Fq2 {
-                c0: 0x08aef22052f4b02f02590fcd20fc3d4a6a08c28969a77f97710238a6f02c5858,
-                c1: 0x29b2268d4f95279da44634aa984d6a68bdc92769bdaf488ef2463baa04fc8aab,
-            },
-            c2: Fq2 {
-                c0: 0x27a760074bc589429b62fbe7c91632148fca433c5d8bd3aaf6d9b22e21d6811a,
-                c1: 0x1ac058c71295b3f6539fe4b0248f05da1e8c42c3c8e19ab635e4fe279142f350,
-            },
-        },
-    },
-)
-circuit f: [
-    "5fa25049d6fa380dd098e4db39cbc79ed45abb26bed7eec92affe6c56a7dfc0",
-    "1ad38e7cceecc2cab831f9140255c1386c697ba7bdc922b6792266e1dd864ddb",
-    "4e06254d755c656aab953f287b774040901d00d2cb73217505b3b08035b3d24",
-    "8aef22052f4b02f02590fcd20fc3d4a6a08c28969a77f97710238a6f02c5858",
-    "22e815ce69f4837104a94592dd4097c331f25b1622bc830196cd6f2edfd2975e",
-    "27a760074bc589429b62fbe7c91632148fca433c5d8bd3aaf6d9b22e21d6811a",
-    "f358196c1ec664b5dd6830ba49182f2105b80109d4ff41921c0d62c63d76c04",
-    "29b5248c5b7567b81792b7811def89f1a69c98bbbe8c5c85cbf3b420c48c071b",
-    "c21194592f686402b84fed5a02784fc7cb276fb4f9bfd4402f403f528669395",
-    "29b2268d4f95279da44634aa984d6a68bdc92769bdaf488ef2463baa04fc8aab",
-    "10ca8d29eae1de72055333b9fafe49eda152f89f73538a61bc044fe764cb1e7a",
-    "1ac058c71295b3f6539fe4b0248f05da1e8c42c3c8e19ab635e4fe279142f350",
-]
-Using:
-advice columns: 51
-special lookup advice columns: 6
-fixed columns: 1
-lookup bits: 15
-limb bits: 90
-num limbs: 3
-maximum rows used by an advice column: 64994
-minimum rows used by an advice column: 64975
-total cells used: 3314074
-cells used in special lookup column: 358398
-maximum rows used by a fixed column: 121
-Suggestions:
-Have you tried using 51 advice columns?
-Have you tried using 6 lookup columns?
-Have you tried using 1 fixed columns?
-Proving time: 20.796019042s
-Verify time: 14.581125ms
----------------------- degree = 15 ------------------------------
-Found existing params file. Reading params...
-Time elapsed in circuit & params construction: 259.291208ms
-Time elapsed in generating vkey: 10.518665125s
-Time elapsed in generating pkey: 8.573772417s
-Time elapsed in filling circuit: 132.416µs
-optimal ate pairing:
-actual f: Gt(
-    Fq12 {
-        c0: Fq6 {
-            c0: Fq2 {
-                c0: 0x12667b6947e8758e8b58f334c77592aefcc7ed257ed4c5b89d06079a3767fa2c,
-                c1: 0x1d9d38b3ffae375807bf68c3ed7bc8721d31b48641913fc6b762f3c8aa8712c8,
-            },
-            c1: Fq2 {
-                c0: 0x001fbcce1b77e363c5b595e762b0ea8a97c40e0d7d773c7f5af88c458acabb36,
-                c1: 0x1a891a0f23f9b9afe9c825d227786fd0772cb1be65d98047b44bd84d34610ff4,
-            },
-            c2: Fq2 {
-                c0: 0x02cc1a3715d113e79d83d9e5a2f0c7c62d947ae0732e848c63af34c91b4dffdb,
-                c1: 0x25235641f460563a5a84b1d09bea2ae36c9da6e0658f4f69a3b260986fac0be6,
-            },
-        },
-        c1: Fq6 {
-            c0: Fq2 {
-                c0: 0x24508435ac8500731c958875652476306507746838c6a6751ac7da75413bf448,
-                c1: 0x0fe0778ce2f8a314e97775fb7a6f597351058ee63b350a5dc475c008d44c59c8,
-            },
-            c1: Fq2 {
-                c0: 0x248ecfeb01938008b3f4358997c69e7b123e06505fe3b9e2a10a186848d4dc36,
-                c1: 0x02d4435962585d1b92b4ed87bf1629ec33ac31e89e0ad9c3900c485f4cc96ff5,
-            },
-            c2: Fq2 {
-                c0: 0x291792a23dcc866c0f4a807f19366ab587d62484fa0c8df5d79269661e880018,
-                c1: 0x203eed4d7cb2b3089f65a83773fa48e3a8c79305929c29788f32fab0bce17659,
-            },
-        },
-    },
-)
-circuit f: [
-    "12667b6947e8758e8b58f334c77592aefcc7ed257ed4c5b89d06079a3767fa2c",
-    "24508435ac8500731c958875652476306507746838c6a6751ac7da75413bf448",
-    "1fbcce1b77e363c5b595e762b0ea8a97c40e0d7d773c7f5af88c458acabb36",
-    "248ecfeb01938008b3f4358997c69e7b123e06505fe3b9e2a10a186848d4dc36",
-    "2cc1a3715d113e79d83d9e5a2f0c7c62d947ae0732e848c63af34c91b4dffdb",
-    "291792a23dcc866c0f4a807f19366ab587d62484fa0c8df5d79269661e880018",
-    "1d9d38b3ffae375807bf68c3ed7bc8721d31b48641913fc6b762f3c8aa8712c8",
-    "fe0778ce2f8a314e97775fb7a6f597351058ee63b350a5dc475c008d44c59c8",
-    "1a891a0f23f9b9afe9c825d227786fd0772cb1be65d98047b44bd84d34610ff4",
-    "2d4435962585d1b92b4ed87bf1629ec33ac31e89e0ad9c3900c485f4cc96ff5",
-    "25235641f460563a5a84b1d09bea2ae36c9da6e0658f4f69a3b260986fac0be6",
-    "203eed4d7cb2b3089f65a83773fa48e3a8c79305929c29788f32fab0bce17659",
-]
-Using:
-advice columns: 106
-special lookup advice columns: 14
-fixed columns: 1
-lookup bits: 14
-limb bits: 90
-num limbs: 3
-maximum rows used by an advice column: 32757
-minimum rows used by an advice column: 32734
-total cells used: 3470860
-cells used in special lookup column: 430980
-maximum rows used by a fixed column: 126
-Suggestions:
-Have you tried using 106 advice columns?
-Have you tried using 14 lookup columns?
-Have you tried using 1 fixed columns?
-Proving time: 23.438619875s
-Verify time: 25.235459ms
----------------------- degree = 14 ------------------------------
-Found existing params file. Reading params...
-Time elapsed in circuit & params construction: 121.8535ms
-Time elapsed in generating vkey: 12.176505416s
-Time elapsed in generating pkey: 9.08229325s
-Time elapsed in filling circuit: 225.417µs
-optimal ate pairing:
-actual f: Gt(
-    Fq12 {
-        c0: Fq6 {
-            c0: Fq2 {
-                c0: 0x058742387deecf38a640a1d950f087c4133e9b04c28c64107651788816d19e17,
-                c1: 0x2d300ef28ee6280b7daa508ff4a2c802e4c74daf963dd4da66b25c43cc01a890,
-            },
-            c1: Fq2 {
-                c0: 0x11ccac753ce7c2f373544f01ff370629ba335688e35a7f7831a8d43c99dfe063,
-                c1: 0x1208b3837c8829bddc9abd52e38f07e6cf847edb4124f820162b570b754b307a,
-            },
-            c2: Fq2 {
-                c0: 0x0478c3cd6061ccb56cba3d7dd9468db138fa76d8032dd3aaf3b96c4dae3cdf0e,
-                c1: 0x2497ad50d789c263a47310f084003a9b85e53c9e4859fbf25f67bdba9297c5e2,
-            },
-        },
-        c1: Fq6 {
-            c0: Fq2 {
-                c0: 0x1014151950b5258db9fc15b68037b0a3945303a16c38f70d508c79bf948b8289,
-                c1: 0x2558754a750d53d5e33f7a199c4b539bf1628beaff1340b3a4f749d4a66ac2bf,
-            },
-            c1: Fq2 {
-                c0: 0x23c09080e8d84838d9a05c89c110a2d5788e18b1b67f2a8ac9b65584f4bf35dc,
-                c1: 0x172cee9f44a3964dd47ba733f384a2d4e92e44e21d54244adb89705c6abdb902,
-            },
-            c2: Fq2 {
-                c0: 0x2588706eaef78e17546dae15833ff04b2f0e728261134f7505bce4cca690c174,
-                c1: 0x0d9991fe897d2dc6b432bb9680658a197689bd50b2ce020af9d789b7b2893929,
-            },
-        },
-    },
-)
-circuit f: [
-    "58742387deecf38a640a1d950f087c4133e9b04c28c64107651788816d19e17",
-    "1014151950b5258db9fc15b68037b0a3945303a16c38f70d508c79bf948b8289",
-    "11ccac753ce7c2f373544f01ff370629ba335688e35a7f7831a8d43c99dfe063",
-    "23c09080e8d84838d9a05c89c110a2d5788e18b1b67f2a8ac9b65584f4bf35dc",
-    "478c3cd6061ccb56cba3d7dd9468db138fa76d8032dd3aaf3b96c4dae3cdf0e",
-    "2588706eaef78e17546dae15833ff04b2f0e728261134f7505bce4cca690c174",
-    "2d300ef28ee6280b7daa508ff4a2c802e4c74daf963dd4da66b25c43cc01a890",
-    "2558754a750d53d5e33f7a199c4b539bf1628beaff1340b3a4f749d4a66ac2bf",
-    "1208b3837c8829bddc9abd52e38f07e6cf847edb4124f820162b570b754b307a",
-    "172cee9f44a3964dd47ba733f384a2d4e92e44e21d54244adb89705c6abdb902",
-    "2497ad50d789c263a47310f084003a9b85e53c9e4859fbf25f67bdba9297c5e2",
-    "d9991fe897d2dc6b432bb9680658a197689bd50b2ce020af9d789b7b2893929",
-]
-Using:
-advice columns: 213
-special lookup advice columns: 26
-fixed columns: 1
-lookup bits: 13
-limb bits: 91
-num limbs: 3
-maximum rows used by an advice column: 16354
-minimum rows used by an advice column: 16329
-total cells used: 3480556
-cells used in special lookup column: 413892
-maximum rows used by a fixed column: 123
-Suggestions:
-Have you tried using 213 advice columns?
-Have you tried using 26 lookup columns?
-Have you tried using 1 fixed columns?
-Proving time: 24.405766042s
-Verify time: 60.342208ms
----------------------- degree = 13 ------------------------------
-Found existing params file. Reading params...
-Time elapsed in circuit & params construction: 72.068125ms
-Time elapsed in generating vkey: 14.526949125s
-Time elapsed in generating pkey: 9.846653125s
-Time elapsed in filling circuit: 130.083µs
-optimal ate pairing:
-actual f: Gt(
-    Fq12 {
-        c0: Fq6 {
-            c0: Fq2 {
-                c0: 0x1638bb2426c4d48ca6700f3e4e2cc368a080453bfb24354f1cc2993d623adebb,
-                c1: 0x1794dbdefc6661d4912240a3c6af6f96a4b6ce94f8de3d12c1be1a19a39f7c2f,
-            },
-            c1: Fq2 {
-                c0: 0x03d44155bf3610456062d73e1d5acc91b6396336fac651c0a4eb09f1911a0402,
-                c1: 0x240fcaffc3feb0322791e710fd0d557c3c7c0f82afe1766d35c84ac06c2fb175,
-            },
-            c2: Fq2 {
-                c0: 0x0ed1775fab5830bf0b46b1ab2dba21c2daeeeef48e82ed177c617984a292de8a,
-                c1: 0x276a48d858ed9858dc3c74addae7746c867e82ec1550d76ca3be4c57ad5a04e6,
-            },
-        },
-        c1: Fq6 {
-            c0: Fq2 {
-                c0: 0x1f96d977e7d8e633b44eec985debfd5b687667dc54fd4fbca31f53f1dca5c5eb,
-                c1: 0x2bc6d16be77e9ceb4c077611a5570a53561e32ac82899d9cb390284bf060ab95,
-            },
-            c1: Fq2 {
-                c0: 0x0f7fb989442a1a81ca7656368988fe4a94f3a505d7d733b73afe723617e48481,
-                c1: 0x24d845ded32529b7caf59eb04951a6d8bf2fbe7960db5536286620d0b239847a,
-            },
-            c2: Fq2 {
-                c0: 0x118fa4c34c476ad206983f9cc1067fcfe9157113c1bf903a1ded48e3f6a0a171,
-                c1: 0x1a4768079c715ddce2adc1c1b13cb42b5c16a5d1c538362d0b940ba0cf428ee2,
-            },
-        },
-    },
-)
-circuit f: [
-    "1638bb2426c4d48ca6700f3e4e2cc368a080453bfb24354f1cc2993d623adebb",
-    "1f96d977e7d8e633b44eec985debfd5b687667dc54fd4fbca31f53f1dca5c5eb",
-    "3d44155bf3610456062d73e1d5acc91b6396336fac651c0a4eb09f1911a0402",
-    "f7fb989442a1a81ca7656368988fe4a94f3a505d7d733b73afe723617e48481",
-    "ed1775fab5830bf0b46b1ab2dba21c2daeeeef48e82ed177c617984a292de8a",
-    "118fa4c34c476ad206983f9cc1067fcfe9157113c1bf903a1ded48e3f6a0a171",
-    "1794dbdefc6661d4912240a3c6af6f96a4b6ce94f8de3d12c1be1a19a39f7c2f",
-    "2bc6d16be77e9ceb4c077611a5570a53561e32ac82899d9cb390284bf060ab95",
-    "240fcaffc3feb0322791e710fd0d557c3c7c0f82afe1766d35c84ac06c2fb175",
-    "24d845ded32529b7caf59eb04951a6d8bf2fbe7960db5536286620d0b239847a",
-    "276a48d858ed9858dc3c74addae7746c867e82ec1550d76ca3be4c57ad5a04e6",
-    "1a4768079c715ddce2adc1c1b13cb42b5c16a5d1c538362d0b940ba0cf428ee2",
-]
-Using:
-advice columns: 446
-special lookup advice columns: 60
-fixed columns: 1
-lookup bits: 12
-limb bits: 88
-num limbs: 3
-maximum rows used by an advice column: 8173
-minimum rows used by an advice column: 8147
-total cells used: 3637342
-cells used in special lookup column: 486474
-maximum rows used by a fixed column: 127
-Suggestions:
-Have you tried using 445 advice columns?
-Have you tried using 60 lookup columns?
-Have you tried using 1 fixed columns?
-Proving time: 28.738033583s
-Verify time: 104.588125ms
\ No newline at end of file
diff --git a/halo2-ecc/src/bn254/tests/ec_add.rs b/halo2-ecc/src/bn254/tests/ec_add.rs
index 08dc9fb1..30c52aa5 100644
--- a/halo2-ecc/src/bn254/tests/ec_add.rs
+++ b/halo2-ecc/src/bn254/tests/ec_add.rs
@@ -1,15 +1,19 @@
-use std::env::set_var;
 use std::fs;
-use std::{env::var, fs::File};
+use std::fs::File;
+use std::io::{BufRead, BufReader};
 
 use super::*;
-use crate::fields::FieldChip;
-use crate::halo2_proofs::halo2curves::{bn256::G2Affine, FieldExt};
+use crate::fields::{FieldChip, FpStrategy};
+use crate::halo2_proofs::halo2curves::bn256::G2Affine;
 use group::cofactor::CofactorCurveAffine;
-use halo2_base::SKIP_FIRST_PASS;
+use halo2_base::gates::builder::{GateThreadBuilder, RangeCircuitBuilder};
+use halo2_base::gates::RangeChip;
+use halo2_base::utils::fs::gen_srs;
+use halo2_base::Context;
+use itertools::Itertools;
 use rand_core::OsRng;
 
-#[derive(Serialize, Deserialize, Debug)]
+#[derive(Clone, Copy, Debug, Serialize, Deserialize)]
 struct CircuitParams {
     strategy: FpStrategy,
     degree: u32,
@@ -22,270 +26,95 @@ struct CircuitParams {
     batch_size: usize,
 }
 
-#[derive(Clone, Debug)]
-struct Config<F: PrimeField> {
-    fp_chip: FpChip<F>,
-    batch_size: usize,
-}
+fn g2_add_test<F: PrimeField>(ctx: &mut Context<F>, params: CircuitParams, _points: Vec<G2Affine>) {
+    std::env::set_var("LOOKUP_BITS", params.lookup_bits.to_string());
+    let range = RangeChip::<F>::default(params.lookup_bits);
+    let fp_chip = FpChip::<F>::new(&range, params.limb_bits, params.num_limbs);
+    let fp2_chip = Fp2Chip::<F>::new(&fp_chip);
+    let g2_chip = EccChip::new(&fp2_chip);
 
-impl<F: PrimeField> Config<F> {
-    pub fn configure(
-        meta: &mut ConstraintSystem<F>,
-        strategy: FpStrategy,
-        num_advice: &[usize],
-        num_lookup_advice: &[usize],
-        num_fixed: usize,
-        lookup_bits: usize,
-        limb_bits: usize,
-        num_limbs: usize,
-        p: BigUint,
-        batch_size: usize,
-        context_id: usize,
-        k: usize,
-    ) -> Self {
-        let fp_chip = FpChip::<F>::configure(
-            meta,
-            strategy,
-            num_advice,
-            num_lookup_advice,
-            num_fixed,
-            lookup_bits,
-            limb_bits,
-            num_limbs,
-            p,
-            context_id,
-            k,
-        );
-        Self { fp_chip, batch_size }
-    }
-}
+    let points = _points.iter().map(|pt| g2_chip.assign_point(ctx, *pt)).collect::<Vec<_>>();
 
-struct EcAddCircuit<F: PrimeField> {
-    points: Vec<Option<G2Affine>>,
-    batch_size: usize,
-    _marker: PhantomData<F>,
-}
+    let acc = g2_chip.sum::<G2Affine>(ctx, points.iter());
 
-impl<F: PrimeField> Default for EcAddCircuit<F> {
-    fn default() -> Self {
-        Self { points: vec![None; 100], batch_size: 100, _marker: PhantomData }
-    }
-}
-
-impl Circuit<Fr> for EcAddCircuit<Fr> {
-    type Config = Config<Fr>;
-    type FloorPlanner = SimpleFloorPlanner;
-
-    fn without_witnesses(&self) -> Self {
-        Self {
-            points: vec![None; self.batch_size],
-            batch_size: self.batch_size,
-            _marker: PhantomData,
-        }
-    }
-
-    fn configure(meta: &mut ConstraintSystem<Fr>) -> Self::Config {
-        let path = var("EC_ADD_CONFIG")
-            .unwrap_or_else(|_| "./src/bn254/configs/ec_add_circuit.config".to_string());
-        let params: CircuitParams = serde_json::from_reader(
-            File::open(&path).unwrap_or_else(|_| panic!("{path:?} file should exist")),
-        )
-        .unwrap();
-
-        Config::<Fr>::configure(
-            meta,
-            params.strategy,
-            &[params.num_advice],
-            &[params.num_lookup_advice],
-            params.num_fixed,
-            params.lookup_bits,
-            params.limb_bits,
-            params.num_limbs,
-            BigUint::from_str_radix(&Fq::MODULUS[2..], 16).unwrap(),
-            params.batch_size,
-            0,
-            params.degree as usize,
-        )
-    }
-
-    fn synthesize(
-        &self,
-        config: Self::Config,
-        mut layouter: impl Layouter<Fr>,
-    ) -> Result<(), Error> {
-        assert_eq!(config.batch_size, self.points.len());
-
-        config.fp_chip.load_lookup_table(&mut layouter)?;
-        let fp2_chip = Fp2Chip::<Fr>::construct(&config.fp_chip);
-        let g2_chip = EccChip::construct(fp2_chip.clone());
-
-        let mut first_pass = SKIP_FIRST_PASS;
-        layouter.assign_region(
-            || "G2 add",
-            |region| {
-                if first_pass {
-                    first_pass = false;
-                    return Ok(());
-                }
-                let mut aux = config.fp_chip.new_context(region);
-                let ctx = &mut aux;
-
-                let display = self.points[0].is_some();
-                let points = self
-                    .points
-                    .iter()
-                    .cloned()
-                    .map(|pt| {
-                        g2_chip.assign_point(ctx, pt.map(Value::known).unwrap_or(Value::unknown()))
-                    })
-                    .collect::<Vec<_>>();
-
-                let acc = g2_chip.sum::<G2Affine>(ctx, points.iter());
-
-                #[cfg(feature = "display")]
-                if display {
-                    let answer = self
-                        .points
-                        .iter()
-                        .fold(G2Affine::identity(), |a, b| (a + b.unwrap()).to_affine());
-                    let x = fp2_chip.get_assigned_value(&acc.x);
-                    let y = fp2_chip.get_assigned_value(&acc.y);
-                    x.map(|x| assert_eq!(answer.x, x));
-                    y.map(|y| assert_eq!(answer.y, y));
-                }
-
-                config.fp_chip.finalize(ctx);
-
-                #[cfg(feature = "display")]
-                if display {
-                    ctx.print_stats(&["Range"]);
-                }
-                Ok(())
-            },
-        )
-    }
+    let answer = _points.iter().fold(G2Affine::identity(), |a, b| (a + b).to_affine());
+    let x = fp2_chip.get_assigned_value(&acc.x);
+    let y = fp2_chip.get_assigned_value(&acc.y);
+    assert_eq!(answer.x, x);
+    assert_eq!(answer.y, y);
 }
 
 #[test]
 fn test_ec_add() {
-    let mut folder = std::path::PathBuf::new();
-    folder.push("./src/bn254");
-    folder.push("configs/ec_add_circuit.config");
-    set_var("EC_ADD_CONFIG", &folder);
-    let params_str = std::fs::read_to_string(folder.as_path())
-        .unwrap_or_else(|_| panic!("{folder:?} file should exist"));
-    let params: CircuitParams = serde_json::from_str(params_str.as_str()).unwrap();
-    let k = params.degree;
+    let path = "configs/bn254/ec_add_circuit.config";
+    let params: CircuitParams = serde_json::from_reader(
+        File::open(path).unwrap_or_else(|e| panic!("{path} does not exist: {e:?}")),
+    )
+    .unwrap();
 
-    let mut rng = OsRng;
-
-    let mut points = Vec::new();
-    for _ in 0..params.batch_size {
-        let new_pt = Some(G2Affine::random(&mut rng));
-        points.push(new_pt);
-    }
+    let k = params.degree;
+    let points = (0..params.batch_size).map(|_| G2Affine::random(OsRng)).collect_vec();
 
-    let circuit =
-        EcAddCircuit::<Fr> { points, batch_size: params.batch_size, _marker: PhantomData };
+    let mut builder = GateThreadBuilder::<Fr>::mock();
+    g2_add_test(builder.main(0), params, points);
 
-    let prover = MockProver::run(k, &circuit, vec![]).unwrap();
-    prover.assert_satisfied();
+    builder.config(k as usize, Some(20));
+    let circuit = RangeCircuitBuilder::mock(builder);
+    MockProver::run(k, &circuit, vec![]).unwrap().assert_satisfied();
 }
 
 #[test]
 fn bench_ec_add() -> Result<(), Box<dyn std::error::Error>> {
-    use std::io::BufRead;
-
-    let mut folder = std::path::PathBuf::new();
-    folder.push("./src/bn254");
-
-    folder.push("configs/bench_ec_add.config");
-    let bench_params_file = std::fs::File::open(folder.as_path())?;
-    folder.pop();
-    folder.pop();
+    let config_path = "configs/bn254/bench_ec_add.config";
+    let bench_params_file =
+        File::open(config_path).unwrap_or_else(|e| panic!("{config_path} does not exist: {e:?}"));
+    fs::create_dir_all("results/bn254").unwrap();
 
-    folder.push("results/ec_add_bench.csv");
-    let mut fs_results = std::fs::File::create(folder.as_path()).unwrap();
-    folder.pop();
-    folder.pop();
+    let results_path = "results/bn254/ec_add_bench.csv";
+    let mut fs_results = File::create(results_path).unwrap();
     writeln!(fs_results, "degree,num_advice,num_lookup,num_fixed,lookup_bits,limb_bits,num_limbs,batch_size,proof_time,proof_size,verify_time")?;
-    folder.push("data");
-    if !folder.is_dir() {
-        std::fs::create_dir(folder.as_path())?;
-    }
-
-    let mut params_folder = std::path::PathBuf::new();
-    params_folder.push("./params");
-    if !params_folder.is_dir() {
-        std::fs::create_dir(params_folder.as_path())?;
-    }
+    fs::create_dir_all("data").unwrap();
 
-    let bench_params_reader = std::io::BufReader::new(bench_params_file);
+    let bench_params_reader = BufReader::new(bench_params_file);
     for line in bench_params_reader.lines() {
         let bench_params: CircuitParams = serde_json::from_str(line.unwrap().as_str()).unwrap();
-        println!(
-            "---------------------- degree = {} ------------------------------",
-            bench_params.degree
-        );
+        let k = bench_params.degree;
+        println!("---------------------- degree = {k} ------------------------------",);
         let mut rng = OsRng;
 
-        {
-            folder.pop();
-            folder.push("configs/ec_add_circuit.tmp.config");
-            set_var("EC_ADD_CONFIG", &folder);
-            let mut f = std::fs::File::create(folder.as_path())?;
-            write!(f, "{}", serde_json::to_string(&bench_params).unwrap())?;
-            folder.pop();
-            folder.pop();
-            folder.push("data");
-        }
         let params_time = start_timer!(|| "Params construction");
-        let params = {
-            params_folder.push(format!("kzg_bn254_{}.srs", bench_params.degree));
-            let fd = std::fs::File::open(params_folder.as_path());
-            let params = if let Ok(mut f) = fd {
-                println!("Found existing params file. Reading params...");
-                ParamsKZG::<Bn256>::read(&mut f).unwrap()
-            } else {
-                println!("Creating new params file...");
-                let mut f = std::fs::File::create(params_folder.as_path())?;
-                let params = ParamsKZG::<Bn256>::setup(bench_params.degree, &mut rng);
-                params.write(&mut f).unwrap();
-                params
-            };
-            params_folder.pop();
-            params
-        };
+        let params = gen_srs(k);
         end_timer!(params_time);
 
-        let circuit = EcAddCircuit::<Fr> {
-            points: vec![None; bench_params.batch_size],
-            batch_size: bench_params.batch_size,
-            _marker: PhantomData,
+        let start0 = start_timer!(|| "Witness generation for empty circuit");
+        let circuit = {
+            let points = vec![G2Affine::generator(); bench_params.batch_size];
+            let mut builder = GateThreadBuilder::<Fr>::keygen();
+            g2_add_test(builder.main(0), bench_params, points);
+            builder.config(k as usize, Some(20));
+            RangeCircuitBuilder::keygen(builder)
         };
+        end_timer!(start0);
 
         let vk_time = start_timer!(|| "Generating vkey");
         let vk = keygen_vk(&params, &circuit)?;
         end_timer!(vk_time);
-
         let pk_time = start_timer!(|| "Generating pkey");
         let pk = keygen_pk(&params, vk, &circuit)?;
         end_timer!(pk_time);
 
-        let mut points = Vec::new();
-        for _ in 0..bench_params.batch_size {
-            let new_pt = Some(G2Affine::random(&mut rng));
-            points.push(new_pt);
-        }
-
-        let proof_circuit = EcAddCircuit::<Fr> {
-            points,
-            batch_size: bench_params.batch_size,
-            _marker: PhantomData,
-        };
+        let break_points = circuit.0.break_points.take();
+        drop(circuit);
 
         // create a proof
+        let points = (0..bench_params.batch_size).map(|_| G2Affine::random(&mut rng)).collect_vec();
         let proof_time = start_timer!(|| "Proving time");
+        let proof_circuit = {
+            let mut builder = GateThreadBuilder::<Fr>::prover();
+            g2_add_test(builder.main(0), bench_params, points);
+            builder.config(k as usize, Some(20));
+            RangeCircuitBuilder::prover(builder, break_points)
+        };
         let mut transcript = Blake2bWrite::<_, _, Challenge255<_>>::init(vec![]);
         create_proof::<
             KZGCommitmentScheme<Bn256>,
@@ -299,8 +128,8 @@ fn bench_ec_add() -> Result<(), Box<dyn std::error::Error>> {
         end_timer!(proof_time);
 
         let proof_size = {
-            folder.push(format!(
-                "ec_add_circuit_proof_{}_{}_{}_{}_{}_{}_{}_{}.data",
+            let path = format!(
+                "data/ec_add_circuit_proof_{}_{}_{}_{}_{}_{}_{}_{}.data",
                 bench_params.degree,
                 bench_params.num_advice,
                 bench_params.num_lookup_advice,
@@ -309,27 +138,27 @@ fn bench_ec_add() -> Result<(), Box<dyn std::error::Error>> {
                 bench_params.limb_bits,
                 bench_params.num_limbs,
                 bench_params.batch_size,
-            ));
-            let mut fd = std::fs::File::create(folder.as_path()).unwrap();
-            folder.pop();
-            fd.write_all(&proof).unwrap();
-            fd.metadata().unwrap().len()
+            );
+            let mut fd = File::create(&path)?;
+            fd.write_all(&proof)?;
+            let size = fd.metadata().unwrap().len();
+            fs::remove_file(path)?;
+            size
         };
 
         let verify_time = start_timer!(|| "Verify time");
         let verifier_params = params.verifier_params();
         let strategy = SingleStrategy::new(&params);
         let mut transcript = Blake2bRead::<_, _, Challenge255<_>>::init(&proof[..]);
-        assert!(verify_proof::<
+        verify_proof::<
             KZGCommitmentScheme<Bn256>,
             VerifierSHPLONK<'_, Bn256>,
             Challenge255<G1Affine>,
             Blake2bRead<&[u8], G1Affine, Challenge255<G1Affine>>,
             SingleStrategy<'_, Bn256>,
         >(verifier_params, pk.get_vk(), strategy, &[&[]], &mut transcript)
-        .is_ok());
+        .unwrap();
         end_timer!(verify_time);
-        fs::remove_file(var("EC_ADD_CONFIG").unwrap())?;
 
         writeln!(
             fs_results,
diff --git a/halo2-ecc/src/bn254/tests/fixed_base_msm.rs b/halo2-ecc/src/bn254/tests/fixed_base_msm.rs
index c7239d9d..f16560f4 100644
--- a/halo2-ecc/src/bn254/tests/fixed_base_msm.rs
+++ b/halo2-ecc/src/bn254/tests/fixed_base_msm.rs
@@ -1,12 +1,30 @@
-use std::{env::var, fs::File};
+use std::{
+    fs::{self, File},
+    io::{BufRead, BufReader},
+    sync::Mutex,
+};
 
 #[allow(unused_imports)]
 use crate::ecc::fixed_base::FixedEcPoint;
+use crate::fields::{FpStrategy, PrimeField};
 
 use super::*;
-use halo2_base::{halo2_proofs::halo2curves::bn256::G1, SKIP_FIRST_PASS};
-
-#[derive(Serialize, Deserialize, Debug)]
+#[allow(unused_imports)]
+use ff::PrimeField as _;
+use halo2_base::{
+    gates::{
+        builder::{
+            CircuitBuilderStage, GateThreadBuilder, MultiPhaseThreadBreakPoints,
+            RangeCircuitBuilder,
+        },
+        RangeChip,
+    },
+    halo2_proofs::halo2curves::bn256::G1,
+    utils::fs::gen_srs,
+};
+use rand_core::OsRng;
+
+#[derive(Clone, Copy, Debug, Serialize, Deserialize)]
 struct MSMCircuitParams {
     strategy: FpStrategy,
     degree: u32,
@@ -21,274 +39,108 @@ struct MSMCircuitParams {
     clump_factor: usize,
 }
 
-#[derive(Clone, Debug)]
-struct MSMConfig<F: PrimeField> {
-    fp_chip: FpChip<F>,
-    batch_size: usize,
-    _radix: usize,
-    _clump_factor: usize,
-}
-
-impl<F: PrimeField> MSMConfig<F> {
-    pub fn configure(
-        meta: &mut ConstraintSystem<F>,
-        strategy: FpStrategy,
-        num_advice: &[usize],
-        num_lookup_advice: &[usize],
-        num_fixed: usize,
-        lookup_bits: usize,
-        limb_bits: usize,
-        num_limbs: usize,
-        p: BigUint,
-        batch_size: usize,
-        _radix: usize,
-        _clump_factor: usize,
-        context_id: usize,
-        k: usize,
-    ) -> Self {
-        let fp_chip = FpChip::<F>::configure(
-            meta,
-            strategy,
-            num_advice,
-            num_lookup_advice,
-            num_fixed,
-            lookup_bits,
-            limb_bits,
-            num_limbs,
-            p,
-            context_id,
-            k,
-        );
-        MSMConfig { fp_chip, batch_size, _radix, _clump_factor }
+fn fixed_base_msm_test(
+    thread_pool: &Mutex<GateThreadBuilder<Fr>>,
+    params: MSMCircuitParams,
+    bases: Vec<G1Affine>,
+    scalars: Vec<Fr>,
+) {
+    std::env::set_var("LOOKUP_BITS", params.lookup_bits.to_string());
+    let range = RangeChip::<Fr>::default(params.lookup_bits);
+    let fp_chip = FpChip::<Fr>::new(&range, params.limb_bits, params.num_limbs);
+    let ecc_chip = EccChip::new(&fp_chip);
+
+    let mut builder = thread_pool.lock().unwrap();
+    let scalars_assigned = scalars
+        .iter()
+        .map(|scalar| vec![builder.main(0).load_witness(*scalar)])
+        .collect::<Vec<_>>();
+    drop(builder);
+
+    let msm = ecc_chip.fixed_base_msm(thread_pool, &bases, scalars_assigned, Fr::NUM_BITS as usize);
+
+    let mut elts: Vec<G1> = Vec::new();
+    for (base, scalar) in bases.iter().zip(scalars.iter()) {
+        elts.push(base * scalar);
     }
-}
+    let msm_answer = elts.into_iter().reduce(|a, b| a + b).unwrap().to_affine();
 
-struct MSMCircuit<F: PrimeField> {
-    bases: Vec<G1Affine>,
-    scalars: Vec<Option<Fr>>,
-    _marker: PhantomData<F>,
+    let msm_x = msm.x.value;
+    let msm_y = msm.y.value;
+    assert_eq!(msm_x, fe_to_biguint(&msm_answer.x).into());
+    assert_eq!(msm_y, fe_to_biguint(&msm_answer.y).into());
 }
 
-impl Circuit<Fr> for MSMCircuit<Fr> {
-    type Config = MSMConfig<Fr>;
-    type FloorPlanner = SimpleFloorPlanner;
-
-    fn without_witnesses(&self) -> Self {
-        Self {
-            bases: self.bases.clone(),
-            scalars: vec![None; self.scalars.len()],
-            _marker: PhantomData,
+fn random_fixed_base_msm_circuit(
+    params: MSMCircuitParams,
+    stage: CircuitBuilderStage,
+    break_points: Option<MultiPhaseThreadBreakPoints>,
+) -> RangeCircuitBuilder<Fr> {
+    let k = params.degree as usize;
+    let builder = match stage {
+        CircuitBuilderStage::Mock => GateThreadBuilder::mock(),
+        CircuitBuilderStage::Prover => GateThreadBuilder::prover(),
+        CircuitBuilderStage::Keygen => GateThreadBuilder::keygen(),
+    };
+    let builder = Mutex::new(builder);
+
+    let (bases, scalars): (Vec<_>, Vec<_>) =
+        (0..params.batch_size).map(|_| (G1Affine::random(OsRng), Fr::random(OsRng))).unzip();
+    let start0 = start_timer!(|| format!("Witness generation for circuit in {stage:?} stage"));
+    fixed_base_msm_test(&builder, params, bases, scalars);
+
+    let builder = builder.into_inner().unwrap();
+    let circuit = match stage {
+        CircuitBuilderStage::Mock => {
+            builder.config(k, Some(20));
+            RangeCircuitBuilder::mock(builder)
         }
-    }
-
-    fn configure(meta: &mut ConstraintSystem<Fr>) -> Self::Config {
-        let path = var("FIXED_MSM_CONFIG")
-            .unwrap_or_else(|_| "./src/bn254/configs/fixed_msm_circuit.config".to_string());
-        let params: MSMCircuitParams = serde_json::from_reader(
-            File::open(&path).unwrap_or_else(|_| panic!("{path:?} file should exist")),
-        )
-        .unwrap();
-
-        MSMConfig::<Fr>::configure(
-            meta,
-            params.strategy,
-            &[params.num_advice],
-            &[params.num_lookup_advice],
-            params.num_fixed,
-            params.lookup_bits,
-            params.limb_bits,
-            params.num_limbs,
-            BigUint::from_str_radix(&Fq::MODULUS[2..], 16).unwrap(),
-            params.batch_size,
-            params.radix,
-            params.clump_factor,
-            0,
-            params.degree as usize,
-        )
-    }
-
-    fn synthesize(
-        &self,
-        config: Self::Config,
-        mut layouter: impl Layouter<Fr>,
-    ) -> Result<(), Error> {
-        assert_eq!(config.batch_size, self.scalars.len());
-        assert_eq!(config.batch_size, self.bases.len());
-
-        config.fp_chip.load_lookup_table(&mut layouter)?;
-
-        let mut first_pass = SKIP_FIRST_PASS;
-        layouter.assign_region(
-            || "fixed base msm",
-            |region| {
-                if first_pass {
-                    first_pass = false;
-                    return Ok(());
-                }
-                let witness_time = start_timer!(|| "Witness generation");
-
-                let mut aux = config.fp_chip.new_context(region);
-                let ctx = &mut aux;
-
-                let mut scalars_assigned = Vec::new();
-                for scalar in &self.scalars {
-                    let assignment = config
-                        .fp_chip
-                        .range
-                        .gate
-                        .assign_witnesses(ctx, vec![scalar.map_or(Value::unknown(), Value::known)]);
-                    scalars_assigned.push(assignment);
-                }
-
-                let ecc_chip = EccChip::construct(config.fp_chip.clone());
-
-                // baseline
-                /*
-                let msm = {
-                    let sm = self.bases.iter().zip(scalars_assigned.iter()).map(|(base, scalar)|
-                        ecc_chip.fixed_base_scalar_mult(ctx, &FixedEcPoint::<Fr, G1Affine>::from_g1(base, config.fp_chip.num_limbs, config.fp_chip.limb_bits), scalar, Fr::NUM_BITS as usize, 4)).collect::<Vec<_>>();
-                    ecc_chip.sum::<G1Affine>(ctx, sm.iter())
-                };
-                */
-
-                let msm = ecc_chip.fixed_base_msm::<G1Affine>(
-                    ctx,
-                    &self.bases,
-                    &scalars_assigned,
-                    Fr::NUM_BITS as usize,
-                    config._radix,
-                    config._clump_factor,
-                );
-
-                config.fp_chip.finalize(ctx);
-                end_timer!(witness_time);
-
-                #[cfg(feature = "display")]
-                if self.scalars[0].is_some() {
-                    let mut elts: Vec<G1> = Vec::new();
-                    for (base, scalar) in self.bases.iter().zip(&self.scalars) {
-                        elts.push(base * biguint_to_fe::<Fr>(&fe_to_biguint(&scalar.unwrap())));
-                    }
-                    let msm_answer = elts.into_iter().reduce(|a, b| a + b).unwrap().to_affine();
-
-                    let msm_x = value_to_option(msm.x.value).unwrap();
-                    let msm_y = value_to_option(msm.y.value).unwrap();
-                    assert_eq!(msm_x, fe_to_biguint(&msm_answer.x).into());
-                    assert_eq!(msm_y, fe_to_biguint(&msm_answer.y).into());
-                }
-
-                #[cfg(feature = "display")]
-                if self.scalars[0].is_some() {
-                    ctx.print_stats(&["Range"]);
-                }
-                Ok(())
-            },
-        )
-    }
+        CircuitBuilderStage::Keygen => {
+            builder.config(k, Some(20));
+            RangeCircuitBuilder::keygen(builder)
+        }
+        CircuitBuilderStage::Prover => RangeCircuitBuilder::prover(builder, break_points.unwrap()),
+    };
+    end_timer!(start0);
+    circuit
 }
 
-#[cfg(test)]
 #[test]
 fn test_fixed_base_msm() {
-    use std::env::set_var;
-
-    use crate::halo2_proofs::arithmetic::Field;
-
-    let mut folder = std::path::PathBuf::new();
-    folder.push("./src/bn254");
-    folder.push("configs/fixed_msm_circuit.config");
-    set_var("FIXED_MSM_CONFIG", &folder);
-    let params_str = std::fs::read_to_string(folder.as_path())
-        .expect("src/bn254/configs/fixed_msm_circuit.config file should exist");
-    let params: MSMCircuitParams = serde_json::from_str(params_str.as_str()).unwrap();
-    let k = params.degree;
-
-    let mut rng = rand::thread_rng();
-
-    let mut bases = Vec::new();
-    let mut scalars = Vec::new();
-    for _ in 0..params.batch_size {
-        bases.push(G1Affine::random(&mut rng));
-
-        let new_scalar = Some(Fr::random(&mut rng));
-        scalars.push(new_scalar);
-    }
-
-    let circuit = MSMCircuit::<Fr> { bases, scalars, _marker: PhantomData };
-
-    let prover = MockProver::run(k, &circuit, vec![]).unwrap();
-    prover.assert_satisfied();
+    let path = "configs/bn254/fixed_msm_circuit.config";
+    let params: MSMCircuitParams = serde_json::from_reader(
+        File::open(path).unwrap_or_else(|e| panic!("{path} does not exist: {e:?}")),
+    )
+    .unwrap();
+
+    let circuit = random_fixed_base_msm_circuit(params, CircuitBuilderStage::Mock, None);
+    MockProver::run(params.degree, &circuit, vec![]).unwrap().assert_satisfied();
 }
 
-#[cfg(test)]
 #[test]
 fn bench_fixed_base_msm() -> Result<(), Box<dyn std::error::Error>> {
-    use std::{
-        env::{set_var, var},
-        fs,
-        io::BufRead,
-    };
-
-    use halo2_base::utils::fs::gen_srs;
-    use rand_core::OsRng;
-
-    let mut folder = std::path::PathBuf::new();
-    folder.push("./src/bn254");
-
-    folder.push("configs/bench_fixed_msm.config");
-    let bench_params_file = std::fs::File::open(folder.as_path())?;
-    folder.pop();
-    folder.pop();
-
-    folder.push("results/fixed_msm_bench.csv");
-    let mut fs_results = std::fs::File::create(folder.as_path()).unwrap();
-    folder.pop();
-    folder.pop();
+    let config_path = "configs/bn254/bench_fixed_msm.config";
+    let bench_params_file =
+        File::open(config_path).unwrap_or_else(|e| panic!("{config_path} does not exist: {e:?}"));
+    fs::create_dir_all("results/bn254").unwrap();
+    fs::create_dir_all("data").unwrap();
+
+    let results_path = "results/bn254/fixed_msm_bench.csv";
+    let mut fs_results = File::create(results_path).unwrap();
     writeln!(fs_results, "degree,num_advice,num_lookup,num_fixed,lookup_bits,limb_bits,num_limbs,batch_size,proof_time,proof_size,verify_time")?;
-    folder.push("data");
-    if !folder.is_dir() {
-        std::fs::create_dir(folder.as_path())?;
-    }
 
-    let mut params_folder = std::path::PathBuf::new();
-    params_folder.push("./params");
-    if !params_folder.is_dir() {
-        std::fs::create_dir(params_folder.as_path())?;
-    }
-
-    let bench_params_reader = std::io::BufReader::new(bench_params_file);
+    let bench_params_reader = BufReader::new(bench_params_file);
     for line in bench_params_reader.lines() {
         let bench_params: MSMCircuitParams = serde_json::from_str(line.unwrap().as_str()).unwrap();
-        println!(
-            "---------------------- degree = {} ------------------------------",
-            bench_params.degree
-        );
-        let mut rng = OsRng;
-
-        {
-            folder.pop();
-            folder.push("configs/fixed_msm_circuit.tmp.config");
-            set_var("FIXED_MSM_CONFIG", &folder);
-            let mut f = std::fs::File::create(folder.as_path())?;
-            write!(f, "{}", serde_json::to_string(&bench_params).unwrap())?;
-            folder.pop();
-            folder.pop();
-            folder.push("data");
-        }
-        let params = gen_srs(bench_params.degree);
+        let k = bench_params.degree;
+        println!("---------------------- degree = {k} ------------------------------",);
+        let rng = OsRng;
 
+        let params = gen_srs(k);
         println!("{bench_params:?}");
 
-        let mut bases = Vec::new();
-        let mut scalars = Vec::new();
-        for _idx in 0..bench_params.batch_size {
-            bases.push(G1Affine::random(&mut rng));
-
-            let new_scalar = Some(Fr::random(&mut rng));
-            scalars.push(new_scalar);
-        }
         let circuit =
-            MSMCircuit::<Fr> { bases, scalars: vec![None; scalars.len()], _marker: PhantomData };
+            random_fixed_base_msm_circuit(bench_params, CircuitBuilderStage::Keygen, None);
 
         let vk_time = start_timer!(|| "Generating vkey");
         let vk = keygen_vk(&params, &circuit)?;
@@ -298,9 +150,15 @@ fn bench_fixed_base_msm() -> Result<(), Box<dyn std::error::Error>> {
         let pk = keygen_pk(&params, vk, &circuit)?;
         end_timer!(pk_time);
 
-        let circuit = MSMCircuit::<Fr> { scalars, ..circuit };
+        let break_points = circuit.0.break_points.take();
+        drop(circuit);
         // create a proof
         let proof_time = start_timer!(|| "Proving time");
+        let circuit = random_fixed_base_msm_circuit(
+            bench_params,
+            CircuitBuilderStage::Prover,
+            Some(break_points),
+        );
         let mut transcript = Blake2bWrite::<_, _, Challenge255<_>>::init(vec![]);
         create_proof::<
             KZGCommitmentScheme<Bn256>,
@@ -308,14 +166,15 @@ fn bench_fixed_base_msm() -> Result<(), Box<dyn std::error::Error>> {
             Challenge255<G1Affine>,
             _,
             Blake2bWrite<Vec<u8>, G1Affine, Challenge255<G1Affine>>,
-            MSMCircuit<Fr>,
+            _,
         >(&params, &pk, &[circuit], &[&[]], rng, &mut transcript)?;
         let proof = transcript.finalize();
         end_timer!(proof_time);
 
         let proof_size = {
-            folder.push(format!(
-                "msm_circuit_proof_{}_{}_{}_{}_{}_{}_{}_{}.data",
+            let path = format!(
+                "data/\
+                msm_circuit_proof_{}_{}_{}_{}_{}_{}_{}_{}.data",
                 bench_params.degree,
                 bench_params.num_advice,
                 bench_params.num_lookup_advice,
@@ -324,27 +183,27 @@ fn bench_fixed_base_msm() -> Result<(), Box<dyn std::error::Error>> {
                 bench_params.limb_bits,
                 bench_params.num_limbs,
                 bench_params.batch_size,
-            ));
-            let mut fd = std::fs::File::create(folder.as_path()).unwrap();
-            folder.pop();
-            fd.write_all(&proof).unwrap();
-            fd.metadata().unwrap().len()
+            );
+            let mut fd = File::create(&path)?;
+            fd.write_all(&proof)?;
+            let size = fd.metadata().unwrap().len();
+            fs::remove_file(path)?;
+            size
         };
 
         let verify_time = start_timer!(|| "Verify time");
         let verifier_params = params.verifier_params();
         let strategy = SingleStrategy::new(&params);
         let mut transcript = Blake2bRead::<_, _, Challenge255<_>>::init(&proof[..]);
-        assert!(verify_proof::<
+        verify_proof::<
             KZGCommitmentScheme<Bn256>,
             VerifierSHPLONK<'_, Bn256>,
             Challenge255<G1Affine>,
             Blake2bRead<&[u8], G1Affine, Challenge255<G1Affine>>,
             SingleStrategy<'_, Bn256>,
         >(verifier_params, pk.get_vk(), strategy, &[&[]], &mut transcript)
-        .is_ok());
+        .unwrap();
         end_timer!(verify_time);
-        fs::remove_file(var("FIXED_MSM_CONFIG").unwrap())?;
 
         writeln!(
             fs_results,
diff --git a/halo2-ecc/src/bn254/tests/mod.rs b/halo2-ecc/src/bn254/tests/mod.rs
index 763bd127..b373d51e 100644
--- a/halo2-ecc/src/bn254/tests/mod.rs
+++ b/halo2-ecc/src/bn254/tests/mod.rs
@@ -1,34 +1,25 @@
 #![allow(non_snake_case)]
-use ark_std::{end_timer, start_timer};
-use group::Curve;
-use serde::{Deserialize, Serialize};
-use std::io::Write;
-use std::marker::PhantomData;
-
 use super::pairing::PairingChip;
 use super::*;
 use crate::halo2_proofs::{
-    circuit::{Layouter, SimpleFloorPlanner, Value},
     dev::MockProver,
     halo2curves::bn256::{pairing, Bn256, Fr, G1Affine},
     plonk::*,
-    poly::commitment::{Params, ParamsProver},
+    poly::commitment::ParamsProver,
     poly::kzg::{
-        commitment::{KZGCommitmentScheme, ParamsKZG},
+        commitment::KZGCommitmentScheme,
         multiopen::{ProverSHPLONK, VerifierSHPLONK},
         strategy::SingleStrategy,
     },
     transcript::{Blake2bRead, Blake2bWrite, Challenge255},
     transcript::{TranscriptReadBuffer, TranscriptWriterBuffer},
 };
-use crate::{ecc::EccChip, fields::fp::FpStrategy};
-use halo2_base::{
-    gates::GateInstructions,
-    utils::{biguint_to_fe, fe_to_biguint, value_to_option, PrimeField},
-    QuantumCell::Witness,
-};
-use num_bigint::BigUint;
-use num_traits::Num;
+use crate::{ecc::EccChip, fields::PrimeField};
+use ark_std::{end_timer, start_timer};
+use group::Curve;
+use halo2_base::utils::fe_to_biguint;
+use serde::{Deserialize, Serialize};
+use std::io::Write;
 
 pub mod ec_add;
 pub mod fixed_base_msm;
diff --git a/halo2-ecc/src/bn254/tests/msm.rs b/halo2-ecc/src/bn254/tests/msm.rs
index 4195c0f8..269c757c 100644
--- a/halo2-ecc/src/bn254/tests/msm.rs
+++ b/halo2-ecc/src/bn254/tests/msm.rs
@@ -1,11 +1,25 @@
-use std::{env::var, fs::File};
-
-use crate::halo2_proofs::arithmetic::FieldExt;
-use halo2_base::SKIP_FIRST_PASS;
+use crate::fields::FpStrategy;
+use ff::{Field, PrimeField};
+use halo2_base::{
+    gates::{
+        builder::{
+            CircuitBuilderStage, GateThreadBuilder, MultiPhaseThreadBreakPoints,
+            RangeCircuitBuilder,
+        },
+        RangeChip,
+    },
+    utils::fs::gen_srs,
+};
+use rand_core::OsRng;
+use std::{
+    fs::{self, File},
+    io::{BufRead, BufReader},
+    sync::Mutex,
+};
 
 use super::*;
 
-#[derive(Serialize, Deserialize, Debug)]
+#[derive(Clone, Copy, Debug, Serialize, Deserialize)]
 struct MSMCircuitParams {
     strategy: FpStrategy,
     degree: u32,
@@ -19,346 +33,133 @@ struct MSMCircuitParams {
     window_bits: usize,
 }
 
-#[derive(Clone, Debug)]
-struct MSMConfig<F: PrimeField> {
-    fp_chip: FpChip<F>,
-    batch_size: usize,
+fn msm_test(
+    thread_pool: &Mutex<GateThreadBuilder<Fr>>,
+    params: MSMCircuitParams,
+    bases: Vec<G1Affine>,
+    scalars: Vec<Fr>,
     window_bits: usize,
+) {
+    std::env::set_var("LOOKUP_BITS", params.lookup_bits.to_string());
+    let range = RangeChip::<Fr>::default(params.lookup_bits);
+    let fp_chip = FpChip::<Fr>::new(&range, params.limb_bits, params.num_limbs);
+    let ecc_chip = EccChip::new(&fp_chip);
+
+    let mut builder = thread_pool.lock().unwrap();
+    let ctx = builder.main(0);
+    let scalars_assigned =
+        scalars.iter().map(|scalar| vec![ctx.load_witness(*scalar)]).collect::<Vec<_>>();
+    let bases_assigned =
+        bases.iter().map(|base| ecc_chip.load_private(ctx, (base.x, base.y))).collect::<Vec<_>>();
+    drop(builder);
+
+    let msm = ecc_chip.variable_base_msm_in::<G1Affine>(
+        thread_pool,
+        &bases_assigned,
+        scalars_assigned,
+        Fr::NUM_BITS as usize,
+        window_bits,
+        0,
+    );
+
+    let msm_answer = bases
+        .iter()
+        .zip(scalars.iter())
+        .map(|(base, scalar)| base * scalar)
+        .reduce(|a, b| a + b)
+        .unwrap()
+        .to_affine();
+
+    let msm_x = msm.x.value;
+    let msm_y = msm.y.value;
+    assert_eq!(msm_x, fe_to_biguint(&msm_answer.x).into());
+    assert_eq!(msm_y, fe_to_biguint(&msm_answer.y).into());
 }
 
-impl<F: PrimeField> MSMConfig<F> {
-    pub fn configure(
-        meta: &mut ConstraintSystem<F>,
-        strategy: FpStrategy,
-        num_advice: &[usize],
-        num_lookup_advice: &[usize],
-        num_fixed: usize,
-        lookup_bits: usize,
-        limb_bits: usize,
-        num_limbs: usize,
-        p: BigUint,
-        batch_size: usize,
-        window_bits: usize,
-        context_id: usize,
-        k: usize,
-    ) -> Self {
-        let fp_chip = FpChip::<F>::configure(
-            meta,
-            strategy,
-            num_advice,
-            num_lookup_advice,
-            num_fixed,
-            lookup_bits,
-            limb_bits,
-            num_limbs,
-            p,
-            context_id,
-            k,
-        );
-        MSMConfig { fp_chip, batch_size, window_bits }
-    }
-}
-
-struct MSMCircuit<F: PrimeField> {
-    bases: Vec<Option<G1Affine>>,
-    scalars: Vec<Option<Fr>>,
-    batch_size: usize,
-    _marker: PhantomData<F>,
-}
-
-impl<F: PrimeField> Default for MSMCircuit<F> {
-    fn default() -> Self {
-        Self {
-            bases: vec![None; 10],
-            scalars: vec![None; 10],
-            batch_size: 10,
-            _marker: PhantomData,
+fn random_msm_circuit(
+    params: MSMCircuitParams,
+    stage: CircuitBuilderStage,
+    break_points: Option<MultiPhaseThreadBreakPoints>,
+) -> RangeCircuitBuilder<Fr> {
+    let k = params.degree as usize;
+    let builder = match stage {
+        CircuitBuilderStage::Mock => GateThreadBuilder::mock(),
+        CircuitBuilderStage::Prover => GateThreadBuilder::prover(),
+        CircuitBuilderStage::Keygen => GateThreadBuilder::keygen(),
+    };
+    let builder = Mutex::new(builder);
+
+    let (bases, scalars): (Vec<_>, Vec<_>) =
+        (0..params.batch_size).map(|_| (G1Affine::random(OsRng), Fr::random(OsRng))).unzip();
+    let start0 = start_timer!(|| format!("Witness generation for circuit in {stage:?} stage"));
+    msm_test(&builder, params, bases, scalars, params.window_bits);
+
+    let builder = builder.into_inner().unwrap();
+    let circuit = match stage {
+        CircuitBuilderStage::Mock => {
+            builder.config(k, Some(20));
+            RangeCircuitBuilder::mock(builder)
         }
-    }
-}
-
-impl Circuit<Fr> for MSMCircuit<Fr> {
-    type Config = MSMConfig<Fr>;
-    type FloorPlanner = SimpleFloorPlanner;
-
-    fn without_witnesses(&self) -> Self {
-        Self {
-            bases: vec![None; self.batch_size],
-            scalars: vec![None; self.batch_size],
-            batch_size: self.batch_size,
-            _marker: PhantomData,
+        CircuitBuilderStage::Keygen => {
+            builder.config(k, Some(20));
+            RangeCircuitBuilder::keygen(builder)
         }
-    }
-
-    fn configure(meta: &mut ConstraintSystem<Fr>) -> Self::Config {
-        let path = var("MSM_CONFIG")
-            .unwrap_or_else(|_| "./src/bn254/configs/msm_circuit.config".to_string());
-        let params: MSMCircuitParams = serde_json::from_reader(
-            File::open(&path).unwrap_or_else(|_| panic!("{path:?} file should exist")),
-        )
-        .unwrap();
-
-        MSMConfig::<Fr>::configure(
-            meta,
-            params.strategy,
-            &[params.num_advice],
-            &[params.num_lookup_advice],
-            params.num_fixed,
-            params.lookup_bits,
-            params.limb_bits,
-            params.num_limbs,
-            BigUint::from_str_radix(&Fq::MODULUS[2..], 16).unwrap(),
-            params.batch_size,
-            params.window_bits,
-            0,
-            params.degree as usize,
-        )
-    }
-
-    fn synthesize(
-        &self,
-        config: Self::Config,
-        mut layouter: impl Layouter<Fr>,
-    ) -> Result<(), Error> {
-        assert_eq!(config.batch_size, self.scalars.len());
-        assert_eq!(config.batch_size, self.bases.len());
-
-        config.fp_chip.load_lookup_table(&mut layouter)?;
-
-        let mut first_pass = SKIP_FIRST_PASS;
-        layouter.assign_region(
-            || "MSM",
-            |region| {
-                if first_pass {
-                    first_pass = false;
-                    return Ok(());
-                }
-
-                let mut aux = config.fp_chip.new_context(region);
-                let ctx = &mut aux;
-
-                let witness_time = start_timer!(|| "Witness generation");
-                let mut scalars_assigned = Vec::new();
-                for scalar in &self.scalars {
-                    let assignment = config.fp_chip.range.gate.assign_region_smart(
-                        ctx,
-                        vec![Witness(scalar.map_or(Value::unknown(), Value::known))],
-                        vec![],
-                        vec![],
-                        vec![],
-                    );
-                    scalars_assigned.push(vec![assignment.last().unwrap().clone()]);
-                }
-
-                let ecc_chip = EccChip::construct(config.fp_chip.clone());
-                let mut bases_assigned = Vec::new();
-                for base in &self.bases {
-                    let base_assigned = ecc_chip.load_private(
-                        ctx,
-                        (
-                            base.map(|pt| Value::known(biguint_to_fe(&fe_to_biguint(&pt.x))))
-                                .unwrap_or(Value::unknown()),
-                            base.map(|pt| Value::known(biguint_to_fe(&fe_to_biguint(&pt.y))))
-                                .unwrap_or(Value::unknown()),
-                        ),
-                    );
-                    bases_assigned.push(base_assigned);
-                }
-
-                let msm = ecc_chip.variable_base_msm::<G1Affine>(
-                    ctx,
-                    &bases_assigned,
-                    &scalars_assigned,
-                    254,
-                    config.window_bits,
-                );
-
-                ecc_chip.field_chip.finalize(ctx);
-                end_timer!(witness_time);
-
-                if self.scalars[0].is_some() {
-                    let mut elts = Vec::new();
-                    for (base, scalar) in self.bases.iter().zip(&self.scalars) {
-                        elts.push(base.unwrap() * scalar.unwrap());
-                    }
-                    let msm_answer = elts.into_iter().reduce(|a, b| a + b).unwrap().to_affine();
-
-                    let msm_x = value_to_option(msm.x.value).unwrap();
-                    let msm_y = value_to_option(msm.y.value).unwrap();
-                    assert_eq!(msm_x, fe_to_biguint(&msm_answer.x).into());
-                    assert_eq!(msm_y, fe_to_biguint(&msm_answer.y).into());
-                }
-
-                #[cfg(feature = "display")]
-                if self.bases[0].is_some() {
-                    ctx.print_stats(&["Range"]);
-                }
-                Ok(())
-            },
-        )
-    }
+        CircuitBuilderStage::Prover => RangeCircuitBuilder::prover(builder, break_points.unwrap()),
+    };
+    end_timer!(start0);
+    circuit
 }
 
-#[cfg(test)]
 #[test]
 fn test_msm() {
-    use std::env::set_var;
-
-    use crate::halo2_proofs::arithmetic::Field;
-
-    let mut folder = std::path::PathBuf::new();
-    folder.push("./src/bn254");
-    folder.push("configs/msm_circuit.config");
-    set_var("MSM_CONFIG", &folder);
-    let params_str = std::fs::read_to_string(folder.as_path())
-        .expect("src/bn254/configs/msm_circuit.config file should exist");
-    let params: MSMCircuitParams = serde_json::from_str(params_str.as_str()).unwrap();
-    let k = params.degree;
-
-    let mut rng = rand::thread_rng();
-
-    let mut bases = Vec::new();
-    let mut scalars = Vec::new();
-    for _ in 0..params.batch_size {
-        let new_pt = Some(G1Affine::random(&mut rng));
-        bases.push(new_pt);
-
-        let new_scalar = Some(Fr::random(&mut rng));
-        scalars.push(new_scalar);
-    }
-
-    let circuit =
-        MSMCircuit::<Fr> { bases, scalars, batch_size: params.batch_size, _marker: PhantomData };
-
-    let prover = MockProver::run(k, &circuit, vec![]).unwrap();
-    prover.assert_satisfied();
+    let path = "configs/bn254/msm_circuit.config";
+    let params: MSMCircuitParams = serde_json::from_reader(
+        File::open(path).unwrap_or_else(|e| panic!("{path} does not exist: {e:?}")),
+    )
+    .unwrap();
+
+    let circuit = random_msm_circuit(params, CircuitBuilderStage::Mock, None);
+    MockProver::run(params.degree, &circuit, vec![]).unwrap().assert_satisfied();
 }
 
-#[cfg(test)]
 #[test]
 fn bench_msm() -> Result<(), Box<dyn std::error::Error>> {
-    use std::{env::set_var, fs, io::BufRead};
-
-    use rand_core::OsRng;
-
-    let mut folder = std::path::PathBuf::new();
-    folder.push("./src/bn254");
-
-    folder.push("configs/bench_msm.config");
-    let bench_params_file = std::fs::File::open(folder.as_path())?;
-    folder.pop();
-    folder.pop();
-
-    folder.push("results/msm_bench.csv");
-    let mut fs_results = std::fs::File::create(folder.as_path()).unwrap();
-    folder.pop();
-    folder.pop();
+    let config_path = "configs/bn254/bench_msm.config";
+    let bench_params_file =
+        File::open(config_path).unwrap_or_else(|e| panic!("{config_path} does not exist: {e:?}"));
+    fs::create_dir_all("results/bn254").unwrap();
+    fs::create_dir_all("data").unwrap();
+
+    let results_path = "results/bn254/msm_bench.csv";
+    let mut fs_results = File::create(results_path).unwrap();
     writeln!(fs_results, "degree,num_advice,num_lookup,num_fixed,lookup_bits,limb_bits,num_limbs,batch_size,window_bits,proof_time,proof_size,verify_time")?;
-    folder.push("data");
-    if !folder.is_dir() {
-        std::fs::create_dir(folder.as_path())?;
-    }
 
-    let mut params_folder = std::path::PathBuf::new();
-    params_folder.push("./params");
-    if !params_folder.is_dir() {
-        std::fs::create_dir(params_folder.as_path())?;
-    }
-
-    let bench_params_reader = std::io::BufReader::new(bench_params_file);
+    let bench_params_reader = BufReader::new(bench_params_file);
     for line in bench_params_reader.lines() {
         let bench_params: MSMCircuitParams = serde_json::from_str(line.unwrap().as_str()).unwrap();
-        println!(
-            "---------------------- degree = {} ------------------------------",
-            bench_params.degree
-        );
-        let mut rng = OsRng;
+        let k = bench_params.degree;
+        println!("---------------------- degree = {k} ------------------------------",);
+        let rng = OsRng;
 
-        {
-            folder.pop();
-            folder.push("configs/msm_circuit.tmp.config");
-            set_var("MSM_CONFIG", &folder);
-            let mut f = std::fs::File::create(folder.as_path())?;
-            write!(f, "{}", serde_json::to_string(&bench_params).unwrap())?;
-            folder.pop();
-            folder.pop();
-            folder.push("data");
-        }
-        let params_time = start_timer!(|| "Params construction");
-        let params = {
-            params_folder.push(format!("kzg_bn254_{}.srs", bench_params.degree));
-            let fd = std::fs::File::open(params_folder.as_path());
-            let params = if let Ok(mut f) = fd {
-                println!("Found existing params file. Reading params...");
-                ParamsKZG::<Bn256>::read(&mut f).unwrap()
-            } else {
-                println!("Creating new params file...");
-                let mut f = std::fs::File::create(params_folder.as_path())?;
-                let params = ParamsKZG::<Bn256>::setup(bench_params.degree, &mut rng);
-                params.write(&mut f).unwrap();
-                params
-            };
-            params_folder.pop();
-            params
-        };
-        end_timer!(params_time);
+        let params = gen_srs(k);
+        println!("{bench_params:?}");
 
-        let circuit = MSMCircuit::<Fr> {
-            bases: vec![None; bench_params.batch_size],
-            scalars: vec![None; bench_params.batch_size],
-            batch_size: bench_params.batch_size,
-            _marker: PhantomData,
-        };
+        let circuit = random_msm_circuit(bench_params, CircuitBuilderStage::Keygen, None);
 
         let vk_time = start_timer!(|| "Generating vkey");
         let vk = keygen_vk(&params, &circuit)?;
         end_timer!(vk_time);
 
-        /*
-        let vk_size = {
-            folder.push(format!(
-                "msm_circuit_{}_{}_{}_{}_{}_{}_{}_{}_{}.vkey",
-                bench_params.degree,
-                bench_params.num_advice,
-                bench_params.num_lookup_advice,
-                bench_params.num_fixed,
-                bench_params.lookup_bits,
-                bench_params.limb_bits,
-                bench_params.num_limbs,
-                bench_params.batch_size,
-                bench_params.window_bits,
-            ));
-            let mut fd = std::fs::File::create(folder.as_path()).unwrap();
-            folder.pop();
-            vk.write(&mut fd).unwrap();
-            fd.metadata().unwrap().len()
-        };
-        */
-
         let pk_time = start_timer!(|| "Generating pkey");
         let pk = keygen_pk(&params, vk, &circuit)?;
         end_timer!(pk_time);
 
-        let mut bases = Vec::new();
-        let mut scalars = Vec::new();
-        for _idx in 0..bench_params.batch_size {
-            let new_pt = Some(G1Affine::random(&mut rng));
-            bases.push(new_pt);
-
-            let new_scalar = Some(Fr::random(&mut rng));
-            scalars.push(new_scalar);
-        }
-
-        println!("{bench_params:?}");
-        let proof_circuit = MSMCircuit::<Fr> {
-            bases,
-            scalars,
-            batch_size: bench_params.batch_size,
-            _marker: PhantomData,
-        };
-
+        let break_points = circuit.0.break_points.take();
+        drop(circuit);
         // create a proof
         let proof_time = start_timer!(|| "Proving time");
+        let circuit =
+            random_msm_circuit(bench_params, CircuitBuilderStage::Prover, Some(break_points));
         let mut transcript = Blake2bWrite::<_, _, Challenge255<_>>::init(vec![]);
         create_proof::<
             KZGCommitmentScheme<Bn256>,
@@ -366,14 +167,14 @@ fn bench_msm() -> Result<(), Box<dyn std::error::Error>> {
             Challenge255<G1Affine>,
             _,
             Blake2bWrite<Vec<u8>, G1Affine, Challenge255<G1Affine>>,
-            MSMCircuit<Fr>,
-        >(&params, &pk, &[proof_circuit], &[&[]], rng, &mut transcript)?;
+            _,
+        >(&params, &pk, &[circuit], &[&[]], rng, &mut transcript)?;
         let proof = transcript.finalize();
         end_timer!(proof_time);
 
         let proof_size = {
-            folder.push(format!(
-                "msm_circuit_proof_{}_{}_{}_{}_{}_{}_{}_{}_{}.data",
+            let path = format!(
+                "data/msm_circuit_proof_{}_{}_{}_{}_{}_{}_{}_{}_{}.data",
                 bench_params.degree,
                 bench_params.num_advice,
                 bench_params.num_lookup_advice,
@@ -383,29 +184,28 @@ fn bench_msm() -> Result<(), Box<dyn std::error::Error>> {
                 bench_params.num_limbs,
                 bench_params.batch_size,
                 bench_params.window_bits
-            ));
-            let mut fd = std::fs::File::create(folder.as_path()).unwrap();
-            folder.pop();
-            fd.write_all(&proof).unwrap();
-            fd.metadata().unwrap().len()
+            );
+            let mut fd = File::create(&path)?;
+            fd.write_all(&proof)?;
+            let size = fd.metadata()?.len();
+            fs::remove_file(path)?; // the file only served to measure the proof's on-disk size
+            size
         };
 
         let verify_time = start_timer!(|| "Verify time");
         let verifier_params = params.verifier_params();
         let strategy = SingleStrategy::new(&params);
         let mut transcript = Blake2bRead::<_, _, Challenge255<_>>::init(&proof[..]);
-        assert!(verify_proof::<
+        verify_proof::<
             KZGCommitmentScheme<Bn256>,
             VerifierSHPLONK<'_, Bn256>,
             Challenge255<G1Affine>,
             Blake2bRead<&[u8], G1Affine, Challenge255<G1Affine>>,
             SingleStrategy<'_, Bn256>,
         >(verifier_params, pk.get_vk(), strategy, &[&[]], &mut transcript)
-        .is_ok());
+        .unwrap();
         end_timer!(verify_time);
 
-        fs::remove_file(var("MSM_CONFIG").unwrap())?;
-
         writeln!(
             fs_results,
             "{},{},{},{},{},{},{},{},{},{:?},{},{:?}",
diff --git a/halo2-ecc/src/bn254/tests/pairing.rs b/halo2-ecc/src/bn254/tests/pairing.rs
index 20e5be89..e8194f58 100644
--- a/halo2-ecc/src/bn254/tests/pairing.rs
+++ b/halo2-ecc/src/bn254/tests/pairing.rs
@@ -1,14 +1,26 @@
 use std::{
-    env::{set_var, var},
     fs::{self, File},
+    io::{BufRead, BufReader},
 };
 
 use super::*;
-use crate::halo2_proofs::halo2curves::bn256::G2Affine;
-use halo2_base::SKIP_FIRST_PASS;
+use crate::fields::FieldChip;
+use crate::{fields::FpStrategy, halo2_proofs::halo2curves::bn256::G2Affine};
+use halo2_base::{
+    gates::{
+        builder::{
+            CircuitBuilderStage, GateThreadBuilder, MultiPhaseThreadBreakPoints,
+            RangeCircuitBuilder,
+        },
+        RangeChip,
+    },
+    halo2_proofs::poly::kzg::multiopen::{ProverGWC, VerifierGWC},
+    utils::fs::gen_srs,
+    Context,
+};
 use rand_core::OsRng;
 
-#[derive(Serialize, Deserialize)]
+#[derive(Clone, Copy, Debug, Serialize, Deserialize)]
 struct PairingCircuitParams {
     strategy: FpStrategy,
     degree: u32,
@@ -20,257 +32,111 @@ struct PairingCircuitParams {
     num_limbs: usize,
 }
 
-#[derive(Default)]
-struct PairingCircuit<F: PrimeField> {
-    P: Option<G1Affine>,
-    Q: Option<G2Affine>,
-    _marker: PhantomData<F>,
+fn pairing_test<F: PrimeField>(
+    ctx: &mut Context<F>,
+    params: PairingCircuitParams,
+    P: G1Affine,
+    Q: G2Affine,
+) {
+    std::env::set_var("LOOKUP_BITS", params.lookup_bits.to_string());
+    let range = RangeChip::<F>::default(params.lookup_bits);
+    let fp_chip = FpChip::<F>::new(&range, params.limb_bits, params.num_limbs);
+    let chip = PairingChip::new(&fp_chip);
+
+    let P_assigned = chip.load_private_g1(ctx, P);
+    let Q_assigned = chip.load_private_g2(ctx, Q);
+
+    // test optimal ate pairing
+    let f = chip.pairing(ctx, &Q_assigned, &P_assigned);
+
+    let actual_f = pairing(&P, &Q);
+    let fp12_chip = Fp12Chip::new(&fp_chip);
+    // cannot directly compare f and actual_f because `Gt` has private field `Fq12`
+    assert_eq!(format!("Gt({:?})", fp12_chip.get_assigned_value(&f)), format!("{actual_f:?}"));
 }
 
-impl<F: PrimeField> Circuit<F> for PairingCircuit<F> {
-    type Config = FpChip<F>;
-    type FloorPlanner = SimpleFloorPlanner; // V1;
-
-    fn without_witnesses(&self) -> Self {
-        Self::default()
-    }
-
-    fn configure(meta: &mut ConstraintSystem<F>) -> Self::Config {
-        let path = var("PAIRING_CONFIG")
-            .unwrap_or_else(|_| "./src/bn254/configs/pairing_circuit.config".to_string());
-        let params: PairingCircuitParams = serde_json::from_reader(
-            File::open(&path).unwrap_or_else(|_| panic!("{path:?} file should exist")),
-        )
-        .unwrap();
-
-        PairingChip::<F>::configure(
-            meta,
-            params.strategy,
-            &[params.num_advice],
-            &[params.num_lookup_advice],
-            params.num_fixed,
-            params.lookup_bits,
-            params.limb_bits,
-            params.num_limbs,
-            0,
-            params.degree as usize,
-        )
-    }
-
-    fn synthesize(
-        &self,
-        config: Self::Config,
-        mut layouter: impl Layouter<F>,
-    ) -> Result<(), Error> {
-        config.range.load_lookup_table(&mut layouter)?;
-        let chip = PairingChip::<F>::construct(&config);
-
-        let mut first_pass = SKIP_FIRST_PASS;
-
-        layouter.assign_region(
-            || "pairing",
-            |region| {
-                if first_pass {
-                    first_pass = false;
-                    return Ok(());
-                }
-
-                let mut aux = config.new_context(region);
-                let ctx = &mut aux;
-
-                let P_assigned =
-                    chip.load_private_g1(ctx, self.P.map(Value::known).unwrap_or(Value::unknown()));
-                let Q_assigned =
-                    chip.load_private_g2(ctx, self.Q.map(Value::known).unwrap_or(Value::unknown()));
-
-                /*
-                // test miller loop without final exp
-                {
-                    let f = chip.miller_loop(ctx, &Q_assigned, &P_assigned)?;
-                    for fc in &f.coeffs {
-                        assert_eq!(fc.value, fc.truncation.to_bigint());
-                    }
-                    if self.P != None {
-                        let actual_f = multi_miller_loop(&[(
-                            &self.P.unwrap(),
-                            &G2Prepared::from_affine(self.Q.unwrap()),
-                        )]);
-                        let f_val: Vec<String> =
-                            f.coeffs.iter().map(|x| x.value.clone().unwrap().to_str_radix(16)).collect();
-                        println!("single miller loop:");
-                        println!("actual f: {:#?}", actual_f);
-                        println!("circuit f: {:#?}", f_val);
-                    }
-                }
-                */
-
-                // test optimal ate pairing
-                {
-                    let f = chip.pairing(ctx, &Q_assigned, &P_assigned);
-                    #[cfg(feature = "display")]
-                    for fc in &f.coeffs {
-                        assert_eq!(
-                            value_to_option(fc.value.clone()),
-                            value_to_option(fc.truncation.to_bigint(chip.fp_chip.limb_bits))
-                        );
-                    }
-                    #[cfg(feature = "display")]
-                    if self.P.is_some() {
-                        let actual_f = pairing(&self.P.unwrap(), &self.Q.unwrap());
-                        let f_val: Vec<String> = f
-                            .coeffs
-                            .iter()
-                            .map(|x| value_to_option(x.value.clone()).unwrap().to_str_radix(16))
-                            //.map(|x| x.to_bigint().clone().unwrap().to_str_radix(16))
-                            .collect();
-                        println!("optimal ate pairing:");
-                        println!("actual f: {actual_f:#?}");
-                        println!("circuit f: {f_val:#?}");
-                    }
-                }
-
-                // IMPORTANT: this copies cells to the lookup advice column to perform range check lookups
-                // This is not optional.
-                config.finalize(ctx);
-
-                #[cfg(feature = "display")]
-                if self.P.is_some() {
-                    ctx.print_stats(&["Range"]);
-                }
-                Ok(())
-            },
-        )
-    }
+fn random_pairing_circuit(
+    params: PairingCircuitParams,
+    stage: CircuitBuilderStage,
+    break_points: Option<MultiPhaseThreadBreakPoints>,
+) -> RangeCircuitBuilder<Fr> {
+    let k = params.degree as usize;
+    let mut builder = match stage {
+        CircuitBuilderStage::Mock => GateThreadBuilder::mock(),
+        CircuitBuilderStage::Prover => GateThreadBuilder::prover(),
+        CircuitBuilderStage::Keygen => GateThreadBuilder::keygen(),
+    };
+
+    let P = G1Affine::random(OsRng);
+    let Q = G2Affine::random(OsRng);
+
+    let start0 = start_timer!(|| format!("Witness generation for circuit in {stage:?} stage"));
+    pairing_test::<Fr>(builder.main(0), params, P, Q);
+
+    let circuit = match stage {
+        CircuitBuilderStage::Mock => {
+            builder.config(k, Some(20));
+            RangeCircuitBuilder::mock(builder)
+        }
+        CircuitBuilderStage::Keygen => {
+            builder.config(k, Some(20));
+            RangeCircuitBuilder::keygen(builder)
+        }
+        CircuitBuilderStage::Prover => RangeCircuitBuilder::prover(builder, break_points.unwrap()),
+    };
+    end_timer!(start0);
+    circuit
 }
 
 #[test]
 fn test_pairing() {
-    let mut folder = std::path::PathBuf::new();
-    folder.push("./src/bn254");
-    folder.push("configs/pairing_circuit.config");
-    set_var("PAIRING_CONFIG", &folder);
-    let params_str = std::fs::read_to_string(folder.as_path())
-        .expect("src/bn254/configs/pairing_circuit.config file should exist");
-    let params: PairingCircuitParams = serde_json::from_str(params_str.as_str()).unwrap();
-    let k = params.degree;
-
-    let mut rng = OsRng;
-
-    let P = Some(G1Affine::random(&mut rng));
-    let Q = Some(G2Affine::random(&mut rng));
-
-    let circuit = PairingCircuit::<Fr> { P, Q, _marker: PhantomData };
-
-    let prover = MockProver::run(k, &circuit, vec![]).unwrap();
-    prover.assert_satisfied();
+    let path = "configs/bn254/pairing_circuit.config";
+    let params: PairingCircuitParams = serde_json::from_reader(
+        File::open(path).unwrap_or_else(|e| panic!("{path} does not exist: {e:?}")),
+    )
+    .unwrap();
+
+    let circuit = random_pairing_circuit(params, CircuitBuilderStage::Mock, None);
+    MockProver::run(params.degree, &circuit, vec![]).unwrap().assert_satisfied();
 }
 
 #[test]
 fn bench_pairing() -> Result<(), Box<dyn std::error::Error>> {
-    use std::io::BufRead;
-
-    use crate::halo2_proofs::poly::kzg::multiopen::{ProverGWC, VerifierGWC};
-
-    let mut rng = OsRng;
-
-    let mut folder = std::path::PathBuf::new();
-    folder.push("./src/bn254");
-
-    folder.push("configs/bench_pairing.config");
-    let bench_params_file = std::fs::File::open(folder.as_path())?;
-    folder.pop();
-    folder.pop();
-
-    folder.push("results/pairing_bench.csv");
-    let mut fs_results = std::fs::File::create(folder.as_path()).unwrap();
-    folder.pop();
-    folder.pop();
+    let rng = OsRng;
+    let config_path = "configs/bn254/bench_pairing.config";
+    let bench_params_file =
+        File::open(config_path).unwrap_or_else(|e| panic!("{config_path} does not exist: {e:?}"));
+    fs::create_dir_all("results/bn254").unwrap();
+    fs::create_dir_all("data").unwrap();
+
+    let results_path = "results/bn254/pairing_bench.csv";
+    let mut fs_results = File::create(results_path).unwrap();
     writeln!(fs_results, "degree,num_advice,num_lookup,num_fixed,lookup_bits,limb_bits,num_limbs,vk_size,proof_time,proof_size,verify_time")?;
-    folder.push("data");
-    if !folder.is_dir() {
-        std::fs::create_dir(folder.as_path())?;
-    }
 
-    let mut params_folder = std::path::PathBuf::new();
-    params_folder.push("./params");
-    if !params_folder.is_dir() {
-        std::fs::create_dir(params_folder.as_path())?;
-    }
-
-    let bench_params_reader = std::io::BufReader::new(bench_params_file);
+    let bench_params_reader = BufReader::new(bench_params_file);
     for line in bench_params_reader.lines() {
         let bench_params: PairingCircuitParams =
             serde_json::from_str(line.unwrap().as_str()).unwrap();
-        println!(
-            "---------------------- degree = {} ------------------------------",
-            bench_params.degree
-        );
-
-        {
-            folder.pop();
-            folder.push("configs/pairing_circuit.tmp.config");
-            set_var("PAIRING_CONFIG", &folder);
-            let mut f = std::fs::File::create(folder.as_path())?;
-            write!(f, "{}", serde_json::to_string(&bench_params).unwrap())?;
-            folder.pop();
-            folder.pop();
-            folder.push("data");
-        }
-        let params_time = start_timer!(|| "Params construction");
-        let params = {
-            params_folder.push(format!("kzg_bn254_{}.srs", bench_params.degree));
-            let fd = std::fs::File::open(params_folder.as_path());
-            let params = if let Ok(mut f) = fd {
-                println!("Found existing params file. Reading params...");
-                ParamsKZG::<Bn256>::read(&mut f).unwrap()
-            } else {
-                println!("Creating new params file...");
-                let mut f = std::fs::File::create(params_folder.as_path())?;
-                let params = ParamsKZG::<Bn256>::setup(bench_params.degree, &mut rng);
-                params.write(&mut f).unwrap();
-                params
-            };
-            params_folder.pop();
-            params
-        };
+        let k = bench_params.degree;
+        println!("---------------------- degree = {k} ------------------------------",);
 
-        let circuit = PairingCircuit::<Fr>::default();
-        end_timer!(params_time);
+        let params = gen_srs(k);
+        let circuit = random_pairing_circuit(bench_params, CircuitBuilderStage::Keygen, None);
 
         let vk_time = start_timer!(|| "Generating vkey");
         let vk = keygen_vk(&params, &circuit)?;
         end_timer!(vk_time);
 
-        /*
-        let vk_size = {
-            folder.push(format!(
-                "pairing_circuit_{}_{}_{}_{}_{}_{}_{}.vkey",
-                bench_params.degree,
-                bench_params.num_advice,
-                bench_params.num_lookup_advice,
-                bench_params.num_fixed,
-                bench_params.lookup_bits,
-                bench_params.limb_bits,
-                bench_params.num_limbs
-            ));
-            let mut fd = std::fs::File::create(folder.as_path()).unwrap();
-            folder.pop();
-            vk.write(&mut fd).unwrap();
-            fd.metadata().unwrap().len()
-        };
-        */
-
         let pk_time = start_timer!(|| "Generating pkey");
         let pk = keygen_pk(&params, vk, &circuit)?;
         end_timer!(pk_time);
 
-        let mut rng = OsRng;
-        let P = Some(G1Affine::random(&mut rng));
-        let Q = Some(G2Affine::random(&mut rng));
-        let proof_circuit = PairingCircuit::<Fr> { P, Q, _marker: PhantomData };
-
+        let break_points = circuit.0.break_points.take();
+        drop(circuit);
         // create a proof
         let proof_time = start_timer!(|| "Proving time");
+        let circuit =
+            random_pairing_circuit(bench_params, CircuitBuilderStage::Prover, Some(break_points));
         let mut transcript = Blake2bWrite::<_, _, Challenge255<_>>::init(vec![]);
         create_proof::<
             KZGCommitmentScheme<Bn256>,
@@ -278,14 +144,14 @@ fn bench_pairing() -> Result<(), Box<dyn std::error::Error>> {
             Challenge255<G1Affine>,
             _,
             Blake2bWrite<Vec<u8>, G1Affine, Challenge255<G1Affine>>,
-            PairingCircuit<Fr>,
-        >(&params, &pk, &[proof_circuit], &[&[]], rng, &mut transcript)?;
+            _,
+        >(&params, &pk, &[circuit], &[&[]], rng, &mut transcript)?;
         let proof = transcript.finalize();
         end_timer!(proof_time);
 
         let proof_size = {
-            folder.push(format!(
-                "pairing_circuit_proof_{}_{}_{}_{}_{}_{}_{}.data",
+            let path = format!(
+                "data/pairing_circuit_proof_{}_{}_{}_{}_{}_{}_{}.data",
                 bench_params.degree,
                 bench_params.num_advice,
                 bench_params.num_lookup_advice,
@@ -293,27 +159,27 @@ fn bench_pairing() -> Result<(), Box<dyn std::error::Error>> {
                 bench_params.lookup_bits,
                 bench_params.limb_bits,
                 bench_params.num_limbs
-            ));
-            let mut fd = std::fs::File::create(folder.as_path()).unwrap();
-            folder.pop();
-            fd.write_all(&proof).unwrap();
-            fd.metadata().unwrap().len()
+            );
+            let mut fd = File::create(&path)?;
+            fd.write_all(&proof)?;
+            let size = fd.metadata().unwrap().len();
+            fs::remove_file(path)?;
+            size
         };
 
         let verify_time = start_timer!(|| "Verify time");
         let verifier_params = params.verifier_params();
         let strategy = SingleStrategy::new(&params);
         let mut transcript = Blake2bRead::<_, _, Challenge255<_>>::init(&proof[..]);
-        assert!(verify_proof::<
+        verify_proof::<
             KZGCommitmentScheme<Bn256>,
             VerifierGWC<'_, Bn256>,
             Challenge255<G1Affine>,
             Blake2bRead<&[u8], G1Affine, Challenge255<G1Affine>>,
             SingleStrategy<'_, Bn256>,
         >(verifier_params, pk.get_vk(), strategy, &[&[]], &mut transcript)
-        .is_ok());
+        .unwrap();
         end_timer!(verify_time);
-        fs::remove_file(var("PAIRING_CONFIG").unwrap())?;
 
         writeln!(
             fs_results,
diff --git a/halo2-ecc/src/ecc/ecdsa.rs b/halo2-ecc/src/ecc/ecdsa.rs
index 005f5c39..874c185f 100644
--- a/halo2-ecc/src/ecc/ecdsa.rs
+++ b/halo2-ecc/src/ecc/ecdsa.rs
@@ -1,10 +1,9 @@
 use crate::bigint::{big_less_than, CRTInteger};
-use crate::fields::{fp::FpConfig, FieldChip};
+use crate::fields::{fp::FpChip, FieldChip, PrimeField};
 use halo2_base::{
     gates::{GateInstructions, RangeInstructions},
-    utils::{modulus, CurveAffineExt, PrimeField},
+    utils::CurveAffineExt,
     AssignedValue, Context,
-    QuantumCell::Existing,
 };
 
 use super::fixed_base;
@@ -14,25 +13,21 @@ use super::{ec_add_unequal, scalar_multiply, EcPoint};
 // p = coordinate field modulus
 // n = scalar field modulus
 // Only valid when p is very close to n in size (e.g. for Secp256k1)
-pub fn ecdsa_verify_no_pubkey_check<'v, F: PrimeField, CF: PrimeField, SF: PrimeField, GA>(
-    base_chip: &FpConfig<F, CF>,
-    ctx: &mut Context<'v, F>,
-    pubkey: &EcPoint<F, <FpConfig<F, CF> as FieldChip<F>>::FieldPoint<'v>>,
-    r: &CRTInteger<'v, F>,
-    s: &CRTInteger<'v, F>,
-    msghash: &CRTInteger<'v, F>,
+pub fn ecdsa_verify_no_pubkey_check<F: PrimeField, CF: PrimeField, SF: PrimeField, GA>(
+    base_chip: &FpChip<F, CF>,
+    ctx: &mut Context<F>,
+    pubkey: &EcPoint<F, <FpChip<F, CF> as FieldChip<F>>::FieldPoint>,
+    r: &CRTInteger<F>,
+    s: &CRTInteger<F>,
+    msghash: &CRTInteger<F>,
     var_window_bits: usize,
     fixed_window_bits: usize,
-) -> AssignedValue<'v, F>
+) -> AssignedValue<F>
 where
     GA: CurveAffineExt<Base = CF, ScalarExt = SF>,
 {
-    let scalar_chip = FpConfig::<F, SF>::construct(
-        base_chip.range.clone(),
-        base_chip.limb_bits,
-        base_chip.num_limbs,
-        modulus::<SF>(),
-    );
+    let scalar_chip =
+        FpChip::<F, SF>::new(base_chip.range, base_chip.limb_bits, base_chip.num_limbs);
     let n = scalar_chip.load_constant(ctx, scalar_chip.p.to_biguint().unwrap());
 
     // check r,s are in [1, n - 1]
@@ -50,7 +45,7 @@ where
         base_chip,
         ctx,
         &GA::generator(),
-        &u1.truncation.limbs,
+        u1.truncation.limbs.clone(),
         base_chip.limb_bits,
         fixed_window_bits,
     );
@@ -58,7 +53,7 @@ where
         base_chip,
         ctx,
         pubkey,
-        &u2.truncation.limbs,
+        u2.truncation.limbs.clone(),
         base_chip.limb_bits,
         var_window_bits,
     );
@@ -69,7 +64,7 @@ where
     // coordinates of u1_mul and u2_mul are in proper bigint form, and lie in but are not constrained to [0, n)
     // we therefore need hard inequality here
     let u1_u2_x_eq = base_chip.is_equal(ctx, &u1_mul.x, &u2_mul.x);
-    let u1_u2_not_neg = base_chip.range.gate().not(ctx, Existing(&u1_u2_x_eq));
+    let u1_u2_not_neg = base_chip.range.gate().not(ctx, u1_u2_x_eq);
 
     // compute (x1, y1) = u1 * G + u2 * pubkey and check (r mod n) == x1 as integers
     // WARNING: For optimization reasons, does not reduce x1 mod n, which is
@@ -98,10 +93,10 @@ where
     );
 
     // check (r in [1, n - 1]) and (s in [1, n - 1]) and (u1_mul != - u2_mul) and (r == x1 mod n)
-    let res1 = base_chip.range.gate().and(ctx, Existing(&r_valid), Existing(&s_valid));
-    let res2 = base_chip.range.gate().and(ctx, Existing(&res1), Existing(&u1_small));
-    let res3 = base_chip.range.gate().and(ctx, Existing(&res2), Existing(&u2_small));
-    let res4 = base_chip.range.gate().and(ctx, Existing(&res3), Existing(&u1_u2_not_neg));
-    let res5 = base_chip.range.gate().and(ctx, Existing(&res4), Existing(&equal_check));
+    let res1 = base_chip.gate().and(ctx, r_valid, s_valid);
+    let res2 = base_chip.gate().and(ctx, res1, u1_small);
+    let res3 = base_chip.gate().and(ctx, res2, u2_small);
+    let res4 = base_chip.gate().and(ctx, res3, u1_u2_not_neg);
+    let res5 = base_chip.gate().and(ctx, res4, equal_check);
     res5
 }
diff --git a/halo2-ecc/src/ecc/fixed_base.rs b/halo2-ecc/src/ecc/fixed_base.rs
index 4b9bedb6..440f6993 100644
--- a/halo2-ecc/src/ecc/fixed_base.rs
+++ b/halo2-ecc/src/ecc/fixed_base.rs
@@ -3,17 +3,18 @@ use super::{ec_add_unequal, ec_select, ec_select_from_bits, EcPoint, EccChip};
 use crate::halo2_proofs::arithmetic::CurveAffine;
 use crate::{
     bigint::{CRTInteger, FixedCRTInteger},
-    fields::{PrimeFieldChip, Selectable},
+    fields::{PrimeField, PrimeFieldChip, Selectable},
 };
 use group::Curve;
+use halo2_base::gates::builder::GateThreadBuilder;
 use halo2_base::{
-    gates::{GateInstructions, RangeInstructions},
-    utils::{fe_to_biguint, CurveAffineExt, PrimeField},
+    gates::GateInstructions,
+    utils::{fe_to_biguint, CurveAffineExt},
     AssignedValue, Context,
-    QuantumCell::Existing,
 };
 use itertools::Itertools;
-use num_bigint::BigUint;
+use rayon::prelude::*;
+use std::sync::Mutex;
 use std::{cmp::min, marker::PhantomData};
 
 // this only works for curves GA with base field of prime order
@@ -39,41 +40,12 @@ where
         Self::construct(x, y)
     }
 
-    pub fn assign<'v, FC>(
-        self,
-        chip: &FC,
-        ctx: &mut Context<'_, F>,
-        native_modulus: &BigUint,
-    ) -> EcPoint<F, FC::FieldPoint<'v>>
+    pub fn assign<FC>(self, chip: &FC, ctx: &mut Context<F>) -> EcPoint<F, FC::FieldPoint>
     where
-        FC: PrimeFieldChip<F, FieldType = C::Base, FieldPoint<'v> = CRTInteger<'v, F>>,
+        FC: PrimeFieldChip<F, FieldType = C::Base, FieldPoint = CRTInteger<F>>,
     {
-        let assigned_x = self.x.assign(chip.range().gate(), ctx, chip.limb_bits(), native_modulus);
-        let assigned_y = self.y.assign(chip.range().gate(), ctx, chip.limb_bits(), native_modulus);
-        EcPoint::construct(assigned_x, assigned_y)
-    }
-
-    pub fn assign_without_caching<'v, FC>(
-        self,
-        chip: &FC,
-        ctx: &mut Context<'_, F>,
-        native_modulus: &BigUint,
-    ) -> EcPoint<F, FC::FieldPoint<'v>>
-    where
-        FC: PrimeFieldChip<F, FieldType = C::Base, FieldPoint<'v> = CRTInteger<'v, F>>,
-    {
-        let assigned_x = self.x.assign_without_caching(
-            chip.range().gate(),
-            ctx,
-            chip.limb_bits(),
-            native_modulus,
-        );
-        let assigned_y = self.y.assign_without_caching(
-            chip.range().gate(),
-            ctx,
-            chip.limb_bits(),
-            native_modulus,
-        );
+        let assigned_x = self.x.assign(ctx, chip.limb_bits(), chip.native_modulus());
+        let assigned_y = self.y.assign(ctx, chip.limb_bits(), chip.native_modulus());
         EcPoint::construct(assigned_x, assigned_y)
     }
 }
@@ -86,27 +58,27 @@ where
 // - `scalar_i < 2^{max_bits} for all i` (constrained by num_to_bits)
 // - `max_bits <= modulus::<F>.bits()`
 
-pub fn scalar_multiply<'v, F, FC, C>(
+pub fn scalar_multiply<F, FC, C>(
     chip: &FC,
-    ctx: &mut Context<'v, F>,
+    ctx: &mut Context<F>,
     point: &C,
-    scalar: &[AssignedValue<'v, F>],
+    scalar: Vec<AssignedValue<F>>,
     max_bits: usize,
     window_bits: usize,
-) -> EcPoint<F, FC::FieldPoint<'v>>
+) -> EcPoint<F, FC::FieldPoint>
 where
     F: PrimeField,
     C: CurveAffineExt,
     C::Base: PrimeField,
-    FC: PrimeFieldChip<F, FieldType = C::Base, FieldPoint<'v> = CRTInteger<'v, F>>
-        + Selectable<F, Point<'v> = FC::FieldPoint<'v>>,
+    FC: PrimeFieldChip<F, FieldType = C::Base, FieldPoint = CRTInteger<F>>
+        + Selectable<F, Point = FC::FieldPoint>,
 {
     if point.is_identity().into() {
         let point = FixedEcPoint::from_curve(*point, chip.num_limbs(), chip.limb_bits());
-        return FixedEcPoint::assign(point, chip, ctx, chip.native_modulus());
+        return FixedEcPoint::assign(point, chip, ctx);
     }
-    assert!(!scalar.is_empty());
-    assert!((max_bits as u32) <= F::NUM_BITS);
+    debug_assert!(!scalar.is_empty());
+    debug_assert!((max_bits as u32) <= F::NUM_BITS);
 
     let total_bits = max_bits * scalar.len();
     let num_windows = (total_bits + window_bits - 1) / window_bits;
@@ -142,12 +114,12 @@ where
         .into_iter()
         .map(|point| {
             let point = FixedEcPoint::from_curve(point, chip.num_limbs(), chip.limb_bits());
-            FixedEcPoint::assign(point, chip, ctx, chip.native_modulus())
+            FixedEcPoint::assign(point, chip, ctx)
         })
         .collect_vec();
 
     let bits = scalar
-        .iter()
+        .into_iter()
         .flat_map(|scalar_chunk| chip.gate().num_to_bits(ctx, scalar_chunk, max_bits))
         .collect::<Vec<_>>();
 
@@ -155,29 +127,24 @@ where
     let bit_window_rev = bits.chunks(window_bits).into_iter().rev();
     let mut curr_point = None;
     // `is_started` is just a way to deal with if `curr_point` is actually identity
-    let mut is_started = chip.gate().load_zero(ctx);
+    let mut is_started = ctx.load_zero();
     for (cached_point_window, bit_window) in cached_point_window_rev.zip(bit_window_rev) {
-        let bit_sum = chip.gate().sum(ctx, bit_window.iter().map(Existing));
+        let bit_sum = chip.gate().sum(ctx, bit_window.iter().copied());
         // are we just adding a window of all 0s? if so, skip
-        let is_zero_window = chip.gate().is_zero(ctx, &bit_sum);
+        let is_zero_window = chip.gate().is_zero(ctx, bit_sum);
         let add_point = ec_select_from_bits::<F, _>(chip, ctx, cached_point_window, bit_window);
         curr_point = if let Some(curr_point) = curr_point {
             let sum = ec_add_unequal(chip, ctx, &curr_point, &add_point, false);
-            let zero_sum = ec_select(chip, ctx, &curr_point, &sum, &is_zero_window);
-            Some(ec_select(chip, ctx, &zero_sum, &add_point, &is_started))
+            let zero_sum = ec_select(chip, ctx, &curr_point, &sum, is_zero_window);
+            Some(ec_select(chip, ctx, &zero_sum, &add_point, is_started))
         } else {
             Some(add_point)
         };
         is_started = {
             // is_started || !is_zero_window
             // (a || !b) = (1-b) + a*b
-            let not_zero_window = chip.gate().not(ctx, Existing(&is_zero_window));
-            chip.gate().mul_add(
-                ctx,
-                Existing(&is_started),
-                Existing(&is_zero_window),
-                Existing(&not_zero_window),
-            )
+            let not_zero_window = chip.gate().not(ctx, is_zero_window);
+            chip.gate().mul_add(ctx, is_started, is_zero_window, not_zero_window)
         };
     }
     curr_point.unwrap()
@@ -185,20 +152,20 @@ where
 
 // basically just adding up individual fixed_base::scalar_multiply except that we do all batched normalization of cached points at once to further save inversion time during witness generation
 // we also use the random accumulator for some extra efficiency (which also works in scalar multiply case but that is TODO)
-pub fn msm<'v, F, FC, C>(
+pub fn msm<F, FC, C>(
     chip: &EccChip<F, FC>,
-    ctx: &mut Context<'v, F>,
+    ctx: &mut Context<F>,
     points: &[C],
-    scalars: &[Vec<AssignedValue<'v, F>>],
+    scalars: Vec<Vec<AssignedValue<F>>>,
     max_scalar_bits_per_cell: usize,
     window_bits: usize,
-) -> EcPoint<F, FC::FieldPoint<'v>>
+) -> EcPoint<F, FC::FieldPoint>
 where
     F: PrimeField,
     C: CurveAffineExt,
     C::Base: PrimeField,
-    FC: PrimeFieldChip<F, FieldType = C::Base, FieldPoint<'v> = CRTInteger<'v, F>>
-        + Selectable<F, Point<'v> = FC::FieldPoint<'v>>,
+    FC: PrimeFieldChip<F, FieldType = C::Base, FieldPoint = CRTInteger<F>>
+        + Selectable<F, Point = FC::FieldPoint>,
 {
     assert!((max_scalar_bits_per_cell as u32) <= F::NUM_BITS);
     let scalar_len = scalars[0].len();
@@ -242,16 +209,16 @@ where
         .map(|point| {
             let point =
                 FixedEcPoint::from_curve(point, field_chip.num_limbs(), field_chip.limb_bits());
-            point.assign_without_caching(field_chip, ctx, field_chip.native_modulus())
+            point.assign(field_chip, ctx)
         })
         .collect_vec();
 
     let bits = scalars
-        .iter()
+        .into_iter()
         .flat_map(|scalar| {
             assert_eq!(scalar.len(), scalar_len);
             scalar
-                .iter()
+                .into_iter()
                 .flat_map(|scalar_chunk| {
                     field_chip.gate().num_to_bits(ctx, scalar_chunk, max_scalar_bits_per_cell)
                 })
@@ -269,32 +236,26 @@ where
             let bit_window_rev = bits.chunks(window_bits).into_iter().rev();
             let mut curr_point = None;
             // `is_started` is just a way to deal with if `curr_point` is actually identity
-            let mut is_started = field_chip.gate().load_zero(ctx);
+            let mut is_started = ctx.load_zero();
             for (cached_point_window, bit_window) in cached_point_window_rev.zip(bit_window_rev) {
                 let is_zero_window = {
-                    let sum = field_chip.gate().sum(ctx, bit_window.iter().map(Existing));
-                    field_chip.gate().is_zero(ctx, &sum)
+                    let sum = field_chip.gate().sum(ctx, bit_window.iter().copied());
+                    field_chip.gate().is_zero(ctx, sum)
                 };
                 let add_point =
                     ec_select_from_bits::<F, _>(field_chip, ctx, cached_point_window, bit_window);
                 curr_point = if let Some(curr_point) = curr_point {
                     let sum = ec_add_unequal(field_chip, ctx, &curr_point, &add_point, false);
-                    let zero_sum = ec_select(field_chip, ctx, &curr_point, &sum, &is_zero_window);
-                    Some(ec_select(field_chip, ctx, &zero_sum, &add_point, &is_started))
+                    let zero_sum = ec_select(field_chip, ctx, &curr_point, &sum, is_zero_window);
+                    Some(ec_select(field_chip, ctx, &zero_sum, &add_point, is_started))
                 } else {
                     Some(add_point)
                 };
                 is_started = {
                     // is_started || !is_zero_window
                     // (a || !b) = (1-b) + a*b
-                    let not_zero_window =
-                        field_chip.range().gate().not(ctx, Existing(&is_zero_window));
-                    field_chip.range().gate().mul_add(
-                        ctx,
-                        Existing(&is_started),
-                        Existing(&is_zero_window),
-                        Existing(&not_zero_window),
-                    )
+                    let not_zero_window = field_chip.gate().not(ctx, is_zero_window);
+                    field_chip.gate().mul_add(ctx, is_started, is_zero_window, not_zero_window)
                 };
             }
             curr_point.unwrap()
@@ -302,3 +263,122 @@ where
         .collect_vec();
     chip.sum::<C>(ctx, sm.iter())
 }
+
+pub fn msm_par<F, FC, C>(
+    chip: &EccChip<F, FC>,
+    thread_pool: &Mutex<GateThreadBuilder<F>>,
+    points: &[C],
+    scalars: Vec<Vec<AssignedValue<F>>>,
+    max_scalar_bits_per_cell: usize,
+    window_bits: usize,
+    phase: usize,
+) -> EcPoint<F, FC::FieldPoint>
+where
+    F: PrimeField,
+    C: CurveAffineExt,
+    C::Base: PrimeField,
+    FC: PrimeFieldChip<F, FieldType = C::Base, FieldPoint = CRTInteger<F>>
+        + Selectable<F, Point = FC::FieldPoint>,
+{
+    assert!((max_scalar_bits_per_cell as u32) <= F::NUM_BITS);
+    let scalar_len = scalars[0].len();
+    let total_bits = max_scalar_bits_per_cell * scalar_len;
+    let num_windows = (total_bits + window_bits - 1) / window_bits;
+
+    // `cached_points` is a flattened 2d vector
+    // first we compute all cached points in Jacobian coordinates since it's fastest
+    let cached_points_jacobian = points
+        .par_iter()
+        .flat_map(|point| {
+            let base_pt = point.to_curve();
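+            // cached_points[idx][i * 2^w + j] holds `[j * 2^(i * w)] * points[idx]` for j in {1, ..., 2^w - 1}; slot j = 0 is a placeholder holding the same point as j = 1 (all-zero windows are skipped later via `is_zero_window`)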
+            let mut increment = base_pt;
+            (0..num_windows)
+                .flat_map(|i| {
+                    let mut curr = increment;
+                    let cache_vec = std::iter::once(increment)
+                        .chain((1..(1usize << min(window_bits, total_bits - i * window_bits))).map(
+                            |_| {
+                                let prev = curr;
+                                curr += increment;
+                                prev
+                            },
+                        ))
+                        .collect_vec();
+                    increment = curr;
+                    cache_vec
+                })
+                .collect_vec()
+        })
+        .collect::<Vec<_>>();
+    // for use in circuits we need affine coordinates, so we do a batch normalize: this is much more efficient than calling `to_affine` one by one since field inversion is very expensive
+    // initialize to all 0s
+    let mut cached_points_affine = vec![C::default(); cached_points_jacobian.len()];
+    C::Curve::batch_normalize(&cached_points_jacobian, &mut cached_points_affine);
+
+    let field_chip = chip.field_chip();
+    let witness_gen_only = thread_pool.lock().unwrap().witness_gen_only();
+
+    let (new_threads, scalar_mults): (Vec<_>, Vec<_>) = cached_points_affine
+        .par_chunks(cached_points_affine.len() / points.len())
+        .zip(scalars.into_par_iter())
+        .map(|(cached_points, scalar)| {
+            let thread_id = thread_pool.lock().unwrap().get_new_thread_id();
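+            // the temporary `MutexGuard` above is dropped at the end of that statement, so `thread_pool` is unlocked here and other workers can take it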
+            let mut thread = Context::new(witness_gen_only, thread_id);
+            let ctx = &mut thread;
+
+            let cached_points = cached_points
+                .iter()
+                .map(|point| {
+                    let point = FixedEcPoint::from_curve(
+                        *point,
+                        field_chip.num_limbs(),
+                        field_chip.limb_bits(),
+                    );
+                    point.assign(field_chip, ctx)
+                })
+                .collect_vec();
+            let cached_point_window_rev =
+                cached_points.chunks(1usize << window_bits).into_iter().rev();
+
+            debug_assert_eq!(scalar.len(), scalar_len);
+            let bits = scalar
+                .into_iter()
+                .flat_map(|scalar_chunk| {
+                    field_chip.gate().num_to_bits(ctx, scalar_chunk, max_scalar_bits_per_cell)
+                })
+                .collect_vec();
+            let bit_window_rev = bits.chunks(window_bits).into_iter().rev();
+            let mut curr_point = None;
+            // `is_started` is just a way to deal with if `curr_point` is actually identity
+            let mut is_started = ctx.load_zero();
+            for (cached_point_window, bit_window) in cached_point_window_rev.zip(bit_window_rev) {
+                let is_zero_window = {
+                    let sum = field_chip.gate().sum(ctx, bit_window.iter().copied());
+                    field_chip.gate().is_zero(ctx, sum)
+                };
+                let add_point =
+                    ec_select_from_bits::<F, _>(field_chip, ctx, cached_point_window, bit_window);
+                curr_point = if let Some(curr_point) = curr_point {
+                    let sum = ec_add_unequal(field_chip, ctx, &curr_point, &add_point, false);
+                    let zero_sum = ec_select(field_chip, ctx, &curr_point, &sum, is_zero_window);
+                    Some(ec_select(field_chip, ctx, &zero_sum, &add_point, is_started))
+                } else {
+                    Some(add_point)
+                };
+                is_started = {
+                    // is_started || !is_zero_window
+                    // (a || !b) = (1-b) + a*b
+                    let not_zero_window = field_chip.gate().not(ctx, is_zero_window);
+                    field_chip.gate().mul_add(ctx, is_started, is_zero_window, not_zero_window)
+                };
+            }
+            (thread, curr_point.unwrap())
+        })
+        .unzip();
+    let mut builder = thread_pool.lock().unwrap();
+    builder.threads[phase].extend(new_threads);
+    let ctx = builder.main(phase);
+    chip.sum::<C>(ctx, scalar_mults.iter())
+}
diff --git a/halo2-ecc/src/ecc/fixed_base_pippenger.rs b/halo2-ecc/src/ecc/fixed_base_pippenger.rs
index 1e36bfd1..05d7cf3e 100644
--- a/halo2-ecc/src/ecc/fixed_base_pippenger.rs
+++ b/halo2-ecc/src/ecc/fixed_base_pippenger.rs
@@ -20,14 +20,14 @@ use rand_chacha::ChaCha20Rng;
 // Output:
 // * new_points: length `points.len() * radix`
 // * new_bool_scalars: 2d array `ceil(scalar_bits / radix)` by `points.len() * radix`
-pub fn decompose<'v, F, C>(
+pub fn decompose<F, C>(
     gate: &impl GateInstructions<F>,
-    ctx: &mut Context<'v, F>,
+    ctx: &mut Context<F>,
     points: &[C],
-    scalars: &Vec<Vec<AssignedValue<'v, F>>>,
+    scalars: &Vec<Vec<AssignedValue<F>>>,
     max_scalar_bits_per_cell: usize,
     radix: usize,
-) -> (Vec<C::Curve>, Vec<Vec<AssignedValue<'v, F>>>)
+) -> (Vec<C::Curve>, Vec<Vec<AssignedValue<F>>>)
 where
     F: PrimeField,
     C: CurveAffine,
@@ -66,15 +66,15 @@ where
 // Given points[i] and bool_scalars[j][i],
 // compute G'[j] = sum_{i=0..points.len()} points[i] * bool_scalars[j][i]
 // output is [ G'[j] + rand_point ]_{j=0..bool_scalars.len()}, rand_point
-pub fn multi_product<'v, F: PrimeField, FC, C>(
+pub fn multi_product<F: PrimeField, FC, C>(
     chip: &FC,
-    ctx: &mut Context<'v, F>,
+    ctx: &mut Context<F>,
     points: Vec<C::CurveExt>,
-    bool_scalars: Vec<Vec<AssignedValue<'v, F>>>,
+    bool_scalars: Vec<Vec<AssignedValue<F>>>,
     clumping_factor: usize,
-) -> (Vec<EcPoint<F, FC::FieldPoint<'v>>>, EcPoint<F, FC::FieldPoint<'v>>)
+) -> (Vec<EcPoint<F, FC::FieldPoint>>, EcPoint<F, FC::FieldPoint>)
 where
-    FC: PrimeFieldChip<F, FieldPoint<'v> = CRTInteger<'v, F>>,
+    FC: PrimeFieldChip<F, FieldPoint = CRTInteger<F>>,
     FC::FieldType: PrimeField,
     C: CurveAffine<Base = FC::FieldType>,
 {
@@ -187,17 +187,17 @@ where
     (acc, rand_point)
 }
 
-pub fn multi_exp<'v, F: PrimeField, FC, C>(
+pub fn multi_exp<F: PrimeField, FC, C>(
     chip: &FC,
-    ctx: &mut Context<'v, F>,
+    ctx: &mut Context<F>,
     points: &[C],
-    scalars: &Vec<Vec<AssignedValue<'v, F>>>,
+    scalars: &Vec<Vec<AssignedValue<F>>>,
     max_scalar_bits_per_cell: usize,
     radix: usize,
     clump_factor: usize,
-) -> EcPoint<F, FC::FieldPoint<'v>>
+) -> EcPoint<F, FC::FieldPoint>
 where
-    FC: PrimeFieldChip<F, FieldPoint<'v> = CRTInteger<'v, F>>,
+    FC: PrimeFieldChip<F, FieldPoint = CRTInteger<F>>,
     FC::FieldType: PrimeField,
     C: CurveAffine<Base = FC::FieldType>,
 {
diff --git a/halo2-ecc/src/ecc/mod.rs b/halo2-ecc/src/ecc/mod.rs
index 7e8baf80..6b1c6655 100644
--- a/halo2-ecc/src/ecc/mod.rs
+++ b/halo2-ecc/src/ecc/mod.rs
@@ -1,18 +1,19 @@
 #![allow(non_snake_case)]
 use crate::bigint::CRTInteger;
-use crate::fields::{fp::FpConfig, FieldChip, PrimeFieldChip, Selectable};
-use crate::halo2_proofs::{arithmetic::CurveAffine, circuit::Value};
+use crate::fields::{fp::FpChip, FieldChip, PrimeField, PrimeFieldChip, Selectable};
+use crate::halo2_proofs::arithmetic::CurveAffine;
 use group::{Curve, Group};
+use halo2_base::gates::builder::GateThreadBuilder;
 use halo2_base::{
     gates::{GateInstructions, RangeInstructions},
-    utils::{modulus, CurveAffineExt, PrimeField},
+    utils::{modulus, CurveAffineExt},
     AssignedValue, Context,
-    QuantumCell::Existing,
 };
 use itertools::Itertools;
 use rand::SeedableRng;
 use rand_chacha::ChaCha20Rng;
 use std::marker::PhantomData;
+use std::sync::Mutex;
 
 pub mod ecdsa;
 pub mod fixed_base;
@@ -21,7 +22,7 @@ pub mod pippenger;
 
 // EcPoint and EccChip take in a generic `FieldChip` to implement generic elliptic curve operations on arbitrary field extensions (provided chip exists) for short Weierstrass curves (currently further assuming a4 = 0 for optimization purposes)
 #[derive(Debug)]
-pub struct EcPoint<F: PrimeField, FieldPoint: Clone> {
+pub struct EcPoint<F: PrimeField, FieldPoint> {
     pub x: FieldPoint,
     pub y: FieldPoint,
     _marker: PhantomData<F>,
@@ -33,7 +34,7 @@ impl<F: PrimeField, FieldPoint: Clone> Clone for EcPoint<F, FieldPoint> {
     }
 }
 
-impl<F: PrimeField, FieldPoint: Clone> EcPoint<F, FieldPoint> {
+impl<F: PrimeField, FieldPoint> EcPoint<F, FieldPoint> {
     pub fn construct(x: FieldPoint, y: FieldPoint) -> Self {
         Self { x, y, _marker: PhantomData }
     }
@@ -57,18 +58,18 @@ impl<F: PrimeField, FieldPoint: Clone> EcPoint<F, FieldPoint> {
 //  x_3 = lambda^2 - x_1 - x_2 (mod p)
 //  y_3 = lambda (x_1 - x_3) - y_1 mod p
 //
-/// For optimization reasons, we assume that if you are using this with `is_strict = true`, then you have already called `chip.enforce_less_than_p` on both `P.x` and `P.y`
-pub fn ec_add_unequal<'v, F: PrimeField, FC: FieldChip<F>>(
+/// For optimization reasons, we assume that if you are using this with `is_strict = true`, then you have already called `chip.enforce_less_than_p` on both `P.x` and `Q.x`
+pub fn ec_add_unequal<F: PrimeField, FC: FieldChip<F>>(
     chip: &FC,
-    ctx: &mut Context<'v, F>,
-    P: &EcPoint<F, FC::FieldPoint<'v>>,
-    Q: &EcPoint<F, FC::FieldPoint<'v>>,
+    ctx: &mut Context<F>,
+    P: &EcPoint<F, FC::FieldPoint>,
+    Q: &EcPoint<F, FC::FieldPoint>,
     is_strict: bool,
-) -> EcPoint<F, FC::FieldPoint<'v>> {
+) -> EcPoint<F, FC::FieldPoint> {
     if is_strict {
         // constrains that P.x != Q.x
         let x_is_equal = chip.is_equal_unenforced(ctx, &P.x, &Q.x);
-        chip.range().gate().assert_is_const(ctx, &x_is_equal, F::zero());
+        chip.range().gate().assert_is_const(ctx, &x_is_equal, &F::zero());
     }
 
     let dx = chip.sub_no_carry(ctx, &Q.x, &P.x);
@@ -99,18 +100,18 @@ pub fn ec_add_unequal<'v, F: PrimeField, FC: FieldChip<F>>(
 //  y_3 = lambda (x_1 - x_3) - y_1 mod p
 //  Assumes that P !=Q and Q != (P - Q)
 //
-/// For optimization reasons, we assume that if you are using this with `is_strict = true`, then you have already called `chip.enforce_less_than_p` on both `P.x` and `P.y`
-pub fn ec_sub_unequal<'v, F: PrimeField, FC: FieldChip<F>>(
+/// For optimization reasons, we assume that if you are using this with `is_strict = true`, then you have already called `chip.enforce_less_than_p` on both `P.x` and `Q.x`
+pub fn ec_sub_unequal<F: PrimeField, FC: FieldChip<F>>(
     chip: &FC,
-    ctx: &mut Context<'v, F>,
-    P: &EcPoint<F, FC::FieldPoint<'v>>,
-    Q: &EcPoint<F, FC::FieldPoint<'v>>,
+    ctx: &mut Context<F>,
+    P: &EcPoint<F, FC::FieldPoint>,
+    Q: &EcPoint<F, FC::FieldPoint>,
     is_strict: bool,
-) -> EcPoint<F, FC::FieldPoint<'v>> {
+) -> EcPoint<F, FC::FieldPoint> {
     if is_strict {
         // constrains that P.x != Q.x
         let x_is_equal = chip.is_equal_unenforced(ctx, &P.x, &Q.x);
-        chip.range().gate().assert_is_const(ctx, &x_is_equal, F::zero());
+        chip.range().gate().assert_is_const(ctx, &x_is_equal, &F::zero());
     }
 
     let dx = chip.sub_no_carry(ctx, &Q.x, &P.x);
@@ -150,11 +151,11 @@ pub fn ec_sub_unequal<'v, F: PrimeField, FC: FieldChip<F>>(
 // we precompute lambda and constrain (2y) * lambda = 3 x^2 (mod p)
 // then we compute x_3 = lambda^2 - 2 x (mod p)
 //                 y_3 = lambda (x - x_3) - y (mod p)
-pub fn ec_double<'v, F: PrimeField, FC: FieldChip<F>>(
+pub fn ec_double<F: PrimeField, FC: FieldChip<F>>(
     chip: &FC,
-    ctx: &mut Context<'v, F>,
-    P: &EcPoint<F, FC::FieldPoint<'v>>,
-) -> EcPoint<F, FC::FieldPoint<'v>> {
+    ctx: &mut Context<F>,
+    P: &EcPoint<F, FC::FieldPoint>,
+) -> EcPoint<F, FC::FieldPoint> {
     // removed optimization that computes `2 * lambda` while assigning witness to `lambda` simultaneously, in favor of readability. The difference is just copying `lambda` once
     let two_y = chip.scalar_mul_no_carry(ctx, &P.y, 2);
     let three_x = chip.scalar_mul_no_carry(ctx, &P.x, 3);
@@ -176,15 +177,74 @@ pub fn ec_double<'v, F: PrimeField, FC: FieldChip<F>>(
     EcPoint::construct(x_3, y_3)
 }
 
-pub fn ec_select<'v, F: PrimeField, FC>(
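+/// Computes `2P + Q = (P + Q) + P` for `P = (x_0, y_0)`, `Q = (x_1, y_1)`.
+/// When `is_strict` is true, constrains `P.x != Q.x` and `x_2 != P.x`, the two pairs whose differences are used as denominators below.
+// Uses a Montgomery-ladder-style trick(?) to skip computing the intermediate y-coordinate of `P + Q`;
+// formulas from halo2wrong: https://hackmd.io/ncuKqRXzR-Cw-Au2fGzsMg?view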
+// lambda_0 = (y_1 - y_0) / (x_1 - x_0)
+// x_2 = lambda_0^2 - x_0 - x_1
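+// lambda_1 = lambda_0 + 2 * y_0 / (x_2 - x_0)  (the negative of the chord slope through `P + Q` and `P`; the sign cancels in lambda_1^2 and is accounted for in y_res)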
+// x_res = lambda_1^2 - x_0 - x_2
+// y_res = lambda_1 * (x_res - x_0) - y_0
+pub fn ec_double_and_add_unequal<F: PrimeField, FC: FieldChip<F>>(
     chip: &FC,
-    ctx: &mut Context<'_, F>,
-    P: &EcPoint<F, FC::FieldPoint<'v>>,
-    Q: &EcPoint<F, FC::FieldPoint<'v>>,
-    sel: &AssignedValue<'v, F>,
-) -> EcPoint<F, FC::FieldPoint<'v>>
+    ctx: &mut Context<F>,
+    P: &EcPoint<F, FC::FieldPoint>,
+    Q: &EcPoint<F, FC::FieldPoint>,
+    is_strict: bool,
+) -> EcPoint<F, FC::FieldPoint> {
+    if is_strict {
+        // constrains that P.x != Q.x
+        let x_is_equal = chip.is_equal_unenforced(ctx, &P.x, &Q.x);
+        chip.range().gate().assert_is_const(ctx, &x_is_equal, &F::zero());
+    }
+
+    let dx = chip.sub_no_carry(ctx, &Q.x, &P.x);
+    let dy = chip.sub_no_carry(ctx, &Q.y, &P.y);
+    let lambda_0 = chip.divide(ctx, &dy, &dx);
+
+    //  x_2 = lambda_0^2 - x_0 - x_1 (mod p)
+    let lambda_0_sq = chip.mul_no_carry(ctx, &lambda_0, &lambda_0);
+    let lambda_0_sq_minus_x_0 = chip.sub_no_carry(ctx, &lambda_0_sq, &P.x);
+    let x_2_no_carry = chip.sub_no_carry(ctx, &lambda_0_sq_minus_x_0, &Q.x);
+    let x_2 = chip.carry_mod(ctx, &x_2_no_carry);
+
+    if is_strict {
+        // TODO: when can we remove this check?
+        // constrains that x_2 != x_0
+        let x_is_equal = chip.is_equal_unenforced(ctx, &P.x, &x_2);
+        chip.range().gate().assert_is_const(ctx, &x_is_equal, &F::zero());
+    }
+    // lambda_1 = lambda_0 + 2 * y_0 / (x_2 - x_0)
+    let two_y_0 = chip.scalar_mul_no_carry(ctx, &P.y, 2);
+    let x_2_minus_x_0 = chip.sub_no_carry(ctx, &x_2, &P.x);
+    let lambda_1_minus_lambda_0 = chip.divide(ctx, &two_y_0, &x_2_minus_x_0);
+    let lambda_1_no_carry = chip.add_no_carry(ctx, &lambda_0, &lambda_1_minus_lambda_0);
+
+    // x_res = lambda_1^2 - x_0 - x_2
+    let lambda_1_sq_nc = chip.mul_no_carry(ctx, &lambda_1_no_carry, &lambda_1_no_carry);
+    let lambda_1_sq_minus_x_0 = chip.sub_no_carry(ctx, &lambda_1_sq_nc, &P.x);
+    let x_res_no_carry = chip.sub_no_carry(ctx, &lambda_1_sq_minus_x_0, &x_2);
+    let x_res = chip.carry_mod(ctx, &x_res_no_carry);
+
+    // y_res = lambda_1 * (x_res - x_0) - y_0
+    let x_res_minus_x_0 = chip.sub_no_carry(ctx, &x_res, &P.x);
+    let lambda_1_x_res_minus_x_0 = chip.mul_no_carry(ctx, &lambda_1_no_carry, &x_res_minus_x_0);
+    let y_res_no_carry = chip.sub_no_carry(ctx, &lambda_1_x_res_minus_x_0, &P.y);
+    let y_res = chip.carry_mod(ctx, &y_res_no_carry);
+
+    EcPoint::construct(x_res, y_res)
+}
+
+pub fn ec_select<F: PrimeField, FC>(
+    chip: &FC,
+    ctx: &mut Context<F>,
+    P: &EcPoint<F, FC::FieldPoint>,
+    Q: &EcPoint<F, FC::FieldPoint>,
+    sel: AssignedValue<F>,
+) -> EcPoint<F, FC::FieldPoint>
 where
-    FC: FieldChip<F> + Selectable<F, Point<'v> = FC::FieldPoint<'v>>,
+    FC: FieldChip<F> + Selectable<F, Point = FC::FieldPoint>,
 {
     let Rx = chip.select(ctx, &P.x, &Q.x, sel);
     let Ry = chip.select(ctx, &P.y, &Q.y, sel);
@@ -193,14 +253,14 @@ where
 
 // takes the dot product of points with sel, where each is intepreted as
 // a _vector_
-pub fn ec_select_by_indicator<'v, F: PrimeField, FC>(
+pub fn ec_select_by_indicator<F: PrimeField, FC>(
     chip: &FC,
-    ctx: &mut Context<'_, F>,
-    points: &[EcPoint<F, FC::FieldPoint<'v>>],
-    coeffs: &[AssignedValue<'v, F>],
-) -> EcPoint<F, FC::FieldPoint<'v>>
+    ctx: &mut Context<F>,
+    points: &[EcPoint<F, FC::FieldPoint>],
+    coeffs: &[AssignedValue<F>],
+) -> EcPoint<F, FC::FieldPoint>
 where
-    FC: FieldChip<F> + Selectable<F, Point<'v> = FC::FieldPoint<'v>>,
+    FC: FieldChip<F> + Selectable<F, Point = FC::FieldPoint>,
 {
     let x_coords = points.iter().map(|P| P.x.clone()).collect::<Vec<_>>();
     let y_coords = points.iter().map(|P| P.y.clone()).collect::<Vec<_>>();
@@ -210,14 +270,14 @@ where
 }
 
 // `sel` is little-endian binary
-pub fn ec_select_from_bits<'v, F: PrimeField, FC>(
+pub fn ec_select_from_bits<F: PrimeField, FC>(
     chip: &FC,
-    ctx: &mut Context<'_, F>,
-    points: &[EcPoint<F, FC::FieldPoint<'v>>],
-    sel: &[AssignedValue<'v, F>],
-) -> EcPoint<F, FC::FieldPoint<'v>>
+    ctx: &mut Context<F>,
+    points: &[EcPoint<F, FC::FieldPoint>],
+    sel: &[AssignedValue<F>],
+) -> EcPoint<F, FC::FieldPoint>
 where
-    FC: FieldChip<F> + Selectable<F, Point<'v> = FC::FieldPoint<'v>>,
+    FC: FieldChip<F> + Selectable<F, Point = FC::FieldPoint>,
 {
     let w = sel.len();
     let num_points = points.len();
@@ -234,16 +294,16 @@ where
 // - `scalar_i < 2^{max_bits} for all i` (constrained by num_to_bits)
 // - `max_bits <= modulus::<F>.bits()`
 //   * P has order given by the scalar field modulus
-pub fn scalar_multiply<'v, F: PrimeField, FC>(
+pub fn scalar_multiply<F: PrimeField, FC>(
     chip: &FC,
-    ctx: &mut Context<'v, F>,
-    P: &EcPoint<F, FC::FieldPoint<'v>>,
-    scalar: &Vec<AssignedValue<'v, F>>,
+    ctx: &mut Context<F>,
+    P: &EcPoint<F, FC::FieldPoint>,
+    scalar: Vec<AssignedValue<F>>,
     max_bits: usize,
     window_bits: usize,
-) -> EcPoint<F, FC::FieldPoint<'v>>
+) -> EcPoint<F, FC::FieldPoint>
 where
-    FC: FieldChip<F> + Selectable<F, Point<'v> = FC::FieldPoint<'v>>,
+    FC: FieldChip<F> + Selectable<F, Point = FC::FieldPoint>,
 {
     assert!(!scalar.is_empty());
     assert!((max_bits as u64) <= modulus::<F>().bits());
@@ -258,24 +318,15 @@ where
         bits.append(&mut new_bits);
     }
     let mut rounded_bits = bits;
-    let zero_cell = chip.gate().load_zero(ctx);
-    for _ in 0..(rounded_bitlen - total_bits) {
-        rounded_bits.push(zero_cell.clone());
-    }
+    let zero_cell = ctx.load_zero();
+    rounded_bits.resize(rounded_bitlen, zero_cell);
 
     // is_started[idx] holds whether there is a 1 in bits with index at least (rounded_bitlen - idx)
     let mut is_started = Vec::with_capacity(rounded_bitlen);
-    for _ in 0..(rounded_bitlen - total_bits) {
-        is_started.push(zero_cell.clone());
-    }
-    is_started.push(zero_cell.clone());
+    is_started.resize(rounded_bitlen - total_bits + 1, zero_cell);
     for idx in 1..total_bits {
-        let or = chip.gate().or(
-            ctx,
-            Existing(&is_started[rounded_bitlen - total_bits + idx - 1]),
-            Existing(&rounded_bits[total_bits - idx]),
-        );
-        is_started.push(or.clone());
+        let or = chip.gate().or(ctx, *is_started.last().unwrap(), rounded_bits[total_bits - idx]);
+        is_started.push(or);
     }
 
     // is_zero_window[idx] is 0/1 depending on whether bits [rounded_bitlen - window_bits * (idx + 1), rounded_bitlen - window_bits * idx) are all 0
@@ -284,10 +335,10 @@ where
         let temp_bits = rounded_bits
             [rounded_bitlen - window_bits * (idx + 1)..rounded_bitlen - window_bits * idx]
             .iter()
-            .map(|x| Existing(x));
+            .copied();
         let bit_sum = chip.gate().sum(ctx, temp_bits);
-        let is_zero = chip.gate().is_zero(ctx, &bit_sum);
-        is_zero_window.push(is_zero.clone());
+        let is_zero = chip.gate().is_zero(ctx, bit_sum);
+        is_zero_window.push(is_zero);
     }
 
     // cached_points[idx] stores idx * P, with cached_points[0] = P
@@ -298,10 +349,10 @@ where
     for idx in 2..cache_size {
         if idx == 2 {
             let double = ec_double(chip, ctx, P /*, b*/);
-            cached_points.push(double.clone());
+            cached_points.push(double);
         } else {
             let new_point = ec_add_unequal(chip, ctx, &cached_points[idx - 1], P, false);
-            cached_points.push(new_point.clone());
+            cached_points.push(new_point);
         }
     }
 
@@ -327,19 +378,16 @@ where
         );
         let mult_and_add = ec_add_unequal(chip, ctx, &mult_point, &add_point, false);
         let is_started_point =
-            ec_select(chip, ctx, &mult_point, &mult_and_add, &is_zero_window[idx]);
+            ec_select(chip, ctx, &mult_point, &mult_and_add, is_zero_window[idx]);
 
         curr_point =
-            ec_select(chip, ctx, &is_started_point, &add_point, &is_started[window_bits * idx]);
+            ec_select(chip, ctx, &is_started_point, &add_point, is_started[window_bits * idx]);
     }
     curr_point
 }
 
-pub fn is_on_curve<'v, F, FC, C>(
-    chip: &FC,
-    ctx: &mut Context<'v, F>,
-    P: &EcPoint<F, FC::FieldPoint<'v>>,
-) where
+pub fn is_on_curve<F, FC, C>(chip: &FC, ctx: &mut Context<F>, P: &EcPoint<F, FC::FieldPoint>)
+where
     F: PrimeField,
     FC: FieldChip<F>,
     C: CurveAffine<Base = FC::FieldType>,
@@ -354,10 +402,7 @@ pub fn is_on_curve<'v, F, FC, C>(
     chip.check_carry_mod_to_zero(ctx, &diff)
 }
 
-pub fn load_random_point<'v, F, FC, C>(
-    chip: &FC,
-    ctx: &mut Context<'v, F>,
-) -> EcPoint<F, FC::FieldPoint<'v>>
+pub fn load_random_point<F, FC, C>(chip: &FC, ctx: &mut Context<F>) -> EcPoint<F, FC::FieldPoint>
 where
     F: PrimeField,
     FC: FieldChip<F>,
@@ -365,8 +410,8 @@ where
 {
     let base_point: C = C::CurveExt::random(ChaCha20Rng::from_entropy()).to_affine();
     let (x, y) = base_point.into_coordinates();
-    let pt_x = FC::fe_to_witness(&Value::known(x));
-    let pt_y = FC::fe_to_witness(&Value::known(y));
+    let pt_x = FC::fe_to_witness(&x);
+    let pt_y = FC::fe_to_witness(&y);
     let base = {
         let x_overflow = chip.load_private(ctx, pt_x);
         let y_overflow = chip.load_private(ctx, pt_y);
@@ -383,16 +428,16 @@ where
 // Input:
 // - `scalars` is vector of same length as `P`
 // - each `scalar` in `scalars` satisfies same assumptions as in `scalar_multiply` above
-pub fn multi_scalar_multiply<'v, F: PrimeField, FC, C>(
+pub fn multi_scalar_multiply<F: PrimeField, FC, C>(
     chip: &FC,
-    ctx: &mut Context<'v, F>,
-    P: &[EcPoint<F, FC::FieldPoint<'v>>],
-    scalars: &[Vec<AssignedValue<'v, F>>],
+    ctx: &mut Context<F>,
+    P: &[EcPoint<F, FC::FieldPoint>],
+    scalars: Vec<Vec<AssignedValue<F>>>,
     max_bits: usize,
     window_bits: usize,
-) -> EcPoint<F, FC::FieldPoint<'v>>
+) -> EcPoint<F, FC::FieldPoint>
 where
-    FC: FieldChip<F> + Selectable<F, Point<'v> = FC::FieldPoint<'v>>,
+    FC: FieldChip<F> + Selectable<F, Point = FC::FieldPoint>,
     C: CurveAffineExt<Base = FC::FieldType>,
 {
     let k = P.len();
@@ -406,17 +451,15 @@ where
     let num_windows = (total_bits + window_bits - 1) / window_bits;
     let rounded_bitlen = num_windows * window_bits;
 
-    let zero_cell = chip.gate().load_zero(ctx);
+    let zero_cell = ctx.load_zero();
     let rounded_bits = scalars
-        .iter()
+        .into_iter()
         .flat_map(|scalar| {
-            assert_eq!(scalar.len(), scalar_len);
+            debug_assert_eq!(scalar.len(), scalar_len);
             scalar
-                .iter()
+                .into_iter()
                 .flat_map(|scalar_chunk| chip.gate().num_to_bits(ctx, scalar_chunk, max_bits))
-                .chain(
-                    std::iter::repeat_with(|| zero_cell.clone()).take(rounded_bitlen - total_bits),
-                )
+                .chain(std::iter::repeat(zero_cell).take(rounded_bitlen - total_bits))
                 .collect_vec()
         })
         .collect_vec();
@@ -457,7 +500,7 @@ where
             // adversary could pick `A` so add equal case occurs, so we must use strict add_unequal
             let mut new_point = ec_add_unequal(chip, ctx, prev, point, true);
             // special case for when P[idx] = O
-            new_point = ec_select(chip, ctx, prev, &new_point, &is_infinity);
+            new_point = ec_select(chip, ctx, prev, &new_point, is_infinity);
             chip.enforce_less_than(ctx, new_point.x());
             cached_points.push(new_point);
         }
@@ -547,31 +590,32 @@ pub fn get_naf(mut exp: Vec<u64>) -> Vec<i8> {
     naf
 }
 
-pub type BaseFieldEccChip<C> = EccChip<
+pub type BaseFieldEccChip<'chip, C> = EccChip<
+    'chip,
     <C as CurveAffine>::ScalarExt,
-    FpConfig<<C as CurveAffine>::ScalarExt, <C as CurveAffine>::Base>,
+    FpChip<'chip, <C as CurveAffine>::ScalarExt, <C as CurveAffine>::Base>,
 >;
 
 #[derive(Clone, Debug)]
-pub struct EccChip<F: PrimeField, FC: FieldChip<F>> {
-    pub field_chip: FC,
+pub struct EccChip<'chip, F: PrimeField, FC: FieldChip<F>> {
+    pub field_chip: &'chip FC,
     _marker: PhantomData<F>,
 }
 
-impl<F: PrimeField, FC: FieldChip<F>> EccChip<F, FC> {
-    pub fn construct(field_chip: FC) -> Self {
+impl<'chip, F: PrimeField, FC: FieldChip<F>> EccChip<'chip, F, FC> {
+    pub fn new(field_chip: &'chip FC) -> Self {
         Self { field_chip, _marker: PhantomData }
     }
 
     pub fn field_chip(&self) -> &FC {
-        &self.field_chip
+        self.field_chip
     }
 
-    pub fn load_private<'v>(
+    pub fn load_private(
         &self,
-        ctx: &mut Context<'_, F>,
-        point: (Value<FC::FieldType>, Value<FC::FieldType>),
-    ) -> EcPoint<F, FC::FieldPoint<'v>> {
+        ctx: &mut Context<F>,
+        point: (FC::FieldType, FC::FieldType),
+    ) -> EcPoint<F, FC::FieldPoint> {
         let (x, y) = (FC::fe_to_witness(&point.0), FC::fe_to_witness(&point.1));
 
         let x_assigned = self.field_chip.load_private(ctx, x);
@@ -581,23 +625,15 @@ impl<F: PrimeField, FC: FieldChip<F>> EccChip<F, FC> {
     }
 
     /// Does not constrain witness to lie on curve
-    pub fn assign_point<'v, C>(
-        &self,
-        ctx: &mut Context<'_, F>,
-        g: Value<C>,
-    ) -> EcPoint<F, FC::FieldPoint<'v>>
+    pub fn assign_point<C>(&self, ctx: &mut Context<F>, g: C) -> EcPoint<F, FC::FieldPoint>
     where
         C: CurveAffineExt<Base = FC::FieldType>,
     {
-        let (x, y) = g.map(|g| g.into_coordinates()).unzip();
+        let (x, y) = g.into_coordinates();
         self.load_private(ctx, (x, y))
     }
 
-    pub fn assign_constant_point<'v, C>(
-        &self,
-        ctx: &mut Context<'_, F>,
-        g: C,
-    ) -> EcPoint<F, FC::FieldPoint<'v>>
+    pub fn assign_constant_point<C>(&self, ctx: &mut Context<F>, g: C) -> EcPoint<F, FC::FieldPoint>
     where
         C: CurveAffineExt<Base = FC::FieldType>,
     {
@@ -609,31 +645,25 @@ impl<F: PrimeField, FC: FieldChip<F>> EccChip<F, FC> {
         EcPoint::construct(x, y)
     }
 
-    pub fn load_random_point<'v, C>(
-        &self,
-        ctx: &mut Context<'v, F>,
-    ) -> EcPoint<F, FC::FieldPoint<'v>>
+    pub fn load_random_point<C>(&self, ctx: &mut Context<F>) -> EcPoint<F, FC::FieldPoint>
     where
         C: CurveAffineExt<Base = FC::FieldType>,
     {
         load_random_point::<F, FC, C>(self.field_chip(), ctx)
     }
 
-    pub fn assert_is_on_curve<'v, C>(
-        &self,
-        ctx: &mut Context<'v, F>,
-        P: &EcPoint<F, FC::FieldPoint<'v>>,
-    ) where
+    pub fn assert_is_on_curve<C>(&self, ctx: &mut Context<F>, P: &EcPoint<F, FC::FieldPoint>)
+    where
         C: CurveAffine<Base = FC::FieldType>,
     {
-        is_on_curve::<F, FC, C>(&self.field_chip, ctx, P)
+        is_on_curve::<F, FC, C>(self.field_chip, ctx, P)
     }
 
-    pub fn is_on_curve_or_infinity<'v, C>(
+    pub fn is_on_curve_or_infinity<C>(
         &self,
-        ctx: &mut Context<'v, F>,
-        P: &EcPoint<F, FC::FieldPoint<'v>>,
-    ) -> AssignedValue<'v, F>
+        ctx: &mut Context<F>,
+        P: &EcPoint<F, FC::FieldPoint>,
+    ) -> AssignedValue<F>
     where
         C: CurveAffine<Base = FC::FieldType>,
         C::Base: ff::PrimeField,
@@ -652,71 +682,66 @@ impl<F: PrimeField, FC: FieldChip<F>> EccChip<F, FC> {
         let x_is_zero = self.field_chip.is_zero(ctx, &P.x);
         let y_is_zero = self.field_chip.is_zero(ctx, &P.y);
 
-        self.field_chip.range().gate().or_and(
-            ctx,
-            Existing(&is_on_curve),
-            Existing(&x_is_zero),
-            Existing(&y_is_zero),
-        )
+        self.field_chip.range().gate().or_and(ctx, is_on_curve, x_is_zero, y_is_zero)
     }
 
-    pub fn negate<'v>(
+    pub fn negate(
         &self,
-        ctx: &mut Context<'v, F>,
-        P: &EcPoint<F, FC::FieldPoint<'v>>,
-    ) -> EcPoint<F, FC::FieldPoint<'v>> {
+        ctx: &mut Context<F>,
+        P: &EcPoint<F, FC::FieldPoint>,
+    ) -> EcPoint<F, FC::FieldPoint> {
         EcPoint::construct(P.x.clone(), self.field_chip.negate(ctx, &P.y))
     }
 
     /// Assumes that P.x != Q.x
     /// If `is_strict == true`, then actually constrains that `P.x != Q.x`
-    pub fn add_unequal<'v>(
+    pub fn add_unequal(
         &self,
-        ctx: &mut Context<'v, F>,
-        P: &EcPoint<F, FC::FieldPoint<'v>>,
-        Q: &EcPoint<F, FC::FieldPoint<'v>>,
+        ctx: &mut Context<F>,
+        P: &EcPoint<F, FC::FieldPoint>,
+        Q: &EcPoint<F, FC::FieldPoint>,
         is_strict: bool,
-    ) -> EcPoint<F, FC::FieldPoint<'v>> {
-        ec_add_unequal(&self.field_chip, ctx, P, Q, is_strict)
+    ) -> EcPoint<F, FC::FieldPoint> {
+        ec_add_unequal(self.field_chip, ctx, P, Q, is_strict)
     }
 
     /// Assumes that P.x != Q.x
     /// Otherwise will panic
-    pub fn sub_unequal<'v>(
+    pub fn sub_unequal(
         &self,
-        ctx: &mut Context<'v, F>,
-        P: &EcPoint<F, FC::FieldPoint<'v>>,
-        Q: &EcPoint<F, FC::FieldPoint<'v>>,
+        ctx: &mut Context<F>,
+        P: &EcPoint<F, FC::FieldPoint>,
+        Q: &EcPoint<F, FC::FieldPoint>,
         is_strict: bool,
-    ) -> EcPoint<F, FC::FieldPoint<'v>> {
-        ec_sub_unequal(&self.field_chip, ctx, P, Q, is_strict)
+    ) -> EcPoint<F, FC::FieldPoint> {
+        ec_sub_unequal(self.field_chip, ctx, P, Q, is_strict)
     }
 
-    pub fn double<'v>(
+    pub fn double(
         &self,
-        ctx: &mut Context<'v, F>,
-        P: &EcPoint<F, FC::FieldPoint<'v>>,
-    ) -> EcPoint<F, FC::FieldPoint<'v>> {
-        ec_double(&self.field_chip, ctx, P)
+        ctx: &mut Context<F>,
+        P: &EcPoint<F, FC::FieldPoint>,
+    ) -> EcPoint<F, FC::FieldPoint> {
+        ec_double(self.field_chip, ctx, P)
     }
 
-    pub fn is_equal<'v>(
+    pub fn is_equal(
         &self,
-        ctx: &mut Context<'v, F>,
-        P: &EcPoint<F, FC::FieldPoint<'v>>,
-        Q: &EcPoint<F, FC::FieldPoint<'v>>,
-    ) -> AssignedValue<'v, F> {
+        ctx: &mut Context<F>,
+        P: &EcPoint<F, FC::FieldPoint>,
+        Q: &EcPoint<F, FC::FieldPoint>,
+    ) -> AssignedValue<F> {
         // TODO: optimize
         let x_is_equal = self.field_chip.is_equal(ctx, &P.x, &Q.x);
         let y_is_equal = self.field_chip.is_equal(ctx, &P.y, &Q.y);
-        self.field_chip.range().gate().and(ctx, Existing(&x_is_equal), Existing(&y_is_equal))
+        self.field_chip.range().gate().and(ctx, x_is_equal, y_is_equal)
     }
 
-    pub fn assert_equal<'v>(
+    pub fn assert_equal(
         &self,
-        ctx: &mut Context<'v, F>,
-        P: &EcPoint<F, FC::FieldPoint<'v>>,
-        Q: &EcPoint<F, FC::FieldPoint<'v>>,
+        ctx: &mut Context<F>,
+        P: &EcPoint<F, FC::FieldPoint>,
+        Q: &EcPoint<F, FC::FieldPoint>,
     ) {
         self.field_chip.assert_equal(ctx, &P.x, &Q.x);
         self.field_chip.assert_equal(ctx, &P.y, &Q.y);
@@ -724,12 +749,12 @@ impl<F: PrimeField, FC: FieldChip<F>> EccChip<F, FC> {
 
     pub fn sum<'b, 'v: 'b, C>(
         &self,
-        ctx: &mut Context<'v, F>,
-        points: impl Iterator<Item = &'b EcPoint<F, FC::FieldPoint<'v>>>,
-    ) -> EcPoint<F, FC::FieldPoint<'v>>
+        ctx: &mut Context<F>,
+        points: impl Iterator<Item = &'b EcPoint<F, FC::FieldPoint>>,
+    ) -> EcPoint<F, FC::FieldPoint>
     where
         C: CurveAffineExt<Base = FC::FieldType>,
-        FC::FieldPoint<'v>: 'b,
+        FC::FieldPoint: 'b,
     {
         let rand_point = self.load_random_point::<C>(ctx);
         self.field_chip.enforce_less_than(ctx, rand_point.x());
@@ -743,40 +768,57 @@ impl<F: PrimeField, FC: FieldChip<F>> EccChip<F, FC> {
     }
 }
 
-impl<F: PrimeField, FC: FieldChip<F>> EccChip<F, FC>
+impl<'chip, F: PrimeField, FC: FieldChip<F>> EccChip<'chip, F, FC>
 where
-    for<'v> FC: Selectable<F, Point<'v> = FC::FieldPoint<'v>>,
+    FC: Selectable<F, Point = FC::FieldPoint>,
 {
-    pub fn select<'v>(
+    pub fn select(
         &self,
-        ctx: &mut Context<'_, F>,
-        P: &EcPoint<F, FC::FieldPoint<'v>>,
-        Q: &EcPoint<F, FC::FieldPoint<'v>>,
-        condition: &AssignedValue<'v, F>,
-    ) -> EcPoint<F, FC::FieldPoint<'v>> {
-        ec_select(&self.field_chip, ctx, P, Q, condition)
+        ctx: &mut Context<F>,
+        P: &EcPoint<F, FC::FieldPoint>,
+        Q: &EcPoint<F, FC::FieldPoint>,
+        condition: AssignedValue<F>,
+    ) -> EcPoint<F, FC::FieldPoint> {
+        ec_select(self.field_chip, ctx, P, Q, condition)
     }
 
-    pub fn scalar_mult<'v>(
+    pub fn scalar_mult(
         &self,
-        ctx: &mut Context<'v, F>,
-        P: &EcPoint<F, FC::FieldPoint<'v>>,
-        scalar: &Vec<AssignedValue<'v, F>>,
+        ctx: &mut Context<F>,
+        P: &EcPoint<F, FC::FieldPoint>,
+        scalar: Vec<AssignedValue<F>>,
         max_bits: usize,
         window_bits: usize,
-    ) -> EcPoint<F, FC::FieldPoint<'v>> {
-        scalar_multiply::<F, FC>(&self.field_chip, ctx, P, scalar, max_bits, window_bits)
+    ) -> EcPoint<F, FC::FieldPoint> {
+        scalar_multiply::<F, FC>(self.field_chip, ctx, P, scalar, max_bits, window_bits)
+    }
+
+    /// Default entry point for most purposes: calls `variable_base_msm_in` with `window_bits = 4` in phase 0
+    pub fn variable_base_msm<C>(
+        &self,
+        thread_pool: &Mutex<GateThreadBuilder<F>>,
+        P: &[EcPoint<F, FC::FieldPoint>],
+        scalars: Vec<Vec<AssignedValue<F>>>,
+        max_bits: usize,
+    ) -> EcPoint<F, FC::FieldPoint>
+    where
+        C: CurveAffineExt<Base = FC::FieldType>,
+        C::Base: ff::PrimeField,
+    {
+        // window_bits = 4 is optimal from empirical observations
+        self.variable_base_msm_in::<C>(thread_pool, P, scalars, max_bits, 4, 0)
     }
 
     // TODO: put a check in place that scalar is < modulus of C::Scalar
-    pub fn variable_base_msm<'v, C>(
+    pub fn variable_base_msm_in<C>(
         &self,
-        ctx: &mut Context<'v, F>,
-        P: &[EcPoint<F, FC::FieldPoint<'v>>],
-        scalars: &[Vec<AssignedValue<'v, F>>],
+        thread_pool: &Mutex<GateThreadBuilder<F>>,
+        P: &[EcPoint<F, FC::FieldPoint>],
+        scalars: Vec<Vec<AssignedValue<F>>>,
         max_bits: usize,
         window_bits: usize,
-    ) -> EcPoint<F, FC::FieldPoint<'v>>
+        phase: usize,
+    ) -> EcPoint<F, FC::FieldPoint>
     where
         C: CurveAffineExt<Base = FC::FieldType>,
         C::Base: ff::PrimeField,
@@ -785,8 +827,10 @@ where
         println!("computing length {} MSM", P.len());
 
         if P.len() <= 25 {
+            let mut builder = thread_pool.lock().unwrap();
+            let ctx = builder.main(phase);
             multi_scalar_multiply::<F, FC, C>(
-                &self.field_chip,
+                self.field_chip,
                 ctx,
                 P,
                 scalars,
@@ -801,40 +845,40 @@ where
             if radix == 0 {
                 radix = 1;
             }*/
-            let radix = 1;
-            pippenger::multi_exp::<F, FC, C>(
-                &self.field_chip,
-                ctx,
+            // guessing that it is always better to use parallelism for >25 points
+            pippenger::multi_exp_par::<F, FC, C>(
+                self.field_chip,
+                thread_pool,
                 P,
                 scalars,
                 max_bits,
-                radix,
-                window_bits,
+                window_bits, // clump_factor := window_bits
+                phase,
             )
         }
     }
 }
 
-impl<F: PrimeField, FC: PrimeFieldChip<F>> EccChip<F, FC>
+impl<'chip, F: PrimeField, FC: PrimeFieldChip<F>> EccChip<'chip, F, FC>
 where
     FC::FieldType: PrimeField,
 {
     // TODO: put a check in place that scalar is < modulus of C::Scalar
-    pub fn fixed_base_scalar_mult<'v, C>(
+    pub fn fixed_base_scalar_mult<C>(
         &self,
-        ctx: &mut Context<'v, F>,
+        ctx: &mut Context<F>,
         point: &C,
-        scalar: &[AssignedValue<'v, F>],
+        scalar: Vec<AssignedValue<F>>,
         max_bits: usize,
         window_bits: usize,
-    ) -> EcPoint<F, FC::FieldPoint<'v>>
+    ) -> EcPoint<F, FC::FieldPoint>
     where
         C: CurveAffineExt,
-        FC: PrimeFieldChip<F, FieldType = C::Base, FieldPoint<'v> = CRTInteger<'v, F>>
-            + Selectable<F, Point<'v> = FC::FieldPoint<'v>>,
+        FC: PrimeFieldChip<F, FieldType = C::Base, FieldPoint = CRTInteger<F>>
+            + Selectable<F, Point = FC::FieldPoint>,
     {
         fixed_base::scalar_multiply::<F, _, _>(
-            &self.field_chip,
+            self.field_chip,
             ctx,
             point,
             scalar,
@@ -843,30 +887,61 @@ where
         )
     }
 
-    /// `radix = 0` means auto-calculate
-    ///
+    /// Default entry point for most purposes: calls `fixed_base_msm_in` with `clump_factor = 4` in phase 0
+    pub fn fixed_base_msm<C>(
+        &self,
+        thread_pool: &Mutex<GateThreadBuilder<F>>,
+        points: &[C],
+        scalars: Vec<Vec<AssignedValue<F>>>,
+        max_scalar_bits_per_cell: usize,
+    ) -> EcPoint<F, FC::FieldPoint>
+    where
+        C: CurveAffineExt,
+        FC: PrimeFieldChip<F, FieldType = C::Base, FieldPoint = CRTInteger<F>>
+            + Selectable<F, Point = FC::FieldPoint>,
+    {
+        self.fixed_base_msm_in::<C>(thread_pool, points, scalars, max_scalar_bits_per_cell, 4, 0)
+    }
+
+    // NOTE: `radix` is no longer a parameter here; `radix = 0` used to mean auto-calculate
+    //
     /// `clump_factor = 0` means auto-calculate
     ///
     /// The user should filter out base points that are identity beforehand; we do not separately do this here
-    pub fn fixed_base_msm<'v, C>(
+    pub fn fixed_base_msm_in<C>(
         &self,
-        ctx: &mut Context<'v, F>,
+        thread_pool: &Mutex<GateThreadBuilder<F>>,
         points: &[C],
-        scalars: &[Vec<AssignedValue<'v, F>>],
+        scalars: Vec<Vec<AssignedValue<F>>>,
         max_scalar_bits_per_cell: usize,
-        _radix: usize,
         clump_factor: usize,
-    ) -> EcPoint<F, FC::FieldPoint<'v>>
+        phase: usize,
+    ) -> EcPoint<F, FC::FieldPoint>
     where
         C: CurveAffineExt,
-        FC: PrimeFieldChip<F, FieldType = C::Base, FieldPoint<'v> = CRTInteger<'v, F>>
-            + Selectable<F, Point<'v> = FC::FieldPoint<'v>>,
+        FC: PrimeFieldChip<F, FieldType = C::Base, FieldPoint = CRTInteger<F>>
+            + Selectable<F, Point = FC::FieldPoint>,
     {
-        assert_eq!(points.len(), scalars.len());
+        debug_assert_eq!(points.len(), scalars.len());
         #[cfg(feature = "display")]
         println!("computing length {} fixed base msm", points.len());
 
-        fixed_base::msm(self, ctx, points, scalars, max_scalar_bits_per_cell, clump_factor)
+        // heuristic to decide when to use parallelism
+        if points.len() < rayon::current_num_threads() {
+            let mut builder = thread_pool.lock().unwrap();
+            let ctx = builder.main(phase);
+            fixed_base::msm(self, ctx, points, scalars, max_scalar_bits_per_cell, clump_factor)
+        } else {
+            fixed_base::msm_par(
+                self,
+                thread_pool,
+                points,
+                scalars,
+                max_scalar_bits_per_cell,
+                clump_factor,
+                phase,
+            )
+        }
 
         // Empirically does not seem like pippenger is any better for fixed base msm right now, because of the cost of `select_by_indicator`
         // Cell usage becomes around comparable when `points.len() > 100`, and `clump_factor` should always be 4
diff --git a/halo2-ecc/src/ecc/pippenger.rs b/halo2-ecc/src/ecc/pippenger.rs
index bb60f9c2..11ada696 100644
--- a/halo2-ecc/src/ecc/pippenger.rs
+++ b/halo2-ecc/src/ecc/pippenger.rs
@@ -2,12 +2,14 @@ use super::{
     ec_add_unequal, ec_double, ec_select, ec_select_from_bits, ec_sub_unequal, load_random_point,
     EcPoint,
 };
-use crate::fields::{FieldChip, Selectable};
+use crate::fields::{FieldChip, PrimeField, Selectable};
 use halo2_base::{
-    gates::GateInstructions,
-    utils::{CurveAffineExt, PrimeField},
+    gates::{builder::GateThreadBuilder, GateInstructions},
+    utils::CurveAffineExt,
     AssignedValue, Context,
 };
+use rayon::prelude::*;
+use std::sync::Mutex;
 
 // Reference: https://jbootle.github.io/Misc/pippenger.pdf
 
@@ -15,14 +17,17 @@ use halo2_base::{
 // Output:
 // * new_points: length `points.len() * radix`
 // * new_bool_scalars: 2d array `ceil(scalar_bits / radix)` by `points.len() * radix`
-pub fn decompose<'v, F, FC>(
+//
+// Empirically `radix = 1` is best, so we don't use this function for now
+/*
+pub fn decompose<F, FC>(
     chip: &FC,
-    ctx: &mut Context<'v, F>,
-    points: &[EcPoint<F, FC::FieldPoint<'v>>],
-    scalars: &[Vec<AssignedValue<'v, F>>],
+    ctx: &mut Context<F>,
+    points: &[EcPoint<F, FC::FieldPoint>],
+    scalars: &[Vec<AssignedValue<F>>],
     max_scalar_bits_per_cell: usize,
     radix: usize,
-) -> (Vec<EcPoint<F, FC::FieldPoint<'v>>>, Vec<Vec<AssignedValue<'v, F>>>)
+) -> (Vec<EcPoint<F, FC::FieldPoint>>, Vec<Vec<AssignedValue<F>>>)
 where
     F: PrimeField,
     FC: FieldChip<F>,
@@ -34,7 +39,7 @@ where
     let mut new_points = Vec::with_capacity(radix * points.len());
     let mut new_bool_scalars = vec![Vec::with_capacity(radix * points.len()); t];
 
-    let zero_cell = chip.gate().load_zero(ctx);
+    let zero_cell = ctx.load_zero();
     for (point, scalar) in points.iter().zip(scalars.iter()) {
         assert_eq!(scalars[0].len(), scalar.len());
         let mut g = point.clone();
@@ -46,7 +51,7 @@ where
         }
         let mut bits = Vec::with_capacity(scalar_bits);
         for x in scalar {
-            let mut new_bits = chip.gate().num_to_bits(ctx, x, max_scalar_bits_per_cell);
+            let mut new_bits = chip.gate().num_to_bits(ctx, *x, max_scalar_bits_per_cell);
             bits.append(&mut new_bits);
         }
         for k in 0..t {
@@ -58,19 +63,20 @@ where
 
     (new_points, new_bool_scalars)
 }
+*/
 
 // Given points[i] and bool_scalars[j][i],
 // compute G'[j] = sum_{i=0..points.len()} points[i] * bool_scalars[j][i]
 // output is [ G'[j] + rand_point ]_{j=0..bool_scalars.len()}, rand_point
-pub fn multi_product<'v, F: PrimeField, FC, C>(
+pub fn multi_product<F: PrimeField, FC, C>(
     chip: &FC,
-    ctx: &mut Context<'v, F>,
-    points: &[EcPoint<F, FC::FieldPoint<'v>>],
-    bool_scalars: &[Vec<AssignedValue<'v, F>>],
+    ctx: &mut Context<F>,
+    points: &[EcPoint<F, FC::FieldPoint>],
+    bool_scalars: &[Vec<AssignedValue<F>>],
     clumping_factor: usize,
-) -> (Vec<EcPoint<F, FC::FieldPoint<'v>>>, EcPoint<F, FC::FieldPoint<'v>>)
+) -> (Vec<EcPoint<F, FC::FieldPoint>>, EcPoint<F, FC::FieldPoint>)
 where
-    FC: FieldChip<F> + Selectable<F, Point<'v> = FC::FieldPoint<'v>>,
+    FC: FieldChip<F> + Selectable<F, Point = FC::FieldPoint>,
     C: CurveAffineExt<Base = FC::FieldType>,
 {
     let c = clumping_factor; // this is `b` in Section 3 of Bootle
@@ -107,7 +113,7 @@ where
             for j in 0..(1 << i) {
                 let mut new_point = ec_add_unequal(chip, ctx, &bucket[j], point, true);
                 // if points[i] is point at infinity, do nothing
-                new_point = ec_select(chip, ctx, &bucket[j], &new_point, &is_infinity);
+                new_point = ec_select(chip, ctx, &bucket[j], &new_point, is_infinity);
                 chip.enforce_less_than(ctx, new_point.x());
                 bucket.push(new_point);
             }
@@ -138,68 +144,220 @@ where
     (acc, rand_point)
 }
 
-pub fn multi_exp<'v, F: PrimeField, FC, C>(
+/// Currently does not support the case where the final answer is the point at infinity.
+pub fn multi_exp<F: PrimeField, FC, C>(
     chip: &FC,
-    ctx: &mut Context<'v, F>,
-    points: &[EcPoint<F, FC::FieldPoint<'v>>],
-    scalars: &[Vec<AssignedValue<'v, F>>],
+    ctx: &mut Context<F>,
+    points: &[EcPoint<F, FC::FieldPoint>],
+    scalars: Vec<Vec<AssignedValue<F>>>,
     max_scalar_bits_per_cell: usize,
-    radix: usize,
+    // radix: usize, // specialize to radix = 1
     clump_factor: usize,
-) -> EcPoint<F, FC::FieldPoint<'v>>
+) -> EcPoint<F, FC::FieldPoint>
 where
-    FC: FieldChip<F> + Selectable<F, Point<'v> = FC::FieldPoint<'v>>,
+    FC: FieldChip<F> + Selectable<F, Point = FC::FieldPoint>,
     C: CurveAffineExt<Base = FC::FieldType>,
 {
-    let (points, bool_scalars) =
-        decompose::<F, _>(chip, ctx, points, scalars, max_scalar_bits_per_cell, radix);
-
-    /*
-    let t = bool_scalars.len();
-    let c = {
-        let m = points.len();
-        let cost = |b: usize| -> usize { (m + b - 1) / b * ((1 << b) + t) };
-        let c_max: usize = f64::from(points.len() as u32).log2().ceil() as usize;
-        let mut c_best = c_max;
-        for b in 1..c_max {
-            if cost(b) <= cost(c_best) {
-                c_best = b;
+    // let (points, bool_scalars) = decompose::<F, _>(chip, ctx, points, scalars, max_scalar_bits_per_cell, radix);
+
+    debug_assert_eq!(points.len(), scalars.len());
+    let scalar_bits = max_scalar_bits_per_cell * scalars[0].len();
+    // bool_scalars: 2d array `scalar_bits` by `points.len()`
+    let mut bool_scalars = vec![Vec::with_capacity(points.len()); scalar_bits];
+    for scalar in scalars {
+        for (scalar_chunk, bool_chunk) in
+            scalar.into_iter().zip(bool_scalars.chunks_mut(max_scalar_bits_per_cell))
+        {
+            let bits = chip.gate().num_to_bits(ctx, scalar_chunk, max_scalar_bits_per_cell);
+            for (bit, bool_bit) in bits.into_iter().zip(bool_chunk.iter_mut()) {
+                bool_bit.push(bit);
             }
         }
-        c_best
-    };
-    #[cfg(feature = "display")]
-    dbg!(clump_factor);
-    */
+    }
 
     let (mut agg, rand_point) =
-        multi_product::<F, FC, C>(chip, ctx, &points, &bool_scalars, clump_factor);
+        multi_product::<F, FC, C>(chip, ctx, points, &bool_scalars, clump_factor);
     // everything in agg has been enforced
 
     // compute sum_{k=0..t} agg[k] * 2^{radix * k} - (sum_k 2^{radix * k}) * rand_point
-    // (sum_{k=0..t} 2^{radix * k}) * rand_point = (2^{radix * t} - 1)/(2^radix - 1)
+    // (sum_{k=0..t} 2^{radix * k}) = (2^{radix * t} - 1)/(2^radix - 1)
     let mut sum = agg.pop().unwrap();
     let mut rand_sum = rand_point.clone();
     for g in agg.iter().rev() {
-        for _ in 0..radix {
-            sum = ec_double(chip, ctx, &sum);
-            rand_sum = ec_double(chip, ctx, &rand_sum);
-        }
-        sum = ec_add_unequal(chip, ctx, &sum, g, true);
+        rand_sum = ec_double(chip, ctx, &rand_sum);
+        // cannot use ec_double_and_add_unequal because you cannot guarantee that `sum != g`
+        sum = ec_double(chip, ctx, &sum);
         chip.enforce_less_than(ctx, sum.x());
+        sum = ec_add_unequal(chip, ctx, &sum, g, true);
+    }
+
+    rand_sum = ec_double(chip, ctx, &rand_sum);
+    // assume 2^scalar_bits != +-1 mod modulus::<F>()
+    rand_sum = ec_sub_unequal(chip, ctx, &rand_sum, &rand_point, false);
+
+    chip.enforce_less_than(ctx, sum.x());
+    chip.enforce_less_than(ctx, rand_sum.x());
+    ec_sub_unequal(chip, ctx, &sum, &rand_sum, true)
+}
+
+/// Multi-thread witness generation for multi-scalar multiplication.
+/// Should give exact same circuit as `multi_exp`.
+///
+/// Currently does not support the case where the final answer is the point at infinity.
+pub fn multi_exp_par<F: PrimeField, FC, C>(
+    chip: &FC,
+    // we use a Mutex guard to synchronize adding threads to the thread pool
+    // these are the threads within a single Phase
+    thread_pool: &Mutex<GateThreadBuilder<F>>,
+    points: &[EcPoint<F, FC::FieldPoint>],
+    scalars: Vec<Vec<AssignedValue<F>>>,
+    max_scalar_bits_per_cell: usize,
+    // radix: usize, // specialize to radix = 1
+    clump_factor: usize,
+    phase: usize,
+) -> EcPoint<F, FC::FieldPoint>
+where
+    FC: FieldChip<F> + Selectable<F, Point = FC::FieldPoint>,
+    C: CurveAffineExt<Base = FC::FieldType>,
+{
+    // let (points, bool_scalars) = decompose::<F, _>(chip, ctx, points, scalars, max_scalar_bits_per_cell, radix);
 
-        if radix != 1 {
-            // Can use non-strict as long as some property of the prime is true?
-            rand_sum = ec_add_unequal(chip, ctx, &rand_sum, &rand_point, false);
+    debug_assert_eq!(points.len(), scalars.len());
+    let scalar_bits = max_scalar_bits_per_cell * scalars[0].len();
+    // bool_scalars: 2d array `scalar_bits` by `points.len()`
+    let mut bool_scalars = vec![Vec::with_capacity(points.len()); scalar_bits];
+
+    // get a main thread
+    let mut builder = thread_pool.lock().unwrap();
+    let ctx = builder.main(phase);
+    let witness_gen_only = ctx.witness_gen_only();
+    // single-threaded computation:
+    for scalar in scalars {
+        for (scalar_chunk, bool_chunk) in
+            scalar.into_iter().zip(bool_scalars.chunks_mut(max_scalar_bits_per_cell))
+        {
+            let bits = chip.gate().num_to_bits(ctx, scalar_chunk, max_scalar_bits_per_cell);
+            for (bit, bool_bit) in bits.into_iter().zip(bool_chunk.iter_mut()) {
+                bool_bit.push(bit);
+            }
         }
     }
+    // see multi-product comments for explanation of below
+
+    let c = clump_factor;
+    let num_rounds = (points.len() + c - 1) / c;
+    let rand_base = load_random_point::<F, FC, C>(chip, ctx);
+    let mut rand_points = Vec::with_capacity(num_rounds);
+    rand_points.push(rand_base);
+    for _ in 1..num_rounds {
+        rand_points.push(ec_double(chip, ctx, rand_points.last().unwrap()));
+    }
+    // we will use a different thread per round
+    // to prevent concurrency issues with context id, we generate all the ids first
+    let thread_ids = (0..num_rounds).map(|_| builder.get_new_thread_id()).collect::<Vec<_>>();
+    drop(builder);
+    // now begins multi-threading
+
+    // multi_prods is 2d vector of size `num_rounds` by `scalar_bits`
+    let (new_threads, multi_prods): (Vec<_>, Vec<_>) = points
+        .par_chunks(c)
+        .zip(rand_points.par_iter())
+        .zip(thread_ids.into_par_iter())
+        .enumerate()
+        .map(|(round, ((points_clump, rand_point), thread_id))| {
+            // compute all possible multi-products of elements in points[round * c .. (round + 1) * c]
+            // create new thread
+            let mut thread = Context::new(witness_gen_only, thread_id);
+            let ctx = &mut thread;
+            // stores { rand_point, rand_point + points[0], rand_point + points[1], rand_point + points[0] + points[1] , ... }
+            let mut bucket = Vec::with_capacity(1 << c);
+            chip.enforce_less_than(ctx, rand_point.x());
+            bucket.push(rand_point.clone());
+            for (i, point) in points_clump.iter().enumerate() {
+                // we allow for points[i] to be the point at infinity, represented by (0, 0) in affine coordinates
+                // this can be checked by points[i].y == 0 iff points[i] == O
+                let is_infinity = chip.is_zero(ctx, &point.y);
+                chip.enforce_less_than(ctx, point.x());
+
+                for j in 0..(1 << i) {
+                    let mut new_point = ec_add_unequal(chip, ctx, &bucket[j], point, true);
+                    // if points[i] is point at infinity, do nothing
+                    new_point = ec_select(chip, ctx, &bucket[j], &new_point, is_infinity);
+                    chip.enforce_less_than(ctx, new_point.x());
+                    bucket.push(new_point);
+                }
+            }
+            let multi_prods = bool_scalars
+                .iter()
+                .map(|bits| {
+                    ec_select_from_bits::<F, _>(
+                        chip,
+                        ctx,
+                        &bucket,
+                        &bits[round * c..round * c + points_clump.len()],
+                    )
+                })
+                .collect::<Vec<_>>();
+
+            (thread, multi_prods)
+        })
+        .unzip();
+    // we collect the new threads to ensure they are in a FIXED order, otherwise later `assign_threads_in` will get confused
+    thread_pool.lock().unwrap().threads[phase].extend(new_threads);
 
-    if radix == 1 {
+    // agg[j] = sum_{i=0..num_rounds} multi_prods[i][j] for j = 0..scalar_bits
+    // as before, generate all the new thread ids first: one new thread per scalar bit
+    let mut builder = thread_pool.lock().unwrap();
+    let thread_ids = (0..scalar_bits).map(|_| builder.get_new_thread_id()).collect::<Vec<_>>();
+    drop(builder);
+    let (new_threads, mut agg): (Vec<_>, Vec<_>) = thread_ids
+        .into_par_iter()
+        .enumerate()
+        .map(|(i, thread_id)| {
+            let mut thread = Context::new(witness_gen_only, thread_id);
+            let ctx = &mut thread;
+            let mut acc = if multi_prods.len() == 1 {
+                multi_prods[0][i].clone()
+            } else {
+                ec_add_unequal(chip, ctx, &multi_prods[0][i], &multi_prods[1][i], true)
+            };
+            chip.enforce_less_than(ctx, acc.x());
+            for multi_prod in multi_prods.iter().skip(2) {
+                acc = ec_add_unequal(chip, ctx, &acc, &multi_prod[i], true);
+                chip.enforce_less_than(ctx, acc.x());
+            }
+            (thread, acc)
+        })
+        .unzip();
+    thread_pool.lock().unwrap().threads[phase].extend(new_threads);
+
+    // gets the LAST thread for single threaded work
+    // warning: don't get any earlier threads, because currently we assume equality constraints in thread i only involve threads <= i
+    let mut builder = thread_pool.lock().unwrap();
+    let ctx = builder.main(phase);
+    // we have agg[j] = G'[j] + (2^num_rounds - 1) * rand_base
+    // let rand_point = (2^num_rounds - 1) * rand_base
+    // TODO: can we remove all these random point operations somehow?
+    let mut rand_point = ec_double(chip, ctx, rand_points.last().unwrap());
+    rand_point = ec_sub_unequal(chip, ctx, &rand_point, &rand_points[0], false);
+
+    // compute sum_{k=0..scalar_bits} agg[k] * 2^k - (sum_{k=0..scalar_bits} 2^k) * rand_point
+    // (sum_{k=0..scalar_bits} 2^k) = (2^scalar_bits - 1)
+    let mut sum = agg.pop().unwrap();
+    let mut rand_sum = rand_point.clone();
+    for g in agg.iter().rev() {
         rand_sum = ec_double(chip, ctx, &rand_sum);
-        // assume 2^t != +-1 mod modulus::<F>()
-        rand_sum = ec_sub_unequal(chip, ctx, &rand_sum, &rand_point, false);
+        // cannot use ec_double_and_add_unequal because you cannot guarantee that `sum != g`
+        sum = ec_double(chip, ctx, &sum);
+        chip.enforce_less_than(ctx, sum.x());
+        sum = ec_add_unequal(chip, ctx, &sum, g, true);
     }
 
+    rand_sum = ec_double(chip, ctx, &rand_sum);
+    // assume 2^scalar_bits != +-1 mod modulus::<F>()
+    rand_sum = ec_sub_unequal(chip, ctx, &rand_sum, &rand_point, false);
+
+    chip.enforce_less_than(ctx, sum.x());
     chip.enforce_less_than(ctx, rand_sum.x());
     ec_sub_unequal(chip, ctx, &sum, &rand_sum, true)
 }
diff --git a/halo2-ecc/src/ecc/tests.rs b/halo2-ecc/src/ecc/tests.rs
index fa9d6ed5..fb9d7abf 100644
--- a/halo2-ecc/src/ecc/tests.rs
+++ b/halo2-ecc/src/ecc/tests.rs
@@ -1,6 +1,5 @@
 #![allow(unused_assignments, unused_imports, unused_variables)]
 use super::*;
-use crate::fields::fp::{FpConfig, FpStrategy};
 use crate::fields::fp2::Fp2Chip;
 use crate::halo2_proofs::{
     circuit::*,
@@ -9,158 +8,73 @@ use crate::halo2_proofs::{
     plonk::*,
 };
 use group::Group;
+use halo2_base::gates::builder::RangeCircuitBuilder;
+use halo2_base::gates::RangeChip;
 use halo2_base::utils::bigint_to_fe;
 use halo2_base::SKIP_FIRST_PASS;
-use halo2_base::{
-    gates::range::RangeStrategy, utils::value_to_option, utils::PrimeField, ContextParams,
-};
+use halo2_base::{gates::range::RangeStrategy, utils::value_to_option};
 use num_bigint::{BigInt, RandBigInt};
+use rand_core::OsRng;
 use std::marker::PhantomData;
 use std::ops::Neg;
 
-#[derive(Default)]
-pub struct MyCircuit<F> {
-    pub P: Option<G1Affine>,
-    pub Q: Option<G1Affine>,
-    pub _marker: PhantomData<F>,
-}
-
-const NUM_ADVICE: usize = 2;
-const NUM_FIXED: usize = 2;
-
-impl<F: PrimeField> Circuit<F> for MyCircuit<F> {
-    type Config = FpConfig<F, Fq>;
-    type FloorPlanner = SimpleFloorPlanner;
-
-    fn without_witnesses(&self) -> Self {
-        Self { P: None, Q: None, _marker: PhantomData }
+fn basic_g1_tests<F: PrimeField>(
+    ctx: &mut Context<F>,
+    lookup_bits: usize,
+    limb_bits: usize,
+    num_limbs: usize,
+    P: G1Affine,
+    Q: G1Affine,
+) {
+    std::env::set_var("LOOKUP_BITS", lookup_bits.to_string());
+    let range = RangeChip::<F>::default(lookup_bits);
+    let fp_chip = FpChip::<F, Fq>::new(&range, limb_bits, num_limbs);
+    let chip = EccChip::new(&fp_chip);
+
+    let P_assigned = chip.load_private(ctx, (P.x, P.y));
+    let Q_assigned = chip.load_private(ctx, (Q.x, Q.y));
+
+    // test add_unequal
+    chip.field_chip.enforce_less_than(ctx, P_assigned.x());
+    chip.field_chip.enforce_less_than(ctx, Q_assigned.x());
+    let sum = chip.add_unequal(ctx, &P_assigned, &Q_assigned, false);
+    assert_eq!(sum.x.truncation.to_bigint(limb_bits), sum.x.value);
+    assert_eq!(sum.y.truncation.to_bigint(limb_bits), sum.y.value);
+    {
+        let actual_sum = G1Affine::from(P + Q);
+        assert_eq!(bigint_to_fe::<Fq>(&sum.x.value), actual_sum.x);
+        assert_eq!(bigint_to_fe::<Fq>(&sum.y.value), actual_sum.y);
     }
-
-    fn configure(meta: &mut ConstraintSystem<F>) -> Self::Config {
-        FpConfig::<F, _>::configure(
-            meta,
-            FpStrategy::Simple,
-            &[NUM_ADVICE],
-            &[1],
-            NUM_FIXED,
-            22,
-            88,
-            3,
-            modulus::<Fq>(),
-            0,
-            23,
-        )
-    }
-
-    fn synthesize(
-        &self,
-        config: Self::Config,
-        mut layouter: impl Layouter<F>,
-    ) -> Result<(), Error> {
-        config.load_lookup_table(&mut layouter)?;
-        let chip = EccChip::construct(config.clone());
-
-        let mut first_pass = SKIP_FIRST_PASS;
-
-        layouter.assign_region(
-            || "ecc",
-            |region| {
-                if first_pass {
-                    first_pass = false;
-                    return Ok(());
-                }
-
-                let mut aux = chip.field_chip().new_context(region);
-                let ctx = &mut aux;
-
-                let P_assigned = chip.load_private(
-                    ctx,
-                    match self.P {
-                        Some(P) => (Value::known(P.x), Value::known(P.y)),
-                        None => (Value::unknown(), Value::unknown()),
-                    },
-                );
-                let Q_assigned = chip.load_private(
-                    ctx,
-                    match self.Q {
-                        Some(Q) => (Value::known(Q.x), Value::known(Q.y)),
-                        None => (Value::unknown(), Value::unknown()),
-                    },
-                );
-
-                // test add_unequal
-                {
-                    chip.field_chip.enforce_less_than(ctx, P_assigned.x());
-                    chip.field_chip.enforce_less_than(ctx, Q_assigned.x());
-                    let sum = chip.add_unequal(ctx, &P_assigned, &Q_assigned, false);
-                    assert_eq!(
-                        value_to_option(sum.x.truncation.to_bigint(config.limb_bits)),
-                        value_to_option(sum.x.value.clone())
-                    );
-                    assert_eq!(
-                        value_to_option(sum.y.truncation.to_bigint(config.limb_bits)),
-                        value_to_option(sum.y.value.clone())
-                    );
-                    if self.P.is_some() {
-                        let actual_sum = G1Affine::from(self.P.unwrap() + self.Q.unwrap());
-                        sum.x.value.map(|v| assert_eq!(bigint_to_fe::<Fq>(&v), actual_sum.x));
-                        sum.y.value.map(|v| assert_eq!(bigint_to_fe::<Fq>(&v), actual_sum.y));
-                    }
-                    println!("add unequal witness OK");
-                }
-
-                // test double
-                {
-                    let doub = chip.double(ctx, &P_assigned);
-                    assert_eq!(
-                        value_to_option(doub.x.truncation.to_bigint(config.limb_bits)),
-                        value_to_option(doub.x.value.clone())
-                    );
-                    assert_eq!(
-                        value_to_option(doub.y.truncation.to_bigint(config.limb_bits)),
-                        value_to_option(doub.y.value.clone())
-                    );
-                    if self.P.is_some() {
-                        let actual_doub = G1Affine::from(self.P.unwrap() * Fr::from(2u64));
-                        doub.x.value.map(|v| assert_eq!(bigint_to_fe::<Fq>(&v), actual_doub.x));
-                        doub.y.value.map(|v| assert_eq!(bigint_to_fe::<Fq>(&v), actual_doub.y));
-                    }
-                    println!("double witness OK");
-                }
-
-                chip.field_chip.finalize(ctx);
-
-                #[cfg(feature = "display")]
-                {
-                    println!("Using {NUM_ADVICE} advice columns and {NUM_FIXED} fixed columns");
-                    println!("total advice cells: {}", ctx.total_advice);
-                    let (const_rows, _) = ctx.fixed_stats();
-                    println!("maximum rows used by a fixed column: {const_rows}");
-                }
-
-                Ok(())
-            },
-        )
+    println!("add unequal witness OK");
+
+    // test double
+    let doub = chip.double(ctx, &P_assigned);
+    assert_eq!(doub.x.truncation.to_bigint(limb_bits), doub.x.value);
+    assert_eq!(doub.y.truncation.to_bigint(limb_bits), doub.y.value);
+    {
+        let actual_doub = G1Affine::from(P * Fr::from(2u64));
+        assert_eq!(bigint_to_fe::<Fq>(&doub.x.value), actual_doub.x);
+        assert_eq!(bigint_to_fe::<Fq>(&doub.y.value), actual_doub.y);
     }
+    println!("double witness OK");
 }
 
-#[cfg(test)]
 #[test]
 fn test_ecc() {
     let k = 23;
-    let mut rng = rand::thread_rng();
+    let P = G1Affine::random(OsRng);
+    let Q = G1Affine::random(OsRng);
 
-    let P = Some(G1Affine::random(&mut rng));
-    let Q = Some(G1Affine::random(&mut rng));
+    let mut builder = GateThreadBuilder::<Fr>::mock();
+    basic_g1_tests(builder.main(0), k - 1, 88, 3, P, Q);
 
-    let circuit = MyCircuit::<Fr> { P, Q, _marker: PhantomData };
+    builder.config(k, Some(20));
+    let circuit = RangeCircuitBuilder::mock(builder);
 
-    let prover = MockProver::run(k, &circuit, vec![]).unwrap();
-    prover.assert_satisfied();
+    MockProver::run(k as u32, &circuit, vec![]).unwrap().assert_satisfied();
 }
 
 #[cfg(feature = "dev-graph")]
-#[cfg(test)]
 #[test]
 fn plot_ecc() {
     let k = 10;
@@ -170,7 +84,14 @@ fn plot_ecc() {
     root.fill(&WHITE).unwrap();
     let root = root.titled("Ecc Layout", ("sans-serif", 60)).unwrap();
 
-    let circuit = MyCircuit::<Fr>::default();
+    let P = G1Affine::random(OsRng);
+    let Q = G1Affine::random(OsRng);
+
+    let mut builder = GateThreadBuilder::<Fr>::keygen();
+    basic_g1_tests(builder.main(0), 22, 88, 3, P, Q);
+
+    builder.config(k, Some(10));
+    let circuit = RangeCircuitBuilder::mock(builder);
 
     halo2_proofs::dev::CircuitLayout::default().render(k, &circuit, &root).unwrap();
 }
diff --git a/halo2-ecc/src/fields/fp.rs b/halo2-ecc/src/fields/fp.rs
index 1329726a..a97f1d11 100644
--- a/halo2-ecc/src/fields/fp.rs
+++ b/halo2-ecc/src/fields/fp.rs
@@ -1,43 +1,33 @@
-use super::{FieldChip, PrimeFieldChip, Selectable};
+use super::{FieldChip, PrimeField, PrimeFieldChip, Selectable};
 use crate::bigint::{
     add_no_carry, big_is_equal, big_is_zero, carry_mod, check_carry_mod_to_zero, mul_no_carry,
     scalar_mul_and_add_no_carry, scalar_mul_no_carry, select, select_by_indicator, sub,
     sub_no_carry, CRTInteger, FixedCRTInteger, OverflowInteger,
 };
-use crate::halo2_proofs::{
-    circuit::{Layouter, Region, Value},
-    halo2curves::CurveAffine,
-    plonk::{ConstraintSystem, Error},
-};
+use crate::halo2_proofs::halo2curves::CurveAffine;
+use halo2_base::gates::RangeChip;
+use halo2_base::utils::decompose_bigint;
 use halo2_base::{
-    gates::{
-        range::{RangeConfig, RangeStrategy},
-        GateInstructions, RangeInstructions,
-    },
-    utils::{
-        bigint_to_fe, biguint_to_fe, bit_length, decompose_bigint_option, decompose_biguint,
-        fe_to_biguint, modulus, PrimeField,
-    },
-    AssignedValue, Context, ContextParams,
+    gates::{range::RangeConfig, GateInstructions, RangeInstructions},
+    utils::{bigint_to_fe, biguint_to_fe, bit_length, decompose_biguint, fe_to_biguint, modulus},
+    AssignedValue, Context,
     QuantumCell::{Constant, Existing},
 };
 use num_bigint::{BigInt, BigUint};
 use num_traits::One;
-use serde::{Deserialize, Serialize};
 use std::{cmp::max, marker::PhantomData};
 
-#[derive(Serialize, Deserialize, Clone, Debug, PartialEq)]
-pub enum FpStrategy {
-    Simple,
-    SimplePlus,
-}
+pub type BaseFieldChip<'range, C> =
+    FpChip<'range, <C as CurveAffine>::ScalarExt, <C as CurveAffine>::Base>;
 
-pub type BaseFieldChip<C> = FpConfig<<C as CurveAffine>::ScalarExt, <C as CurveAffine>::Base>;
+pub type FpConfig<F> = RangeConfig<F>;
+
+// `Fp` always needs to be `BigPrimeField`; we may later want to support `F` being just `ScalarField`, but for optimization reasons we'll assume it's also `BigPrimeField` for now
 
 #[derive(Clone, Debug)]
-pub struct FpConfig<F: PrimeField, Fp: PrimeField> {
-    pub range: RangeConfig<F>,
-    // pub bigint_chip: BigIntConfig<F>,
+pub struct FpChip<'range, F: PrimeField, Fp: PrimeField> {
+    pub range: &'range RangeChip<F>,
+
     pub limb_bits: usize,
     pub num_limbs: usize,
 
@@ -55,45 +45,10 @@ pub struct FpConfig<F: PrimeField, Fp: PrimeField> {
     _marker: PhantomData<Fp>,
 }
 
-impl<F: PrimeField, Fp: PrimeField> FpConfig<F, Fp> {
-    pub fn configure(
-        meta: &mut ConstraintSystem<F>,
-        strategy: FpStrategy,
-        num_advice: &[usize],
-        num_lookup_advice: &[usize],
-        num_fixed: usize,
-        lookup_bits: usize,
-        limb_bits: usize,
-        num_limbs: usize,
-        p: BigUint,
-        gate_context_id: usize,
-        k: usize,
-    ) -> Self {
-        let range = RangeConfig::<F>::configure(
-            meta,
-            match strategy {
-                FpStrategy::Simple => RangeStrategy::Vertical,
-                FpStrategy::SimplePlus => RangeStrategy::PlonkPlus,
-            },
-            num_advice,
-            num_lookup_advice,
-            num_fixed,
-            lookup_bits,
-            gate_context_id,
-            k,
-        );
-
-        Self::construct(range, limb_bits, num_limbs, p)
-    }
-
-    pub fn construct(
-        range: RangeConfig<F>,
-        // bigint_chip: BigIntConfig<F>,
-        limb_bits: usize,
-        num_limbs: usize,
-        p: BigUint,
-    ) -> Self {
+impl<'range, F: PrimeField, Fp: PrimeField> FpChip<'range, F, Fp> {
+    pub fn new(range: &'range RangeChip<F>, limb_bits: usize, num_limbs: usize) -> Self {
         let limb_mask = (BigUint::from(1u64) << limb_bits) - 1usize;
+        let p = modulus::<Fp>();
         let p_limbs = decompose_biguint(&p, num_limbs, limb_bits);
         let native_modulus = modulus::<F>();
         let p_native = biguint_to_fe(&(&p % &native_modulus));
@@ -105,9 +60,8 @@ impl<F: PrimeField, Fp: PrimeField> FpConfig<F, Fp> {
             limb_bases.push(limb_base * limb_bases.last().unwrap());
         }
 
-        FpConfig {
+        Self {
             range,
-            // bigint_chip,
             limb_bits,
             num_limbs,
             num_limbs_bits: bit_length(num_limbs as u64),
@@ -123,54 +77,29 @@ impl<F: PrimeField, Fp: PrimeField> FpConfig<F, Fp> {
         }
     }
 
-    pub fn new_context<'a, 'b>(&'b self, region: Region<'a, F>) -> Context<'a, F> {
-        Context::new(
-            region,
-            ContextParams {
-                max_rows: self.range.gate.max_rows,
-                num_context_ids: 1,
-                fixed_columns: self.range.gate.constants.clone(),
-            },
-        )
-    }
-
-    pub fn load_lookup_table(&self, layouter: &mut impl Layouter<F>) -> Result<(), Error> {
-        self.range.load_lookup_table(layouter)
-    }
-
-    pub fn enforce_less_than_p<'v>(&self, ctx: &mut Context<'v, F>, a: &CRTInteger<'v, F>) {
+    pub fn enforce_less_than_p(&self, ctx: &mut Context<F>, a: &CRTInteger<F>) {
         // a < p iff a - p has underflow
         let mut borrow: Option<AssignedValue<F>> = None;
-        for (p_limb, a_limb) in self.p_limbs.iter().zip(a.truncation.limbs.iter()) {
+        for (&p_limb, &a_limb) in self.p_limbs.iter().zip(a.truncation.limbs.iter()) {
             let lt = match borrow {
-                None => self.range.is_less_than(
-                    ctx,
-                    Existing(a_limb),
-                    Constant(*p_limb),
-                    self.limb_bits,
-                ),
+                None => self.range.is_less_than(ctx, a_limb, Constant(p_limb), self.limb_bits),
                 Some(borrow) => {
-                    let plus_borrow =
-                        self.range.gate.add(ctx, Constant(*p_limb), Existing(&borrow));
+                    let plus_borrow = self.range.gate.add(ctx, Constant(p_limb), borrow);
                     self.range.is_less_than(
                         ctx,
                         Existing(a_limb),
-                        Existing(&plus_borrow),
+                        Existing(plus_borrow),
                         self.limb_bits,
                     )
                 }
             };
             borrow = Some(lt);
         }
-        self.range.gate.assert_is_const(ctx, &borrow.unwrap(), F::one())
-    }
-
-    pub fn finalize(&self, ctx: &mut Context<'_, F>) -> usize {
-        self.range.finalize(ctx)
+        self.range.gate.assert_is_const(ctx, &borrow.unwrap(), &F::one());
     }
 }
 
-impl<F: PrimeField, Fp: PrimeField> PrimeFieldChip<F> for FpConfig<F, Fp> {
+impl<'range, F: PrimeField, Fp: PrimeField> PrimeFieldChip<F> for FpChip<'range, F, Fp> {
     fn num_limbs(&self) -> usize {
         self.num_limbs
     }
@@ -182,46 +111,45 @@ impl<F: PrimeField, Fp: PrimeField> PrimeFieldChip<F> for FpConfig<F, Fp> {
     }
 }
 
-impl<F: PrimeField, Fp: PrimeField> FieldChip<F> for FpConfig<F, Fp> {
+impl<'range, F: PrimeField, Fp: PrimeField> FieldChip<F> for FpChip<'range, F, Fp> {
     const PRIME_FIELD_NUM_BITS: u32 = Fp::NUM_BITS;
     type ConstantType = BigUint;
-    type WitnessType = Value<BigInt>;
-    type FieldPoint<'v> = CRTInteger<'v, F>;
+    type WitnessType = BigInt;
+    type FieldPoint = CRTInteger<F>;
     type FieldType = Fp;
-    type RangeChip = RangeConfig<F>;
+    type RangeChip = RangeChip<F>;
 
     fn native_modulus(&self) -> &BigUint {
         &self.native_modulus
     }
-    fn range(&self) -> &Self::RangeChip {
-        &self.range
+    fn range(&self) -> &'range Self::RangeChip {
+        self.range
     }
     fn limb_bits(&self) -> usize {
         self.limb_bits
     }
 
-    fn get_assigned_value(&self, x: &CRTInteger<F>) -> Value<Fp> {
-        x.value.as_ref().map(|x| bigint_to_fe::<Fp>(&(x % &self.p)))
+    fn get_assigned_value(&self, x: &CRTInteger<F>) -> Fp {
+        bigint_to_fe(&(&x.value % &self.p))
     }
 
     fn fe_to_constant(x: Fp) -> BigUint {
         fe_to_biguint(&x)
     }
 
-    fn fe_to_witness(x: &Value<Fp>) -> Value<BigInt> {
-        x.map(|x| BigInt::from(fe_to_biguint(&x)))
+    fn fe_to_witness(x: &Fp) -> BigInt {
+        BigInt::from(fe_to_biguint(x))
     }
 
-    fn load_private<'v>(&self, ctx: &mut Context<'_, F>, a: Value<BigInt>) -> CRTInteger<'v, F> {
-        let a_vec = decompose_bigint_option::<F>(a.as_ref(), self.num_limbs, self.limb_bits);
-        let limbs = self.range.gate().assign_witnesses(ctx, a_vec);
+    fn load_private(&self, ctx: &mut Context<F>, a: BigInt) -> CRTInteger<F> {
+        let a_vec = decompose_bigint::<F>(&a, self.num_limbs, self.limb_bits);
+        let limbs = ctx.assign_witnesses(a_vec);
 
         let a_native = OverflowInteger::<F>::evaluate(
             self.range.gate(),
-            //&self.bigint_chip,
             ctx,
-            &limbs,
-            self.limb_bases.iter().cloned(),
+            limbs.iter().copied(),
+            self.limb_bases.iter().copied(),
         );
 
         let a_loaded =
@@ -232,62 +160,57 @@ impl<F: PrimeField, Fp: PrimeField> FieldChip<F> for FpConfig<F, Fp> {
         a_loaded
     }
 
-    fn load_constant<'v>(&self, ctx: &mut Context<'_, F>, a: BigUint) -> CRTInteger<'v, F> {
-        let a_native = self.range.gate.assign_region_last(
-            ctx,
-            vec![Constant(biguint_to_fe(&(&a % modulus::<F>())))],
-            vec![],
-        );
-        let a_limbs = self.range.gate().assign_region(
-            ctx,
-            decompose_biguint::<F>(&a, self.num_limbs, self.limb_bits).into_iter().map(Constant),
-            vec![],
-        );
+    fn load_constant(&self, ctx: &mut Context<F>, a: BigUint) -> CRTInteger<F> {
+        let a_native = ctx.load_constant(biguint_to_fe(&(&a % self.native_modulus())));
+        let a_limbs = decompose_biguint::<F>(&a, self.num_limbs, self.limb_bits)
+            .into_iter()
+            .map(|c| ctx.load_constant(c))
+            .collect();
 
         CRTInteger::construct(
             OverflowInteger::construct(a_limbs, self.limb_bits),
             a_native,
-            Value::known(BigInt::from(a)),
+            BigInt::from(a),
         )
     }
 
     // signed overflow BigInt functions
-    fn add_no_carry<'v>(
+    fn add_no_carry(
         &self,
-        ctx: &mut Context<'_, F>,
-        a: &CRTInteger<'v, F>,
-        b: &CRTInteger<'v, F>,
-    ) -> CRTInteger<'v, F> {
+        ctx: &mut Context<F>,
+        a: &CRTInteger<F>,
+        b: &CRTInteger<F>,
+    ) -> CRTInteger<F> {
         add_no_carry::crt::<F>(self.range.gate(), ctx, a, b)
     }
 
-    fn add_constant_no_carry<'v>(
+    fn add_constant_no_carry(
         &self,
-        ctx: &mut Context<'_, F>,
-        a: &CRTInteger<'v, F>,
+        ctx: &mut Context<F>,
+        a: &CRTInteger<F>,
         c: BigUint,
-    ) -> CRTInteger<'v, F> {
+    ) -> CRTInteger<F> {
         let c = FixedCRTInteger::from_native(c, self.num_limbs, self.limb_bits);
         let c_native = biguint_to_fe::<F>(&(&c.value % modulus::<F>()));
         let mut limbs = Vec::with_capacity(a.truncation.limbs.len());
         for (a_limb, c_limb) in a.truncation.limbs.iter().zip(c.truncation.limbs.into_iter()) {
-            let limb = self.range.gate.add(ctx, Existing(a_limb), Constant(c_limb));
+            let limb = self.range.gate.add(ctx, *a_limb, Constant(c_limb));
             limbs.push(limb);
         }
-        let native = self.range.gate.add(ctx, Existing(&a.native), Constant(c_native));
+        let native = self.range.gate.add(ctx, a.native, Constant(c_native));
         let trunc =
             OverflowInteger::construct(limbs, max(a.truncation.max_limb_bits, self.limb_bits) + 1);
-        let value = a.value.as_ref().map(|a| a + BigInt::from(c.value));
+        let value = &a.value + BigInt::from(c.value);
 
         CRTInteger::construct(trunc, native, value)
     }
 
-    fn sub_no_carry<'v>(
+    fn sub_no_carry(
         &self,
-        ctx: &mut Context<'_, F>,
-        a: &CRTInteger<'v, F>,
-        b: &CRTInteger<'v, F>,
-    ) -> CRTInteger<'v, F> {
+        ctx: &mut Context<F>,
+        a: &CRTInteger<F>,
+        b: &CRTInteger<F>,
+    ) -> CRTInteger<F> {
         sub_no_carry::crt::<F>(self.range.gate(), ctx, a, b)
     }
 
@@ -295,47 +218,47 @@ impl<F: PrimeField, Fp: PrimeField> FieldChip<F> for FpConfig<F, Fp> {
     // Output: p - a if a != 0, else a
     // Assume the actual value of `a` equals `a.truncation`
     // Constrains a.truncation <= p using subtraction with carries
-    fn negate<'v>(&self, ctx: &mut Context<'v, F>, a: &CRTInteger<'v, F>) -> CRTInteger<'v, F> {
+    fn negate(&self, ctx: &mut Context<F>, a: &CRTInteger<F>) -> CRTInteger<F> {
         // Compute p - a.truncation using carries
         let p = self.load_constant(ctx, self.p.to_biguint().unwrap());
         let (out_or_p, underflow) =
             sub::crt::<F>(self.range(), ctx, &p, a, self.limb_bits, self.limb_bases[1]);
         // constrain underflow to equal 0
-        self.range.gate.assert_is_const(ctx, &underflow, F::zero());
+        self.range.gate.assert_is_const(ctx, &underflow, &F::zero());
 
         let a_is_zero = big_is_zero::assign::<F>(self.gate(), ctx, &a.truncation);
-        select::crt::<F>(self.range.gate(), ctx, a, &out_or_p, &a_is_zero)
+        select::crt::<F>(self.range.gate(), ctx, a, &out_or_p, a_is_zero)
     }
 
-    fn scalar_mul_no_carry<'v>(
+    fn scalar_mul_no_carry(
         &self,
-        ctx: &mut Context<'_, F>,
-        a: &CRTInteger<'v, F>,
+        ctx: &mut Context<F>,
+        a: &CRTInteger<F>,
         c: i64,
-    ) -> CRTInteger<'v, F> {
+    ) -> CRTInteger<F> {
         scalar_mul_no_carry::crt::<F>(self.range.gate(), ctx, a, c)
     }
 
-    fn scalar_mul_and_add_no_carry<'v>(
+    fn scalar_mul_and_add_no_carry(
         &self,
-        ctx: &mut Context<'_, F>,
-        a: &CRTInteger<'v, F>,
-        b: &CRTInteger<'v, F>,
+        ctx: &mut Context<F>,
+        a: &CRTInteger<F>,
+        b: &CRTInteger<F>,
         c: i64,
-    ) -> CRTInteger<'v, F> {
+    ) -> CRTInteger<F> {
         scalar_mul_and_add_no_carry::crt::<F>(self.range.gate(), ctx, a, b, c)
     }
 
-    fn mul_no_carry<'v>(
+    fn mul_no_carry(
         &self,
-        ctx: &mut Context<'_, F>,
-        a: &CRTInteger<'v, F>,
-        b: &CRTInteger<'v, F>,
-    ) -> CRTInteger<'v, F> {
+        ctx: &mut Context<F>,
+        a: &CRTInteger<F>,
+        b: &CRTInteger<F>,
+    ) -> CRTInteger<F> {
         mul_no_carry::crt::<F>(self.range.gate(), ctx, a, b, self.num_limbs_log2_ceil)
     }
 
-    fn check_carry_mod_to_zero<'v>(&self, ctx: &mut Context<'v, F>, a: &CRTInteger<'v, F>) {
+    fn check_carry_mod_to_zero(&self, ctx: &mut Context<F>, a: &CRTInteger<F>) {
         check_carry_mod_to_zero::crt::<F>(
             self.range(),
             // &self.bigint_chip,
@@ -351,7 +274,7 @@ impl<F: PrimeField, Fp: PrimeField> FieldChip<F> for FpConfig<F, Fp> {
         )
     }
 
-    fn carry_mod<'v>(&self, ctx: &mut Context<'v, F>, a: &CRTInteger<'v, F>) -> CRTInteger<'v, F> {
+    fn carry_mod(&self, ctx: &mut Context<F>, a: &CRTInteger<F>) -> CRTInteger<F> {
         carry_mod::crt::<F>(
             self.range(),
             // &self.bigint_chip,
@@ -367,10 +290,10 @@ impl<F: PrimeField, Fp: PrimeField> FieldChip<F> for FpConfig<F, Fp> {
         )
     }
 
-    fn range_check<'v>(
+    fn range_check(
         &self,
-        ctx: &mut Context<'v, F>,
-        a: &CRTInteger<'v, F>,
+        ctx: &mut Context<F>,
+        a: &CRTInteger<F>,
         max_bits: usize, // the maximum bits that a.value could take
     ) {
         let n = self.limb_bits;
@@ -379,111 +302,97 @@ impl<F: PrimeField, Fp: PrimeField> FieldChip<F> for FpConfig<F, Fp> {
         let last_limb_bits = max_bits - n * (k - 1);
 
         #[cfg(debug_assertions)]
-        a.value.as_ref().map(|v| {
-            debug_assert!(v.bits() as usize <= max_bits);
-        });
+        debug_assert!(a.value.bits() as usize <= max_bits);
 
         // range check limbs of `a` are in [0, 2^n) except last limb should be in [0, 2^last_limb_bits)
         for (i, cell) in a.truncation.limbs.iter().enumerate() {
             let limb_bits = if i == k - 1 { last_limb_bits } else { n };
-            self.range.range_check(ctx, cell, limb_bits);
+            self.range.range_check(ctx, *cell, limb_bits);
         }
     }
 
-    fn enforce_less_than<'v>(&self, ctx: &mut Context<'v, F>, a: &Self::FieldPoint<'v>) {
+    fn enforce_less_than(&self, ctx: &mut Context<F>, a: &Self::FieldPoint) {
         self.enforce_less_than_p(ctx, a)
     }
 
-    fn is_soft_zero<'v>(
-        &self,
-        ctx: &mut Context<'v, F>,
-        a: &CRTInteger<'v, F>,
-    ) -> AssignedValue<'v, F> {
-        let is_zero = big_is_zero::crt::<F>(self.gate(), ctx, a);
+    fn is_soft_zero(&self, ctx: &mut Context<F>, a: &CRTInteger<F>) -> AssignedValue<F> {
+        big_is_zero::crt::<F>(self.gate(), ctx, a)
 
+        // CHECK: I don't think this is necessary:
         // underflow != 0 iff carry < p
-        let p = self.load_constant(ctx, self.p.to_biguint().unwrap());
-        let (_, underflow) =
-            sub::crt::<F>(self.range(), ctx, a, &p, self.limb_bits, self.limb_bases[1]);
-        let is_underflow_zero = self.gate().is_zero(ctx, &underflow);
-        let range_check = self.gate().not(ctx, Existing(&is_underflow_zero));
+        // let p = self.load_constant(ctx, self.p.to_biguint().unwrap());
+        // let (_, underflow) =
+        //     sub::crt::<F>(self.range(), ctx, a, &p, self.limb_bits, self.limb_bases[1]);
+        // let is_underflow_zero = self.gate().is_zero(ctx, &underflow);
+        // let range_check = self.gate().not(ctx, Existing(&is_underflow_zero));
 
-        self.gate().and(ctx, Existing(&is_zero), Existing(&range_check))
+        // self.gate().and(ctx, is_zero, range_check)
     }
 
-    fn is_soft_nonzero<'v>(
-        &self,
-        ctx: &mut Context<'v, F>,
-        a: &CRTInteger<'v, F>,
-    ) -> AssignedValue<'v, F> {
+    fn is_soft_nonzero(&self, ctx: &mut Context<F>, a: &CRTInteger<F>) -> AssignedValue<F> {
         let is_zero = big_is_zero::crt::<F>(self.gate(), ctx, a);
-        let is_nonzero = self.gate().not(ctx, Existing(&is_zero));
+        let is_nonzero = self.gate().not(ctx, is_zero);
 
         // underflow != 0 iff carry < p
         let p = self.load_constant(ctx, self.p.to_biguint().unwrap());
         let (_, underflow) =
             sub::crt::<F>(self.range(), ctx, a, &p, self.limb_bits, self.limb_bases[1]);
-        let is_underflow_zero = self.gate().is_zero(ctx, &underflow);
-        let range_check = self.gate().not(ctx, Existing(&is_underflow_zero));
+        let is_underflow_zero = self.gate().is_zero(ctx, underflow);
+        let range_check = self.gate().not(ctx, is_underflow_zero);
 
-        self.gate().and(ctx, Existing(&is_nonzero), Existing(&range_check))
+        self.gate().and(ctx, is_nonzero, range_check)
     }
 
     // assuming `a` has been range checked to be a proper BigInt
     // constrain the witness `a` to be `< p`
     // then check if `a` is 0
-    fn is_zero<'v>(&self, ctx: &mut Context<'v, F>, a: &CRTInteger<'v, F>) -> AssignedValue<'v, F> {
+    fn is_zero(&self, ctx: &mut Context<F>, a: &CRTInteger<F>) -> AssignedValue<F> {
         self.enforce_less_than_p(ctx, a);
         // just check truncated limbs are all 0 since they determine the native value
         big_is_zero::positive::<F>(self.gate(), ctx, &a.truncation)
     }
 
-    fn is_equal_unenforced<'v>(
+    fn is_equal_unenforced(
         &self,
-        ctx: &mut Context<'v, F>,
-        a: &Self::FieldPoint<'v>,
-        b: &Self::FieldPoint<'v>,
-    ) -> AssignedValue<'v, F> {
+        ctx: &mut Context<F>,
+        a: &Self::FieldPoint,
+        b: &Self::FieldPoint,
+    ) -> AssignedValue<F> {
         big_is_equal::assign::<F>(self.gate(), ctx, &a.truncation, &b.truncation)
     }
 
     // assuming `a, b` have been range checked to be a proper BigInt
     // constrain the witnesses `a, b` to be `< p`
     // then assert `a == b` as BigInts
-    fn assert_equal<'v>(
-        &self,
-        ctx: &mut Context<'v, F>,
-        a: &Self::FieldPoint<'v>,
-        b: &Self::FieldPoint<'v>,
-    ) {
+    fn assert_equal(&self, ctx: &mut Context<F>, a: &Self::FieldPoint, b: &Self::FieldPoint) {
         self.enforce_less_than_p(ctx, a);
         self.enforce_less_than_p(ctx, b);
         // a.native and b.native are derived from `a.truncation, b.truncation`, so no need to check if they're equal
-        for (limb_a, limb_b) in a.truncation.limbs.iter().zip(a.truncation.limbs.iter()) {
-            self.range.gate.assert_equal(ctx, Existing(limb_a), Existing(limb_b));
+        for (limb_a, limb_b) in a.truncation.limbs.iter().zip(b.truncation.limbs.iter()) {
+            ctx.constrain_equal(limb_a, limb_b);
         }
     }
 }
 
-impl<F: PrimeField, Fp: PrimeField> Selectable<F> for FpConfig<F, Fp> {
-    type Point<'v> = CRTInteger<'v, F>;
+impl<'range, F: PrimeField, Fp: PrimeField> Selectable<F> for FpChip<'range, F, Fp> {
+    type Point = CRTInteger<F>;
 
-    fn select<'v>(
+    fn select(
         &self,
-        ctx: &mut Context<'_, F>,
-        a: &CRTInteger<'v, F>,
-        b: &CRTInteger<'v, F>,
-        sel: &AssignedValue<'v, F>,
-    ) -> CRTInteger<'v, F> {
+        ctx: &mut Context<F>,
+        a: &CRTInteger<F>,
+        b: &CRTInteger<F>,
+        sel: AssignedValue<F>,
+    ) -> CRTInteger<F> {
         select::crt::<F>(self.range.gate(), ctx, a, b, sel)
     }
 
-    fn select_by_indicator<'v>(
+    fn select_by_indicator(
         &self,
-        ctx: &mut Context<'_, F>,
-        a: &[CRTInteger<'v, F>],
-        coeffs: &[AssignedValue<'v, F>],
-    ) -> CRTInteger<'v, F> {
+        ctx: &mut Context<F>,
+        a: &[CRTInteger<F>],
+        coeffs: &[AssignedValue<F>],
+    ) -> CRTInteger<F> {
         select_by_indicator::crt::<F>(self.range.gate(), ctx, a, coeffs, &self.limb_bases)
     }
 }
diff --git a/halo2-ecc/src/fields/fp12.rs b/halo2-ecc/src/fields/fp12.rs
index f130fd52..b82305ca 100644
--- a/halo2-ecc/src/fields/fp12.rs
+++ b/halo2-ecc/src/fields/fp12.rs
@@ -1,10 +1,9 @@
-use super::{FieldChip, FieldExtConstructor, FieldExtPoint, PrimeFieldChip};
-use crate::halo2_proofs::{arithmetic::Field, circuit::Value};
+use super::{FieldChip, FieldExtConstructor, FieldExtPoint, PrimeField, PrimeFieldChip};
+use crate::halo2_proofs::arithmetic::Field;
 use halo2_base::{
     gates::{GateInstructions, RangeInstructions},
-    utils::{fe_to_biguint, value_to_option, PrimeField},
+    utils::fe_to_biguint,
     AssignedValue, Context,
-    QuantumCell::Existing,
 };
 use num_bigint::{BigInt, BigUint};
 use std::marker::PhantomData;
@@ -15,6 +14,7 @@ use std::marker::PhantomData;
 /// be irreducible over Fp; i.e., in order for -1 to not be a square (quadratic residue) in Fp
-/// This means we store an Fp12 point as `\sum_{i = 0}^6 (a_{i0} + a_{i1} * u) * w^i`
+/// This means we store an Fp12 point as `\sum_{i = 0}^5 (a_{i0} + a_{i1} * u) * w^i`
 /// This is encoded in an FqPoint of degree 12 as `(a_{00}, ..., a_{50}, a_{01}, ..., a_{51})`
+#[derive(Clone, Copy, Debug)]
 pub struct Fp12Chip<'a, F: PrimeField, FpChip: PrimeFieldChip<F>, Fp12: Field, const XI_0: i64>
 where
     FpChip::FieldType: PrimeField,
@@ -34,16 +34,16 @@ where
     Fp12: Field + FieldExtConstructor<FpChip::FieldType, 12>,
 {
     /// User must construct an `FpChip` first using a config. This is intended so everything shares a single `FlexGateChip`, which is needed for the column allocation to work.
-    pub fn construct(fp_chip: &'a FpChip) -> Self {
+    pub fn new(fp_chip: &'a FpChip) -> Self {
         Self { fp_chip, _f: PhantomData, _fp12: PhantomData }
     }
 
-    pub fn fp2_mul_no_carry<'v>(
+    pub fn fp2_mul_no_carry(
         &self,
-        ctx: &mut Context<'v, F>,
-        a: &FieldExtPoint<FpChip::FieldPoint<'v>>,
-        fp2_pt: &FieldExtPoint<FpChip::FieldPoint<'v>>,
-    ) -> FieldExtPoint<FpChip::FieldPoint<'v>> {
+        ctx: &mut Context<F>,
+        a: &FieldExtPoint<FpChip::FieldPoint>,
+        fp2_pt: &FieldExtPoint<FpChip::FieldPoint>,
+    ) -> FieldExtPoint<FpChip::FieldPoint> {
         assert_eq!(a.coeffs.len(), 12);
         assert_eq!(fp2_pt.coeffs.len(), 2);
 
@@ -64,11 +64,11 @@ where
     }
 
     // for \sum_i (a_i + b_i u) w^i, returns \sum_i (-1)^i (a_i + b_i u) w^i
-    pub fn conjugate<'v>(
+    pub fn conjugate(
         &self,
-        ctx: &mut Context<'v, F>,
-        a: &FieldExtPoint<FpChip::FieldPoint<'v>>,
-    ) -> FieldExtPoint<FpChip::FieldPoint<'v>> {
+        ctx: &mut Context<F>,
+        a: &FieldExtPoint<FpChip::FieldPoint>,
+    ) -> FieldExtPoint<FpChip::FieldPoint> {
         assert_eq!(a.coeffs.len(), 12);
 
         let coeffs = a
@@ -82,11 +82,11 @@ where
 }
 
 /// multiply (a0 + a1 * u) * (XI0 + u) without carry
-pub fn mul_no_carry_w6<'v, F: PrimeField, FC: FieldChip<F>, const XI_0: i64>(
+pub fn mul_no_carry_w6<F: PrimeField, FC: FieldChip<F>, const XI_0: i64>(
     fp_chip: &FC,
-    ctx: &mut Context<'v, F>,
-    a: &FieldExtPoint<FC::FieldPoint<'v>>,
-) -> FieldExtPoint<FC::FieldPoint<'v>> {
+    ctx: &mut Context<F>,
+    a: &FieldExtPoint<FC::FieldPoint>,
+) -> FieldExtPoint<FC::FieldPoint> {
     assert_eq!(a.coeffs.len(), 2);
     let (a0, a1) = (&a.coeffs[0], &a.coeffs[1]);
     // (a0 + a1 u) * (XI_0 + u) = (a0 * XI_0 - a1) + (a1 * XI_0 + a0) u     with u^2 = -1
@@ -97,17 +97,18 @@ pub fn mul_no_carry_w6<'v, F: PrimeField, FC: FieldChip<F>, const XI_0: i64>(
     FieldExtPoint::construct(vec![out0_0_nocarry, out0_1_nocarry])
 }
 
+// A lot of this is common to any field extension (lots of for loops), but due to the way Rust traits work, it is hard to write a common generic trait for it. The main problem is that if you had a `FieldExtCommon` trait and wanted a blanket impl of `FieldChip` for every type implementing `FieldExtCommon`, Rust would reject it as a potentially overlapping impl: someone could implement both `FieldExtCommon` and `FieldChip` for the same type, causing a conflict.
 impl<'a, F, FpChip, Fp12, const XI_0: i64> FieldChip<F> for Fp12Chip<'a, F, FpChip, Fp12, XI_0>
 where
     F: PrimeField,
-    FpChip: PrimeFieldChip<F, WitnessType = Value<BigInt>, ConstantType = BigUint>,
+    FpChip: PrimeFieldChip<F, WitnessType = BigInt, ConstantType = BigUint>,
     FpChip::FieldType: PrimeField,
     Fp12: Field + FieldExtConstructor<FpChip::FieldType, 12>,
 {
     const PRIME_FIELD_NUM_BITS: u32 = FpChip::FieldType::NUM_BITS;
     type ConstantType = Fp12;
-    type WitnessType = Vec<Value<BigInt>>;
-    type FieldPoint<'v> = FieldExtPoint<FpChip::FieldPoint<'v>>;
+    type WitnessType = Vec<BigInt>;
+    type FieldPoint = FieldExtPoint<FpChip::FieldPoint>;
     type FieldType = Fp12;
     type RangeChip = FpChip::RangeChip;
 
@@ -122,30 +123,21 @@ where
         self.fp_chip.limb_bits()
     }
 
-    fn get_assigned_value(&self, x: &Self::FieldPoint<'_>) -> Value<Fp12> {
+    fn get_assigned_value(&self, x: &Self::FieldPoint) -> Fp12 {
         assert_eq!(x.coeffs.len(), 12);
-        let values = x.coeffs.iter().map(|v| self.fp_chip.get_assigned_value(v));
-        let values_collected: Value<Vec<FpChip::FieldType>> = values.into_iter().collect();
-        values_collected.map(|c| Fp12::new(c.try_into().unwrap()))
+        let values =
+            x.coeffs.iter().map(|v| self.fp_chip.get_assigned_value(v)).collect::<Vec<_>>();
+        Fp12::new(values.try_into().unwrap())
     }
 
     fn fe_to_constant(x: Self::FieldType) -> Self::ConstantType {
         x
     }
-    fn fe_to_witness(x: &Value<Fp12>) -> Vec<Value<BigInt>> {
-        match value_to_option(*x) {
-            Some(x) => {
-                x.coeffs().iter().map(|c| Value::known(BigInt::from(fe_to_biguint(c)))).collect()
-            }
-            None => vec![Value::unknown(); 12],
-        }
+    fn fe_to_witness(x: &Fp12) -> Vec<BigInt> {
+        x.coeffs().iter().map(|c| BigInt::from(fe_to_biguint(c))).collect()
     }
 
-    fn load_private<'v>(
-        &self,
-        ctx: &mut Context<'_, F>,
-        coeffs: Vec<Value<BigInt>>,
-    ) -> Self::FieldPoint<'v> {
+    fn load_private(&self, ctx: &mut Context<F>, coeffs: Vec<BigInt>) -> Self::FieldPoint {
         assert_eq!(coeffs.len(), 12);
         let mut assigned_coeffs = Vec::with_capacity(12);
         for a in coeffs {
@@ -155,7 +147,7 @@ where
         Self::FieldPoint::construct(assigned_coeffs)
     }
 
-    fn load_constant<'v>(&self, ctx: &mut Context<'_, F>, c: Fp12) -> Self::FieldPoint<'v> {
+    fn load_constant(&self, ctx: &mut Context<F>, c: Fp12) -> Self::FieldPoint {
         let mut assigned_coeffs = Vec::with_capacity(12);
         for a in &c.coeffs() {
             let assigned_coeff = self.fp_chip.load_constant(ctx, fe_to_biguint(a));
@@ -165,12 +157,12 @@ where
     }
 
     // signed overflow BigInt functions
-    fn add_no_carry<'v>(
+    fn add_no_carry(
         &self,
-        ctx: &mut Context<'v, F>,
-        a: &Self::FieldPoint<'v>,
-        b: &Self::FieldPoint<'v>,
-    ) -> Self::FieldPoint<'v> {
+        ctx: &mut Context<F>,
+        a: &Self::FieldPoint,
+        b: &Self::FieldPoint,
+    ) -> Self::FieldPoint {
         assert_eq!(a.coeffs.len(), b.coeffs.len());
         let mut out_coeffs = Vec::with_capacity(a.coeffs.len());
         for i in 0..a.coeffs.len() {
@@ -180,12 +172,12 @@ where
         Self::FieldPoint::construct(out_coeffs)
     }
 
-    fn add_constant_no_carry<'v>(
+    fn add_constant_no_carry(
         &self,
-        ctx: &mut Context<'v, F>,
-        a: &Self::FieldPoint<'v>,
+        ctx: &mut Context<F>,
+        a: &Self::FieldPoint,
         c: Self::ConstantType,
-    ) -> Self::FieldPoint<'v> {
+    ) -> Self::FieldPoint {
         let c_coeffs = c.coeffs();
         assert_eq!(a.coeffs.len(), c_coeffs.len());
         let mut out_coeffs = Vec::with_capacity(a.coeffs.len());
@@ -196,12 +188,12 @@ where
         Self::FieldPoint::construct(out_coeffs)
     }
 
-    fn sub_no_carry<'v>(
+    fn sub_no_carry(
         &self,
-        ctx: &mut Context<'v, F>,
-        a: &Self::FieldPoint<'v>,
-        b: &Self::FieldPoint<'v>,
-    ) -> Self::FieldPoint<'v> {
+        ctx: &mut Context<F>,
+        a: &Self::FieldPoint,
+        b: &Self::FieldPoint,
+    ) -> Self::FieldPoint {
         assert_eq!(a.coeffs.len(), b.coeffs.len());
         let mut out_coeffs = Vec::with_capacity(a.coeffs.len());
         for i in 0..a.coeffs.len() {
@@ -211,11 +203,7 @@ where
         Self::FieldPoint::construct(out_coeffs)
     }
 
-    fn negate<'v>(
-        &self,
-        ctx: &mut Context<'v, F>,
-        a: &Self::FieldPoint<'v>,
-    ) -> Self::FieldPoint<'v> {
+    fn negate(&self, ctx: &mut Context<F>, a: &Self::FieldPoint) -> Self::FieldPoint {
         let mut out_coeffs = Vec::with_capacity(a.coeffs.len());
         for a_coeff in &a.coeffs {
             let out_coeff = self.fp_chip.negate(ctx, a_coeff);
@@ -224,12 +212,12 @@ where
         Self::FieldPoint::construct(out_coeffs)
     }
 
-    fn scalar_mul_no_carry<'v>(
+    fn scalar_mul_no_carry(
         &self,
-        ctx: &mut Context<'v, F>,
-        a: &Self::FieldPoint<'v>,
+        ctx: &mut Context<F>,
+        a: &Self::FieldPoint,
         c: i64,
-    ) -> Self::FieldPoint<'v> {
+    ) -> Self::FieldPoint {
         let mut out_coeffs = Vec::with_capacity(a.coeffs.len());
         for i in 0..a.coeffs.len() {
             let coeff = self.fp_chip.scalar_mul_no_carry(ctx, &a.coeffs[i], c);
@@ -238,13 +226,13 @@ where
         Self::FieldPoint::construct(out_coeffs)
     }
 
-    fn scalar_mul_and_add_no_carry<'v>(
+    fn scalar_mul_and_add_no_carry(
         &self,
-        ctx: &mut Context<'v, F>,
-        a: &Self::FieldPoint<'v>,
-        b: &Self::FieldPoint<'v>,
+        ctx: &mut Context<F>,
+        a: &Self::FieldPoint,
+        b: &Self::FieldPoint,
         c: i64,
-    ) -> Self::FieldPoint<'v> {
+    ) -> Self::FieldPoint {
         let mut out_coeffs = Vec::with_capacity(a.coeffs.len());
         for i in 0..a.coeffs.len() {
             let coeff =
@@ -255,12 +243,12 @@ where
     }
 
     // w^6 = u + xi for xi = 9
-    fn mul_no_carry<'v>(
+    fn mul_no_carry(
         &self,
-        ctx: &mut Context<'v, F>,
-        a: &Self::FieldPoint<'v>,
-        b: &Self::FieldPoint<'v>,
-    ) -> Self::FieldPoint<'v> {
+        ctx: &mut Context<F>,
+        a: &Self::FieldPoint,
+        b: &Self::FieldPoint,
+    ) -> Self::FieldPoint {
         assert_eq!(a.coeffs.len(), 12);
         assert_eq!(b.coeffs.len(), 12);
 
@@ -341,17 +329,13 @@ where
         Self::FieldPoint::construct(out_coeffs)
     }
 
-    fn check_carry_mod_to_zero<'v>(&self, ctx: &mut Context<'v, F>, a: &Self::FieldPoint<'v>) {
+    fn check_carry_mod_to_zero(&self, ctx: &mut Context<F>, a: &Self::FieldPoint) {
         for coeff in &a.coeffs {
             self.fp_chip.check_carry_mod_to_zero(ctx, coeff);
         }
     }
 
-    fn carry_mod<'v>(
-        &self,
-        ctx: &mut Context<'v, F>,
-        a: &Self::FieldPoint<'v>,
-    ) -> Self::FieldPoint<'v> {
+    fn carry_mod(&self, ctx: &mut Context<F>, a: &Self::FieldPoint) -> Self::FieldPoint {
         let mut out_coeffs = Vec::with_capacity(a.coeffs.len());
         for a_coeff in &a.coeffs {
             let coeff = self.fp_chip.carry_mod(ctx, a_coeff);
@@ -360,28 +344,24 @@ where
         Self::FieldPoint::construct(out_coeffs)
     }
 
-    fn range_check<'v>(&self, ctx: &mut Context<'v, F>, a: &Self::FieldPoint<'v>, max_bits: usize) {
+    fn range_check(&self, ctx: &mut Context<F>, a: &Self::FieldPoint, max_bits: usize) {
         for a_coeff in &a.coeffs {
             self.fp_chip.range_check(ctx, a_coeff, max_bits);
         }
     }
 
-    fn enforce_less_than<'v>(&self, ctx: &mut Context<'v, F>, a: &Self::FieldPoint<'v>) {
+    fn enforce_less_than(&self, ctx: &mut Context<F>, a: &Self::FieldPoint) {
         for a_coeff in &a.coeffs {
             self.fp_chip.enforce_less_than(ctx, a_coeff)
         }
     }
 
-    fn is_soft_zero<'v>(
-        &self,
-        ctx: &mut Context<'v, F>,
-        a: &Self::FieldPoint<'v>,
-    ) -> AssignedValue<'v, F> {
+    fn is_soft_zero(&self, ctx: &mut Context<F>, a: &Self::FieldPoint) -> AssignedValue<F> {
         let mut prev = None;
         for a_coeff in &a.coeffs {
             let coeff = self.fp_chip.is_soft_zero(ctx, a_coeff);
             if let Some(p) = prev {
-                let new = self.fp_chip.range().gate().and(ctx, Existing(&coeff), Existing(&p));
+                let new = self.fp_chip.range().gate().and(ctx, coeff, p);
                 prev = Some(new);
             } else {
                 prev = Some(coeff);
@@ -390,16 +370,12 @@ where
         prev.unwrap()
     }
 
-    fn is_soft_nonzero<'v>(
-        &self,
-        ctx: &mut Context<'v, F>,
-        a: &Self::FieldPoint<'v>,
-    ) -> AssignedValue<'v, F> {
+    fn is_soft_nonzero(&self, ctx: &mut Context<F>, a: &Self::FieldPoint) -> AssignedValue<F> {
         let mut prev = None;
         for a_coeff in &a.coeffs {
             let coeff = self.fp_chip.is_soft_nonzero(ctx, a_coeff);
             if let Some(p) = prev {
-                let new = self.fp_chip.range().gate().or(ctx, Existing(&coeff), Existing(&p));
+                let new = self.gate().or(ctx, coeff, p);
                 prev = Some(new);
             } else {
                 prev = Some(coeff);
@@ -408,16 +384,12 @@ where
         prev.unwrap()
     }
 
-    fn is_zero<'v>(
-        &self,
-        ctx: &mut Context<'v, F>,
-        a: &Self::FieldPoint<'v>,
-    ) -> AssignedValue<'v, F> {
+    fn is_zero(&self, ctx: &mut Context<F>, a: &Self::FieldPoint) -> AssignedValue<F> {
         let mut prev = None;
         for a_coeff in &a.coeffs {
             let coeff = self.fp_chip.is_zero(ctx, a_coeff);
             if let Some(p) = prev {
-                let new = self.fp_chip.range().gate().and(ctx, Existing(&coeff), Existing(&p));
+                let new = self.gate().and(ctx, coeff, p);
                 prev = Some(new);
             } else {
                 prev = Some(coeff);
@@ -426,17 +398,17 @@ where
         prev.unwrap()
     }
 
-    fn is_equal<'v>(
+    fn is_equal(
         &self,
-        ctx: &mut Context<'v, F>,
-        a: &Self::FieldPoint<'v>,
-        b: &Self::FieldPoint<'v>,
-    ) -> AssignedValue<'v, F> {
+        ctx: &mut Context<F>,
+        a: &Self::FieldPoint,
+        b: &Self::FieldPoint,
+    ) -> AssignedValue<F> {
         let mut acc = None;
         for (a_coeff, b_coeff) in a.coeffs.iter().zip(b.coeffs.iter()) {
             let coeff = self.fp_chip.is_equal(ctx, a_coeff, b_coeff);
             if let Some(c) = acc {
-                acc = Some(self.fp_chip.range().gate().and(ctx, Existing(&coeff), Existing(&c)));
+                acc = Some(self.gate().and(ctx, coeff, c));
             } else {
                 acc = Some(coeff);
             }
@@ -444,17 +416,17 @@ where
         acc.unwrap()
     }
 
-    fn is_equal_unenforced<'v>(
+    fn is_equal_unenforced(
         &self,
-        ctx: &mut Context<'v, F>,
-        a: &Self::FieldPoint<'v>,
-        b: &Self::FieldPoint<'v>,
-    ) -> AssignedValue<'v, F> {
+        ctx: &mut Context<F>,
+        a: &Self::FieldPoint,
+        b: &Self::FieldPoint,
+    ) -> AssignedValue<F> {
         let mut acc = None;
         for (a_coeff, b_coeff) in a.coeffs.iter().zip(b.coeffs.iter()) {
             let coeff = self.fp_chip.is_equal_unenforced(ctx, a_coeff, b_coeff);
             if let Some(c) = acc {
-                acc = Some(self.fp_chip.range().gate().and(ctx, Existing(&coeff), Existing(&c)));
+                acc = Some(self.gate().and(ctx, coeff, c));
             } else {
                 acc = Some(coeff);
             }
@@ -462,12 +434,7 @@ where
         acc.unwrap()
     }
 
-    fn assert_equal<'v>(
-        &self,
-        ctx: &mut Context<'v, F>,
-        a: &Self::FieldPoint<'v>,
-        b: &Self::FieldPoint<'v>,
-    ) {
+    fn assert_equal(&self, ctx: &mut Context<F>, a: &Self::FieldPoint, b: &Self::FieldPoint) {
         for (a_coeff, b_coeff) in a.coeffs.iter().zip(b.coeffs.iter()) {
             self.fp_chip.assert_equal(ctx, a_coeff, b_coeff);
         }
diff --git a/halo2-ecc/src/fields/fp2.rs b/halo2-ecc/src/fields/fp2.rs
index 633ae6fa..aed390fa 100644
--- a/halo2-ecc/src/fields/fp2.rs
+++ b/halo2-ecc/src/fields/fp2.rs
@@ -1,11 +1,8 @@
-use super::{FieldChip, FieldExtConstructor, FieldExtPoint, PrimeFieldChip, Selectable};
-use crate::halo2_proofs::{arithmetic::Field, circuit::Value};
-use halo2_base::{
-    gates::{GateInstructions, RangeInstructions},
-    utils::{fe_to_biguint, value_to_option, PrimeField},
-    AssignedValue, Context,
-    QuantumCell::Existing,
+use super::{
+    FieldChip, FieldExtConstructor, FieldExtPoint, PrimeField, PrimeFieldChip, Selectable,
 };
+use crate::halo2_proofs::arithmetic::Field;
+use halo2_base::{gates::GateInstructions, utils::fe_to_biguint, AssignedValue, Context};
 use num_bigint::{BigInt, BigUint};
 use std::marker::PhantomData;
 
@@ -13,7 +10,7 @@ use std::marker::PhantomData;
 /// `Fp2 = Fp[u] / (u^2 + 1)`
 /// This implementation assumes p = 3 (mod 4) in order for the polynomial u^2 + 1 to be irreducible over Fp; i.e., in order for -1 to not be a square (quadratic residue) in Fp
 /// This means we store an Fp2 point as `a_0 + a_1 * u` where `a_0, a_1 in Fp`
-#[derive(Clone, Debug)]
+#[derive(Clone, Copy, Debug)]
 pub struct Fp2Chip<'a, F: PrimeField, FpChip: PrimeFieldChip<F>, Fp2: Field>
 where
     FpChip::FieldType: PrimeField,
@@ -33,16 +30,16 @@ where
     Fp2: Field + FieldExtConstructor<FpChip::FieldType, 2>,
 {
     /// User must construct an `FpChip` first using a config. This is intended so everything shares a single `FlexGateChip`, which is needed for the column allocation to work.
-    pub fn construct(fp_chip: &'a FpChip) -> Self {
+    pub fn new(fp_chip: &'a FpChip) -> Self {
         Self { fp_chip, _f: PhantomData, _fp2: PhantomData }
     }
 
-    pub fn fp_mul_no_carry<'v>(
+    pub fn fp_mul_no_carry(
         &self,
-        ctx: &mut Context<'v, F>,
-        a: &FieldExtPoint<FpChip::FieldPoint<'v>>,
-        fp_point: &FpChip::FieldPoint<'v>,
-    ) -> FieldExtPoint<FpChip::FieldPoint<'v>> {
+        ctx: &mut Context<F>,
+        a: &FieldExtPoint<FpChip::FieldPoint>,
+        fp_point: &FpChip::FieldPoint,
+    ) -> FieldExtPoint<FpChip::FieldPoint> {
         assert_eq!(a.coeffs.len(), 2);
 
         let mut out_coeffs = Vec::with_capacity(2);
@@ -53,37 +50,37 @@ where
         FieldExtPoint::construct(out_coeffs)
     }
 
-    pub fn conjugate<'v>(
+    pub fn conjugate(
         &self,
-        ctx: &mut Context<'v, F>,
-        a: &FieldExtPoint<FpChip::FieldPoint<'v>>,
-    ) -> FieldExtPoint<FpChip::FieldPoint<'v>> {
+        ctx: &mut Context<F>,
+        a: &FieldExtPoint<FpChip::FieldPoint>,
+    ) -> FieldExtPoint<FpChip::FieldPoint> {
         assert_eq!(a.coeffs.len(), 2);
 
         let neg_a1 = self.fp_chip.negate(ctx, &a.coeffs[1]);
         FieldExtPoint::construct(vec![a.coeffs[0].clone(), neg_a1])
     }
 
-    pub fn neg_conjugate<'v>(
+    pub fn neg_conjugate(
         &self,
-        ctx: &mut Context<'v, F>,
-        a: &FieldExtPoint<FpChip::FieldPoint<'v>>,
-    ) -> FieldExtPoint<FpChip::FieldPoint<'v>> {
+        ctx: &mut Context<F>,
+        a: &FieldExtPoint<FpChip::FieldPoint>,
+    ) -> FieldExtPoint<FpChip::FieldPoint> {
         assert_eq!(a.coeffs.len(), 2);
 
         let neg_a0 = self.fp_chip.negate(ctx, &a.coeffs[0]);
         FieldExtPoint::construct(vec![neg_a0, a.coeffs[1].clone()])
     }
 
-    pub fn select<'v>(
+    pub fn select(
         &self,
-        ctx: &mut Context<'_, F>,
-        a: &FieldExtPoint<FpChip::FieldPoint<'v>>,
-        b: &FieldExtPoint<FpChip::FieldPoint<'v>>,
-        sel: &AssignedValue<'v, F>,
-    ) -> FieldExtPoint<FpChip::FieldPoint<'v>>
+        ctx: &mut Context<F>,
+        a: &FieldExtPoint<FpChip::FieldPoint>,
+        b: &FieldExtPoint<FpChip::FieldPoint>,
+        sel: AssignedValue<F>,
+    ) -> FieldExtPoint<FpChip::FieldPoint>
     where
-        FpChip: Selectable<F, Point<'v> = FpChip::FieldPoint<'v>>,
+        FpChip: Selectable<F, Point = FpChip::FieldPoint>,
     {
         let coeffs: Vec<_> = a
             .coeffs
@@ -99,13 +96,13 @@ impl<'a, F, FpChip, Fp2> FieldChip<F> for Fp2Chip<'a, F, FpChip, Fp2>
 where
     F: PrimeField,
     FpChip::FieldType: PrimeField,
-    FpChip: PrimeFieldChip<F, WitnessType = Value<BigInt>, ConstantType = BigUint>,
+    FpChip: PrimeFieldChip<F, WitnessType = BigInt, ConstantType = BigUint>,
     Fp2: Field + FieldExtConstructor<FpChip::FieldType, 2>,
 {
     const PRIME_FIELD_NUM_BITS: u32 = FpChip::FieldType::NUM_BITS;
     type ConstantType = Fp2;
-    type WitnessType = Vec<Value<BigInt>>;
-    type FieldPoint<'v> = FieldExtPoint<FpChip::FieldPoint<'v>>;
+    type WitnessType = Vec<BigInt>;
+    type FieldPoint = FieldExtPoint<FpChip::FieldPoint>;
     type FieldType = Fp2;
     type RangeChip = FpChip::RangeChip;
 
@@ -120,34 +117,25 @@ where
         self.fp_chip.limb_bits()
     }
 
-    fn get_assigned_value(&self, x: &Self::FieldPoint<'_>) -> Value<Fp2> {
-        assert_eq!(x.coeffs.len(), 2);
+    fn get_assigned_value(&self, x: &Self::FieldPoint) -> Fp2 {
+        debug_assert_eq!(x.coeffs.len(), 2);
         let c0 = self.fp_chip.get_assigned_value(&x.coeffs[0]);
         let c1 = self.fp_chip.get_assigned_value(&x.coeffs[1]);
-        c0.zip(c1).map(|(c0, c1)| Fp2::new([c0, c1]))
+        Fp2::new([c0, c1])
     }
 
     fn fe_to_constant(x: Fp2) -> Fp2 {
         x
     }
 
-    fn fe_to_witness(x: &Value<Fp2>) -> Vec<Value<BigInt>> {
-        match value_to_option(*x) {
-            None => vec![Value::unknown(), Value::unknown()],
-            Some(x) => {
-                let coeffs = x.coeffs();
-                assert_eq!(coeffs.len(), 2);
-                coeffs.iter().map(|c| Value::known(BigInt::from(fe_to_biguint(c)))).collect()
-            }
-        }
+    fn fe_to_witness(x: &Fp2) -> Vec<BigInt> {
+        let coeffs = x.coeffs();
+        debug_assert_eq!(coeffs.len(), 2);
+        coeffs.iter().map(|c| BigInt::from(fe_to_biguint(c))).collect()
     }
 
-    fn load_private<'v>(
-        &self,
-        ctx: &mut Context<'_, F>,
-        coeffs: Vec<Value<BigInt>>,
-    ) -> Self::FieldPoint<'v> {
-        assert_eq!(coeffs.len(), 2);
+    fn load_private(&self, ctx: &mut Context<F>, coeffs: Vec<BigInt>) -> Self::FieldPoint {
+        debug_assert_eq!(coeffs.len(), 2);
         let mut assigned_coeffs = Vec::with_capacity(2);
         for a in coeffs {
             let assigned_coeff = self.fp_chip.load_private(ctx, a);
@@ -156,7 +144,7 @@ where
         Self::FieldPoint::construct(assigned_coeffs)
     }
 
-    fn load_constant<'v>(&self, ctx: &mut Context<'_, F>, c: Fp2) -> Self::FieldPoint<'v> {
+    fn load_constant(&self, ctx: &mut Context<F>, c: Fp2) -> Self::FieldPoint {
         let mut assigned_coeffs = Vec::with_capacity(2);
         for a in &c.coeffs() {
             let assigned_coeff = self.fp_chip.load_constant(ctx, fe_to_biguint(a));
@@ -166,12 +154,12 @@ where
     }
 
     // signed overflow BigInt functions
-    fn add_no_carry<'v>(
+    fn add_no_carry(
         &self,
-        ctx: &mut Context<'v, F>,
-        a: &Self::FieldPoint<'v>,
-        b: &Self::FieldPoint<'v>,
-    ) -> Self::FieldPoint<'v> {
+        ctx: &mut Context<F>,
+        a: &Self::FieldPoint,
+        b: &Self::FieldPoint,
+    ) -> Self::FieldPoint {
         assert_eq!(a.coeffs.len(), b.coeffs.len());
         let mut out_coeffs = Vec::with_capacity(a.coeffs.len());
         for i in 0..a.coeffs.len() {
@@ -181,12 +169,12 @@ where
         Self::FieldPoint::construct(out_coeffs)
     }
 
-    fn add_constant_no_carry<'v>(
+    fn add_constant_no_carry(
         &self,
-        ctx: &mut Context<'v, F>,
-        a: &Self::FieldPoint<'v>,
+        ctx: &mut Context<F>,
+        a: &Self::FieldPoint,
         c: Self::ConstantType,
-    ) -> Self::FieldPoint<'v> {
+    ) -> Self::FieldPoint {
         let c_coeffs = c.coeffs();
         assert_eq!(a.coeffs.len(), c_coeffs.len());
         let mut out_coeffs = Vec::with_capacity(a.coeffs.len());
@@ -197,12 +185,12 @@ where
         Self::FieldPoint::construct(out_coeffs)
     }
 
-    fn sub_no_carry<'v>(
+    fn sub_no_carry(
         &self,
-        ctx: &mut Context<'v, F>,
-        a: &Self::FieldPoint<'v>,
-        b: &Self::FieldPoint<'v>,
-    ) -> Self::FieldPoint<'v> {
+        ctx: &mut Context<F>,
+        a: &Self::FieldPoint,
+        b: &Self::FieldPoint,
+    ) -> Self::FieldPoint {
         assert_eq!(a.coeffs.len(), b.coeffs.len());
         let mut out_coeffs = Vec::with_capacity(a.coeffs.len());
         for i in 0..a.coeffs.len() {
@@ -212,11 +200,7 @@ where
         Self::FieldPoint::construct(out_coeffs)
     }
 
-    fn negate<'v>(
-        &self,
-        ctx: &mut Context<'v, F>,
-        a: &Self::FieldPoint<'v>,
-    ) -> Self::FieldPoint<'v> {
+    fn negate(&self, ctx: &mut Context<F>, a: &Self::FieldPoint) -> Self::FieldPoint {
         let mut out_coeffs = Vec::with_capacity(a.coeffs.len());
         for a_coeff in &a.coeffs {
             let out_coeff = self.fp_chip.negate(ctx, a_coeff);
@@ -225,12 +209,12 @@ where
         Self::FieldPoint::construct(out_coeffs)
     }
 
-    fn scalar_mul_no_carry<'v>(
+    fn scalar_mul_no_carry(
         &self,
-        ctx: &mut Context<'v, F>,
-        a: &Self::FieldPoint<'v>,
+        ctx: &mut Context<F>,
+        a: &Self::FieldPoint,
         c: i64,
-    ) -> Self::FieldPoint<'v> {
+    ) -> Self::FieldPoint {
         let mut out_coeffs = Vec::with_capacity(a.coeffs.len());
         for i in 0..a.coeffs.len() {
             let coeff = self.fp_chip.scalar_mul_no_carry(ctx, &a.coeffs[i], c);
@@ -239,13 +223,13 @@ where
         Self::FieldPoint::construct(out_coeffs)
     }
 
-    fn scalar_mul_and_add_no_carry<'v>(
+    fn scalar_mul_and_add_no_carry(
         &self,
-        ctx: &mut Context<'v, F>,
-        a: &Self::FieldPoint<'v>,
-        b: &Self::FieldPoint<'v>,
+        ctx: &mut Context<F>,
+        a: &Self::FieldPoint,
+        b: &Self::FieldPoint,
         c: i64,
-    ) -> Self::FieldPoint<'v> {
+    ) -> Self::FieldPoint {
         let mut out_coeffs = Vec::with_capacity(a.coeffs.len());
         for i in 0..a.coeffs.len() {
             let coeff =
@@ -255,12 +239,12 @@ where
         Self::FieldPoint::construct(out_coeffs)
     }
 
-    fn mul_no_carry<'v>(
+    fn mul_no_carry(
         &self,
-        ctx: &mut Context<'v, F>,
-        a: &Self::FieldPoint<'v>,
-        b: &Self::FieldPoint<'v>,
-    ) -> Self::FieldPoint<'v> {
+        ctx: &mut Context<F>,
+        a: &Self::FieldPoint,
+        b: &Self::FieldPoint,
+    ) -> Self::FieldPoint {
         assert_eq!(a.coeffs.len(), b.coeffs.len());
         // (a_0 + a_1 * u) * (b_0 + b_1 * u) = (a_0 b_0 - a_1 b_1) + (a_0 b_1 + a_1 b_0) * u
         let mut ab_coeffs = Vec::with_capacity(a.coeffs.len() * b.coeffs.len());
@@ -282,17 +266,13 @@ where
         Self::FieldPoint::construct(out_coeffs)
     }
 
-    fn check_carry_mod_to_zero<'v>(&self, ctx: &mut Context<'v, F>, a: &Self::FieldPoint<'v>) {
+    fn check_carry_mod_to_zero(&self, ctx: &mut Context<F>, a: &Self::FieldPoint) {
         for coeff in &a.coeffs {
             self.fp_chip.check_carry_mod_to_zero(ctx, coeff);
         }
     }
 
-    fn carry_mod<'v>(
-        &self,
-        ctx: &mut Context<'v, F>,
-        a: &Self::FieldPoint<'v>,
-    ) -> Self::FieldPoint<'v> {
+    fn carry_mod(&self, ctx: &mut Context<F>, a: &Self::FieldPoint) -> Self::FieldPoint {
         let mut out_coeffs = Vec::with_capacity(a.coeffs.len());
         for a_coeff in &a.coeffs {
             let coeff = self.fp_chip.carry_mod(ctx, a_coeff);
@@ -301,28 +281,24 @@ where
         Self::FieldPoint::construct(out_coeffs)
     }
 
-    fn range_check<'v>(&self, ctx: &mut Context<'v, F>, a: &Self::FieldPoint<'v>, max_bits: usize) {
+    fn range_check(&self, ctx: &mut Context<F>, a: &Self::FieldPoint, max_bits: usize) {
         for a_coeff in &a.coeffs {
             self.fp_chip.range_check(ctx, a_coeff, max_bits);
         }
     }
 
-    fn enforce_less_than<'v>(&self, ctx: &mut Context<'v, F>, a: &Self::FieldPoint<'v>) {
+    fn enforce_less_than(&self, ctx: &mut Context<F>, a: &Self::FieldPoint) {
         for a_coeff in &a.coeffs {
             self.fp_chip.enforce_less_than(ctx, a_coeff)
         }
     }
 
-    fn is_soft_zero<'v>(
-        &self,
-        ctx: &mut Context<'v, F>,
-        a: &Self::FieldPoint<'v>,
-    ) -> AssignedValue<'v, F> {
+    fn is_soft_zero(&self, ctx: &mut Context<F>, a: &Self::FieldPoint) -> AssignedValue<F> {
         let mut prev = None;
         for a_coeff in &a.coeffs {
             let coeff = self.fp_chip.is_soft_zero(ctx, a_coeff);
             if let Some(p) = prev {
-                let new = self.fp_chip.range().gate().and(ctx, Existing(&coeff), Existing(&p));
+                let new = self.gate().and(ctx, coeff, p);
                 prev = Some(new);
             } else {
                 prev = Some(coeff);
@@ -331,16 +307,12 @@ where
         prev.unwrap()
     }
 
-    fn is_soft_nonzero<'v>(
-        &self,
-        ctx: &mut Context<'v, F>,
-        a: &Self::FieldPoint<'v>,
-    ) -> AssignedValue<'v, F> {
+    fn is_soft_nonzero(&self, ctx: &mut Context<F>, a: &Self::FieldPoint) -> AssignedValue<F> {
         let mut prev = None;
         for a_coeff in &a.coeffs {
             let coeff = self.fp_chip.is_soft_nonzero(ctx, a_coeff);
             if let Some(p) = prev {
-                let new = self.fp_chip.range().gate().or(ctx, Existing(&coeff), Existing(&p));
+                let new = self.gate().or(ctx, coeff, p);
                 prev = Some(new);
             } else {
                 prev = Some(coeff);
@@ -349,16 +321,12 @@ where
         prev.unwrap()
     }
 
-    fn is_zero<'v>(
-        &self,
-        ctx: &mut Context<'v, F>,
-        a: &Self::FieldPoint<'v>,
-    ) -> AssignedValue<'v, F> {
+    fn is_zero(&self, ctx: &mut Context<F>, a: &Self::FieldPoint) -> AssignedValue<F> {
         let mut prev = None;
         for a_coeff in &a.coeffs {
             let coeff = self.fp_chip.is_zero(ctx, a_coeff);
             if let Some(p) = prev {
-                let new = self.fp_chip.range().gate().and(ctx, Existing(&coeff), Existing(&p));
+                let new = self.gate().and(ctx, coeff, p);
                 prev = Some(new);
             } else {
                 prev = Some(coeff);
@@ -367,17 +335,17 @@ where
         prev.unwrap()
     }
 
-    fn is_equal_unenforced<'v>(
+    fn is_equal_unenforced(
         &self,
-        ctx: &mut Context<'v, F>,
-        a: &Self::FieldPoint<'v>,
-        b: &Self::FieldPoint<'v>,
-    ) -> AssignedValue<'v, F> {
+        ctx: &mut Context<F>,
+        a: &Self::FieldPoint,
+        b: &Self::FieldPoint,
+    ) -> AssignedValue<F> {
         let mut acc = None;
         for (a_coeff, b_coeff) in a.coeffs.iter().zip(b.coeffs.iter()) {
             let coeff = self.fp_chip.is_equal_unenforced(ctx, a_coeff, b_coeff);
             if let Some(c) = acc {
-                acc = Some(self.fp_chip.range().gate().and(ctx, Existing(&coeff), Existing(&c)));
+                acc = Some(self.gate().and(ctx, coeff, c));
             } else {
                 acc = Some(coeff);
             }
@@ -385,12 +353,7 @@ where
         acc.unwrap()
     }
 
-    fn assert_equal<'v>(
-        &self,
-        ctx: &mut Context<'v, F>,
-        a: &Self::FieldPoint<'v>,
-        b: &Self::FieldPoint<'v>,
-    ) {
+    fn assert_equal(&self, ctx: &mut Context<F>, a: &Self::FieldPoint, b: &Self::FieldPoint) {
         for (a_coeff, b_coeff) in a.coeffs.iter().zip(b.coeffs.iter()) {
             self.fp_chip.assert_equal(ctx, a_coeff, b_coeff)
         }
diff --git a/halo2-ecc/src/fields/mod.rs b/halo2-ecc/src/fields/mod.rs
index e5e65f16..cdae8275 100644
--- a/halo2-ecc/src/fields/mod.rs
+++ b/halo2-ecc/src/fields/mod.rs
@@ -1,6 +1,11 @@
-use crate::halo2_proofs::{arithmetic::Field, circuit::Value};
-use halo2_base::{gates::RangeInstructions, utils::PrimeField, AssignedValue, Context};
+use crate::halo2_proofs::arithmetic::Field;
+use halo2_base::{
+    gates::RangeInstructions,
+    utils::{BigPrimeField, ScalarField},
+    AssignedValue, Context,
+};
 use num_bigint::BigUint;
+use serde::{Deserialize, Serialize};
 use std::fmt::Debug;
 
 pub mod fp;
@@ -10,6 +15,8 @@ pub mod fp2;
 #[cfg(test)]
 mod tests;
 
+pub trait PrimeField = BigPrimeField;
+
 #[derive(Clone, Debug)]
 pub struct FieldExtPoint<FieldPoint: Clone + Debug> {
     // `F_q` field extension of `F_p` where `q = p^degree`
@@ -28,12 +35,12 @@ impl<FieldPoint: Clone + Debug> FieldExtPoint<FieldPoint> {
 }
 
 /// Common functionality for finite field chips
-pub trait FieldChip<F: PrimeField> {
+pub trait FieldChip<F: PrimeField>: Clone + Debug + Send + Sync {
     const PRIME_FIELD_NUM_BITS: u32;
 
     type ConstantType: Debug;
     type WitnessType: Debug;
-    type FieldPoint<'v>: Clone + Debug;
+    type FieldPoint: Clone + Debug + Send + Sync;
     // a type implementing `Field` trait to help with witness generation (for example with inverse)
     type FieldType: Field;
     type RangeChip: RangeInstructions<F>;
@@ -45,159 +52,126 @@ pub trait FieldChip<F: PrimeField> {
     fn range(&self) -> &Self::RangeChip;
     fn limb_bits(&self) -> usize;
 
-    fn get_assigned_value(&self, x: &Self::FieldPoint<'_>) -> Value<Self::FieldType>;
+    fn get_assigned_value(&self, x: &Self::FieldPoint) -> Self::FieldType;
 
     fn fe_to_constant(x: Self::FieldType) -> Self::ConstantType;
-    fn fe_to_witness(x: &Value<Self::FieldType>) -> Self::WitnessType;
+    fn fe_to_witness(x: &Self::FieldType) -> Self::WitnessType;
 
-    fn load_private<'v>(
-        &self,
-        ctx: &mut Context<'_, F>,
-        coeffs: Self::WitnessType,
-    ) -> Self::FieldPoint<'v>;
+    fn load_private(&self, ctx: &mut Context<F>, coeffs: Self::WitnessType) -> Self::FieldPoint;
 
-    fn load_constant<'v>(
-        &self,
-        ctx: &mut Context<'_, F>,
-        coeffs: Self::ConstantType,
-    ) -> Self::FieldPoint<'v>;
+    fn load_constant(&self, ctx: &mut Context<F>, coeffs: Self::ConstantType) -> Self::FieldPoint;
 
-    fn add_no_carry<'v>(
+    fn add_no_carry(
         &self,
-        ctx: &mut Context<'v, F>,
-        a: &Self::FieldPoint<'v>,
-        b: &Self::FieldPoint<'v>,
-    ) -> Self::FieldPoint<'v>;
+        ctx: &mut Context<F>,
+        a: &Self::FieldPoint,
+        b: &Self::FieldPoint,
+    ) -> Self::FieldPoint;
 
     /// output: `a + c`
-    fn add_constant_no_carry<'v>(
+    fn add_constant_no_carry(
         &self,
-        ctx: &mut Context<'v, F>,
-        a: &Self::FieldPoint<'v>,
+        ctx: &mut Context<F>,
+        a: &Self::FieldPoint,
         c: Self::ConstantType,
-    ) -> Self::FieldPoint<'v>;
+    ) -> Self::FieldPoint;
 
-    fn sub_no_carry<'v>(
+    fn sub_no_carry(
         &self,
-        ctx: &mut Context<'v, F>,
-        a: &Self::FieldPoint<'v>,
-        b: &Self::FieldPoint<'v>,
-    ) -> Self::FieldPoint<'v>;
+        ctx: &mut Context<F>,
+        a: &Self::FieldPoint,
+        b: &Self::FieldPoint,
+    ) -> Self::FieldPoint;
 
-    fn negate<'v>(
-        &self,
-        ctx: &mut Context<'v, F>,
-        a: &Self::FieldPoint<'v>,
-    ) -> Self::FieldPoint<'v>;
+    fn negate(&self, ctx: &mut Context<F>, a: &Self::FieldPoint) -> Self::FieldPoint;
 
     /// a * c
-    fn scalar_mul_no_carry<'v>(
+    fn scalar_mul_no_carry(
         &self,
-        ctx: &mut Context<'v, F>,
-        a: &Self::FieldPoint<'v>,
+        ctx: &mut Context<F>,
+        a: &Self::FieldPoint,
         c: i64,
-    ) -> Self::FieldPoint<'v>;
+    ) -> Self::FieldPoint;
 
     /// a * c + b
-    fn scalar_mul_and_add_no_carry<'v>(
+    fn scalar_mul_and_add_no_carry(
         &self,
-        ctx: &mut Context<'v, F>,
-        a: &Self::FieldPoint<'v>,
-        b: &Self::FieldPoint<'v>,
+        ctx: &mut Context<F>,
+        a: &Self::FieldPoint,
+        b: &Self::FieldPoint,
         c: i64,
-    ) -> Self::FieldPoint<'v>;
+    ) -> Self::FieldPoint;
 
-    fn mul_no_carry<'v>(
+    fn mul_no_carry(
         &self,
-        ctx: &mut Context<'v, F>,
-        a: &Self::FieldPoint<'v>,
-        b: &Self::FieldPoint<'v>,
-    ) -> Self::FieldPoint<'v>;
+        ctx: &mut Context<F>,
+        a: &Self::FieldPoint,
+        b: &Self::FieldPoint,
+    ) -> Self::FieldPoint;
 
-    fn check_carry_mod_to_zero<'v>(&self, ctx: &mut Context<'v, F>, a: &Self::FieldPoint<'v>);
+    fn check_carry_mod_to_zero(&self, ctx: &mut Context<F>, a: &Self::FieldPoint);
 
-    fn carry_mod<'v>(
-        &self,
-        ctx: &mut Context<'v, F>,
-        a: &Self::FieldPoint<'v>,
-    ) -> Self::FieldPoint<'v>;
+    fn carry_mod(&self, ctx: &mut Context<F>, a: &Self::FieldPoint) -> Self::FieldPoint;
 
-    fn range_check<'v>(&self, ctx: &mut Context<'v, F>, a: &Self::FieldPoint<'v>, max_bits: usize);
+    fn range_check(&self, ctx: &mut Context<F>, a: &Self::FieldPoint, max_bits: usize);
 
-    fn enforce_less_than<'v>(&self, ctx: &mut Context<'v, F>, a: &Self::FieldPoint<'v>);
+    fn enforce_less_than(&self, ctx: &mut Context<F>, a: &Self::FieldPoint);
 
-    // Assumes the witness for a is 0
-    // Constrains that the underlying big integer is 0 and < p.
+    // Returns 1 iff the underlying big integer for `a` is 0. Otherwise returns 0.
     // For field extensions, checks coordinate-wise.
-    fn is_soft_zero<'v>(
-        &self,
-        ctx: &mut Context<'v, F>,
-        a: &Self::FieldPoint<'v>,
-    ) -> AssignedValue<'v, F>;
+    fn is_soft_zero(&self, ctx: &mut Context<F>, a: &Self::FieldPoint) -> AssignedValue<F>;
 
-    // Constrains that the underlying big integer is in [1, p - 1].
+    // Constrains that the underlying big integer is in [0, p - 1].
+    // Then returns 1 iff the underlying big integer for `a` is nonzero. Otherwise returns 0.
     // For field extensions, checks coordinate-wise.
-    fn is_soft_nonzero<'v>(
-        &self,
-        ctx: &mut Context<'v, F>,
-        a: &Self::FieldPoint<'v>,
-    ) -> AssignedValue<'v, F>;
+    fn is_soft_nonzero(&self, ctx: &mut Context<F>, a: &Self::FieldPoint) -> AssignedValue<F>;
 
-    fn is_zero<'v>(
-        &self,
-        ctx: &mut Context<'v, F>,
-        a: &Self::FieldPoint<'v>,
-    ) -> AssignedValue<'v, F>;
+    fn is_zero(&self, ctx: &mut Context<F>, a: &Self::FieldPoint) -> AssignedValue<F>;
 
     // assuming `a, b` have been range checked to be a proper BigInt
     // constrain the witnesses `a, b` to be `< p`
     // then check `a == b` as BigInts
-    fn is_equal<'v>(
+    fn is_equal(
         &self,
-        ctx: &mut Context<'v, F>,
-        a: &Self::FieldPoint<'v>,
-        b: &Self::FieldPoint<'v>,
-    ) -> AssignedValue<'v, F> {
+        ctx: &mut Context<F>,
+        a: &Self::FieldPoint,
+        b: &Self::FieldPoint,
+    ) -> AssignedValue<F> {
         self.enforce_less_than(ctx, a);
         self.enforce_less_than(ctx, b);
         // a.native and b.native are derived from `a.truncation, b.truncation`, so no need to check if they're equal
         self.is_equal_unenforced(ctx, a, b)
     }
 
-    fn is_equal_unenforced<'v>(
+    fn is_equal_unenforced(
         &self,
-        ctx: &mut Context<'v, F>,
-        a: &Self::FieldPoint<'v>,
-        b: &Self::FieldPoint<'v>,
-    ) -> AssignedValue<'v, F>;
+        ctx: &mut Context<F>,
+        a: &Self::FieldPoint,
+        b: &Self::FieldPoint,
+    ) -> AssignedValue<F>;
 
-    fn assert_equal<'v>(
-        &self,
-        ctx: &mut Context<'v, F>,
-        a: &Self::FieldPoint<'v>,
-        b: &Self::FieldPoint<'v>,
-    );
+    fn assert_equal(&self, ctx: &mut Context<F>, a: &Self::FieldPoint, b: &Self::FieldPoint);
 
-    fn mul<'v>(
+    fn mul(
         &self,
-        ctx: &mut Context<'v, F>,
-        a: &Self::FieldPoint<'v>,
-        b: &Self::FieldPoint<'v>,
-    ) -> Self::FieldPoint<'v> {
+        ctx: &mut Context<F>,
+        a: &Self::FieldPoint,
+        b: &Self::FieldPoint,
+    ) -> Self::FieldPoint {
         let no_carry = self.mul_no_carry(ctx, a, b);
         self.carry_mod(ctx, &no_carry)
     }
 
-    fn divide<'v>(
+    fn divide(
         &self,
-        ctx: &mut Context<'v, F>,
-        a: &Self::FieldPoint<'v>,
-        b: &Self::FieldPoint<'v>,
-    ) -> Self::FieldPoint<'v> {
+        ctx: &mut Context<F>,
+        a: &Self::FieldPoint,
+        b: &Self::FieldPoint,
+    ) -> Self::FieldPoint {
         let a_val = self.get_assigned_value(a);
         let b_val = self.get_assigned_value(b);
-        let b_inv = b_val.map(|bv| bv.invert().unwrap());
-        let quot_val = a_val.zip(b_inv).map(|(a, bi)| a * bi);
+        let b_inv = b_val.invert().unwrap();
+        let quot_val = a_val * b_inv;
 
         let quot = self.load_private(ctx, Self::fe_to_witness(&quot_val));
 
@@ -211,16 +185,16 @@ pub trait FieldChip<F: PrimeField> {
 
     // constrain and output -a / b
     // this is usually cheaper constraint-wise than computing -a and then (-a) / b separately
-    fn neg_divide<'v>(
+    fn neg_divide(
         &self,
-        ctx: &mut Context<'v, F>,
-        a: &Self::FieldPoint<'v>,
-        b: &Self::FieldPoint<'v>,
-    ) -> Self::FieldPoint<'v> {
+        ctx: &mut Context<F>,
+        a: &Self::FieldPoint,
+        b: &Self::FieldPoint,
+    ) -> Self::FieldPoint {
         let a_val = self.get_assigned_value(a);
         let b_val = self.get_assigned_value(b);
-        let b_inv = b_val.map(|bv| bv.invert().unwrap());
-        let quot_val = a_val.zip(b_inv).map(|(a, b)| -a * b);
+        let b_inv = b_val.invert().unwrap();
+        let quot_val = -a_val * b_inv;
 
         let quot = self.load_private(ctx, Self::fe_to_witness(&quot_val));
         self.range_check(ctx, &quot, Self::PRIME_FIELD_NUM_BITS as usize);
@@ -234,23 +208,23 @@ pub trait FieldChip<F: PrimeField> {
     }
 }
 
-pub trait Selectable<F: PrimeField> {
-    type Point<'v>;
+pub trait Selectable<F: ScalarField> {
+    type Point;
 
-    fn select<'v>(
+    fn select(
         &self,
-        ctx: &mut Context<'_, F>,
-        a: &Self::Point<'v>,
-        b: &Self::Point<'v>,
-        sel: &AssignedValue<'v, F>,
-    ) -> Self::Point<'v>;
+        ctx: &mut Context<F>,
+        a: &Self::Point,
+        b: &Self::Point,
+        sel: AssignedValue<F>,
+    ) -> Self::Point;
 
-    fn select_by_indicator<'v>(
+    fn select_by_indicator(
         &self,
-        ctx: &mut Context<'_, F>,
-        a: &[Self::Point<'v>],
-        coeffs: &[AssignedValue<'v, F>],
-    ) -> Self::Point<'v>;
+        ctx: &mut Context<F>,
+        a: &[Self::Point],
+        coeffs: &[AssignedValue<F>],
+    ) -> Self::Point;
 }
 
 // Common functionality for prime field chips
@@ -265,8 +239,13 @@ where
 
 // helper trait so we can actually construct and read the Fp2 struct
 // needs to be implemented for Fp2 struct for use cases below
-pub trait FieldExtConstructor<Fp: PrimeField, const DEGREE: usize> {
+pub trait FieldExtConstructor<Fp: ff::PrimeField, const DEGREE: usize> {
     fn new(c: [Fp; DEGREE]) -> Self;
 
     fn coeffs(&self) -> Vec<Fp>;
 }
+
+#[derive(Clone, Copy, Debug, Serialize, Deserialize)]
+pub enum FpStrategy {
+    Simple,
+}
diff --git a/halo2-ecc/src/fields/tests.rs b/halo2-ecc/src/fields/tests.rs
index 49cd349e..6e013486 100644
--- a/halo2-ecc/src/fields/tests.rs
+++ b/halo2-ecc/src/fields/tests.rs
@@ -1,120 +1,55 @@
 mod fp {
-    use crate::fields::{
-        fp::{FpConfig, FpStrategy},
-        FieldChip,
-    };
+    use crate::fields::fp::FpChip;
+    use crate::fields::{FieldChip, PrimeField};
     use crate::halo2_proofs::{
-        circuit::*,
         dev::MockProver,
         halo2curves::bn256::{Fq, Fr},
-        plonk::*,
-    };
-    use group::ff::Field;
-    use halo2_base::{
-        utils::{fe_to_biguint, modulus, PrimeField},
-        SKIP_FIRST_PASS,
     };
-    use num_bigint::BigInt;
+    use halo2_base::gates::builder::{GateThreadBuilder, RangeCircuitBuilder};
+    use halo2_base::gates::RangeChip;
+    use halo2_base::utils::biguint_to_fe;
+    use halo2_base::utils::{fe_to_biguint, modulus};
+    use halo2_base::Context;
     use rand::rngs::OsRng;
-    use std::marker::PhantomData;
-
-    #[derive(Default)]
-    struct MyCircuit<F> {
-        a: Value<Fq>,
-        b: Value<Fq>,
-        _marker: PhantomData<F>,
-    }
 
-    const NUM_ADVICE: usize = 1;
-    const NUM_FIXED: usize = 1;
     const K: usize = 10;
 
-    impl<F: PrimeField> Circuit<F> for MyCircuit<F> {
-        type Config = FpConfig<F, Fq>;
-        type FloorPlanner = SimpleFloorPlanner;
-
-        fn without_witnesses(&self) -> Self {
-            Self::default()
-        }
-
-        fn configure(meta: &mut ConstraintSystem<F>) -> Self::Config {
-            FpConfig::<F, _>::configure(
-                meta,
-                FpStrategy::Simple,
-                &[NUM_ADVICE],
-                &[1],
-                NUM_FIXED,
-                9,
-                88,
-                3,
-                modulus::<Fq>(),
-                0,
-                K,
-            )
-        }
-
-        fn synthesize(
-            &self,
-            chip: Self::Config,
-            mut layouter: impl Layouter<F>,
-        ) -> Result<(), Error> {
-            chip.load_lookup_table(&mut layouter)?;
-
-            let mut first_pass = SKIP_FIRST_PASS;
-
-            layouter.assign_region(
-                || "fp",
-                |region| {
-                    if first_pass {
-                        first_pass = false;
-                        return Ok(());
-                    }
-
-                    let mut aux = chip.new_context(region);
-                    let ctx = &mut aux;
-
-                    let a_assigned =
-                        chip.load_private(ctx, self.a.map(|a| BigInt::from(fe_to_biguint(&a))));
-                    let b_assigned =
-                        chip.load_private(ctx, self.b.map(|b| BigInt::from(fe_to_biguint(&b))));
-
-                    // test fp_multiply
-                    {
-                        chip.mul(ctx, &a_assigned, &b_assigned);
-                    }
-
-                    // IMPORTANT: this copies advice cells to enable lookup
-                    // This is not optional.
-                    chip.finalize(ctx);
-
-                    #[cfg(feature = "display")]
-                    {
-                        println!(
-                            "Using {} advice columns and {} fixed columns",
-                            NUM_ADVICE, NUM_FIXED
-                        );
-                        println!("total cells: {}", ctx.total_advice);
-
-                        let (const_rows, _) = ctx.fixed_stats();
-                        println!("maximum rows used by a fixed column: {const_rows}");
-                    }
-                    Ok(())
-                },
-            )
-        }
+    fn fp_mul_test<F: PrimeField>(
+        ctx: &mut Context<F>,
+        lookup_bits: usize,
+        limb_bits: usize,
+        num_limbs: usize,
+        _a: Fq,
+        _b: Fq,
+    ) {
+        std::env::set_var("LOOKUP_BITS", lookup_bits.to_string());
+        let range = RangeChip::<F>::default(lookup_bits);
+        let chip = FpChip::<F, Fq>::new(&range, limb_bits, num_limbs);
+
+        let [a, b] = [_a, _b].map(|x| chip.load_private(ctx, FpChip::<F, Fq>::fe_to_witness(&x)));
+        let c = chip.mul(ctx, &a, &b);
+
+        assert_eq!(c.truncation.to_bigint(limb_bits), c.value);
+        assert_eq!(
+            c.native.value(),
+            &biguint_to_fe(&(&c.value.to_biguint().unwrap() % modulus::<F>()))
+        );
+        assert_eq!(c.value, fe_to_biguint(&(_a * _b)).into())
     }
 
     #[test]
     fn test_fp() {
+        let k = K;
         let a = Fq::random(OsRng);
         let b = Fq::random(OsRng);
 
-        let circuit =
-            MyCircuit::<Fr> { a: Value::known(a), b: Value::known(b), _marker: PhantomData };
+        let mut builder = GateThreadBuilder::<Fr>::mock();
+        fp_mul_test(builder.main(0), k - 1, 88, 3, a, b);
+
+        builder.config(k, Some(10));
+        let circuit = RangeCircuitBuilder::mock(builder);
 
-        let prover = MockProver::run(K as u32, &circuit, vec![]).unwrap();
-        prover.assert_satisfied();
-        //assert_eq!(prover.verify(), Ok(()));
+        MockProver::run(k as u32, &circuit, vec![]).unwrap().assert_satisfied();
     }
 
     #[cfg(feature = "dev-graph")]
@@ -126,144 +61,93 @@ mod fp {
         root.fill(&WHITE).unwrap();
         let root = root.titled("Fp Layout", ("sans-serif", 60)).unwrap();
 
-        let circuit = MyCircuit::<Fr>::default();
-        halo2_proofs::dev::CircuitLayout::default().render(K as u32, &circuit, &root).unwrap();
+        let k = K;
+        let a = Fq::zero();
+        let b = Fq::zero();
+
+        let mut builder = GateThreadBuilder::keygen();
+        fp_mul_test(builder.main(0), k - 1, 88, 3, a, b);
+
+        builder.config(k, Some(10));
+        let circuit = RangeCircuitBuilder::keygen(builder);
+        halo2_proofs::dev::CircuitLayout::default().render(k as u32, &circuit, &root).unwrap();
     }
 }
 
 mod fp12 {
-    use crate::fields::{
-        fp::{FpConfig, FpStrategy},
-        fp12::*,
-        FieldChip,
-    };
+    use crate::fields::fp::FpChip;
+    use crate::fields::fp12::Fp12Chip;
+    use crate::fields::{FieldChip, PrimeField};
     use crate::halo2_proofs::{
-        circuit::*,
         dev::MockProver,
         halo2curves::bn256::{Fq, Fq12, Fr},
-        plonk::*,
     };
-    use halo2_base::utils::modulus;
-    use halo2_base::{utils::PrimeField, SKIP_FIRST_PASS};
-    use std::marker::PhantomData;
+    use halo2_base::gates::builder::{GateThreadBuilder, RangeCircuitBuilder};
+    use halo2_base::gates::RangeChip;
+    use halo2_base::Context;
+    use rand_core::OsRng;
 
-    #[derive(Default)]
-    struct MyCircuit<F> {
-        a: Value<Fq12>,
-        b: Value<Fq12>,
-        _marker: PhantomData<F>,
-    }
-
-    const NUM_ADVICE: usize = 1;
-    const NUM_FIXED: usize = 1;
     const XI_0: i64 = 9;
 
-    impl<F: PrimeField> Circuit<F> for MyCircuit<F> {
-        type Config = FpConfig<F, Fq>;
-        type FloorPlanner = SimpleFloorPlanner;
-
-        fn without_witnesses(&self) -> Self {
-            Self::default()
-        }
-
-        fn configure(meta: &mut ConstraintSystem<F>) -> Self::Config {
-            FpConfig::<F, _>::configure(
-                meta,
-                FpStrategy::Simple,
-                &[NUM_ADVICE],
-                &[1],
-                NUM_FIXED,
-                22,
-                88,
-                3,
-                modulus::<Fq>(),
-                0,
-                23,
-            )
-        }
-
-        fn synthesize(
-            &self,
-            config: Self::Config,
-            mut layouter: impl Layouter<F>,
-        ) -> Result<(), Error> {
-            config.load_lookup_table(&mut layouter)?;
-            let chip = Fp12Chip::<F, FpConfig<F, Fq>, Fq12, XI_0>::construct(&config);
-
-            let mut first_pass = SKIP_FIRST_PASS;
-
-            layouter.assign_region(
-                || "fp12",
-                |region| {
-                    if first_pass {
-                        first_pass = false;
-                        return Ok(());
-                    }
-
-                    let mut aux = config.new_context(region);
-                    let ctx = &mut aux;
-
-                    let a_assigned = chip.load_private(
-                        ctx,
-                        Fp12Chip::<F, FpConfig<F, Fq>, Fq12, XI_0>::fe_to_witness(&self.a),
-                    );
-                    let b_assigned = chip.load_private(
-                        ctx,
-                        Fp12Chip::<F, FpConfig<F, Fq>, Fq12, XI_0>::fe_to_witness(&self.b),
-                    );
-
-                    // test fp_multiply
-                    {
-                        chip.mul(ctx, &a_assigned, &b_assigned);
-                    }
-
-                    // IMPORTANT: this copies advice cells to enable lookup
-                    // This is not optional.
-                    chip.fp_chip.finalize(ctx);
-
-                    #[cfg(feature = "display")]
-                    {
-                        println!(
-                            "Using {} advice columns and {} fixed columns",
-                            NUM_ADVICE, NUM_FIXED
-                        );
-                        println!("total advice cells: {}", ctx.total_advice);
-
-                        let (const_rows, _) = ctx.fixed_stats();
-                        println!("maximum rows used by a fixed column: {const_rows}");
-                    }
-                    Ok(())
-                },
-            )
+    fn fp12_mul_test<F: PrimeField>(
+        ctx: &mut Context<F>,
+        lookup_bits: usize,
+        limb_bits: usize,
+        num_limbs: usize,
+        _a: Fq12,
+        _b: Fq12,
+    ) {
+        std::env::set_var("LOOKUP_BITS", lookup_bits.to_string());
+        let range = RangeChip::<F>::default(lookup_bits);
+        let fp_chip = FpChip::<F, Fq>::new(&range, limb_bits, num_limbs);
+        let chip = Fp12Chip::<F, _, Fq12, XI_0>::new(&fp_chip);
+
+        let [a, b] = [_a, _b].map(|x| {
+            chip.load_private(ctx, Fp12Chip::<F, FpChip<F, Fq>, Fq12, XI_0>::fe_to_witness(&x))
+        });
+        let c = chip.mul(ctx, &a, &b);
+
+        assert_eq!(chip.get_assigned_value(&c), _a * _b);
+        for c in c.coeffs {
+            assert_eq!(c.truncation.to_bigint(limb_bits), c.value);
         }
     }
 
     #[test]
     fn test_fp12() {
-        let k = 23;
-        let mut rng = rand::thread_rng();
-        let a = Fq12::random(&mut rng);
-        let b = Fq12::random(&mut rng);
+        let k = 12;
+        let a = Fq12::random(OsRng);
+        let b = Fq12::random(OsRng);
 
-        let circuit =
-            MyCircuit::<Fr> { a: Value::known(a), b: Value::known(b), _marker: PhantomData };
+        let mut builder = GateThreadBuilder::<Fr>::mock();
+        fp12_mul_test(builder.main(0), k - 1, 88, 3, a, b);
 
-        let prover = MockProver::run(k, &circuit, vec![]).unwrap();
-        prover.assert_satisfied();
-        // assert_eq!(prover.verify(), Ok(()));
+        builder.config(k, Some(20));
+        let circuit = RangeCircuitBuilder::mock(builder);
+
+        MockProver::run(k as u32, &circuit, vec![]).unwrap().assert_satisfied();
     }
 
     #[cfg(feature = "dev-graph")]
     #[test]
     fn plot_fp12() {
-        let k = 9;
+        use ff::Field;
         use plotters::prelude::*;
 
         let root = BitMapBackend::new("layout.png", (1024, 1024)).into_drawing_area();
         root.fill(&WHITE).unwrap();
         let root = root.titled("Fp Layout", ("sans-serif", 60)).unwrap();
 
-        let circuit = MyCircuit::<Fr>::default();
+        let k = 23;
+        let a = Fq12::zero();
+        let b = Fq12::zero();
+
+        let mut builder = GateThreadBuilder::<Fr>::mock();
+        fp12_mul_test(builder.main(0), k - 1, 88, 3, a, b);
+
+        builder.config(k, Some(20));
+        let circuit = RangeCircuitBuilder::mock(builder);
+
         halo2_proofs::dev::CircuitLayout::default().render(k, &circuit, &root).unwrap();
     }
 }
diff --git a/halo2-ecc/src/lib.rs b/halo2-ecc/src/lib.rs
index cfa6e1f5..55df690a 100644
--- a/halo2-ecc/src/lib.rs
+++ b/halo2-ecc/src/lib.rs
@@ -2,12 +2,13 @@
 #![allow(clippy::op_ref)]
 #![allow(clippy::type_complexity)]
 #![feature(int_log)]
+#![feature(trait_alias)]
 
 pub mod bigint;
-//pub mod ecc;
-//pub mod fields;
+pub mod ecc;
+pub mod fields;
 
-//pub mod bn254;
+pub mod bn254;
 //pub mod secp256k1;
 
 pub use halo2_base;
diff --git a/halo2-ecc/src/secp256k1/results/ecdsa_bench_m1.csv b/halo2-ecc/src/secp256k1/results/ecdsa_bench_m1.csv
deleted file mode 100644
index d6fdf049..00000000
--- a/halo2-ecc/src/secp256k1/results/ecdsa_bench_m1.csv
+++ /dev/null
@@ -1,10 +0,0 @@
-degree,num_advice,num_lookup,num_fixed,lookup_bits,limb_bits,num_limbs,vk_size,proof_time,proof_size,verify_time
-19,1,0,1,18,88,3,192,13.385351667s,960,3.0945ms
-18,2,1,1,17,88,3,256,8.359564584s,1344,6.137958ms
-17,4,1,1,16,88,3,384,5.56246375s,1920,5.302292ms
-16,9,2,1,15,90,3,736,5.090631625s,3776,6.617666ms
-15,17,3,1,14,90,3,1280,4.457021917s,6560,7.191958ms
-14,36,6,1,13,91,3,2592,4.635864542s,13280,11.689375ms
-13,71,12,1,12,88,3,5024,4.887099708s,25792,16.996459ms
-12,145,23,2,11,88,3,10176,5.740054292s,51808,51.147917ms
-11,305,53,4,10,88,3,21504,7.244600792s,110624,55.647375ms
\ No newline at end of file
diff --git a/halo2-ecc/src/secp256k1/results/ecdsa_bench_results.txt b/halo2-ecc/src/secp256k1/results/ecdsa_bench_results.txt
deleted file mode 100644
index 2146b1ab..00000000
--- a/halo2-ecc/src/secp256k1/results/ecdsa_bench_results.txt
+++ /dev/null
@@ -1,253 +0,0 @@
----------------------- degree = 19 ------------------------------
-Found existing params file. Reading params...
-Time elapsed in circuit & params construction: 3.405966541s
-ecdsa done
-Time elapsed in generating vkey: 2.123023125s
-ecdsa done
-Time elapsed in generating pkey: 5.21121525s
-Time elapsed in filling circuit: 340.292µs
-ECDSA res AssignedCell { value: Some(0x0000000000000000000000000000000000000000000000000000000000000001), cell: Cell { region_index: RegionIndex(58470), row_offset: 3, column: Column { index: 0, column_type: Advice } }, _marker: PhantomData }
-Using:
-advice columns: 1
-special lookup advice columns: 0
-fixed columns: 1
-lookup bits: 18
-limb bits: 88
-num limbs: 3
-maximum rows used by an advice column: 488463
-minimum rows used by an advice column: 488463
-total cells used: 488463
-cells used in special lookup column: 0
-maximum rows used by a fixed column: 8026
-Suggestions:
-Have you tried using 1 advice columns?
-Have you tried using 0 lookup columns?
-Have you tried using 1 fixed columns?
-ecdsa done
-Proving time: 13.385351667s
-Verify time: 3.0945ms
----------------------- degree = 18 ------------------------------
-Found existing params file. Reading params...
-Time elapsed in circuit & params construction: 1.678767125s
-ecdsa done
-Time elapsed in generating vkey: 2.120563625s
-ecdsa done
-Time elapsed in generating pkey: 3.271299875s
-Time elapsed in filling circuit: 343.416µs
-ECDSA res AssignedCell { value: Some(0x0000000000000000000000000000000000000000000000000000000000000001), cell: Cell { region_index: RegionIndex(58470), row_offset: 3, column: Column { index: 1, column_type: Advice } }, _marker: PhantomData }
-Using:
-advice columns: 2
-special lookup advice columns: 1
-fixed columns: 1
-lookup bits: 17
-limb bits: 88
-num limbs: 3
-maximum rows used by an advice column: 249145
-minimum rows used by an advice column: 249144
-total cells used: 498289
-cells used in special lookup column: 69615
-maximum rows used by a fixed column: 8026
-Suggestions:
-Have you tried using 2 advice columns?
-Have you tried using 1 lookup columns?
-Have you tried using 1 fixed columns?
-ecdsa done
-Proving time: 8.359564584s
-Verify time: 6.137958ms
----------------------- degree = 17 ------------------------------
-Found existing params file. Reading params...
-Time elapsed in circuit & params construction: 845.511958ms
-ecdsa done
-Time elapsed in generating vkey: 1.821084583s
-ecdsa done
-Time elapsed in generating pkey: 2.1293145s
-Time elapsed in filling circuit: 350.042µs
-ECDSA res AssignedCell { value: Some(0x0000000000000000000000000000000000000000000000000000000000000001), cell: Cell { region_index: RegionIndex(58470), row_offset: 3, column: Column { index: 2, column_type: Advice } }, _marker: PhantomData }
-Using:
-advice columns: 4
-special lookup advice columns: 1
-fixed columns: 1
-lookup bits: 16
-limb bits: 88
-num limbs: 3
-maximum rows used by an advice column: 125487
-minimum rows used by an advice column: 125483
-total cells used: 501940
-cells used in special lookup column: 70832
-maximum rows used by a fixed column: 8026
-Suggestions:
-Have you tried using 4 advice columns?
-Have you tried using 1 lookup columns?
-Have you tried using 1 fixed columns?
-ecdsa done
-Proving time: 5.56246375s
-Verify time: 5.302292ms
----------------------- degree = 16 ------------------------------
-Found existing params file. Reading params...
-Time elapsed in circuit & params construction: 476.735291ms
-ecdsa done
-Time elapsed in generating vkey: 1.889539709s
-ecdsa done
-Time elapsed in generating pkey: 1.838861167s
-Time elapsed in filling circuit: 361.875µs
-ECDSA res AssignedCell { value: Some(0x0000000000000000000000000000000000000000000000000000000000000001), cell: Cell { region_index: RegionIndex(60200), row_offset: 3, column: Column { index: 5, column_type: Advice } }, _marker: PhantomData }
-Using:
-advice columns: 9
-special lookup advice columns: 2
-fixed columns: 1
-lookup bits: 15
-limb bits: 90
-num limbs: 3
-maximum rows used by an advice column: 59389
-minimum rows used by an advice column: 59366
-total cells used: 534362
-cells used in special lookup column: 71669
-maximum rows used by a fixed column: 8269
-Suggestions:
-Have you tried using 9 advice columns?
-Have you tried using 2 lookup columns?
-Have you tried using 1 fixed columns?
-ecdsa done
-Proving time: 5.090631625s
-Verify time: 6.617666ms
----------------------- degree = 15 ------------------------------
-Found existing params file. Reading params...
-Time elapsed in circuit & params construction: 234.525125ms
-ecdsa done
-test secp256k1::ecdsa::bench_secp has been running for over 60 seconds
-Time elapsed in generating vkey: 1.897827708s
-ecdsa done
-Time elapsed in generating pkey: 1.611605583s
-Time elapsed in filling circuit: 347.375µs
-ECDSA res AssignedCell { value: Some(0x0000000000000000000000000000000000000000000000000000000000000001), cell: Cell { region_index: RegionIndex(60200), row_offset: 3, column: Column { index: 3, column_type: Advice } }, _marker: PhantomData }
-Using:
-advice columns: 17
-special lookup advice columns: 3
-fixed columns: 1
-lookup bits: 14
-limb bits: 90
-num limbs: 3
-maximum rows used by an advice column: 32707
-minimum rows used by an advice column: 32678
-total cells used: 555735
-cells used in special lookup column: 85486
-maximum rows used by a fixed column: 8268
-Suggestions:
-Have you tried using 17 advice columns?
-Have you tried using 3 lookup columns?
-Have you tried using 1 fixed columns?
-ecdsa done
-Proving time: 4.457021917s
-Verify time: 7.191958ms
----------------------- degree = 14 ------------------------------
-Found existing params file. Reading params...
-Time elapsed in circuit & params construction: 127.057375ms
-ecdsa done
-Time elapsed in generating vkey: 2.217236041s
-ecdsa done
-Time elapsed in generating pkey: 1.632434708s
-Time elapsed in filling circuit: 344.458µs
-ECDSA res AssignedCell { value: Some(0x0000000000000000000000000000000000000000000000000000000000000001), cell: Cell { region_index: RegionIndex(61065), row_offset: 3, column: Column { index: 20, column_type: Advice } }, _marker: PhantomData }
-Using:
-advice columns: 36
-special lookup advice columns: 6
-fixed columns: 1
-lookup bits: 13
-limb bits: 91
-num limbs: 3
-maximum rows used by an advice column: 15931
-minimum rows used by an advice column: 15895
-total cells used: 572648
-cells used in special lookup column: 85441
-maximum rows used by a fixed column: 8390
-Suggestions:
-Have you tried using 35 advice columns?
-Have you tried using 6 lookup columns?
-Have you tried using 1 fixed columns?
-ecdsa done
-Proving time: 4.635864542s
-Verify time: 11.689375ms
----------------------- degree = 13 ------------------------------
-Found existing params file. Reading params...
-Time elapsed in circuit & params construction: 61.105375ms
-ecdsa done
-Time elapsed in generating vkey: 2.53718925s
-ecdsa done
-Time elapsed in generating pkey: 1.570442167s
-Time elapsed in filling circuit: 344.25µs
-ECDSA res AssignedCell { value: Some(0x0000000000000000000000000000000000000000000000000000000000000001), cell: Cell { region_index: RegionIndex(58470), row_offset: 3, column: Column { index: 16, column_type: Advice } }, _marker: PhantomData }
-Using:
-advice columns: 71
-special lookup advice columns: 12
-fixed columns: 1
-lookup bits: 12
-limb bits: 88
-num limbs: 3
-maximum rows used by an advice column: 8096
-minimum rows used by an advice column: 8057
-total cells used: 572742
-cells used in special lookup column: 91593
-maximum rows used by a fixed column: 8026
-Suggestions:
-Have you tried using 70 advice columns?
-Have you tried using 12 lookup columns?
-Have you tried using 1 fixed columns?
-ecdsa done
-Proving time: 4.887099708s
-Verify time: 16.996459ms
----------------------- degree = 12 ------------------------------
-Found existing params file. Reading params...
-Time elapsed in circuit & params construction: 34.131375ms
-ecdsa done
-Time elapsed in generating vkey: 2.9427305s
-ecdsa done
-Time elapsed in generating pkey: 1.724485125s
-Time elapsed in filling circuit: 338.666µs
-ECDSA res AssignedCell { value: Some(0x0000000000000000000000000000000000000000000000000000000000000001), cell: Cell { region_index: RegionIndex(58470), row_offset: 3, column: Column { index: 45, column_type: Advice } }, _marker: PhantomData }
-Using:
-advice columns: 145
-special lookup advice columns: 23
-fixed columns: 2
-lookup bits: 11
-limb bits: 88
-num limbs: 3
-maximum rows used by an advice column: 4064
-minimum rows used by an advice column: 4023
-total cells used: 584948
-cells used in special lookup column: 94029
-maximum rows used by a fixed column: 4014
-Suggestions:
-Have you tried using 143 advice columns?
-Have you tried using 23 lookup columns?
-Have you tried using 2 fixed columns?
-ecdsa done
-Proving time: 5.740054292s
-Verify time: 51.147917ms
----------------------- degree = 11 ------------------------------
-Found existing params file. Reading params...
-Time elapsed in circuit & params construction: 17.605458ms
-ecdsa done
-Time elapsed in generating vkey: 3.72480825s
-ecdsa done
-Time elapsed in generating pkey: 2.107728542s
-Time elapsed in filling circuit: 338.25µs
-ECDSA res AssignedCell { value: Some(0x0000000000000000000000000000000000000000000000000000000000000001), cell: Cell { region_index: RegionIndex(58470), row_offset: 3, column: Column { index: 302, column_type: Advice } }, _marker: PhantomData }
-Using:
-advice columns: 305
-special lookup advice columns: 53
-fixed columns: 4
-lookup bits: 10
-limb bits: 88
-num limbs: 3
-maximum rows used by an advice column: 2038
-minimum rows used by an advice column: 1995
-total cells used: 611812
-cells used in special lookup column: 107456
-maximum rows used by a fixed column: 2007
-Suggestions:
-Have you tried using 299 advice columns?
-Have you tried using 53 lookup columns?
-Have you tried using 4 fixed columns?
-ecdsa done
-Proving time: 7.244600792s
-Verify time: 55.647375ms
\ No newline at end of file

From e1540cf136ec40db3fb08c780c9ab21aa9d5ded1 Mon Sep 17 00:00:00 2001
From: Jonathan Wang <jonathanpwang@users.noreply.github.com>
Date: Fri, 10 Feb 2023 09:29:53 -0800
Subject: [PATCH 2/3] feat: remove `size_hint` in `inner_product_simple`

* change other uses of `size_hint` to be followed by `assert_eq!` instead
  of `debug_assert_eq!`
---
 halo2-base/src/gates/builder.rs   | 58 +++++++++++++++++++------------
 halo2-base/src/gates/flex_gate.rs | 22 ++++++------
 halo2-ecc/benches/msm.rs          |  2 +-
 3 files changed, 47 insertions(+), 35 deletions(-)

diff --git a/halo2-base/src/gates/builder.rs b/halo2-base/src/gates/builder.rs
index c049ba28..9771aa15 100644
--- a/halo2-base/src/gates/builder.rs
+++ b/halo2-base/src/gates/builder.rs
@@ -168,7 +168,7 @@ impl<F: ScalarField> GateThreadBuilder<F> {
                     let column = basic_gate.value;
                     let value = if use_unknown { Value::unknown() } else { Value::known(advice) };
                     #[cfg(feature = "halo2-axiom")]
-                    let cell = *region.assign_advice(column, row_offset, value).unwrap().cell();
+                    let cell = region.assign_advice(column, row_offset, value);
                     #[cfg(not(feature = "halo2-axiom"))]
                     let cell =
                         region.assign_advice(|| "", column, row_offset, || value).unwrap().cell();
@@ -187,8 +187,7 @@ impl<F: ScalarField> GateThreadBuilder<F> {
 
                         #[cfg(feature = "halo2-axiom")]
                         {
-                            let ncell =
-                                *region.assign_advice(column, row_offset, value).unwrap().cell();
+                            let ncell = region.assign_advice(column, row_offset, value);
                             region.constrain_equal(&ncell, &cell);
                         }
                         #[cfg(not(feature = "halo2-axiom"))]
@@ -270,10 +269,7 @@ impl<F: ScalarField> GateThreadBuilder<F> {
 
                     #[cfg(feature = "halo2-axiom")]
                     {
-                        let bcell = *region
-                            .assign_advice(column, lookup_offset, value)
-                            .expect("assign_advice should not fail")
-                            .cell();
+                        let bcell = region.assign_advice(column, lookup_offset, value);
                         region.constrain_equal(&acell, &bcell);
                     }
                     #[cfg(not(feature = "halo2-axiom"))]
@@ -327,7 +323,7 @@ pub fn assign_threads_in<F: ScalarField>(
             let value = advice.value;
             let lookup_column = *lookup_column.unwrap();
             #[cfg(feature = "halo2-axiom")]
-            region.assign_advice(lookup_column, lookup_offset, Value::known(value)).unwrap();
+            region.assign_advice(lookup_column, lookup_offset, Value::known(value));
             #[cfg(not(feature = "halo2-axiom"))]
             region
                 .assign_advice(|| "", lookup_column, lookup_offset, || Value::known(value))
@@ -337,7 +333,7 @@ pub fn assign_threads_in<F: ScalarField>(
         }
         for advice in ctx.advice {
             #[cfg(feature = "halo2-axiom")]
-            region.assign_advice(column, row_offset, Value::known(advice)).unwrap();
+            region.assign_advice(column, row_offset, Value::known(advice));
             #[cfg(not(feature = "halo2-axiom"))]
             region.assign_advice(|| "", column, row_offset, || Value::known(advice)).unwrap();
 
@@ -348,7 +344,7 @@ pub fn assign_threads_in<F: ScalarField>(
                 column = config.basic_gates[phase][gate_index].value;
 
                 #[cfg(feature = "halo2-axiom")]
-                region.assign_advice(column, row_offset, Value::known(advice)).unwrap();
+                region.assign_advice(column, row_offset, Value::known(advice));
                 #[cfg(not(feature = "halo2-axiom"))]
                 region.assign_advice(|| "", column, row_offset, || Value::known(advice)).unwrap();
             }
@@ -423,21 +419,28 @@ impl<F: ScalarField> Circuit<F> for GateCircuitBuilder<F> {
                     first_pass = false;
                     return Ok(());
                 }
+                // only support FirstPhase in this Builder because getting challenge value requires more specialized witness generation during synthesize
                 if !self.builder.borrow().witness_gen_only {
                     // clone the builder so we can re-use the circuit for both vk and pk gen
                     let builder = self.builder.borrow().clone();
+                    for threads in builder.threads.iter().skip(1) {
+                        assert!(
+                            threads.is_empty(),
+                            "GateCircuitBuilder only supports FirstPhase for now"
+                        );
+                    }
                     *self.break_points.borrow_mut() =
                         builder.assign_all(&config, &[], &[], &mut region);
                 } else {
                     let builder = self.builder.take();
                     let break_points = self.break_points.take();
-                    for (phase, (threads, break_points)) in
-                        builder.threads.into_iter().zip(break_points.into_iter()).enumerate()
+                    for (phase, (threads, break_points)) in builder
+                        .threads
+                        .into_iter()
+                        .zip(break_points.into_iter())
+                        .enumerate()
+                        .take(1)
                     {
-                        #[cfg(feature = "halo2-axiom")]
-                        if phase != 0 && !threads.is_empty() {
-                            region.next_phase();
-                        }
                         assign_threads_in(phase, threads, &config, &[], &mut region, break_points);
                     }
                 }
@@ -514,9 +517,16 @@ impl<F: ScalarField> Circuit<F> for RangeCircuitBuilder<F> {
                     first_pass = false;
                     return Ok(());
                 }
+                // only support FirstPhase in this Builder because getting challenge value requires more specialized witness generation during synthesize
                 if !self.0.builder.borrow().witness_gen_only {
                     // clone the builder so we can re-use the circuit for both vk and pk gen
                     let builder = self.0.builder.borrow().clone();
+                    for threads in builder.threads.iter().skip(1) {
+                        assert!(
+                            threads.is_empty(),
+                            "GateCircuitBuilder only supports FirstPhase for now"
+                        );
+                    }
                     *self.0.break_points.borrow_mut() = builder.assign_all(
                         &config.gate,
                         &config.lookup_advice,
@@ -524,15 +534,17 @@ impl<F: ScalarField> Circuit<F> for RangeCircuitBuilder<F> {
                         &mut region,
                     );
                 } else {
+                    #[cfg(feature = "display")]
+                    let start0 = std::time::Instant::now();
                     let builder = self.0.builder.take();
                     let break_points = self.0.break_points.take();
-                    for (phase, (threads, break_points)) in
-                        builder.threads.into_iter().zip(break_points.into_iter()).enumerate()
+                    for (phase, (threads, break_points)) in builder
+                        .threads
+                        .into_iter()
+                        .zip(break_points.into_iter())
+                        .enumerate()
+                        .take(1)
                     {
-                        #[cfg(feature = "halo2-axiom")]
-                        if phase != 0 && !threads.is_empty() {
-                            region.next_phase();
-                        }
                         assign_threads_in(
                             phase,
                             threads,
@@ -542,6 +554,8 @@ impl<F: ScalarField> Circuit<F> for RangeCircuitBuilder<F> {
                             break_points,
                         );
                     }
+                    #[cfg(feature = "display")]
+                    println!("assign threads in {:?}", start0.elapsed());
                 }
                 Ok(())
             },
diff --git a/halo2-base/src/gates/flex_gate.rs b/halo2-base/src/gates/flex_gate.rs
index a70de4b8..d5292d90 100644
--- a/halo2-base/src/gates/flex_gate.rs
+++ b/halo2-base/src/gates/flex_gate.rs
@@ -290,7 +290,7 @@ pub trait GateInstructions<F: ScalarField> {
             return ctx.assign_region_last([start], []);
         }
         let (len, hi) = a.size_hint();
-        debug_assert_eq!(Some(len), hi);
+        assert_eq!(Some(len), hi);
 
         let mut sum = *start.value();
         let cells = iter::once(start).chain(a.flat_map(|a| {
@@ -320,7 +320,7 @@ pub trait GateInstructions<F: ScalarField> {
             return Box::new(iter::once(ctx.assign_region_last([start], [])));
         }
         let (len, hi) = a.size_hint();
-        debug_assert_eq!(Some(len), hi);
+        assert_eq!(Some(len), hi);
 
         let mut sum = *start.value();
         let cells = iter::once(start).chain(a.flat_map(|a| {
@@ -532,7 +532,7 @@ pub trait GateInstructions<F: ScalarField> {
         let mut sum = F::zero();
         let a = a.into_iter();
         let (len, hi) = a.size_hint();
-        debug_assert_eq!(Some(len), hi);
+        assert_eq!(Some(len), hi);
 
         let cells = std::iter::once(Constant(F::zero())).chain(
             a.zip(indicator.into_iter()).flat_map(|(a, ind)| {
@@ -555,7 +555,7 @@ pub trait GateInstructions<F: ScalarField> {
     {
         let cells = cells.into_iter();
         let (len, hi) = cells.size_hint();
-        debug_assert_eq!(Some(len), hi);
+        assert_eq!(Some(len), hi);
 
         let ind = self.idx_to_indicator(ctx, idx, len);
         self.select_by_indicator(ctx, cells, ind)
@@ -706,15 +706,14 @@ impl<F: ScalarField> GateChip<F> {
             [a, b, Witness(sum)]
         }));
 
-        let gate_offsets = if ctx.witness_gen_only() {
-            vec![]
+        if ctx.witness_gen_only() {
+            ctx.assign_region(cells, vec![]);
         } else {
-            let (lo, hi) = cells.size_hint();
-            debug_assert_eq!(Some(lo), hi);
+            let cells = cells.collect::<Vec<_>>();
+            let lo = cells.len();
             let len = lo / 3;
-            (0..len).map(|i| 3 * i as isize).collect()
+            ctx.assign_region(cells, (0..len).map(|i| 3 * i as isize));
         };
-        ctx.assign_region(cells, gate_offsets);
         b_starts_with_one
     }
 }
@@ -899,8 +898,7 @@ impl<F: ScalarField> GateInstructions<F> for GateChip<F> {
             .iter()
             .flat_map(|byte| (0..8).map(|i| (*byte as u64 >> i) & 1))
             .map(|x| Witness(F::from(x)))
-            .take(range_bits)
-            .collect::<Vec<_>>();
+            .take(range_bits);
 
         let mut bit_cells = Vec::with_capacity(range_bits);
         let row_offset = ctx.advice.len();
diff --git a/halo2-ecc/benches/msm.rs b/halo2-ecc/benches/msm.rs
index 76141425..1a8e774d 100644
--- a/halo2-ecc/benches/msm.rs
+++ b/halo2-ecc/benches/msm.rs
@@ -82,6 +82,7 @@ fn msm_circuit(
     scalars: Vec<Fr>,
     break_points: Option<MultiPhaseThreadBreakPoints>,
 ) -> RangeCircuitBuilder<Fr> {
+    let start0 = start_timer!(|| format!("Witness generation for circuit in {stage:?} stage"));
     let k = params.degree as usize;
     let builder = match stage {
         CircuitBuilderStage::Mock => GateThreadBuilder::mock(),
@@ -90,7 +91,6 @@ fn msm_circuit(
     };
     let builder = Mutex::new(builder);
 
-    let start0 = start_timer!(|| format!("Witness generation for circuit in {stage:?} stage"));
     msm_bench(&builder, params, bases, scalars);
 
     let builder = builder.into_inner().unwrap();

From f3e814acca5d4b95014f314e1213a92be8086b41 Mon Sep 17 00:00:00 2001
From: Jonathan Wang <jonathanpwang@users.noreply.github.com>
Date: Fri, 10 Feb 2023 10:32:28 -0800
Subject: [PATCH 3/3] fix: change `debug_assert` in
 `decompose_u64_digits_to_limbs` to restrict `bit_len < 64` and
 `decompose_biguint` to `64 <= bit_len < 128` * add more comprehensive tests
 for above two functions

---
 halo2-base/src/gates/flex_gate.rs |  2 +-
 halo2-base/src/gates/tests.rs     |  2 +-
 halo2-base/src/lib.rs             |  1 +
 halo2-base/src/utils.rs           | 90 ++++++++++++++++++++++++++-----
 4 files changed, 81 insertions(+), 14 deletions(-)

diff --git a/halo2-base/src/gates/flex_gate.rs b/halo2-base/src/gates/flex_gate.rs
index d5292d90..05317338 100644
--- a/halo2-base/src/gates/flex_gate.rs
+++ b/halo2-base/src/gates/flex_gate.rs
@@ -896,7 +896,7 @@ impl<F: ScalarField> GateInstructions<F> for GateChip<F> {
         let bits = a_bytes
             .as_ref()
             .iter()
-            .flat_map(|byte| (0..8).map(|i| (*byte as u64 >> i) & 1))
+            .flat_map(|byte| (0..8u32).map(|i| (*byte as u64 >> i) & 1))
             .map(|x| Witness(F::from(x)))
             .take(range_bits);
 
diff --git a/halo2-base/src/gates/tests.rs b/halo2-base/src/gates/tests.rs
index cf6a3cb6..06406043 100644
--- a/halo2-base/src/gates/tests.rs
+++ b/halo2-base/src/gates/tests.rs
@@ -54,7 +54,7 @@ fn test_multithread_gates() {
     let mut builder = GateThreadBuilder::mock();
     gate_tests(builder.main(0), inputs);
 
-    let thread_ids = (0..4).map(|_| builder.get_new_thread_id()).collect::<Vec<_>>();
+    let thread_ids = (0..4usize).map(|_| builder.get_new_thread_id()).collect::<Vec<_>>();
     let new_threads = thread_ids
         .into_par_iter()
         .map(|id| {
diff --git a/halo2-base/src/lib.rs b/halo2-base/src/lib.rs
index ccf4f973..3b7523ae 100644
--- a/halo2-base/src/lib.rs
+++ b/halo2-base/src/lib.rs
@@ -2,6 +2,7 @@
 #![feature(trait_alias)]
 #![deny(clippy::perf)]
 #![allow(clippy::too_many_arguments)]
+#![warn(clippy::default_numeric_fallback)]
 
 // different memory allocator options:
 // mimalloc is fastest on Mac M2
diff --git a/halo2-base/src/utils.rs b/halo2-base/src/utils.rs
index 6802b71c..152971ac 100644
--- a/halo2-base/src/utils.rs
+++ b/halo2-base/src/utils.rs
@@ -62,7 +62,7 @@ pub(crate) fn decompose_u64_digits_to_limbs(
     number_of_limbs: usize,
     bit_len: usize,
 ) -> Vec<u64> {
-    debug_assert!(bit_len <= 64);
+    debug_assert!(bit_len < 64);
 
     let mut e = e.into_iter();
     let mask: u64 = (1u64 << bit_len) - 1u64;
@@ -196,22 +196,22 @@ pub fn decompose_biguint<F: BigPrimeField>(
     num_limbs: usize,
     bit_len: usize,
 ) -> Vec<F> {
-    debug_assert!(bit_len > 64 && bit_len <= 128);
+    debug_assert!((64..128).contains(&bit_len));
     let mut e = e.iter_u64_digits();
 
     let mut limb0 = e.next().unwrap_or(0) as u128;
     let mut rem = bit_len - 64;
     let mut u64_digit = e.next().unwrap_or(0);
-    limb0 |= ((u64_digit & ((1 << rem) - 1)) as u128) << 64;
+    limb0 |= ((u64_digit & ((1 << rem) - 1u64)) as u128) << 64u32;
     u64_digit >>= rem;
     rem = 64 - rem;
 
     core::iter::once(F::from_u128(limb0))
         .chain((1..num_limbs).map(|_| {
-            let mut limb: u128 = u64_digit.into();
+            let mut limb = u64_digit as u128;
             let mut bits = rem;
             u64_digit = e.next().unwrap_or(0);
-            if bit_len - bits >= 64 {
+            if bit_len >= 64 + bits {
                 limb |= (u64_digit as u128) << bits;
                 u64_digit = e.next().unwrap_or(0);
                 bits += 64;
@@ -258,13 +258,6 @@ pub fn compose(input: Vec<BigUint>, bit_len: usize) -> BigUint {
     input.iter().rev().fold(BigUint::zero(), |acc, val| (acc << bit_len) + val)
 }
 
-#[cfg(test)]
-#[test]
-fn test_signed_roundtrip() {
-    use crate::halo2_proofs::halo2curves::bn256::Fr;
-    assert_eq!(fe_to_bigint(&bigint_to_fe::<Fr>(&-BigInt::one())), -BigInt::one());
-}
-
 #[cfg(feature = "halo2-axiom")]
 pub use halo2_proofs_axiom::halo2curves::CurveAffineExt;
 
@@ -337,3 +330,76 @@ pub mod fs {
         })
     }
 }
+
+#[cfg(test)]
+mod tests {
+    use crate::halo2_proofs::halo2curves::bn256::Fr;
+    use num_bigint::RandomBits;
+    use rand::{rngs::OsRng, Rng};
+    use std::ops::Shl;
+
+    use super::*;
+
+    #[test]
+    fn test_signed_roundtrip() {
+        use crate::halo2_proofs::halo2curves::bn256::Fr;
+        assert_eq!(fe_to_bigint(&bigint_to_fe::<Fr>(&-BigInt::one())), -BigInt::one());
+    }
+
+    #[test]
+    fn test_decompose_biguint() {
+        let mut rng = OsRng;
+        const MAX_LIMBS: u64 = 5;
+        for bit_len in 64..128usize {
+            for num_limbs in 1..=MAX_LIMBS {
+                for _ in 0..10_000usize {
+                    let mut e: BigUint = rng.sample(RandomBits::new(num_limbs * bit_len as u64));
+                    let limbs = decompose_biguint::<Fr>(&e, num_limbs as usize, bit_len);
+
+                    let limbs2 = {
+                        let mut limbs = vec![];
+                        let mask = BigUint::one().shl(bit_len) - 1usize;
+                        for _ in 0..num_limbs {
+                            let limb = &e & &mask;
+                            let mut bytes_le = limb.to_bytes_le();
+                            bytes_le.resize(32, 0u8);
+                            limbs.push(Fr::from_bytes(&bytes_le.try_into().unwrap()).unwrap());
+                            e >>= bit_len;
+                        }
+                        limbs
+                    };
+                    assert_eq!(limbs, limbs2);
+                }
+            }
+        }
+    }
+
+    #[test]
+    fn test_decompose_u64_digits_to_limbs() {
+        let mut rng = OsRng;
+        const MAX_LIMBS: u64 = 5;
+        for bit_len in 0..64usize {
+            for num_limbs in 1..=MAX_LIMBS {
+                for _ in 0..10_000usize {
+                    let mut e: BigUint = rng.sample(RandomBits::new(num_limbs * bit_len as u64));
+                    let limbs = decompose_u64_digits_to_limbs(
+                        e.to_u64_digits(),
+                        num_limbs as usize,
+                        bit_len,
+                    );
+                    let limbs2 = {
+                        let mut limbs = vec![];
+                        let mask = BigUint::one().shl(bit_len) - 1usize;
+                        for _ in 0..num_limbs {
+                            let limb = &e & &mask;
+                            limbs.push(u64::try_from(limb).unwrap());
+                            e >>= bit_len;
+                        }
+                        limbs
+                    };
+                    assert_eq!(limbs, limbs2);
+                }
+            }
+        }
+    }
+}