diff --git a/barretenberg/cpp/src/barretenberg/stdlib/primitives/biggroup/README.md b/barretenberg/cpp/src/barretenberg/stdlib/primitives/biggroup/README.md
new file mode 100644
index 000000000000..136d777716ea
--- /dev/null
+++ b/barretenberg/cpp/src/barretenberg/stdlib/primitives/biggroup/README.md
@@ -0,0 +1,64 @@
+$\newcommand{\endogroup}{\textcolor{orange}{\lambda}}$
+$\newcommand{\endofield}{\textcolor{orange}{\beta}}$
+$\newcommand{\rom}[1]{\textcolor{purple}{#1}}$
+$\newcommand{\windex}[1]{\textcolor{grey}{#1}}$
+
+### Lookup Tables in Biggroup
+
+In the biggroup class, we use lookup tables to store precomputed multiples of a fixed group element $P$. Since we use the wNAF (windowed non-adjacent form) method for scalar multiplication, we need to store odd multiples of $P$ up to a certain window size. Further, to leverage endomorphism while computing scalar multiplication, we also store the endomorphic mapping of the multiples of $P$ in the table. For instance with a wNAF window size of 3, the lookup table for $P$ is represented as follows:
+
+| Index | Element      | Endomorphism            |
+| ----- | ------------ | ----------------------- |
+| 0     | $-7 \cdot P$ | $-7 \endogroup \cdot P$ |
+| 1     | $-5 \cdot P$ | $-5 \endogroup \cdot P$ |
+| 2     | $-3 \cdot P$ | $-3 \endogroup \cdot P$ |
+| 3     | $-1 \cdot P$ | $-1 \endogroup \cdot P$ |
+| 4     | $1 \cdot P$  | $1 \endogroup \cdot P$  |
+| 5     | $3 \cdot P$  | $3 \endogroup \cdot P$  |
+| 6     | $5 \cdot P$  | $5 \endogroup \cdot P$  |
+| 7     | $7 \cdot P$  | $7 \endogroup \cdot P$  |
+
+Note that our wNAF form uses only (positive and negative) odd multiples of $P$ so as to avoid handling conditional logic in the circuit for 0 values. Each group element in the above table is represented as a point on the elliptic curve: $Q = (x, y)$ such that $x, y \in \mathbb{F}_q$. In our case, $\mathbb{F}_q$ is either the base field of BN254 or secp256k1 (or secp256r1). Since the native field used in our circuits is the scalar field $\mathbb{F}_r$ of BN254, $x$ and $y$ are non-native field elements and are represented as two `bigfield` elements, i.e., each of $x$ and $y$ consists of four binary-basis limbs and one prime-basis limb:
+
+$$
+\begin{aligned}
+x &\equiv (x_0, x_1, x_2, x_3, x_p) & & \in \mathbb{F}_r^5, \\
+y &\equiv (y_0, y_1, y_2, y_3, y_p) & & \in \mathbb{F}_r^5.
+\end{aligned}
+$$
+
+Thus, when generating lookup tables, each element $Q$ in the table is represented as a tuple of 10 native field elements. Since we only support tables with one key and two values, we need 5 tables to represent the group element $Q$:
+
+| Table 1: xlo |         |     | Table 2: xhi |         |
+| ------------ | ------- | --- | ------------ | ------- |
+| Value 1      | Value 2 |     | Value 1      | Value 2 |
+| $x_0$        | $x_1$   |     | $x_2$        | $x_3$   |
+
+| Table 3: ylo |         |     | Table 4: yhi |         |
+| ------------ | ------- | --- | ------------ | ------- |
+| Value 1      | Value 2 |     | Value 1      | Value 2 |
+| $y_0$        | $y_1$   |     | $y_2$        | $y_3$   |
+
+| Table 5: prime table |         |
+| -------------------- | ------- |
+| Value 1              | Value 2 |
+| $x_p$                | $y_p$   |
+
+Additionally, we also need tables for the endomorphism values. Suppose $x' := \endofield \cdot x$ is the x-coordinate of the endomorphism of the group element $Q$, represented as $x' = (x'_0, x'_1, x'_2, x'_3, x'_p) \in \mathbb{F}_r^5$. The endomorphism table is represented as follows:
+
+| endo xlo table |         |     |     | endo xhi table |         |
+| -------------- | ------- | --- | --- | -------------- | ------- |
+| Value 1        | Value 2 |     |     | Value 1        | Value 2 |
+| $x'_0$         | $x'_1$  |     |     | $x'_2$         | $x'_3$  |
+
+| endo prime table |         |
+| ---------------- | ------- |
+| Value 1          | Value 2 |
+| $x'_p$           | $y_p$   |
+
+Note that since the y-coordinate remains unchanged under the endomorphism, we can reuse the same y-coordinate tables (Tables 3 and 4). In the endo prime table, the second column again holds $y_p$; this is redundant, but it keeps every table in the same two-value format. Thus, overall we need 8 tables (5 for the element and 3 for its endomorphism) to represent the lookup table for a group element $P$, with each table having $2^3$ entries (for a wNAF window size of 3).
+
+> Note:
+> In the context of biggroup, we need variable-base lookup tables and fixed-base lookup tables. The variable-base lookup tables are used when the base point $P$ is not known at circuit synthesis time and is provided as a circuit witness. In this case, we need to generate the lookup tables on-the-fly based on the input base point $P$. On the other hand, fixed-base lookup tables are used when the base point $P$ is known at circuit synthesis time and can be hardcoded into the circuit (for example, group generators). Fixed-base lookup tables are more efficient as they can be precomputed and do not require additional gates to enforce the correctness of the table entries. Variable-base lookup tables are realized using ROM tables (see the reference below), while fixed-base lookup tables are realized using standard lookup tables in the circuit.
+
+Refer to the [ROM table documentation](../memory/README.md) for details on how ROM tables are implemented in Barretenberg.
diff --git a/barretenberg/cpp/src/barretenberg/stdlib/primitives/biggroup/biggroup.hpp b/barretenberg/cpp/src/barretenberg/stdlib/primitives/biggroup/biggroup.hpp
index fcb4fcac9a33..4f5796914725 100644
--- a/barretenberg/cpp/src/barretenberg/stdlib/primitives/biggroup/biggroup.hpp
+++ b/barretenberg/cpp/src/barretenberg/stdlib/primitives/biggroup/biggroup.hpp
@@ -17,6 +17,7 @@
 #include "barretenberg/ecc/curves/secp256k1/secp256k1.hpp"
 #include "barretenberg/ecc/curves/secp256r1/secp256r1.hpp"
 #include "barretenberg/stdlib/primitives/biggroup/biggroup_goblin.hpp"
+#include <cstddef>
 
 namespace bb::stdlib::element_default {
 
@@ -49,6 +50,13 @@ template <class Builder_, class Fq, class Fr, class NativeGroup> class element {
     element(const element& other);
     element(element&& other) noexcept;
 
+    ~element() = default;
+
+    /**
+     * @brief Construct a dummy element (the group generator) and return its limbs as fr constants
+     *
+     * @return std::array<fr, PUBLIC_INPUTS_SIZE>
+     */
     static std::array<fr, PUBLIC_INPUTS_SIZE> construct_dummy()
     {
         const typename NativeGroup::affine_element& native_val = NativeGroup::affine_element::one();
@@ -64,6 +72,7 @@ template <class Builder_, class Fq, class Fr, class NativeGroup> class element {
         BB_ASSERT_EQ(idx, PUBLIC_INPUTS_SIZE);
         return limb_vals;
     }
+
     /**
      * @brief Set the witness indices for the x and y coordinates to public
      *
@@ -92,6 +101,14 @@ template <class Builder_, class Fq, class Fr, class NativeGroup> class element {
         return { Fq::reconstruct_from_public(x_limbs), Fq::reconstruct_from_public(y_limbs) };
     }
 
+    /**
+     * @brief Create a biggroup witness from a native group element, allocating new witnesses as necessary
+     *
+     * @param ctx
+     * @param input
+     * @return element
+     * @warning Use this carefully, as it creates free witnesses.
+     */
     static element from_witness(Builder* ctx, const typename NativeGroup::affine_element& input)
     {
         element out;
@@ -114,6 +131,9 @@ template <class Builder_, class Fq, class Fr, class NativeGroup> class element {
         return out;
     }
 
+    /**
+     * @brief Check that the point is on the curve
+     */
     void validate_on_curve() const
     {
         Fq b(get_context(), uint256_t(NativeGroup::curve_b));
@@ -143,7 +163,7 @@ template <class Builder_, class Fq, class Fr, class NativeGroup> class element {
     }
 
     /**
-     * Fix a witness. The value of the witness is constrained with a selector
+     * @brief Fix a witness. The value of the witness is constrained with a selector
      **/
     void fix_witness()
     {
@@ -155,6 +175,9 @@ template <class Builder_, class Fq, class Fr, class NativeGroup> class element {
         unset_free_witness_tag();
     }
 
+    /**
+     * @brief Creates a constant group generator.
+     */
     static element one(Builder* ctx)
     {
         uint256_t x = uint256_t(NativeGroup::one.x);
@@ -178,6 +201,11 @@ template <class Builder_, class Fq, class Fr, class NativeGroup> class element {
     element& operator=(const element& other);
     element& operator=(element&& other) noexcept;
 
+    /**
+     * @brief Serialize the element to a byte array in form: (yhi || ylo || xhi || xlo).
+     *
+     * @return byte_array<Builder>
+     */
     byte_array<Builder> to_byte_array() const
     {
         byte_array<Builder> result(get_context());
@@ -207,9 +235,9 @@ template <class Builder_, class Fq, class Fr, class NativeGroup> class element {
         *this = *this - other;
         return *this;
     }
-    std::array<element, 2> checked_unconditional_add_sub(const element&) const;
+    std::array<element, 2> checked_unconditional_add_sub(const element& other) const;
 
-    element operator*(const Fr& other) const;
+    element operator*(const Fr& scalar) const;
 
     element conditional_negate(const bool_ct& predicate) const
     {
@@ -248,17 +276,17 @@ template <class Builder_, class Fq, class Fr, class NativeGroup> class element {
         Fq y3_prev;
         bool is_element = false;
 
-        chain_add_accumulator() {};
+        chain_add_accumulator() = default;
         explicit chain_add_accumulator(const element& input)
-        {
-            x3_prev = input.x;
-            y3_prev = input.y;
-            is_element = true;
-        }
+            : x3_prev(input.x)
+            , y3_prev(input.y)
+            , is_element(true)
+        {}
         chain_add_accumulator(const chain_add_accumulator& other) = default;
-        chain_add_accumulator(chain_add_accumulator&& other) = default;
+        chain_add_accumulator(chain_add_accumulator&& other) noexcept = default;
         chain_add_accumulator& operator=(const chain_add_accumulator& other) = default;
-        chain_add_accumulator& operator=(chain_add_accumulator&& other) = default;
+        chain_add_accumulator& operator=(chain_add_accumulator&& other) noexcept = default;
+        ~chain_add_accumulator() = default;
     };
 
     /**
@@ -269,7 +297,7 @@ template <class Builder_, class Fq, class Fr, class NativeGroup> class element {
     static chain_add_accumulator chain_add(const element& p1, const chain_add_accumulator& accumulator);
     static element chain_add_end(const chain_add_accumulator& accumulator);
     element montgomery_ladder(const element& other) const;
-    element montgomery_ladder(const chain_add_accumulator& accumulator);
+    element montgomery_ladder(const chain_add_accumulator& to_add);
     element multiple_montgomery_ladder(const std::vector<chain_add_accumulator>& to_add) const;
     element quadruple_and_add(const std::vector<element>& to_add) const;
 
@@ -418,139 +446,95 @@ template <class Builder_, class Fq, class Fr, class NativeGroup> class element {
     bool_ct _is_infinity;
 
     template <size_t num_elements>
-    static std::array<twin_rom_table<Builder>, 5> create_group_element_rom_tables(
-        const std::array<element, num_elements>& elements, std::array<uint256_t, 8>& limb_max);
+    static std::array<twin_rom_table<Builder>, Fq::NUM_LIMBS + 1> create_group_element_rom_tables(
+        const std::array<element, num_elements>& rom_data, std::array<uint256_t, Fq::NUM_LIMBS * 2>& limb_max);
 
     template <size_t num_elements>
-    static element read_group_element_rom_tables(const std::array<twin_rom_table<Builder>, 5>& tables,
+    static element read_group_element_rom_tables(const std::array<twin_rom_table<Builder>, Fq::NUM_LIMBS + 1>& tables,
                                                  const field_t<Builder>& index,
-                                                 const std::array<uint256_t, 8>& limb_max);
+                                                 const std::array<uint256_t, Fq::NUM_LIMBS * 2>& limb_max);
 
     static std::pair<element, element> compute_offset_generators(const size_t num_rounds);
     static typename NativeGroup::affine_element compute_table_offset_generator();
 
+    /**
+     * @brief Four-bit variable-base table for scalar multiplication
+     * @details We store precomputed multiples of a group element in ROM tables. These precomputed multiples of the
+     * group element are used for scalar multiplication using 4-bit wNAF window.
+     */
     struct four_bit_table_plookup {
-        four_bit_table_plookup() {};
+        four_bit_table_plookup() = default;
         four_bit_table_plookup(const element& input);
 
         four_bit_table_plookup(const four_bit_table_plookup& other) = default;
         four_bit_table_plookup& operator=(const four_bit_table_plookup& other) = default;
+        four_bit_table_plookup(four_bit_table_plookup&& other) noexcept = default;
+        four_bit_table_plookup& operator=(four_bit_table_plookup&& other) noexcept = default;
+        ~four_bit_table_plookup() = default;
 
         element operator[](const field_t<Builder>& index) const;
         element operator[](const size_t idx) const { return element_table[idx]; }
         std::array<element, 16> element_table;
-        std::array<twin_rom_table<Builder>, 5> coordinates;
-        std::array<uint256_t, 8> limb_max; // tracks the maximum limb size represented in each element_table entry
+
+        // Each coordinate is an Fq element, which has 4 binary basis limbs and 1 prime basis limb
+        std::array<twin_rom_table<Builder>, Fq::NUM_LIMBS + 1> coordinates;
+        std::array<uint256_t, Fq::NUM_LIMBS * 2> limb_max; // tracks the maximum size of each binary basis limb
     };
 
+    /**
+     * @brief Eight-bit fixed base table for scalar multiplication
+     * @details This lookup table is used for fixed-base scalar multiplication using 8-bit windows.
+     * It stores precomputed multiples of the generator of bn254 / secp256k1 / secp256r1 depending on
+     * which curve operations are used in the circuit.
+     */
     struct eight_bit_fixed_base_table {
         enum CurveType { BN254, SECP256K1, SECP256R1 };
         eight_bit_fixed_base_table(const CurveType input_curve_type, bool use_endo)
             : curve_type(input_curve_type)
-            , use_endomorphism(use_endo) {};
+            , use_endomorphism(use_endo)
+        {}
 
         eight_bit_fixed_base_table(const eight_bit_fixed_base_table& other) = default;
         eight_bit_fixed_base_table& operator=(const eight_bit_fixed_base_table& other) = default;
+        eight_bit_fixed_base_table(eight_bit_fixed_base_table&& other) noexcept = default;
+        eight_bit_fixed_base_table& operator=(eight_bit_fixed_base_table&& other) noexcept = default;
+        ~eight_bit_fixed_base_table() = default;
 
         element operator[](const field_t<Builder>& index) const;
 
-        element operator[](const size_t idx) const;
+        element operator[](const size_t index) const;
 
         CurveType curve_type;
         bool use_endomorphism;
     };
 
     static std::pair<four_bit_table_plookup, four_bit_table_plookup> create_endo_pair_four_bit_table_plookup(
-        const element& input)
-    {
-        four_bit_table_plookup P1;
-        four_bit_table_plookup endoP1;
-        element d2 = input.dbl();
-
-        P1.element_table[8] = input;
-        for (size_t i = 9; i < 16; ++i) {
-            P1.element_table[i] = P1.element_table[i - 1] + d2;
-        }
-        for (size_t i = 0; i < 8; ++i) {
-            P1.element_table[i] = (-P1.element_table[15 - i]);
-        }
-        for (size_t i = 0; i < 16; ++i) {
-            endoP1.element_table[i].y = P1.element_table[15 - i].y;
-        }
-        uint256_t beta_val = bb::field<typename Fq::TParams>::cube_root_of_unity();
-        Fq beta(bb::fr(beta_val.slice(0, 136)), bb::fr(beta_val.slice(136, 256)), false);
-        for (size_t i = 0; i < 8; ++i) {
-            endoP1.element_table[i].x = P1.element_table[i].x * beta;
-            endoP1.element_table[15 - i].x = endoP1.element_table[i].x;
-        }
-        P1.coordinates = create_group_element_rom_tables<16>(P1.element_table, P1.limb_max);
-        endoP1.coordinates = create_group_element_rom_tables<16>(endoP1.element_table, endoP1.limb_max);
-        auto result = std::make_pair(four_bit_table_plookup(P1), four_bit_table_plookup(endoP1));
-        return result;
-    }
-
-    /**
-     * Creates a lookup table for a set of 2, 3 or 4 points
-     *
-     * The lookup table computes linear combinations of all of the points
-     *
-     * e.g. for 3 points A, B, C, the table represents the following values:
-     *
-     * 0 0 0 ->  C+B+A
-     * 0 0 1 ->  C+B-A
-     * 0 1 0 ->  C-B+A
-     * 0 1 1 ->  C-B-A
-     * 1 0 0 -> -C+B+A
-     * 1 0 1 -> -C+B-A
-     * 1 1 0 -> -C-B+A
-     * 1 1 1 -> -C-B-A
-     *
-     * The table KEY is 3 1-bit NAF entries that correspond to scalar multipliers for
-     * base points A, B, C
-     **/
-    template <size_t length> struct lookup_table_base {
-        static constexpr size_t table_size = (1ULL << (length - 1));
-        lookup_table_base(const std::array<element, length>& inputs);
-        lookup_table_base(const lookup_table_base& other) = default;
-        lookup_table_base& operator=(const lookup_table_base& other) = default;
-
-        element get(const std::array<bool_ct, length>& bits) const;
-
-        element operator[](const size_t idx) const { return element_table[idx]; }
-
-        std::array<field_t<Builder>, table_size> x_b0_table;
-        std::array<field_t<Builder>, table_size> x_b1_table;
-        std::array<field_t<Builder>, table_size> x_b2_table;
-        std::array<field_t<Builder>, table_size> x_b3_table;
-
-        std::array<field_t<Builder>, table_size> y_b0_table;
-        std::array<field_t<Builder>, table_size> y_b1_table;
-        std::array<field_t<Builder>, table_size> y_b2_table;
-        std::array<field_t<Builder>, table_size> y_b3_table;
-        element twin0;
-        element twin1;
-        std::array<element, table_size> element_table;
-    };
+        const element& input);
 
     /**
-     * The Plookup version of the above lookup table
-     *
-     * Uses ROM tables to efficiently access lookup table
+     * @brief Generic lookup table that uses ROM tables internally to access group elements.
+     * @details Can access elements in the table using bit-decomposed index.
      **/
     template <size_t length> struct lookup_table_plookup {
         static constexpr size_t table_size = (1ULL << (length));
-        lookup_table_plookup() {}
+        lookup_table_plookup() = default;
         lookup_table_plookup(const std::array<element, length>& inputs);
         lookup_table_plookup(const lookup_table_plookup& other) = default;
         lookup_table_plookup& operator=(const lookup_table_plookup& other) = default;
+        lookup_table_plookup(lookup_table_plookup&& other) noexcept = default;
+        lookup_table_plookup& operator=(lookup_table_plookup&& other) noexcept = default;
+        ~lookup_table_plookup() = default;
 
         element get(const std::array<bool_ct, length>& bits) const;
 
         element operator[](const size_t idx) const { return element_table[idx]; }
 
         std::array<element, table_size> element_table;
-        std::array<twin_rom_table<Builder>, 5> coordinates;
-        std::array<uint256_t, 8> limb_max;
+
+        // Each coordinate is an Fq element, which has 4 binary basis limbs and 1 prime basis limb
+        // ROM tables: (idx, x0, x1), (idx, x2, x3), (idx, y0, y1), (idx, y2, y3), (idx, xp, yp)
+        std::array<twin_rom_table<Builder>, Fq::NUM_LIMBS + 1> coordinates;
+        std::array<uint256_t, Fq::NUM_LIMBS * 2> limb_max;
     };
 
     using twin_lookup_table = lookup_table_plookup<2>;
@@ -578,90 +562,86 @@ template <class Builder_, class Fq, class Fr, class NativeGroup> class element {
         }
 
         endo_table.coordinates = create_group_element_rom_tables<16>(endo_table.element_table, endo_table.limb_max);
-        return std::make_pair<quad_lookup_table, quad_lookup_table>((quad_lookup_table)base_table,
-                                                                    (quad_lookup_table)endo_table);
-    }
-
-    /**
-     * Creates a pair of 5-bit lookup tables, the former corresponding to 5 input points,
-     * the latter corresponding to the endomorphism equivalent of the 5 input points (e.g. x -> \beta * x, y -> -y)
-     **/
-    static std::pair<lookup_table_plookup<5>, lookup_table_plookup<5>> create_endo_pair_five_lookup_table(
-        const std::array<element, 5>& inputs)
-    {
-        lookup_table_plookup<5> base_table(inputs);
-        lookup_table_plookup<5> endo_table;
-        uint256_t beta_val = bb::field<typename Fq::TParams>::cube_root_of_unity();
-        Fq beta(bb::fr(beta_val.slice(0, 136)), bb::fr(beta_val.slice(136, 256)), false);
-        for (size_t i = 0; i < 16; ++i) {
-            endo_table.element_table[i + 16].x = base_table[15 - i].x * beta;
-            endo_table.element_table[i + 16].y = base_table[15 - i].y;
-
-            endo_table.element_table[15 - i] = (-endo_table.element_table[i + 16]);
-        }
-
-        endo_table.coordinates = create_group_element_rom_tables<32>(endo_table.element_table, endo_table.limb_max);
-
-        return std::make_pair<lookup_table_plookup<5>, lookup_table_plookup<5>>((lookup_table_plookup<5>)base_table,
-                                                                                (lookup_table_plookup<5>)endo_table);
+        return std::make_pair(base_table, endo_table); // deduce types; explicit <T, T> needs rvalues
     }
 
     /**
-     * Helper class to split a set of points into lookup table subsets
+     * Helper struct to split a set of points into lookup table subsets
      *
-     * Ultra version
      **/
     struct batch_lookup_table_plookup {
         batch_lookup_table_plookup(const std::vector<element>& points)
+            : num_points(points.size())
+            , num_fives(num_points / 5)
         {
-            num_points = points.size();
-            num_fives = num_points / 5;
-            num_sixes = 0;
             // size-6 table is expensive and only benefits us if creating them reduces the number of total tables
             if (num_points == 1) {
                 num_fives = 0;
                 num_sixes = 0;
             } else if (num_fives * 5 == (num_points - 1)) {
+                // last 6 points to be added as one 6-table
                 num_fives -= 1;
                 num_sixes = 1;
             } else if (num_fives * 5 == (num_points - 2) && num_fives >= 2) {
+                // last 12 points to be added as two 6-tables
                 num_fives -= 2;
                 num_sixes = 2;
             } else if (num_fives * 5 == (num_points - 3) && num_fives >= 3) {
+                // last 18 points to be added as three 6-tables
                 num_fives -= 3;
                 num_sixes = 3;
             }
 
-            has_quad = ((num_fives * 5 + num_sixes * 6) < num_points - 3) && (num_points >= 4);
+            // Calculate remaining points after allocating fives and sixes tables
+            size_t remaining_points = num_points - (num_fives * 5 + num_sixes * 6);
+
+            // Allocate one quad table if required (and update remaining points)
+            has_quad = (remaining_points >= 4) && (num_points >= 4);
+            if (has_quad) {
+                remaining_points -= 4;
+            }
+
+            // Allocate one triple table if required (and update remaining points)
+            has_triple = (remaining_points >= 3) && (num_points >= 3);
+            if (has_triple) {
+                remaining_points -= 3;
+            }
 
-            has_triple = ((num_fives * 5 + num_sixes * 6 + (size_t)has_quad * 4) < num_points - 2) && (num_points >= 3);
+            // Allocate one twin table if required (and update remaining points)
+            has_twin = (remaining_points >= 2) && (num_points >= 2);
+            if (has_twin) {
+                remaining_points -= 2;
+            }
 
-            has_twin =
-                ((num_fives * 5 + num_sixes * 6 + (size_t)has_quad * 4 + (size_t)has_triple * 3) < num_points - 1) &&
-                (num_points >= 2);
+            // If there is anything remaining, allocate a singleton
+            has_singleton = (remaining_points != 0) && (num_points >= 1);
 
-            has_singleton = num_points != ((num_fives * 5 + num_sixes * 6) + ((size_t)has_quad * 4) +
-                                           ((size_t)has_triple * 3) + ((size_t)has_twin * 2));
+            // Sanity check
+            BB_ASSERT_EQ(num_points,
+                         num_sixes * 6 + num_fives * 5 + static_cast<size_t>(has_quad) * 4 +
+                             static_cast<size_t>(has_triple) * 3 + static_cast<size_t>(has_twin) * 2 +
+                             static_cast<size_t>(has_singleton) * 1,
+                         "point allocation mismatch");
 
             size_t offset = 0;
             for (size_t i = 0; i < num_sixes; ++i) {
                 six_tables.push_back(lookup_table_plookup<6>({
-                    points[offset + 6 * i],
-                    points[offset + 6 * i + 1],
-                    points[offset + 6 * i + 2],
-                    points[offset + 6 * i + 3],
-                    points[offset + 6 * i + 4],
-                    points[offset + 6 * i + 5],
+                    points[offset + (6 * i)],
+                    points[offset + (6 * i) + 1],
+                    points[offset + (6 * i) + 2],
+                    points[offset + (6 * i) + 3],
+                    points[offset + (6 * i) + 4],
+                    points[offset + (6 * i) + 5],
                 }));
             }
             offset += 6 * num_sixes;
             for (size_t i = 0; i < num_fives; ++i) {
                 five_tables.push_back(lookup_table_plookup<5>({
-                    points[offset + 5 * i],
-                    points[offset + 5 * i + 1],
-                    points[offset + 5 * i + 2],
-                    points[offset + 5 * i + 3],
-                    points[offset + 5 * i + 4],
+                    points[offset + (5 * i)],
+                    points[offset + (5 * i) + 1],
+                    points[offset + (5 * i) + 2],
+                    points[offset + (5 * i) + 3],
+                    points[offset + (5 * i) + 4],
                 }));
             }
             offset += 5 * num_fives;
@@ -670,7 +650,6 @@ template <class Builder_, class Fq, class Fr, class NativeGroup> class element {
                 quad_tables.push_back(
                     quad_lookup_table({ points[offset], points[offset + 1], points[offset + 2], points[offset + 3] }));
             }
-
             if (has_triple) {
                 triple_tables.push_back(
                     triple_lookup_table({ points[offset], points[offset + 1], points[offset + 2] }));
@@ -678,7 +657,6 @@ template <class Builder_, class Fq, class Fr, class NativeGroup> class element {
             if (has_twin) {
                 twin_tables.push_back(twin_lookup_table({ points[offset], points[offset + 1] }));
             }
-
             if (has_singleton) {
                 singletons.push_back(points[points.size() - 1]);
             }
@@ -749,19 +727,19 @@ template <class Builder_, class Fq, class Fr, class NativeGroup> class element {
             std::vector<element> round_accumulator;
             for (size_t j = 0; j < num_sixes; ++j) {
                 round_accumulator.push_back(six_tables[j].get({ naf_entries[6 * j],
-                                                                naf_entries[6 * j + 1],
-                                                                naf_entries[6 * j + 2],
-                                                                naf_entries[6 * j + 3],
-                                                                naf_entries[6 * j + 4],
-                                                                naf_entries[6 * j + 5] }));
+                                                                naf_entries[(6 * j) + 1],
+                                                                naf_entries[(6 * j) + 2],
+                                                                naf_entries[(6 * j) + 3],
+                                                                naf_entries[(6 * j) + 4],
+                                                                naf_entries[(6 * j) + 5] }));
             }
             size_t offset = num_sixes * 6;
             for (size_t j = 0; j < num_fives; ++j) {
-                round_accumulator.push_back(five_tables[j].get({ naf_entries[offset + j * 5],
-                                                                 naf_entries[offset + j * 5 + 1],
-                                                                 naf_entries[offset + j * 5 + 2],
-                                                                 naf_entries[offset + j * 5 + 3],
-                                                                 naf_entries[offset + j * 5 + 4] }));
+                round_accumulator.push_back(five_tables[j].get({ naf_entries[offset + (j * 5)],
+                                                                 naf_entries[offset + (j * 5) + 1],
+                                                                 naf_entries[offset + (j * 5) + 2],
+                                                                 naf_entries[offset + (j * 5) + 3],
+                                                                 naf_entries[offset + (j * 5) + 4] }));
             }
             offset += num_fives * 5;
             if (has_quad) {
@@ -785,14 +763,18 @@ template <class Builder_, class Fq, class Fr, class NativeGroup> class element {
             element::chain_add_accumulator accumulator;
             if (round_accumulator.size() == 1) {
                 return element::chain_add_accumulator(round_accumulator[0]);
-            } else if (round_accumulator.size() == 2) {
+            }
+
+            if (round_accumulator.size() == 2) {
                 return element::chain_add_start(round_accumulator[0], round_accumulator[1]);
-            } else {
-                accumulator = element::chain_add_start(round_accumulator[0], round_accumulator[1]);
-                for (size_t j = 2; j < round_accumulator.size(); ++j) {
-                    accumulator = element::chain_add(round_accumulator[j], accumulator);
-                }
             }
+
+            // Use chain add for at least 3 elements
+            accumulator = element::chain_add_start(round_accumulator[0], round_accumulator[1]);
+            for (size_t j = 2; j < round_accumulator.size(); ++j) {
+                accumulator = element::chain_add(round_accumulator[j], accumulator);
+            }
+
             return (accumulator);
         }
 
@@ -800,21 +782,21 @@ template <class Builder_, class Fq, class Fr, class NativeGroup> class element {
         {
             std::vector<element> round_accumulator;
             for (size_t j = 0; j < num_sixes; ++j) {
-                round_accumulator.push_back(six_tables[j].get({ naf_entries[6 * j],
-                                                                naf_entries[6 * j + 1],
-                                                                naf_entries[6 * j + 2],
-                                                                naf_entries[6 * j + 3],
-                                                                naf_entries[6 * j + 4],
-                                                                naf_entries[6 * j + 5] }));
+                round_accumulator.push_back(six_tables[j].get({ naf_entries[(6 * j)],
+                                                                naf_entries[(6 * j) + 1],
+                                                                naf_entries[(6 * j) + 2],
+                                                                naf_entries[(6 * j) + 3],
+                                                                naf_entries[(6 * j) + 4],
+                                                                naf_entries[(6 * j) + 5] }));
             }
             size_t offset = num_sixes * 6;
 
             for (size_t j = 0; j < num_fives; ++j) {
-                round_accumulator.push_back(five_tables[j].get({ naf_entries[offset + 5 * j],
-                                                                 naf_entries[offset + 5 * j + 1],
-                                                                 naf_entries[offset + 5 * j + 2],
-                                                                 naf_entries[offset + 5 * j + 3],
-                                                                 naf_entries[offset + 5 * j + 4] }));
+                round_accumulator.push_back(five_tables[j].get({ naf_entries[offset + (5 * j)],
+                                                                 naf_entries[offset + (5 * j) + 1],
+                                                                 naf_entries[offset + (5 * j) + 2],
+                                                                 naf_entries[offset + (5 * j) + 3],
+                                                                 naf_entries[offset + (5 * j) + 4] }));
             }
 
             offset += num_fives * 5;
@@ -823,7 +805,6 @@ template <class Builder_, class Fq, class Fr, class NativeGroup> class element {
                 round_accumulator.push_back(quad_tables[0].get(
                     naf_entries[offset], naf_entries[offset + 1], naf_entries[offset + 2], naf_entries[offset + 3]));
             }
-
             if (has_triple) {
                 round_accumulator.push_back(
                     triple_tables[0].get(naf_entries[offset], naf_entries[offset + 1], naf_entries[offset + 2]));
@@ -839,194 +820,32 @@ template <class Builder_, class Fq, class Fr, class NativeGroup> class element {
             element::chain_add_accumulator accumulator;
             if (round_accumulator.size() == 1) {
                 return result;
-            } else if (round_accumulator.size() == 2) {
-                return result + round_accumulator[1];
-            } else {
-                accumulator = element::chain_add_start(round_accumulator[0], round_accumulator[1]);
-                for (size_t j = 2; j < round_accumulator.size(); ++j) {
-                    accumulator = element::chain_add(round_accumulator[j], accumulator);
-                }
             }
-            return element::chain_add_end(accumulator);
-        }
-
-        std::vector<lookup_table_plookup<6>> six_tables;
-        std::vector<lookup_table_plookup<5>> five_tables;
-        std::vector<quad_lookup_table> quad_tables;
-        std::vector<triple_lookup_table> triple_tables;
-        std::vector<twin_lookup_table> twin_tables;
-        std::vector<element> singletons;
-        size_t num_points;
-
-        size_t num_sixes;
-        size_t num_fives;
-        bool has_quad;
-        bool has_triple;
-        bool has_twin;
-        bool has_singleton;
-    };
-
-    /**
-     * Helper class to split a set of points into lookup table subsets
-     *
-     **/
-    struct batch_lookup_table_base {
-        batch_lookup_table_base(const std::vector<element>& points)
-        {
-            num_points = points.size();
-            num_quads = num_points / 4;
 
-            has_triple = ((num_quads * 4) < num_points - 2) && (num_points >= 3);
-
-            has_twin = ((num_quads * 4 + (size_t)has_triple * 3) < num_points - 1) && (num_points >= 2);
-
-            has_singleton = num_points != (num_quads * 4 + ((size_t)has_triple * 3) + ((size_t)has_twin * 2));
-
-            for (size_t i = 0; i < num_quads; ++i) {
-                quad_tables.push_back(
-                    quad_lookup_table({ points[4 * i], points[4 * i + 1], points[4 * i + 2], points[4 * i + 3] }));
-            }
-
-            if (has_triple) {
-                triple_tables.push_back(triple_lookup_table(
-                    { points[4 * num_quads], points[4 * num_quads + 1], points[4 * num_quads + 2] }));
-            }
-            if (has_twin) {
-                twin_tables.push_back(twin_lookup_table({ points[4 * num_quads], points[4 * num_quads + 1] }));
-            }
-
-            if (has_singleton) {
-                singletons.push_back(points[points.size() - 1]);
-            }
-        }
-
-        element get_initial_entry() const
-        {
-            std::vector<element> add_accumulator;
-            for (size_t i = 0; i < num_quads; ++i) {
-                add_accumulator.push_back(quad_tables[i][0]);
-            }
-            if (has_twin) {
-                add_accumulator.push_back(twin_tables[0][0]);
-            }
-            if (has_triple) {
-                add_accumulator.push_back(triple_tables[0][0]);
-            }
-            if (has_singleton) {
-                add_accumulator.push_back(singletons[0]);
-            }
-
-            element accumulator = add_accumulator[0];
-            for (size_t i = 1; i < add_accumulator.size(); ++i) {
-                accumulator = accumulator + add_accumulator[i];
-            }
-            return accumulator;
-        }
-
-        chain_add_accumulator get_chain_initial_entry() const
-        {
-            std::vector<element> add_accumulator;
-            for (size_t i = 0; i < num_quads; ++i) {
-                add_accumulator.push_back(quad_tables[i][0]);
-            }
-            if (has_twin) {
-                add_accumulator.push_back(twin_tables[0][0]);
-            }
-            if (has_triple) {
-                add_accumulator.push_back(triple_tables[0][0]);
-            }
-            if (has_singleton) {
-                add_accumulator.push_back(singletons[0]);
-            }
-            if (add_accumulator.size() >= 2) {
-                chain_add_accumulator output = element::chain_add_start(add_accumulator[0], add_accumulator[1]);
-                for (size_t i = 2; i < add_accumulator.size(); ++i) {
-                    output = element::chain_add(add_accumulator[i], output);
-                }
-                return output;
-            }
-            return chain_add_accumulator(add_accumulator[0]);
-        }
-
-        element::chain_add_accumulator get_chain_add_accumulator(std::vector<bool_ct>& naf_entries) const
-        {
-            std::vector<element> round_accumulator;
-            for (size_t j = 0; j < num_quads; ++j) {
-                round_accumulator.push_back(quad_tables[j].get(std::array<bool_ct, 4>{
-                    naf_entries[4 * j], naf_entries[4 * j + 1], naf_entries[4 * j + 2], naf_entries[4 * j + 3] }));
-            }
-
-            if (has_triple) {
-                round_accumulator.push_back(triple_tables[0].get(std::array<bool_ct, 3>{
-                    naf_entries[num_quads * 4], naf_entries[num_quads * 4 + 1], naf_entries[num_quads * 4 + 2] }));
-            }
-            if (has_twin) {
-                round_accumulator.push_back(twin_tables[0].get(
-                    std::array<bool_ct, 2>{ naf_entries[num_quads * 4], naf_entries[num_quads * 4 + 1] }));
-            }
-            if (has_singleton) {
-                round_accumulator.push_back(singletons[0].conditional_negate(naf_entries[num_points - 1]));
-            }
-
-            element::chain_add_accumulator accumulator;
-            if (round_accumulator.size() == 1) {
-                accumulator.x3_prev = round_accumulator[0].x;
-                accumulator.y3_prev = round_accumulator[0].y;
-                accumulator.is_element = true;
-                return accumulator;
-            } else if (round_accumulator.size() == 2) {
-                return element::chain_add_start(round_accumulator[0], round_accumulator[1]);
-            } else {
-                accumulator = element::chain_add_start(round_accumulator[0], round_accumulator[1]);
-                for (size_t j = 2; j < round_accumulator.size(); ++j) {
-                    accumulator = element::chain_add(round_accumulator[j], accumulator);
-                }
-            }
-            return (accumulator);
-        }
-
-        element get(std::vector<bool_ct>& naf_entries) const
-        {
-            std::vector<element> round_accumulator;
-            for (size_t j = 0; j < num_quads; ++j) {
-                round_accumulator.push_back(quad_tables[j].get(
-                    { naf_entries[4 * j], naf_entries[4 * j + 1], naf_entries[4 * j + 2], naf_entries[4 * j + 3] }));
+            if (round_accumulator.size() == 2) {
+                return result + round_accumulator[1];
             }
 
-            if (has_triple) {
-                round_accumulator.push_back(triple_tables[0].get(std::array<bool_ct, 3>{
-                    naf_entries[num_quads * 4], naf_entries[num_quads * 4 + 1], naf_entries[num_quads * 4 + 2] }));
-            }
-            if (has_twin) {
-                round_accumulator.push_back(
-                    twin_tables[0].get({ naf_entries[num_quads * 4], naf_entries[num_quads * 4 + 1] }));
-            }
-            if (has_singleton) {
-                round_accumulator.push_back(singletons[0].conditional_negate(naf_entries[num_points - 1]));
+            // For 3 or more elements, use chain addition
+            accumulator = element::chain_add_start(round_accumulator[0], round_accumulator[1]);
+            for (size_t j = 2; j < round_accumulator.size(); ++j) {
+                accumulator = element::chain_add(round_accumulator[j], accumulator);
             }
 
-            element result = round_accumulator[0];
-            element::chain_add_accumulator accumulator;
-            if (round_accumulator.size() == 1) {
-                return result;
-            } else if (round_accumulator.size() == 2) {
-                return result + round_accumulator[1];
-            } else {
-                accumulator = element::chain_add_start(round_accumulator[0], round_accumulator[1]);
-                for (size_t j = 2; j < round_accumulator.size(); ++j) {
-                    accumulator = element::chain_add(round_accumulator[j], accumulator);
-                }
-            }
             return element::chain_add_end(accumulator);
         }
 
+        std::vector<lookup_table_plookup<6>> six_tables;
+        std::vector<lookup_table_plookup<5>> five_tables;
         std::vector<quad_lookup_table> quad_tables;
         std::vector<triple_lookup_table> triple_tables;
         std::vector<twin_lookup_table> twin_tables;
         std::vector<element> singletons;
         size_t num_points;
 
-        size_t num_quads;
+        size_t num_sixes = 0;
+        size_t num_fives;
+        bool has_quad;
         bool has_triple;
         bool has_twin;
         bool has_singleton;
diff --git a/barretenberg/cpp/src/barretenberg/stdlib/primitives/biggroup/biggroup_impl.hpp b/barretenberg/cpp/src/barretenberg/stdlib/primitives/biggroup/biggroup_impl.hpp
index 65285dabebe6..8fb1b8202e4e 100644
--- a/barretenberg/cpp/src/barretenberg/stdlib/primitives/biggroup/biggroup_impl.hpp
+++ b/barretenberg/cpp/src/barretenberg/stdlib/primitives/biggroup/biggroup_impl.hpp
@@ -269,6 +269,7 @@ template <typename C, class Fq, class Fr, class G> element<C, Fq, Fr, G> element
         Fq neg_lambda = Fq::msub_div({ x }, { (two_x + x) }, (y + y), { a });
         Fq x_3 = neg_lambda.sqradd({ -(two_x) });
         Fq y_3 = neg_lambda.madd(x_3 - x, { -y });
+        // TODO(suyash): do we handle the point at infinity case here?
         return element(x_3, y_3);
     }
     Fq neg_lambda = Fq::msub_div({ x }, { (two_x + x) }, (y + y), {});
diff --git a/barretenberg/cpp/src/barretenberg/stdlib/primitives/biggroup/biggroup_tables.hpp b/barretenberg/cpp/src/barretenberg/stdlib/primitives/biggroup/biggroup_tables.hpp
index d85b5b214928..978d468a601a 100644
--- a/barretenberg/cpp/src/barretenberg/stdlib/primitives/biggroup/biggroup_tables.hpp
+++ b/barretenberg/cpp/src/barretenberg/stdlib/primitives/biggroup/biggroup_tables.hpp
@@ -26,12 +26,12 @@ using plookup::MultiTableId;
  *
  * @details When reading a group element *out* of the ROM table, we must know the maximum value of each coordinate's
  * limbs. We take this value to be the maximum of the maximum values of the input limbs into the table!
- * @return std::array<twin_rom_table<C>, 5>
+ * @return std::array<twin_rom_table<C>, Fq::NUM_LIMBS + 1>
  */
 template <typename C, class Fq, class Fr, class G>
 template <size_t num_elements>
-std::array<twin_rom_table<C>, 5> element<C, Fq, Fr, G>::create_group_element_rom_tables(
-    const std::array<element, num_elements>& rom_data, std::array<uint256_t, 8>& limb_max)
+std::array<twin_rom_table<C>, Fq::NUM_LIMBS + 1> element<C, Fq, Fr, G>::create_group_element_rom_tables(
+    const std::array<element, num_elements>& rom_data, std::array<uint256_t, Fq::NUM_LIMBS * 2>& limb_max)
 {
     std::vector<std::array<field_t<C>, 2>> x_lo_limbs;
     std::vector<std::array<field_t<C>, 2>> x_hi_limbs;
@@ -60,7 +60,7 @@ std::array<twin_rom_table<C>, 5> element<C, Fq, Fr, G>::create_group_element_rom
         prime_limbs.emplace_back(
             std::array<field_t<C>, 2>{ rom_data[i].x.prime_basis_limb, rom_data[i].y.prime_basis_limb });
     }
-    std::array<twin_rom_table<C>, 5> output_tables;
+    std::array<twin_rom_table<C>, Fq::NUM_LIMBS + 1> output_tables;
     output_tables[0] = twin_rom_table<C>(x_lo_limbs);
     output_tables[1] = twin_rom_table<C>(x_hi_limbs);
     output_tables[2] = twin_rom_table<C>(y_lo_limbs);
@@ -72,7 +72,9 @@ std::array<twin_rom_table<C>, 5> element<C, Fq, Fr, G>::create_group_element_rom
 template <typename C, class Fq, class Fr, class G>
 template <size_t>
 element<C, Fq, Fr, G> element<C, Fq, Fr, G>::read_group_element_rom_tables(
-    const std::array<twin_rom_table<C>, 5>& tables, const field_t<C>& index, const std::array<uint256_t, 8>& limb_max)
+    const std::array<twin_rom_table<C>, Fq::NUM_LIMBS + 1>& tables,
+    const field_t<C>& index,
+    const std::array<uint256_t, Fq::NUM_LIMBS * 2>& limb_max)
 {
     const auto xlo = tables[0][index];
     const auto xhi = tables[1][index];
@@ -186,6 +188,8 @@ template <typename C, class Fq, class Fr, class G>
 template <size_t length>
 element<C, Fq, Fr, G>::lookup_table_plookup<length>::lookup_table_plookup(const std::array<element, length>& inputs)
 {
+    static_assert(length <= 6, "lookup_table_plookup only supports up to 6 input elements");
+
     if constexpr (length == 2) {
         auto [A0, A1] = inputs[1].checked_unconditional_add_sub(inputs[0]);
         element_table[0] = A0;
@@ -224,14 +228,14 @@ element<C, Fq, Fr, G>::lookup_table_plookup<length>::lookup_table_plookup(const
         auto [E0, E3] = inputs[4].checked_unconditional_add_sub(T2); // E ± (D + C)
         auto [E1, E2] = inputs[4].checked_unconditional_add_sub(T3); // E ± (D - C)
 
-        auto [F0, F3] = E0.checked_unconditional_add_sub(A0);
-        auto [F1, F2] = E0.checked_unconditional_add_sub(A1);
-        auto [F4, F7] = E1.checked_unconditional_add_sub(A0);
-        auto [F5, F6] = E1.checked_unconditional_add_sub(A1);
-        auto [F8, F11] = E2.checked_unconditional_add_sub(A0);
-        auto [F9, F10] = E2.checked_unconditional_add_sub(A1);
-        auto [F12, F15] = E3.checked_unconditional_add_sub(A0);
-        auto [F13, F14] = E3.checked_unconditional_add_sub(A1);
+        auto [F0, F3] = E0.checked_unconditional_add_sub(A0);   // E + (D + C) ± (B + A)
+        auto [F1, F2] = E0.checked_unconditional_add_sub(A1);   // E + (D + C) ± (B - A)
+        auto [F4, F7] = E1.checked_unconditional_add_sub(A0);   // E + (D - C) ± (B + A)
+        auto [F5, F6] = E1.checked_unconditional_add_sub(A1);   // E + (D - C) ± (B - A)
+        auto [F8, F11] = E2.checked_unconditional_add_sub(A0);  // E - (D - C) ± (B + A)
+        auto [F9, F10] = E2.checked_unconditional_add_sub(A1);  // E - (D - C) ± (B - A)
+        auto [F12, F15] = E3.checked_unconditional_add_sub(A0); // E - (D + C) ± (B + A)
+        auto [F13, F14] = E3.checked_unconditional_add_sub(A1); // E - (D + C) ± (B - A)
 
         element_table[0] = F0;
         element_table[1] = F1;
@@ -315,99 +319,9 @@ element<C, Fq, Fr, G>::lookup_table_plookup<length>::lookup_table_plookup(const
         element_table[29] = W5;
         element_table[30] = W6;
         element_table[31] = W7;
-    } else if constexpr (length == 7) {
-        // 82 adds! This one is not worth using...
-
-        element A0 = inputs[1] + inputs[0]; // B + A
-        element A1 = inputs[1] - inputs[0]; // B - A
-
-        element D0 = inputs[3] + inputs[2]; // D + C
-        element D1 = inputs[3] - inputs[2]; // D - C
-
-        element E0 = D0 + A0; // D + C + B + A
-        element E1 = D0 + A1; // D + C + B - A
-        element E2 = D0 - A1; // D + C - B + A
-        element E3 = D0 - A0; // D + C - B - A
-        element E4 = D1 + A0; // D - C + B + A
-        element E5 = D1 + A1; // D - C + B - A
-        element E6 = D1 - A1; // D - C - B + A
-        element E7 = D1 - A0; // D - C - B - A
-
-        element F0 = inputs[5] + inputs[4]; // F + E
-        element F1 = inputs[5] - inputs[4]; // F - E
-
-        element G0 = inputs[6] + F0; // G + F + E
-        element G1 = inputs[6] + F1; // G + F - E
-        element G2 = inputs[6] - F1; // G - F + E
-        element G3 = inputs[6] - F0; // G - F - E
-
-        element_table[0] = G0 + E0;
-        element_table[1] = G0 + E1;
-        element_table[2] = G0 + E2;
-        element_table[3] = G0 + E3;
-        element_table[4] = G0 + E4;
-        element_table[5] = G0 + E5;
-        element_table[6] = G0 + E6;
-        element_table[7] = G0 + E7;
-        element_table[8] = G0 - E7;
-        element_table[9] = G0 - E6;
-        element_table[10] = G0 - E5;
-        element_table[11] = G0 - E4;
-        element_table[12] = G0 - E3;
-        element_table[13] = G0 - E2;
-        element_table[14] = G0 - E1;
-        element_table[15] = G0 - E0;
-        element_table[16] = G1 + E0;
-        element_table[17] = G1 + E1;
-        element_table[18] = G1 + E2;
-        element_table[19] = G1 + E3;
-        element_table[20] = G1 + E4;
-        element_table[21] = G1 + E5;
-        element_table[22] = G1 + E6;
-        element_table[23] = G1 + E7;
-        element_table[24] = G1 - E7;
-        element_table[25] = G1 - E6;
-        element_table[26] = G1 - E5;
-        element_table[27] = G1 - E4;
-        element_table[28] = G1 - E3;
-        element_table[29] = G1 - E2;
-        element_table[30] = G1 - E1;
-        element_table[31] = G1 - E0;
-        element_table[32] = G2 + E0;
-        element_table[33] = G2 + E1;
-        element_table[34] = G2 + E2;
-        element_table[35] = G2 + E3;
-        element_table[36] = G2 + E4;
-        element_table[37] = G2 + E5;
-        element_table[38] = G2 + E6;
-        element_table[39] = G2 + E7;
-        element_table[40] = G2 - E7;
-        element_table[41] = G2 - E6;
-        element_table[42] = G2 - E5;
-        element_table[43] = G2 - E4;
-        element_table[44] = G2 - E3;
-        element_table[45] = G2 - E2;
-        element_table[46] = G2 - E1;
-        element_table[47] = G2 - E0;
-        element_table[48] = G3 + E0;
-        element_table[49] = G3 + E1;
-        element_table[50] = G3 + E2;
-        element_table[51] = G3 + E3;
-        element_table[52] = G3 + E4;
-        element_table[53] = G3 + E5;
-        element_table[54] = G3 + E6;
-        element_table[55] = G3 + E7;
-        element_table[56] = G3 - E7;
-        element_table[57] = G3 - E6;
-        element_table[58] = G3 - E5;
-        element_table[59] = G3 - E4;
-        element_table[60] = G3 - E3;
-        element_table[61] = G3 - E2;
-        element_table[62] = G3 - E1;
-        element_table[63] = G3 - E0;
     }
     for (size_t i = 0; i < table_size / 2; ++i) {
-        element_table[i + table_size / 2] = (-element_table[table_size / 2 - 1 - i]);
+        element_table[i + (table_size / 2)] = (-element_table[(table_size / 2) - 1 - i]);
     }
     coordinates = create_group_element_rom_tables<table_size>(element_table, limb_max);
 }
@@ -426,227 +340,65 @@ element<C, Fq, Fr, G> element<C, Fq, Fr, G>::lookup_table_plookup<length>::get(
 }
 
 /**
- * lookup_table_base
- **/
-template <typename C, class Fq, class Fr, class G>
-template <size_t length>
-element<C, Fq, Fr, G>::lookup_table_base<length>::lookup_table_base(const std::array<element, length>& inputs)
-{
-    static_assert(length <= 4 && length >= 2);
-    if constexpr (length == 2) {
-        twin0 = inputs[1] + inputs[0];
-        twin1 = inputs[1] - inputs[0];
-        element_table[0] = twin0;
-        element_table[1] = twin1;
-    } else if constexpr (length == 3) {
-        element T0 = inputs[1] + inputs[0];
-        element T1 = inputs[1] - inputs[0];
-        element_table[0] = inputs[2] + T0; // C + B + A
-        element_table[1] = inputs[2] + T1; // C + B - A
-        element_table[2] = inputs[2] - T1; // C - B + A
-        element_table[3] = inputs[2] - T0; // C - B - A
-
-        x_b0_table = field_t<C>::preprocess_two_bit_table(element_table[0].x.binary_basis_limbs[0].element,
-                                                          element_table[1].x.binary_basis_limbs[0].element,
-                                                          element_table[2].x.binary_basis_limbs[0].element,
-                                                          element_table[3].x.binary_basis_limbs[0].element);
-        x_b1_table = field_t<C>::preprocess_two_bit_table(element_table[0].x.binary_basis_limbs[1].element,
-                                                          element_table[1].x.binary_basis_limbs[1].element,
-                                                          element_table[2].x.binary_basis_limbs[1].element,
-                                                          element_table[3].x.binary_basis_limbs[1].element);
-        x_b2_table = field_t<C>::preprocess_two_bit_table(element_table[0].x.binary_basis_limbs[2].element,
-                                                          element_table[1].x.binary_basis_limbs[2].element,
-                                                          element_table[2].x.binary_basis_limbs[2].element,
-                                                          element_table[3].x.binary_basis_limbs[2].element);
-        x_b3_table = field_t<C>::preprocess_two_bit_table(element_table[0].x.binary_basis_limbs[3].element,
-                                                          element_table[1].x.binary_basis_limbs[3].element,
-                                                          element_table[2].x.binary_basis_limbs[3].element,
-                                                          element_table[3].x.binary_basis_limbs[3].element);
-
-        y_b0_table = field_t<C>::preprocess_two_bit_table(element_table[0].y.binary_basis_limbs[0].element,
-                                                          element_table[1].y.binary_basis_limbs[0].element,
-                                                          element_table[2].y.binary_basis_limbs[0].element,
-                                                          element_table[3].y.binary_basis_limbs[0].element);
-        y_b1_table = field_t<C>::preprocess_two_bit_table(element_table[0].y.binary_basis_limbs[1].element,
-                                                          element_table[1].y.binary_basis_limbs[1].element,
-                                                          element_table[2].y.binary_basis_limbs[1].element,
-                                                          element_table[3].y.binary_basis_limbs[1].element);
-        y_b2_table = field_t<C>::preprocess_two_bit_table(element_table[0].y.binary_basis_limbs[2].element,
-                                                          element_table[1].y.binary_basis_limbs[2].element,
-                                                          element_table[2].y.binary_basis_limbs[2].element,
-                                                          element_table[3].y.binary_basis_limbs[2].element);
-        y_b3_table = field_t<C>::preprocess_two_bit_table(element_table[0].y.binary_basis_limbs[3].element,
-                                                          element_table[1].y.binary_basis_limbs[3].element,
-                                                          element_table[2].y.binary_basis_limbs[3].element,
-                                                          element_table[3].y.binary_basis_limbs[3].element);
-    } else if constexpr (length == 4) {
-        element T0 = inputs[1] + inputs[0];
-        element T1 = inputs[1] - inputs[0];
-        element T2 = inputs[3] + inputs[2];
-        element T3 = inputs[3] - inputs[2];
-
-        element_table[0] = T2 + T0; // D + C + B + A
-        element_table[1] = T2 + T1; // D + C + B - A
-        element_table[2] = T2 - T1; // D + C - B + A
-        element_table[3] = T2 - T0; // D + C - B - A
-        element_table[4] = T3 + T0; // D - C + B + A
-        element_table[5] = T3 + T1; // D - C + B - A
-        element_table[6] = T3 - T1; // D - C - B + A
-        element_table[7] = T3 - T0; // D - C - B - A
-
-        x_b0_table = field_t<C>::preprocess_three_bit_table(element_table[0].x.binary_basis_limbs[0].element,
-                                                            element_table[1].x.binary_basis_limbs[0].element,
-                                                            element_table[2].x.binary_basis_limbs[0].element,
-                                                            element_table[3].x.binary_basis_limbs[0].element,
-                                                            element_table[4].x.binary_basis_limbs[0].element,
-                                                            element_table[5].x.binary_basis_limbs[0].element,
-                                                            element_table[6].x.binary_basis_limbs[0].element,
-                                                            element_table[7].x.binary_basis_limbs[0].element);
-        x_b1_table = field_t<C>::preprocess_three_bit_table(element_table[0].x.binary_basis_limbs[1].element,
-                                                            element_table[1].x.binary_basis_limbs[1].element,
-                                                            element_table[2].x.binary_basis_limbs[1].element,
-                                                            element_table[3].x.binary_basis_limbs[1].element,
-                                                            element_table[4].x.binary_basis_limbs[1].element,
-                                                            element_table[5].x.binary_basis_limbs[1].element,
-                                                            element_table[6].x.binary_basis_limbs[1].element,
-                                                            element_table[7].x.binary_basis_limbs[1].element);
-        x_b2_table = field_t<C>::preprocess_three_bit_table(element_table[0].x.binary_basis_limbs[2].element,
-                                                            element_table[1].x.binary_basis_limbs[2].element,
-                                                            element_table[2].x.binary_basis_limbs[2].element,
-                                                            element_table[3].x.binary_basis_limbs[2].element,
-                                                            element_table[4].x.binary_basis_limbs[2].element,
-                                                            element_table[5].x.binary_basis_limbs[2].element,
-                                                            element_table[6].x.binary_basis_limbs[2].element,
-                                                            element_table[7].x.binary_basis_limbs[2].element);
-        x_b3_table = field_t<C>::preprocess_three_bit_table(element_table[0].x.binary_basis_limbs[3].element,
-                                                            element_table[1].x.binary_basis_limbs[3].element,
-                                                            element_table[2].x.binary_basis_limbs[3].element,
-                                                            element_table[3].x.binary_basis_limbs[3].element,
-                                                            element_table[4].x.binary_basis_limbs[3].element,
-                                                            element_table[5].x.binary_basis_limbs[3].element,
-                                                            element_table[6].x.binary_basis_limbs[3].element,
-                                                            element_table[7].x.binary_basis_limbs[3].element);
-
-        y_b0_table = field_t<C>::preprocess_three_bit_table(element_table[0].y.binary_basis_limbs[0].element,
-                                                            element_table[1].y.binary_basis_limbs[0].element,
-                                                            element_table[2].y.binary_basis_limbs[0].element,
-                                                            element_table[3].y.binary_basis_limbs[0].element,
-                                                            element_table[4].y.binary_basis_limbs[0].element,
-                                                            element_table[5].y.binary_basis_limbs[0].element,
-                                                            element_table[6].y.binary_basis_limbs[0].element,
-                                                            element_table[7].y.binary_basis_limbs[0].element);
-        y_b1_table = field_t<C>::preprocess_three_bit_table(element_table[0].y.binary_basis_limbs[1].element,
-                                                            element_table[1].y.binary_basis_limbs[1].element,
-                                                            element_table[2].y.binary_basis_limbs[1].element,
-                                                            element_table[3].y.binary_basis_limbs[1].element,
-                                                            element_table[4].y.binary_basis_limbs[1].element,
-                                                            element_table[5].y.binary_basis_limbs[1].element,
-                                                            element_table[6].y.binary_basis_limbs[1].element,
-                                                            element_table[7].y.binary_basis_limbs[1].element);
-        y_b2_table = field_t<C>::preprocess_three_bit_table(element_table[0].y.binary_basis_limbs[2].element,
-                                                            element_table[1].y.binary_basis_limbs[2].element,
-                                                            element_table[2].y.binary_basis_limbs[2].element,
-                                                            element_table[3].y.binary_basis_limbs[2].element,
-                                                            element_table[4].y.binary_basis_limbs[2].element,
-                                                            element_table[5].y.binary_basis_limbs[2].element,
-                                                            element_table[6].y.binary_basis_limbs[2].element,
-                                                            element_table[7].y.binary_basis_limbs[2].element);
-        y_b3_table = field_t<C>::preprocess_three_bit_table(element_table[0].y.binary_basis_limbs[3].element,
-                                                            element_table[1].y.binary_basis_limbs[3].element,
-                                                            element_table[2].y.binary_basis_limbs[3].element,
-                                                            element_table[3].y.binary_basis_limbs[3].element,
-                                                            element_table[4].y.binary_basis_limbs[3].element,
-                                                            element_table[5].y.binary_basis_limbs[3].element,
-                                                            element_table[6].y.binary_basis_limbs[3].element,
-                                                            element_table[7].y.binary_basis_limbs[3].element);
-    }
-}
-
+ * @brief Create an endo pair four bit table for the given group element
+ *
+ * @tparam C
+ * @tparam Fq
+ * @tparam Fr
+ * @tparam G
+ * @param input
+ * @return std::pair<four_bit_table_plookup, four_bit_table_plookup>
+ *
+ * @details
+ *
+ * | Index | P = (x, y) | Q = (β.x, y) |
+ * |-------|------------|---------------|
+ * | 0     | -15.P      | Q_0           |
+ * | 1     | -13.P      | Q_1           |
+ * | 2     | -11.P      | Q_2           |
+ * | 3     | -9.P       | Q_3           |
+ * | 4     | -7.P       | Q_4           |
+ * | 5     | -5.P       | Q_5           |
+ * | 6     | -3.P       | Q_6           |
+ * | 7     | -1.P       | Q_7           |
+ * | 8     | 1.P        | Q_8           |
+ * | 9     | 3.P        | Q_9           |
+ * | 10    | 5.P        | Q_10          |
+ * | 11    | 7.P        | Q_11          |
+ * | 12    | 9.P        | Q_12          |
+ * | 13    | 11.P       | Q_13          |
+ * | 14    | 13.P       | Q_14          |
+ * | 15    | 15.P       | Q_15          |
+ */
 template <typename C, class Fq, class Fr, class G>
-template <size_t length>
-element<C, Fq, Fr, G> element<C, Fq, Fr, G>::lookup_table_base<length>::get(
-    const std::array<bool_ct, length>& bits) const
+std::pair<typename element<C, Fq, Fr, G>::four_bit_table_plookup,
+          typename element<C, Fq, Fr, G>::four_bit_table_plookup>
+element<C, Fq, Fr, G>::create_endo_pair_four_bit_table_plookup(const element& input)
 {
-    static_assert(length <= 4 && length >= 2);
+    four_bit_table_plookup P1;     // table built from `input`; returned as the first element of the pair
+    four_bit_table_plookup endoP1; // companion table with beta-scaled x-coordinates; second element of the pair
+    element d2 = input.dbl();      // doubling of `input`; the step added between consecutive upper-half entries
 
-    if constexpr (length == 2) {
-        bool_ct table_selector = bits[0] ^ bits[1];
-        bool_ct sign_selector = bits[1];
-        Fq to_add_x = twin0.x.conditional_select(twin1.x, table_selector);
-        Fq to_add_y = twin0.y.conditional_select(twin1.y, table_selector);
-        element to_add(to_add_x, to_add_y.conditional_negate(sign_selector));
-        return to_add;
-    } else if constexpr (length == 3) {
-        bool_ct t0 = bits[2] ^ bits[0];
-        bool_ct t1 = bits[2] ^ bits[1];
-
-        field_t<C> x_b0 = field_t<C>::select_from_two_bit_table(x_b0_table, t1, t0);
-        field_t<C> x_b1 = field_t<C>::select_from_two_bit_table(x_b1_table, t1, t0);
-        field_t<C> x_b2 = field_t<C>::select_from_two_bit_table(x_b2_table, t1, t0);
-        field_t<C> x_b3 = field_t<C>::select_from_two_bit_table(x_b3_table, t1, t0);
-
-        field_t<C> y_b0 = field_t<C>::select_from_two_bit_table(y_b0_table, t1, t0);
-        field_t<C> y_b1 = field_t<C>::select_from_two_bit_table(y_b1_table, t1, t0);
-        field_t<C> y_b2 = field_t<C>::select_from_two_bit_table(y_b2_table, t1, t0);
-        field_t<C> y_b3 = field_t<C>::select_from_two_bit_table(y_b3_table, t1, t0);
-
-        Fq to_add_x;
-        Fq to_add_y;
-        to_add_x.binary_basis_limbs[0] = typename Fq::Limb(x_b0, Fq::DEFAULT_MAXIMUM_LIMB);
-        to_add_x.binary_basis_limbs[1] = typename Fq::Limb(x_b1, Fq::DEFAULT_MAXIMUM_LIMB);
-        to_add_x.binary_basis_limbs[2] = typename Fq::Limb(x_b2, Fq::DEFAULT_MAXIMUM_LIMB);
-        to_add_x.binary_basis_limbs[3] = typename Fq::Limb(x_b3, Fq::DEFAULT_MAXIMUM_MOST_SIGNIFICANT_LIMB);
-        to_add_x.prime_basis_limb = to_add_x.binary_basis_limbs[0].element.add_two(
-            to_add_x.binary_basis_limbs[1].element * Fq::shift_1, to_add_x.binary_basis_limbs[2].element * Fq::shift_2);
-        to_add_x.prime_basis_limb += to_add_x.binary_basis_limbs[3].element * Fq::shift_3;
-
-        to_add_y.binary_basis_limbs[0] = typename Fq::Limb(y_b0, Fq::DEFAULT_MAXIMUM_LIMB);
-        to_add_y.binary_basis_limbs[1] = typename Fq::Limb(y_b1, Fq::DEFAULT_MAXIMUM_LIMB);
-        to_add_y.binary_basis_limbs[2] = typename Fq::Limb(y_b2, Fq::DEFAULT_MAXIMUM_LIMB);
-        to_add_y.binary_basis_limbs[3] = typename Fq::Limb(y_b3, Fq::DEFAULT_MAXIMUM_MOST_SIGNIFICANT_LIMB);
-        to_add_y.prime_basis_limb = to_add_y.binary_basis_limbs[0].element.add_two(
-            to_add_y.binary_basis_limbs[1].element * Fq::shift_1, to_add_y.binary_basis_limbs[2].element * Fq::shift_2);
-        to_add_y.prime_basis_limb += to_add_y.binary_basis_limbs[3].element * Fq::shift_3;
-        element to_add(to_add_x, to_add_y.conditional_negate(bits[2]));
-
-        return to_add;
-    } else if constexpr (length == 4) {
-        bool_ct t0 = bits[3] ^ bits[0];
-        bool_ct t1 = bits[3] ^ bits[1];
-        bool_ct t2 = bits[3] ^ bits[2];
-
-        field_t<C> x_b0 = field_t<C>::select_from_three_bit_table(x_b0_table, t2, t1, t0);
-        field_t<C> x_b1 = field_t<C>::select_from_three_bit_table(x_b1_table, t2, t1, t0);
-        field_t<C> x_b2 = field_t<C>::select_from_three_bit_table(x_b2_table, t2, t1, t0);
-        field_t<C> x_b3 = field_t<C>::select_from_three_bit_table(x_b3_table, t2, t1, t0);
-
-        field_t<C> y_b0 = field_t<C>::select_from_three_bit_table(y_b0_table, t2, t1, t0);
-        field_t<C> y_b1 = field_t<C>::select_from_three_bit_table(y_b1_table, t2, t1, t0);
-        field_t<C> y_b2 = field_t<C>::select_from_three_bit_table(y_b2_table, t2, t1, t0);
-        field_t<C> y_b3 = field_t<C>::select_from_three_bit_table(y_b3_table, t2, t1, t0);
-
-        Fq to_add_x;
-        Fq to_add_y;
-        to_add_x.binary_basis_limbs[0] = typename Fq::Limb(x_b0, Fq::DEFAULT_MAXIMUM_LIMB);
-        to_add_x.binary_basis_limbs[1] = typename Fq::Limb(x_b1, Fq::DEFAULT_MAXIMUM_LIMB);
-        to_add_x.binary_basis_limbs[2] = typename Fq::Limb(x_b2, Fq::DEFAULT_MAXIMUM_LIMB);
-        to_add_x.binary_basis_limbs[3] = typename Fq::Limb(x_b3, Fq::DEFAULT_MAXIMUM_MOST_SIGNIFICANT_LIMB);
-        to_add_x.prime_basis_limb = to_add_x.binary_basis_limbs[0].element.add_two(
-            to_add_x.binary_basis_limbs[1].element * Fq::shift_1, to_add_x.binary_basis_limbs[2].element * Fq::shift_2);
-        to_add_x.prime_basis_limb += to_add_x.binary_basis_limbs[3].element * Fq::shift_3;
-
-        to_add_y.binary_basis_limbs[0] = typename Fq::Limb(y_b0, Fq::DEFAULT_MAXIMUM_LIMB);
-        to_add_y.binary_basis_limbs[1] = typename Fq::Limb(y_b1, Fq::DEFAULT_MAXIMUM_LIMB);
-        to_add_y.binary_basis_limbs[2] = typename Fq::Limb(y_b2, Fq::DEFAULT_MAXIMUM_LIMB);
-        to_add_y.binary_basis_limbs[3] = typename Fq::Limb(y_b3, Fq::DEFAULT_MAXIMUM_MOST_SIGNIFICANT_LIMB);
-        to_add_y.prime_basis_limb = to_add_y.binary_basis_limbs[0].element.add_two(
-            to_add_y.binary_basis_limbs[1].element * Fq::shift_1, to_add_y.binary_basis_limbs[2].element * Fq::shift_2);
-        to_add_y.prime_basis_limb += to_add_y.binary_basis_limbs[3].element * Fq::shift_3;
-
-        element to_add(to_add_x, to_add_y.conditional_negate(bits[3]));
-
-        return to_add;
+    P1.element_table[8] = input; // index 8 holds `input` itself
+    for (size_t i = 9; i < 16; ++i) { // indices 9..15: each entry is the previous entry plus d2
+        P1.element_table[i] = P1.element_table[i - 1] + d2;
+    }
+    for (size_t i = 0; i < 8; ++i) { // indices 0..7: negation of the mirrored entry 15 - i
+        P1.element_table[i] = (-P1.element_table[15 - i]);
     }
-    return element::one(bits[0].get_context());
+    for (size_t i = 0; i < 16; ++i) { // endo entry i takes its y-coordinate from P1's mirrored entry 15 - i
+        endoP1.element_table[i].y = P1.element_table[15 - i].y;
+    }
+    uint256_t beta_val = bb::field<typename Fq::TParams>::cube_root_of_unity(); // cube root of unity for Fq's params
+    Fq beta(bb::fr(beta_val.slice(0, 136)), bb::fr(beta_val.slice(136, 256))); // two slices, split at bit 136
+    for (size_t i = 0; i < 8; ++i) { // x is computed once per mirrored pair (i, 15 - i)
+        endoP1.element_table[i].x = P1.element_table[i].x * beta;
+        endoP1.element_table[15 - i].x = endoP1.element_table[i].x; // mirrored entry reuses the same x value
+    }
+    P1.coordinates = create_group_element_rom_tables<16>(P1.element_table, P1.limb_max); // built from all 16 entries
+    endoP1.coordinates = create_group_element_rom_tables<16>(endoP1.element_table, endoP1.limb_max);
+    auto result = std::make_pair(four_bit_table_plookup(P1), four_bit_table_plookup(endoP1)); // (P1, endoP1) order
+    return result;
 }
+
 } // namespace bb::stdlib::element_default
diff --git a/barretenberg/cpp/src/barretenberg/stdlib/primitives/memory/README.md b/barretenberg/cpp/src/barretenberg/stdlib/primitives/memory/README.md
new file mode 100644
index 000000000000..aea473ebae20
--- /dev/null
+++ b/barretenberg/cpp/src/barretenberg/stdlib/primitives/memory/README.md
@@ -0,0 +1,123 @@
+$\newcommand{\endogroup}{\textcolor{orange}{\lambda}}$
+$\newcommand{\endofield}{\textcolor{orange}{\beta}}$
+$\newcommand{\rom}[1]{\textcolor{purple}{#1}}$
+$\newcommand{\windex}[1]{\textcolor{grey}{#1}}$
+
+### ROM Tables in Barretenberg
+
+> Note: This section briefly describes the implementation of ROM tables in Barretenberg. More details on the implementation will be added during the internal audit of the memory primitives.
+
+Suppose we have a ROM table with $n$ entries, where each entry consists of two values: $\rom{A_i}$ and $\rom{B_i}$. The ROM table is represented as follows:
+
+| ROM Index | Value A         | Value B         |
+| --------- | --------------- | --------------- |
+| 0         | $\rom{A_0}$     | $\rom{B_0}$     |
+| 1         | $\rom{A_1}$     | $\rom{B_1}$     |
+| 2         | $\rom{A_2}$     | $\rom{B_2}$     |
+| $\vdots$  | $\vdots$        | $\vdots$        |
+| $n-1$     | $\rom{A_{n-1}}$ | $\rom{B_{n-1}}$ |
+
+The values in the ROM table need to be circuit witnesses. If any of these values are circuit constants, they must be used as fixed circuit witnesses (fixing circuit witnesses is enforced by one gate for each value). In this case, suppose the ROM values $\rom{A_i}$ and $\rom{B_i}$ are circuit witnesses, represented by the following witness indices:
+
+| Witness index      | Value Witness   |
+| ------------------ | --------------- |
+| 0                  | $0$             |
+| 1                  | $\dots$         |
+| $\vdots$           | $\vdots$        |
+| $\windex{a_0}$     | $\rom{A_0}$     |
+| $\windex{a_1}$     | $\rom{A_1}$     |
+| $\windex{\vdots}$  | $\rom{\vdots}$  |
+| $\windex{a_{n-1}}$ | $\rom{A_{n-1}}$ |
+| $\vdots$           | $\vdots$        |
+| $\windex{b_0}$     | $\rom{B_0}$     |
+| $\windex{b_1}$     | $\rom{B_1}$     |
+| $\windex{\vdots}$  | $\rom{\vdots}$  |
+| $\windex{b_{n-1}}$ | $\rom{B_{n-1}}$ |
+| $\vdots$           | $\vdots$        |
+|                    |                 |
+
+The ROM table is "instantiated" only when we try to use `operator[]` on the ROM table with a witness index. In practice, the ROM table stores the witness indices (instead of the witness values), and the default values are set to $U = 2^{32}-1$ (the witness index used for circuit constants). On initializing the ROM table, the witness indices are set to the corresponding values in the ROM table. Additionally, we add constant witnesses to the circuit for the index set $\{0, 1, \dots, n - 1\}$. Thus, the updated witness vector looks like this:
+
+| Witness index      | Value Witness   |
+| ------------------ | --------------- |
+| 0                  | $0$             |
+| 1                  | $\dots$         |
+| $\vdots$           | $\vdots$        |
+| $\windex{a_0}$     | $\rom{A_0}$     |
+| $\windex{a_1}$     | $\rom{A_1}$     |
+| $\windex{\vdots}$  | $\rom{\vdots}$  |
+| $\windex{a_{n-1}}$ | $\rom{A_{n-1}}$ |
+| $\vdots$           | $\vdots$        |
+| $\windex{b_0}$     | $\rom{B_0}$     |
+| $\windex{b_1}$     | $\rom{B_1}$     |
+| $\windex{\vdots}$  | $\rom{\vdots}$  |
+| $\windex{b_{n-1}}$ | $\rom{B_{n-1}}$ |
+| $\vdots$           | $\vdots$        |
+| $\windex{i_1}$     | $\rom{1}$       |
+| $\windex{i_2}$     | $\rom{2}$       |
+| $\windex{\vdots}$  | $\rom{\vdots}$  |
+| $\windex{i_{n-1}}$ | $\rom{n - 1}$   |
+|                    |                 |
+
+Note we do not need to add the index $0$ as we already have it stored as the witness index $\windex{0}$. This also means that we add $(n - 1)$ gates just to create these constant witnesses for the ROM indices. Finally, the ROM table is instantiated as follows:
+
+| Witness index of ROM index | Witness index of value A | Witness index of value B |
+| -------------------------- | ------------------------ | ------------------------ |
+| $\windex{0}$               | $\windex{a_0}$           | $\windex{b_0}$           |
+| $\windex{i_1}$             | $\windex{a_1}$           | $\windex{b_1}$           |
+| $\windex{i_2}$             | $\windex{a_2}$           | $\windex{b_2}$           |
+| $\windex{\vdots}$          | $\windex{\vdots}$        | $\windex{\vdots}$        |
+| $\windex{i_{n-1}}$         | $\windex{a_{n-1}}$       | $\windex{b_{n-1}}$       |
+|                            |                          |                          |
+
+> **Note**: If we have a ROM table with all entries as circuit constants, we end up adding $2n$ gates just to create the constant witnesses for the ROM values. This is not efficient, and we should avoid using ROM tables with all entries as circuit constants. The constant witnesses added for the ROM indices cost an additional $(n - 1)$ gates, but they are reused across multiple ROM tables.
+
+The gate layout for the ROM table is as follows:
+
+| Wire 1             | Wire 2             | Wire 3             | Wire 4             |
+| ------------------ | ------------------ | ------------------ | ------------------ |
+| $\windex{0}$       | $\windex{a_0}$     | $\windex{b_0}$     | $\windex{r_0}$     |
+| $\windex{i_1}$     | $\windex{a_1}$     | $\windex{b_1}$     | $\windex{r_1}$     |
+| $\windex{i_2}$     | $\windex{a_2}$     | $\windex{b_2}$     | $\windex{r_2}$     |
+| $\vdots$           | $\vdots$           | $\vdots$           | $\vdots$           |
+| $\windex{i_{n-1}}$ | $\windex{a_{n-1}}$ | $\windex{b_{n-1}}$ | $\windex{r_{n-1}}$ |
+
+Note the fourth wire is used to store the memory record (also known as the "fingerprint"), which is defined as:
+
+$$
+\textsf{record}(\rom{I}, \rom{A}, \rom{B}) := \textcolor{orange}{\eta} \cdot\rom{I} + \textcolor{orange}{\eta^2} \cdot \rom{A} + \textcolor{orange}{\eta^3} \cdot \rom{B},
+\tag{1}
+$$
+
+where $\rom{I}$ is the ROM index, $(\rom{A}, \rom{B})$ is the ROM value, and $\textcolor{orange}{\eta}$ is a challenge value that is used to ensure the memory record is unique for each ROM entry. The memory record is used to verify the integrity of the ROM table and to ensure that the values are correctly associated with their indices.
+
+In practice, the challenge $\textcolor{orange}{\eta}$ is a random value that can be generated only after the entire witness trace is generated. In other words, we don't know $\textcolor{orange}{\eta}$ until the witness trace is complete. Hence, while adding gates for the ROM table, we add the record variable as a circuit witness and set it to $0$.
+
+#### Reading from ROM Tables
+
+Suppose we want to read from index $\rom{J}$ of the ROM table. The following steps are performed:
+
+1. Fetch the witness index $\windex{j}$ of the ROM index: $\rom{J}$.
+2. Retrieve the corresponding ROM value: $(\rom{A_j}, \rom{B_j}) = \textsf{table}[\rom{J}]$.
+3. Add two new circuit variables $\windex{a_j}$ and $\windex{b_j}$ to the circuit, which are set to the values $\rom{A_j}$ and $\rom{B_j}$ respectively.
+
+To enforce this in the circuit, we add a ROM gate:
+
+| Wire 1       | Wire 2         | Wire 3         | Wire 4         |
+| ------------ | -------------- | -------------- | -------------- |
+| $\windex{j}$ | $\windex{a_j}$ | $\windex{b_j}$ | $\windex{r_j}$ |
+
+where $\windex{r_j}$ is the witness index of the memory record for the tuple $(\rom{J}, \rom{A_j}, \rom{B_j}).$ We need to enforce a constraint that the memory record was computed correctly as per equation $(1)$.
+
+Additionally, we also need to add sorted ROM gates to the trace (as a part of post-processing of the circuit) to ensure that the ROM entries are consistent. The sorted ROM gates are added to ensure that the ROM entries are in a sorted order based on their indices. To enforce this, we add the following constraint on the sorted ROM gates: given the following two sorted ROM gates,
+
+| Wire 1        | Wire 2            | Wire 3            | Wire 4            |
+| ------------- | ----------------- | ----------------- | ----------------- |
+| $\windex{j}$  | $\windex{a_j}$    | $\windex{b_j}$    | $\windex{r_j}$    |
+| $\windex{j'}$ | $\windex{a_{j'}}$ | $\windex{b_{j'}}$ | $\windex{r_{j'}}$ |
+
+we check that $\windex{j} \leq \windex{j'}$ and that
+
+$$
+\windex{j} = \windex{j'} \implies \textsf{record}(\windex{j}, \windex{a_j}, \windex{b_j}) = \textsf{record}(\windex{j'}, \windex{a_{j'}}, \windex{b_{j'}}).
+$$
diff --git a/barretenberg/cpp/src/barretenberg/stdlib/primitives/memory/twin_rom_table.cpp b/barretenberg/cpp/src/barretenberg/stdlib/primitives/memory/twin_rom_table.cpp
index f1fc00f13205..4937404e4c97 100644
--- a/barretenberg/cpp/src/barretenberg/stdlib/primitives/memory/twin_rom_table.cpp
+++ b/barretenberg/cpp/src/barretenberg/stdlib/primitives/memory/twin_rom_table.cpp
@@ -30,7 +30,8 @@ twin_rom_table<Builder>::twin_rom_table(const std::vector<std::array<field_pt, 2
     }
     raw_entries = table_entries;
     length = raw_entries.size();
-    // do not initialize the table yet. The input entries might all be constant,
+
+    // We do not initialize the table yet. The input entries might all be constant,
     // if this is the case we might not have a valid pointer to a Builder
     // We get around this, by initializing the table when `operator[]` is called
     // with a non-const field element.
@@ -51,7 +52,8 @@ template <typename Builder> void twin_rom_table<Builder>::initialize_table() con
     if (initialized) {
         return;
     }
-    ASSERT(context != nullptr);
+    BB_ASSERT_EQ(context != nullptr, true, "twin_rom_table: context must be set before initializing the table");
+
     // populate table. Table entries must be normalized and cannot be constants
     for (const auto& entry : raw_entries) {
         field_pt first;
@@ -68,6 +70,8 @@ template <typename Builder> void twin_rom_table<Builder>::initialize_table() con
         }
         entries.emplace_back(field_pair_pt{ first, second });
     }
+
+    // create uninitialized table of size `length`
     rom_id = context->create_ROM_array(length);
 
     for (size_t i = 0; i < length; ++i) {
@@ -155,7 +159,7 @@ std::array<field_t<Builder>, 2> twin_rom_table<Builder>::operator[](const field_
         context->failure("twin_rom_table: ROM array access out of bounds");
     }
 
-    auto output_indices = context->read_ROM_array_pair(rom_id, index.normalize().get_witness_index());
+    auto output_indices = context->read_ROM_array_pair(rom_id, index.get_normalized_witness_index());
     auto pair = field_pair_pt{
         field_pt::from_witness_index(context, output_indices[0]),
         field_pt::from_witness_index(context, output_indices[1]),
diff --git a/barretenberg/cpp/src/barretenberg/stdlib_circuit_builders/plookup_tables/non_native_group_generator.cpp b/barretenberg/cpp/src/barretenberg/stdlib_circuit_builders/plookup_tables/non_native_group_generator.cpp
index b6511098790c..4754e18b8309 100644
--- a/barretenberg/cpp/src/barretenberg/stdlib_circuit_builders/plookup_tables/non_native_group_generator.cpp
+++ b/barretenberg/cpp/src/barretenberg/stdlib_circuit_builders/plookup_tables/non_native_group_generator.cpp
@@ -40,57 +40,51 @@ template <typename G1> void ecc_generator_table<G1>::init_generator_tables()
         uint256_t x = static_cast<uint256_t>(point_table[i].x);
         uint256_t y = static_cast<uint256_t>(point_table[i].y);
 
-        const uint256_t SHIFT = uint256_t(1) << 68;
+        // Store the values in prime-basis lookup tables
+        ecc_generator_table<G1>::generator_xyprime_table[i] = std::make_pair(bb::fr(x), bb::fr(y));
+        ecc_generator_table<G1>::generator_endo_xyprime_table[i] = std::make_pair(bb::fr(endo_x), bb::fr(y));
+
+        // Compute x limbs
+        constexpr size_t num_limb_bits = stdlib::NUM_LIMB_BITS_IN_FIELD_SIMULATION;
+        const uint256_t SHIFT = uint256_t(1) << num_limb_bits;
         const uint256_t MASK = SHIFT - 1;
         uint256_t x0 = x & MASK;
-        x = x >> 68;
+        x = x >> num_limb_bits;
         uint256_t x1 = x & MASK;
-        x = x >> 68;
+        x = x >> num_limb_bits;
         uint256_t x2 = x & MASK;
-        x = x >> 68;
+        x = x >> num_limb_bits;
         uint256_t x3 = x & MASK;
 
+        // Compute endo x limbs
         uint256_t endox0 = endo_x & MASK;
-        endo_x = endo_x >> 68;
+        endo_x = endo_x >> num_limb_bits;
         uint256_t endox1 = endo_x & MASK;
-        endo_x = endo_x >> 68;
+        endo_x = endo_x >> num_limb_bits;
         uint256_t endox2 = endo_x & MASK;
-        endo_x = endo_x >> 68;
+        endo_x = endo_x >> num_limb_bits;
         uint256_t endox3 = endo_x & MASK;
 
+        // Compute y limbs
         uint256_t y0 = y & MASK;
-        y = y >> 68;
+        y = y >> num_limb_bits;
         uint256_t y1 = y & MASK;
-        y = y >> 68;
+        y = y >> num_limb_bits;
         uint256_t y2 = y & MASK;
-        y = y >> 68;
+        y = y >> num_limb_bits;
         uint256_t y3 = y & MASK;
-        ecc_generator_table<G1>::generator_xlo_table[i] = std::make_pair<bb::fr, bb::fr>(x0, x1);
-        ecc_generator_table<G1>::generator_xhi_table[i] = std::make_pair<bb::fr, bb::fr>(x2, x3);
-        ecc_generator_table<G1>::generator_endo_xlo_table[i] = std::make_pair<bb::fr, bb::fr>(endox0, endox1);
-        ecc_generator_table<G1>::generator_endo_xhi_table[i] = std::make_pair<bb::fr, bb::fr>(endox2, endox3);
-        ecc_generator_table<G1>::generator_ylo_table[i] = std::make_pair<bb::fr, bb::fr>(y0, y1);
-        ecc_generator_table<G1>::generator_yhi_table[i] = std::make_pair<bb::fr, bb::fr>(y2, y3);
-        ecc_generator_table<G1>::generator_xyprime_table[i] =
-            std::make_pair<bb::fr, bb::fr>(bb::fr(uint256_t(point_table[i].x)), bb::fr(uint256_t(point_table[i].y)));
-        ecc_generator_table<G1>::generator_endo_xyprime_table[i] = std::make_pair<bb::fr, bb::fr>(
-            bb::fr(uint256_t(point_table[i].x * beta)), bb::fr(uint256_t(point_table[i].y)));
+
+        // Store the limb values in the respective lookup tables
+        ecc_generator_table<G1>::generator_xlo_table[i] = std::make_pair(x0, x1);
+        ecc_generator_table<G1>::generator_xhi_table[i] = std::make_pair(x2, x3);
+        ecc_generator_table<G1>::generator_endo_xlo_table[i] = std::make_pair(endox0, endox1);
+        ecc_generator_table<G1>::generator_endo_xhi_table[i] = std::make_pair(endox2, endox3);
+        ecc_generator_table<G1>::generator_ylo_table[i] = std::make_pair(y0, y1);
+        ecc_generator_table<G1>::generator_yhi_table[i] = std::make_pair(y2, y3);
     }
     init = true;
 }
 
-// map 0 to 255 into 0 to 510 in steps of two
-// actual naf value = (position * 2) - 255
-template <typename G1> size_t ecc_generator_table<G1>::convert_position_to_shifted_naf(const size_t position)
-{
-    return (position * 2);
-}
-
-template <typename G1> size_t ecc_generator_table<G1>::convert_shifted_naf_to_position(const size_t shifted_naf)
-{
-    return shifted_naf / 2;
-}
-
 /**
  * Get 2 low 68-bit limbs of x-coordinate
  **/
@@ -493,4 +487,4 @@ MultiTable ecc_generator_table<G1>::get_xyprime_endo_table(const MultiTableId id
 template class ecc_generator_table<bb::g1>;
 template class ecc_generator_table<secp256k1::g1>;
 
-} // namespace bb::plookup::ecc_generator_tables
\ No newline at end of file
+} // namespace bb::plookup::ecc_generator_tables
diff --git a/barretenberg/cpp/src/barretenberg/stdlib_circuit_builders/plookup_tables/non_native_group_generator.hpp b/barretenberg/cpp/src/barretenberg/stdlib_circuit_builders/plookup_tables/non_native_group_generator.hpp
index 0755971b0cd2..cbf7ec1dbb12 100644
--- a/barretenberg/cpp/src/barretenberg/stdlib_circuit_builders/plookup_tables/non_native_group_generator.hpp
+++ b/barretenberg/cpp/src/barretenberg/stdlib_circuit_builders/plookup_tables/non_native_group_generator.hpp
@@ -19,7 +19,17 @@ template <typename G1> class ecc_generator_table {
     using element = typename G1::element;
     /**
      * Store arrays of precomputed 8-bit lookup tables for generator point coordinates (and their endomorphism
-     *equivalents)
+     * equivalents):
+     * - xlo_table: (x0, x1) = low limbs of x-coordinate
+     * - xhi_table: (x2, x3) = high limbs of x-coordinate
+     * - ylo_table: (y0, y1) = low limbs of y-coordinate
+     * - yhi_table: (y2, y3) = high limbs of y-coordinate
+     * - xyprime_table: (xp, yp) = x-coordinate and y-coordinate in prime basis (i.e., x % p, y % p)
+     * - endo_xlo_table: (x0', x1') = low limbs of endomorphism-mapped x-coordinate (x' = x * beta)
+     * - endo_xhi_table: (x2', x3') = high limbs of endomorphism-mapped x-coordinate (x' = x * beta)
+     * - endo_xyprime_table: (x'p, yp) = endomorphism-mapped x and y-coord in prime basis (i.e., x' % p, y % p)
+     *
+     * Each table has 256 rows and 2 columns.
      **/
     inline static std::array<std::pair<fr, fr>, 256> generator_endo_xlo_table;
     inline static std::array<std::pair<fr, fr>, 256> generator_endo_xhi_table;
@@ -33,8 +43,6 @@ template <typename G1> class ecc_generator_table {
 
     static void init_generator_tables();
 
-    static size_t convert_position_to_shifted_naf(const size_t position);
-    static size_t convert_shifted_naf_to_position(const size_t shifted_naf);
     static std::array<fr, 2> get_xlo_endo_values(const std::array<uint64_t, 2> key);
     static std::array<fr, 2> get_xhi_endo_values(const std::array<uint64_t, 2> key);
     static std::array<fr, 2> get_xlo_values(const std::array<uint64_t, 2> key);
diff --git a/barretenberg/cpp/src/barretenberg/stdlib_circuit_builders/rom_ram_logic.cpp b/barretenberg/cpp/src/barretenberg/stdlib_circuit_builders/rom_ram_logic.cpp
index 669c7f19ca2c..f03cea32b9bf 100644
--- a/barretenberg/cpp/src/barretenberg/stdlib_circuit_builders/rom_ram_logic.cpp
+++ b/barretenberg/cpp/src/barretenberg/stdlib_circuit_builders/rom_ram_logic.cpp
@@ -56,8 +56,7 @@ void RomRamLogic_<ExecutionTrace>::set_ROM_element_pair(CircuitBuilder* builder,
 {
     BB_ASSERT_GT(rom_arrays.size(), rom_id);
     RomTranscript& rom_array = rom_arrays[rom_id];
-    const uint32_t index_witness =
-        (index_value == 0) ? builder->zero_idx : builder->put_constant_variable((uint64_t)index_value);
+    const uint32_t index_witness = builder->put_constant_variable((uint64_t)index_value);
     BB_ASSERT_GT(rom_array.state.size(), index_value);
     BB_ASSERT_EQ(rom_array.state[index_value][0], UNINITIALIZED_MEMORY_RECORD);
     RomRecord new_record{