AztecProtocol · jeanmon · Mar 19, 2026 · Mar 9, 2026 · Mar 9, 2026 · Mar 9, 2026
@@ -1,101 +1,166 @@
 include "precomputed.pil";
-include "calldata_hashing.pil";
 
-// ###########
-// Calldata
-// ###########
-//
-// This circuit fills the calldata column with one field per row. We constrain that the index
-// increments by one each row until we begin a new calldata instance, where the context_id must
-// increase and latch must indicate the final row.
-//
-// The values in the calldata columns are really hints. Their correctness is constrained by calldata_hashing.pil,
-// where the size and hash of the calldata is validated. The hash is looked up in the tx and matched to
-// the one in a tx's public inputs, and the size is looked up for each call. These values should be
-// looked up via calldata_hashing.pil's latch against the context_id.
-//
-// For empty calldata, we have a special case where no value exists, but should have a row in this
-// trace to indicate that a certain context_id has been processed:
-//  index |  value | context_id | latch |
-// -------+--------+------------+-------+
-//    0   |    0   |     id     |   1   |
-// This is the only case in which index = 0 and sel = 1. Note that no lookups should access this trace
-// with index = 0 unless to confirm that the calldata is empty (data_copy and calldata_hashing lookup
-// all existing values with a +1).
-//
-// e.g. calldata: [0x111, 0x222, 0x333, 0x444]
-// calldata.pil:
-//  index |  value | context_id | latch |
-// -------+--------+------------+-------+
-//    1   |  0x111 |     1      |   0   |
-//    2   |  0x222 |     1      |   0   |
-//    3   |  0x333 |     1      |   0   |
-//    4   |  0x444 |     1      |   1   |
-//
-// e.g. cd_hash_0 = H([sep, 0x111, 0x222, 0x333, 0x444])
-// calldata_hashing.pil:
-//   index_0_ |  index_1_ | index_2_ | context_id | input_0_ | input_1_ | input_2_ | output_hash | latch | start | rounds_rem | padding | calldata_size |
-// -----------+-----------+----------+------------+----------+----------+----------+-------------+-------+-------+------------+---------+---------------+
-//     0      |     1     |     2    |      1     |    sep   |   0x111  |   0x222  |  cd_hash_0  |   0   |   1   |     2      |    0    |       4       |
-//     3      |     4     |     5    |      1     |   0x333  |   0x444  |     0    |  cd_hash_0  |   1   |   0   |     1      |    1    |       4       |
-//
-// Usage:
-// sel {
-//     index /*starts at 1 (see NOTE below)*/, context_id, value
-// } in calldata.sel {
-//     calldata.index, calldata.context_id, calldata.value
-// };
+/**
+ * Calldata Retrieval
+ *
+ * Stores one calldata field per row, grouped by context_id. Each row holds an (index, value)
+ * pair. The values are hints — their correctness is enforced by calldata_hashing.pil, which
+ * looks up every field from this trace and verifies the resulting Poseidon2 hash matches the
+ * hash submitted in the transaction's public inputs.
+ *
+ * SECURITY ARGUMENT — WHY CALLDATA VALUES ARE TRUSTWORTHY:
+ *
+ * The calldata pipeline works as follows (see diagram in calldata_hashing.pil):
+ *
+ *   1. tx.pil reads `calldata_hash` from the transaction's public inputs
+ *      (tx.pil #[READ_PUBLIC_CALL_REQUEST_PHASE]).
+ *
+ *   2. tx.pil performs a permutation into calldata_hashing.pil at end rows
+ *      (tx.pil #[READ_CALLDATA_HASH]), matching (`output_hash`, `calldata_size`, `context_id`).
+ *      This ensures that calldata_hashing produces exactly the hash declared by the
+ *      transaction for each enqueued call. The `calldata_size` is "returned back" from
+ *      calldata_hashing.pil.
+ *
+ *   3. calldata_hashing.pil rebuilds the hash from scratch: it prepends a domain separator,
+ *      then looks up every calldata field from this trace via #[GET_CALLDATA_FIELD_0/1/2],
+ *      feeds them into a Poseidon2 hash (via #[POSEIDON2_HASH]), and checks that the
+ *      resulting hash equals `output_hash`. This guarantees that the values stored here are
+ *      exactly the preimage of the hash committed in the public inputs.
+ *
+ *   4. calldata_hashing.pil also enforces the size: #[CHECK_FINAL_INDEX] constrains that
+ *      `calldata_size` equals the last non-padding index, and #[CHECK_FINAL_SIZE] is a
+ *      permutation between calldata_hashing (at end rows for non-empty calldata) and this
+ *      trace (at end rows), checking that (`calldata_size`, `context_id`) match
+ *      (`calldata.index`, `calldata.context_id`). This binds the length of data in this trace
+ *      to the length declared in calldata_hashing. Each computation block in this trace
+ *      starts at index 1, increments by 1 each row, and ends at index `calldata_size`.
+ *
+ *   5. tx.pil dispatches `calldata_size` to the execution trace (tx.pil #[DISPATCH_EXEC_START]),
+ *      which passes it to data_copy.pil for CALLDATACOPY operations. data_copy.pil reads
+ *      individual field values from this trace (data_copy.pil #[COL_READ]).
+ *
+ * Because calldata_hashing looks up every field from index 1..calldata_size in this trace,
+ * a malicious prover cannot insert extra fields, omit fields, or alter values without
+ * breaking the hash equality checked against the public inputs.
+ *
+ * INDEX CORRECTNESS:
+ * Within each computation block, #[INDEX] constrains that the index starts at 1 after a
+ * latch and increments by 1 each row. The final index at end == 1 equals the calldata size,
+ * which is enforced by the #[CHECK_FINAL_SIZE] permutation with calldata_hashing.pil.
+ *
+ * EMPTY CALLDATA:
+ * When calldata is empty, calldata_hashing.pil hashes only the domain separator (size = 0).
+ * The #[CHECK_FINAL_SIZE] permutation uses sel_end_not_empty as its source selector, which
+ * evaluates to 0 for empty calldata (start = 1, PADDING_1 = 1). Therefore, no entry in this
+ * trace is required for empty calldata — no rows are generated and no permutation fires.
+ *
+ * CONTEXT_ID UNIQUENESS:
+ * Each computation block has a unique `context_id`, but this is NOT enforced locally by this
+ * trace. Instead, it follows from the permutation chain:
+ *   - tx.pil assigns a unique `context_id` per enqueued call.
+ *   - #[READ_CALLDATA_HASH] (tx.pil) is a permutation into `calldata_hashing.end`, so
+ *     `calldata_hashing` has unique `context_ids` at "end" rows.
+ *   - #[CHECK_FINAL_SIZE] is a permutation between calldata_hashing `end` rows and
+ *     calldata `end` rows, restricted to non-empty calldata, enforcing a 1-to-1 correspondence
+ *     between the non-empty calldata instances.
+ * Therefore, each `calldata.end` row must correspond to a distinct `context_id`; that is, the
+ * `context_id` values with non-empty calldata in calldata_hashing.pil are all distinct.
+ * A prover cannot add extra computation blocks (the permutation count would not match) or duplicate
+ * a `context_id` (it would require a duplicate in calldata_hashing, which is impossible).
+ *
+ * USAGE:
+ *
+ * Pattern 1 — Read calldata field by index (calldata_hashing.pil, data_copy.pil):
+ *
+ *   caller_sel { index, context_id, value }
+ *   in
+ *   calldata.sel { calldata.index, calldata.context_id, calldata.value };
+ *
+ *   - Inputs: index (starts at 1), context_id.
+ *   - Output: value (the calldata field).
+ *   - Selector on destination: calldata.sel.
+ *
+ * Pattern 2 — Check final calldata size (calldata_hashing.pil #[CHECK_FINAL_SIZE]):
+ *
+ *   sel_end_not_empty { calldata_size, context_id }
+ *   is
+ *   calldata.end { calldata.index, calldata.context_id };
+ *
+ *   - Inputs: calldata_size, context_id.
+ *   - Outputs: There are no outputs from this permutation. This permutation serves to enforce a
+ *              1-to-1 correspondence between the computation blocks in calldata_hashing.pil and
+ *              this trace for a non-empty calldata.
+ *   - Selector on source: sel_end_not_empty (end rows of non-empty calldata only).
+ *   - Selector on destination: calldata.end (the final row per context_id).
+ *
+ * TRACE SHAPE:
+ * Variable number of rows per context_id — one row per calldata field. The index increments
+ * by one each row until end = 1 marks the final row. Empty calldata produces no rows in
+ * this trace.
+ *
+ * e.g. calldata: [0x111, 0x222, 0x333, 0x444]
+ *  index |  value | context_id |  end  |
+ * -------+--------+------------+-------+
+ *    1   |  0x111 |     1      |   0   |
+ *    2   |  0x222 |     1      |   0   |
+ *    3   |  0x333 |     1      |   0   |
+ *    4   |  0x444 |     1      |   1   |
+ *
+ * ERROR HANDLING: This gadget does not have error conditions. The values are hints
+ * validated by calldata_hashing.pil.
+ *
+ * INTERACTIONS:
+ * This trace is not the source of any interaction. See USAGE above to learn which traces
+ * interact with this trace.
+ *
+ * @column sel Selector for active rows.
+ * @column value The calldata field value (a hint, validated by calldata_hashing.pil).
+ * @column context_id Identifies which enqueued call this calldata belongs to.
+ * @column index The 1-based index of the calldata field within the call.
+ * @column end Designates the final row of calldata for a given context_id.
+ */
 namespace calldata;
 
     #[skippable_if]
     sel = 0;
 
     pol commit sel; // @boolean
     sel * (1 - sel) = 0;
-    pol commit value;
-    pol commit context_id;
-    // **NOTE** The index starts at one in this trace (see above comment for special case of empty calldata and index = 0):
-    // We do not currently constrain this (or the existence of a 'start' column) since calldata_hashing constrains it
-    // and all looked up calldata values are basically hints which must also go through calldata_hashing.
-    //      e.g. data_copy will look up values in this trace by context_id. We know that these values are valid (and
-    //      start at index=1) because data_copy also looks up the calldata size by the same context_id, constrained by
-    //      calldata_hashing, which itself constrains the index.
-    // We could constrain this here by introducing a start column:
-    //      pol commit start;
-    // Adding relations like (see calldata_hashing.pil's):
-    //      #[START_AFTER_LATCH]
-    //      sel' * (start' - FIRST_OR_LAST_CALLDATA) = 0;
-    //      #[START_INDEX_IS_ONE]
-    //      sel * start * (is_not_empty * (index - 1) + is_empty * index) = 0;
-    // If start = 1, #[START_INDEX_IS_ONE] should constrain that index = 1 unless we have empty calldata. In that case
-    // the 'special' row must have latch = 1, index = 0, value = 0, and will be constrained by calldata_hashing to have
-    // size = 0. This is a different case to a calldata of one field of value 0, where index = 1.
-    pol commit index;
-    // Designates end of calldata for that context_id
-    pol commit latch; // @boolean
-    latch * (1 - latch) = 0;
 
-    // latch == 1 ==> sel == 1
-    #[SEL_TOGGLED_AT_LATCH]
-    latch * (1 - sel) = 0;
+    // ==== MULTI-ROW COMPUTATION SELECTORS (variant without start) ====
+    // See recipe: https://github.com/AztecProtocol/aztec-packages/blob/next/barretenberg/cpp/pil/vm2/docs/recipes.md#variant-without-start
 
-    pol FIRST_OR_LAST_CALLDATA = precomputed.first_row + latch;
-    // Index increments until latch
-    sel * (1 - FIRST_OR_LAST_CALLDATA) * (index' - index - 1) = 0;
+    pol commit end; // @boolean
+    end * (1 - end) = 0;
 
+    pol LATCH_CONDITION = end + precomputed.first_row; // nonzero on the trace's first row and on each row with end == 1
 
-    // If sel = 0, sel' != 1
+    // end == 1 ==> sel == 1 (an end row is always an active row)
+    #[SEL_ON_END]
+    end * (1 - sel) = 0;
+
+    // Note (used by #[INDEX] below): sel * (1 - LATCH_CONDITION) simplifies to (sel - end) because:
+    //   - end == 1 ==> sel == 1 (by #[SEL_ON_END]), so sel * end = end.
+    //   - sel == 0 on the first row (proving system guarantee), so sel * first_row = 0.
+
+    // sel' must equal sel on every non-latch row, so sel can only change right after the first row or an end row.
     #[TRACE_CONTINUITY]
-    (1 - precomputed.first_row) * (1 - sel) * sel' = 0;
+    (1 - LATCH_CONDITION) * (sel - sel') = 0;
 
-    // Context id does not change until we latch
-    #[CONTEXT_ID_CONTINUITY]
-    (1 - FIRST_OR_LAST_CALLDATA) * (context_id - context_id') = 0;
+    // ==== INPUTS AND OUTPUTS ====
 
-    // We ensure that context_id is always different and increasing at each latch:
-    pol commit diff_context_id;
-    diff_context_id = latch * sel' * (context_id' - context_id - 1);
+    pol commit value;
+    pol commit context_id;
+    pol commit index;
 
-    #[RANGE_CHECK_CONTEXT_ID_DIFF]
-    latch { diff_context_id } in precomputed.sel_range_16 { precomputed.idx };
+    // Latch row (first row or `end == 1`): index' = sel', so a block that starts on the next row starts at index 1.
+    // Active non-end row (sel - end == 1): index' = index + 1. Inactive non-latch row: index' = 0.
+    #[INDEX]
+    index' = (sel - end) * (index + 1) + sel' * LATCH_CONDITION;
+
+    // ==== COLUMN CONTINUITY ====
+
+    // context_id' must equal context_id on every non-latch row, so it can only change right after an end row (or the first row).
+    #[CONTEXT_ID_CONTINUITY]
+    (1 - LATCH_CONDITION) * (context_id - context_id') = 0;