-
Notifications
You must be signed in to change notification settings - Fork 598
fix(avm)!: calldata - internal audit #21380
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
b281d86
86be30e
5081ad5
abe4591
a246814
1f2c2e0
3967ff4
a2b0f5f
9dcefe3
f4514be
74a9208
b987843
8640ba3
7fd89f6
8fe1bdf
baed1b4
96dfa85
764a4f9
785f0d7
2e4a8c3
248d84a
948b033
bb72c24
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,101 +1,166 @@ | ||
| include "precomputed.pil"; | ||
| include "calldata_hashing.pil"; | ||
|
|
||
| // ########### | ||
| // Calldata | ||
| // ########### | ||
| // | ||
| // This circuit fills the calldata column with one field per row. We constrain that the index | ||
| // increments by one each row until we begin a new calldata instance, where the context_id must | ||
| // increase and latch must indicate the final row. | ||
| // | ||
| // The values in the calldata columns are really hints. Their correctness is constrained by calldata_hashing.pil, | ||
| // where the size and hash of the calldata is validated. The hash is looked up in the tx and matched to | ||
| // the one in a tx's public inputs, and the size is looked up for each call. These values should be | ||
| // looked up via calldata_hashing.pil's latch against the context_id. | ||
| // | ||
| // For empty calldata, we have a special case where no value exists, but should have a row in this | ||
| // trace to indicate that a certain context_id has been processed: | ||
| // index | value | context_id | latch | | ||
| // -------+--------+------------+-------+ | ||
| // 0 | 0 | id | 1 | | ||
| // This is the only case in which index = 0 and sel = 1. Note that no lookups should access this trace | ||
| // with index = 0 unless to confirm that the calldata is empty (data_copy and calldata_hashing lookup | ||
| // all existing values with a +1). | ||
| // | ||
| // e.g. calldata: [0x111, 0x222, 0x333, 0x444] | ||
| // calldata.pil: | ||
| // index | value | context_id | latch | | ||
| // -------+--------+------------+-------+ | ||
| // 1 | 0x111 | 1 | 0 | | ||
| // 2 | 0x222 | 1 | 0 | | ||
| // 3 | 0x333 | 1 | 0 | | ||
| // 4 | 0x444 | 1 | 1 | | ||
| // | ||
| // e.g. cd_hash_0 = H([sep, 0x111, 0x222, 0x333, 0x444]) | ||
| // calldata_hashing.pil: | ||
| // index_0_ | index_1_ | index_2_ | context_id | input_0_ | input_1_ | input_2_ | output_hash | latch | start | rounds_rem | padding | calldata_size | | ||
| // -----------+-----------+----------+------------+----------+----------+----------+-------------+-------+-------+------------+---------+---------------+ | ||
| // 0 | 1 | 2 | 1 | sep | 0x111 | 0x222 | cd_hash_0 | 0 | 1 | 2 | 0 | 4 | | ||
| // 3 | 4 | 5 | 1 | 0x333 | 0x444 | 0 | cd_hash_0 | 1 | 0 | 1 | 1 | 4 | | ||
| // | ||
| // Usage: | ||
| // sel { | ||
| // index /*starts at 1 (see NOTE below)*/, context_id, value | ||
| // } in calldata.sel { | ||
| // calldata.index, calldata.context_id, calldata.value | ||
| // }; | ||
| /** | ||
| * Calldata Retrieval | ||
| * | ||
| * Stores one calldata field per row, grouped by context_id. Each row holds a (index, value) | ||
| * pair. The values are hints — their correctness is enforced by calldata_hashing.pil, which | ||
| * looks up every field from this trace and verifies the resulting Poseidon2 hash matches the | ||
| * hash submitted in the transaction's public inputs. | ||
| * | ||
| * SECURITY ARGUMENT — WHY CALLDATA VALUES ARE TRUSTWORTHY: | ||
| * | ||
| * The calldata pipeline works as follows (see diagram in calldata_hashing.pil): | ||
| * | ||
| * 1. tx.pil reads `calldata_hash` from the transaction's public inputs | ||
| * (tx.pil #[READ_PUBLIC_CALL_REQUEST_PHASE]). | ||
| * | ||
| * 2. tx.pil performs a permutation into calldata_hashing.pil at end rows | ||
| * (tx.pil #[READ_CALLDATA_HASH]), matching (`output_hash`, `calldata_size`, `context_id`). | ||
| * This ensures that calldata_hashing produces exactly the hash declared by the | ||
| * transaction for each enqueued call. The `calldata_size` is "returned back" from | ||
jeanmon marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| * calldata_hashing.pil. | ||
| * | ||
| * 3. calldata_hashing.pil rebuilds the hash from scratch: it prepends a domain separator, | ||
| * then looks up every calldata field from this trace via #[GET_CALLDATA_FIELD_0/1/2], | ||
| * feeds them into a Poseidon2 hash (via #[POSEIDON2_HASH]), and checks that the | ||
| * resulting hash equals `output_hash`. This guarantees that the values stored here are | ||
| * exactly the preimage of the hash committed in the public inputs. | ||
| * | ||
| * 4. calldata_hashing.pil also enforces the size: #[CHECK_FINAL_INDEX] constrains that | ||
| * `calldata_size` equals the last non-padding index, and #[CHECK_FINAL_SIZE] is a | ||
| * permutation between calldata_hashing (at end rows for non-empty calldata) and this | ||
| * trace (at end rows), checking that (`calldata_size`, `context_id`) match | ||
| * (`calldata.index`, `calldata.context_id`). This binds the length of data in this trace | ||
| * to the length declared in calldata_hashing. Each computation block in this trace | ||
| * starts at index 1, increments by 1 each row, and ends at index `calldata_size`. | ||
| * | ||
| * 5. tx.pil dispatches `calldata_size` to the execution trace (tx.pil #[DISPATCH_EXEC_START]), | ||
| * which passes it to data_copy.pil for CALLDATACOPY operations. data_copy.pil reads | ||
| * individual field values from this trace (data_copy.pil #[COL_READ]). | ||
| * | ||
| * Because calldata_hashing looks up every field from index 1..calldata_size in this trace, | ||
| * a malicious prover cannot insert extra fields, omit fields, or alter values without | ||
| * breaking the hash equality checked against the public inputs. | ||
| * | ||
| * INDEX CORRECTNESS: | ||
| * Within each computation block, #[INDEX] constrains that the index starts at 1 after a | ||
| * latch and increments by 1 each row. The final index at end == 1 equals the calldata size, | ||
| * which is enforced by the #[CHECK_FINAL_SIZE] permutation with calldata_hashing.pil. | ||
| * | ||
| * EMPTY CALLDATA: | ||
| * When calldata is empty, calldata_hashing.pil hashes only the domain separator (size = 0). | ||
| * The #[CHECK_FINAL_SIZE] permutation uses sel_end_not_empty as its source selector, which | ||
| * evaluates to 0 for empty calldata (start = 1, PADDING_1 = 1). Therefore, no entry in this | ||
| * trace is required for empty calldata — no rows are generated and no permutation fires. | ||
|
Comment on lines
+51
to
+55
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
EDIT: Spent some time convincing myself of this and thought it may be useful for future readers to put my thoughts down! TLDR: This looks good, great work Jean!! I initially thought it felt strange to have no rows in the […]. Ofc, we cannot manipulate/find a collision for this hash due to existing guarantees in […]. 'Skipping' this trace is also not a completeness issue as (unlike the bytecode traces) […]. Aside: originally, I added a 'special row' to this trace to manage empty calldata so that there was a 1:1 between calldata processed and a set of rows (start -> end, sel on, context_id for calldata processed) here. IIRC back then we needed it for […].
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Great summary of why it is fine to remove empty calldata entries in calldata.pil! |
||
| * | ||
| * CONTEXT_ID UNIQUENESS: | ||
| * Each computation block has a unique `context_id`, but this is NOT enforced locally by this | ||
| * trace. Instead, it follows from the permutation chain: | ||
| * - tx.pil assigns a unique `context_id` per enqueued call. | ||
| * - #[READ_CALLDATA_HASH] (tx.pil) is a permutation into `calldata_hashing.end`, so | ||
| * `calldata_hashing` has unique `context_ids` at "end" rows. | ||
| * - #[CHECK_FINAL_SIZE] is a permutation between calldata_hashing `end` rows and | ||
| * calldata `end` rows, restricted to non-empty calldata, enforcing a 1-to-1 correspondence | ||
| * between the non-empty calldata instances. | ||
| * Therefore, each `calldata.end` row must correspond to a distinct `context_id`, because the | ||
| * `context_ids` with non-empty calldata in calldata_hashing.pil are all distinct. | ||
| * A prover cannot add extra computation blocks (the permutation count would not match) or duplicate | ||
| * a `context_id` (it would require a duplicate in calldata_hashing, which is impossible). | ||
| * | ||
| * USAGE: | ||
| * | ||
| * Pattern 1 — Read calldata field by index (calldata_hashing.pil, data_copy.pil): | ||
| * | ||
| * caller_sel { index, context_id, value } | ||
| * in | ||
| * calldata.sel { calldata.index, calldata.context_id, calldata.value }; | ||
| * | ||
| * - Inputs: index (starts at 1), context_id. | ||
| * - Output: value (the calldata field). | ||
| * - Selector on destination: calldata.sel. | ||
| * | ||
| * Pattern 2 — Check final calldata size (calldata_hashing.pil #[CHECK_FINAL_SIZE]): | ||
| * | ||
| * sel_end_not_empty { calldata_size, context_id } | ||
| * is | ||
| * calldata.end { calldata.index, calldata.context_id }; | ||
| * | ||
| * - Inputs: calldata_size, context_id | ||
| * - Outputs: There are no outputs from this permutation. This permutation serves to enforce a | ||
| * 1-to-1 correspondence between the computation blocks in calldata_hashing.pil and | ||
| * this trace for non-empty calldata. | ||
| * - Selector on source: sel_end_not_empty (end rows of non-empty calldata only). | ||
| * - Selector on destination: calldata.end (the final row per context_id). | ||
| * | ||
| * TRACE SHAPE: | ||
| * Variable number of rows per context_id — one row per calldata field. The index increments | ||
| * by one each row until end = 1 marks the final row. Empty calldata produces no rows in | ||
| * this trace. | ||
| * | ||
| * e.g. calldata: [0x111, 0x222, 0x333, 0x444] | ||
| * index | value | context_id | end | | ||
| * -------+--------+------------+-------+ | ||
| * 1 | 0x111 | 1 | 0 | | ||
| * 2 | 0x222 | 1 | 0 | | ||
| * 3 | 0x333 | 1 | 0 | | ||
| * 4 | 0x444 | 1 | 1 | | ||
| * | ||
| * ERROR HANDLING: This gadget does not have error conditions. The values are hints | ||
| * validated by calldata_hashing.pil. | ||
| * | ||
| * INTERACTIONS: | ||
| * This trace is not the source of any interaction. See USAGE above to learn which traces | ||
| * interact with this trace. | ||
| * | ||
| * @column sel Selector for active rows. | ||
| * @column value The calldata field value (a hint, validated by calldata_hashing.pil). | ||
| * @column context_id Identifies which enqueued call this calldata belongs to. | ||
| * @column index The 1-based index of the calldata field within the call. | ||
| * @column end Designates the final row of calldata for a given context_id. | ||
| */ | ||
| namespace calldata; | ||
|
|
||
| #[skippable_if] | ||
| sel = 0; | ||
|
|
||
| pol commit sel; // @boolean | ||
| sel * (1 - sel) = 0; | ||
| pol commit value; | ||
| pol commit context_id; | ||
| // **NOTE** The index starts at one in this trace (see above comment for special case of empty calldata and index = 0): | ||
| // We do not currently constrain this (or the existence of a 'start' column) since calldata_hashing constrains it | ||
| // and all looked up calldata values are basically hints which must also go through calldata_hashing. | ||
| // e.g. data_copy will look up values in this trace by context_id. We know that these values are valid (and | ||
| // start at index=1) because data_copy also looks up the calldata size by the same context_id, constrained by | ||
| // calldata_hashing, which itself constrains the index. | ||
| // We could constrain this here by introducing a start column: | ||
| // pol commit start; | ||
| // Adding relations like (see calldata_hashing.pil's): | ||
| // #[START_AFTER_LATCH] | ||
| // sel' * (start' - FIRST_OR_LAST_CALLDATA) = 0; | ||
| // #[START_INDEX_IS_ONE] | ||
| // sel * start * (is_not_empty * (index - 1) + is_empty * index) = 0; | ||
| // If start = 1, #[START_INDEX_IS_ONE] should constrain that index = 1 unless we have empty calldata. In that case | ||
| // the 'special' row must have latch = 1, index = 0, value = 0, and will be constrained by calldata_hashing to have | ||
| // size = 0. This is a different case to a calldata of one field of value 0, where index = 1. | ||
| pol commit index; | ||
| // Designates end of calldata for that context_id | ||
| pol commit latch; // @boolean | ||
| latch * (1 - latch) = 0; | ||
|
|
||
| // latch == 1 ==> sel == 1 | ||
| #[SEL_TOGGLED_AT_LATCH] | ||
| latch * (1 - sel) = 0; | ||
| // ==== MULTI-ROW COMPUTATION SELECTORS (variant without start) ==== | ||
| // See recipe: https://github.com/AztecProtocol/aztec-packages/blob/next/barretenberg/cpp/pil/vm2/docs/recipes.md#variant-without-start | ||
|
|
||
| pol FIRST_OR_LAST_CALLDATA = precomputed.first_row + latch; | ||
| // Index increments until latch | ||
| sel * (1 - FIRST_OR_LAST_CALLDATA) * (index' - index - 1) = 0; | ||
| pol commit end; // @boolean | ||
| end * (1 - end) = 0; | ||
|
|
||
| pol LATCH_CONDITION = end + precomputed.first_row; | ||
|
|
||
| // If sel = 0, sel' != 1 | ||
| // end == 1 ==> sel == 1 | ||
| #[SEL_ON_END] | ||
| end * (1 - sel) = 0; | ||
|
|
||
| // Note: sel * (1 - LATCH_CONDITION) simplifies to (sel - end) because: | ||
| // - end == 1 ==> sel == 1 (by #[SEL_ON_END]), so sel * end = end. | ||
| // - sel == 0 on the first row (proving system guarantee), so sel * first_row = 0. | ||
|
|
||
| // sel is continuous within a computation block: it cannot change except at a latch. | ||
| #[TRACE_CONTINUITY] | ||
| (1 - precomputed.first_row) * (1 - sel) * sel' = 0; | ||
| (1 - LATCH_CONDITION) * (sel - sel') = 0; | ||
|
|
||
| // Context id does not change until we latch | ||
| #[CONTEXT_ID_CONTINUITY] | ||
| (1 - FIRST_OR_LAST_CALLDATA) * (context_id - context_id') = 0; | ||
| // ==== INPUTS AND OUTPUTS ==== | ||
|
|
||
| // We ensure that context_id is always different and increasing at each latch: | ||
| pol commit diff_context_id; | ||
| diff_context_id = latch * sel' * (context_id' - context_id - 1); | ||
| pol commit value; | ||
| pol commit context_id; | ||
| pol commit index; | ||
|
|
||
| #[RANGE_CHECK_CONTEXT_ID_DIFF] | ||
| latch { diff_context_id } in precomputed.sel_range_16 { precomputed.idx }; | ||
jeanmon marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| // Index initializes to 1 at the beginning of a computation block/latch. | ||
| // Index increments by 1 each row until `end == 1`. | ||
| #[INDEX] | ||
| index' = (sel - end) * (index + 1) + sel' * LATCH_CONDITION; | ||
|
|
||
| // ==== COLUMN CONTINUITY ==== | ||
|
|
||
| // Context id does not change until end | ||
| #[CONTEXT_ID_CONTINUITY] | ||
| (1 - LATCH_CONDITION) * (context_id - context_id') = 0; | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.