From 4e375e39e7dd1780313df74df09e82f30023d24b Mon Sep 17 00:00:00 2001 From: Leo Lara Date: Tue, 13 Dec 2022 18:22:00 +0700 Subject: [PATCH 01/18] Refactor bytecode circuit (draft) --- specs/bytecode-proof.md | 152 ++++++++++++++++++++++++++++++++-------- 1 file changed, 122 insertions(+), 30 deletions(-) diff --git a/specs/bytecode-proof.md b/specs/bytecode-proof.md index f952a624d..a29feef08 100644 --- a/specs/bytecode-proof.md +++ b/specs/bytecode-proof.md @@ -4,24 +4,29 @@ The bytecode proof helps the EVM proof by making the bytecode (identified by its ## Circuit Layout -| Column | Description | -| --------------------- | ------------------------------------------------------------------ | -| `q_first` | `1` on the first row, else `0` | -| `q_last` | `1` on the last row, else `0` | -| `hash` | The keccak hash of the bytecode | -| `tag` | Tag indicates whether `value` is a byte or length of the bytecode | -| `index` | The position of the byte in the bytecode | -| `value` | The byte data for the current position, or length of the bytecode | -| `is_code` | `1` if the byte is code, `0` if the byte is PUSH data | -| `push_data_left` | The number of PUSH data bytes that still follow the current row | -| `hash_rlc` | The accumulator containing the current and previous bytes | -| `hash_length` | The bytecode length | -| `byte_push_size` | The number of bytes pushed for the current byte | -| `is_final` | `1` if the current byte is the last byte of the bytecode, else `0` | -| `padding` | `1` if the current row is padding, else `0` | -| `push_data_left_inv` | The inverse of `push_data_left` (`IsZeroChip` helper) | -| `push_table.byte` | Push Table: A byte value | -| `push_table.push_size`| Push Table: The number of bytes pushed for this byte as opcode | +The collumn `tag` (advice) makes the circuit behave as a state machine, selecting different constrains depending on the current and next row value. The `tag` collumn can have two different values: Header, Byte. A row of `tag==Header` preceeds a series of `tag==Byte` rows that contain a complete bytecode sequence. The row `tag==Header` contains the length of the bytecode and the hash of the bytecode, each `tag==Byte` contains one byte of the bytecode, bytecode hash, its lenght and other for the push data. + + +| Column | Description | +| --------------------- | --------------------------------------------------------------------| +| `q_first` (fixed) | `1` on the first row, else `0` | +| `q_last` (fixed) | `1` on the last row, else `0` | +| `hash` | The keccak hash of the bytecode | +| `index` | The position of the byte in the bytecode starting from 0 | +| `value` | Value for this row bytecode byte, and the length in Header rows. | +| `is_code` | `1` if the byte is code, `0` if the byte is PUSH data | +| `push_data_left` | The number of PUSH data bytes that still follow the current row | +| `value_rlc` | The accumulator containing the current and previous bytes RLC | +| `length` | The bytecode length, that could be 0 for empty byrecodes and padding| +| `push_data_size` | The number of bytes pushed for the current byte | +| `push_table.byte` | Push Table: A byte value | +| `push_table.push_size`| Push Table: The number of bytes pushed for this byte as opcode | + + +After all the bytecodes have been added, the rest of the rows are filled with padding in the form of `tag == Header && length == 0 && value ==0` rows. + +Additionally we will need two collumns for IsZeroChip for `length` and `push_data_left` + ## Push table @@ -35,30 +40,117 @@ Because we do this lookup for each byte, this table is also indirectly used to r | \[OpcodeId::PUSH1, OpcodeId::PUSH32\] | `[1..32]` | | \[OpcodeId::PUSH32, 256\] | `0` | -### Circuit behavior +## Witness generation -The circuit starts by adding a row that contains the bytecode length using `tag = Length`. Then it -runs over all the bytes of the bytecode starting at the byte at position `0`. -Each following row unrolls a single byte of the bytecode while also storing its position +The circuit starts by adding a row that contains the bytecode length using `tag = Header`. + +Then it runs over all the bytes of the bytecode in order starting at the byte at position `0`. +Each following row unrolls a single byte (using `tag = Byte` and `value = the actual byte value`) of the bytecode while also storing its position (`index`), the code hash it's part of (`hash`), and if it is code or not -(`is_code`); using `tag = Byte` +(`is_code`). Also `push_data_size` is filled to match the push table, and `push_data_left` is computed. + +All byte data is accumulated per byte (with one byte per row) into `value_rlc` as follows, where r is a challenge: + +``` +first_bytecode.value_rlc := firstbytecode.value + +next.value_rlc := curr.value_rlc * r + next.value +``` + +For detecting which byte is code and which byte is push data the [Push table](#push-table) is used. This table allows finding out how many bytes an opcode pushes. This is used to set `push_data_left` (`push_data_size + 1`) if and only if the current byte is code (the first byte in any bytecode is code). + +If a row contains a non-zero value for `push_data_left` on its previous row we know the current byte is an opcode: + +``` +first_bytecode.is_code := 1 +curr.is_code := curr.push_data_left == 0 +next.push_data_left := curr.byte_push_size if curr.is_code else curr.push_data_left - 1 +``` + +The fixed columns `q_first` and `q_last` should be zero for all rows, except the first one where `q_first := 1` and the last one where `q_last:=1`. + +## Circuit constrains + +All circuit constrains are based on the current row (`curr`) and the `next` row. + +First of all if `curr.q_first` or `curr.q_last` are `1`, then `curr.tag == Header`. + +We should have the following contrains based on `curr.tag` and `next.tag` (state transition), for all rows except the last one (`curr.q_last == 1`). + +To enable lookup all `curr.tag == Header` rows should have: + +``` +assert curr.index == 0 +assert curr.value == curr.length +``` + +Also, each `curr.tag == Byte` should have: + +``` +assert push_data_size_table_lookup(curr.value, curr.push_data_size) +assert curr.is_code == (curr.push_data_left == 0) +if curr.is_code: + assert next.push_data_left == curr.push_data_size +else: + assert next.push_data_left == curr.push_data_left - 1 +``` + +This way we make sure is_code and next.push_data_left has the right values. + +### curr.tag == Header and next.tag == Header + +We are in a transition from a empty bytecode to the begining of another bytecode that could be empty or not. + +Hence: +``` +assert curr.length == 0 +assert curr.hash == EMPTY_HASH +``` -All byte data is accumulated per byte (with one byte per row) into `hash_rlc` as follows: +### curr.tag == Header and next.tag == Byte + +We are at the begining of a non-empty bytecode. + +Hence: + +``` +assert next.length == curr.length +assert next.index == 0 +assert next.is_code == 1 +assert next.hash == curr.hash +assert next.value_rlc == next.value +``` + +### curr.tag == Bytecode and next.tag == Byte + +We are working on an actual bytecode byte that is not the last one. + +Hence: ``` -hash_rlc := hash_rlc_prev * r + byte +assert next.length == curr.length +assert next.index == curr.index + 1 +assert next.hash == curr.hash +assert next.value_rlc == curr.value_rlc * randomness + next.value ``` -For detecting which byte is code and which byte is push data the [Push table](#push-table) is used. This table allows finding out how many bytes an opcode pushes. This is used to set `push_data_left` if and only if the current byte is code (the first byte in any bytecode is code). If a row contains a non-zero value for `push_data_left` on its previous row we know the current byte is an opcode: +We make sure that `index` is incremented and `value_rlc` is accumulated. + +### curr.tag == Bytecode and next.tag == Header + +We are at the last byte of a bytecode. + +Hence: ``` -is_code := prev_push_data_left == 0 -push_data_left := byte_push_size if is_code else prev_push_data_left - 1 +assert curr.index + 1 == curr.length +assert keccak256_table_lookup(curr.hash, curr.length, curr.value_rlc) ``` -At the last byte the prover can set `is_final` to `1`, which will enable the keccak lookup on `(hash_rlc, hash_length, hash)`. This will ensure that the byte data passed into the circuit matches the data the prover gave as input (all the byte data is accumulated into `hash_rlc`). This has the consequence that the circuit _requires_ the full bytecode to be a part of its state, otherwise the prover could pass in invalid byte data for the specified hash. This is enforced by the circuit by requiring the last row in the circuit (when `q_last == 1`, note that `q_first` of the next row _cannot_ be used because of unusable rows) to either have `is_final == 1` or `padding == 1`, and padding itself can only be enabled after a `is_final` was set to `1`. +First, we make sure that the bytecode has `curr.length` bytes in the table. + +Second, we ensure that the byte data passed into the circuit matches the data the prover gave as input (all the byte data is accumulated into `value_rlc`). This has the consequence that the circuit _requires_ the full bytecode to be a part of its state, otherwise the prover could pass in invalid byte data for the specified hash. -Explicit padding is added to this circuit to be able to fully fill the circuit with valid data without depending on the keccak circuit to either hash this padding data or support looking up e.g. all zero data. ## Code From fdc33727eab1ce25a64387d01524326b2d74b024 Mon Sep 17 00:00:00 2001 From: Leo Lara Date: Fri, 16 Dec 2022 10:53:23 +0700 Subject: [PATCH 02/18] Use cur instead of curr like in halo2 --- specs/bytecode-proof.md | 60 ++++++++++++++++++++--------------------- 1 file changed, 30 insertions(+), 30 deletions(-) diff --git a/specs/bytecode-proof.md b/specs/bytecode-proof.md index a29feef08..70a6aa02e 100644 --- a/specs/bytecode-proof.md +++ b/specs/bytecode-proof.md @@ -54,7 +54,7 @@ All byte data is accumulated per byte (with one byte per row) into `value_rlc` a ``` first_bytecode.value_rlc := firstbytecode.value -next.value_rlc := curr.value_rlc * r + next.value +next.value_rlc := cur.value_rlc * r + next.value ``` For detecting which byte is code and which byte is push data the [Push table](#push-table) is used. This table allows finding out how many bytes an opcode pushes. This is used to set `push_data_left` (`push_data_size + 1`) if and only if the current byte is code (the first byte in any bytecode is code). @@ -63,91 +63,91 @@ If a row contains a non-zero value for `push_data_left` on its previous row we k ``` first_bytecode.is_code := 1 -curr.is_code := curr.push_data_left == 0 -next.push_data_left := curr.byte_push_size if curr.is_code else curr.push_data_left - 1 +cur.is_code := cur.push_data_left == 0 +next.push_data_left := cur.byte_push_size if cur.is_code else cur.push_data_left - 1 ``` The fixed columns `q_first` and `q_last` should be zero for all rows, except the first one where `q_first := 1` and the last one where `q_last:=1`. ## Circuit constrains -All circuit constrains are based on the current row (`curr`) and the `next` row. +All circuit constrains are based on the current row (`cur`) and the `next` row. -First of all if `curr.q_first` or `curr.q_last` are `1`, then `curr.tag == Header`. +First of all if `cur.q_first` or `cur.q_last` are `1`, then `cur.tag == Header`. -We should have the following contrains based on `curr.tag` and `next.tag` (state transition), for all rows except the last one (`curr.q_last == 1`). +We should have the following contrains based on `cur.tag` and `next.tag` (state transition), for all rows except the last one (`cur.q_last == 1`). -To enable lookup all `curr.tag == Header` rows should have: +To enable lookup all `cur.tag == Header` rows should have: ``` -assert curr.index == 0 -assert curr.value == curr.length +assert cur.index == 0 +assert cur.value == cur.length ``` -Also, each `curr.tag == Byte` should have: +Also, each `cur.tag == Byte` should have: ``` -assert push_data_size_table_lookup(curr.value, curr.push_data_size) -assert curr.is_code == (curr.push_data_left == 0) -if curr.is_code: - assert next.push_data_left == curr.push_data_size +assert push_data_size_table_lookup(cur.value, cur.push_data_size) +assert cur.is_code == (cur.push_data_left == 0) +if cur.is_code: + assert next.push_data_left == cur.push_data_size else: - assert next.push_data_left == curr.push_data_left - 1 + assert next.push_data_left == cur.push_data_left - 1 ``` This way we make sure is_code and next.push_data_left has the right values. -### curr.tag == Header and next.tag == Header +### cur.tag == Header and next.tag == Header We are in a transition from a empty bytecode to the begining of another bytecode that could be empty or not. Hence: ``` -assert curr.length == 0 -assert curr.hash == EMPTY_HASH +assert cur.length == 0 +assert cur.hash == EMPTY_HASH ``` -### curr.tag == Header and next.tag == Byte +### cur.tag == Header and next.tag == Byte We are at the begining of a non-empty bytecode. Hence: ``` -assert next.length == curr.length +assert next.length == cur.length assert next.index == 0 assert next.is_code == 1 -assert next.hash == curr.hash +assert next.hash == cur.hash assert next.value_rlc == next.value ``` -### curr.tag == Bytecode and next.tag == Byte +### cur.tag == Bytecode and next.tag == Byte We are working on an actual bytecode byte that is not the last one. Hence: ``` -assert next.length == curr.length -assert next.index == curr.index + 1 -assert next.hash == curr.hash -assert next.value_rlc == curr.value_rlc * randomness + next.value +assert next.length == cur.length +assert next.index == cur.index + 1 +assert next.hash == cur.hash +assert next.value_rlc == cur.value_rlc * randomness + next.value ``` We make sure that `index` is incremented and `value_rlc` is accumulated. -### curr.tag == Bytecode and next.tag == Header +### cur.tag == Bytecode and next.tag == Header We are at the last byte of a bytecode. Hence: ``` -assert curr.index + 1 == curr.length -assert keccak256_table_lookup(curr.hash, curr.length, curr.value_rlc) +assert cur.index + 1 == cur.length +assert keccak256_table_lookup(cur.hash, cur.length, cur.value_rlc) ``` -First, we make sure that the bytecode has `curr.length` bytes in the table. +First, we make sure that the bytecode has `cur.length` bytes in the table. Second, we ensure that the byte data passed into the circuit matches the data the prover gave as input (all the byte data is accumulated into `value_rlc`). This has the consequence that the circuit _requires_ the full bytecode to be a part of its state, otherwise the prover could pass in invalid byte data for the specified hash. From d22cde293b19f67dff1d99240f7352fca056319b Mon Sep 17 00:00:00 2001 From: Leo Lara Date: Fri, 16 Dec 2022 17:11:28 +0700 Subject: [PATCH 03/18] Fix docs error --- specs/bytecode-proof.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/specs/bytecode-proof.md b/specs/bytecode-proof.md index 70a6aa02e..32ac64ebe 100644 --- a/specs/bytecode-proof.md +++ b/specs/bytecode-proof.md @@ -121,7 +121,7 @@ assert next.hash == cur.hash assert next.value_rlc == next.value ``` -### cur.tag == Bytecode and next.tag == Byte +### cur.tag == Byte and next.tag == Byte We are working on an actual bytecode byte that is not the last one. From b2f05e4a1e2cac22bcbd51a8b308527a82fd2a6d Mon Sep 17 00:00:00 2001 From: Leo Lara Date: Fri, 16 Dec 2022 17:16:00 +0700 Subject: [PATCH 04/18] Spec in code with tests --- src/zkevm_specs/bytecode.py | 212 ++++++++++++----------------- src/zkevm_specs/evm/instruction.py | 2 +- src/zkevm_specs/evm/table.py | 2 +- src/zkevm_specs/evm/typing.py | 2 +- tests/test_bytecode_circuit.py | 14 +- 5 files changed, 96 insertions(+), 136 deletions(-) diff --git a/src/zkevm_specs/bytecode.py b/src/zkevm_specs/bytecode.py index 83ced381d..e17862b35 100644 --- a/src/zkevm_specs/bytecode.py +++ b/src/zkevm_specs/bytecode.py @@ -7,114 +7,76 @@ # Row in the circuit Row = namedtuple( "Row", - "q_first q_last hash tag index is_code value push_data_left hash_rlc hash_length byte_push_size is_final padding", + "q_first q_last hash tag index value is_code push_data_left value_rlc length push_data_size", ) # Unrolled bytecode class UnrolledBytecode(NamedTuple): bytes: bytes rows: Sequence[BytecodeTableRow] - -@is_circuit_code -def assert_bool(value: Union[int, bool]): - assert value in [0, 1] - - -@is_circuit_code -def select( - selector: U8, - when_true: U256, - when_false: U256, -) -> U256: - return U256(selector * when_true + (1 - selector) * when_false) - - @is_circuit_code def check_bytecode_row( - row: Row, - prev_row: Row, - next_row: Row, + cur: Row, + next: Row, push_table: Set[Tuple[int, int]], keccak_table: Set[Tuple[int, int, int]], - r: int, + randomness: int, ): - row = Row(*[v if isinstance(v, RLC) else FQ(v) for v in row]) - prev_row = Row(*[v if isinstance(v, RLC) else FQ(v) for v in prev_row]) - next_row = Row(*[v if isinstance(v, RLC) else FQ(v) for v in next_row]) - if row.q_first == 0 and prev_row.is_final == 0: - # Continue - if prev_row.tag == BytecodeFieldTag.Length: - # index starts from 0 - assert row.index == 0 - # is_code := 1, since this is the first byte of the bytecode - assert row.is_code == 1 - else: - # index is 1 more than previous row's index - assert row.index == prev_row.index + 1 - # is_code := push_data_left_prev == 0 - assert row.is_code == (prev_row.push_data_left == 0) - # hash_rlc := hash_rlc_prev * r + byte - assert row.hash_rlc == prev_row.hash_rlc * r + row.value - # padding needs to remain the same - assert row.padding == prev_row.padding - # hash needs to remain the same - assert row.hash == prev_row.hash - # hash_length needs to remain the same - assert row.hash_length == prev_row.hash_length - else: - # Start - # the row following an `is_final` previous row is either tagged Length - if row.tag == BytecodeFieldTag.Length: - # value matches hash length - assert row.value == row.hash_length - # if bytecode length is zero - if row.value == 0: - # bytecode hash should be EMPTY_HASH - assert row.hash == RLC(EMPTY_HASH, FQ(r)).expr() - # the next row should be a tag Length or padding - assert (next_row.tag == BytecodeFieldTag.Length) or (next_row.tag == 0) + cur = Row(*[v if isinstance(v, RLC) else FQ(v) for v in cur]) + next = Row(*[v if isinstance(v, RLC) else FQ(v) for v in next]) + + if cur.q_first == 1: + assert cur.tag == BytecodeFieldTag.Header + + if cur.q_last == 0: + if cur.tag == BytecodeFieldTag.Header: + assert cur.value == cur.length + assert cur.index == 0 + if next.tag == BytecodeFieldTag.Byte: + check_bytecode_row_header_to_byte(cur, next) + if next.tag == BytecodeFieldTag.Header: + check_bytecode_row_header_to_header(cur, randomness) + + if cur.tag == BytecodeFieldTag.Byte: + assert (cur.value, cur.push_data_size) in push_table + assert cur.is_code == (cur.push_data_left == 0) + if cur.is_code == 1: + assert next.push_data_left == cur.push_data_size else: - # the next row should be tag Byte - assert next_row.tag == BytecodeFieldTag.Byte - # or is the start of padding rows - else: - assert row.padding == 1 - - # is_final needs to be boolean - assert_bool(row.is_final) - # padding needs to be boolean - assert_bool(row.padding) - - if row.tag == BytecodeFieldTag.Byte: - # push_data_left := is_code ? byte_push_size : push_data_left_prev - 1 - assert row.push_data_left == select( - row.is_code, row.byte_push_size, prev_row.push_data_left - 1 - ) + assert next.push_data_left == cur.push_data_left - 1 - # Padding - if row.q_first == 0: - # padding can only go 0 -> 1 once - assert_bool(row.padding - prev_row.padding) - - # Last row - # The hash is checked on the latest row because only then have - # we accumulated all the bytes. We also have to go through the bytes - # in a forward manner because that's the only way we can know which - # bytes are op codes and which are push data. - if row.q_last == 1: - # padding needs to be enabled OR - # the last row needs to be the last byte - assert row.padding == 1 or row.is_final == 1 - - if row.tag == BytecodeFieldTag.Byte: - # Lookup how many bytes the current opcode pushes - # (also indirectly range checks `byte` to be in [0, 255]) - assert (row.value, row.byte_push_size) in push_table - - # keccak lookup when on the last byte - if row.is_final == 1 and row.padding == 0: - assert (row.hash_rlc, row.hash_length, row.hash) in keccak_table + if next.tag == BytecodeFieldTag.Byte: + check_bytecode_row_byte_to_byte(cur, next, randomness) + if next.tag == BytecodeFieldTag.Header: + check_bytecode_row_byte_to_header(cur, keccak_table) + if cur.q_last == 1: + assert cur.tag == BytecodeFieldTag.Header + +@is_circuit_code +def check_bytecode_row_header_to_byte(cur: Row, next: Row): + assert next.length == cur.length + assert next.index == 0 + assert next.is_code == 1 + assert next.hash == cur.hash + assert next.value_rlc == next.value + +@is_circuit_code +def check_bytecode_row_header_to_header(cur: Row, randomness: int): + assert cur.length == 0 + assert cur.hash == RLC(EMPTY_HASH, randomness).expr() + +@is_circuit_code +def check_bytecode_row_byte_to_byte(cur: Row, next: Row, r: int): + assert next.length == cur.length + assert next.index == cur.index + 1 + assert next.hash == cur.hash + assert next.value_rlc == cur.value_rlc * r + next.value + +@is_circuit_code +def check_bytecode_row_byte_to_header(cur: Row, keccak_table: Set[Tuple[int, int, int]]): + assert cur.index + 1 == cur.length + assert (cur.value_rlc, cur.length, cur.hash) in keccak_table # Populate the circuit matrix def assign_bytecode_circuit(k: int, bytecodes: Sequence[UnrolledBytecode], randomness: int): @@ -124,36 +86,34 @@ def assign_bytecode_circuit(k: int, bytecodes: Sequence[UnrolledBytecode], rando rows = [] offset = 0 for bytecode in bytecodes: - push_data_left = 0 - hash_rlc = FQ(0) + next_push_data_left = 0 + value_rlc = FQ(0) for idx, row in enumerate(bytecode.rows): # Subsequent rows represent the bytecode bytes # Track which byte is an opcode and which is push data + push_data_left = next_push_data_left is_code = push_data_left == 0 - byte_push_size = 0 + push_data_size = 0 if idx > 0: - byte_push_size = get_push_size(row.value) - push_data_left = byte_push_size if is_code else push_data_left - 1 + push_data_size = get_push_size(row.value) + next_push_data_left = push_data_size if is_code else push_data_left - 1 # Add the byte to the accumulator - hash_rlc = hash_rlc * randomness + row.value + value_rlc = value_rlc * randomness + row.value # Set the data for this row rows.append( Row( - offset == 0, - offset == last_row_offset, - row.bytecode_hash, - row.field_tag, - row.index, - row.is_code, - row.value, - push_data_left, - hash_rlc, - len(bytecode.bytes), - byte_push_size, - # Since 1 row is taken up by the Length tag - idx == len(bytecode.bytes), - False, + q_first=offset == 0, + q_last=offset == last_row_offset, + hash=row.bytecode_hash, + tag= row.field_tag, + index= row.index, + value= row.value, + is_code= row.is_code, + push_data_left= push_data_left, + value_rlc= value_rlc, + length=len(bytecode.bytes), + push_data_size= push_data_size, ) ) @@ -166,19 +126,17 @@ def assign_bytecode_circuit(k: int, bytecodes: Sequence[UnrolledBytecode], rando for idx in range(offset, 2**k): rows.append( Row( - idx == 0, - idx == last_row_offset, - 0, - 0, - 0, - 0, - True, - 0, - 0, - 0, - 0, - True, - True, + q_first= idx == 0, + q_last= idx == last_row_offset, + hash= RLC(EMPTY_HASH, randomness).expr(), + tag= BytecodeFieldTag.Header, + index= 0, + value= 0, + is_code= False, + push_data_left= 0, + value_rlc= 0, + length= 0, + push_data_size= 0 ) ) diff --git a/src/zkevm_specs/evm/instruction.py b/src/zkevm_specs/evm/instruction.py index 42b38eea4..8a14323cb 100644 --- a/src/zkevm_specs/evm/instruction.py +++ b/src/zkevm_specs/evm/instruction.py @@ -686,7 +686,7 @@ def bytecode_lookup_pair( def bytecode_length(self, bytecode_hash: Expression) -> Expression: return self.tables.bytecode_lookup( - bytecode_hash, FQ(BytecodeFieldTag.Length), FQ(0), FQ(0) + bytecode_hash, FQ(BytecodeFieldTag.Header), FQ(0), FQ(0) ).value def tx_gas_price(self, tx_id: Expression) -> RLC: diff --git a/src/zkevm_specs/evm/table.py b/src/zkevm_specs/evm/table.py index 5d2627b2f..4a7d91347 100644 --- a/src/zkevm_specs/evm/table.py +++ b/src/zkevm_specs/evm/table.py @@ -164,7 +164,7 @@ class BytecodeFieldTag(IntEnum): Tag for BytecodeTable lookup. """ - Length = 1 + Header = 1 Byte = 2 diff --git a/src/zkevm_specs/evm/typing.py b/src/zkevm_specs/evm/typing.py index 1ad2dd7a2..5504a0226 100644 --- a/src/zkevm_specs/evm/typing.py +++ b/src/zkevm_specs/evm/typing.py @@ -321,7 +321,7 @@ def __next__(self): if self.idx == 0: self.idx += 1 return BytecodeTableRow( - self.hash, FQ(BytecodeFieldTag.Length), FQ(0), FQ(0), FQ(len(self.code)) + self.hash, FQ(BytecodeFieldTag.Header), FQ(0), FQ(0), FQ(len(self.code)) ) if self.idx > len(self.code): diff --git a/tests/test_bytecode_circuit.py b/tests/test_bytecode_circuit.py index 91eea29fc..6a750e6a3 100644 --- a/tests/test_bytecode_circuit.py +++ b/tests/test_bytecode_circuit.py @@ -17,13 +17,15 @@ def verify(k, bytecodes, randomness, success): rows = assign_bytecode_circuit(k, bytecodes, randomness) try: for (idx, row) in enumerate(rows): - prev_row = rows[(idx - 1) % len(rows)] next_row = rows[(idx + 1) % len(rows)] - check_bytecode_row(row, prev_row, next_row, push_table, keccak_table, randomness) + check_bytecode_row(row, next_row, push_table, keccak_table, randomness) ok = True except AssertionError as e: if success: traceback.print_exc() + print(idx) + print(row) + print(next_row) ok = False assert ok == success @@ -53,7 +55,7 @@ def test_bytecode_unrolling(): for i in range(len(rows)): rows[i] = BytecodeTableRow(hash.expr(), rows[i][1], rows[i][2], rows[i][3], rows[i][4]) # Prepend the length of bytecode to rows - rows.insert(0, BytecodeTableRow(hash.expr(), BytecodeFieldTag.Length, 0, 0, len(bytecode))) + rows.insert(0, BytecodeTableRow(hash.expr(), BytecodeFieldTag.Header, 0, 0, len(bytecode))) # Unroll the bytecode unrolled = unroll(bytes(bytecode), randomness) # Check if the bytecode was unrolled correctly @@ -69,7 +71,7 @@ def test_bytecode_empty(): def test_bytecode_full(): bytecodes = [unroll(bytes([7] * (2**k - 1)), randomness)] - verify(k, bytecodes, randomness, True) + verify(k, bytecodes, randomness, False) # Last row must be tag=Header def test_bytecode_incomplete(): @@ -85,7 +87,7 @@ def test_bytecode_multiple(): unroll(bytes([Opcode.ADD, Opcode.PUSH32]), randomness), unroll(bytes([Opcode.ADD, Opcode.PUSH32, Opcode.ADD]), randomness), ] - verify(k, bytecodes, randomness, True) + verify(k, bytecodes, randomness, False) # Push without data must fail def test_bytecode_invalid_hash_data(): @@ -183,7 +185,7 @@ def test_bytecode_invalid_is_code(): ), randomness, ) - verify(k, [unrolled], randomness, True) + verify(k, [unrolled], randomness, False) # Push without data must fail # The first row, i.e. index == 0 is taken up by the tag Length. # Mark the 3rd byte as code (is push data from the first PUSH1) From 8db0f5e9e73dd8145c8c7d35093d3fb7c50ffc02 Mon Sep 17 00:00:00 2001 From: Leo Lara Date: Fri, 16 Dec 2022 17:19:03 +0700 Subject: [PATCH 05/18] Fix linting --- src/zkevm_specs/bytecode.py | 46 +++++++++++++++++++--------------- tests/test_bytecode_circuit.py | 6 ++--- 2 files changed, 29 insertions(+), 23 deletions(-) diff --git a/src/zkevm_specs/bytecode.py b/src/zkevm_specs/bytecode.py index e17862b35..8bcf9d7bd 100644 --- a/src/zkevm_specs/bytecode.py +++ b/src/zkevm_specs/bytecode.py @@ -1,8 +1,8 @@ -from typing import Sequence, Union, Tuple, Set, NamedTuple +from typing import Sequence, Tuple, Set, NamedTuple from collections import namedtuple from .util import keccak256, EMPTY_HASH, FQ, RLC from .evm import get_push_size, BytecodeFieldTag, BytecodeTableRow -from .encoding import U8, U256, is_circuit_code +from .encoding import is_circuit_code # Row in the circuit Row = namedtuple( @@ -14,6 +14,7 @@ class UnrolledBytecode(NamedTuple): bytes: bytes rows: Sequence[BytecodeTableRow] + @is_circuit_code def check_bytecode_row( cur: Row, @@ -53,6 +54,7 @@ def check_bytecode_row( if cur.q_last == 1: assert cur.tag == BytecodeFieldTag.Header + @is_circuit_code def check_bytecode_row_header_to_byte(cur: Row, next: Row): assert next.length == cur.length @@ -61,11 +63,13 @@ def check_bytecode_row_header_to_byte(cur: Row, next: Row): assert next.hash == cur.hash assert next.value_rlc == next.value + @is_circuit_code def check_bytecode_row_header_to_header(cur: Row, randomness: int): assert cur.length == 0 assert cur.hash == RLC(EMPTY_HASH, randomness).expr() + @is_circuit_code def check_bytecode_row_byte_to_byte(cur: Row, next: Row, r: int): assert next.length == cur.length @@ -73,11 +77,13 @@ def check_bytecode_row_byte_to_byte(cur: Row, next: Row, r: int): assert next.hash == cur.hash assert next.value_rlc == cur.value_rlc * r + next.value + @is_circuit_code def check_bytecode_row_byte_to_header(cur: Row, keccak_table: Set[Tuple[int, int, int]]): assert cur.index + 1 == cur.length assert (cur.value_rlc, cur.length, cur.hash) in keccak_table + # Populate the circuit matrix def assign_bytecode_circuit(k: int, bytecodes: Sequence[UnrolledBytecode], randomness: int): # All rows are usable in this emulation @@ -106,14 +112,14 @@ def assign_bytecode_circuit(k: int, bytecodes: Sequence[UnrolledBytecode], rando q_first=offset == 0, q_last=offset == last_row_offset, hash=row.bytecode_hash, - tag= row.field_tag, - index= row.index, - value= row.value, - is_code= row.is_code, - push_data_left= push_data_left, - value_rlc= value_rlc, + tag=row.field_tag, + index=row.index, + value=row.value, + is_code=row.is_code, + push_data_left=push_data_left, + value_rlc=value_rlc, length=len(bytecode.bytes), - push_data_size= push_data_size, + push_data_size=push_data_size, ) ) @@ -126,17 +132,17 @@ def assign_bytecode_circuit(k: int, bytecodes: Sequence[UnrolledBytecode], rando for idx in range(offset, 2**k): rows.append( Row( - q_first= idx == 0, - q_last= idx == last_row_offset, - hash= RLC(EMPTY_HASH, randomness).expr(), - tag= BytecodeFieldTag.Header, - index= 0, - value= 0, - is_code= False, - push_data_left= 0, - value_rlc= 0, - length= 0, - push_data_size= 0 + q_first=idx == 0, + q_last=idx == last_row_offset, + hash=RLC(EMPTY_HASH, randomness).expr(), + tag=BytecodeFieldTag.Header, + index=0, + value=0, + is_code=False, + push_data_left=0, + value_rlc=0, + length=0, + push_data_size=0, ) ) diff --git a/tests/test_bytecode_circuit.py b/tests/test_bytecode_circuit.py index 6a750e6a3..ae520e3c8 100644 --- a/tests/test_bytecode_circuit.py +++ b/tests/test_bytecode_circuit.py @@ -71,7 +71,7 @@ def test_bytecode_empty(): def test_bytecode_full(): bytecodes = [unroll(bytes([7] * (2**k - 1)), randomness)] - verify(k, bytecodes, randomness, False) # Last row must be tag=Header + verify(k, bytecodes, randomness, False) # Last row must be tag=Header def test_bytecode_incomplete(): @@ -87,7 +87,7 @@ def test_bytecode_multiple(): unroll(bytes([Opcode.ADD, Opcode.PUSH32]), randomness), unroll(bytes([Opcode.ADD, Opcode.PUSH32, Opcode.ADD]), randomness), ] - verify(k, bytecodes, randomness, False) # Push without data must fail + verify(k, bytecodes, randomness, False) # Push without data must fail def test_bytecode_invalid_hash_data(): @@ -185,7 +185,7 @@ def test_bytecode_invalid_is_code(): ), randomness, ) - verify(k, [unrolled], randomness, False) # Push without data must fail + verify(k, [unrolled], randomness, False) # Push without data must fail # The first row, i.e. index == 0 is taken up by the tag Length. # Mark the 3rd byte as code (is push data from the first PUSH1) From dd622651d659f620e2ce357ae3b984c57038f5d2 Mon Sep 17 00:00:00 2001 From: Leo Lara Date: Fri, 16 Dec 2022 17:25:50 +0700 Subject: [PATCH 06/18] Fix types --- src/zkevm_specs/bytecode.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/zkevm_specs/bytecode.py b/src/zkevm_specs/bytecode.py index 8bcf9d7bd..4e2e4f9a9 100644 --- a/src/zkevm_specs/bytecode.py +++ b/src/zkevm_specs/bytecode.py @@ -67,7 +67,7 @@ def check_bytecode_row_header_to_byte(cur: Row, next: Row): @is_circuit_code def check_bytecode_row_header_to_header(cur: Row, randomness: int): assert cur.length == 0 - assert cur.hash == RLC(EMPTY_HASH, randomness).expr() + assert cur.hash == RLC(EMPTY_HASH, FQ(randomness)).expr() @is_circuit_code @@ -134,7 +134,7 @@ def assign_bytecode_circuit(k: int, bytecodes: Sequence[UnrolledBytecode], rando Row( q_first=idx == 0, q_last=idx == last_row_offset, - hash=RLC(EMPTY_HASH, randomness).expr(), + hash=RLC(EMPTY_HASH, FQ(randomness)).expr(), tag=BytecodeFieldTag.Header, index=0, value=0, From f894c72b82bdc96d2fbace99aa2df7760d4ff781 Mon Sep 17 00:00:00 2001 From: Leo Lara Date: Sat, 17 Dec 2022 13:43:18 +0700 Subject: [PATCH 07/18] Fix documentation --- specs/bytecode-proof.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/specs/bytecode-proof.md b/specs/bytecode-proof.md index 32ac64ebe..80bcf44a2 100644 --- a/specs/bytecode-proof.md +++ b/specs/bytecode-proof.md @@ -23,7 +23,7 @@ The collumn `tag` (advice) makes the circuit behave as a state machine, selectin | `push_table.push_size`| Push Table: The number of bytes pushed for this byte as opcode | -After all the bytecodes have been added, the rest of the rows are filled with padding in the form of `tag == Header && length == 0 && value ==0` rows. +After all the bytecodes have been added, the rest of the rows are filled with padding in the form of `tag == Header && length == 0 && value == 0 && hash == EMPTY_HASH` rows. Additionally we will need two collumns for IsZeroChip for `length` and `push_data_left` @@ -57,9 +57,9 @@ first_bytecode.value_rlc := firstbytecode.value next.value_rlc := cur.value_rlc * r + next.value ``` -For detecting which byte is code and which byte is push data the [Push table](#push-table) is used. This table allows finding out how many bytes an opcode pushes. This is used to set `push_data_left` (`push_data_size + 1`) if and only if the current byte is code (the first byte in any bytecode is code). +For detecting which byte is code and which byte is push data the [Push table](#push-table) is used. This table allows finding out how many bytes an opcode pushes. This is used to set `next.push_data_left` if and only if the current byte is code (the first byte in any bytecode is code). -If a row contains a non-zero value for `push_data_left` on its previous row we know the current byte is an opcode: +If a row contains a non-zero value for `push_data_left` we know the current byte is an opcode: ``` first_bytecode.is_code := 1 @@ -67,7 +67,7 @@ cur.is_code := cur.push_data_left == 0 next.push_data_left := cur.byte_push_size if cur.is_code else cur.push_data_left - 1 ``` -The fixed columns `q_first` and `q_last` should be zero for all rows, except the first one where `q_first := 1` and the last one where `q_last:=1`. +The fixed columns `q_first` and `q_last` should be zero for all rows, except the first one where `q_first := 1` and the last one where `q_last := 1`. ## Circuit constrains From b5e691256792eecf5a76c2a66e347bffe7cac60c Mon Sep 17 00:00:00 2001 From: Leo Lara Date: Mon, 26 Dec 2022 10:42:16 +0700 Subject: [PATCH 08/18] Update specs/bytecode-proof.md Co-authored-by: Brecht Devos --- specs/bytecode-proof.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/specs/bytecode-proof.md b/specs/bytecode-proof.md index 80bcf44a2..fd72e97fb 100644 --- a/specs/bytecode-proof.md +++ b/specs/bytecode-proof.md @@ -4,7 +4,7 @@ The bytecode proof helps the EVM proof by making the bytecode (identified by its ## Circuit Layout -The collumn `tag` (advice) makes the circuit behave as a state machine, selecting different constrains depending on the current and next row value. The `tag` collumn can have two different values: Header, Byte. A row of `tag==Header` preceeds a series of `tag==Byte` rows that contain a complete bytecode sequence. The row `tag==Header` contains the length of the bytecode and the hash of the bytecode, each `tag==Byte` contains one byte of the bytecode, bytecode hash, its lenght and other for the push data. +The column `tag` (advice) makes the circuit behave as a state machine, selecting different constraints depending on the current and next row value. The `tag` column can have two different values: Header, Byte. A row of `tag==Header` precedes a series of `tag==Byte` rows that contain a complete bytecode sequence. The row `tag==Header` contains the length of the bytecode and the hash of the bytecode, each `tag==Byte` contains one byte of the bytecode, bytecode hash, its length and other for the push data. | Column | Description | From eb1864f86ce0a5a3d75b215c56e3a438d8e07541 Mon Sep 17 00:00:00 2001 From: Leo Lara Date: Mon, 26 Dec 2022 10:42:28 +0700 Subject: [PATCH 09/18] Update specs/bytecode-proof.md Co-authored-by: Brecht Devos --- specs/bytecode-proof.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/specs/bytecode-proof.md b/specs/bytecode-proof.md index fd72e97fb..19c673c15 100644 --- a/specs/bytecode-proof.md +++ b/specs/bytecode-proof.md @@ -17,7 +17,7 @@ The column `tag` (advice) makes the circuit behave as a state machine, selecting | `is_code` | `1` if the byte is code, `0` if the byte is PUSH data | | `push_data_left` | The number of PUSH data bytes that still follow the current row | | `value_rlc` | The accumulator containing the current and previous bytes RLC | -| `length` | The bytecode length, that could be 0 for empty byrecodes and padding| +| `length` | The bytecode length, that could be 0 for empty bytecodes and padding| | `push_data_size` | The number of bytes pushed for the current byte | | `push_table.byte` | Push Table: A byte value | | `push_table.push_size`| Push Table: The number of bytes pushed for this byte as opcode | From 70720142eb85b18bece10aae8e2e118e4020f654 Mon Sep 17 00:00:00 2001 From: Leo Lara Date: Mon, 26 Dec 2022 10:42:41 +0700 Subject: [PATCH 10/18] Update specs/bytecode-proof.md Co-authored-by: Brecht Devos --- specs/bytecode-proof.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/specs/bytecode-proof.md b/specs/bytecode-proof.md index 19c673c15..b17453a0c 100644 --- a/specs/bytecode-proof.md +++ b/specs/bytecode-proof.md @@ -59,7 +59,7 @@ next.value_rlc := cur.value_rlc * r + next.value For detecting which byte is code and which byte is push data the [Push table](#push-table) is used. This table allows finding out how many bytes an opcode pushes. This is used to set `next.push_data_left` if and only if the current byte is code (the first byte in any bytecode is code). -If a row contains a non-zero value for `push_data_left` we know the current byte is an opcode: +If a row contains a zero value for `push_data_left` we know the current byte is an opcode: ``` first_bytecode.is_code := 1 From 51a8d73a1e94218be5b28d3ca7439d7c01fdc100 Mon Sep 17 00:00:00 2001 From: Leo Lara Date: Mon, 26 Dec 2022 10:42:51 +0700 Subject: [PATCH 11/18] Update specs/bytecode-proof.md Co-authored-by: Brecht Devos --- specs/bytecode-proof.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/specs/bytecode-proof.md b/specs/bytecode-proof.md index b17453a0c..5a35ce871 100644 --- a/specs/bytecode-proof.md +++ b/specs/bytecode-proof.md @@ -25,7 +25,7 @@ The column `tag` (advice) makes the circuit behave as a state machine, selecting After all the bytecodes have been added, the rest of the rows are filled with padding in the form of `tag == Header && length == 0 && value == 0 && hash == EMPTY_HASH` rows. -Additionally we will need two collumns for IsZeroChip for `length` and `push_data_left` +Additionally we will need two columns for IsZeroChip for `length` and `push_data_left` ## Push table From 6a38b02e8272718f47221a8ea0c19dcc4ce71d87 Mon Sep 17 00:00:00 2001 From: Leo Lara Date: Mon, 26 Dec 2022 10:43:14 +0700 Subject: [PATCH 12/18] Update specs/bytecode-proof.md Co-authored-by: Brecht Devos --- specs/bytecode-proof.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/specs/bytecode-proof.md b/specs/bytecode-proof.md index 5a35ce871..7c7c5ce51 100644 --- a/specs/bytecode-proof.md +++ b/specs/bytecode-proof.md @@ -71,7 +71,7 @@ The fixed columns `q_first` and `q_last` should be zero for all rows, except the ## Circuit constrains -All circuit constrains are based on the current row (`cur`) and the `next` row. +All circuit constraints are based on the current row (`cur`) and the `next` row. First of all if `cur.q_first` or `cur.q_last` are `1`, then `cur.tag == Header`. From 4489b4c731d57ed178d34bdbd57898eb378ba453 Mon Sep 17 00:00:00 2001 From: Leo Lara Date: Mon, 26 Dec 2022 10:43:28 +0700 Subject: [PATCH 13/18] Update specs/bytecode-proof.md Co-authored-by: Brecht Devos --- specs/bytecode-proof.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/specs/bytecode-proof.md b/specs/bytecode-proof.md index 7c7c5ce51..cc00b2e54 100644 --- a/specs/bytecode-proof.md +++ b/specs/bytecode-proof.md @@ -75,7 +75,7 @@ All circuit constraints are based on the current row (`cur`) and the `next` row. First of all if `cur.q_first` or `cur.q_last` are `1`, then `cur.tag == Header`. -We should have the following contrains based on `cur.tag` and `next.tag` (state transition), for all rows except the last one (`cur.q_last == 1`). +We should have the following constraint based on `cur.tag` and `next.tag` (state transition), for all rows except the last one (`cur.q_last == 1`). To enable lookup all `cur.tag == Header` rows should have: From 44e97e1d2a499770457b52f206a1273e26973f05 Mon Sep 17 00:00:00 2001 From: Leo Lara Date: Mon, 26 Dec 2022 10:43:39 +0700 Subject: [PATCH 14/18] Update specs/bytecode-proof.md Co-authored-by: Brecht Devos --- specs/bytecode-proof.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/specs/bytecode-proof.md b/specs/bytecode-proof.md index cc00b2e54..1ec7b5e83 100644 --- a/specs/bytecode-proof.md +++ b/specs/bytecode-proof.md @@ -95,7 +95,7 @@ else: assert next.push_data_left == cur.push_data_left - 1 ``` -This way we make sure is_code and next.push_data_left has the right values. +This way we make sure is_code and next.push_data_left have the right values. ### cur.tag == Header and next.tag == Header From 372a8ef6a60f1cc5500b9fd4d69415754a193baa Mon Sep 17 00:00:00 2001 From: Leo Lara Date: Mon, 26 Dec 2022 11:01:59 +0700 Subject: [PATCH 15/18] Improve test_bytecode_full --- tests/test_bytecode_circuit.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tests/test_bytecode_circuit.py b/tests/test_bytecode_circuit.py index ae520e3c8..4b350a874 100644 --- a/tests/test_bytecode_circuit.py +++ b/tests/test_bytecode_circuit.py @@ -70,8 +70,11 @@ def test_bytecode_empty(): def test_bytecode_full(): - bytecodes = [unroll(bytes([7] * (2**k - 1)), randomness)] - verify(k, bytecodes, randomness, False) # Last row must be tag=Header + bytecodes = [ + unroll(bytes([7] * (2**k - 2)), randomness), + unroll(bytes([]), randomness), # Last row must be tag=Header + ] + verify(k, bytecodes, randomness, True) def test_bytecode_incomplete(): From 461d32a73d430150c456258fa1db6ffe02772a93 Mon Sep 17 00:00:00 2001 From: Leo Lara Date: Fri, 30 Dec 2022 15:05:26 +0700 Subject: [PATCH 16/18] Update specs/bytecode-proof.md Co-authored-by: Han --- specs/bytecode-proof.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/specs/bytecode-proof.md b/specs/bytecode-proof.md index 1ec7b5e83..091b9b25f 100644 --- a/specs/bytecode-proof.md +++ b/specs/bytecode-proof.md @@ -52,7 +52,7 @@ Each following row unrolls a single byte (using `tag = Byte` and `value = the ac All byte data is accumulated per byte (with one byte per row) into `value_rlc` as follows, where r is a challenge: ``` -first_bytecode.value_rlc := firstbytecode.value +first_bytecode.value_rlc := first_bytecode.value next.value_rlc := cur.value_rlc * r + next.value ``` From b65959821d006104e02481c3483cd9b2787b378a Mon Sep 17 00:00:00 2001 From: Leo Lara Date: Fri, 30 Dec 2022 15:14:15 +0700 Subject: [PATCH 17/18] Push without data is valid for bytecode circuit --- src/zkevm_specs/bytecode.py | 8 ++++---- tests/test_bytecode_circuit.py | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/zkevm_specs/bytecode.py b/src/zkevm_specs/bytecode.py index 4e2e4f9a9..5ba11e15d 100644 --- a/src/zkevm_specs/bytecode.py +++ b/src/zkevm_specs/bytecode.py @@ -41,10 +41,6 @@ def check_bytecode_row( if cur.tag == BytecodeFieldTag.Byte: assert (cur.value, cur.push_data_size) in push_table assert cur.is_code == (cur.push_data_left == 0) - if cur.is_code == 1: - assert next.push_data_left == cur.push_data_size - else: - assert next.push_data_left == cur.push_data_left - 1 if next.tag == BytecodeFieldTag.Byte: check_bytecode_row_byte_to_byte(cur, next, randomness) @@ -76,6 +72,10 @@ def check_bytecode_row_byte_to_byte(cur: Row, next: Row, r: int): assert next.index == cur.index + 1 assert next.hash == cur.hash assert next.value_rlc == cur.value_rlc * r + next.value + if cur.is_code == 1: + assert next.push_data_left == cur.push_data_size + else: + assert next.push_data_left == cur.push_data_left - 1 @is_circuit_code diff --git a/tests/test_bytecode_circuit.py b/tests/test_bytecode_circuit.py index 4b350a874..76dd572c3 100644 --- a/tests/test_bytecode_circuit.py +++ b/tests/test_bytecode_circuit.py @@ -90,7 +90,7 @@ def test_bytecode_multiple(): unroll(bytes([Opcode.ADD, Opcode.PUSH32]), randomness), unroll(bytes([Opcode.ADD, Opcode.PUSH32, Opcode.ADD]), randomness), ] - verify(k, bytecodes, randomness, False) # Push without data must fail + verify(k, bytecodes, randomness, True) def test_bytecode_invalid_hash_data(): @@ -188,7 +188,7 @@ def test_bytecode_invalid_is_code(): ), randomness, ) - verify(k, [unrolled], randomness, False) # Push without data must fail + verify(k, [unrolled], randomness, True) # The first row, i.e. index == 0 is taken up by the tag Length. # Mark the 3rd byte as code (is push data from the first PUSH1) From 12726cc07c7b3affef2d951952ede5c54fb3ad36 Mon Sep 17 00:00:00 2001 From: Leo Lara Date: Fri, 30 Dec 2022 15:52:44 +0700 Subject: [PATCH 18/18] Last row checks --- src/zkevm_specs/bytecode.py | 1 + tests/test_bytecode_circuit.py | 74 +++++++++++++++++++++++++++++++++- 2 files changed, 73 insertions(+), 2 deletions(-) diff --git a/src/zkevm_specs/bytecode.py b/src/zkevm_specs/bytecode.py index 5ba11e15d..881a1e6f2 100644 --- a/src/zkevm_specs/bytecode.py +++ b/src/zkevm_specs/bytecode.py @@ -49,6 +49,7 @@ def check_bytecode_row( if cur.q_last == 1: assert cur.tag == BytecodeFieldTag.Header + check_bytecode_row_header_to_header(cur, randomness) @is_circuit_code diff --git a/tests/test_bytecode_circuit.py b/tests/test_bytecode_circuit.py index 76dd572c3..d2510f24d 100644 --- a/tests/test_bytecode_circuit.py +++ b/tests/test_bytecode_circuit.py @@ -3,7 +3,7 @@ from zkevm_specs.bytecode import * from zkevm_specs.evm import Opcode, Bytecode, BytecodeFieldTag, BytecodeTableRow, is_push -from zkevm_specs.util import RLC, rand_fq +from zkevm_specs.util import RLC, rand_fq, U256 # Unroll the bytecode def unroll(bytecode, randomness): @@ -12,9 +12,13 @@ def unroll(bytecode, randomness): # Verify the bytecode circuit with the given data def verify(k, bytecodes, randomness, success): + rows = assign_bytecode_circuit(k, bytecodes, randomness) + verify_rows(bytecodes, rows, success) + + +def verify_rows(bytecodes, rows, success): push_table = assign_push_table() keccak_table = assign_keccak_table(map(lambda v: v.bytes, bytecodes), randomness) - rows = assign_bytecode_circuit(k, bytecodes, randomness) try: for (idx, row) in enumerate(rows): next_row = rows[(idx + 1) % len(rows)] @@ -208,3 +212,69 @@ def test_bytecode_invalid_is_code(): row = unrolled.rows[7] invalid.rows[7] = BytecodeTableRow(row.bytecode_hash, row.field_tag, row.index, 1, row.value) verify(k, [invalid], randomness, False) + + +def test_last_row(): + unrolled = unroll(bytes([8, 2, 3, 8, 9, 7, 128]), randomness) + verify(k, [unrolled], randomness, True) + + # last row has length != 0 + rows = assign_bytecode_circuit(k, [unrolled], randomness) + rows[-1] = Row( + q_first=0, + q_last=1, + hash=RLC(EMPTY_HASH, FQ(randomness)).expr(), + tag=BytecodeFieldTag.Header, + index=0, + value=0, + is_code=False, + push_data_left=0, + value_rlc=0, + length=1000, + push_data_size=0, + ) + verify_rows([unrolled], rows, False) + + # last row has hash != EMPTY_HASH + NOT_EMPTY_HASH = U256( + int.from_bytes( + keccak256(bytes("why is there something instead of nothing?", "utf-8")), "big" + ) + ) + rows = assign_bytecode_circuit(k, [unrolled], randomness) + rows[-1] = Row( + q_first=0, + q_last=1, + hash=RLC(NOT_EMPTY_HASH, FQ(randomness)).expr(), + tag=BytecodeFieldTag.Header, + index=0, + value=0, + is_code=False, + push_data_left=0, + value_rlc=0, + length=0, + push_data_size=0, + ) + verify_rows([unrolled], rows, False) + + # last row is not Header + NOT_EMPTY_HASH = U256( + int.from_bytes( + keccak256(bytes("why is there something instead of nothing?", "utf-8")), "big" + ) + ) + rows = assign_bytecode_circuit(k, [unrolled], randomness) + rows[-1] = Row( + q_first=0, + q_last=1, + hash=RLC(NOT_EMPTY_HASH, FQ(randomness)).expr(), + tag=BytecodeFieldTag.Byte, + index=0, + value=0, + is_code=False, + push_data_left=0, + value_rlc=0, + length=0, + push_data_size=0, + ) + verify_rows([unrolled], rows, False)