privacy-ethereum · han0110 · Apr 1, 2022 · Mar 11, 2022 · Mar 11, 2022 · Mar 16, 2022
diff --git a/specs/opcode/39CODECOPY.md b/specs/opcode/39CODECOPY.md
@@ -0,0 +1,43 @@
+# CODECOPY opcode
+
+## Procedure
+
+The `CODECOPY` opcode pops `memory_offset`, `code_offset` and `size` from the stack.
+It then copies `size` bytes of code running in the current environment from an offset `code_offset` to the memory at the address `memory_offset`. For out-of-bound scenarios where `size > len(code) - code_offset`, EVM pads 0 to the end of the copied bytes.
+
+The gas cost of `CODECOPY` opcode consists of two parts:
+
+1. A constant gas cost: `3 gas`
+2. A dynamic gas cost: cost of memory expansion and copying (variable depending on the `size` copied to memory)
+
+## Circuit Behaviour
+
+`CODECOPY` makes use of the internal execution step `CopyCodeToMemory` and loops over these steps iteratively until there are no more bytes to be copied. The `CODECOPY` circuit itself only constrains the values popped from stack and call context/account read lookups.
+
+The gadget then transits to the internal state of `CopyCodeToMemory`.
+
+## Constraints
+
+1. opId = 0x39
+2. State Transitions:
+   - rw_counter -> rw_counter + 3 (3 stack reads)
+   - stack_pointer -> stack_pointer + 3
+   - pc -> pc + 1
+   - gas -> 3 + dynamic_cost (memory expansion and copier cost when `size > 0`)
+   - memory_size
+     - `prev_memory_size` if `size = 0`
+     - `max(prev_memory_size, (memory_offset + size + 31) / 32)` if `size > 0`
+3. Lookups:
+   - `memory_offset` is at the top of the stack
+   - `code_offset` is at the second position of the stack
+   - `size` is at the third position of the stack
+   - `code_size` from the bytecode table
+
+## Exceptions
+
+1. Stack Underflow: `1021 <= stack_pointer <= 1024`
+2. Out-of-Gas: remaining gas is not enough
+
+## Code
+
+Please refer to `src/zkevm_specs/evm/execution/codecopy.py`
diff --git a/specs/opcode/CopyCodeToMemory.md b/specs/opcode/CopyCodeToMemory.md
@@ -0,0 +1,31 @@
+# CopyCodeToMemory
+
+## Circuit Behaviour
+
+`CopyCodeToMemory` is an internal execution state and doesn't correspond to an EVM opcode. It verifies that data from bytecode table has been written to memory. This gadget can in one iteration only copy `MAX_COPY_BYTES` number of bytes, hence for lengths longer than the bound the gadget loops itself until there are no more bytes to be copied.
+
+The `CopyCodeToMemory` circuit uses the `BufferReaderGadget` to check if the access is out of bounds and needs 0 padding.
+
+The `CopyCodeToMemory` circuit looks up the bytes read from buffer against both the bytecode table and the RW table (memory-write). An additional constraint checks whether or not the copying is finished, and if not, it constrains the next execution state to continue being `CopyCodeToMemory` while also adding constraints to the next step's auxiliary data.
+
+## Constraints
+
+We define `n_bytes_read` as the number of bytes read from the bytecode table. `n_bytes_read <= MAX_COPY_BYTES`.
+
+We define `n_bytes_written` as the number of bytes written to the memory. `n_bytes_written <= MAX_COPY_BYTES`.
+
+`n_bytes_read` differs from `n_bytes_written` in out-of-bound cases where nothing is read from the bytecode table but a `0` is written to memory.
+
+1. State Transition:
+   - rw_counter: `n_bytes_written`
+2. Lookups:
+   - `n_bytes_read` lookups from bytecode table
+   - `n_bytes_written` lookups from RW table (memory-write)
+
+## Exceptions
+
+No exceptions for `CopyCodeToMemory` since it is an internal state.
+
+## Code
+
+Please refer to `src/zkevm_specs/evm/execution/copy_code_to_memory.py`.
diff --git a/src/zkevm_specs/bytecode.py b/src/zkevm_specs/bytecode.py
@@ -1,13 +1,13 @@
 from typing import Sequence, Union, Tuple, Set, NamedTuple
 from collections import namedtuple
-from .util import keccak256, FQ, RLC
-from .evm import get_push_size, BytecodeTableRow
+from .util import keccak256, EMPTY_HASH, FQ, RLC
+from .evm import get_push_size, BytecodeFieldTag, BytecodeTableRow
 from .encoding import U8, U256, is_circuit_code
 
 # Row in the circuit
 Row = namedtuple(
     "Row",
-    "q_first q_last hash index byte is_code push_data_left hash_rlc hash_length byte_push_size is_final padding",
+    "q_first q_last hash tag index is_code value push_data_left hash_rlc hash_length byte_push_size is_final padding",
 )
 # Unrolled bytecode
 class UnrolledBytecode(NamedTuple):
@@ -33,21 +33,28 @@ def select(
 def check_bytecode_row(
     row: Row,
     prev_row: Row,
+    next_row: Row,
     push_table: Set[Tuple[int, int]],
     keccak_table: Set[Tuple[int, int, int]],
     r: int,
 ):
     row = Row(*[v if isinstance(v, RLC) else FQ(v) for v in row])
     prev_row = Row(*[v if isinstance(v, RLC) else FQ(v) for v in prev_row])
+    next_row = Row(*[v if isinstance(v, RLC) else FQ(v) for v in next_row])
     if row.q_first == 0 and prev_row.is_final == 0:
         # Continue
-        # index needs to increase by 1
-        assert row.index == prev_row.index + 1
-        # is_code := push_data_left_prev == 0
-        assert row.is_code == (prev_row.push_data_left == 0)
+        if prev_row.tag == BytecodeFieldTag.Length:
+            # index starts from 0
+            assert row.index == 0
+            # is_code := 1, since this is the first byte of the bytecode
+            assert row.is_code == 1
+        else:
+            # index is 1 more than previous row's index
+            assert row.index == prev_row.index + 1
+            # is_code := push_data_left_prev == 0
+            assert row.is_code == (prev_row.push_data_left == 0)
         # hash_rlc := hash_rlc_prev * r + byte
-        assert row.hash_rlc == prev_row.hash_rlc * r + row.byte
-
+        assert row.hash_rlc == prev_row.hash_rlc * r + row.value
         # padding needs to remain the same
         assert row.padding == prev_row.padding
         # hash needs to remain the same
@@ -56,21 +63,33 @@ def check_bytecode_row(
         assert row.hash_length == prev_row.hash_length
     else:
         # Start
-        # index needs to start at 0
-        assert row.index == 0
-        # is_code needs to be 1 (first byte is always an opcode)
-        assert row.is_code == True
-        # hash_rlc needs to start at byte
-        assert row.hash_rlc == row.byte
+        # the row following an `is_final` previous row is either tagged Length
+        if row.tag == BytecodeFieldTag.Length:
+            # value matches hash length
+            assert row.value == row.hash_length
+            # if bytecode length is zero
+            if row.value == 0:
+                # bytecode hash should be EMPTY_HASH
+                assert row.hash == RLC(EMPTY_HASH, FQ(r)).expr()
+                # the next row should be a tag Length or padding
+                assert (next_row.tag == BytecodeFieldTag.Length) or (next_row.tag == 0)
+            else:
+                # the next row should be tag Byte
+                assert next_row.tag == BytecodeFieldTag.Byte
+        # or is the start of padding rows
+        else:
+            assert row.padding == 1
 
     # is_final needs to be boolean
     assert_bool(row.is_final)
     # padding needs to be boolean
     assert_bool(row.padding)
-    # push_data_left := is_code ? byte_push_size : push_data_left_prev - 1
-    assert row.push_data_left == select(
-        row.is_code, row.byte_push_size, prev_row.push_data_left - 1
-    )
+
+    if row.tag == BytecodeFieldTag.Byte:
+        # push_data_left := is_code ? byte_push_size : push_data_left_prev - 1
+        assert row.push_data_left == select(
+            row.is_code, row.byte_push_size, prev_row.push_data_left - 1
+        )
 
     # Padding
     if row.q_first == 0:
@@ -87,9 +106,10 @@ def check_bytecode_row(
         # the last row needs to be the last byte
         assert row.padding == 1 or row.is_final == 1
 
-    # Lookup how many bytes the current opcode pushes
-    # (also indirectly range checks `byte` to be in [0, 255])
-    assert (row.byte, row.byte_push_size) in push_table
+    if row.tag == BytecodeFieldTag.Byte:
+        # Lookup how many bytes the current opcode pushes
+        # (also indirectly range checks `byte` to be in [0, 255])
+        assert (row.value, row.byte_push_size) in push_table
 
     # keccak lookup when on the last byte
     if row.is_final == 1 and row.padding == 0:
@@ -107,28 +127,32 @@ def assign_bytecode_circuit(k: int, bytecodes: Sequence[UnrolledBytecode], rando
         push_data_left = 0
         hash_rlc = FQ(0)
         for idx, row in enumerate(bytecode.rows):
+            # Subsequent rows represent the bytecode bytes
             # Track which byte is an opcode and which is push data
             is_code = push_data_left == 0
-            byte_push_size = get_push_size(row.byte)
-            push_data_left = byte_push_size if is_code else push_data_left - 1
-
-            # Add the byte to the accumulator
-            hash_rlc = hash_rlc * randomness + row.byte
+            byte_push_size = 0
+            if idx > 0:
+                byte_push_size = get_push_size(row.value)
+                push_data_left = byte_push_size if is_code else push_data_left - 1
+                # Add the byte to the accumulator
+                hash_rlc = hash_rlc * randomness + row.value
 
             # Set the data for this row
             rows.append(
                 Row(
                     offset == 0,
                     offset == last_row_offset,
                     row.bytecode_hash,
+                    row.field_tag,
                     row.index,
-                    row.byte,
                     row.is_code,
+                    row.value,
                     push_data_left,
                     hash_rlc,
                     len(bytecode.bytes),
                     byte_push_size,
-                    idx == len(bytecode.bytes) - 1,
+                    # Since 1 row is taken up by the Length tag
+                    idx == len(bytecode.bytes),
                     False,
                 )
             )
@@ -147,6 +171,7 @@ def assign_bytecode_circuit(k: int, bytecodes: Sequence[UnrolledBytecode], rando
                 0,
                 0,
                 0,
+                0,
                 True,
                 0,
                 0,

diff --git a/src/zkevm_specs/evm/execution/__init__.py b/src/zkevm_specs/evm/execution/__init__.py
@@ -3,6 +3,7 @@
 from ..execution_state import ExecutionState
 
 from .begin_tx import *
+from .copy_code_to_memory import *
 from .end_tx import *
 from .end_block import *
 from .memory_copy import *
@@ -19,6 +20,7 @@
 from .callvalue import *
 from .calldatacopy import *
 from .calldataload import *
+from .codecopy import *
 from .gas import *
 from .iszero import *
 from .jump import *
@@ -39,13 +41,15 @@
     ExecutionState.EndTx: end_tx,
     ExecutionState.EndBlock: end_block,
     ExecutionState.CopyToMemory: copy_to_memory,
+    ExecutionState.CopyCodeToMemory: copy_code_to_memory,
     ExecutionState.ADD: add,
     ExecutionState.ORIGIN: origin,
     ExecutionState.CALLER: caller,
     ExecutionState.CALLVALUE: callvalue,
     ExecutionState.CALLDATACOPY: calldatacopy,
     ExecutionState.CALLDATALOAD: calldataload,
     ExecutionState.CALLDATASIZE: calldatasize,
+    ExecutionState.CODECOPY: codecopy,
     ExecutionState.COINBASE: coinbase,
     ExecutionState.TIMESTAMP: timestamp,
     ExecutionState.NUMBER: number,

diff --git a/src/zkevm_specs/evm/execution/calldatacopy.py b/src/zkevm_specs/evm/execution/calldatacopy.py
@@ -33,7 +33,7 @@ def calldatacopy(instruction: Instruction):
     gas_cost = instruction.memory_copier_gas_cost(length, memory_expansion_gas_cost)
 
     # When length != 0, constrain the state in the next execution state CopyToMemory
-    if not instruction.is_zero(length):
+    if instruction.is_zero(length) == FQ(0):
         assert instruction.next is not None
         instruction.constrain_equal(instruction.next.execution_state, ExecutionState.CopyToMemory)
         next_aux = instruction.next.aux_data

diff --git a/src/zkevm_specs/evm/execution/codecopy.py b/src/zkevm_specs/evm/execution/codecopy.py
@@ -0,0 +1,47 @@
+from ...util import N_BYTES_MEMORY_ADDRESS, FQ
+from ..execution_state import ExecutionState
+from ..instruction import Instruction, Transition
+from ..step import CopyCodeToMemoryAuxData
+from ..table import RW, RWTableTag, CallContextFieldTag, AccountFieldTag
+
+
+def codecopy(instruction: Instruction):
+    opcode = instruction.opcode_lookup(True)
+
+    memory_offset_word, code_offset_word, size_word = (
+        instruction.stack_pop(),
+        instruction.stack_pop(),
+        instruction.stack_pop(),
+    )
+
+    memory_offset, size = instruction.memory_offset_and_length(memory_offset_word, size_word)
+    code_offset = instruction.rlc_to_fq_exact(code_offset_word, N_BYTES_MEMORY_ADDRESS)
+
+    code_size = instruction.bytecode_length(instruction.curr.code_source)
+
+    next_memory_size, memory_expansion_gas_cost = instruction.memory_expansion_dynamic_length(
+        memory_offset, size
+    )
+    gas_cost = instruction.memory_copier_gas_cost(size, memory_expansion_gas_cost)
+
+    if instruction.is_zero(size) == FQ(0):
+        assert instruction.next is not None
+        instruction.constrain_equal(
+            instruction.next.execution_state, ExecutionState.CopyCodeToMemory
+        )
+        next_aux = instruction.next.aux_data
+        assert isinstance(next_aux, CopyCodeToMemoryAuxData)
+        instruction.constrain_equal(next_aux.src_addr, code_offset)
+        instruction.constrain_equal(next_aux.dst_addr, memory_offset)
+        instruction.constrain_equal(next_aux.src_addr_end, code_size)
+        instruction.constrain_equal(next_aux.bytes_left, size)
+        instruction.constrain_equal(next_aux.code_source, instruction.curr.code_source)
+
+    instruction.step_state_transition_in_same_context(
+        opcode,
+        rw_counter=Transition.delta(instruction.rw_counter_offset),
+        program_counter=Transition.delta(1),
+        stack_pointer=Transition.delta(3),
+        memory_size=Transition.to(next_memory_size),
+        dynamic_gas_cost=gas_cost,
+    )
diff --git a/src/zkevm_specs/evm/execution/copy_code_to_memory.py b/src/zkevm_specs/evm/execution/copy_code_to_memory.py
@@ -0,0 +1,62 @@
+import itertools
+from typing import Iterator
+
+from ...util import FQ, MAX_N_BYTES_COPY_CODE_TO_MEMORY, N_BYTES_MEMORY_SIZE, RLC
+from ..execution_state import ExecutionState
+from ..instruction import Instruction, Transition
+from ..step import CopyCodeToMemoryAuxData
+from ..table import RW
+from ..util import BufferReaderGadget
+
+
+def copy_code_to_memory(instruction: Instruction):
+    aux = instruction.curr.aux_data
+    assert isinstance(aux, CopyCodeToMemoryAuxData)
+
+    buffer_reader = BufferReaderGadget(
+        instruction, MAX_N_BYTES_COPY_CODE_TO_MEMORY, aux.src_addr, aux.src_addr_end, aux.bytes_left
+    )
+
+    for idx in range(MAX_N_BYTES_COPY_CODE_TO_MEMORY):
+        if buffer_reader.read_flag(idx) == 1:
+            byte = instruction.bytecode_lookup(
+                aux.code_source,
+                aux.src_addr + idx,
+            )
+            buffer_reader.constrain_byte(idx, byte)
+
+    for idx in range(MAX_N_BYTES_COPY_CODE_TO_MEMORY):
+        if buffer_reader.has_data(idx) == 1:
+            byte = instruction.memory_lookup(RW.Write, aux.dst_addr + idx)
+            buffer_reader.constrain_byte(idx, byte)
+
+    copied_bytes = buffer_reader.num_bytes()
+    lt, finished = instruction.compare(copied_bytes, aux.bytes_left, N_BYTES_MEMORY_SIZE)
+
+    # either copied bytes are less than the bytes left, or copying is finished
+    instruction.constrain_zero((1 - lt) * (1 - finished))
+
+    if finished == 0:
+        assert instruction.next is not None
+        instruction.constrain_equal(
+            instruction.next.execution_state, ExecutionState.CopyCodeToMemory
+        )
+        next_aux = instruction.next.aux_data
+        assert next_aux is not None and isinstance(next_aux, CopyCodeToMemoryAuxData)
+        instruction.constrain_equal(next_aux.src_addr, aux.src_addr + copied_bytes)
+        instruction.constrain_equal(next_aux.dst_addr, aux.dst_addr + copied_bytes)
+        instruction.constrain_equal(next_aux.bytes_left + copied_bytes, aux.bytes_left)
+        instruction.constrain_equal(next_aux.src_addr_end, aux.src_addr_end)
+        instruction.constrain_equal(next_aux.code_source, aux.code_source)
+
+    instruction.constrain_step_state_transition(
+        rw_counter=Transition.delta(instruction.rw_counter_offset),
+        call_id=Transition.same(),
+        is_root=Transition.same(),
+        is_create=Transition.same(),
+        code_source=Transition.same(),
+        program_counter=Transition.same(),
+        stack_pointer=Transition.same(),
+        memory_size=Transition.same(),
+        state_write_counter=Transition.same(),
+    )