From 4632d186142eef7e73525dfb13483b668e9de0fe Mon Sep 17 00:00:00 2001 From: Rohit Narurkar Date: Fri, 11 Mar 2022 13:46:19 +0800 Subject: [PATCH 01/16] feat: copy code to memory --- src/zkevm_specs/evm/execution/__init__.py | 2 + src/zkevm_specs/evm/execution/codecopy.py | 0 .../evm/execution/copy_code_to_memory.py | 69 +++++++ src/zkevm_specs/evm/execution_state.py | 1 + src/zkevm_specs/evm/step.py | 23 +++ tests/evm/test_codecopy.py | 188 ++++++++++++++++++ 6 files changed, 283 insertions(+) create mode 100644 src/zkevm_specs/evm/execution/codecopy.py create mode 100644 src/zkevm_specs/evm/execution/copy_code_to_memory.py create mode 100644 tests/evm/test_codecopy.py diff --git a/src/zkevm_specs/evm/execution/__init__.py b/src/zkevm_specs/evm/execution/__init__.py index a9c9fed36..b639f1171 100644 --- a/src/zkevm_specs/evm/execution/__init__.py +++ b/src/zkevm_specs/evm/execution/__init__.py @@ -3,6 +3,7 @@ from ..execution_state import ExecutionState from .begin_tx import * +from .copy_code_to_memory import * from .end_tx import * from .end_block import * from .memory_copy import * @@ -39,6 +40,7 @@ ExecutionState.EndTx: end_tx, ExecutionState.EndBlock: end_block, ExecutionState.CopyToMemory: copy_to_memory, + ExecutionState.CopyCodeToMemory: copy_code_to_memory, ExecutionState.ADD: add, ExecutionState.ORIGIN: origin, ExecutionState.CALLER: caller, diff --git a/src/zkevm_specs/evm/execution/codecopy.py b/src/zkevm_specs/evm/execution/codecopy.py new file mode 100644 index 000000000..e69de29bb diff --git a/src/zkevm_specs/evm/execution/copy_code_to_memory.py b/src/zkevm_specs/evm/execution/copy_code_to_memory.py new file mode 100644 index 000000000..0ba0cda41 --- /dev/null +++ b/src/zkevm_specs/evm/execution/copy_code_to_memory.py @@ -0,0 +1,69 @@ +import itertools +from typing import Iterator + +from ...util import N_BYTES_MEMORY_SIZE, RLC +from ..execution_state import ExecutionState +from ..instruction import Instruction, Transition +from ..step import 
CopyCodeToMemoryAuxData +from ..table import RW +from ..util import BufferReaderGadget + + +MAX_COPY_BYTES = 54 + + +def copy_code_to_memory(instruction: Instruction): + aux = instruction.curr.aux_data + assert isinstance(aux, CopyCodeToMemoryAuxData) + + buffer_reader = BufferReaderGadget( + instruction, MAX_COPY_BYTES, aux.src_addr, aux.src_addr_end, aux.bytes_left + ) + + is_codes = [ + c[3] + for c in itertools.islice( + aux.code.table_assignments(instruction.randomness), + aux.src_addr.n, + aux.src_addr.n + MAX_COPY_BYTES, + ) + ] + for (idx, is_code) in enumerate(is_codes): + if buffer_reader.read_flag(idx): + buffer_reader.constrain_byte( + idx, + instruction.bytecode_lookup( + RLC(aux.code.hash(), instruction.randomness), + aux.src_addr + idx, + is_code, + ), + ) + + for idx in range(min(aux.bytes_left.n, MAX_COPY_BYTES)): + if buffer_reader.has_data(idx): + buffer_reader.constrain_byte( + idx, + instruction.memory_lookup(RW.Write, aux.dst_addr + idx), + ) + + copied_bytes = buffer_reader.num_bytes() + lt, finished = instruction.compare(copied_bytes, aux.bytes_left, N_BYTES_MEMORY_SIZE) + + # either copied bytes are less than the bytes left, or copying is finished + instruction.constrain_zero((1 - lt) * (1 - finished)) + + if finished == 0: + instruction.constrain_equal( + instruction.next.execution_state, ExecutionState.CopyCodeToMemory + ) + next_aux = instruction.next.aux_data + assert next_aux is not None and isinstance(next_aux, CopyCodeToMemoryAuxData) + instruction.constrain_equal(next_aux.src_addr, aux.src_addr + copied_bytes) + instruction.constrain_equal(next_aux.dst_addr, aux.dst_addr + copied_bytes) + instruction.constrain_equal(next_aux.bytes_left + copied_bytes, aux.bytes_left) + instruction.constrain_equal(next_aux.src_addr_end, aux.src_addr_end) + instruction.constrain_equal(next_aux.code.hash(), aux.code.hash()) + + instruction.constrain_step_state_transition( + rw_counter=Transition.delta(instruction.rw_counter_offset), + ) diff --git 
a/src/zkevm_specs/evm/execution_state.py b/src/zkevm_specs/evm/execution_state.py index 3ba0e4f14..4b4930c03 100644 --- a/src/zkevm_specs/evm/execution_state.py +++ b/src/zkevm_specs/evm/execution_state.py @@ -21,6 +21,7 @@ class ExecutionState(IntEnum): EndBlock = auto() CopyToMemory = auto() CopyToLog = auto() + CopyCodeToMemory = auto() # Opcode's successful cases STOP = auto() diff --git a/src/zkevm_specs/evm/step.py b/src/zkevm_specs/evm/step.py index c524fed71..d34646374 100644 --- a/src/zkevm_specs/evm/step.py +++ b/src/zkevm_specs/evm/step.py @@ -1,5 +1,6 @@ from typing import Any from .execution_state import ExecutionState +from .typing import Bytecode from ..util import FQ, RLC @@ -120,3 +121,25 @@ def __init__( self.bytes_left = FQ(bytes_left) self.src_addr_end = FQ(src_addr_end) self.is_persistent = FQ(is_persistent) + + +class CopyCodeToMemoryAuxData: + src_addr: FQ + dst_addr: FQ + bytes_left: FQ + src_addr_end: FQ + code: Bytecode + + def __init__( + self, + src_addr: int, + dst_addr: int, + bytes_left: int, + src_addr_end: int, + code: Bytecode, + ): + self.src_addr = FQ(src_addr) + self.dst_addr = FQ(dst_addr) + self.bytes_left = FQ(bytes_left) + self.src_addr_end = FQ(src_addr_end) + self.code = code diff --git a/tests/evm/test_codecopy.py b/tests/evm/test_codecopy.py new file mode 100644 index 000000000..4e3395831 --- /dev/null +++ b/tests/evm/test_codecopy.py @@ -0,0 +1,188 @@ +from itertools import chain +import pytest +from typing import Mapping, Sequence, Tuple + +from zkevm_specs.evm import ( + Bytecode, + CopyCodeToMemoryAuxData, + ExecutionState, + RW, + RWTableTag, + StepState, + Tables, + verify_steps, +) +from zkevm_specs.evm.execution.copy_code_to_memory import MAX_COPY_BYTES +from zkevm_specs.util import rand_fp, RLC, U64 + + +CALL_ID = 1 +TESTING_DATA = ( + # single step + (0x00, 0x00, 54), + # multi step + (0x00, 0x40, 123), + # out of bounds + (0x10, 0x20, 200), +) + + +def make_copy_code_step( + code: Bytecode, + code_source: RLC, 
+ buffer_map: Mapping[int, int], + src_addr: int, + dst_addr: int, + src_addr_end: int, + bytes_left: int, + program_counter: int, + stack_pointer: int, + memory_size: int, + rw_counter: int, +) -> Tuple[StepState, Sequence[RW]]: + aux_data = CopyCodeToMemoryAuxData( + src_addr=src_addr, + dst_addr=dst_addr, + src_addr_end=src_addr_end, + bytes_left=bytes_left, + code=code, + ) + step = StepState( + execution_state=ExecutionState.CopyCodeToMemory, + rw_counter=rw_counter, + call_id=CALL_ID, + is_root=False, + program_counter=program_counter, + stack_pointer=stack_pointer, + gas_left=0, + memory_size=memory_size, + code_source=code_source, + aux_data=aux_data, + ) + rws = [] + num_bytes = min(MAX_COPY_BYTES, bytes_left) + for i in range(num_bytes): + byte = buffer_map[src_addr + i] if src_addr + i < src_addr_end else 0 + rws.append( + ( + rw_counter, + RW.Write, + RWTableTag.Memory, + CALL_ID, + dst_addr + i, + 0, + byte, + 0, + 0, + 0, + ) + ) + rw_counter += 1 + return step, rws + + +def make_copy_code_steps( + code: Bytecode, + code_source: RLC, + src_addr: int, + dst_addr: int, + length: int, + program_counter: int, + stack_pointer: int, + memory_size: int, + rw_counter: int, +) -> Tuple[Sequence[StepState], Sequence[RW]]: + buffer_map = dict(zip(range(src_addr, len(code.code)), code.code)) + steps = [] + rws = [] + bytes_left = length + while bytes_left > 0: + curr_rw_counter = rws[-1][0] + 1 if rws else rw_counter + new_step, new_rws = make_copy_code_step( + code, + code_source, + buffer_map, + src_addr, + dst_addr, + len(code.code), + bytes_left, + program_counter, + stack_pointer, + memory_size, + curr_rw_counter, + ) + steps.append(new_step) + rws.extend(new_rws) + src_addr += MAX_COPY_BYTES + dst_addr += MAX_COPY_BYTES + bytes_left -= MAX_COPY_BYTES + return steps, rws + + +def to_word_size(addr: int) -> int: + return (addr + 31) // 32 + + +@pytest.mark.parametrize("src_addr, dst_addr, length", TESTING_DATA) +def test_copy_code_to_memory(src_addr: U64, 
dst_addr: U64, length: U64): + randomness = rand_fp() + + code = ( + Bytecode() + .push32(0x123) + .pop() + .push32(0x213) + .pop() + .push32(0x321) + .pop() + .push32(0x12349AB) + .pop() + .push32(0x1928835) + .pop() + ) + print(len(code.code)) + + dummy_code = Bytecode().stop() + code_source = RLC(dummy_code.hash(), randomness) + + next_memory_word_size = to_word_size(dst_addr + length) + steps, rws = make_copy_code_steps( + code, + code_source, + src_addr, + dst_addr, + length, + program_counter=100, + memory_size=next_memory_word_size, + stack_pointer=1024, + rw_counter=1, + ) + steps.append( + StepState( + execution_state=ExecutionState.STOP, + rw_counter=rws[-1][0] + 1, + call_id=CALL_ID, + is_root=False, + is_create=False, + code_source=code_source, + program_counter=100, + stack_pointer=1024, + memory_size=next_memory_word_size, + gas_left=0, + ) + ) + + tables = Tables( + block_table=set(), + tx_table=set(), + bytecode_table=set(code.table_assignments(randomness)).union( + dummy_code.table_assignments(randomness) + ), + rw_table=set(rws), + ) + + verify_steps( + randomness=randomness, + tables=tables, + steps=steps, + ) From a15ba0c4c5f9cf708f833614dbc027d282cfc556 Mon Sep 17 00:00:00 2001 From: Rohit Narurkar Date: Fri, 11 Mar 2022 15:55:24 +0800 Subject: [PATCH 02/16] doc(CopyCodeToMemory): spec for copy code to memory gadget --- specs/opcode/CopyCodeToMemory.md | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 specs/opcode/CopyCodeToMemory.md diff --git a/specs/opcode/CopyCodeToMemory.md b/specs/opcode/CopyCodeToMemory.md new file mode 100644 index 000000000..c6b67abde --- /dev/null +++ b/specs/opcode/CopyCodeToMemory.md @@ -0,0 +1,31 @@ +# CopyCodeToMemory + +## Circuit Behaviour + +`CopyCodeToMemory` is an internal execution state and doesn't correspond to an EVM opcode. It verifies that data from bytecode table has been written to memory. 
This gadget can in one iteration only copy `MAX_COPY_BYTES` number of bytes, hence for lengths longer than the bound the gadget loops itself until there are no more bytes to be copied. + +The `CopyCodeToMemory` circuit uses the `BufferReaderGadget` to check if the access is out of bounds and needs 0 padding. + +The `CopyCodeToMemory` circuit looks up the bytes read from buffer against both the bytecode table and the RW table (memory-write). An additional constraint checks whether or not the copying is finished, and if not, it constrains the next execution state to continue being `CopyCodeToMemory` while also adding constraints to the next step's auxiliary data. + +## Constraints + +We define `n_bytes_read` as the number of bytes read from the bytecode table. `n_bytes_read <= MAX_COPY_BYTES`. + +We define `n_bytes_written` as the number of bytes written to the memory. `n_bytes_written <= MAX_COPY_BYTES`. + +`n_bytes_read` differs from `n_bytes_written` in out-of-bound cases where nothing is read from the bytecode table but a `0` is written to memory. + +1. State Transition: + - rw_counter: `n_bytes_written` +2. Lookups: + - `n_bytes_read` lookups from bytecode table + - `n_bytes_written` lookups from RW table (memory-write) + +## Exceptions + +No exceptions for `CopyCodeToMemory` since it is an internal state. + +## Code + +Please refer to `src/zkevm_specs/evm/execution/copy_code_to_memory.py`. 
From e5a468bb06f5458bd005b5551458d56cdd6d60c1 Mon Sep 17 00:00:00 2001 From: Rohit Narurkar Date: Wed, 16 Mar 2022 11:02:47 +0800 Subject: [PATCH 03/16] feat: codecopy test --- specs/opcode/39CODECOPY.md | 0 src/zkevm_specs/evm/execution/__init__.py | 2 + src/zkevm_specs/evm/execution/codecopy.py | 48 ++++++ src/zkevm_specs/evm/table.py | 1 + tests/evm/test_codecopy.py | 185 +++++++++++++++++++++- 5 files changed, 230 insertions(+), 6 deletions(-) create mode 100644 specs/opcode/39CODECOPY.md diff --git a/specs/opcode/39CODECOPY.md b/specs/opcode/39CODECOPY.md new file mode 100644 index 000000000..e69de29bb diff --git a/src/zkevm_specs/evm/execution/__init__.py b/src/zkevm_specs/evm/execution/__init__.py index b639f1171..699c949fc 100644 --- a/src/zkevm_specs/evm/execution/__init__.py +++ b/src/zkevm_specs/evm/execution/__init__.py @@ -20,6 +20,7 @@ from .callvalue import * from .calldatacopy import * from .calldataload import * +from .codecopy import * from .gas import * from .iszero import * from .jump import * @@ -48,6 +49,7 @@ ExecutionState.CALLDATACOPY: calldatacopy, ExecutionState.CALLDATALOAD: calldataload, ExecutionState.CALLDATASIZE: calldatasize, + ExecutionState.CODECOPY: codecopy, ExecutionState.COINBASE: coinbase, ExecutionState.TIMESTAMP: timestamp, ExecutionState.NUMBER: number, diff --git a/src/zkevm_specs/evm/execution/codecopy.py b/src/zkevm_specs/evm/execution/codecopy.py index e69de29bb..816eeecde 100644 --- a/src/zkevm_specs/evm/execution/codecopy.py +++ b/src/zkevm_specs/evm/execution/codecopy.py @@ -0,0 +1,48 @@ +from ...util import N_BYTES_MEMORY_ADDRESS, FQ +from ..execution_state import ExecutionState +from ..instruction import Instruction, Transition +from ..step import CopyCodeToMemoryAuxData +from ..table import RW, RWTableTag, CallContextFieldTag, AccountFieldTag + + +def codecopy(instruction: Instruction): + opcode = instruction.opcode_lookup(True) + + memory_offset, code_offset, size = ( + instruction.stack_pop(), + 
instruction.stack_pop(), + instruction.stack_pop(), + ) + + memory_offset, size = instruction.memory_offset_and_length(memory_offset, size) + code_offset = instruction.rlc_to_fq_exact(code_offset, N_BYTES_MEMORY_ADDRESS) + + account = instruction.call_context_lookup(CallContextFieldTag.CalleeAddress) + code_size = instruction.account_read(account, AccountFieldTag.CodeSize) + code_hash = instruction.account_read(account, AccountFieldTag.CodeHash) + + next_memory_size, memory_expansion_gas_cost = instruction.memory_expansion_dynamic_length( + memory_offset, size + ) + gas_cost = instruction.memory_copier_gas_cost(size, memory_expansion_gas_cost) + + if not instruction.is_zero(size): + instruction.constrain_equal( + instruction.next.execution_state, ExecutionState.CopyCodeToMemory + ) + next_aux = instruction.next.aux_data + assert isinstance(next_aux, CopyCodeToMemoryAuxData) + instruction.constrain_equal(next_aux.src_addr, code_offset) + instruction.constrain_equal(next_aux.dst_addr, memory_offset) + instruction.constrain_equal(next_aux.src_addr_end, code_size) + instruction.constrain_equal(next_aux.bytes_left, size) + instruction.constrain_equal(FQ(next_aux.code.hash()), code_hash) + + instruction.step_state_transition_in_same_context( + opcode, + rw_counter=Transition.delta(instruction.rw_counter_offset), + program_counter=Transition.delta(1), + stack_pointer=Transition.delta(3), + memory_size=Transition.to(next_memory_size), + dynamic_gas_cost=gas_cost, + ) diff --git a/src/zkevm_specs/evm/table.py b/src/zkevm_specs/evm/table.py index 22cd620bb..7ca71b8d2 100644 --- a/src/zkevm_specs/evm/table.py +++ b/src/zkevm_specs/evm/table.py @@ -171,6 +171,7 @@ class AccountFieldTag(IntEnum): Nonce = auto() Balance = auto() CodeHash = auto() + CodeSize = auto() class CallContextFieldTag(IntEnum): diff --git a/tests/evm/test_codecopy.py b/tests/evm/test_codecopy.py index 4e3395831..29c044183 100644 --- a/tests/evm/test_codecopy.py +++ b/tests/evm/test_codecopy.py @@ -3,9 
+3,12 @@ from typing import Mapping, Sequence, Tuple from zkevm_specs.evm import ( + AccountFieldTag, Bytecode, + CallContextFieldTag, CopyCodeToMemoryAuxData, ExecutionState, + Opcode, RW, RWTableTag, StepState, @@ -13,7 +16,15 @@ verify_steps, ) from zkevm_specs.evm.execution.copy_code_to_memory import MAX_COPY_BYTES -from zkevm_specs.util import rand_fp, RLC, U64 +from zkevm_specs.util import ( + GAS_COST_COPY, + MEMORY_EXPANSION_LINEAR_COEFF, + MEMORY_EXPANSION_QUAD_DENOMINATOR, + rand_address, + rand_fp, + RLC, + U64, +) CALL_ID = 1 @@ -27,6 +38,24 @@ ) +def to_word_size(addr: int) -> int: + return (addr + 31) // 32 + + +def memory_gas_cost(memory_word_size: int) -> int: + quad_cost = memory_word_size * memory_word_size // MEMORY_EXPANSION_QUAD_DENOMINATOR + linear_cost = memory_word_size * MEMORY_EXPANSION_LINEAR_COEFF + return quad_cost + linear_cost + + +def memory_copier_gas_cost( + curr_memory_word_size: int, next_memory_word_size: int, length: int +) -> int: + curr_memory_cost = memory_gas_cost(curr_memory_word_size) + next_memory_cost = memory_gas_cost(next_memory_word_size) + return to_word_size(length) * GAS_COST_COPY + next_memory_cost - curr_memory_cost + + def make_copy_code_step( code: Bytecode, code_source: RLC, @@ -51,7 +80,7 @@ def make_copy_code_step( execution_state=ExecutionState.CopyCodeToMemory, rw_counter=rw_counter, call_id=CALL_ID, - is_root=False, + is_root=True, program_counter=program_counter, stack_pointer=stack_pointer, gas_left=0, @@ -119,8 +148,153 @@ def make_copy_code_steps( return steps, rws -def to_word_size(addr: int) -> int: - return (addr + 31) // 32 +@pytest.mark.parametrize("src_addr, dst_addr, length", TESTING_DATA) +def test_codecopy(src_addr: U64, dst_addr: U64, length: U64): + randomness = rand_fp() + callee_addr = rand_address() + + length_rlc = RLC(length, randomness) + src_addr_rlc = RLC(src_addr, randomness) + dst_addr_rlc = RLC(dst_addr, randomness) + + code = 
Bytecode().push32(length_rlc).push32(src_addr_rlc).push32(dst_addr_rlc).codecopy().stop() + + code_source = RLC(code.hash(), randomness) + next_memory_word_size = to_word_size(dst_addr + length) + + gas_cost_push32 = Opcode.PUSH32.constant_gas_cost() + gas_cost_codecopy = Opcode.CODECOPY.constant_gas_cost() + memory_copier_gas_cost( + 0, next_memory_word_size, length + ) + total_gas_cost = gas_cost_codecopy + (3 * gas_cost_push32) + + code_hash = code.hash() + rws = [ + (1, RW.Write, RWTableTag.Stack, CALL_ID, 1023, 0, length_rlc, 0, 0, 0), + (2, RW.Write, RWTableTag.Stack, CALL_ID, 1022, 0, src_addr_rlc, 0, 0, 0), + (3, RW.Write, RWTableTag.Stack, CALL_ID, 1021, 0, dst_addr_rlc, 0, 0, 0), + (4, RW.Read, RWTableTag.Stack, CALL_ID, 1021, 0, dst_addr_rlc, 0, 0, 0), + (5, RW.Read, RWTableTag.Stack, CALL_ID, 1022, 0, src_addr_rlc, 0, 0, 0), + (6, RW.Read, RWTableTag.Stack, CALL_ID, 1023, 0, length_rlc, 0, 0, 0), + ( + 7, + RW.Read, + RWTableTag.CallContext, + CALL_ID, + CallContextFieldTag.CalleeAddress, + 0, + callee_addr, + 0, + 0, + 0, + ), + ( + 8, + RW.Read, + RWTableTag.Account, + callee_addr, + AccountFieldTag.CodeSize, + 0, + len(code.code), + 0, + 0, + 0, + ), + ( + 9, + RW.Read, + RWTableTag.Account, + callee_addr, + AccountFieldTag.CodeHash, + 0, + code_hash, + 0, + 0, + 0, + ), + ] + steps = [ + StepState( + execution_state=ExecutionState.PUSH, + rw_counter=1, + call_id=1, + is_root=True, + code_source=code_source, + program_counter=0, + stack_pointer=1024, + gas_left=total_gas_cost, + ), + StepState( + execution_state=ExecutionState.PUSH, + rw_counter=2, + call_id=1, + is_root=True, + code_source=code_source, + program_counter=33, + stack_pointer=1023, + gas_left=total_gas_cost - gas_cost_push32, + ), + StepState( + execution_state=ExecutionState.PUSH, + rw_counter=3, + call_id=1, + is_root=True, + code_source=code_source, + program_counter=66, + stack_pointer=1022, + gas_left=total_gas_cost - 2 * gas_cost_push32, + ), + StepState( + 
execution_state=ExecutionState.CODECOPY, + rw_counter=4, + call_id=1, + is_root=True, + code_source=code_source, + program_counter=99, + stack_pointer=1021, + gas_left=gas_cost_codecopy, + ), + ] + + steps_internal, rws_internal = make_copy_code_steps( + code, + code_source, + src_addr, + dst_addr, + length, + program_counter=100, + memory_size=next_memory_word_size, + stack_pointer=1024, + rw_counter=10, + ) + steps.extend(steps_internal) + rws.extend(rws_internal) + + steps.append( + StepState( + execution_state=ExecutionState.STOP, + rw_counter=rws_internal[-1][0] + 1, + call_id=1, + is_root=True, + code_source=code_source, + program_counter=33, + stack_pointer=1024, + gas_left=0, + ) + ) + + tables = Tables( + block_table=set(), + tx_table=set(), + bytecode_table=set(code.table_assignments(randomness)), + rw_table=set(rws), + ) + + verify_steps( + randomness=randomness, + tables=tables, + steps=steps, + ) @pytest.mark.parametrize("src_addr, dst_addr, length", TESTING_DATA) @@ -140,7 +314,6 @@ def test_copy_code_to_memory(src_addr: U64, dst_addr: U64, length: U64): .push32(0x1928835) .pop() ) - print(len(code.code)) dummy_code = Bytecode().stop() code_source = RLC(dummy_code.hash(), randomness) @@ -162,7 +335,7 @@ def test_copy_code_to_memory(src_addr: U64, dst_addr: U64, length: U64): execution_state=ExecutionState.STOP, rw_counter=rws[-1][0] + 1, call_id=CALL_ID, - is_root=False, + is_root=True, is_create=False, code_source=code_source, program_counter=100, From 332d8b7b2af6c3cbba111a0e96e2930d2b6b8edc Mon Sep 17 00:00:00 2001 From: Rohit Narurkar Date: Thu, 17 Mar 2022 12:14:11 +0800 Subject: [PATCH 04/16] specs for codecopy --- specs/opcode/39CODECOPY.md | 45 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/specs/opcode/39CODECOPY.md b/specs/opcode/39CODECOPY.md index e69de29bb..a7634394b 100644 --- a/specs/opcode/39CODECOPY.md +++ b/specs/opcode/39CODECOPY.md @@ -0,0 +1,45 @@ +# CODECOPY opcode + +## Procedure + +The 
`CODECOPY` opcode pops `memory_offset`, `code_offset` and `size` from the stack. +It then copies `size` bytes of code running in the current environment from an offset `code_offset` to the memory at the address `memory_offset`. For out-of-bound scenarios where `size > len(code) - code_offset`, EVM pads 0 to the end of the copied bytes. + +The gas cost of `CODECOPY` opcode consists of two parts: + +1. A constant gas cost: `3 gas` +2. A dynamic gas cost: cost of memory expansion and copying (variable depending on the `size` copied to memory) + +## Circuit Behaviour + +`CODECOPY` makes use of the internal execution step `CopyCodeToMemory` and loops over these steps iteratively until there are no more bytes to be copied. The `CODECOPY` circuit itself only constrains the values popped from stack and call context/account read lookups. + +The gadget then transits to the internal state of `CopyCodeToMemory`. + +## Constraints + +1. opId = 0x39 +2. State Transitions: + - rw_counter -> rw_counter + 6 (3 stack reads, 1 call context read, 2 account reads) + - stack_pointer -> stack_pointer + 3 + - pc -> pc + 1 + - gas -> 3 + dynamic_cost (memory expansion and copier cost when `size > 0`) + - memory_size + - `prev_memory_size` if `size = 0` + - `max(prev_memory_size, (memory_offset + size + 31) / 32)` if `size > 0` +3. Lookups: + - `memory_offset` is at the top of the stack + - `code_offset` is at the second position of the stack + - `size` is at the third position of the stack + - `callee_address` is in the call context for the current call + - `code_size` is in the account context for the `callee_address` + - `code_hash` is in the account context for the `callee_address` + +## Exceptions + +1. Stack Underflow: `1022 <= stack_pointer <= 1024` +2. 
Out-of-Gas: remaining gas is not enough + +## Code + +Please refer to `src/zkevm_specs/evm/execution/codecopy.py` From 71e427dcd703894e2fb6f84c9912e5c42e78c18b Mon Sep 17 00:00:00 2001 From: Rohit Narurkar Date: Thu, 17 Mar 2022 16:54:57 +0800 Subject: [PATCH 05/16] fix: update specs impl as per upstream updates --- src/zkevm_specs/evm/execution/codecopy.py | 13 +- .../evm/execution/copy_code_to_memory.py | 41 ++---- tests/evm/test_calldatacopy.py | 2 +- tests/evm/test_codecopy.py | 137 ++++++------------ 4 files changed, 71 insertions(+), 122 deletions(-) diff --git a/src/zkevm_specs/evm/execution/codecopy.py b/src/zkevm_specs/evm/execution/codecopy.py index 816eeecde..151cb71b1 100644 --- a/src/zkevm_specs/evm/execution/codecopy.py +++ b/src/zkevm_specs/evm/execution/codecopy.py @@ -8,14 +8,14 @@ def codecopy(instruction: Instruction): opcode = instruction.opcode_lookup(True) - memory_offset, code_offset, size = ( + memory_offset_word, code_offset_word, size_word = ( instruction.stack_pop(), instruction.stack_pop(), instruction.stack_pop(), ) - memory_offset, size = instruction.memory_offset_and_length(memory_offset, size) - code_offset = instruction.rlc_to_fq_exact(code_offset, N_BYTES_MEMORY_ADDRESS) + memory_offset, size = instruction.memory_offset_and_length(memory_offset_word, size_word) + code_offset = instruction.rlc_to_fq_exact(code_offset_word, N_BYTES_MEMORY_ADDRESS) account = instruction.call_context_lookup(CallContextFieldTag.CalleeAddress) code_size = instruction.account_read(account, AccountFieldTag.CodeSize) @@ -27,6 +27,7 @@ def codecopy(instruction: Instruction): gas_cost = instruction.memory_copier_gas_cost(size, memory_expansion_gas_cost) if not instruction.is_zero(size): + assert instruction.next is not None instruction.constrain_equal( instruction.next.execution_state, ExecutionState.CopyCodeToMemory ) @@ -34,9 +35,11 @@ def codecopy(instruction: Instruction): assert isinstance(next_aux, CopyCodeToMemoryAuxData) 
instruction.constrain_equal(next_aux.src_addr, code_offset) instruction.constrain_equal(next_aux.dst_addr, memory_offset) - instruction.constrain_equal(next_aux.src_addr_end, code_size) + instruction.constrain_equal(next_aux.src_addr_end, instruction.rlc_to_fq_exact(code_size)) instruction.constrain_equal(next_aux.bytes_left, size) - instruction.constrain_equal(FQ(next_aux.code.hash()), code_hash) + instruction.constrain_equal( + FQ(next_aux.code.hash()), instruction.rlc_to_fq_exact(code_hash) + ) instruction.step_state_transition_in_same_context( opcode, diff --git a/src/zkevm_specs/evm/execution/copy_code_to_memory.py b/src/zkevm_specs/evm/execution/copy_code_to_memory.py index 0ba0cda41..b8c03a19f 100644 --- a/src/zkevm_specs/evm/execution/copy_code_to_memory.py +++ b/src/zkevm_specs/evm/execution/copy_code_to_memory.py @@ -1,7 +1,7 @@ import itertools from typing import Iterator -from ...util import N_BYTES_MEMORY_SIZE, RLC +from ...util import FQ, N_BYTES_MEMORY_SIZE, RLC from ..execution_state import ExecutionState from ..instruction import Instruction, Transition from ..step import CopyCodeToMemoryAuxData @@ -20,31 +20,21 @@ def copy_code_to_memory(instruction: Instruction): instruction, MAX_COPY_BYTES, aux.src_addr, aux.src_addr_end, aux.bytes_left ) - is_codes = [ - c[3] - for c in itertools.islice( - aux.code.table_assignments(instruction.randomness), - aux.src_addr.n, - aux.src_addr.n + MAX_COPY_BYTES, - ) - ] - for (idx, is_code) in enumerate(is_codes): - if buffer_reader.read_flag(idx): - buffer_reader.constrain_byte( - idx, - instruction.bytecode_lookup( - RLC(aux.code.hash(), instruction.randomness), - aux.src_addr + idx, - is_code, - ), + is_codes = [c.is_code.expr() for c in aux.code.table_assignments(instruction.randomness)] + for idx in range(MAX_COPY_BYTES): + if buffer_reader.read_flag(idx) == 1: + is_code = True if is_codes[aux.src_addr.n + idx] == FQ(1) else False + byte = instruction.bytecode_lookup( + RLC(aux.code.hash(), 
instruction.randomness), + aux.src_addr + idx, + is_code, ) + buffer_reader.constrain_byte(idx, byte) - for idx in range(min(aux.bytes_left.n, MAX_COPY_BYTES)): - if buffer_reader.has_data(idx): - buffer_reader.constrain_byte( - idx, - instruction.memory_lookup(RW.Write, aux.dst_addr + idx), - ) + for idx in range(MAX_COPY_BYTES): + if buffer_reader.has_data(idx) == 1: + byte = instruction.memory_lookup(RW.Write, aux.dst_addr + idx) + buffer_reader.constrain_byte(idx, byte) copied_bytes = buffer_reader.num_bytes() lt, finished = instruction.compare(copied_bytes, aux.bytes_left, N_BYTES_MEMORY_SIZE) @@ -53,6 +43,7 @@ def copy_code_to_memory(instruction: Instruction): instruction.constrain_zero((1 - lt) * (1 - finished)) if finished == 0: + assert instruction.next is not None instruction.constrain_equal( instruction.next.execution_state, ExecutionState.CopyCodeToMemory ) @@ -62,7 +53,7 @@ def copy_code_to_memory(instruction: Instruction): instruction.constrain_equal(next_aux.dst_addr, aux.dst_addr + copied_bytes) instruction.constrain_equal(next_aux.bytes_left + copied_bytes, aux.bytes_left) instruction.constrain_equal(next_aux.src_addr_end, aux.src_addr_end) - instruction.constrain_equal(next_aux.code.hash(), aux.code.hash()) + instruction.constrain_equal(FQ(next_aux.code.hash()), FQ(aux.code.hash())) instruction.constrain_step_state_transition( rw_counter=Transition.delta(instruction.rw_counter_offset), diff --git a/tests/evm/test_calldatacopy.py b/tests/evm/test_calldatacopy.py index 471143ebf..705a18973 100644 --- a/tests/evm/test_calldatacopy.py +++ b/tests/evm/test_calldatacopy.py @@ -63,7 +63,7 @@ def make_copy_step( memory_size: int, gas_left: int, code_source: RLC, -) -> Tuple[StepState, Sequence[RW]]: +) -> StepState: aux_data = CopyToMemoryAuxData( src_addr=src_addr, dst_addr=dst_addr, diff --git a/tests/evm/test_codecopy.py b/tests/evm/test_codecopy.py index 29c044183..ec70fc285 100644 --- a/tests/evm/test_codecopy.py +++ b/tests/evm/test_codecopy.py @@ 
-10,6 +10,7 @@ ExecutionState, Opcode, RW, + RWDictionary, RWTableTag, StepState, Tables, @@ -21,7 +22,7 @@ MEMORY_EXPANSION_LINEAR_COEFF, MEMORY_EXPANSION_QUAD_DENOMINATOR, rand_address, - rand_fp, + rand_fq, RLC, U64, ) @@ -64,11 +65,11 @@ def make_copy_code_step( dst_addr: int, src_addr_end: int, bytes_left: int, + rw_dictionary: RWDictionary, program_counter: int, stack_pointer: int, memory_size: int, - rw_counter: int, -) -> Tuple[StepState, Sequence[RW]]: +) -> StepState: aux_data = CopyCodeToMemoryAuxData( src_addr=src_addr, dst_addr=dst_addr, @@ -78,7 +79,7 @@ def make_copy_code_step( ) step = StepState( execution_state=ExecutionState.CopyCodeToMemory, - rw_counter=rw_counter, + rw_counter=rw_dictionary.rw_counter, call_id=CALL_ID, is_root=True, program_counter=program_counter, @@ -88,26 +89,12 @@ def make_copy_code_step( code_source=code_source, aux_data=aux_data, ) - rws = [] + num_bytes = min(MAX_COPY_BYTES, bytes_left) for i in range(num_bytes): byte = buffer_map[src_addr + i] if src_addr + i < src_addr_end else 0 - rws.append( - ( - rw_counter, - RW.Write, - RWTableTag.Memory, - CALL_ID, - dst_addr + i, - 0, - byte, - 0, - 0, - 0, - ) - ) - rw_counter += 1 - return step, rws + rw_dictionary.memory_write(CALL_ID, dst_addr + i, byte) + return step def make_copy_code_steps( @@ -116,18 +103,16 @@ def make_copy_code_steps( src_addr: int, dst_addr: int, length: int, + rw_dictionary: RWDictionary, program_counter: int, stack_pointer: int, memory_size: int, - rw_counter: int, -) -> Tuple[Sequence[StepState], Sequence[RW]]: +) -> Sequence[StepState]: buffer_map = dict(zip(range(src_addr, len(code.code)), code.code)) steps = [] - rws = [] bytes_left = length while bytes_left > 0: - curr_rw_counter = rws[-1][0] + 1 if rws else rw_counter - new_step, new_rws = make_copy_code_step( + new_step = make_copy_code_step( code, code_source, buffer_map, @@ -135,22 +120,21 @@ def make_copy_code_steps( dst_addr, len(code.code), bytes_left, + rw_dictionary, program_counter, 
stack_pointer, memory_size, - curr_rw_counter, ) steps.append(new_step) - rws.extend(new_rws) src_addr += MAX_COPY_BYTES dst_addr += MAX_COPY_BYTES bytes_left -= MAX_COPY_BYTES - return steps, rws + return steps @pytest.mark.parametrize("src_addr, dst_addr, length", TESTING_DATA) def test_codecopy(src_addr: U64, dst_addr: U64, length: U64): - randomness = rand_fp() + randomness = rand_fq() callee_addr = rand_address() length_rlc = RLC(length, randomness) @@ -169,55 +153,25 @@ def test_codecopy(src_addr: U64, dst_addr: U64, length: U64): total_gas_cost = gas_cost_codecopy + (3 * gas_cost_push32) code_hash = code.hash() - rws = [ - (1, RW.Write, RWTableTag.Stack, CALL_ID, 1023, 0, length_rlc, 0, 0, 0), - (2, RW.Write, RWTableTag.Stack, CALL_ID, 1022, 0, src_addr_rlc, 0, 0, 0), - (3, RW.Write, RWTableTag.Stack, CALL_ID, 1021, 0, dst_addr_rlc, 0, 0, 0), - (4, RW.Read, RWTableTag.Stack, CALL_ID, 1021, 0, dst_addr_rlc, 0, 0, 0), - (5, RW.Read, RWTableTag.Stack, CALL_ID, 1022, 0, src_addr_rlc, 0, 0, 0), - (6, RW.Read, RWTableTag.Stack, CALL_ID, 1023, 0, length_rlc, 0, 0, 0), - ( - 7, - RW.Read, - RWTableTag.CallContext, - CALL_ID, - CallContextFieldTag.CalleeAddress, - 0, - callee_addr, - 0, - 0, - 0, - ), - ( - 8, - RW.Read, - RWTableTag.Account, - callee_addr, - AccountFieldTag.CodeSize, - 0, - len(code.code), - 0, - 0, - 0, - ), - ( - 9, - RW.Read, - RWTableTag.Account, - callee_addr, - AccountFieldTag.CodeHash, - 0, - code_hash, - 0, - 0, - 0, - ), - ] + + rw_dictionary = ( + RWDictionary(1) + .stack_write(CALL_ID, 1023, length_rlc) + .stack_write(CALL_ID, 1022, src_addr_rlc) + .stack_write(CALL_ID, 1021, dst_addr_rlc) + .stack_read(CALL_ID, 1021, dst_addr_rlc) + .stack_read(CALL_ID, 1022, src_addr_rlc) + .stack_read(CALL_ID, 1023, length_rlc) + .call_context_read(CALL_ID, CallContextFieldTag.CalleeAddress, callee_addr) + .account_read(callee_addr, AccountFieldTag.CodeSize, RLC(len(code.code), randomness)) + .account_read(callee_addr, AccountFieldTag.CodeHash, 
RLC(code_hash, randomness)) + ) + steps = [ StepState( execution_state=ExecutionState.PUSH, rw_counter=1, - call_id=1, + call_id=CALL_ID, is_root=True, code_source=code_source, program_counter=0, @@ -227,7 +181,7 @@ def test_codecopy(src_addr: U64, dst_addr: U64, length: U64): StepState( execution_state=ExecutionState.PUSH, rw_counter=2, - call_id=1, + call_id=CALL_ID, is_root=True, code_source=code_source, program_counter=33, @@ -237,7 +191,7 @@ def test_codecopy(src_addr: U64, dst_addr: U64, length: U64): StepState( execution_state=ExecutionState.PUSH, rw_counter=3, - call_id=1, + call_id=CALL_ID, is_root=True, code_source=code_source, program_counter=66, @@ -247,7 +201,7 @@ def test_codecopy(src_addr: U64, dst_addr: U64, length: U64): StepState( execution_state=ExecutionState.CODECOPY, rw_counter=4, - call_id=1, + call_id=CALL_ID, is_root=True, code_source=code_source, program_counter=99, @@ -256,25 +210,24 @@ def test_codecopy(src_addr: U64, dst_addr: U64, length: U64): ), ] - steps_internal, rws_internal = make_copy_code_steps( + steps_internal = make_copy_code_steps( code, code_source, src_addr, dst_addr, length, + rw_dictionary=rw_dictionary, program_counter=100, memory_size=next_memory_word_size, stack_pointer=1024, - rw_counter=10, ) steps.extend(steps_internal) - rws.extend(rws_internal) steps.append( StepState( execution_state=ExecutionState.STOP, - rw_counter=rws_internal[-1][0] + 1, - call_id=1, + rw_counter=rw_dictionary.rw_counter, + call_id=CALL_ID, is_root=True, code_source=code_source, program_counter=33, @@ -287,7 +240,7 @@ def test_codecopy(src_addr: U64, dst_addr: U64, length: U64): block_table=set(), tx_table=set(), bytecode_table=set(code.table_assignments(randomness)), - rw_table=set(rws), + rw_table=set(rw_dictionary.rws), ) verify_steps( @@ -299,7 +252,7 @@ def test_codecopy(src_addr: U64, dst_addr: U64, length: U64): @pytest.mark.parametrize("src_addr, dst_addr, length", TESTING_DATA) def test_copy_code_to_memory(src_addr: U64, dst_addr: 
U64, length: U64): - randomness = rand_fp() + randomness = rand_fq() code = ( Bytecode() @@ -318,27 +271,29 @@ def test_copy_code_to_memory(src_addr: U64, dst_addr: U64, length: U64): dummy_code = Bytecode().stop() code_source = RLC(dummy_code.hash(), randomness) + rw_dictionary = RWDictionary(1) + next_memory_word_size = to_word_size(dst_addr + length) - steps, rws = make_copy_code_steps( + steps = make_copy_code_steps( code, code_source, src_addr, dst_addr, length, - program_counter=100, + rw_dictionary=rw_dictionary, + program_counter=0, memory_size=next_memory_word_size, stack_pointer=1024, - rw_counter=1, ) steps.append( StepState( execution_state=ExecutionState.STOP, - rw_counter=rws[-1][0] + 1, + rw_counter=rw_dictionary.rw_counter, call_id=CALL_ID, is_root=True, is_create=False, code_source=code_source, - program_counter=100, + program_counter=0, stack_pointer=1024, memory_size=next_memory_word_size, gas_left=0, @@ -351,7 +306,7 @@ def test_copy_code_to_memory(src_addr: U64, dst_addr: U64, length: U64): bytecode_table=set(code.table_assignments(randomness)).union( dummy_code.table_assignments(randomness) ), - rw_table=set(rws), + rw_table=set(rw_dictionary.rws), ) verify_steps( From dde50804c62ad336254f5bc9dd1fe64be8be70fa Mon Sep 17 00:00:00 2001 From: Rohit Narurkar Date: Thu, 17 Mar 2022 16:59:34 +0800 Subject: [PATCH 06/16] fix: type --- src/zkevm_specs/evm/execution/codecopy.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/zkevm_specs/evm/execution/codecopy.py b/src/zkevm_specs/evm/execution/codecopy.py index 151cb71b1..25e16d7d1 100644 --- a/src/zkevm_specs/evm/execution/codecopy.py +++ b/src/zkevm_specs/evm/execution/codecopy.py @@ -35,10 +35,12 @@ def codecopy(instruction: Instruction): assert isinstance(next_aux, CopyCodeToMemoryAuxData) instruction.constrain_equal(next_aux.src_addr, code_offset) instruction.constrain_equal(next_aux.dst_addr, memory_offset) - instruction.constrain_equal(next_aux.src_addr_end, 
instruction.rlc_to_fq_exact(code_size)) + instruction.constrain_equal( + next_aux.src_addr_end, instruction.rlc_to_fq_exact(code_size, n_bytes=1) + ) instruction.constrain_equal(next_aux.bytes_left, size) instruction.constrain_equal( - FQ(next_aux.code.hash()), instruction.rlc_to_fq_exact(code_hash) + FQ(next_aux.code.hash()), instruction.rlc_to_fq_exact(code_hash, n_bytes=32) ) instruction.step_state_transition_in_same_context( From 518c8be8b716f32864133764e3cd8017a6239711 Mon Sep 17 00:00:00 2001 From: Rohit Narurkar Date: Thu, 17 Mar 2022 21:08:59 +0800 Subject: [PATCH 07/16] chore: minor refactor --- src/zkevm_specs/evm/execution/copy_code_to_memory.py | 11 ++++------- src/zkevm_specs/evm/execution/memory_copy.py | 7 +++---- src/zkevm_specs/util/param.py | 6 ++++++ tests/evm/test_calldatacopy.py | 10 +++++----- tests/evm/test_codecopy.py | 10 +++++----- 5 files changed, 23 insertions(+), 21 deletions(-) diff --git a/src/zkevm_specs/evm/execution/copy_code_to_memory.py b/src/zkevm_specs/evm/execution/copy_code_to_memory.py index b8c03a19f..cddef5891 100644 --- a/src/zkevm_specs/evm/execution/copy_code_to_memory.py +++ b/src/zkevm_specs/evm/execution/copy_code_to_memory.py @@ -1,7 +1,7 @@ import itertools from typing import Iterator -from ...util import FQ, N_BYTES_MEMORY_SIZE, RLC +from ...util import FQ, MAX_N_BYTES_COPY_CODE_TO_MEMORY, N_BYTES_MEMORY_SIZE, RLC from ..execution_state import ExecutionState from ..instruction import Instruction, Transition from ..step import CopyCodeToMemoryAuxData @@ -9,19 +9,16 @@ from ..util import BufferReaderGadget -MAX_COPY_BYTES = 54 - - def copy_code_to_memory(instruction: Instruction): aux = instruction.curr.aux_data assert isinstance(aux, CopyCodeToMemoryAuxData) buffer_reader = BufferReaderGadget( - instruction, MAX_COPY_BYTES, aux.src_addr, aux.src_addr_end, aux.bytes_left + instruction, MAX_N_BYTES_COPY_CODE_TO_MEMORY, aux.src_addr, aux.src_addr_end, aux.bytes_left ) is_codes = [c.is_code.expr() for c in 
aux.code.table_assignments(instruction.randomness)] - for idx in range(MAX_COPY_BYTES): + for idx in range(MAX_N_BYTES_COPY_CODE_TO_MEMORY): if buffer_reader.read_flag(idx) == 1: is_code = True if is_codes[aux.src_addr.n + idx] == FQ(1) else False byte = instruction.bytecode_lookup( @@ -31,7 +28,7 @@ def copy_code_to_memory(instruction: Instruction): ) buffer_reader.constrain_byte(idx, byte) - for idx in range(MAX_COPY_BYTES): + for idx in range(MAX_N_BYTES_COPY_CODE_TO_MEMORY): if buffer_reader.has_data(idx) == 1: byte = instruction.memory_lookup(RW.Write, aux.dst_addr + idx) buffer_reader.constrain_byte(idx, byte) diff --git a/src/zkevm_specs/evm/execution/memory_copy.py b/src/zkevm_specs/evm/execution/memory_copy.py index 9519145cf..d83bb129e 100644 --- a/src/zkevm_specs/evm/execution/memory_copy.py +++ b/src/zkevm_specs/evm/execution/memory_copy.py @@ -1,10 +1,9 @@ -from ...util import N_BYTES_MEMORY_SIZE, FQ, Expression +from ...util import MAX_N_BYTES_COPY_TO_MEMORY, N_BYTES_MEMORY_SIZE, FQ, Expression from ..execution_state import ExecutionState from ..instruction import Instruction, Transition from ..step import CopyToMemoryAuxData from ..table import RW from ..util import BufferReaderGadget -from ...util import MAX_COPY_BYTES def copy_to_memory(instruction: Instruction): @@ -12,10 +11,10 @@ def copy_to_memory(instruction: Instruction): assert isinstance(aux, CopyToMemoryAuxData) buffer_reader = BufferReaderGadget( - instruction, MAX_COPY_BYTES, aux.src_addr, aux.src_addr_end, aux.bytes_left + instruction, MAX_N_BYTES_COPY_TO_MEMORY, aux.src_addr, aux.src_addr_end, aux.bytes_left ) - for i in range(MAX_COPY_BYTES): + for i in range(MAX_N_BYTES_COPY_TO_MEMORY): if buffer_reader.read_flag(i) == 0: byte: Expression = FQ(0) elif aux.from_tx == 1: diff --git a/src/zkevm_specs/util/param.py b/src/zkevm_specs/util/param.py index ad7ecbb05..4bfe870dc 100644 --- a/src/zkevm_specs/util/param.py +++ b/src/zkevm_specs/util/param.py @@ -71,6 +71,12 @@ # Coefficient of 
linear part of memory expansion gas cost MEMORY_EXPANSION_LINEAR_COEFF = 3 +# Maximum number of bytes copied during one single iteration of CopyToMemory, i.e. the internal state used by the +# CALLDATACOPY gadget +MAX_N_BYTES_COPY_TO_MEMORY = 74 +# Maximum number of bytes copied during one single iteration of CopyCodeToMemory, i.e. the internal state used by +# the CODECOPY gadget +MAX_N_BYTES_COPY_CODE_TO_MEMORY = 54 COLD_SLOAD_COST = 2100 WARM_STORAGE_READ_COST = 100 diff --git a/tests/evm/test_calldatacopy.py b/tests/evm/test_calldatacopy.py index 705a18973..f9b1e31be 100644 --- a/tests/evm/test_calldatacopy.py +++ b/tests/evm/test_calldatacopy.py @@ -17,11 +17,11 @@ Bytecode, RWDictionary, ) -from zkevm_specs.evm.execution.memory_copy import MAX_COPY_BYTES from zkevm_specs.util import ( rand_fq, rand_bytes, GAS_COST_COPY, + MAX_N_BYTES_COPY_TO_MEMORY, MEMORY_EXPANSION_QUAD_DENOMINATOR, MEMORY_EXPANSION_LINEAR_COEFF, ) @@ -85,7 +85,7 @@ def make_copy_step( aux_data=aux_data, ) - num_bytes = min(MAX_COPY_BYTES, bytes_left) + num_bytes = min(MAX_N_BYTES_COPY_TO_MEMORY, bytes_left) for i in range(num_bytes): byte = buffer_map[src_addr + i] if src_addr + i < src_addr_end else 0 if not from_tx and src_addr + i < src_addr_end: @@ -129,9 +129,9 @@ def make_copy_steps( code_source, ) steps.append(new_step) - src_addr += MAX_COPY_BYTES - dst_addr += MAX_COPY_BYTES - bytes_left -= MAX_COPY_BYTES + src_addr += MAX_N_BYTES_COPY_TO_MEMORY + dst_addr += MAX_N_BYTES_COPY_TO_MEMORY + bytes_left -= MAX_N_BYTES_COPY_TO_MEMORY return steps diff --git a/tests/evm/test_codecopy.py b/tests/evm/test_codecopy.py index ec70fc285..8dc603abe 100644 --- a/tests/evm/test_codecopy.py +++ b/tests/evm/test_codecopy.py @@ -16,9 +16,9 @@ Tables, verify_steps, ) -from zkevm_specs.evm.execution.copy_code_to_memory import MAX_COPY_BYTES from zkevm_specs.util import ( GAS_COST_COPY, + MAX_N_BYTES_COPY_CODE_TO_MEMORY, MEMORY_EXPANSION_LINEAR_COEFF, MEMORY_EXPANSION_QUAD_DENOMINATOR, rand_address, @@ 
-90,7 +90,7 @@ def make_copy_code_step( aux_data=aux_data, ) - num_bytes = min(MAX_COPY_BYTES, bytes_left) + num_bytes = min(MAX_N_BYTES_COPY_CODE_TO_MEMORY, bytes_left) for i in range(num_bytes): byte = buffer_map[src_addr + i] if src_addr + i < src_addr_end else 0 rw_dictionary.memory_write(CALL_ID, dst_addr + i, byte) @@ -126,9 +126,9 @@ def make_copy_code_steps( memory_size, ) steps.append(new_step) - src_addr += MAX_COPY_BYTES - dst_addr += MAX_COPY_BYTES - bytes_left -= MAX_COPY_BYTES + src_addr += MAX_N_BYTES_COPY_CODE_TO_MEMORY + dst_addr += MAX_N_BYTES_COPY_CODE_TO_MEMORY + bytes_left -= MAX_N_BYTES_COPY_CODE_TO_MEMORY return steps From a35874db883abbe4b15a9b952d116382447343d5 Mon Sep 17 00:00:00 2001 From: Rohit Narurkar Date: Fri, 18 Mar 2022 14:45:29 +0800 Subject: [PATCH 08/16] feat: extend bytecode circuit with tags Length and Byte --- src/zkevm_specs/bytecode.py | 182 ++++++++++++++++++++--------- src/zkevm_specs/evm/instruction.py | 5 +- src/zkevm_specs/evm/table.py | 21 +++- src/zkevm_specs/evm/typing.py | 20 +++- tests/test_bytecode_circuit.py | 66 +++++++---- 5 files changed, 209 insertions(+), 85 deletions(-) diff --git a/src/zkevm_specs/bytecode.py b/src/zkevm_specs/bytecode.py index 1fee53213..a12190940 100644 --- a/src/zkevm_specs/bytecode.py +++ b/src/zkevm_specs/bytecode.py @@ -1,13 +1,13 @@ from typing import Sequence, Union, Tuple, Set, NamedTuple from collections import namedtuple -from .util import keccak256, FQ, RLC -from .evm import get_push_size, BytecodeTableRow +from .util import keccak256, EMPTY_HASH, FQ, RLC +from .evm import get_push_size, BytecodeFieldTag, BytecodeTableRow from .encoding import U8, U256, is_circuit_code # Row in the circuit Row = namedtuple( "Row", - "q_first q_last hash index byte is_code push_data_left hash_rlc hash_length byte_push_size is_final padding", + "q_first q_last hash tag index is_code value push_data_left hash_rlc hash_length byte_push_size is_final padding", ) # Unrolled bytecode class 
UnrolledBytecode(NamedTuple): @@ -33,44 +33,93 @@ def select( def check_bytecode_row( row: Row, prev_row: Row, + next_row: Row, push_table: Set[Tuple[int, int]], keccak_table: Set[Tuple[int, int, int]], r: int, ): row = Row(*[v if isinstance(v, RLC) else FQ(v) for v in row]) prev_row = Row(*[v if isinstance(v, RLC) else FQ(v) for v in prev_row]) + next_row = Row(*[v if isinstance(v, RLC) else FQ(v) for v in next_row]) if row.q_first == 0 and prev_row.is_final == 0: # Continue - # index needs to increase by 1 - assert row.index == prev_row.index + 1 - # is_code := push_data_left_prev == 0 - assert row.is_code == (prev_row.push_data_left == 0) - # hash_rlc := hash_rlc_prev * r + byte - assert row.hash_rlc == prev_row.hash_rlc * r + row.byte - - # padding needs to remain the same - assert row.padding == prev_row.padding - # hash needs to remain the same - assert row.hash == prev_row.hash - # hash_length needs to remain the same - assert row.hash_length == prev_row.hash_length + if row.tag == BytecodeFieldTag.Length: + # value for a Length tag is the bytecode length + assert row.value == row.hash_length + if row.value == 0: + # if len(bytecode) == 0 then hash == EMPTY_HASH + assert row.hash == EMPTY_HASH + # next row represents the start of another bytecode + assert next_row.tag == BytecodeFieldTag.Length + else: + # if len(bytecode) > 0 then hash != EMPTY_HASH + assert row.hash != EMPTY_HASH + # next row's hash should also be the same + assert row.hash == next_row.hash + # if len(bytecode) > 0 then the following rows are the bytes + assert next_row.tag == BytecodeFieldTag.Byte + # the immediate following row is the byte at index == 0 + assert next_row.index == 0 + # equality for bytecode length + assert next_row.hash_length == row.value + else: + if prev_row.tag == BytecodeFieldTag.Length: + # index starts from 0 + assert row.index == 0 + else: + # index is 1 more than previous row's index + assert row.index == prev_row.index + 1 + # is_code := push_data_left_prev == 
0 + assert row.is_code == (prev_row.push_data_left == 0) + # hash_rlc := hash_rlc_prev * r + byte + assert row.hash_rlc == prev_row.hash_rlc * r + row.value + # padding needs to remain the same + assert row.padding == prev_row.padding + # hash needs to remain the same + assert row.hash == prev_row.hash + # hash_length needs to remain the same + assert row.hash_length == prev_row.hash_length else: # Start - # index needs to start at 0 - assert row.index == 0 - # is_code needs to be 1 (first byte is always an opcode) - assert row.is_code == True - # hash_rlc needs to start at byte - assert row.hash_rlc == row.byte + # the row following an `is_final` previous row is either tagged Length + if row.tag == BytecodeFieldTag.Length: + # index needs to be 0 + assert row.index == 0 + # is_code needs to be 0 + assert row.is_code == 0 + # hash_rlc needs to start at byte + assert row.hash_rlc == 0 + # if bytecode length is zero + if row.value == 0: + # bytecode hash should be EMPTY_HASH + row.hash == EMPTY_HASH + # the next row should also be a tag Length + next_row.tag == BytecodeFieldTag.Length + else: + # bytecode hash should not be EMPTY_HASH + row.hash != EMPTY_HASH + # the next row should be tag Byte + next_row.tag == BytecodeFieldTag.Byte + # the next row should start with index == 0 + next_row.index == 0 + # the next row's hash length should be this row's value + next_row.hash_length == row.value + # the next row is the start of hash_rlc + next_row.hash_rlc == next_row.value + # or is the start of padding rows + else: + assert row.padding == 1 # is_final needs to be boolean assert_bool(row.is_final) # padding needs to be boolean assert_bool(row.padding) - # push_data_left := is_code ? byte_push_size : push_data_left_prev - 1 - assert row.push_data_left == select( - row.is_code, row.byte_push_size, prev_row.push_data_left - 1 - ) + + if row.tag == BytecodeFieldTag.Byte: + # push_data_left := is_code ? 
byte_push_size : push_data_left_prev - 1 + assert row.push_data_left == select( + row.is_code, row.byte_push_size, prev_row.push_data_left - 1 + ) # Padding if row.q_first == 0: @@ -87,9 +136,10 @@ def check_bytecode_row( # the last row needs to be the last byte assert row.padding == 1 or row.is_final == 1 - # Lookup how many bytes the current opcode pushes - # (also indirectly range checks `byte` to be in [0, 255]) - assert (row.byte, row.byte_push_size) in push_table + if row.tag == BytecodeFieldTag.Byte: + # Lookup how many bytes the current opcode pushes + # (also indirectly range checks `byte` to be in [0, 255]) + assert (row.value, row.byte_push_size) in push_table # keccak lookup when on the last byte if row.is_final == 1 and row.padding == 0: @@ -107,31 +157,56 @@ def assign_bytecode_circuit(k: int, bytecodes: Sequence[UnrolledBytecode], rando push_data_left = 0 hash_rlc = FQ(0) for idx, row in enumerate(bytecode.rows): - # Track which byte is an opcode and which is push data - is_code = push_data_left == 0 - byte_push_size = get_push_size(row.byte) - push_data_left = byte_push_size if is_code else push_data_left - 1 - - # Add the byte to the accumulator - hash_rlc = hash_rlc * randomness + row.byte - - # Set the data for this row - rows.append( - Row( - offset == 0, - offset == last_row_offset, - row.bytecode_hash, - row.index, - row.byte, - row.is_code, - push_data_left, - hash_rlc, - len(bytecode.bytes), - byte_push_size, - idx == len(bytecode.bytes) - 1, - False, + if idx == 0: + # First row represents tag Length + assert row.field_tag == BytecodeFieldTag.Length + rows.append( + Row( + offset == 0, + offset == last_row_offset, + row.bytecode_hash, + BytecodeFieldTag.Length, + FQ(0), + FQ(0), + len(bytecode.bytes), + FQ(0), + hash_rlc, + len(bytecode.bytes), + FQ(0), + row.bytecode_hash == EMPTY_HASH, + False, + ) + ) + else: + # Subsequent rows represent the bytecode bytes + # Track which byte is an opcode and which is push data + is_code = 
push_data_left == 0 + byte_push_size = get_push_size(row.value) + push_data_left = byte_push_size if is_code else push_data_left - 1 + + # Add the byte to the accumulator + hash_rlc = hash_rlc * randomness + row.value + + # Set the data for this row + assert row.field_tag == BytecodeFieldTag.Byte + rows.append( + Row( + offset == -1, + offset == last_row_offset, + row.bytecode_hash, + BytecodeFieldTag.Byte, + row.index, + row.is_code, + row.value, + push_data_left, + hash_rlc, + len(bytecode.bytes), + byte_push_size, + # Since 1 row is taken up by the Length tag + idx == len(bytecode.bytes), + False, + ) ) - ) offset += 1 # return when the circuit is full @@ -147,6 +222,7 @@ def assign_bytecode_circuit(k: int, bytecodes: Sequence[UnrolledBytecode], rando 0, 0, 0, + 0, True, 0, 0, diff --git a/src/zkevm_specs/evm/instruction.py b/src/zkevm_specs/evm/instruction.py index ea6b21b5b..f13345012 100644 --- a/src/zkevm_specs/evm/instruction.py +++ b/src/zkevm_specs/evm/instruction.py @@ -397,7 +397,10 @@ def tx_log_lookup(self, field_tag: TxLogFieldTag, index: int = 0) -> Expression: def bytecode_lookup( self, bytecode_hash: Expression, index: Expression, is_code: bool ) -> Expression: - return self.tables.bytecode_lookup(bytecode_hash, index, FQ(is_code)).byte + return self.tables.bytecode_lookup(bytecode_hash, index, FQ(is_code)).value + + def bytecode_length(self, bytecode_hash: Expression) -> Expression: + return self.tables.bytecode_length(bytecode_hash) def tx_gas_price(self, tx_id: Expression) -> RLC: return cast_expr(self.tx_context_lookup(tx_id, TxContextFieldTag.GasPrice), RLC) diff --git a/src/zkevm_specs/evm/table.py b/src/zkevm_specs/evm/table.py index 7ca71b8d2..f95e92b13 100644 --- a/src/zkevm_specs/evm/table.py +++ b/src/zkevm_specs/evm/table.py @@ -129,6 +129,15 @@ class TxContextFieldTag(IntEnum): CallData = auto() +class BytecodeFieldTag(IntEnum): + """ + Tag for BytecodeTable lookup. 
+ """ + + Length = 1 + Byte = 2 + + class RW(IntEnum): Read = 0 Write = 1 @@ -294,9 +303,10 @@ class TxTableRow(TableRow): @dataclass(frozen=True) class BytecodeTableRow(TableRow): bytecode_hash: Expression + field_tag: Expression index: Expression - byte: Expression is_code: Expression + value: Expression @dataclass(frozen=True) @@ -378,11 +388,20 @@ def bytecode_lookup( ) -> BytecodeTableRow: query = { "bytecode_hash": bytecode_hash, + "field_tag": FQ(BytecodeFieldTag.Byte), "index": index, "is_code": is_code, } return _lookup(BytecodeTableRow, self.bytecode_table, query) + def bytecode_length(self, bytecode_hash: Expression) -> Expression: + query = { + "bytecode_hash": bytecode_hash, + "field_tag": FQ(BytecodeFieldTag.Length), + } + row = _lookup(BytecodeTableRow, self.bytecode_table, query) + return row.value + def rw_lookup( self, rw_counter: Expression, diff --git a/src/zkevm_specs/evm/typing.py b/src/zkevm_specs/evm/typing.py index eda5e814c..cb2b0f01a 100644 --- a/src/zkevm_specs/evm/typing.py +++ b/src/zkevm_specs/evm/typing.py @@ -21,6 +21,7 @@ AccountFieldTag, BlockContextFieldTag, BlockTableRow, + BytecodeFieldTag, BytecodeTableRow, CallContextFieldTag, RWTableRow, @@ -256,18 +257,25 @@ def __iter__(self): return self def __next__(self): - if self.idx == len(self.code): + if len(self.code) == 0 or self.idx > len(self.code): raise StopIteration - idx = self.idx - byte = self.code[idx] + # return the length of the bytecode in the first row + if self.idx == 0: + self.idx += 1 + return BytecodeTableRow( + self.hash, FQ(BytecodeFieldTag.Length), FQ(0), FQ(0), FQ(len(self.code)) + ) + # the other rows represent each byte in the bytecode + idx = self.idx - 1 + byte = self.code[idx] is_code = self.push_data_left == 0 self.push_data_left = get_push_size(byte) if is_code else self.push_data_left - 1 - self.idx += 1 - - return BytecodeTableRow(self.hash, FQ(idx), FQ(byte), FQ(is_code)) + return BytecodeTableRow( + self.hash, FQ(BytecodeFieldTag.Byte), FQ(idx), 
FQ(is_code), FQ(byte) + ) return BytecodeIterator(RLC(self.hash(), randomness).expr(), self.code) diff --git a/tests/test_bytecode_circuit.py b/tests/test_bytecode_circuit.py index 270c56e24..91eea29fc 100644 --- a/tests/test_bytecode_circuit.py +++ b/tests/test_bytecode_circuit.py @@ -2,7 +2,7 @@ from copy import deepcopy from zkevm_specs.bytecode import * -from zkevm_specs.evm import Opcode, Bytecode, BytecodeTableRow, is_push +from zkevm_specs.evm import Opcode, Bytecode, BytecodeFieldTag, BytecodeTableRow, is_push from zkevm_specs.util import RLC, rand_fq # Unroll the bytecode @@ -18,7 +18,8 @@ def verify(k, bytecodes, randomness, success): try: for (idx, row) in enumerate(rows): prev_row = rows[(idx - 1) % len(rows)] - check_bytecode_row(row, prev_row, push_table, keccak_table, randomness) + next_row = rows[(idx + 1) % len(rows)] + check_bytecode_row(row, prev_row, next_row, push_table, keccak_table, randomness) ok = True except AssertionError as e: if success: @@ -38,19 +39,21 @@ def test_bytecode_unrolling(): for byte in range(256): if not is_push(byte): bytecode.append(byte) - rows.append((0, len(rows), byte, True)) + rows.append((0, BytecodeFieldTag.Byte, len(rows), True, byte)) # Now add the different push ops for n in range(1, 33): data_byte = int(Opcode.PUSH32) bytecode.append(Opcode.PUSH1 + n - 1) bytecode.extend([data_byte] * n) - rows.append((0, len(rows), Opcode.PUSH1 + n - 1, True)) + rows.append((0, BytecodeFieldTag.Byte, len(rows), True, Opcode.PUSH1 + n - 1)) for _ in range(n): - rows.append((0, len(rows), data_byte, False)) + rows.append((0, BytecodeFieldTag.Byte, len(rows), False, data_byte)) # Set the hash of the complete bytecode in the rows hash = RLC(bytes(reversed(keccak256(bytes(bytecode)))), randomness) for i in range(len(rows)): - rows[i] = BytecodeTableRow(hash.expr(), rows[i][1], rows[i][2], rows[i][3]) + rows[i] = BytecodeTableRow(hash.expr(), rows[i][1], rows[i][2], rows[i][3], rows[i][4]) + # Prepend the length of bytecode to rows 
+ rows.insert(0, BytecodeTableRow(hash.expr(), BytecodeFieldTag.Length, 0, 0, len(bytecode))) # Unroll the bytecode unrolled = unroll(bytes(bytecode), randomness) # Check if the bytecode was unrolled correctly @@ -65,7 +68,7 @@ def test_bytecode_empty(): def test_bytecode_full(): - bytecodes = [unroll(bytes([7] * 2**k), randomness)] + bytecodes = [unroll(bytes([7] * (2**k - 1)), randomness)] verify(k, bytecodes, randomness, True) @@ -89,22 +92,34 @@ def test_bytecode_invalid_hash_data(): unrolled = unroll(bytes([8, 2, 3, 8, 9, 7, 128]), randomness) verify(k, [unrolled], randomness, True) - # Change the hash on the first position + # Change the hash on the first row, i.e. row denoting tag Length invalid = deepcopy(unrolled) row = unrolled.rows[0] - invalid.rows[0] = BytecodeTableRow(row.bytecode_hash + 1, row.index, row.byte, row.is_code) + invalid.rows[0] = BytecodeTableRow( + row.bytecode_hash + 1, row.field_tag, row.index, row.is_code, row.value + ) + verify(k, [invalid], randomness, False) + + # Change the hash on the second row, i.e. 
first row with tag Byte + invalid = deepcopy(unrolled) + row = unrolled.rows[1] + invalid.rows[1] = BytecodeTableRow( + row.bytecode_hash + 1, row.field_tag, row.index, row.is_code, row.value + ) verify(k, [invalid], randomness, False) # Change the hash on another position invalid = deepcopy(unrolled) row = unrolled.rows[4] - invalid.rows[0] = BytecodeTableRow(row.bytecode_hash + 1, row.index, row.byte, row.is_code) + invalid.rows[1] = BytecodeTableRow( + row.bytecode_hash + 1, row.field_tag, row.index, row.is_code, row.value + ) verify(k, [invalid], randomness, False) # Change all the hashes so it doesn't match the keccak lookup hash invalid = deepcopy(unrolled) for idx, row in enumerate(unrolled.rows): - invalid.rows[idx] = BytecodeTableRow(1, row.index, row.byte, row.is_code) + invalid.rows[idx] = BytecodeTableRow(1, row.field_tag, row.index, row.is_code, row.value) verify(k, [invalid], randomness, False) @@ -116,14 +131,14 @@ def test_bytecode_invalid_index(): invalid = deepcopy(unrolled) for idx, row in enumerate(unrolled.rows): invalid.rows[idx] = BytecodeTableRow( - row.bytecode_hash + 1, row.index, row.byte, row.is_code + row.bytecode_hash + 1, row.field_tag, row.index, row.is_code, row.value ) verify(k, [invalid], randomness, False) # Don't increment an index once invalid = deepcopy(unrolled) invalid.rows[-1] = BytecodeTableRow( - invalid.rows[-1].bytecode_hash - 1, row.index, row.byte, row.is_code + invalid.rows[-1].bytecode_hash - 1, row.field_tag, row.index, row.is_code, row.value ) verify(k, [invalid], randomness, False) @@ -132,22 +147,24 @@ def test_bytecode_invalid_byte_data(): unrolled = unroll(bytes([8, 2, 3, 8, 9, 7, 128]), randomness) verify(k, [unrolled], randomness, True) - # Change the first byte + # Change the first byte in the bytecode invalid = deepcopy(unrolled) - row = unrolled.rows[0] - invalid.rows[0] = BytecodeTableRow(row.bytecode_hash, row.index, row.byte, 9) + row = unrolled.rows[1] + invalid.rows[1] = 
BytecodeTableRow(row.bytecode_hash, row.field_tag, row.index, row.is_code, 9) verify(k, [invalid], randomness, False) # Change a byte on another position invalid = deepcopy(unrolled) row = unrolled.rows[5] - invalid.rows[5] = BytecodeTableRow(row.bytecode_hash, row.index, row.byte, 6) + invalid.rows[5] = BytecodeTableRow(row.bytecode_hash, row.field_tag, row.index, row.is_code, 6) verify(k, [invalid], randomness, False) # Set a byte value out of range invalid = deepcopy(unrolled) row = unrolled.rows[3] - invalid.rows[3] = BytecodeTableRow(row.bytecode_hash, row.index, row.byte, 256) + invalid.rows[3] = BytecodeTableRow( + row.bytecode_hash, row.field_tag, row.index, row.is_code, 256 + ) verify(k, [invalid], randomness, False) @@ -168,20 +185,21 @@ def test_bytecode_invalid_is_code(): ) verify(k, [unrolled], randomness, True) + # The first row, i.e. index == 0 is taken up by the tag Length. # Mark the 3rd byte as code (is push data from the first PUSH1) invalid = deepcopy(unrolled) - row = unrolled.rows[2] - invalid.rows[2] = BytecodeTableRow(row.bytecode_hash, row.index, 1, row.is_code) + row = unrolled.rows[3] + invalid.rows[3] = BytecodeTableRow(row.bytecode_hash, row.field_tag, row.index, 1, row.value) verify(k, [invalid], randomness, False) # Mark the 4rd byte as data (is code) invalid = deepcopy(unrolled) - row = unrolled.rows[3] - invalid.rows[3] = BytecodeTableRow(row.bytecode_hash, row.index, 0, row.is_code) + row = unrolled.rows[4] + invalid.rows[4] = BytecodeTableRow(row.bytecode_hash, row.field_tag, row.index, 0, row.value) verify(k, [invalid], randomness, False) # Mark the 7th byte as code (is data for the PUSH7) invalid = deepcopy(unrolled) - row = unrolled.rows[6] - invalid.rows[6] = BytecodeTableRow(row.bytecode_hash, row.index, 1, row.is_code) + row = unrolled.rows[7] + invalid.rows[7] = BytecodeTableRow(row.bytecode_hash, row.field_tag, row.index, 1, row.value) verify(k, [invalid], randomness, False) From f6d81acc3deb051c37d5335ec03762ac3cbf2c22 
Mon Sep 17 00:00:00 2001 From: Rohit Narurkar Date: Fri, 18 Mar 2022 14:45:50 +0800 Subject: [PATCH 09/16] fix: codecopy now uses code size from bytecode lookup --- specs/opcode/39CODECOPY.md | 4 ++-- src/zkevm_specs/evm/execution/codecopy.py | 6 ++---- src/zkevm_specs/evm/execution/copy_code_to_memory.py | 3 ++- tests/evm/test_codecopy.py | 7 ++++++- 4 files changed, 12 insertions(+), 8 deletions(-) diff --git a/specs/opcode/39CODECOPY.md b/specs/opcode/39CODECOPY.md index a7634394b..f5d358b93 100644 --- a/specs/opcode/39CODECOPY.md +++ b/specs/opcode/39CODECOPY.md @@ -20,7 +20,7 @@ The gadget then transits to the internal state of `CopyCodeToMemory`. 1. opId = 0x39 2. State Transitions: - - rw_counter -> rw_counter + 6 (3 stack reads, 1 call context read, 2 account reads) + - rw_counter -> rw_counter + 5 (3 stack reads, 1 call context read `CalleeAddress`, 1 account read `CodeHash`) - stack_pointer -> stack_pointer + 3 - pc -> pc + 1 - gas -> 3 + dynamic_cost (memory expansion and copier cost when `size > 0`) @@ -32,8 +32,8 @@ The gadget then transits to the internal state of `CopyCodeToMemory`. 
- `code_offset` is at the second position of the stack - `size` is at the third position of the stack - `callee_address` is in the call context for the current call - - `code_size` is in the account context for the `callee_address` - `code_hash` is in the account context for the `callee_address` + - `code_size` from the bytecode table ## Exceptions diff --git a/src/zkevm_specs/evm/execution/codecopy.py b/src/zkevm_specs/evm/execution/codecopy.py index 25e16d7d1..2d7302d88 100644 --- a/src/zkevm_specs/evm/execution/codecopy.py +++ b/src/zkevm_specs/evm/execution/codecopy.py @@ -18,8 +18,8 @@ def codecopy(instruction: Instruction): code_offset = instruction.rlc_to_fq_exact(code_offset_word, N_BYTES_MEMORY_ADDRESS) account = instruction.call_context_lookup(CallContextFieldTag.CalleeAddress) - code_size = instruction.account_read(account, AccountFieldTag.CodeSize) code_hash = instruction.account_read(account, AccountFieldTag.CodeHash) + code_size = instruction.bytecode_length(code_hash) next_memory_size, memory_expansion_gas_cost = instruction.memory_expansion_dynamic_length( memory_offset, size @@ -35,9 +35,7 @@ def codecopy(instruction: Instruction): assert isinstance(next_aux, CopyCodeToMemoryAuxData) instruction.constrain_equal(next_aux.src_addr, code_offset) instruction.constrain_equal(next_aux.dst_addr, memory_offset) - instruction.constrain_equal( - next_aux.src_addr_end, instruction.rlc_to_fq_exact(code_size, n_bytes=1) - ) + instruction.constrain_equal(next_aux.src_addr_end, code_size) instruction.constrain_equal(next_aux.bytes_left, size) instruction.constrain_equal( FQ(next_aux.code.hash()), instruction.rlc_to_fq_exact(code_hash, n_bytes=32) diff --git a/src/zkevm_specs/evm/execution/copy_code_to_memory.py b/src/zkevm_specs/evm/execution/copy_code_to_memory.py index cddef5891..f46864bef 100644 --- a/src/zkevm_specs/evm/execution/copy_code_to_memory.py +++ b/src/zkevm_specs/evm/execution/copy_code_to_memory.py @@ -20,7 +20,8 @@ def 
copy_code_to_memory(instruction: Instruction): is_codes = [c.is_code.expr() for c in aux.code.table_assignments(instruction.randomness)] for idx in range(MAX_N_BYTES_COPY_CODE_TO_MEMORY): if buffer_reader.read_flag(idx) == 1: - is_code = True if is_codes[aux.src_addr.n + idx] == FQ(1) else False + # the first row is allocated for tag Length. + is_code = True if is_codes[aux.src_addr.n + idx + 1] == FQ(1) else False byte = instruction.bytecode_lookup( RLC(aux.code.hash(), instruction.randomness), aux.src_addr + idx, diff --git a/tests/evm/test_codecopy.py b/tests/evm/test_codecopy.py index 8dc603abe..59e629592 100644 --- a/tests/evm/test_codecopy.py +++ b/tests/evm/test_codecopy.py @@ -163,9 +163,10 @@ def test_codecopy(src_addr: U64, dst_addr: U64, length: U64): .stack_read(CALL_ID, 1022, src_addr_rlc) .stack_read(CALL_ID, 1023, length_rlc) .call_context_read(CALL_ID, CallContextFieldTag.CalleeAddress, callee_addr) - .account_read(callee_addr, AccountFieldTag.CodeSize, RLC(len(code.code), randomness)) .account_read(callee_addr, AccountFieldTag.CodeHash, RLC(code_hash, randomness)) ) + # rw counter before memory writes + rw_counter_interim = rw_dictionary.rw_counter steps = [ StepState( @@ -223,6 +224,10 @@ def test_codecopy(src_addr: U64, dst_addr: U64, length: U64): ) steps.extend(steps_internal) + # rw counter post memory writes + rw_counter_final = rw_dictionary.rw_counter + assert rw_counter_final - rw_counter_interim == length + steps.append( StepState( execution_state=ExecutionState.STOP, From 78948c6957e5b778ac476a468c443154a14011db Mon Sep 17 00:00:00 2001 From: Rohit Narurkar Date: Sat, 19 Mar 2022 18:21:57 +0800 Subject: [PATCH 10/16] fix: refactor, remove redundant/duplicate constraints, PR review fixes --- src/zkevm_specs/bytecode.py | 131 +++++++++-------------------- src/zkevm_specs/evm/instruction.py | 7 +- src/zkevm_specs/evm/table.py | 16 ++-- src/zkevm_specs/evm/typing.py | 6 +- src/zkevm_specs/util/param.py | 4 +- 5 files changed, 56 
insertions(+), 108 deletions(-) diff --git a/src/zkevm_specs/bytecode.py b/src/zkevm_specs/bytecode.py index a12190940..12caf5b91 100644 --- a/src/zkevm_specs/bytecode.py +++ b/src/zkevm_specs/bytecode.py @@ -43,52 +43,28 @@ def check_bytecode_row( next_row = Row(*[v if isinstance(v, RLC) else FQ(v) for v in next_row]) if row.q_first == 0 and prev_row.is_final == 0: # Continue - if row.tag == BytecodeFieldTag.Length: - # value for a Length tag is the bytecode length - assert row.value == row.hash_length - if row.value == 0: - # if len(bytecode) == 0 then hash == EMPTY_HASH - assert row.hash == EMPTY_HASH - # next row represents the start of another bytecode - assert next_row.tag == BytecodeFieldTag.Length - else: - # if len(bytecode) > 0 then hash != EMPTY_HASH - assert row.hash != EMPTY_HASH - # next row's hash should also be the same - assert row.hash == next_row.hash - # if len(bytecode) > 0 then the following rows are the bytes - assert next_row.tag == BytecodeFieldTag.Byte - # the immediate following row is the byte at index == 0 - assert next_row.index == 0 - # equality for bytecode length - assert next_row.hash_length == row.value + if prev_row.tag == BytecodeFieldTag.Length: + # index starts from 0 + assert row.index == 0 + # hash length should be the previous row's value + assert row.hash_length == prev_row.value else: - if prev_row.tag == BytecodeFieldTag.Length: - # index starts from 0 - assert row.index == 0 - else: - # index is 1 more than previous row's index - assert row.index == prev_row.index + 1 - # is_code := push_data_left_prev == 0 - assert row.is_code == (prev_row.push_data_left == 0) - # hash_rlc := hash_rlc_prev * r + byte - assert row.hash_rlc == prev_row.hash_rlc * r + row.value - # padding needs to remain the same - assert row.padding == prev_row.padding - # hash needs to remain the same - assert row.hash == prev_row.hash - # hash_length needs to remain the same - assert row.hash_length == prev_row.hash_length + # index is 1 more than 
previous row's index + assert row.index == prev_row.index + 1 + # is_code := push_data_left_prev == 0 + assert row.is_code == (prev_row.push_data_left == 0) + # hash_rlc := hash_rlc_prev * r + byte + assert row.hash_rlc == prev_row.hash_rlc * r + row.value + # padding needs to remain the same + assert row.padding == prev_row.padding + # hash needs to remain the same + assert row.hash == prev_row.hash + # hash_length needs to remain the same + assert row.hash_length == prev_row.hash_length else: # Start # the row following an `is_final` previous row is either tagged Length if row.tag == BytecodeFieldTag.Length: - # index needs to be 0 - assert row.index == 0 - # is_code needs to be 0 - assert row.is_code == 0 - # hash_rlc needs to start at byte - assert row.hash_rlc == 0 # if bytecode length is zero if row.value == 0: # bytecode hash should be EMPTY_HASH @@ -100,12 +76,6 @@ def check_bytecode_row( row.hash != EMPTY_HASH # the next row should be tag Byte next_row.tag == BytecodeFieldTag.Byte - # the next row should start with index == 0 - next_row.index == 0 - # the next row's hash length should be this row's value - next_row.hash_length == row.value - # the next row is the start of hash_rlc - next_row.hash_rlc == next_row.value # or is the start of padding rows else: assert row.padding == 1 @@ -157,56 +127,35 @@ def assign_bytecode_circuit(k: int, bytecodes: Sequence[UnrolledBytecode], rando push_data_left = 0 hash_rlc = FQ(0) for idx, row in enumerate(bytecode.rows): - if idx == 0: - # First row represents tag Length - assert row.field_tag == BytecodeFieldTag.Length - rows.append( - Row( - offset == 0, - offset == last_row_offset, - row.bytecode_hash, - BytecodeFieldTag.Length, - FQ(0), - FQ(0), - len(bytecode.bytes), - FQ(0), - hash_rlc, - len(bytecode.bytes), - FQ(0), - row.bytecode_hash == EMPTY_HASH, - False, - ) - ) - else: - # Subsequent rows represent the bytecode bytes - # Track which byte is an opcode and which is push data - is_code = push_data_left == 0 
+ # Subsequent rows represent the bytecode bytes + # Track which byte is an opcode and which is push data + is_code = push_data_left == 0 + byte_push_size = 0 + if idx > 0: byte_push_size = get_push_size(row.value) push_data_left = byte_push_size if is_code else push_data_left - 1 - # Add the byte to the accumulator hash_rlc = hash_rlc * randomness + row.value - # Set the data for this row - assert row.field_tag == BytecodeFieldTag.Byte - rows.append( - Row( - offset == -1, - offset == last_row_offset, - row.bytecode_hash, - BytecodeFieldTag.Byte, - row.index, - row.is_code, - row.value, - push_data_left, - hash_rlc, - len(bytecode.bytes), - byte_push_size, - # Since 1 row is taken up by the Length tag - idx == len(bytecode.bytes), - False, - ) + # Set the data for this row + rows.append( + Row( + offset == 0, + offset == last_row_offset, + row.bytecode_hash, + row.field_tag, + row.index, + row.is_code, + row.value, + push_data_left, + hash_rlc, + len(bytecode.bytes), + byte_push_size, + # Since 1 row is taken up by the Length tag + idx == len(bytecode.bytes), + False, ) + ) offset += 1 # return when the circuit is full diff --git a/src/zkevm_specs/evm/instruction.py b/src/zkevm_specs/evm/instruction.py index f13345012..5d4508df9 100644 --- a/src/zkevm_specs/evm/instruction.py +++ b/src/zkevm_specs/evm/instruction.py @@ -23,6 +23,7 @@ from .table import ( AccountFieldTag, BlockContextFieldTag, + BytecodeFieldTag, CallContextFieldTag, FixedTableRow, RWTableRow, @@ -397,10 +398,12 @@ def tx_log_lookup(self, field_tag: TxLogFieldTag, index: int = 0) -> Expression: def bytecode_lookup( self, bytecode_hash: Expression, index: Expression, is_code: bool ) -> Expression: - return self.tables.bytecode_lookup(bytecode_hash, index, FQ(is_code)).value + return self.tables.bytecode_lookup( + bytecode_hash, FQ(BytecodeFieldTag.Byte), index, FQ(is_code) + ).value def bytecode_length(self, bytecode_hash: Expression) -> Expression: - return 
self.tables.bytecode_length(bytecode_hash) + return self.tables.bytecode_lookup(bytecode_hash, FQ(BytecodeFieldTag.Length), FQ(0), FQ(0)) def tx_gas_price(self, tx_id: Expression) -> RLC: return cast_expr(self.tx_context_lookup(tx_id, TxContextFieldTag.GasPrice), RLC) diff --git a/src/zkevm_specs/evm/table.py b/src/zkevm_specs/evm/table.py index f95e92b13..0e7d82e9a 100644 --- a/src/zkevm_specs/evm/table.py +++ b/src/zkevm_specs/evm/table.py @@ -384,24 +384,20 @@ def tx_lookup( return _lookup(TxTableRow, self.tx_table, query) def bytecode_lookup( - self, bytecode_hash: Expression, index: Expression, is_code: Expression + self, + bytecode_hash: Expression, + field_tag: Expression, + index: Expression, + is_code: Expression, ) -> BytecodeTableRow: query = { "bytecode_hash": bytecode_hash, - "field_tag": FQ(BytecodeFieldTag.Byte), + "field_tag": field_tag, "index": index, "is_code": is_code, } return _lookup(BytecodeTableRow, self.bytecode_table, query) - def bytecode_length(self, bytecode_hash: Expression) -> Expression: - query = { - "bytecode_hash": bytecode_hash, - "field_tag": FQ(BytecodeFieldTag.Length), - } - row = _lookup(BytecodeTableRow, self.bytecode_table, query) - return row.value - def rw_lookup( self, rw_counter: Expression, diff --git a/src/zkevm_specs/evm/typing.py b/src/zkevm_specs/evm/typing.py index cb2b0f01a..274a779ba 100644 --- a/src/zkevm_specs/evm/typing.py +++ b/src/zkevm_specs/evm/typing.py @@ -257,9 +257,6 @@ def __iter__(self): return self def __next__(self): - if len(self.code) == 0 or self.idx > len(self.code): - raise StopIteration - # return the length of the bytecode in the first row if self.idx == 0: self.idx += 1 @@ -267,6 +264,9 @@ def __next__(self): self.hash, FQ(BytecodeFieldTag.Length), FQ(0), FQ(0), FQ(len(self.code)) ) + if self.idx > len(self.code): + raise StopIteration + # the other rows represent each byte in the bytecode idx = self.idx - 1 byte = self.code[idx] diff --git a/src/zkevm_specs/util/param.py 
b/src/zkevm_specs/util/param.py index 4bfe870dc..067bd3fd7 100644 --- a/src/zkevm_specs/util/param.py +++ b/src/zkevm_specs/util/param.py @@ -73,10 +73,10 @@ # Maximum number of bytes copied during one single iteration of CopyToMemory, i.e. the internal state used by the # CALLDATACOPY gadget -MAX_N_BYTES_COPY_TO_MEMORY = 74 +MAX_N_BYTES_COPY_TO_MEMORY = 32 # Maximum number of bytes copied during one single iteration of CopyCodeToMemory, i.e. the internal state used by # the CODECOPY gadget -MAX_N_BYTES_COPY_CODE_TO_MEMORY = 54 +MAX_N_BYTES_COPY_CODE_TO_MEMORY = 32 COLD_SLOAD_COST = 2100 WARM_STORAGE_READ_COST = 100 From b513ddac7325caa1d2f4ef6ad39c3e133ac563a1 Mon Sep 17 00:00:00 2001 From: Rohit Narurkar Date: Sat, 19 Mar 2022 18:27:17 +0800 Subject: [PATCH 11/16] fix: type check --- src/zkevm_specs/evm/instruction.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/zkevm_specs/evm/instruction.py b/src/zkevm_specs/evm/instruction.py index 5d4508df9..cb707b16d 100644 --- a/src/zkevm_specs/evm/instruction.py +++ b/src/zkevm_specs/evm/instruction.py @@ -403,7 +403,9 @@ def bytecode_lookup( ).value def bytecode_length(self, bytecode_hash: Expression) -> Expression: - return self.tables.bytecode_lookup(bytecode_hash, FQ(BytecodeFieldTag.Length), FQ(0), FQ(0)) + return self.tables.bytecode_lookup( + bytecode_hash, FQ(BytecodeFieldTag.Length), FQ(0), FQ(0) + ).value def tx_gas_price(self, tx_id: Expression) -> RLC: return cast_expr(self.tx_context_lookup(tx_id, TxContextFieldTag.GasPrice), RLC) From ec6bd14ed85b232138e2eda9aadc23054e35a144 Mon Sep 17 00:00:00 2001 From: Rohit Narurkar Date: Mon, 21 Mar 2022 10:53:18 +0800 Subject: [PATCH 12/16] fix: remove redundant constraint | fix other constraints --- src/zkevm_specs/bytecode.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/src/zkevm_specs/bytecode.py b/src/zkevm_specs/bytecode.py index 12caf5b91..ffc56648f 100644 --- a/src/zkevm_specs/bytecode.py +++ 
b/src/zkevm_specs/bytecode.py @@ -46,8 +46,8 @@ def check_bytecode_row( if prev_row.tag == BytecodeFieldTag.Length: # index starts from 0 assert row.index == 0 - # hash length should be the previous row's value - assert row.hash_length == prev_row.value + # is_code := 1, since this is the first byte of the bytecode + assert row.is_code == 1 else: # index is 1 more than previous row's index assert row.index == prev_row.index + 1 @@ -65,17 +65,19 @@ def check_bytecode_row( # Start # the row following an `is_final` previous row is either tagged Length if row.tag == BytecodeFieldTag.Length: + # value matches hash length + assert row.value == row.hash_length # if bytecode length is zero if row.value == 0: # bytecode hash should be EMPTY_HASH - row.hash == EMPTY_HASH - # the next row should also be a tag Length - next_row.tag == BytecodeFieldTag.Length + assert row.hash == RLC(EMPTY_HASH, FQ(r)).expr() + # the next row should be a tag Length or padding + assert (next_row.tag == BytecodeFieldTag.Length) or (next_row.tag == 0) else: # bytecode hash should not be EMPTY_HASH - row.hash != EMPTY_HASH + assert row.hash != RLC(EMPTY_HASH, FQ(r)).expr() # the next row should be tag Byte - next_row.tag == BytecodeFieldTag.Byte + assert next_row.tag == BytecodeFieldTag.Byte # or is the start of padding rows else: assert row.padding == 1 From 62ae3d3fe9b29a342f7951e4f3835eeceefa74d1 Mon Sep 17 00:00:00 2001 From: Rohit Narurkar Date: Mon, 21 Mar 2022 10:54:44 +0800 Subject: [PATCH 13/16] fix: remove redundant constraint --- src/zkevm_specs/bytecode.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/zkevm_specs/bytecode.py b/src/zkevm_specs/bytecode.py index ffc56648f..83ced381d 100644 --- a/src/zkevm_specs/bytecode.py +++ b/src/zkevm_specs/bytecode.py @@ -74,8 +74,6 @@ def check_bytecode_row( # the next row should be a tag Length or padding assert (next_row.tag == BytecodeFieldTag.Length) or (next_row.tag == 0) else: - # bytecode hash should not be EMPTY_HASH - assert 
row.hash != RLC(EMPTY_HASH, FQ(r)).expr() # the next row should be tag Byte assert next_row.tag == BytecodeFieldTag.Byte # or is the start of padding rows From be96e29fa22dd5975b62e9a8c85f276b36a79bad Mon Sep 17 00:00:00 2001 From: Rohit Narurkar Date: Mon, 21 Mar 2022 15:54:38 +0800 Subject: [PATCH 14/16] fix: updates as per code review comments --- src/zkevm_specs/evm/execution/calldatacopy.py | 2 +- src/zkevm_specs/evm/execution/codecopy.py | 8 ++++---- .../evm/execution/copy_code_to_memory.py | 16 ++++++++++------ src/zkevm_specs/evm/execution/memory_copy.py | 8 ++++++++ src/zkevm_specs/evm/instruction.py | 6 +++--- src/zkevm_specs/evm/step.py | 6 +++--- src/zkevm_specs/evm/table.py | 6 +++--- tests/evm/test_codecopy.py | 17 ++++++++++++----- 8 files changed, 44 insertions(+), 25 deletions(-) diff --git a/src/zkevm_specs/evm/execution/calldatacopy.py b/src/zkevm_specs/evm/execution/calldatacopy.py index 925501515..1b66b9c95 100644 --- a/src/zkevm_specs/evm/execution/calldatacopy.py +++ b/src/zkevm_specs/evm/execution/calldatacopy.py @@ -33,7 +33,7 @@ def calldatacopy(instruction: Instruction): gas_cost = instruction.memory_copier_gas_cost(length, memory_expansion_gas_cost) # When length != 0, constrain the state in the next execution state CopyToMemory - if not instruction.is_zero(length): + if instruction.is_zero(length) == FQ(0): assert instruction.next is not None instruction.constrain_equal(instruction.next.execution_state, ExecutionState.CopyToMemory) next_aux = instruction.next.aux_data diff --git a/src/zkevm_specs/evm/execution/codecopy.py b/src/zkevm_specs/evm/execution/codecopy.py index 2d7302d88..3c3d98326 100644 --- a/src/zkevm_specs/evm/execution/codecopy.py +++ b/src/zkevm_specs/evm/execution/codecopy.py @@ -19,6 +19,8 @@ def codecopy(instruction: Instruction): account = instruction.call_context_lookup(CallContextFieldTag.CalleeAddress) code_hash = instruction.account_read(account, AccountFieldTag.CodeHash) + instruction.constrain_equal(code_hash, 
instruction.curr.code_source) + code_size = instruction.bytecode_length(code_hash) next_memory_size, memory_expansion_gas_cost = instruction.memory_expansion_dynamic_length( @@ -26,7 +28,7 @@ def codecopy(instruction: Instruction): ) gas_cost = instruction.memory_copier_gas_cost(size, memory_expansion_gas_cost) - if not instruction.is_zero(size): + if instruction.is_zero(size) == FQ(0): assert instruction.next is not None instruction.constrain_equal( instruction.next.execution_state, ExecutionState.CopyCodeToMemory @@ -37,9 +39,7 @@ def codecopy(instruction: Instruction): instruction.constrain_equal(next_aux.dst_addr, memory_offset) instruction.constrain_equal(next_aux.src_addr_end, code_size) instruction.constrain_equal(next_aux.bytes_left, size) - instruction.constrain_equal( - FQ(next_aux.code.hash()), instruction.rlc_to_fq_exact(code_hash, n_bytes=32) - ) + instruction.constrain_equal(next_aux.code_hash, code_hash) instruction.step_state_transition_in_same_context( opcode, diff --git a/src/zkevm_specs/evm/execution/copy_code_to_memory.py b/src/zkevm_specs/evm/execution/copy_code_to_memory.py index f46864bef..5ae0ad2e0 100644 --- a/src/zkevm_specs/evm/execution/copy_code_to_memory.py +++ b/src/zkevm_specs/evm/execution/copy_code_to_memory.py @@ -17,15 +17,11 @@ def copy_code_to_memory(instruction: Instruction): instruction, MAX_N_BYTES_COPY_CODE_TO_MEMORY, aux.src_addr, aux.src_addr_end, aux.bytes_left ) - is_codes = [c.is_code.expr() for c in aux.code.table_assignments(instruction.randomness)] for idx in range(MAX_N_BYTES_COPY_CODE_TO_MEMORY): if buffer_reader.read_flag(idx) == 1: - # the first row is allocated for tag Length. 
- is_code = True if is_codes[aux.src_addr.n + idx + 1] == FQ(1) else False byte = instruction.bytecode_lookup( - RLC(aux.code.hash(), instruction.randomness), + aux.code_hash, aux.src_addr + idx, - is_code, ) buffer_reader.constrain_byte(idx, byte) @@ -51,8 +47,16 @@ def copy_code_to_memory(instruction: Instruction): instruction.constrain_equal(next_aux.dst_addr, aux.dst_addr + copied_bytes) instruction.constrain_equal(next_aux.bytes_left + copied_bytes, aux.bytes_left) instruction.constrain_equal(next_aux.src_addr_end, aux.src_addr_end) - instruction.constrain_equal(FQ(next_aux.code.hash()), FQ(aux.code.hash())) + instruction.constrain_equal(next_aux.code_hash, aux.code_hash) instruction.constrain_step_state_transition( rw_counter=Transition.delta(instruction.rw_counter_offset), + call_id=Transition.same(), + is_root=Transition.same(), + is_create=Transition.same(), + code_source=Transition.same(), + program_counter=Transition.same(), + stack_pointer=Transition.same(), + memory_size=Transition.same(), + state_write_counter=Transition.same(), ) diff --git a/src/zkevm_specs/evm/execution/memory_copy.py b/src/zkevm_specs/evm/execution/memory_copy.py index d83bb129e..7cac7f618 100644 --- a/src/zkevm_specs/evm/execution/memory_copy.py +++ b/src/zkevm_specs/evm/execution/memory_copy.py @@ -46,4 +46,12 @@ def copy_to_memory(instruction: Instruction): instruction.constrain_step_state_transition( rw_counter=Transition.delta(instruction.rw_counter_offset), + call_id=Transition.same(), + is_root=Transition.same(), + is_create=Transition.same(), + code_source=Transition.same(), + program_counter=Transition.same(), + stack_pointer=Transition.same(), + memory_size=Transition.same(), + state_write_counter=Transition.same(), ) diff --git a/src/zkevm_specs/evm/instruction.py b/src/zkevm_specs/evm/instruction.py index cb707b16d..84b694983 100644 --- a/src/zkevm_specs/evm/instruction.py +++ b/src/zkevm_specs/evm/instruction.py @@ -396,10 +396,10 @@ def tx_log_lookup(self, field_tag: 
TxLogFieldTag, index: int = 0) -> Expression: return value def bytecode_lookup( - self, bytecode_hash: Expression, index: Expression, is_code: bool + self, bytecode_hash: Expression, index: Expression, is_code: Expression = None ) -> Expression: return self.tables.bytecode_lookup( - bytecode_hash, FQ(BytecodeFieldTag.Byte), index, FQ(is_code) + bytecode_hash, FQ(BytecodeFieldTag.Byte), index, is_code ).value def bytecode_length(self, bytecode_hash: Expression) -> Expression: @@ -424,7 +424,7 @@ def opcode_lookup_at(self, index: FQ, is_code: bool) -> FQ: "The opcode source when is_root and is_create (root creation call) is not determined yet" ) else: - return self.bytecode_lookup(self.curr.code_source, index, is_code).expr() + return self.bytecode_lookup(self.curr.code_source, index, FQ(is_code)).expr() def rw_lookup( self, diff --git a/src/zkevm_specs/evm/step.py b/src/zkevm_specs/evm/step.py index d34646374..6c53aaf79 100644 --- a/src/zkevm_specs/evm/step.py +++ b/src/zkevm_specs/evm/step.py @@ -128,7 +128,7 @@ class CopyCodeToMemoryAuxData: dst_addr: FQ bytes_left: FQ src_addr_end: FQ - code: Bytecode + code_hash: RLC def __init__( self, @@ -136,10 +136,10 @@ def __init__( dst_addr: int, bytes_left: int, src_addr_end: int, - code: Bytecode, + code_hash: RLC, ): self.src_addr = FQ(src_addr) self.dst_addr = FQ(dst_addr) self.bytes_left = FQ(bytes_left) self.src_addr_end = FQ(src_addr_end) - self.code = code + self.code_hash = code_hash diff --git a/src/zkevm_specs/evm/table.py b/src/zkevm_specs/evm/table.py index 0e7d82e9a..d9fac98c7 100644 --- a/src/zkevm_specs/evm/table.py +++ b/src/zkevm_specs/evm/table.py @@ -180,7 +180,6 @@ class AccountFieldTag(IntEnum): Nonce = auto() Balance = auto() CodeHash = auto() - CodeSize = auto() class CallContextFieldTag(IntEnum): @@ -388,14 +387,15 @@ def bytecode_lookup( bytecode_hash: Expression, field_tag: Expression, index: Expression, - is_code: Expression, + is_code: Expression = None, ) -> BytecodeTableRow: query = { 
"bytecode_hash": bytecode_hash, "field_tag": field_tag, "index": index, - "is_code": is_code, } + if is_code is not None: + query["is_code"] = is_code return _lookup(BytecodeTableRow, self.bytecode_table, query) def rw_lookup( diff --git a/tests/evm/test_codecopy.py b/tests/evm/test_codecopy.py index 59e629592..ff1608742 100644 --- a/tests/evm/test_codecopy.py +++ b/tests/evm/test_codecopy.py @@ -18,13 +18,14 @@ ) from zkevm_specs.util import ( GAS_COST_COPY, + FQ, MAX_N_BYTES_COPY_CODE_TO_MEMORY, MEMORY_EXPANSION_LINEAR_COEFF, MEMORY_EXPANSION_QUAD_DENOMINATOR, - rand_address, - rand_fq, RLC, U64, + rand_address, + rand_fq, ) @@ -69,13 +70,14 @@ def make_copy_code_step( program_counter: int, stack_pointer: int, memory_size: int, + randomness: FQ, ) -> StepState: aux_data = CopyCodeToMemoryAuxData( src_addr=src_addr, dst_addr=dst_addr, src_addr_end=src_addr_end, bytes_left=bytes_left, - code=code, + code_hash=RLC(code.hash(), randomness), ) step = StepState( execution_state=ExecutionState.CopyCodeToMemory, @@ -107,6 +109,7 @@ def make_copy_code_steps( program_counter: int, stack_pointer: int, memory_size: int, + randomness: FQ, ) -> Sequence[StepState]: buffer_map = dict(zip(range(src_addr, len(code.code)), code.code)) steps = [] @@ -124,6 +127,7 @@ def make_copy_code_steps( program_counter, stack_pointer, memory_size, + randomness, ) steps.append(new_step) src_addr += MAX_N_BYTES_COPY_CODE_TO_MEMORY @@ -219,8 +223,9 @@ def test_codecopy(src_addr: U64, dst_addr: U64, length: U64): length, rw_dictionary=rw_dictionary, program_counter=100, - memory_size=next_memory_word_size, stack_pointer=1024, + memory_size=next_memory_word_size, + randomness=randomness, ) steps.extend(steps_internal) @@ -235,8 +240,9 @@ def test_codecopy(src_addr: U64, dst_addr: U64, length: U64): call_id=CALL_ID, is_root=True, code_source=code_source, - program_counter=33, + program_counter=100, stack_pointer=1024, + memory_size=next_memory_word_size, gas_left=0, ) ) @@ -289,6 +295,7 @@ def 
test_copy_code_to_memory(src_addr: U64, dst_addr: U64, length: U64): program_counter=0, memory_size=next_memory_word_size, stack_pointer=1024, + randomness=randomness, ) steps.append( StepState( From f7df2b0d1ca4a682d9d89c16e42753d4287221bc Mon Sep 17 00:00:00 2001 From: Rohit Narurkar Date: Tue, 22 Mar 2022 00:24:00 +0800 Subject: [PATCH 15/16] fix: code source from current instruction --- src/zkevm_specs/evm/execution/codecopy.py | 8 ++------ src/zkevm_specs/evm/execution/copy_code_to_memory.py | 4 ++-- src/zkevm_specs/evm/step.py | 6 +++--- src/zkevm_specs/evm/table.py | 3 +-- tests/evm/test_codecopy.py | 7 +------ 5 files changed, 9 insertions(+), 19 deletions(-) diff --git a/src/zkevm_specs/evm/execution/codecopy.py b/src/zkevm_specs/evm/execution/codecopy.py index 3c3d98326..bea246a0b 100644 --- a/src/zkevm_specs/evm/execution/codecopy.py +++ b/src/zkevm_specs/evm/execution/codecopy.py @@ -17,11 +17,7 @@ def codecopy(instruction: Instruction): memory_offset, size = instruction.memory_offset_and_length(memory_offset_word, size_word) code_offset = instruction.rlc_to_fq_exact(code_offset_word, N_BYTES_MEMORY_ADDRESS) - account = instruction.call_context_lookup(CallContextFieldTag.CalleeAddress) - code_hash = instruction.account_read(account, AccountFieldTag.CodeHash) - instruction.constrain_equal(code_hash, instruction.curr.code_source) - - code_size = instruction.bytecode_length(code_hash) + code_size = instruction.bytecode_length(instruction.curr.code_source) next_memory_size, memory_expansion_gas_cost = instruction.memory_expansion_dynamic_length( memory_offset, size @@ -39,7 +35,7 @@ def codecopy(instruction: Instruction): instruction.constrain_equal(next_aux.dst_addr, memory_offset) instruction.constrain_equal(next_aux.src_addr_end, code_size) instruction.constrain_equal(next_aux.bytes_left, size) - instruction.constrain_equal(next_aux.code_hash, code_hash) + instruction.constrain_equal(next_aux.code_source, instruction.curr.code_source) 
instruction.step_state_transition_in_same_context( opcode, diff --git a/src/zkevm_specs/evm/execution/copy_code_to_memory.py b/src/zkevm_specs/evm/execution/copy_code_to_memory.py index 5ae0ad2e0..2cec47410 100644 --- a/src/zkevm_specs/evm/execution/copy_code_to_memory.py +++ b/src/zkevm_specs/evm/execution/copy_code_to_memory.py @@ -20,7 +20,7 @@ def copy_code_to_memory(instruction: Instruction): for idx in range(MAX_N_BYTES_COPY_CODE_TO_MEMORY): if buffer_reader.read_flag(idx) == 1: byte = instruction.bytecode_lookup( - aux.code_hash, + aux.code_source, aux.src_addr + idx, ) buffer_reader.constrain_byte(idx, byte) @@ -47,7 +47,7 @@ def copy_code_to_memory(instruction: Instruction): instruction.constrain_equal(next_aux.dst_addr, aux.dst_addr + copied_bytes) instruction.constrain_equal(next_aux.bytes_left + copied_bytes, aux.bytes_left) instruction.constrain_equal(next_aux.src_addr_end, aux.src_addr_end) - instruction.constrain_equal(next_aux.code_hash, aux.code_hash) + instruction.constrain_equal(next_aux.code_source, aux.code_source) instruction.constrain_step_state_transition( rw_counter=Transition.delta(instruction.rw_counter_offset), diff --git a/src/zkevm_specs/evm/step.py b/src/zkevm_specs/evm/step.py index 6c53aaf79..f6dbb4faf 100644 --- a/src/zkevm_specs/evm/step.py +++ b/src/zkevm_specs/evm/step.py @@ -128,7 +128,7 @@ class CopyCodeToMemoryAuxData: dst_addr: FQ bytes_left: FQ src_addr_end: FQ - code_hash: RLC + code_source: RLC def __init__( self, @@ -136,10 +136,10 @@ def __init__( dst_addr: int, bytes_left: int, src_addr_end: int, - code_hash: RLC, + code_source: RLC, ): self.src_addr = FQ(src_addr) self.dst_addr = FQ(dst_addr) self.bytes_left = FQ(bytes_left) self.src_addr_end = FQ(src_addr_end) - self.code_hash = code_hash + self.code_source = code_source diff --git a/src/zkevm_specs/evm/table.py b/src/zkevm_specs/evm/table.py index d9fac98c7..2a60dc3b9 100644 --- a/src/zkevm_specs/evm/table.py +++ b/src/zkevm_specs/evm/table.py @@ -393,9 +393,8 @@ 
def bytecode_lookup( "bytecode_hash": bytecode_hash, "field_tag": field_tag, "index": index, + "is_code": is_code, } - if is_code is not None: - query["is_code"] = is_code return _lookup(BytecodeTableRow, self.bytecode_table, query) def rw_lookup( diff --git a/tests/evm/test_codecopy.py b/tests/evm/test_codecopy.py index ff1608742..9492811d0 100644 --- a/tests/evm/test_codecopy.py +++ b/tests/evm/test_codecopy.py @@ -77,7 +77,7 @@ def make_copy_code_step( dst_addr=dst_addr, src_addr_end=src_addr_end, bytes_left=bytes_left, - code_hash=RLC(code.hash(), randomness), + code_source=RLC(code.hash(), randomness), ) step = StepState( execution_state=ExecutionState.CopyCodeToMemory, @@ -139,7 +139,6 @@ def make_copy_code_steps( @pytest.mark.parametrize("src_addr, dst_addr, length", TESTING_DATA) def test_codecopy(src_addr: U64, dst_addr: U64, length: U64): randomness = rand_fq() - callee_addr = rand_address() length_rlc = RLC(length, randomness) src_addr_rlc = RLC(src_addr, randomness) @@ -156,8 +155,6 @@ def test_codecopy(src_addr: U64, dst_addr: U64, length: U64): ) total_gas_cost = gas_cost_codecopy + (3 * gas_cost_push32) - code_hash = code.hash() - rw_dictionary = ( RWDictionary(1) .stack_write(CALL_ID, 1023, length_rlc) @@ -166,8 +163,6 @@ def test_codecopy(src_addr: U64, dst_addr: U64, length: U64): .stack_read(CALL_ID, 1021, dst_addr_rlc) .stack_read(CALL_ID, 1022, src_addr_rlc) .stack_read(CALL_ID, 1023, length_rlc) - .call_context_read(CALL_ID, CallContextFieldTag.CalleeAddress, callee_addr) - .account_read(callee_addr, AccountFieldTag.CodeHash, RLC(code_hash, randomness)) ) # rw counter before memory writes rw_counter_interim = rw_dictionary.rw_counter From 87f58bc3ab1e34c6c0dd454ecac979a92e58a2dd Mon Sep 17 00:00:00 2001 From: Rohit Narurkar Date: Tue, 22 Mar 2022 21:51:36 +0800 Subject: [PATCH 16/16] fix: specs are updated --- specs/opcode/39CODECOPY.md | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/specs/opcode/39CODECOPY.md 
b/specs/opcode/39CODECOPY.md index f5d358b93..8cac964d8 100644 --- a/specs/opcode/39CODECOPY.md +++ b/specs/opcode/39CODECOPY.md @@ -20,7 +20,7 @@ The gadget then transits to the internal state of `CopyCodeToMemory`. 1. opId = 0x39 2. State Transitions: - - rw_counter -> rw_counter + 5 (3 stack reads, 1 call context read `CalleeAddress`, 1 account read `CodeHash`) + - rw_counter -> rw_counter + 3 (3 stack reads) - stack_pointer -> stack_pointer + 3 - pc -> pc + 1 - gas -> 3 + dynamic_cost (memory expansion and copier cost when `size > 0`) @@ -31,8 +31,6 @@ The gadget then transits to the internal state of `CopyCodeToMemory`. - `memory_offset` is at the top of the stack - `code_offset` is at the second position of the stack - `size` is at the third position of the stack - - `callee_address` is in the call context for the current call - - `code_hash` is in the account context for the `callee_address` - `code_size` from the bytecode table ## Exceptions @@ -42,4 +40,4 @@ The gadget then transits to the internal state of `CopyCodeToMemory`. ## Code -Please refer to `src/zkevm_specs/evm/execution/codecode.py` +Please refer to `src/zkevm_specs/evm/execution/codecopy.py`