diff --git a/specs/opcode/35CALLDATALOAD.md b/specs/opcode/35CALLDATALOAD.md new file mode 100644 index 000000000..a27bc9d36 --- /dev/null +++ b/specs/opcode/35CALLDATALOAD.md @@ -0,0 +1,41 @@ +# CALLDATALOAD opcode + +## Procedure + +The `CALLDATALOAD` opcode loads a 32-byte word from the input data (call data) of the current call and pushes it onto the stack. + +## EVM Behaviour + +Stack input is the byte offset to read call data from. Stack output is a 32-byte value starting from the given offset of the call data. All bytes after the end of the call data are set to `0`. + +## Constraints + +1. opId == 0x35 +2. State Transition: + - if is_root_call: + - rw_counter += 3 (1 stack read, 1 call context read, 1 stack write) + - if is_internal_call: + - rw_counter += rw_counter_offset ∈ {6, 7, ..., 37, 38} (1 stack read, 4 call context reads, n ∈ {0, 1, ..., 31, 32} memory reads, 1 stack write) + - stack_pointer unchanged + - pc + 1 + - gas - 3 +3. Lookups: + - `offset` is at the top of the stack + - `tx_id` is in the RW table (call context) + - if is_root_call (where `src_addr = offset`): + - `calldata_length` is in the TX table + - n ∈ {0, 1, ..., 31, 32} lookups: for each `i in range(32)`, if `buffer.read_flag(i)` then `calldata_word[i]`, the i'th byte of the word pushed onto the stack, is in the TX table {tx id, call data, src_addr + i} + - if is_internal_call (where `src_addr = offset + calldata_offset`): + - `calldata_length` is in the RW table (call context) + - `calldata_offset` is in the RW table (call context) + - `caller_id` is in the RW table (call context) + - n ∈ {0, 1, ..., 31, 32} lookups: for each `i in range(32)`, if `buffer.read_flag(i)` then `calldata_word[i]`, the i'th byte of the word pushed onto the stack, is in the RW table {memory, src_addr + i, caller_id} + +## Exceptions + +1. Stack underflow: stack is empty, stack pointer = 1024 +2. Out of gas: remaining gas is not enough for this opcode + +## Code + +Please refer to `src/zkevm_specs/evm/execution/calldataload.py`. 
diff --git a/src/zkevm_specs/evm/execution/__init__.py b/src/zkevm_specs/evm/execution/__init__.py index be699cbec..6adff7800 100644 --- a/src/zkevm_specs/evm/execution/__init__.py +++ b/src/zkevm_specs/evm/execution/__init__.py @@ -15,6 +15,7 @@ from .caller import * from .callvalue import * from .calldatacopy import * +from .calldataload import * from .gas import * from .jump import * from .jumpi import * @@ -35,6 +36,7 @@ ExecutionState.CALLER: caller, ExecutionState.CALLVALUE: callvalue, ExecutionState.CALLDATACOPY: calldatacopy, + ExecutionState.CALLDATALOAD: calldataload, ExecutionState.CALLDATASIZE: calldatasize, ExecutionState.COINBASE: coinbase, ExecutionState.TIMESTAMP: timestamp, diff --git a/src/zkevm_specs/evm/execution/calldataload.py b/src/zkevm_specs/evm/execution/calldataload.py new file mode 100644 index 000000000..c2361cbc3 --- /dev/null +++ b/src/zkevm_specs/evm/execution/calldataload.py @@ -0,0 +1,57 @@ +from ..instruction import Instruction, Transition +from ..opcode import Opcode +from ..table import RW, CallContextFieldTag, TxContextFieldTag +from ..util import BufferReaderGadget +from ...util.param import N_BYTES_WORD + + +def calldataload(instruction: Instruction): + opcode = instruction.opcode_lookup(True) + instruction.constrain_equal(opcode, Opcode.CALLDATALOAD) + + # offset is the 64-bit offset to start reading 32-bytes from start of calldata. 
+ offset = instruction.rlc_to_fq_exact(instruction.stack_pop(), n_bytes=8) + + tx_id = instruction.call_context_lookup(CallContextFieldTag.TxId, RW.Read) + + if instruction.curr.is_root: + calldata_length = instruction.tx_context_lookup(tx_id, TxContextFieldTag.CallDataLength) + calldata_offset = 0 + else: + calldata_length = instruction.call_context_lookup(CallContextFieldTag.CallDataLength) + calldata_offset = instruction.call_context_lookup(CallContextFieldTag.CallDataOffset) + caller_id = instruction.call_context_lookup(CallContextFieldTag.CallerId) + + src_addr = offset + calldata_offset + src_addr_end = calldata_length + calldata_offset + + buffer_reader = BufferReaderGadget( + instruction, N_BYTES_WORD, src_addr, src_addr_end, N_BYTES_WORD + ) + + calldata_word = [] + for idx in range(N_BYTES_WORD): + if buffer_reader.read_flag(idx): + if instruction.curr.is_root: + tx_byte = instruction.tx_calldata_lookup(tx_id, src_addr + idx) + buffer_reader.constrain_byte(idx, tx_byte) + calldata_word.append(int(tx_byte)) + else: + mem_byte = instruction.memory_lookup(RW.Read, src_addr + idx, caller_id) + buffer_reader.constrain_byte(idx, mem_byte) + calldata_word.append(int(mem_byte)) + else: + buffer_reader.constrain_byte(idx, 0) + calldata_word.append(0) + + instruction.constrain_equal( + instruction.stack_push(), + instruction.bytes_to_rlc(bytes(calldata_word)), + ) + + instruction.step_state_transition_in_same_context( + opcode, + rw_counter=Transition.delta(instruction.rw_counter_offset), + program_counter=Transition.delta(1), + stack_pointer=Transition.same(), + ) diff --git a/src/zkevm_specs/evm/instruction.py b/src/zkevm_specs/evm/instruction.py index d37b1c4a8..bf5e36a45 100644 --- a/src/zkevm_specs/evm/instruction.py +++ b/src/zkevm_specs/evm/instruction.py @@ -302,6 +302,9 @@ def word_to_lo_hi(self, word: RLC) -> Tuple[FQ, FQ]: def int_to_rlc(self, value: int, n_bytes: int) -> RLC: return RLC(value, self.randomness, n_bytes) + def bytes_to_rlc(self, value: 
bytes) -> RLC: + return RLC(value, self.randomness, len(value)) + def bytes_to_int(self, value: bytes) -> int: assert len(value) <= MAX_N_BYTES, "Too many bytes to composite an integer in field" return int.from_bytes(value, "little") diff --git a/tests/evm/test_calldataload.py b/tests/evm/test_calldataload.py new file mode 100644 index 000000000..a207217f4 --- /dev/null +++ b/tests/evm/test_calldataload.py @@ -0,0 +1,229 @@ +import pytest + +from typing import Optional +from zkevm_specs.evm import ( + Bytecode, + CallContextFieldTag, + ExecutionState, + RW, + RWTableTag, + StepState, + Tables, + Transaction, + verify_steps, +) +from zkevm_specs.util import rand_fp, RLC, U64 + +TESTING_DATA = ( + ( + bytes.fromhex("FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF"), + 0x20, + 0x00, + bytes.fromhex("FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF"), + True, + None, + ), + ( + bytes.fromhex("FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF"), + 0x20, + 0x1F, + bytes.fromhex("FF00000000000000000000000000000000000000000000000000000000000000"), + True, + None, + ), + ( + bytes.fromhex("a1bacf5488bfafc33bad736db41f06866eaeb35e1c1dd81dfc268357ec98563f"), + 0x20, + 0x10, + bytes.fromhex("6eaeb35e1c1dd81dfc268357ec98563f00000000000000000000000000000000"), + True, + None, + ), + ( + bytes.fromhex("a1bacf5488bfafc33bad736db41f06866eaeb35e1c1dd81dfc268357ec98563f"), + 0x20, + 0x10, + bytes.fromhex("6eaeb35e1c1dd81dfc268357ec98563f00000000000000000000000000000000"), + False, + 0x00, + ), + ( + bytes.fromhex("a1bacf5488bfafc33bad736db41f06866eaeb35e1c1dd81dfc268357ec98563fab"), + 0x20, + 0x10, + bytes.fromhex("aeb35e1c1dd81dfc268357ec98563fab00000000000000000000000000000000"), + False, + 0x01, + ), +) + + +@pytest.mark.parametrize( + "call_data, call_data_length, offset, expected_stack_top, is_root, call_data_offset", + TESTING_DATA, +) +def test_calldataload( + call_data: bytes, + call_data_length: U64, + offset: U64, + 
expected_stack_top: bytes, + is_root: bool, + call_data_offset: Optional[U64], +): + randomness = rand_fp() + + tx = Transaction(id=1) + if is_root: + tx.call_data = call_data + + offset_rlc = RLC(offset, randomness) + expected_stack_top = RLC(expected_stack_top, randomness) + + bytecode = Bytecode().push(offset_rlc, n_bytes=32).calldataload().stop() + bytecode_hash = RLC(bytecode.hash(), randomness) + + if is_root: + call_id = 1 + else: + call_id = 2 + parent_call_id = 1 + + rws = set( + [ + (1, RW.Write, RWTableTag.Stack, call_id, 1023, 0, offset_rlc, 0, 0, 0), + (2, RW.Read, RWTableTag.Stack, call_id, 1023, 0, offset_rlc, 0, 0, 0), + (3, RW.Read, RWTableTag.CallContext, call_id, CallContextFieldTag.TxId, 0, 1, 0, 0, 0), + ] + ) + if is_root: + rws.add((4, RW.Write, RWTableTag.Stack, call_id, 1023, 0, expected_stack_top, 0, 0, 0)) + rw_counter_stop = 5 + else: + # add to RW table call context, call data length (read) + rws.add( + ( + 4, + RW.Read, + RWTableTag.CallContext, + call_id, + CallContextFieldTag.CallDataLength, + 0, + call_data_length, + 0, + 0, + 0, + ) + ) + # add to RW table call context, call data offset (read) + rws.add( + ( + 5, + RW.Read, + RWTableTag.CallContext, + call_id, + CallContextFieldTag.CallDataOffset, + 0, + call_data_offset, + 0, + 0, + 0, + ) + ) + # add to RW table call context, caller'd ID (read) + rws.add( + ( + 6, + RW.Read, + RWTableTag.CallContext, + call_id, + CallContextFieldTag.CallerId, + 0, + parent_call_id, + 0, + 0, + 0, + ) + ) + rw_counter = 7 + # add to RW table memory (read) + for i in range(0, len(call_data)): + idx = offset + call_data_offset + i + if idx < len(call_data): + rws.add( + ( + rw_counter, + RW.Read, + RWTableTag.Memory, + parent_call_id, + idx, + 0, + call_data[idx], + 0, + 0, + 0, + ) + ) + rw_counter += 1 + # add to RW table stack (write) + rws.add( + ( + rw_counter, + RW.Write, + RWTableTag.Stack, + call_id, + 1023, + 0, + expected_stack_top, + 0, + 0, + 0, + ) + ) + rw_counter_stop = rw_counter + 1 
+ + tables = Tables( + block_table=set(), + tx_table=set(tx.table_assignments(randomness)), + bytecode_table=set(bytecode.table_assignments(randomness)), + rw_table=rws, + ) + + verify_steps( + randomness=randomness, + tables=tables, + steps=[ + StepState( + execution_state=ExecutionState.PUSH, + rw_counter=1, + call_id=call_id, + is_root=is_root, + is_create=False, + code_source=bytecode_hash, + program_counter=0, + stack_pointer=1024, + gas_left=6, + ), + StepState( + execution_state=ExecutionState.CALLDATALOAD, + rw_counter=2, + call_id=call_id, + is_root=is_root, + is_create=False, + code_source=bytecode_hash, + program_counter=33, + stack_pointer=1023, + gas_left=3, + ), + StepState( + execution_state=ExecutionState.STOP, + rw_counter=rw_counter_stop, + call_id=call_id, + is_root=is_root, + is_create=False, + code_source=bytecode_hash, + program_counter=34, + stack_pointer=1023, + gas_left=0, + ), + ], + ) diff --git a/tests/evm/test_calldatasize.py b/tests/evm/test_calldatasize.py index 156f7bce0..3f8a1879a 100644 --- a/tests/evm/test_calldatasize.py +++ b/tests/evm/test_calldatasize.py @@ -3,7 +3,6 @@ from zkevm_specs.evm import ( ExecutionState, StepState, - Opcode, verify_steps, Tables, RWTableTag,