-
-
Notifications
You must be signed in to change notification settings - Fork 821
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat[venom]: add venom parser (#4381)
this commit adds a frontend (parser) for venom. it uses the lark library to define a grammar for venom, and constructs an `IRContext` which can be used to emit bytecode. the entry point to the venom compiler is `vyper/cli/venom_main.py`. possible improvements in the future include: - make data section optional in the grammar - make the entry block optional in the grammar (a la llvm) - add asm and opcodes output formats --------- Co-authored-by: Harry Kalogirou <[email protected]> Co-authored-by: Charles Cooper <[email protected]>
- Loading branch information
1 parent
c951dea
commit 537313b
Showing
12 changed files
with
560 additions
and
54 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Empty file.
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,275 @@ | ||
from vyper.venom.basicblock import IRBasicBlock, IRInstruction, IRLabel, IRLiteral, IRVariable | ||
from vyper.venom.context import IRContext | ||
from vyper.venom.function import IRFunction | ||
from vyper.venom.parser import parse_venom | ||
|
||
# TODO: Refactor tests with these helpers | ||
|
||
|
||
def instructions_eq(i1: IRInstruction, i2: IRInstruction) -> bool: | ||
return i1.output == i2.output and i1.opcode == i2.opcode and i1.operands == i2.operands | ||
|
||
|
||
def assert_bb_eq(bb1: IRBasicBlock, bb2: IRBasicBlock): | ||
assert bb1.label.value == bb2.label.value | ||
assert len(bb1.instructions) == len(bb2.instructions) | ||
for i1, i2 in zip(bb1.instructions, bb2.instructions): | ||
assert instructions_eq(i1, i2), f"[{i1}] != [{i2}]" | ||
|
||
|
||
def assert_fn_eq(fn1: IRFunction, fn2: IRFunction): | ||
assert fn1.name.value == fn2.name.value | ||
assert fn1.last_variable == fn2.last_variable | ||
assert len(fn1._basic_block_dict) == len(fn2._basic_block_dict) | ||
|
||
for name1, bb1 in fn1._basic_block_dict.items(): | ||
assert name1 in fn2._basic_block_dict | ||
assert_bb_eq(bb1, fn2._basic_block_dict[name1]) | ||
|
||
# check function entry is the same | ||
assert fn1.entry.label == fn2.entry.label | ||
|
||
|
||
def assert_ctx_eq(ctx1: IRContext, ctx2: IRContext): | ||
assert ctx1.last_label == ctx2.last_label | ||
assert len(ctx1.functions) == len(ctx2.functions) | ||
for label1, fn1 in ctx1.functions.items(): | ||
assert label1 in ctx2.functions | ||
assert_fn_eq(fn1, ctx2.functions[label1]) | ||
|
||
# check entry function is the same | ||
assert next(iter(ctx1.functions.keys())) == next(iter(ctx2.functions.keys())) | ||
|
||
assert len(ctx1.data_segment) == len(ctx2.data_segment) | ||
for d1, d2 in zip(ctx1.data_segment, ctx2.data_segment): | ||
assert instructions_eq(d1, d2), f"data: [{d1}] != [{d2}]" | ||
|
||
|
||
def test_single_bb(): | ||
source = """ | ||
function main { | ||
main: | ||
stop | ||
} | ||
[data] | ||
""" | ||
|
||
parsed_ctx = parse_venom(source) | ||
|
||
expected_ctx = IRContext() | ||
expected_ctx.add_function(main_fn := IRFunction(IRLabel("main"))) | ||
main_bb = main_fn.get_basic_block("main") | ||
main_bb.append_instruction("stop") | ||
|
||
assert_ctx_eq(parsed_ctx, expected_ctx) | ||
|
||
|
||
def test_multi_bb_single_fn(): | ||
source = """ | ||
function start { | ||
start: | ||
%1 = callvalue | ||
jnz @fine, @has_callvalue, %1 | ||
fine: | ||
%2 = calldataload 4 | ||
%4 = add %2, 279387 | ||
return %2, %4 | ||
has_callvalue: | ||
revert 0, 0 | ||
} | ||
[data] | ||
""" | ||
|
||
parsed_ctx = parse_venom(source) | ||
|
||
expected_ctx = IRContext() | ||
expected_ctx.add_function(start_fn := IRFunction(IRLabel("start"))) | ||
|
||
start_bb = start_fn.get_basic_block("start") | ||
start_bb.append_instruction("callvalue", ret=IRVariable("1")) | ||
start_bb.append_instruction("jnz", IRVariable("1"), IRLabel("has_callvalue"), IRLabel("fine")) | ||
|
||
start_fn.append_basic_block(fine_bb := IRBasicBlock(IRLabel("fine"), start_fn)) | ||
fine_bb.append_instruction("calldataload", IRLiteral(4), ret=IRVariable("2")) | ||
fine_bb.append_instruction("add", IRLiteral(279387), IRVariable("2"), ret=IRVariable("4")) | ||
fine_bb.append_instruction("return", IRVariable("4"), IRVariable("2")) | ||
|
||
has_callvalue_bb = IRBasicBlock(IRLabel("has_callvalue"), start_fn) | ||
start_fn.append_basic_block(has_callvalue_bb) | ||
has_callvalue_bb.append_instruction("revert", IRLiteral(0), IRLiteral(0)) | ||
has_callvalue_bb.append_instruction("stop") | ||
|
||
start_fn.last_variable = 4 | ||
|
||
assert_ctx_eq(parsed_ctx, expected_ctx) | ||
|
||
|
||
def test_data_section(): | ||
parsed_ctx = parse_venom( | ||
""" | ||
function entry { | ||
entry: | ||
stop | ||
} | ||
[data] | ||
dbname @selector_buckets | ||
db @selector_bucket_0 | ||
db @fallback | ||
db @selector_bucket_2 | ||
db @selector_bucket_3 | ||
db @fallback | ||
db @selector_bucket_5 | ||
db @selector_bucket_6 | ||
""" | ||
) | ||
|
||
expected_ctx = IRContext() | ||
expected_ctx.add_function(entry_fn := IRFunction(IRLabel("entry"))) | ||
entry_fn.get_basic_block("entry").append_instruction("stop") | ||
|
||
expected_ctx.data_segment = [ | ||
IRInstruction("dbname", [IRLabel("selector_buckets")]), | ||
IRInstruction("db", [IRLabel("selector_bucket_0")]), | ||
IRInstruction("db", [IRLabel("fallback")]), | ||
IRInstruction("db", [IRLabel("selector_bucket_2")]), | ||
IRInstruction("db", [IRLabel("selector_bucket_3")]), | ||
IRInstruction("db", [IRLabel("fallback")]), | ||
IRInstruction("db", [IRLabel("selector_bucket_5")]), | ||
IRInstruction("db", [IRLabel("selector_bucket_6")]), | ||
] | ||
|
||
assert_ctx_eq(parsed_ctx, expected_ctx) | ||
|
||
|
||
def test_multi_function(): | ||
parsed_ctx = parse_venom( | ||
""" | ||
function entry { | ||
entry: | ||
invoke @check_cv | ||
jmp @wow | ||
wow: | ||
mstore 0, 1 | ||
return 0, 32 | ||
} | ||
function check_cv { | ||
check_cv: | ||
%1 = callvalue | ||
%2 = param | ||
jnz @no_value, @has_value, %1 | ||
no_value: | ||
ret %2 | ||
has_value: | ||
revert 0, 0 | ||
} | ||
[data] | ||
""" | ||
) | ||
|
||
expected_ctx = IRContext() | ||
expected_ctx.add_function(entry_fn := IRFunction(IRLabel("entry"))) | ||
|
||
entry_bb = entry_fn.get_basic_block("entry") | ||
entry_bb.append_instruction("invoke", IRLabel("check_cv")) | ||
entry_bb.append_instruction("jmp", IRLabel("wow")) | ||
|
||
entry_fn.append_basic_block(wow_bb := IRBasicBlock(IRLabel("wow"), entry_fn)) | ||
wow_bb.append_instruction("mstore", IRLiteral(1), IRLiteral(0)) | ||
wow_bb.append_instruction("return", IRLiteral(32), IRLiteral(0)) | ||
|
||
expected_ctx.add_function(check_fn := IRFunction(IRLabel("check_cv"))) | ||
|
||
check_entry_bb = check_fn.get_basic_block("check_cv") | ||
check_entry_bb.append_instruction("callvalue", ret=IRVariable("1")) | ||
check_entry_bb.append_instruction("param", ret=IRVariable("2")) | ||
check_entry_bb.append_instruction( | ||
"jnz", IRVariable("1"), IRLabel("has_value"), IRLabel("no_value") | ||
) | ||
check_fn.append_basic_block(no_value_bb := IRBasicBlock(IRLabel("no_value"), check_fn)) | ||
no_value_bb.append_instruction("ret", IRVariable("2")) | ||
|
||
check_fn.append_basic_block(value_bb := IRBasicBlock(IRLabel("has_value"), check_fn)) | ||
value_bb.append_instruction("revert", IRLiteral(0), IRLiteral(0)) | ||
value_bb.append_instruction("stop") | ||
|
||
check_fn.last_variable = 2 | ||
|
||
assert_ctx_eq(parsed_ctx, expected_ctx) | ||
|
||
|
||
def test_multi_function_and_data(): | ||
parsed_ctx = parse_venom( | ||
""" | ||
function entry { | ||
entry: | ||
invoke @check_cv | ||
jmp @wow | ||
wow: | ||
mstore 0, 1 | ||
return 0, 32 | ||
} | ||
function check_cv { | ||
check_cv: | ||
%1 = callvalue | ||
%2 = param | ||
jnz @no_value, @has_value, %1 | ||
no_value: | ||
ret %2 | ||
has_value: | ||
revert 0, 0 | ||
} | ||
[data] | ||
dbname @selector_buckets | ||
db @selector_bucket_0 | ||
db @fallback | ||
db @selector_bucket_2 | ||
db @selector_bucket_3 | ||
db @selector_bucket_6 | ||
""" | ||
) | ||
|
||
expected_ctx = IRContext() | ||
expected_ctx.add_function(entry_fn := IRFunction(IRLabel("entry"))) | ||
|
||
entry_bb = entry_fn.get_basic_block("entry") | ||
entry_bb.append_instruction("invoke", IRLabel("check_cv")) | ||
entry_bb.append_instruction("jmp", IRLabel("wow")) | ||
|
||
entry_fn.append_basic_block(wow_bb := IRBasicBlock(IRLabel("wow"), entry_fn)) | ||
wow_bb.append_instruction("mstore", IRLiteral(1), IRLiteral(0)) | ||
wow_bb.append_instruction("return", IRLiteral(32), IRLiteral(0)) | ||
|
||
expected_ctx.add_function(check_fn := IRFunction(IRLabel("check_cv"))) | ||
|
||
check_entry_bb = check_fn.get_basic_block("check_cv") | ||
check_entry_bb.append_instruction("callvalue", ret=IRVariable("1")) | ||
check_entry_bb.append_instruction("param", ret=IRVariable("2")) | ||
check_entry_bb.append_instruction( | ||
"jnz", IRVariable("1"), IRLabel("has_value"), IRLabel("no_value") | ||
) | ||
check_fn.append_basic_block(no_value_bb := IRBasicBlock(IRLabel("no_value"), check_fn)) | ||
no_value_bb.append_instruction("ret", IRVariable("2")) | ||
|
||
check_fn.append_basic_block(value_bb := IRBasicBlock(IRLabel("has_value"), check_fn)) | ||
value_bb.append_instruction("revert", IRLiteral(0), IRLiteral(0)) | ||
value_bb.append_instruction("stop") | ||
|
||
check_fn.last_variable = 2 | ||
|
||
expected_ctx.data_segment = [ | ||
IRInstruction("dbname", [IRLabel("selector_buckets")]), | ||
IRInstruction("db", [IRLabel("selector_bucket_0")]), | ||
IRInstruction("db", [IRLabel("fallback")]), | ||
IRInstruction("db", [IRLabel("selector_bucket_2")]), | ||
IRInstruction("db", [IRLabel("selector_bucket_3")]), | ||
IRInstruction("db", [IRLabel("selector_bucket_6")]), | ||
] | ||
|
||
assert_ctx_eq(parsed_ctx, expected_ctx) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,65 @@ | ||
#!/usr/bin/env python3 | ||
import argparse | ||
import sys | ||
|
||
import vyper | ||
import vyper.evm.opcodes as evm | ||
from vyper.compiler.phases import generate_bytecode | ||
from vyper.compiler.settings import OptimizationLevel, Settings, set_global_settings | ||
from vyper.venom import generate_assembly_experimental, run_passes_on | ||
from vyper.venom.parser import parse_venom | ||
|
||
""" | ||
Standalone entry point into venom compiler. Parses venom input and emits | ||
bytecode. | ||
""" | ||
|
||
|
||
def _parse_cli_args(): | ||
return _parse_args(sys.argv[1:]) | ||
|
||
|
||
def _parse_args(argv: list[str]): | ||
parser = argparse.ArgumentParser( | ||
description="Venom EVM IR parser & compiler", formatter_class=argparse.RawTextHelpFormatter | ||
) | ||
parser.add_argument("input_file", help="Venom sourcefile", nargs="?") | ||
parser.add_argument("--version", action="version", version=vyper.__long_version__) | ||
parser.add_argument( | ||
"--evm-version", | ||
help=f"Select desired EVM version (default {evm.DEFAULT_EVM_VERSION})", | ||
choices=list(evm.EVM_VERSIONS), | ||
dest="evm_version", | ||
) | ||
parser.add_argument( | ||
"--stdin", action="store_true", help="whether to pull venom input from stdin" | ||
) | ||
|
||
args = parser.parse_args(argv) | ||
|
||
if args.evm_version is not None: | ||
set_global_settings(Settings(evm_version=args.evm_version)) | ||
|
||
if args.stdin: | ||
if not sys.stdin.isatty(): | ||
venom_source = sys.stdin.read() | ||
else: | ||
# No input provided | ||
print("Error: --stdin flag used but no input provided") | ||
sys.exit(1) | ||
else: | ||
if args.input_file is None: | ||
print("Error: No input file provided, either use --stdin or provide a path") | ||
sys.exit(1) | ||
with open(args.input_file, "r") as f: | ||
venom_source = f.read() | ||
|
||
ctx = parse_venom(venom_source) | ||
run_passes_on(ctx, OptimizationLevel.default()) | ||
asm = generate_assembly_experimental(ctx) | ||
bytecode = generate_bytecode(asm, compiler_metadata=None) | ||
print(f"0x{bytecode.hex()}") | ||
|
||
|
||
if __name__ == "__main__": | ||
_parse_args(sys.argv[1:]) |
Oops, something went wrong.