Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat[venom]: add venom parser #4381

Merged
merged 32 commits into from
Dec 14, 2024
Merged
Show file tree
Hide file tree
Changes from 16 commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
b50f4f4
feat[venom]: frontend (WIP)
Philogy Nov 28, 2024
a219d53
run passes
Philogy Nov 28, 2024
b98d86d
remove unused `last_label`
Philogy Nov 28, 2024
919952c
have last var set
Philogy Nov 28, 2024
6d15bda
nit: improve error
Philogy Nov 29, 2024
6044c3b
nit: top->bottom operand order
Philogy Nov 29, 2024
e55184a
add evm version
Philogy Nov 29, 2024
ea855e8
update docs
Philogy Nov 29, 2024
dbb2c7c
remove print
Philogy Nov 29, 2024
3f7c0f4
Merge branch 'master' into master
harkal Nov 29, 2024
f9689f5
cleanup
Philogy Nov 30, 2024
60e03fd
implement most nits
Philogy Dec 3, 2024
d46738c
make stdin more explicit
Philogy Dec 3, 2024
4d8fc3f
remove unnecessary digit bypass
Philogy Dec 3, 2024
a7a326a
further improvements
Philogy Dec 5, 2024
1b6c5ab
Merge branch 'vyperlang:master' into master
Philogy Dec 5, 2024
717c2ff
cleanup bb dict
Philogy Dec 8, 2024
dd56501
handle potential suffix collision
Philogy Dec 8, 2024
4a6826d
add instruction item to grammar
Philogy Dec 10, 2024
f4009c1
rename grammar nodes
Philogy Dec 11, 2024
5c12f07
wip: basic venom parser tests
Philogy Dec 11, 2024
cd07e8d
remove grammar ambiguity
Philogy Dec 12, 2024
6f52778
finish tests
Philogy Dec 12, 2024
f7705b0
back to curly
Philogy Dec 14, 2024
5bb1d0a
factor out a function
charles-cooper Dec 14, 2024
3e52019
fix lint
charles-cooper Dec 14, 2024
a165475
style, comments
charles-cooper Dec 14, 2024
aecb545
simplify a test function
charles-cooper Dec 14, 2024
a806aaa
add additional checks for test function/ctx equality
charles-cooper Dec 14, 2024
659ee0d
Merge branch 'master' into master
charles-cooper Dec 14, 2024
6995f28
rename file
charles-cooper Dec 14, 2024
990c219
add note on compiling venom
charles-cooper Dec 14, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@ def _global_version(version):
"asttokens>=2.0.5,<3",
"pycryptodome>=3.5.1,<4",
"packaging>=23.1,<24",
"lark>=1.0.0,<2",
"importlib-metadata",
"wheel",
],
Expand All @@ -105,6 +106,7 @@ def _global_version(version):
"vyper=vyper.cli.vyper_compile:_parse_cli_args",
"fang=vyper.cli.vyper_ir:_parse_cli_args",
"vyper-json=vyper.cli.vyper_json:_parse_cli_args",
"venom=vyper.cli.vyper_venom:_parse_cli_args",
]
},
classifiers=[
Expand Down
61 changes: 61 additions & 0 deletions vyper/cli/vyper_venom.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
#!/usr/bin/env python3
import argparse
import sys
import vyper
from vyper.venom.parser import parse_venom
from vyper.venom import generate_assembly_experimental, run_passes_on
from vyper.compiler.settings import OptimizationLevel, set_global_settings, Settings
from vyper.compiler.phases import generate_bytecode
import vyper.evm.opcodes as evm


def _parse_cli_args():
    """Console-script entry point: forward the process's CLI arguments to ``_parse_args``."""
    # argv[0] is the program name; only the remaining items are real arguments.
    cli_arguments = sys.argv[1:]
    return _parse_args(cli_arguments)


def _read_venom_source(args: argparse.Namespace) -> str:
    """Return the Venom source text selected by the parsed CLI arguments.

    Exits the process with status 1 (message on stderr) when no usable input
    was supplied.
    """
    if args.stdin:
        # An interactive terminal on stdin is treated as "no input provided".
        if sys.stdin.isatty():
            sys.exit("Error: --stdin flag used but no input provided")
        return sys.stdin.read()

    if args.input_file is None:
        sys.exit("Error: No input file provided, either use --stdin or provide a path")
    with open(args.input_file, "r") as f:
        return f.read()


def _parse_args(argv: list[str]):
    """Compile the Venom source named by ``argv`` and print its bytecode.

    Args:
        argv: Command-line arguments, excluding the program name.

    The source is read from stdin when ``--stdin`` is given, otherwise from the
    positional ``input_file``. It is parsed, run through the default
    optimization passes, assembled, and the resulting bytecode is printed to
    stdout as a ``0x``-prefixed hex string.

    Exits with status 1 when no input is available. The error message is
    written to stderr so that it cannot be mistaken for bytecode by a consumer
    reading stdout.
    """
    parser = argparse.ArgumentParser(
        description="Venom EVM IR parser & compiler", formatter_class=argparse.RawTextHelpFormatter
    )
    parser.add_argument("input_file", help="Venom sourcefile", nargs="?")
    parser.add_argument("--version", action="version", version=vyper.__long_version__)
    parser.add_argument(
        "--evm-version",
        help=f"Select desired EVM version (default {evm.DEFAULT_EVM_VERSION})",
        choices=list(evm.EVM_VERSIONS),
        dest="evm_version",
    )
    parser.add_argument(
        "--stdin",
        action="store_true",
        help="whether to pull venom input from stdin",
    )

    args = parser.parse_args(argv)

    # Only override the global settings when a version was explicitly requested.
    if args.evm_version is not None:
        set_global_settings(Settings(evm_version=args.evm_version))

    venom_source = _read_venom_source(args)

    ctx = parse_venom(venom_source)
    run_passes_on(ctx, OptimizationLevel.default())
    asm = generate_assembly_experimental(ctx)
    bytecode = generate_bytecode(asm, compiler_metadata=None)
    print(f"0x{bytecode.hex()}")


if __name__ == "__main__":
    # Running the module directly behaves like the installed console script.
    _parse_cli_args()
Philogy marked this conversation as resolved.
Show resolved Hide resolved
78 changes: 29 additions & 49 deletions vyper/venom/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,59 +29,39 @@ Venom employs two scopes: global and function level.
### Example code

```llvm
IRFunction: global

global:
%1 = calldataload 0
%2 = shr 224, %1
jmp label %selector_bucket_0

selector_bucket_0:
%3 = xor %2, 1579456981
%4 = iszero %3
jnz label %1, label %2, %4

1: IN=[selector_bucket_0] OUT=[9]
jmp label %fallback

2:
%5 = callvalue
%6 = calldatasize
%7 = lt %6, 164
%8 = or %5, %7
%9 = iszero %8
assert %9
stop

fallback:
revert 0, 0
function global => {
Philogy marked this conversation as resolved.
Show resolved Hide resolved
global:
%1 = calldataload 0
%2 = shr 224, %1
jmp @selector_bucket_0

selector_bucket_0:
%3 = xor %2, 1579456981
%4 = iszero %3
jnz @1, @2, %4

1:
jmp @fallback

2:
%5 = callvalue
%6 = calldatasize
%7 = lt %6, 164
%8 = or %5, %7
%9 = iszero %8
assert %9
stop

fallback:
revert 0, 0
}

data:
```

### Grammar

Below is a (not-so-complete) grammar to describe the text format of Venom IR:

```llvm
program ::= function_declaration*

function_declaration ::= "IRFunction:" identifier input_list? output_list? "=>" block

input_list ::= "IN=" "[" (identifier ("," identifier)*)? "]"
output_list ::= "OUT=" "[" (identifier ("," identifier)*)? "]"

block ::= label ":" input_list? output_list? "=>{" operation* "}"

operation ::= "%" identifier "=" opcode operand ("," operand)*
| opcode operand ("," operand)*

opcode ::= "calldataload" | "shr" | "shl" | "and" | "add" | "codecopy" | "mload" | "jmp" | "xor" | "iszero" | "jnz" | "label" | "lt" | "or" | "assert" | "callvalue" | "calldatasize" | "alloca" | "calldatacopy" | "invoke" | "gt" | ...

operand ::= "%" identifier | label | integer | "label" "%" identifier
label ::= "%" identifier

identifier ::= [a-zA-Z_][a-zA-Z0-9_]*
integer ::= [0-9]+
```
For the definition of the grammar, see the [venom parser](./parser.py).

## Implementation

Expand Down
8 changes: 6 additions & 2 deletions vyper/venom/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,10 +72,14 @@ def _run_passes(fn: IRFunction, optimize: OptimizationLevel) -> None:
DFTPass(ac, fn).run_pass()


def run_passes_on(ctx: IRContext, optimize: OptimizationLevel):
    """Run the pass pipeline (``_run_passes``) on every function held by ``ctx``."""
    for function in ctx.functions.values():
        _run_passes(function, optimize)


def generate_ir(ir: IRnode, optimize: OptimizationLevel) -> IRContext:
    """Convert legacy IR into a Venom ``IRContext`` and run the passes on it.

    Args:
        ir: Root node of the "old" IR.
        optimize: Optimization level forwarded to the pass pipeline.

    Returns:
        The context produced by ``ir_node_to_venom`` after the passes have run.
    """
    # Convert "old" IR to "new" IR
    ctx = ir_node_to_venom(ir)
    # Run the pipeline exactly once per function via the shared helper; also
    # looping over ctx.functions here would execute every pass twice.
    run_passes_on(ctx, optimize)

    return ctx
2 changes: 1 addition & 1 deletion vyper/venom/analysis/cfg.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ def analyze(self) -> None:
bb.is_reachable = False

for bb in fn.get_basic_blocks():
assert bb.is_terminated
assert bb.is_terminated, f"not terminating:\n{bb}"

term = bb.instructions[-1]
if term.opcode in CFG_ALTERING_INSTRUCTIONS:
Expand Down
2 changes: 1 addition & 1 deletion vyper/venom/basicblock.py
Original file line number Diff line number Diff line change
Expand Up @@ -348,7 +348,7 @@ def remove_phi_operand(self, label: IRLabel) -> None:
assert self.opcode == "phi", "instruction must be a phi"
for i in range(0, len(self.operands), 2):
if self.operands[i] == label:
del self.operands[i : i + 2]
del self.operands[i: i + 2]
return

def get_ast_source(self) -> Optional[IRnode]:
Expand Down
2 changes: 0 additions & 2 deletions vyper/venom/function.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@ class IRFunction:
name: IRLabel # symbol name
ctx: "IRContext" # type: ignore # noqa: F821
args: list
last_label: int
last_variable: int
_basic_block_dict: dict[str, IRBasicBlock]

Expand Down Expand Up @@ -182,7 +181,6 @@ def chain_basic_blocks(self) -> None:
def copy(self):
    """Return a new ``IRFunction`` with the same name, basic-block mapping and
    ``last_variable`` counter as this one."""
    duplicate = IRFunction(self.name)
    # dict.copy() is shallow: the new mapping refers to the same block objects.
    duplicate._basic_block_dict = self._basic_block_dict.copy()
    # `last_label` is no longer an attribute of IRFunction, so it is not copied.
    duplicate.last_variable = self.last_variable
    return duplicate

Expand Down
165 changes: 165 additions & 0 deletions vyper/venom/parser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,165 @@
from vyper.venom.context import IRContext
from vyper.venom.basicblock import (
IRLabel,
IRVariable,
IRLiteral,
IROperand,
IRInstruction,
IRBasicBlock,
)
from vyper.venom.function import IRFunction
from lark import Lark, Transformer


# Lark grammar for the textual Venom IR format.
#
# Shape of a program, as defined by the rules below:
#   * `start`    - zero or more `function` definitions followed by a
#                  `data_section`; the `data:` keyword is required even when no
#                  entries follow it.
#   * `function` - `function NAME => { block* }`.
#   * `block`    - `NAME:` followed by any number of calls or assignments.
#   * `call`     - an opcode followed by a (possibly empty) comma-separated
#                  operand list.
#
# Terminals:
#   * `VAR_IDENT` - `%` plus an integer, optionally followed by `:` and a
#                   second integer (e.g. `%1` or `%1:2`).
#   * `LABEL`     - `@` followed by a NAME (digits, letters, underscores).
#   * `CONST`     - a decimal integer.
#
# All whitespace between tokens is ignored.
VENOM_PARSER = Lark(
"""
%import common.CNAME
%import common.DIGIT
%import common.LETTER
%import common.WS
%import common.INT

start: function* data_section
function: "function" NAME "=>" "{" block* "}"
data_section: "data:" call*

block: NAME ":" (call | assignment)*

assignment: VAR_IDENT "=" expr
expr: call | CONST
call: OPCODE operands_list

operands_list: (operand ("," operand)*)?

operand: VAR_IDENT | CONST | LABEL

CONST: INT
OPCODE: CNAME
VAR_IDENT: "%" INT (":" INT)?
LABEL: "@" NAME
NAME: (DIGIT|LETTER|"_")+

%ignore WS
"""
)


def _set_last_var(fn: IRFunction):
    """Raise ``fn.last_variable`` to the largest number used as an output variable in ``fn``."""
    for bb in fn.get_basic_blocks():
        for inst in bb.instructions:
            out = inst.output
            if out is None:
                # Instruction produces no value; nothing to account for.
                continue
            assert isinstance(out, IRVariable)
            name = out.value
            assert name.startswith("%")
            # NOTE(review): int() raises ValueError if the text after "%" is not
            # purely numeric (e.g. if `value` embeds a version suffix) - confirm
            # against IRVariable.
            fn.last_variable = max(fn.last_variable, int(name[1:]))


def _set_last_label(ctx: IRContext):
    """Raise ``ctx.last_label`` to the largest all-digit basic-block label found in ``ctx``."""
    for function in ctx.functions.values():
        for block in function.get_basic_blocks():
            label_text = block.label.value
            if not label_text.isdigit():
                # Labels that are not plain numbers do not affect the counter.
                continue
            ctx.last_label = max(int(label_text), ctx.last_label)


class VenomTransformer(Transformer):
    """Lark transformer that turns a Venom parse tree into an ``IRContext``.

    Lower-case methods are rule callbacks and receive the already-transformed
    children of the matching grammar rule. Upper-case methods are token
    callbacks and are looked up by the name of the terminal that produced the
    token.
    """

    def start(self, children) -> IRContext:
        """Build the context from the parsed functions and the trailing data section."""
        ctx = IRContext()
        funcs = children[:-1]
        data_section = children[-1]
        for fn_name, blocks in funcs:
            fn = ctx.create_function(fn_name)
            for block_name, instructions in blocks:
                # A block named after the function is looked up on the function
                # instead of being created (presumably the entry block made by
                # create_function - confirm); every other block is appended.
                if block_name == fn_name:
                    bb = fn.get_basic_block(block_name)
                else:
                    bb = IRBasicBlock(IRLabel(block_name), fn)
                    fn.append_basic_block(bb)

                for instruction in instructions:
                    assert isinstance(instruction, IRInstruction)
                    bb.insert_instruction(instruction)

                # Since "revert" is not considered terminal explicitly check for it to ensure
                # basic blocks are terminating
                if not bb.is_terminated:
                    has_revert = any(inst.opcode == "revert" for inst in bb.instructions)
                    if has_revert:
                        bb.append_instruction("stop")

            _set_last_var(fn)
        # Scans every function in the context, so one call after the loop suffices.
        _set_last_label(ctx)

        ctx.data_segment = data_section

        return ctx

    def function(self, children) -> tuple[str, list[tuple[str, list[IRInstruction]]]]:
        """Return ``(function name, [block, ...])``."""
        name, *blocks = children
        return name, blocks

    def data_section(self, children):
        """Return the data-section instructions unchanged."""
        return children

    def block(self, children) -> tuple[str, list[IRInstruction]]:
        """Return ``(block label, [instruction, ...])``."""
        label, *instructions = children
        return label, instructions

    def assignment(self, children) -> IRInstruction:
        """Attach the target variable to the right-hand side.

        A call keeps its instruction and gains an output; a bare constant
        becomes a ``store`` instruction.

        Raises:
            TypeError: if the right-hand side is neither of those.
        """
        to, value = children
        if isinstance(value, IRInstruction):
            value.output = to
            return value
        if isinstance(value, IRLiteral):
            return IRInstruction("store", [value], output=to)
        raise TypeError(f"Unexpected value {value} of type {type(value)}")

    def expr(self, children):
        """Unwrap the single child of an ``expr`` node."""
        return children[0]

    def call(self, children) -> IRInstruction:
        """Build an instruction from an opcode and its operand list."""
        name, operands = children
        # reverse operands because top->bottom is more intuitive but Venom does bottom->top
        # A reversed *list* (not a one-shot iterator) is passed, matching the
        # list handed to IRInstruction in `assignment`.
        return IRInstruction(name, operands[::-1])

    def operands_list(self, children) -> list[IROperand]:
        """Return the operands in source order."""
        return children

    def operand(self, children) -> IROperand:
        """Unwrap the single child of an ``operand`` node."""
        return children[0]

    def OPCODE(self, val) -> str:
        """Convert an opcode token to a plain string.

        The grammar's opcode terminal is named OPCODE, so this callback (not
        ``CNAME``) is the one invoked for opcode tokens.
        """
        return val.value

    def LABEL(self, label) -> IRLabel:
        """Convert ``@name`` into an ``IRLabel`` without the leading ``@``."""
        return IRLabel(label[1:])

    def VAR_IDENT(self, var_ident) -> IRVariable:
        """Convert ``%N`` or ``%N:V`` into an ``IRVariable`` (``V`` is the version)."""
        parts = var_ident[1:].split(":", maxsplit=1)
        assert 1 <= len(parts) <= 2
        varname = parts[0]
        version = None
        if len(parts) > 1:
            version = parts[1]
        return IRVariable(varname, version=version)

    def CONST(self, val) -> IRLiteral:
        """Convert an integer token into an ``IRLiteral``."""
        return IRLiteral(int(val))

    def CNAME(self, val) -> str:
        """Convert a CNAME token to a plain string (kept for compatibility)."""
        return val.value

    def NAME(self, val) -> str:
        """Convert a NAME token to a plain string."""
        return val.value


def parse_venom(source: str) -> IRContext:
    """Parse Venom IR text with ``VENOM_PARSER`` and build the matching ``IRContext``."""
    parse_tree = VENOM_PARSER.parse(source)
    result = VenomTransformer().transform(parse_tree)
    # The transformer's `start` callback is expected to yield the context.
    assert isinstance(result, IRContext)
    return result
2 changes: 1 addition & 1 deletion vyper/venom/passes/sccp/sccp.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ def _handle_SSA_work_item(self, work_item: SSAWorkListItem):
self._visit_expr(work_item.inst)

def _lookup_from_lattice(self, op: IROperand) -> LatticeItem:
    """Return the lattice entry recorded for the variable ``op``.

    Asserts that ``op`` is an ``IRVariable`` and that its entry is not None.
    """
    # Single check whose message names the offending operand.
    assert isinstance(op, IRVariable), f"Can't get lattice for non-variable ({op})"
    lat = self.lattice[op]
    assert lat is not None, f"Got undefined var {op}"
    return lat
Expand Down