Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
122 changes: 51 additions & 71 deletions data/transactions/logic/assembler.go
Original file line number Diff line number Diff line change
Expand Up @@ -545,7 +545,7 @@ func asmPushBytes(ops *OpStream, spec *OpSpec, args []string) error {
return nil
}

func base32DecdodeAnyPadding(x string) (val []byte, err error) {
func base32DecodeAnyPadding(x string) (val []byte, err error) {
val, err = base32.StdEncoding.WithPadding(base32.NoPadding).DecodeString(x)
if err != nil {
// try again with standard padding
Expand All @@ -567,7 +567,7 @@ func parseBinaryArgs(args []string) (val []byte, consumed int, err error) {
err = errors.New("byte base32 arg lacks close paren")
return
}
val, err = base32DecdodeAnyPadding(arg[open+1 : close])
val, err = base32DecodeAnyPadding(arg[open+1 : close])
if err != nil {
return
}
Expand Down Expand Up @@ -595,7 +595,7 @@ func parseBinaryArgs(args []string) (val []byte, consumed int, err error) {
err = fmt.Errorf("need literal after 'byte %s'", arg)
return
}
val, err = base32DecdodeAnyPadding(args[1])
val, err = base32DecodeAnyPadding(args[1])
if err != nil {
return
}
Expand Down Expand Up @@ -1399,25 +1399,26 @@ func typecheck(expected, got StackType) bool {
return expected == got
}

var spaces = [256]uint8{'\t': 1, ' ': 1}
// semi-colon is quite space-like, so include it
var spaces = [256]bool{'\t': true, ' ': true, ';': true}
Copy link
Copy Markdown
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I like this change, but I will say it does mean that we lose the ability to make some weird macros like a macro beginning with a semicolon as we will now separate the semicolon and macro into two distinct tokens. I think that's fine but just something to keep track of

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Worth keeping in mind, but I can't think of an macro that would want to terminate the last "line" from the beginning.

Copy link
Copy Markdown
Collaborator Author

@jannotti jannotti Aug 7, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, but it might not be as bad as you're thinking. Although ";" is its own token, since "# directives" will get the whole rest of the line, I think "#define x y; z" will still cause x to become "x ; z"


func fieldsFromLine(line string) []string {
var fields []string

i := 0
for i < len(line) && spaces[line[i]] != 0 {
for i < len(line) && spaces[line[i]] {
i++
}

start := i
inString := false
inBase64 := false
inString := false // tracked to allow spaces and comments inside
inBase64 := false // tracked to allow '//' inside
for i < len(line) {
if spaces[line[i]] == 0 { // if not space
if !spaces[line[i]] { // if not space
switch line[i] {
case '"': // is a string literal?
if !inString {
if i == 0 || i > 0 && spaces[line[i-1]] != 0 {
if i == 0 || i > 0 && spaces[line[i-1]] {
inString = true
}
} else {
Expand Down Expand Up @@ -1446,19 +1447,29 @@ func fieldsFromLine(line string) []string {
i++
continue
}

// we've hit a space, end last token unless inString

if !inString {
field := line[start:i]
fields = append(fields, field)
if field == "base64" || field == "b64" {
inBase64 = true
} else if inBase64 {
if line[i] == ';' {
fields = append(fields, ";")
}
if inBase64 {
inBase64 = false
} else if field == "base64" || field == "b64" {
inBase64 = true
}
}
i++

// gooble up consecutive whitespace (but notice semis)
if !inString {
for i < len(line) && spaces[line[i]] != 0 {
for i < len(line) && spaces[line[i]] {
if line[i] == ';' {
fields = append(fields, ";")
}
i++
}
start = i
Expand Down Expand Up @@ -1531,25 +1542,12 @@ func (ops *OpStream) trackStack(args StackTypes, returns StackTypes, instruction
}
}

// processFields walks through the input fields until it gets to a semi-colon
// at the end of a field at which point it returns everything prior as current
// and everything following the semicolon as next

// fields should not be used after as processFields mangles it
// current and next do share the same array if next is not nil
func processFields(fields []string) (current, next []string) {
for i := 0; i < len(fields); i++ {
field := fields[i]
if string(field[len(field)-1]) == ";" {
field = field[0 : len(field)-1]
current = fields[:i]
if len(field) > 0 {
current = append(current, field)
}
if i+1 == len(fields) {
return current, nil
}
return current, fields[i+1:]
// processFields breaks fields into a slice of tokens up to the first
// semi-colon, and the rest.
func processFields(fields []string) (current, rest []string) {
for i, field := range fields {
if field == ";" {
return fields[:i], fields[i+1:]
}
}
return fields, nil
Expand All @@ -1565,52 +1563,28 @@ func (ops *OpStream) assemble(text string) error {
for scanner.Scan() {
ops.sourceLine++
line := scanner.Text()
line = strings.TrimSpace(line)
if len(line) == 0 {
ops.trace("%3d: 0 line\n", ops.sourceLine)
continue
}
if strings.HasPrefix(line, "//") {
ops.trace("%3d: // line\n", ops.sourceLine)
continue
}
if strings.HasPrefix(line, "#pragma") {
ops.trace("%3d: #pragma line\n", ops.sourceLine)
ops.pragma(line)
continue
}
fields := fieldsFromLine(line)
if len(fields) == 0 {
ops.trace("%3d: no fields\n", ops.sourceLine)
continue
}
// we're about to begin processing opcodes, so settle the Version
if ops.Version == assemblerNoVersion {
ops.Version = AssemblerDefaultVersion
}
if ops.versionedPseudoOps == nil {
ops.versionedPseudoOps = prepareVersionedPseudoTable(ops.Version)
}
var current []string
var next []string
next = fields
for current, next = processFields(next); len(current) > 0 || len(next) > 0; current, next = processFields(next) {
for current, next := processFields(fields); len(current) > 0 || len(next) > 0; current, next = processFields(next) {
if len(current) == 0 {
continue
}
opstring := current[0]
if len(opstring) == 0 {
continue
}
if strings.HasPrefix(opstring, "//") {
// semicolon inside comment is not counted as newline
ops.trace("%3d: comment\n", ops.sourceLine)
Copy link
Copy Markdown
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this is actually an impossible case that came from when I had a different implementation (prior to what I PR'd), but I'll think about it a bit more

break
}
currentLine := strings.Join(current, " ")
if strings.HasPrefix(currentLine, "#pragma") {
if opstring == "#pragma" {
Copy link
Copy Markdown
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This may break some of the tests, but I think a pretty reasonable rule would be that # directives have to go on their own line. Any strong thoughts on that?

Copy link
Copy Markdown
Collaborator Author

@jannotti jannotti Aug 7, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The rule now is "If you have a #pragma, it consumes the rest of the line, no matter what" so as we make that true of all "# directives" that should handle most of the ideas I've thought of, where the expansion contains semi-colons.

I could see only allowing it at the start of lines though.

ops.trace("%3d: #pragma line\n", ops.sourceLine)
ops.pragma(currentLine)
continue
// pragma get the rest of the tokens
ops.pragma(append(current, next...))
break
}
// we're about to begin processing opcodes, so settle the Version
if ops.Version == assemblerNoVersion {
ops.Version = AssemblerDefaultVersion
}
if ops.versionedPseudoOps == nil {
ops.versionedPseudoOps = prepareVersionedPseudoTable(ops.Version)
}
if opstring[len(opstring)-1] == ':' {
ops.createLabel(opstring[:len(opstring)-1])
Expand Down Expand Up @@ -1653,6 +1627,13 @@ func (ops *OpStream) assemble(text string) error {
}
}

if err := scanner.Err(); err != nil {
if errors.Is(err, bufio.ErrTooLong) {
err = errors.New("line too long")
}
ops.error(err)
}

// backward compatibility: do not allow jumps behind last instruction in v1
if ops.Version <= 1 {
for label, dest := range ops.labels {
Expand Down Expand Up @@ -1680,8 +1661,7 @@ func (ops *OpStream) assemble(text string) error {
return nil
}

func (ops *OpStream) pragma(line string) error {
fields := strings.Split(line, " ")
func (ops *OpStream) pragma(fields []string) error {
if fields[0] != "#pragma" {
return ops.errorf("invalid syntax: %s", fields[0])
}
Expand Down
92 changes: 60 additions & 32 deletions data/transactions/logic/assembler_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
package logic

import (
"bufio"
"encoding/hex"
"fmt"
"strings"
Expand Down Expand Up @@ -435,29 +434,6 @@ func pseudoOp(opcode string) bool {
strings.HasPrefix(opcode, "arg")
}

func addSemis(s string) (ret string) {
scanner := bufio.NewScanner(strings.NewReader(s))
scanLoop:
for scanner.Scan() {
line := strings.TrimSpace(scanner.Text())
fields := fieldsFromLine(line)
for i, field := range fields {
if i == 0 && strings.HasPrefix(field, "#") {
ret += "\n" + line + "\n"
continue scanLoop
}
if strings.HasPrefix(field, "//") {
ret += line + "\n"
continue scanLoop
}
}
if len(fields) > 0 {
ret += strings.Join(fields, " ") + "; "
}
}
return
}

// Check that assembly output is stable across time.
func TestAssemble(t *testing.T) {
partitiontest.PartitionTest(t)
Expand All @@ -484,15 +460,13 @@ func TestAssemble(t *testing.T) {
}

ops := testProg(t, nonsense[v], v)
opsWithSemiColons := testProg(t, addSemis(nonsense[v]), v)
// check that compilation is stable over
// time. we must assemble to the same bytes
// this month that we did last month.
expectedBytes, _ := hex.DecodeString(compiled[v])
// the hex is for convenience if the program has been changed. the
// hex string can be copy pasted back in as a new expected result.
require.Equal(t, expectedBytes, ops.Program, hex.EncodeToString(ops.Program))
require.Equal(t, expectedBytes, opsWithSemiColons.Program, hex.EncodeToString(ops.Program))
})
}
}
Expand Down Expand Up @@ -1128,6 +1102,29 @@ func TestFieldsFromLine(t *testing.T) {
require.Equal(t, "base64", fields[1])
require.Equal(t, "ABC//==", fields[2])

line = "op base64 base64"
fields = fieldsFromLine(line)
require.Equal(t, 3, len(fields))
require.Equal(t, "op", fields[0])
require.Equal(t, "base64", fields[1])
require.Equal(t, "base64", fields[2])

line = "op base64 base64 //comment"
fields = fieldsFromLine(line)
require.Equal(t, 3, len(fields))
require.Equal(t, "op", fields[0])
require.Equal(t, "base64", fields[1])
require.Equal(t, "base64", fields[2])

line = "op base64 base64; op2 //done"
fields = fieldsFromLine(line)
require.Equal(t, 5, len(fields))
require.Equal(t, "op", fields[0])
require.Equal(t, "base64", fields[1])
require.Equal(t, "base64", fields[2])
require.Equal(t, ";", fields[3])
require.Equal(t, "op2", fields[4])

line = "op base64 ABC/=="
fields = fieldsFromLine(line)
require.Equal(t, 3, len(fields))
Expand Down Expand Up @@ -1312,6 +1309,15 @@ func TestFieldsFromLine(t *testing.T) {
fields = fieldsFromLine(line)
require.Equal(t, 1, len(fields))
require.Equal(t, `""`, fields[0])

line = "int 1; int 2"
fields = fieldsFromLine(line)
require.Equal(t, 5, len(fields))
require.Equal(t, "int", fields[0])
require.Equal(t, "1", fields[1])
require.Equal(t, ";", fields[2])
require.Equal(t, "int", fields[3])
require.Equal(t, "2", fields[4])
}

func TestAssembleRejectNegJump(t *testing.T) {
Expand Down Expand Up @@ -2714,15 +2720,37 @@ func TestReplacePseudo(t *testing.T) {
}
}

func checkSame(t *testing.T, weird string, normal string) {
ops, _ := AssembleStringWithVersion(weird, 7)
otherOps, _ := AssembleStringWithVersion(normal, 7)
require.Equal(t, otherOps.Program, ops.Program)
func checkSame(t *testing.T, first string, compares ...string) {
t.Helper()
ops, err := AssembleStringWithVersion(first, 7)
require.NoError(t, err, first)
for _, compare := range compares {
other, err := AssembleStringWithVersion(compare, 7)
assert.NoError(t, err, compare)
assert.Equal(t, other.Program, ops.Program, "%s unlike %s", first, compare)
}
}

func TestSemiColon(t *testing.T) {
partitiontest.PartitionTest(t)
t.Parallel()
// Space for weird semicolon cases that might not be checked by rest of tests
checkSame(t, "pushint 0 ; pushint 1 ; +; int 3 ; *", "pushint 0\npushint 1\n+\nint 3\n*")

checkSame(t,
"pushint 0 ; pushint 1 ; +; int 3 ; *",
"pushint 0\npushint 1\n+\nint 3\n*",
"pushint 0; pushint 1; +; int 3; *; // comment; int 2",
"pushint 0; ; ; pushint 1 ; +; int 3 ; *//check",
)

checkSame(t,
"#pragma version 7\nint 1",
"// junk;\n#pragma version 7\nint 1",
"// junk;\n #pragma version 7\nint 1",
)

checkSame(t,
`byte "test;this"; pop;`,
`byte "test;this"; ; pop;`,
`byte "test;this";;pop;`,
)
}
3 changes: 1 addition & 2 deletions data/transactions/logic/eval_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3532,8 +3532,7 @@ func benchmarkBasicProgram(b *testing.B, source string) {
func benchmarkOperation(b *testing.B, prefix string, operation string, suffix string) {
runs := 1 + b.N/2000
inst := strings.Count(operation, ";") + strings.Count(operation, "\n")
source := prefix + ";" + strings.Repeat(operation+";", 2000) + ";" + suffix
source = strings.ReplaceAll(source, ";", "\n")
source := prefix + ";" + strings.Repeat(operation+"\n", 2000) + ";" + suffix
Copy link
Copy Markdown
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this just to make assembly take less time or was there an error?

Copy link
Copy Markdown
Collaborator Author

@jannotti jannotti Aug 7, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Removing line 3536 was just because it didn't seem like we should have to replace semi-colons anymore. But with that done, the other change turned out to be necessary. bufio.Scanner has a MaxTokenSize. Since we ask it to read entire lines, a "token" is an entire line. So, as I removed substitutions of "\n" for ";", our benchmarks had enormous lines that exceeded that limit. By adding a newline in the main repeated clause, we avoid the problem. (And, I inserted some code to check scanner.Err() in case we exceed it in the future, changing the error to "too long line" when it happens.)

ops := testProg(b, source, AssemblerMaxVersion)
evalLoop(b, runs, ops.Program)
b.ReportMetric(float64(inst), "extra/op")
Expand Down