diff --git a/data/transactions/logic/assembler.go b/data/transactions/logic/assembler.go index f0177dc8df..044b31e705 100644 --- a/data/transactions/logic/assembler.go +++ b/data/transactions/logic/assembler.go @@ -545,7 +545,7 @@ func asmPushBytes(ops *OpStream, spec *OpSpec, args []string) error { return nil } -func base32DecdodeAnyPadding(x string) (val []byte, err error) { +func base32DecodeAnyPadding(x string) (val []byte, err error) { val, err = base32.StdEncoding.WithPadding(base32.NoPadding).DecodeString(x) if err != nil { // try again with standard padding @@ -567,7 +567,7 @@ func parseBinaryArgs(args []string) (val []byte, consumed int, err error) { err = errors.New("byte base32 arg lacks close paren") return } - val, err = base32DecdodeAnyPadding(arg[open+1 : close]) + val, err = base32DecodeAnyPadding(arg[open+1 : close]) if err != nil { return } @@ -595,7 +595,7 @@ func parseBinaryArgs(args []string) (val []byte, consumed int, err error) { err = fmt.Errorf("need literal after 'byte %s'", arg) return } - val, err = base32DecdodeAnyPadding(args[1]) + val, err = base32DecodeAnyPadding(args[1]) if err != nil { return } @@ -1399,25 +1399,26 @@ func typecheck(expected, got StackType) bool { return expected == got } -var spaces = [256]uint8{'\t': 1, ' ': 1} +// semi-colon is quite space-like, so include it +var spaces = [256]bool{'\t': true, ' ': true, ';': true} func fieldsFromLine(line string) []string { var fields []string i := 0 - for i < len(line) && spaces[line[i]] != 0 { + for i < len(line) && spaces[line[i]] { i++ } start := i - inString := false - inBase64 := false + inString := false // tracked to allow spaces and comments inside + inBase64 := false // tracked to allow '//' inside for i < len(line) { - if spaces[line[i]] == 0 { // if not space + if !spaces[line[i]] { // if not space switch line[i] { case '"': // is a string literal? if !inString { - if i == 0 || i > 0 && spaces[line[i-1]] != 0 { + if i == 0 || i > 0 && spaces[line[i-1]] { inString = true } } else { @@ -1446,19 +1447,29 @@ func fieldsFromLine(line string) []string { i++ continue } + + // we've hit a space, end last token unless inString + if !inString { field := line[start:i] fields = append(fields, field) - if field == "base64" || field == "b64" { - inBase64 = true - } else if inBase64 { + if line[i] == ';' { + fields = append(fields, ";") + } + if inBase64 { inBase64 = false + } else if field == "base64" || field == "b64" { + inBase64 = true } } i++ + // gooble up consecutive whitespace (but notice semis) if !inString { - for i < len(line) && spaces[line[i]] != 0 { + for i < len(line) && spaces[line[i]] { + if line[i] == ';' { + fields = append(fields, ";") + } i++ } start = i @@ -1531,25 +1542,12 @@ func (ops *OpStream) trackStack(args StackTypes, returns StackTypes, instruction } } -// processFields walks through the input fields until it gets to a semi-colon -// at the end of a field at which point it returns everything prior as current -// and everything following the semicolon as next - -// fields should not be used after as processFields mangles it -// current and next do share the same array if next is not nil -func processFields(fields []string) (current, next []string) { - for i := 0; i < len(fields); i++ { - field := fields[i] - if string(field[len(field)-1]) == ";" { - field = field[0 : len(field)-1] - current = fields[:i] - if len(field) > 0 { - current = append(current, field) - } - if i+1 == len(fields) { - return current, nil - } - return current, fields[i+1:] +// processFields breaks fields into a slice of tokens up to the first +// semi-colon, and the rest. +func processFields(fields []string) (current, rest []string) { + for i, field := range fields { + if field == ";" { + return fields[:i], fields[i+1:] } } return fields, nil @@ -1565,52 +1563,28 @@ func (ops *OpStream) assemble(text string) error { for scanner.Scan() { ops.sourceLine++ line := scanner.Text() - line = strings.TrimSpace(line) - if len(line) == 0 { - ops.trace("%3d: 0 line\n", ops.sourceLine) - continue - } - if strings.HasPrefix(line, "//") { - ops.trace("%3d: // line\n", ops.sourceLine) - continue - } - if strings.HasPrefix(line, "#pragma") { - ops.trace("%3d: #pragma line\n", ops.sourceLine) - ops.pragma(line) - continue - } fields := fieldsFromLine(line) - if len(fields) == 0 { - ops.trace("%3d: no fields\n", ops.sourceLine) - continue - } - // we're about to begin processing opcodes, so settle the Version - if ops.Version == assemblerNoVersion { - ops.Version = AssemblerDefaultVersion - } - if ops.versionedPseudoOps == nil { - ops.versionedPseudoOps = prepareVersionedPseudoTable(ops.Version) - } - var current []string - var next []string - next = fields - for current, next = processFields(next); len(current) > 0 || len(next) > 0; current, next = processFields(next) { + for current, next := processFields(fields); len(current) > 0 || len(next) > 0; current, next = processFields(next) { if len(current) == 0 { continue } opstring := current[0] - if len(opstring) == 0 { - continue - } if strings.HasPrefix(opstring, "//") { - // semicolon inside comment is not counted as newline + ops.trace("%3d: comment\n", ops.sourceLine) break } - currentLine := strings.Join(current, " ") - if strings.HasPrefix(currentLine, "#pragma") { + if opstring == "#pragma" { ops.trace("%3d: #pragma line\n", ops.sourceLine) - ops.pragma(currentLine) - continue + // pragma get the rest of the tokens + ops.pragma(append(current, next...)) + break + } + // we're about to begin processing opcodes, so settle the Version + if ops.Version == assemblerNoVersion { + ops.Version = AssemblerDefaultVersion + } + if ops.versionedPseudoOps == nil { + ops.versionedPseudoOps = prepareVersionedPseudoTable(ops.Version) } if opstring[len(opstring)-1] == ':' { ops.createLabel(opstring[:len(opstring)-1]) @@ -1653,6 +1627,13 @@ func (ops *OpStream) assemble(text string) error { } } + if err := scanner.Err(); err != nil { + if errors.Is(err, bufio.ErrTooLong) { + err = errors.New("line too long") + } + ops.error(err) + } + // backward compatibility: do not allow jumps behind last instruction in v1 if ops.Version <= 1 { for label, dest := range ops.labels { @@ -1680,8 +1661,7 @@ func (ops *OpStream) assemble(text string) error { return nil } -func (ops *OpStream) pragma(line string) error { - fields := strings.Split(line, " ") +func (ops *OpStream) pragma(fields []string) error { if fields[0] != "#pragma" { return ops.errorf("invalid syntax: %s", fields[0]) } diff --git a/data/transactions/logic/assembler_test.go b/data/transactions/logic/assembler_test.go index 9ebd2d27b2..2178c536be 100644 --- a/data/transactions/logic/assembler_test.go +++ b/data/transactions/logic/assembler_test.go @@ -17,7 +17,6 @@ package logic import ( - "bufio" "encoding/hex" "fmt" "strings" @@ -435,29 +434,6 @@ func pseudoOp(opcode string) bool { strings.HasPrefix(opcode, "arg") } -func addSemis(s string) (ret string) { - scanner := bufio.NewScanner(strings.NewReader(s)) -scanLoop: - for scanner.Scan() { - line := strings.TrimSpace(scanner.Text()) - fields := fieldsFromLine(line) - for i, field := range fields { - if i == 0 && strings.HasPrefix(field, "#") { - ret += "\n" + line + "\n" - continue scanLoop - } - if strings.HasPrefix(field, "//") { - ret += line + "\n" - continue scanLoop - } - } - if len(fields) > 0 { - ret += strings.Join(fields, " ") + "; " - } - } - return -} - // Check that assembly output is stable across time. func TestAssemble(t *testing.T) { partitiontest.PartitionTest(t) @@ -484,7 +460,6 @@ func TestAssemble(t *testing.T) { } ops := testProg(t, nonsense[v], v) - opsWithSemiColons := testProg(t, addSemis(nonsense[v]), v) // check that compilation is stable over // time. we must assemble to the same bytes // this month that we did last month. @@ -492,7 +467,6 @@ func TestAssemble(t *testing.T) { // the hex is for convenience if the program has been changed. the // hex string can be copy pasted back in as a new expected result. require.Equal(t, expectedBytes, ops.Program, hex.EncodeToString(ops.Program)) - require.Equal(t, expectedBytes, opsWithSemiColons.Program, hex.EncodeToString(ops.Program)) }) } } @@ -1128,6 +1102,29 @@ func TestFieldsFromLine(t *testing.T) { require.Equal(t, "base64", fields[1]) require.Equal(t, "ABC//==", fields[2]) + line = "op base64 base64" + fields = fieldsFromLine(line) + require.Equal(t, 3, len(fields)) + require.Equal(t, "op", fields[0]) + require.Equal(t, "base64", fields[1]) + require.Equal(t, "base64", fields[2]) + + line = "op base64 base64 //comment" + fields = fieldsFromLine(line) + require.Equal(t, 3, len(fields)) + require.Equal(t, "op", fields[0]) + require.Equal(t, "base64", fields[1]) + require.Equal(t, "base64", fields[2]) + + line = "op base64 base64; op2 //done" + fields = fieldsFromLine(line) + require.Equal(t, 5, len(fields)) + require.Equal(t, "op", fields[0]) + require.Equal(t, "base64", fields[1]) + require.Equal(t, "base64", fields[2]) + require.Equal(t, ";", fields[3]) + require.Equal(t, "op2", fields[4]) + line = "op base64 ABC/==" fields = fieldsFromLine(line) require.Equal(t, 3, len(fields)) @@ -1312,6 +1309,15 @@ func TestFieldsFromLine(t *testing.T) { fields = fieldsFromLine(line) require.Equal(t, 1, len(fields)) require.Equal(t, `""`, fields[0]) + + line = "int 1; int 2" + fields = fieldsFromLine(line) + require.Equal(t, 5, len(fields)) + require.Equal(t, "int", fields[0]) + require.Equal(t, "1", fields[1]) + require.Equal(t, ";", fields[2]) + require.Equal(t, "int", fields[3]) + require.Equal(t, "2", fields[4]) } func TestAssembleRejectNegJump(t *testing.T) { @@ -2714,15 +2720,37 @@ func TestReplacePseudo(t *testing.T) { } } -func checkSame(t *testing.T, weird string, normal string) { - ops, _ := AssembleStringWithVersion(weird, 7) - otherOps, _ := AssembleStringWithVersion(normal, 7) - require.Equal(t, otherOps.Program, ops.Program) +func checkSame(t *testing.T, first string, compares ...string) { + t.Helper() + ops, err := AssembleStringWithVersion(first, 7) + require.NoError(t, err, first) + for _, compare := range compares { + other, err := AssembleStringWithVersion(compare, 7) + assert.NoError(t, err, compare) + assert.Equal(t, other.Program, ops.Program, "%s unlike %s", first, compare) + } } func TestSemiColon(t *testing.T) { partitiontest.PartitionTest(t) t.Parallel() - // Space for weird semicolon cases that might not be checked by rest of tests - checkSame(t, "pushint 0 ; pushint 1 ; +; int 3 ; *", "pushint 0\npushint 1\n+\nint 3\n*") + + checkSame(t, + "pushint 0 ; pushint 1 ; +; int 3 ; *", + "pushint 0\npushint 1\n+\nint 3\n*", + "pushint 0; pushint 1; +; int 3; *; // comment; int 2", + "pushint 0; ; ; pushint 1 ; +; int 3 ; *//check", + ) + + checkSame(t, + "#pragma version 7\nint 1", + "// junk;\n#pragma version 7\nint 1", + "// junk;\n #pragma version 7\nint 1", + ) + + checkSame(t, + `byte "test;this"; pop;`, + `byte "test;this"; ; pop;`, + `byte "test;this";;pop;`, + ) } diff --git a/data/transactions/logic/eval_test.go b/data/transactions/logic/eval_test.go index d5ccf865c2..e34815bf9b 100644 --- a/data/transactions/logic/eval_test.go +++ b/data/transactions/logic/eval_test.go @@ -3532,8 +3532,7 @@ func benchmarkBasicProgram(b *testing.B, source string) { func benchmarkOperation(b *testing.B, prefix string, operation string, suffix string) { runs := 1 + b.N/2000 inst := strings.Count(operation, ";") + strings.Count(operation, "\n") - source := prefix + ";" + strings.Repeat(operation+";", 2000) + ";" + suffix - source = strings.ReplaceAll(source, ";", "\n") + source := prefix + ";" + strings.Repeat(operation+"\n", 2000) + ";" + suffix ops := testProg(b, source, AssemblerMaxVersion) evalLoop(b, runs, ops.Program) b.ReportMetric(float64(inst), "extra/op")