iten-alg · iten-alg · Aug 9, 2022 · Aug 7, 2022 · iten-alg · Aug 7, 2022
diff --git a/data/transactions/logic/assembler.go b/data/transactions/logic/assembler.go
@@ -545,7 +545,7 @@ func asmPushBytes(ops *OpStream, spec *OpSpec, args []string) error {
 	return nil
 }
 
-func base32DecdodeAnyPadding(x string) (val []byte, err error) {
+func base32DecodeAnyPadding(x string) (val []byte, err error) {
 	val, err = base32.StdEncoding.WithPadding(base32.NoPadding).DecodeString(x)
 	if err != nil {
 		// try again with standard padding
@@ -567,7 +567,7 @@ func parseBinaryArgs(args []string) (val []byte, consumed int, err error) {
 			err = errors.New("byte base32 arg lacks close paren")
 			return
 		}
-		val, err = base32DecdodeAnyPadding(arg[open+1 : close])
+		val, err = base32DecodeAnyPadding(arg[open+1 : close])
 		if err != nil {
 			return
 		}
@@ -595,7 +595,7 @@ func parseBinaryArgs(args []string) (val []byte, consumed int, err error) {
 			err = fmt.Errorf("need literal after 'byte %s'", arg)
 			return
 		}
-		val, err = base32DecdodeAnyPadding(args[1])
+		val, err = base32DecodeAnyPadding(args[1])
 		if err != nil {
 			return
 		}
@@ -1399,25 +1399,26 @@ func typecheck(expected, got StackType) bool {
 	return expected == got
 }
 
-var spaces = [256]uint8{'\t': 1, ' ': 1}
+// semi-colon is quite space-like, so include it
+var spaces = [256]bool{'\t': true, ' ': true, ';': true}
 
 func fieldsFromLine(line string) []string {
 	var fields []string
 
 	i := 0
-	for i < len(line) && spaces[line[i]] != 0 {
+	for i < len(line) && spaces[line[i]] {
 		i++
 	}
 
 	start := i
-	inString := false
-	inBase64 := false
+	inString := false // tracked to allow spaces and comments inside
+	inBase64 := false // tracked to allow '//' inside
 	for i < len(line) {
-		if spaces[line[i]] == 0 { // if not space
+		if !spaces[line[i]] { // if not space
 			switch line[i] {
 			case '"': // is a string literal?
 				if !inString {
-					if i == 0 || i > 0 && spaces[line[i-1]] != 0 {
+					if i == 0 || i > 0 && spaces[line[i-1]] {
 						inString = true
 					}
 				} else {
@@ -1446,19 +1447,29 @@ func fieldsFromLine(line string) []string {
 			i++
 			continue
 		}
+
+		// we've hit a space, end last token unless inString
+
 		if !inString {
 			field := line[start:i]
 			fields = append(fields, field)
-			if field == "base64" || field == "b64" {
-				inBase64 = true
-			} else if inBase64 {
+			if line[i] == ';' {
+				fields = append(fields, ";")
+			}
+			if inBase64 {
 				inBase64 = false
+			} else if field == "base64" || field == "b64" {
+				inBase64 = true
 			}
 		}
 		i++
 
+		// gooble up consecutive whitespace (but notice semis)
 		if !inString {
-			for i < len(line) && spaces[line[i]] != 0 {
+			for i < len(line) && spaces[line[i]] {
+				if line[i] == ';' {
+					fields = append(fields, ";")
+				}
 				i++
 			}
 			start = i
@@ -1531,25 +1542,12 @@ func (ops *OpStream) trackStack(args StackTypes, returns StackTypes, instruction
 	}
 }
 
-// processFields walks through the input fields until it gets to a semi-colon
-// at the end of a field at which point it returns everything prior as current
-// and everything following the semicolon as next
-
-// fields should not be used after as processFields mangles it
-// current and next do share the same array if next is not nil
-func processFields(fields []string) (current, next []string) {
-	for i := 0; i < len(fields); i++ {
-		field := fields[i]
-		if string(field[len(field)-1]) == ";" {
-			field = field[0 : len(field)-1]
-			current = fields[:i]
-			if len(field) > 0 {
-				current = append(current, field)
-			}
-			if i+1 == len(fields) {
-				return current, nil
-			}
-			return current, fields[i+1:]
+// processFields breaks fields into a slice of tokens up to the first
+// semi-colon, and the rest.
+func processFields(fields []string) (current, rest []string) {
+	for i, field := range fields {
+		if field == ";" {
+			return fields[:i], fields[i+1:]
 		}
 	}
 	return fields, nil
@@ -1565,52 +1563,28 @@ func (ops *OpStream) assemble(text string) error {
 	for scanner.Scan() {
 		ops.sourceLine++
 		line := scanner.Text()
-		line = strings.TrimSpace(line)
-		if len(line) == 0 {
-			ops.trace("%3d: 0 line\n", ops.sourceLine)
-			continue
-		}
-		if strings.HasPrefix(line, "//") {
-			ops.trace("%3d: // line\n", ops.sourceLine)
-			continue
-		}
-		if strings.HasPrefix(line, "#pragma") {
-			ops.trace("%3d: #pragma line\n", ops.sourceLine)
-			ops.pragma(line)
-			continue
-		}
 		fields := fieldsFromLine(line)
-		if len(fields) == 0 {
-			ops.trace("%3d: no fields\n", ops.sourceLine)
-			continue
-		}
-		// we're about to begin processing opcodes, so settle the Version
-		if ops.Version == assemblerNoVersion {
-			ops.Version = AssemblerDefaultVersion
-		}
-		if ops.versionedPseudoOps == nil {
-			ops.versionedPseudoOps = prepareVersionedPseudoTable(ops.Version)
-		}
-		var current []string
-		var next []string
-		next = fields
-		for current, next = processFields(next); len(current) > 0 || len(next) > 0; current, next = processFields(next) {
+		for current, next := processFields(fields); len(current) > 0 || len(next) > 0; current, next = processFields(next) {
 			if len(current) == 0 {
 				continue
 			}
 			opstring := current[0]
-			if len(opstring) == 0 {
-				continue
-			}
 			if strings.HasPrefix(opstring, "//") {
-				// semicolon inside comment is not counted as newline
+				ops.trace("%3d: comment\n", ops.sourceLine)
 				break
 			}
-			currentLine := strings.Join(current, " ")
-			if strings.HasPrefix(currentLine, "#pragma") {
+			if opstring == "#pragma" {
 				ops.trace("%3d: #pragma line\n", ops.sourceLine)
-				ops.pragma(currentLine)
-				continue
+				// pragma get the rest of the tokens
+				ops.pragma(append(current, next...))
+				break
+			}
+			// we're about to begin processing opcodes, so settle the Version
+			if ops.Version == assemblerNoVersion {
+				ops.Version = AssemblerDefaultVersion
+			}
+			if ops.versionedPseudoOps == nil {
+				ops.versionedPseudoOps = prepareVersionedPseudoTable(ops.Version)
 			}
 			if opstring[len(opstring)-1] == ':' {
 				ops.createLabel(opstring[:len(opstring)-1])
@@ -1653,6 +1627,13 @@ func (ops *OpStream) assemble(text string) error {
 		}
 	}
 
+	if err := scanner.Err(); err != nil {
+		if errors.Is(err, bufio.ErrTooLong) {
+			err = errors.New("line too long")
+		}
+		ops.error(err)
+	}
+
 	// backward compatibility: do not allow jumps behind last instruction in v1
 	if ops.Version <= 1 {
 		for label, dest := range ops.labels {
@@ -1680,8 +1661,7 @@ func (ops *OpStream) assemble(text string) error {
 	return nil
 }
 
-func (ops *OpStream) pragma(line string) error {
-	fields := strings.Split(line, " ")
+func (ops *OpStream) pragma(fields []string) error {
 	if fields[0] != "#pragma" {
 		return ops.errorf("invalid syntax: %s", fields[0])
 	}

diff --git a/data/transactions/logic/assembler_test.go b/data/transactions/logic/assembler_test.go
@@ -17,7 +17,6 @@
 package logic
 
 import (
-	"bufio"
 	"encoding/hex"
 	"fmt"
 	"strings"
@@ -435,29 +434,6 @@ func pseudoOp(opcode string) bool {
 		strings.HasPrefix(opcode, "arg")
 }
 
-func addSemis(s string) (ret string) {
-	scanner := bufio.NewScanner(strings.NewReader(s))
-scanLoop:
-	for scanner.Scan() {
-		line := strings.TrimSpace(scanner.Text())
-		fields := fieldsFromLine(line)
-		for i, field := range fields {
-			if i == 0 && strings.HasPrefix(field, "#") {
-				ret += "\n" + line + "\n"
-				continue scanLoop
-			}
-			if strings.HasPrefix(field, "//") {
-				ret += line + "\n"
-				continue scanLoop
-			}
-		}
-		if len(fields) > 0 {
-			ret += strings.Join(fields, " ") + "; "
-		}
-	}
-	return
-}
-
 // Check that assembly output is stable across time.
 func TestAssemble(t *testing.T) {
 	partitiontest.PartitionTest(t)
@@ -484,15 +460,13 @@ func TestAssemble(t *testing.T) {
 			}
 
 			ops := testProg(t, nonsense[v], v)
-			opsWithSemiColons := testProg(t, addSemis(nonsense[v]), v)
 			// check that compilation is stable over
 			// time. we must assemble to the same bytes
 			// this month that we did last month.
 			expectedBytes, _ := hex.DecodeString(compiled[v])
 			// the hex is for convenience if the program has been changed. the
 			// hex string can be copy pasted back in as a new expected result.
 			require.Equal(t, expectedBytes, ops.Program, hex.EncodeToString(ops.Program))
-			require.Equal(t, expectedBytes, opsWithSemiColons.Program, hex.EncodeToString(ops.Program))
 		})
 	}
 }
@@ -1128,6 +1102,29 @@ func TestFieldsFromLine(t *testing.T) {
 	require.Equal(t, "base64", fields[1])
 	require.Equal(t, "ABC//==", fields[2])
 
+	line = "op base64 base64"
+	fields = fieldsFromLine(line)
+	require.Equal(t, 3, len(fields))
+	require.Equal(t, "op", fields[0])
+	require.Equal(t, "base64", fields[1])
+	require.Equal(t, "base64", fields[2])
+
+	line = "op base64 base64 //comment"
+	fields = fieldsFromLine(line)
+	require.Equal(t, 3, len(fields))
+	require.Equal(t, "op", fields[0])
+	require.Equal(t, "base64", fields[1])
+	require.Equal(t, "base64", fields[2])
+
+	line = "op base64 base64; op2 //done"
+	fields = fieldsFromLine(line)
+	require.Equal(t, 5, len(fields))
+	require.Equal(t, "op", fields[0])
+	require.Equal(t, "base64", fields[1])
+	require.Equal(t, "base64", fields[2])
+	require.Equal(t, ";", fields[3])
+	require.Equal(t, "op2", fields[4])
+
 	line = "op base64 ABC/=="
 	fields = fieldsFromLine(line)
 	require.Equal(t, 3, len(fields))
@@ -1312,6 +1309,15 @@ func TestFieldsFromLine(t *testing.T) {
 	fields = fieldsFromLine(line)
 	require.Equal(t, 1, len(fields))
 	require.Equal(t, `""`, fields[0])
+
+	line = "int 1; int 2"
+	fields = fieldsFromLine(line)
+	require.Equal(t, 5, len(fields))
+	require.Equal(t, "int", fields[0])
+	require.Equal(t, "1", fields[1])
+	require.Equal(t, ";", fields[2])
+	require.Equal(t, "int", fields[3])
+	require.Equal(t, "2", fields[4])
 }
 
 func TestAssembleRejectNegJump(t *testing.T) {
@@ -2714,15 +2720,37 @@ func TestReplacePseudo(t *testing.T) {
 	}
 }
 
-func checkSame(t *testing.T, weird string, normal string) {
-	ops, _ := AssembleStringWithVersion(weird, 7)
-	otherOps, _ := AssembleStringWithVersion(normal, 7)
-	require.Equal(t, otherOps.Program, ops.Program)
+func checkSame(t *testing.T, first string, compares ...string) {
+	t.Helper()
+	ops, err := AssembleStringWithVersion(first, 7)
+	require.NoError(t, err, first)
+	for _, compare := range compares {
+		other, err := AssembleStringWithVersion(compare, 7)
+		assert.NoError(t, err, compare)
+		assert.Equal(t, other.Program, ops.Program, "%s unlike %s", first, compare)
+	}
 }
 
 func TestSemiColon(t *testing.T) {
 	partitiontest.PartitionTest(t)
 	t.Parallel()
-	// Space for weird semicolon cases that might not be checked by rest of tests
-	checkSame(t, "pushint 0 ; pushint 1 ; +; int 3 ; *", "pushint 0\npushint 1\n+\nint 3\n*")
+
+	checkSame(t,
+		"pushint 0 ; pushint 1 ; +; int 3 ; *",
+		"pushint 0\npushint 1\n+\nint 3\n*",
+		"pushint 0; pushint 1; +; int 3; *; // comment; int 2",
+		"pushint 0; ; ; pushint 1 ; +; int 3 ; *//check",
+	)
+
+	checkSame(t,
+		"#pragma version 7\nint 1",
+		"// junk;\n#pragma version 7\nint 1",
+		"// junk;\n #pragma version 7\nint 1",
+	)
+
+	checkSame(t,
+		`byte "test;this"; pop;`,
+		`byte "test;this"; ; pop;`,
+		`byte "test;this";;pop;`,
+	)
 }
diff --git a/data/transactions/logic/eval_test.go b/data/transactions/logic/eval_test.go
@@ -3532,8 +3532,7 @@ func benchmarkBasicProgram(b *testing.B, source string) {
 func benchmarkOperation(b *testing.B, prefix string, operation string, suffix string) {
 	runs := 1 + b.N/2000
 	inst := strings.Count(operation, ";") + strings.Count(operation, "\n")
-	source := prefix + ";" + strings.Repeat(operation+";", 2000) + ";" + suffix
-	source = strings.ReplaceAll(source, ";", "\n")
+	source := prefix + ";" + strings.Repeat(operation+"\n", 2000) + ";" + suffix
 	ops := testProg(b, source, AssemblerMaxVersion)
 	evalLoop(b, runs, ops.Program)
 	b.ReportMetric(float64(inst), "extra/op")