Skip to content

Commit

Permalink
cmd/compile/internal/syntax: add BasicLit.Bad field for lexical errors
Browse files Browse the repository at this point in the history
The new (internal) field scanner.bad indicates whether a syntax error
occurred while scanning a literal; the corresponding scanner.lit
string may be syntactically incorrect in that case. Store the value
of scanner.bad together with the scanner.lit in BasicLit.

Clean up error handling so that all syntactic errors use one of the
scanner's error reporting methods which also set scanner.bad. Make
use of the new field in a few places where we used to track a prior
error separately.

Preliminary step towards fixing golang#32133 in a comprehensive manner.

Change-Id: I4d79ad6e3b50632dd5fb3fc32ca3df0598ee77b4
Reviewed-on: https://go-review.googlesource.com/c/go/+/192278
Reviewed-by: Matthew Dempsky <[email protected]>
  • Loading branch information
griesemer authored and tomocy committed Sep 1, 2019
1 parent 2b1fe99 commit 45f218f
Show file tree
Hide file tree
Showing 5 changed files with 57 additions and 44 deletions.
1 change: 1 addition & 0 deletions src/cmd/compile/internal/syntax/nodes.go
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,7 @@ type (
BasicLit struct {
Value string
Kind LitKind
Bad bool // true means the literal Value has syntax errors
expr
}

Expand Down
15 changes: 8 additions & 7 deletions src/cmd/compile/internal/syntax/parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -550,7 +550,7 @@ func (p *parser) typeDecl(group *Group) Decl {
d.Alias = p.gotAssign()
d.Type = p.typeOrNil()
if d.Type == nil {
d.Type = p.bad()
d.Type = p.badExpr()
p.syntaxError("in type declaration")
p.advance(_Semi, _Rparen)
}
Expand Down Expand Up @@ -867,7 +867,7 @@ func (p *parser) operand(keep_parens bool) Expr {
return p.type_() // othertype

default:
x := p.bad()
x := p.badExpr()
p.syntaxError("expecting expression")
p.advance(_Rparen, _Rbrack, _Rbrace)
return x
Expand Down Expand Up @@ -1083,7 +1083,7 @@ func (p *parser) type_() Expr {

typ := p.typeOrNil()
if typ == nil {
typ = p.bad()
typ = p.badExpr()
p.syntaxError("expecting type")
p.advance(_Comma, _Colon, _Semi, _Rparen, _Rbrack, _Rbrace)
}
Expand Down Expand Up @@ -1220,7 +1220,7 @@ func (p *parser) chanElem() Expr {

typ := p.typeOrNil()
if typ == nil {
typ = p.bad()
typ = p.badExpr()
p.syntaxError("missing channel element type")
// assume element type is simply absent - don't advance
}
Expand Down Expand Up @@ -1401,6 +1401,7 @@ func (p *parser) oliteral() *BasicLit {
b.pos = p.pos()
b.Value = p.lit
b.Kind = p.kind
b.Bad = p.bad
p.next()
return b
}
Expand Down Expand Up @@ -1515,7 +1516,7 @@ func (p *parser) dotsType() *DotsType {
p.want(_DotDotDot)
t.Elem = p.typeOrNil()
if t.Elem == nil {
t.Elem = p.bad()
t.Elem = p.badExpr()
p.syntaxError("final argument in variadic function missing type")
}

Expand Down Expand Up @@ -1572,7 +1573,7 @@ func (p *parser) paramList() (list []*Field) {
} else {
// par.Type == nil && typ == nil => we only have a par.Name
ok = false
t := p.bad()
t := p.badExpr()
t.pos = par.Name.Pos() // correct position
par.Type = t
}
Expand All @@ -1585,7 +1586,7 @@ func (p *parser) paramList() (list []*Field) {
return
}

func (p *parser) bad() *BadExpr {
func (p *parser) badExpr() *BadExpr {
b := new(BadExpr)
b.pos = p.pos()
return b
Expand Down
77 changes: 42 additions & 35 deletions src/cmd/compile/internal/syntax/scanner.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ type scanner struct {
line, col uint
tok token
lit string // valid if tok is _Name, _Literal, or _Semi ("semicolon", "newline", or "EOF")
bad bool // valid if tok is _Literal, true if a syntax error occurred, lit may be incorrect
kind LitKind // valid if tok is _Literal
op Operator // valid if tok is _Operator, _AssignOp, or _IncOp
prec int // valid if tok is _Operator, _AssignOp, or _IncOp
Expand All @@ -47,10 +48,20 @@ func (s *scanner) init(src io.Reader, errh func(line, col uint, msg string), mod
s.nlsemi = false
}

// errorf reports an error at the most recently read character position.
func (s *scanner) errorf(format string, args ...interface{}) {
// TODO(gri) Consider using s.bad to consistently suppress multiple errors
// per token, here and below.
s.bad = true
s.error(fmt.Sprintf(format, args...))
}

// errorAtf reports an error at a byte column offset relative to the current token start.
func (s *scanner) errorAtf(offset int, format string, args ...interface{}) {
s.bad = true
s.errh(s.line, s.col+uint(offset), fmt.Sprintf(format, args...))
}

// next advances the scanner by reading the next token.
//
// If a read, source encoding, or lexical error occurs, next calls
Expand Down Expand Up @@ -442,6 +453,7 @@ func (s *scanner) digits(c0 rune, base int, invalid *int) (c rune, digsep int) {

func (s *scanner) number(c rune) {
s.startLit()
s.bad = false

base := 10 // number base
prefix := rune(0) // one of 0 (decimal), '0' (0-octal), 'x', 'o', or 'b'
Expand Down Expand Up @@ -477,14 +489,14 @@ func (s *scanner) number(c rune) {
if c == '.' {
s.kind = FloatLit
if prefix == 'o' || prefix == 'b' {
s.error("invalid radix point in " + litname(prefix))
s.errorf("invalid radix point in %s", litname(prefix))
}
c, ds = s.digits(s.getr(), base, &invalid)
digsep |= ds
}

if digsep&1 == 0 {
s.error(litname(prefix) + " has no digits")
s.errorf("%s has no digits", litname(prefix))
}

// exponent
Expand All @@ -503,10 +515,10 @@ func (s *scanner) number(c rune) {
c, ds = s.digits(c, 10, nil)
digsep |= ds
if ds&1 == 0 {
s.error("exponent has no digits")
s.errorf("exponent has no digits")
}
} else if prefix == 'x' && s.kind == FloatLit {
s.error("hexadecimal mantissa requires a 'p' exponent")
s.errorf("hexadecimal mantissa requires a 'p' exponent")
}

// suffix 'i'
Expand All @@ -521,12 +533,12 @@ func (s *scanner) number(c rune) {
s.tok = _Literal

if s.kind == IntLit && invalid >= 0 {
s.errh(s.line, s.col+uint(invalid), fmt.Sprintf("invalid digit %q in %s", s.lit[invalid], litname(prefix)))
s.errorAtf(invalid, "invalid digit %q in %s", s.lit[invalid], litname(prefix))
}

if digsep&2 != 0 {
if i := invalidSep(s.lit); i >= 0 {
s.errh(s.line, s.col+uint(i), "'_' must separate successive digits")
s.errorAtf(i, "'_' must separate successive digits")
}
}
}
Expand Down Expand Up @@ -585,42 +597,38 @@ func invalidSep(x string) int {

func (s *scanner) rune() {
s.startLit()
s.bad = false

ok := true // only report errors if we're ok so far
n := 0
for ; ; n++ {
r := s.getr()
if r == '\'' {
break
}
if r == '\\' {
if !s.escape('\'') {
ok = false
}
s.escape('\'')
continue
}
if r == '\n' {
s.ungetr() // assume newline is not part of literal
if ok {
s.error("newline in character literal")
ok = false
if !s.bad {
s.errorf("newline in character literal")
}
break
}
if r < 0 {
if ok {
s.errh(s.line, s.col, "invalid character literal (missing closing ')")
ok = false
if !s.bad {
s.errorAtf(0, "invalid character literal (missing closing ')")
}
break
}
}

if ok {
if !s.bad {
if n == 0 {
s.error("empty character literal or unescaped ' in character literal")
s.errorf("empty character literal or unescaped ' in character literal")
} else if n != 1 {
s.errh(s.line, s.col, "invalid character literal (more than one character)")
s.errorAtf(0, "invalid character literal (more than one character)")
}
}

Expand All @@ -632,6 +640,7 @@ func (s *scanner) rune() {

func (s *scanner) stdString() {
s.startLit()
s.bad = false

for {
r := s.getr()
Expand All @@ -644,11 +653,11 @@ func (s *scanner) stdString() {
}
if r == '\n' {
s.ungetr() // assume newline is not part of literal
s.error("newline in string")
s.errorf("newline in string")
break
}
if r < 0 {
s.errh(s.line, s.col, "string not terminated")
s.errorAtf(0, "string not terminated")
break
}
}
Expand All @@ -661,14 +670,15 @@ func (s *scanner) stdString() {

func (s *scanner) rawString() {
s.startLit()
s.bad = false

for {
r := s.getr()
if r == '`' {
break
}
if r < 0 {
s.errh(s.line, s.col, "string not terminated")
s.errorAtf(0, "string not terminated")
break
}
}
Expand Down Expand Up @@ -741,7 +751,7 @@ func (s *scanner) skipComment(r rune) bool {
}
r = s.getr()
}
s.errh(s.line, s.col, "comment not terminated")
s.errorAtf(0, "comment not terminated")
return false
}

Expand Down Expand Up @@ -782,14 +792,14 @@ func (s *scanner) fullComment() {
}
}

func (s *scanner) escape(quote rune) bool {
func (s *scanner) escape(quote rune) {
var n int
var base, max uint32

c := s.getr()
switch c {
case 'a', 'b', 'f', 'n', 'r', 't', 'v', '\\', quote:
return true
return
case '0', '1', '2', '3', '4', '5', '6', '7':
n, base, max = 3, 8, 255
case 'x':
Expand All @@ -803,10 +813,10 @@ func (s *scanner) escape(quote rune) bool {
n, base, max = 8, 16, unicode.MaxRune
default:
if c < 0 {
return true // complain in caller about EOF
return // complain in caller about EOF
}
s.error("unknown escape sequence")
return false
s.errorf("unknown escape sequence")
return
}

var x uint32
Expand All @@ -820,15 +830,15 @@ func (s *scanner) escape(quote rune) bool {
}
if d >= base {
if c < 0 {
return true // complain in caller about EOF
return // complain in caller about EOF
}
kind := "hex"
if base == 8 {
kind = "octal"
}
s.errorf("non-%s character in escape sequence: %c", kind, c)
s.ungetr()
return false
return
}
// d < base
x = x*base + d
Expand All @@ -838,13 +848,10 @@ func (s *scanner) escape(quote rune) bool {

if x > max && base == 8 {
s.errorf("octal escape value > 255: %d", x)
return false
return
}

if x > max || 0xD800 <= x && x < 0xE000 /* surrogate range */ {
s.error("escape sequence is invalid Unicode code point")
return false
s.errorf("escape sequence is invalid Unicode code point %#U", x)
}

return true
}
6 changes: 5 additions & 1 deletion src/cmd/compile/internal/syntax/scanner_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -499,6 +499,10 @@ func TestNumbers(t *testing.T) {
err = ""
s.next()

if err != "" && !s.bad {
t.Errorf("%q: got error but bad not set", test.src)
}

// compute lit where where s.lit is not defined
var lit string
switch s.tok {
Expand Down Expand Up @@ -598,7 +602,7 @@ func TestScanErrors(t *testing.T) {
{`"\x`, "string not terminated", 0, 0},
{`"\x"`, "non-hex character in escape sequence: \"", 0, 3},
{`var s string = "\x"`, "non-hex character in escape sequence: \"", 0, 18},
{`return "\Uffffffff"`, "escape sequence is invalid Unicode code point", 0, 18},
{`return "\Uffffffff"`, "escape sequence is invalid Unicode code point U+FFFFFFFF", 0, 18},

// former problem cases
{"package p\n\n\xef", "invalid UTF-8 encoding", 2, 0},
Expand Down
2 changes: 1 addition & 1 deletion src/cmd/compile/internal/syntax/tokens.go
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ func contains(tokset uint64, tok token) bool {
return tokset&(1<<tok) != 0
}

type LitKind uint
type LitKind uint8

// TODO(gri) With the 'i' (imaginary) suffix now permitted on integer
// and floating-point numbers, having a single ImagLit does
Expand Down

0 comments on commit 45f218f

Please sign in to comment.