Skip to content

Commit 117400e

Browse files
committed
cmd/compile/internal/syntax: add BasicLit.Bad field for lexical errors
The new (internal) field scanner.bad indicates whether a syntax error occurred while scanning a literal; the corresponding scanner.lit string may be syntactically incorrect in that case. Store the value of scanner.bad together with scanner.lit in BasicLit.

Clean up error handling so that all syntactic errors use one of the scanner's error reporting methods, which also set scanner.bad. Make use of the new field in a few places where we used to track a prior error separately.

Preliminary step towards fixing #32133 in a comprehensive manner.

Change-Id: I4d79ad6e3b50632dd5fb3fc32ca3df0598ee77b4
Reviewed-on: https://go-review.googlesource.com/c/go/+/192278
Reviewed-by: Matthew Dempsky <[email protected]>
1 parent bf36219 commit 117400e

File tree

5 files changed

+57
-44
lines changed

5 files changed

+57
-44
lines changed

src/cmd/compile/internal/syntax/nodes.go

+1
Original file line numberDiff line numberDiff line change
@@ -139,6 +139,7 @@ type (
139139
BasicLit struct {
140140
Value string
141141
Kind LitKind
142+
Bad bool // true means the literal Value has syntax errors
142143
expr
143144
}
144145

src/cmd/compile/internal/syntax/parser.go

+8-7
Original file line numberDiff line numberDiff line change
@@ -550,7 +550,7 @@ func (p *parser) typeDecl(group *Group) Decl {
550550
d.Alias = p.gotAssign()
551551
d.Type = p.typeOrNil()
552552
if d.Type == nil {
553-
d.Type = p.bad()
553+
d.Type = p.badExpr()
554554
p.syntaxError("in type declaration")
555555
p.advance(_Semi, _Rparen)
556556
}
@@ -867,7 +867,7 @@ func (p *parser) operand(keep_parens bool) Expr {
867867
return p.type_() // othertype
868868

869869
default:
870-
x := p.bad()
870+
x := p.badExpr()
871871
p.syntaxError("expecting expression")
872872
p.advance(_Rparen, _Rbrack, _Rbrace)
873873
return x
@@ -1083,7 +1083,7 @@ func (p *parser) type_() Expr {
10831083

10841084
typ := p.typeOrNil()
10851085
if typ == nil {
1086-
typ = p.bad()
1086+
typ = p.badExpr()
10871087
p.syntaxError("expecting type")
10881088
p.advance(_Comma, _Colon, _Semi, _Rparen, _Rbrack, _Rbrace)
10891089
}
@@ -1220,7 +1220,7 @@ func (p *parser) chanElem() Expr {
12201220

12211221
typ := p.typeOrNil()
12221222
if typ == nil {
1223-
typ = p.bad()
1223+
typ = p.badExpr()
12241224
p.syntaxError("missing channel element type")
12251225
// assume element type is simply absent - don't advance
12261226
}
@@ -1401,6 +1401,7 @@ func (p *parser) oliteral() *BasicLit {
14011401
b.pos = p.pos()
14021402
b.Value = p.lit
14031403
b.Kind = p.kind
1404+
b.Bad = p.bad
14041405
p.next()
14051406
return b
14061407
}
@@ -1515,7 +1516,7 @@ func (p *parser) dotsType() *DotsType {
15151516
p.want(_DotDotDot)
15161517
t.Elem = p.typeOrNil()
15171518
if t.Elem == nil {
1518-
t.Elem = p.bad()
1519+
t.Elem = p.badExpr()
15191520
p.syntaxError("final argument in variadic function missing type")
15201521
}
15211522

@@ -1572,7 +1573,7 @@ func (p *parser) paramList() (list []*Field) {
15721573
} else {
15731574
// par.Type == nil && typ == nil => we only have a par.Name
15741575
ok = false
1575-
t := p.bad()
1576+
t := p.badExpr()
15761577
t.pos = par.Name.Pos() // correct position
15771578
par.Type = t
15781579
}
@@ -1585,7 +1586,7 @@ func (p *parser) paramList() (list []*Field) {
15851586
return
15861587
}
15871588

1588-
func (p *parser) bad() *BadExpr {
1589+
func (p *parser) badExpr() *BadExpr {
15891590
b := new(BadExpr)
15901591
b.pos = p.pos()
15911592
return b

src/cmd/compile/internal/syntax/scanner.go

+42-35
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ type scanner struct {
3636
line, col uint
3737
tok token
3838
lit string // valid if tok is _Name, _Literal, or _Semi ("semicolon", "newline", or "EOF")
39+
bad bool // valid if tok is _Literal, true if a syntax error occurred, lit may be incorrect
3940
kind LitKind // valid if tok is _Literal
4041
op Operator // valid if tok is _Operator, _AssignOp, or _IncOp
4142
prec int // valid if tok is _Operator, _AssignOp, or _IncOp
@@ -47,10 +48,20 @@ func (s *scanner) init(src io.Reader, errh func(line, col uint, msg string), mod
4748
s.nlsemi = false
4849
}
4950

51+
// errorf reports an error at the most recently read character position.
5052
func (s *scanner) errorf(format string, args ...interface{}) {
53+
// TODO(gri) Consider using s.bad to consistently suppress multiple errors
54+
// per token, here and below.
55+
s.bad = true
5156
s.error(fmt.Sprintf(format, args...))
5257
}
5358

59+
// errorAtf reports an error at a byte column offset relative to the current token start.
60+
func (s *scanner) errorAtf(offset int, format string, args ...interface{}) {
61+
s.bad = true
62+
s.errh(s.line, s.col+uint(offset), fmt.Sprintf(format, args...))
63+
}
64+
5465
// next advances the scanner by reading the next token.
5566
//
5667
// If a read, source encoding, or lexical error occurs, next calls
@@ -442,6 +453,7 @@ func (s *scanner) digits(c0 rune, base int, invalid *int) (c rune, digsep int) {
442453

443454
func (s *scanner) number(c rune) {
444455
s.startLit()
456+
s.bad = false
445457

446458
base := 10 // number base
447459
prefix := rune(0) // one of 0 (decimal), '0' (0-octal), 'x', 'o', or 'b'
@@ -477,14 +489,14 @@ func (s *scanner) number(c rune) {
477489
if c == '.' {
478490
s.kind = FloatLit
479491
if prefix == 'o' || prefix == 'b' {
480-
s.error("invalid radix point in " + litname(prefix))
492+
s.errorf("invalid radix point in %s", litname(prefix))
481493
}
482494
c, ds = s.digits(s.getr(), base, &invalid)
483495
digsep |= ds
484496
}
485497

486498
if digsep&1 == 0 {
487-
s.error(litname(prefix) + " has no digits")
499+
s.errorf("%s has no digits", litname(prefix))
488500
}
489501

490502
// exponent
@@ -503,10 +515,10 @@ func (s *scanner) number(c rune) {
503515
c, ds = s.digits(c, 10, nil)
504516
digsep |= ds
505517
if ds&1 == 0 {
506-
s.error("exponent has no digits")
518+
s.errorf("exponent has no digits")
507519
}
508520
} else if prefix == 'x' && s.kind == FloatLit {
509-
s.error("hexadecimal mantissa requires a 'p' exponent")
521+
s.errorf("hexadecimal mantissa requires a 'p' exponent")
510522
}
511523

512524
// suffix 'i'
@@ -521,12 +533,12 @@ func (s *scanner) number(c rune) {
521533
s.tok = _Literal
522534

523535
if s.kind == IntLit && invalid >= 0 {
524-
s.errh(s.line, s.col+uint(invalid), fmt.Sprintf("invalid digit %q in %s", s.lit[invalid], litname(prefix)))
536+
s.errorAtf(invalid, "invalid digit %q in %s", s.lit[invalid], litname(prefix))
525537
}
526538

527539
if digsep&2 != 0 {
528540
if i := invalidSep(s.lit); i >= 0 {
529-
s.errh(s.line, s.col+uint(i), "'_' must separate successive digits")
541+
s.errorAtf(i, "'_' must separate successive digits")
530542
}
531543
}
532544
}
@@ -585,42 +597,38 @@ func invalidSep(x string) int {
585597

586598
func (s *scanner) rune() {
587599
s.startLit()
600+
s.bad = false
588601

589-
ok := true // only report errors if we're ok so far
590602
n := 0
591603
for ; ; n++ {
592604
r := s.getr()
593605
if r == '\'' {
594606
break
595607
}
596608
if r == '\\' {
597-
if !s.escape('\'') {
598-
ok = false
599-
}
609+
s.escape('\'')
600610
continue
601611
}
602612
if r == '\n' {
603613
s.ungetr() // assume newline is not part of literal
604-
if ok {
605-
s.error("newline in character literal")
606-
ok = false
614+
if !s.bad {
615+
s.errorf("newline in character literal")
607616
}
608617
break
609618
}
610619
if r < 0 {
611-
if ok {
612-
s.errh(s.line, s.col, "invalid character literal (missing closing ')")
613-
ok = false
620+
if !s.bad {
621+
s.errorAtf(0, "invalid character literal (missing closing ')")
614622
}
615623
break
616624
}
617625
}
618626

619-
if ok {
627+
if !s.bad {
620628
if n == 0 {
621-
s.error("empty character literal or unescaped ' in character literal")
629+
s.errorf("empty character literal or unescaped ' in character literal")
622630
} else if n != 1 {
623-
s.errh(s.line, s.col, "invalid character literal (more than one character)")
631+
s.errorAtf(0, "invalid character literal (more than one character)")
624632
}
625633
}
626634

@@ -632,6 +640,7 @@ func (s *scanner) rune() {
632640

633641
func (s *scanner) stdString() {
634642
s.startLit()
643+
s.bad = false
635644

636645
for {
637646
r := s.getr()
@@ -644,11 +653,11 @@ func (s *scanner) stdString() {
644653
}
645654
if r == '\n' {
646655
s.ungetr() // assume newline is not part of literal
647-
s.error("newline in string")
656+
s.errorf("newline in string")
648657
break
649658
}
650659
if r < 0 {
651-
s.errh(s.line, s.col, "string not terminated")
660+
s.errorAtf(0, "string not terminated")
652661
break
653662
}
654663
}
@@ -661,14 +670,15 @@ func (s *scanner) stdString() {
661670

662671
func (s *scanner) rawString() {
663672
s.startLit()
673+
s.bad = false
664674

665675
for {
666676
r := s.getr()
667677
if r == '`' {
668678
break
669679
}
670680
if r < 0 {
671-
s.errh(s.line, s.col, "string not terminated")
681+
s.errorAtf(0, "string not terminated")
672682
break
673683
}
674684
}
@@ -741,7 +751,7 @@ func (s *scanner) skipComment(r rune) bool {
741751
}
742752
r = s.getr()
743753
}
744-
s.errh(s.line, s.col, "comment not terminated")
754+
s.errorAtf(0, "comment not terminated")
745755
return false
746756
}
747757

@@ -782,14 +792,14 @@ func (s *scanner) fullComment() {
782792
}
783793
}
784794

785-
func (s *scanner) escape(quote rune) bool {
795+
func (s *scanner) escape(quote rune) {
786796
var n int
787797
var base, max uint32
788798

789799
c := s.getr()
790800
switch c {
791801
case 'a', 'b', 'f', 'n', 'r', 't', 'v', '\\', quote:
792-
return true
802+
return
793803
case '0', '1', '2', '3', '4', '5', '6', '7':
794804
n, base, max = 3, 8, 255
795805
case 'x':
@@ -803,10 +813,10 @@ func (s *scanner) escape(quote rune) bool {
803813
n, base, max = 8, 16, unicode.MaxRune
804814
default:
805815
if c < 0 {
806-
return true // complain in caller about EOF
816+
return // complain in caller about EOF
807817
}
808-
s.error("unknown escape sequence")
809-
return false
818+
s.errorf("unknown escape sequence")
819+
return
810820
}
811821

812822
var x uint32
@@ -820,15 +830,15 @@ func (s *scanner) escape(quote rune) bool {
820830
}
821831
if d >= base {
822832
if c < 0 {
823-
return true // complain in caller about EOF
833+
return // complain in caller about EOF
824834
}
825835
kind := "hex"
826836
if base == 8 {
827837
kind = "octal"
828838
}
829839
s.errorf("non-%s character in escape sequence: %c", kind, c)
830840
s.ungetr()
831-
return false
841+
return
832842
}
833843
// d < base
834844
x = x*base + d
@@ -838,13 +848,10 @@ func (s *scanner) escape(quote rune) bool {
838848

839849
if x > max && base == 8 {
840850
s.errorf("octal escape value > 255: %d", x)
841-
return false
851+
return
842852
}
843853

844854
if x > max || 0xD800 <= x && x < 0xE000 /* surrogate range */ {
845-
s.error("escape sequence is invalid Unicode code point")
846-
return false
855+
s.errorf("escape sequence is invalid Unicode code point %#U", x)
847856
}
848-
849-
return true
850857
}

src/cmd/compile/internal/syntax/scanner_test.go

+5-1
Original file line numberDiff line numberDiff line change
@@ -499,6 +499,10 @@ func TestNumbers(t *testing.T) {
499499
err = ""
500500
s.next()
501501

502+
if err != "" && !s.bad {
503+
t.Errorf("%q: got error but bad not set", test.src)
504+
}
505+
502506
// compute lit where s.lit is not defined
503507
var lit string
504508
switch s.tok {
@@ -598,7 +602,7 @@ func TestScanErrors(t *testing.T) {
598602
{`"\x`, "string not terminated", 0, 0},
599603
{`"\x"`, "non-hex character in escape sequence: \"", 0, 3},
600604
{`var s string = "\x"`, "non-hex character in escape sequence: \"", 0, 18},
601-
{`return "\Uffffffff"`, "escape sequence is invalid Unicode code point", 0, 18},
605+
{`return "\Uffffffff"`, "escape sequence is invalid Unicode code point U+FFFFFFFF", 0, 18},
602606

603607
// former problem cases
604608
{"package p\n\n\xef", "invalid UTF-8 encoding", 2, 0},

src/cmd/compile/internal/syntax/tokens.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,7 @@ func contains(tokset uint64, tok token) bool {
9090
return tokset&(1<<tok) != 0
9191
}
9292

93-
type LitKind uint
93+
type LitKind uint8
9494

9595
// TODO(gri) With the 'i' (imaginary) suffix now permitted on integer
9696
// and floating-point numbers, having a single ImagLit does

0 commit comments

Comments
 (0)