Skip to content

Commit 902fdca

Browse files
findleyr and adonovan
authored and committed
gopls/internal/lsp/source/typerefs: purge func bodies before parsing
This change reuses the purgeFuncBodies optimization from completion (now moved into a new package) to reduce the amount of work and allocation done by the parser in typerefs. Now that purge is a unit, it has a unit test.

Also, minor tweaks:
- strength-reduce localImports value to a PackageID
- opt: avoid redundant map lookups
- opt: put common ast.Node switch cases first.

Benchmark time is reduced by about a third.

Change-Id: I397a09d6364914df48fd6caa78767dc266dad862
Reviewed-on: https://go-review.googlesource.com/c/tools/+/480917
Run-TryBot: Alan Donovan <[email protected]>
TryBot-Result: Gopher Robot <[email protected]>
Auto-Submit: Alan Donovan <[email protected]>
gopls-CI: kokoro <[email protected]>
Reviewed-by: Robert Findley <[email protected]>
1 parent 58c9a63 commit 902fdca

File tree

5 files changed

+218
-98
lines changed

5 files changed

+218
-98
lines changed

gopls/internal/astutil/purge.go

+74
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
// Copyright 2023 The Go Authors. All rights reserved.
2+
// Use of this source code is governed by a BSD-style
3+
// license that can be found in the LICENSE file.
4+
5+
// Package astutil provides various AST utility functions for gopls.
6+
package astutil
7+
8+
import (
9+
"bytes"
10+
"go/scanner"
11+
"go/token"
12+
13+
"golang.org/x/tools/gopls/internal/lsp/safetoken"
14+
)
15+
16+
// PurgeFuncBodies returns a copy of src in which the contents of each
17+
// outermost {...} region except struct and interface types have been
18+
// deleted. This reduces the amount of work required to parse the
19+
// top-level declarations.
20+
//
21+
// PurgeFuncBodies does not preserve newlines or position information.
22+
// Also, if the input is invalid, parsing the output of
23+
// PurgeFuncBodies may result in a different tree due to its effects
24+
// on parser error recovery.
25+
func PurgeFuncBodies(src []byte) []byte {
26+
// Destroy the content of any {...}-bracketed regions that are
27+
// not immediately preceded by a "struct" or "interface"
28+
// token. That includes function bodies, composite literals,
29+
// switch/select bodies, and all blocks of statements.
30+
// This will lead to non-void functions that don't have return
31+
// statements, which of course is a type error, but that's ok.
32+
33+
var out bytes.Buffer
34+
file := token.NewFileSet().AddFile("", -1, len(src))
35+
var sc scanner.Scanner
36+
sc.Init(file, src, nil, 0)
37+
var prev token.Token
38+
var cursor int // last consumed src offset
39+
var braces []token.Pos // stack of unclosed braces or -1 for struct/interface type
40+
for {
41+
pos, tok, _ := sc.Scan()
42+
if tok == token.EOF {
43+
break
44+
}
45+
switch tok {
46+
case token.COMMENT:
47+
// TODO(adonovan): opt: skip, to save an estimated 20% of time.
48+
49+
case token.LBRACE:
50+
if prev == token.STRUCT || prev == token.INTERFACE {
51+
pos = -1
52+
}
53+
braces = append(braces, pos)
54+
55+
case token.RBRACE:
56+
if last := len(braces) - 1; last >= 0 {
57+
top := braces[last]
58+
braces = braces[:last]
59+
if top < 0 {
60+
// struct/interface type: leave alone
61+
} else if len(braces) == 0 { // toplevel only
62+
// Delete {...} body.
63+
start, _ := safetoken.Offset(file, top)
64+
end, _ := safetoken.Offset(file, pos)
65+
out.Write(src[cursor : start+len("{")])
66+
cursor = end
67+
}
68+
}
69+
}
70+
prev = tok
71+
}
72+
out.Write(src[cursor:])
73+
return out.Bytes()
74+
}

gopls/internal/astutil/purge_test.go

+86
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
// Copyright 2023 The Go Authors. All rights reserved.
2+
// Use of this source code is governed by a BSD-style
3+
// license that can be found in the LICENSE file.
4+
5+
package astutil_test
6+
7+
import (
8+
"go/ast"
9+
"go/parser"
10+
"go/token"
11+
"os"
12+
"reflect"
13+
"testing"
14+
15+
"golang.org/x/tools/go/packages"
16+
"golang.org/x/tools/gopls/internal/astutil"
17+
)
18+
19+
// TestPurgeFuncBodies tests PurgeFuncBodies by comparing it against a
20+
// (less efficient) reference implementation that purges after parsing.
21+
func TestPurgeFuncBodies(t *testing.T) {
22+
// Load a few standard packages.
23+
config := packages.Config{Mode: packages.NeedCompiledGoFiles}
24+
pkgs, err := packages.Load(&config, "encoding/...")
25+
if err != nil {
26+
t.Fatal(err)
27+
}
28+
29+
// preorder returns the nodes of tree f in preorder.
30+
preorder := func(f *ast.File) (nodes []ast.Node) {
31+
ast.Inspect(f, func(n ast.Node) bool {
32+
if n != nil {
33+
nodes = append(nodes, n)
34+
}
35+
return true
36+
})
37+
return nodes
38+
}
39+
40+
packages.Visit(pkgs, nil, func(p *packages.Package) {
41+
for _, filename := range p.CompiledGoFiles {
42+
content, err := os.ReadFile(filename)
43+
if err != nil {
44+
t.Fatal(err)
45+
}
46+
47+
fset := token.NewFileSet()
48+
49+
// Parse then purge (reference implementation).
50+
f1, _ := parser.ParseFile(fset, filename, content, 0)
51+
ast.Inspect(f1, func(n ast.Node) bool {
52+
switch n := n.(type) {
53+
case *ast.FuncDecl:
54+
if n.Body != nil {
55+
n.Body.List = nil
56+
}
57+
case *ast.FuncLit:
58+
n.Body.List = nil
59+
case *ast.CompositeLit:
60+
n.Elts = nil
61+
}
62+
return true
63+
})
64+
65+
// Purge before parse (logic under test).
66+
f2, _ := parser.ParseFile(fset, filename, astutil.PurgeFuncBodies(content), 0)
67+
68+
// Compare sequence of node types.
69+
nodes1 := preorder(f1)
70+
nodes2 := preorder(f2)
71+
if len(nodes2) < len(nodes1) {
72+
t.Errorf("purged file has fewer nodes: %d vs %d",
73+
len(nodes2), len(nodes1))
74+
nodes1 = nodes1[:len(nodes2)] // truncate
75+
}
76+
for i := range nodes1 {
77+
x, y := nodes1[i], nodes2[i]
78+
if reflect.TypeOf(x) != reflect.TypeOf(y) {
79+
t.Errorf("%s: got %T, want %T",
80+
fset.Position(x.Pos()), y, x)
81+
break
82+
}
83+
}
84+
}
85+
})
86+
}

gopls/internal/lsp/source/completion/completion.go

+2-57
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@
77
package completion
88

99
import (
10-
"bytes"
1110
"context"
1211
"fmt"
1312
"go/ast"
@@ -27,6 +26,7 @@ import (
2726

2827
"golang.org/x/sync/errgroup"
2928
"golang.org/x/tools/go/ast/astutil"
29+
goplsastutil "golang.org/x/tools/gopls/internal/astutil"
3030
"golang.org/x/tools/gopls/internal/lsp/protocol"
3131
"golang.org/x/tools/gopls/internal/lsp/safetoken"
3232
"golang.org/x/tools/gopls/internal/lsp/snippet"
@@ -3176,7 +3176,7 @@ func isSlice(obj types.Object) bool {
31763176
// quick partial parse. fn is non-nil only for function declarations.
31773177
// The AST position information is garbage.
31783178
func forEachPackageMember(content []byte, f func(tok token.Token, id *ast.Ident, fn *ast.FuncDecl)) {
3179-
purged := purgeFuncBodies(content)
3179+
purged := goplsastutil.PurgeFuncBodies(content)
31803180
file, _ := parser.ParseFile(token.NewFileSet(), "", purged, 0)
31813181
for _, decl := range file.Decls {
31823182
switch decl := decl.(type) {
@@ -3198,58 +3198,3 @@ func forEachPackageMember(content []byte, f func(tok token.Token, id *ast.Ident,
31983198
}
31993199
}
32003200
}
3201-
3202-
// purgeFuncBodies returns a copy of src in which the contents of each
3203-
// outermost {...} region except struct and interface types have been
3204-
// deleted. It does not preserve newlines. This reduces the amount of
3205-
// work required to parse the top-level declarations.
3206-
func purgeFuncBodies(src []byte) []byte {
3207-
// Destroy the content of any {...}-bracketed regions that are
3208-
// not immediately preceded by a "struct" or "interface"
3209-
// token. That includes function bodies, composite literals,
3210-
// switch/select bodies, and all blocks of statements.
3211-
// This will lead to non-void functions that don't have return
3212-
// statements, which of course is a type error, but that's ok.
3213-
3214-
var out bytes.Buffer
3215-
file := token.NewFileSet().AddFile("", -1, len(src))
3216-
var sc scanner.Scanner
3217-
sc.Init(file, src, nil, 0)
3218-
var prev token.Token
3219-
var cursor int // last consumed src offset
3220-
var braces []token.Pos // stack of unclosed braces or -1 for struct/interface type
3221-
for {
3222-
pos, tok, _ := sc.Scan()
3223-
if tok == token.EOF {
3224-
break
3225-
}
3226-
switch tok {
3227-
case token.COMMENT:
3228-
// TODO(adonovan): opt: skip, to save an estimated 20% of time.
3229-
3230-
case token.LBRACE:
3231-
if prev == token.STRUCT || prev == token.INTERFACE {
3232-
pos = -1
3233-
}
3234-
braces = append(braces, pos)
3235-
3236-
case token.RBRACE:
3237-
if last := len(braces) - 1; last >= 0 {
3238-
top := braces[last]
3239-
braces = braces[:last]
3240-
if top < 0 {
3241-
// struct/interface type: leave alone
3242-
} else if len(braces) == 0 { // toplevel only
3243-
// Delete {...} body.
3244-
start, _ := safetoken.Offset(file, top)
3245-
end, _ := safetoken.Offset(file, pos)
3246-
out.Write(src[cursor : start+len("{")])
3247-
cursor = end
3248-
}
3249-
}
3250-
}
3251-
prev = tok
3252-
}
3253-
out.Write(src[cursor:])
3254-
return out.Bytes()
3255-
}

gopls/internal/lsp/source/typerefs/pkgrefs_test.go

+3
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ import (
2020

2121
"golang.org/x/tools/go/gcexportdata"
2222
"golang.org/x/tools/go/packages"
23+
"golang.org/x/tools/gopls/internal/astutil"
2324
"golang.org/x/tools/gopls/internal/lsp/cache"
2425
"golang.org/x/tools/gopls/internal/lsp/source"
2526
"golang.org/x/tools/gopls/internal/lsp/source/typerefs"
@@ -286,10 +287,12 @@ func newParser() *memoizedParser {
286287

287288
func (p *memoizedParser) parse(ctx context.Context, uri span.URI) (*ParsedGoFile, error) {
288289
doParse := func(ctx context.Context, uri span.URI) (*ParsedGoFile, error) {
290+
// TODO(adonovan): hoist this operation outside the benchmark critsec.
289291
content, err := os.ReadFile(uri.Filename())
290292
if err != nil {
291293
return nil, err
292294
}
295+
content = astutil.PurgeFuncBodies(content)
293296
pgf, _ := cache.ParseGoSrc(ctx, token.NewFileSet(), uri, content, source.ParseFull)
294297
return pgf, nil
295298
}

0 commit comments

Comments
 (0)