Skip to content

Commit 117517d

Browse files
prattmicyunginnanet
authored andcommitted
cmd/compile: lookup indirect callees from export data for devirtualization
Today, the PGO IR graph only contains entries for ir.Func loaded into the package. This can include functions from transitive dependencies, but only if they happen to be referenced by something in the current package. If they are not referenced, noder never bothers to load them. This leads to a deficiency in PGO devirtualization: some callee methods are available in transitive dependencies but do not devirtualize because they happen to not get loaded from export data. Resolve this by adding an explicit lookup from export data of callees mentioned in the profile. I have chosen to do this during loading of the profile for simplicity: the PGO IR graph always contains all of the functions we might need. That said, it isn't strictly necessary. PGO devirtualization could do the lookup lazily if it decides it actually needs a method. This saves work at the expense of a bit more complexity, but I've chosen the simpler approach for now as I measured the cost of this as significantly less than the rest of PGO loading. For golang#61577. Change-Id: Ieafb2a549510587027270ee6b4c3aefd149a901f Reviewed-on: https://go-review.googlesource.com/c/go/+/497175 LUCI-TryBot-Result: Go LUCI <[email protected]> Reviewed-by: Matthew Dempsky <[email protected]> Reviewed-by: Cherry Mui <[email protected]>
1 parent 41871c1 commit 117517d

File tree

12 files changed

+367
-58
lines changed

12 files changed

+367
-58
lines changed

src/cmd/compile/internal/inline/inl.go

+3-24
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,6 @@ import (
3030
"fmt"
3131
"go/constant"
3232
"internal/goexperiment"
33-
"sort"
3433
"strconv"
3534

3635
"cmd/compile/internal/base"
@@ -122,38 +121,18 @@ func pgoInlinePrologue(p *pgo.Profile, funcs []*ir.Func) {
122121
// comparing with the threshold may not accurately reflect which nodes are
123122
// considiered hot).
124123
func hotNodesFromCDF(p *pgo.Profile) (float64, []pgo.NodeMapKey) {
125-
nodes := make([]pgo.NodeMapKey, len(p.NodeMap))
126-
i := 0
127-
for n := range p.NodeMap {
128-
nodes[i] = n
129-
i++
130-
}
131-
sort.Slice(nodes, func(i, j int) bool {
132-
ni, nj := nodes[i], nodes[j]
133-
if wi, wj := p.NodeMap[ni].EWeight, p.NodeMap[nj].EWeight; wi != wj {
134-
return wi > wj // want larger weight first
135-
}
136-
// same weight, order by name/line number
137-
if ni.CallerName != nj.CallerName {
138-
return ni.CallerName < nj.CallerName
139-
}
140-
if ni.CalleeName != nj.CalleeName {
141-
return ni.CalleeName < nj.CalleeName
142-
}
143-
return ni.CallSiteOffset < nj.CallSiteOffset
144-
})
145124
cum := int64(0)
146-
for i, n := range nodes {
125+
for i, n := range p.NodesByWeight {
147126
w := p.NodeMap[n].EWeight
148127
cum += w
149128
if pgo.WeightInPercentage(cum, p.TotalEdgeWeight) > inlineCDFHotCallSiteThresholdPercent {
150129
// nodes[:i+1] to include the very last node that makes it to go over the threshold.
151130
// (Say, if the CDF threshold is 50% and one hot node takes 60% of weight, we want to
152131
// include that node instead of excluding it.)
153-
return pgo.WeightInPercentage(w, p.TotalEdgeWeight), nodes[:i+1]
132+
return pgo.WeightInPercentage(w, p.TotalEdgeWeight), p.NodesByWeight[:i+1]
154133
}
155134
}
156-
return 0, nodes
135+
return 0, p.NodesByWeight
157136
}
158137

159138
// InlinePackage finds functions that can be inlined and clones them before walk expands them.

src/cmd/compile/internal/ir/expr.go

+45
Original file line numberDiff line numberDiff line change
@@ -1189,6 +1189,51 @@ func MethodSymSuffix(recv *types.Type, msym *types.Sym, suffix string) *types.Sy
11891189
return rpkg.LookupBytes(b.Bytes())
11901190
}
11911191

1192+
// LookupMethodSelector returns the types.Sym of the selector for a method
1193+
// named in local symbol name, as well as the types.Sym of the receiver.
1194+
//
1195+
// TODO(prattmic): this does not attempt to handle method suffixes (wrappers).
1196+
func LookupMethodSelector(pkg *types.Pkg, name string) (typ, meth *types.Sym, err error) {
1197+
typeName, methName := splitType(name)
1198+
if typeName == "" {
1199+
return nil, nil, fmt.Errorf("%s doesn't contain type split", name)
1200+
}
1201+
1202+
if len(typeName) > 3 && typeName[:2] == "(*" && typeName[len(typeName)-1] == ')' {
1203+
// Symbol name is for a pointer receiver method. We just want
1204+
// the base type name.
1205+
typeName = typeName[2 : len(typeName)-1]
1206+
}
1207+
1208+
typ = pkg.Lookup(typeName)
1209+
meth = pkg.Selector(methName)
1210+
return typ, meth, nil
1211+
}
1212+
1213+
// splitType splits a local symbol name into type and method (fn). If this a
1214+
// free function, typ == "".
1215+
//
1216+
// N.B. closures and methods can be ambiguous (e.g., bar.func1). These cases
1217+
// are returned as methods.
1218+
func splitType(name string) (typ, fn string) {
1219+
// Types are split on the first dot, ignoring everything inside
1220+
// brackets (instantiation of type parameter, usually including
1221+
// "go.shape").
1222+
bracket := 0
1223+
for i, r := range name {
1224+
if r == '.' && bracket == 0 {
1225+
return name[:i], name[i+1:]
1226+
}
1227+
if r == '[' {
1228+
bracket++
1229+
}
1230+
if r == ']' {
1231+
bracket--
1232+
}
1233+
}
1234+
return "", name
1235+
}
1236+
11921237
// MethodExprName returns the ONAME representing the method
11931238
// referenced by expression n, which must be a method selector,
11941239
// method expression, or method value.

src/cmd/compile/internal/ir/func.go

+62
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ import (
1212
"cmd/internal/src"
1313
"fmt"
1414
"strings"
15+
"unicode/utf8"
1516
)
1617

1718
// A Func corresponds to a single function in a Go program
@@ -312,6 +313,67 @@ func LinkFuncName(f *Func) string {
312313
return objabi.PathToPrefix(pkg.Path) + "." + s.Name
313314
}
314315

316+
// ParseLinkFuncName parsers a symbol name (as returned from LinkFuncName) back
317+
// to the package path and local symbol name.
318+
func ParseLinkFuncName(name string) (pkg, sym string, err error) {
319+
pkg, sym = splitPkg(name)
320+
if pkg == "" {
321+
return "", "", fmt.Errorf("no package path in name")
322+
}
323+
324+
pkg, err = objabi.PrefixToPath(pkg) // unescape
325+
if err != nil {
326+
return "", "", fmt.Errorf("malformed package path: %v", err)
327+
}
328+
329+
return pkg, sym, nil
330+
}
331+
332+
// Borrowed from x/mod.
333+
func modPathOK(r rune) bool {
334+
if r < utf8.RuneSelf {
335+
return r == '-' || r == '.' || r == '_' || r == '~' ||
336+
'0' <= r && r <= '9' ||
337+
'A' <= r && r <= 'Z' ||
338+
'a' <= r && r <= 'z'
339+
}
340+
return false
341+
}
342+
343+
func escapedImportPathOK(r rune) bool {
344+
return modPathOK(r) || r == '+' || r == '/' || r == '%'
345+
}
346+
347+
// splitPkg splits the full linker symbol name into package and local symbol
348+
// name.
349+
func splitPkg(name string) (pkgpath, sym string) {
350+
// package-sym split is at first dot after last the / that comes before
351+
// any characters illegal in a package path.
352+
353+
lastSlashIdx := 0
354+
for i, r := range name {
355+
// Catches cases like:
356+
// * example.foo[sync/atomic.Uint64].
357+
// * example%2ecom.foo[sync/atomic.Uint64].
358+
//
359+
// Note that name is still escaped; unescape occurs after splitPkg.
360+
if !escapedImportPathOK(r) {
361+
break
362+
}
363+
if r == '/' {
364+
lastSlashIdx = i
365+
}
366+
}
367+
for i := lastSlashIdx; i < len(name); i++ {
368+
r := name[i]
369+
if r == '.' {
370+
return name[:i], name[i+1:]
371+
}
372+
}
373+
374+
return "", name
375+
}
376+
315377
var CurFunc *Func
316378

317379
// WithFunc invokes do with CurFunc and base.Pos set to curfn and
+82
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
// Copyright 2023 The Go Authors. All rights reserved.
2+
// Use of this source code is governed by a BSD-style
3+
// license that can be found in the LICENSE file.
4+
5+
package ir
6+
7+
import (
8+
"testing"
9+
)
10+
11+
func TestSplitPkg(t *testing.T) {
12+
tests := []struct {
13+
in string
14+
pkg string
15+
sym string
16+
}{
17+
{
18+
in: "foo.Bar",
19+
pkg: "foo",
20+
sym: "Bar",
21+
},
22+
{
23+
in: "foo/bar.Baz",
24+
pkg: "foo/bar",
25+
sym: "Baz",
26+
},
27+
{
28+
in: "memeqbody",
29+
pkg: "",
30+
sym: "memeqbody",
31+
},
32+
{
33+
in: `example%2ecom.Bar`,
34+
pkg: `example%2ecom`,
35+
sym: "Bar",
36+
},
37+
{
38+
// Not a real generated symbol name, but easier to catch the general parameter form.
39+
in: `foo.Bar[sync/atomic.Uint64]`,
40+
pkg: `foo`,
41+
sym: "Bar[sync/atomic.Uint64]",
42+
},
43+
{
44+
in: `example%2ecom.Bar[sync/atomic.Uint64]`,
45+
pkg: `example%2ecom`,
46+
sym: "Bar[sync/atomic.Uint64]",
47+
},
48+
{
49+
in: `gopkg.in/yaml%2ev3.Bar[sync/atomic.Uint64]`,
50+
pkg: `gopkg.in/yaml%2ev3`,
51+
sym: "Bar[sync/atomic.Uint64]",
52+
},
53+
{
54+
// This one is a real symbol name.
55+
in: `foo.Bar[go.shape.struct { sync/atomic._ sync/atomic.noCopy; sync/atomic._ sync/atomic.align64; sync/atomic.v uint64 }]`,
56+
pkg: `foo`,
57+
sym: "Bar[go.shape.struct { sync/atomic._ sync/atomic.noCopy; sync/atomic._ sync/atomic.align64; sync/atomic.v uint64 }]",
58+
},
59+
{
60+
in: `example%2ecom.Bar[go.shape.struct { sync/atomic._ sync/atomic.noCopy; sync/atomic._ sync/atomic.align64; sync/atomic.v uint64 }]`,
61+
pkg: `example%2ecom`,
62+
sym: "Bar[go.shape.struct { sync/atomic._ sync/atomic.noCopy; sync/atomic._ sync/atomic.align64; sync/atomic.v uint64 }]",
63+
},
64+
{
65+
in: `gopkg.in/yaml%2ev3.Bar[go.shape.struct { sync/atomic._ sync/atomic.noCopy; sync/atomic._ sync/atomic.align64; sync/atomic.v uint64 }]`,
66+
pkg: `gopkg.in/yaml%2ev3`,
67+
sym: "Bar[go.shape.struct { sync/atomic._ sync/atomic.noCopy; sync/atomic._ sync/atomic.align64; sync/atomic.v uint64 }]",
68+
},
69+
}
70+
71+
for _, tc := range tests {
72+
t.Run(tc.in, func(t *testing.T) {
73+
pkg, sym := splitPkg(tc.in)
74+
if pkg != tc.pkg {
75+
t.Errorf("splitPkg(%q) got pkg %q want %q", tc.in, pkg, tc.pkg)
76+
}
77+
if sym != tc.sym {
78+
t.Errorf("splitPkg(%q) got sym %q want %q", tc.in, sym, tc.sym)
79+
}
80+
})
81+
}
82+
}

src/cmd/compile/internal/noder/unified.go

+62
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
package noder
66

77
import (
8+
"fmt"
89
"internal/pkgbits"
910
"io"
1011
"runtime"
@@ -14,6 +15,7 @@ import (
1415
"cmd/compile/internal/base"
1516
"cmd/compile/internal/inline"
1617
"cmd/compile/internal/ir"
18+
"cmd/compile/internal/pgo"
1719
"cmd/compile/internal/typecheck"
1820
"cmd/compile/internal/types"
1921
"cmd/compile/internal/types2"
@@ -25,6 +27,65 @@ import (
2527
// later.
2628
var localPkgReader *pkgReader
2729

30+
// LookupMethodFunc returns the ir.Func for an arbitrary full symbol name if
31+
// that function exists in the set of available export data.
32+
//
33+
// This allows lookup of arbitrary methods that aren't otherwise referenced by
34+
// the local package and thus haven't been read yet.
35+
//
36+
// TODO(prattmic): Does not handle instantiation of generic types. Currently
37+
// profiles don't contain the original type arguments, so we won't be able to
38+
// create the runtime dictionaries.
39+
//
40+
// TODO(prattmic): Hit rate of this function is usually fairly low, and errors
41+
// are only used when debug logging is enabled. Consider constructing cheaper
42+
// errors by default.
43+
func LookupMethodFunc(fullName string) (*ir.Func, error) {
44+
pkgPath, symName, err := ir.ParseLinkFuncName(fullName)
45+
if err != nil {
46+
return nil, fmt.Errorf("error parsing symbol name %q: %v", fullName, err)
47+
}
48+
49+
pkg, ok := types.PkgMap()[pkgPath]
50+
if !ok {
51+
return nil, fmt.Errorf("pkg %s doesn't exist in %v", pkgPath, types.PkgMap())
52+
}
53+
54+
// N.B. readPackage creates a Sym for every object in the package to
55+
// initialize objReader and importBodyReader, even if the object isn't
56+
// read.
57+
//
58+
// However, objReader is only initialized for top-level objects, so we
59+
// must first lookup the type and use that to find the method rather
60+
// than looking for the method directly.
61+
typ, meth, err := ir.LookupMethodSelector(pkg, symName)
62+
if err != nil {
63+
return nil, fmt.Errorf("error looking up method symbol %q: %v", symName, err)
64+
}
65+
66+
pri, ok := objReader[typ]
67+
if !ok {
68+
return nil, fmt.Errorf("type sym %v missing objReader", typ)
69+
}
70+
71+
name := pri.pr.objIdx(pri.idx, nil, nil, false).(*ir.Name)
72+
if name.Op() != ir.OTYPE {
73+
return nil, fmt.Errorf("type sym %v refers to non-type name: %v", typ, name)
74+
}
75+
if name.Alias() {
76+
return nil, fmt.Errorf("type sym %v refers to alias", typ)
77+
}
78+
79+
for _, m := range name.Type().Methods() {
80+
if m.Sym == meth {
81+
fn := m.Nname.(*ir.Name).Func
82+
return fn, nil
83+
}
84+
}
85+
86+
return nil, fmt.Errorf("method %s missing from method set of %v", symName, typ)
87+
}
88+
2889
// unified constructs the local package's Internal Representation (IR)
2990
// from its syntax tree (AST).
3091
//
@@ -69,6 +130,7 @@ var localPkgReader *pkgReader
69130
func unified(m posMap, noders []*noder) {
70131
inline.InlineCall = unifiedInlineCall
71132
typecheck.HaveInlineBody = unifiedHaveInlineBody
133+
pgo.LookupMethodFunc = LookupMethodFunc
72134

73135
data := writePkgStub(m, noders)
74136

0 commit comments

Comments
 (0)