Skip to content

Commit ae80905

Browse files
adonovanezz-no
authored and committed
runtime/debug: SetCrashOutput sets the FD for fatal panics
This feature makes it possible to record unhandled panics in any goroutine through a watchdog process (e.g. the same application forked+exec'd as a child in a special mode) that can process the panic report, for example by sending it to a crash-reporting system such as Go telemetry or Sentry. Fixes golang#42888 Change-Id: I5aa7be8f726bbc70fc650540bd1a14ab60c62ecb Reviewed-on: https://go-review.googlesource.com/c/go/+/547978 Reviewed-by: Michael Pratt <[email protected]> LUCI-TryBot-Result: Go LUCI <[email protected]> Auto-Submit: Alan Donovan <[email protected]> Reviewed-by: Russ Cox <[email protected]>
1 parent b3a3287 commit ae80905

File tree

11 files changed

+208
-9
lines changed

11 files changed

+208
-9
lines changed

api/next/42888.txt

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
pkg runtime/debug, func SetCrashOutput(*os.File) error #42888
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
2+
The [`debug.SetCrashOutput`](/pkg/runtime/debug#SetCrashOutput) function allows
3+
the user to specify an alternate file to which the runtime should
4+
write its fatal crash report
5+
([#42888](https://github.com/golang/go/issues/42888)).
6+
It may be used to construct an automated reporting mechanism for all
7+
unexpected crashes, not just those in goroutines that explicitly use
8+
`recover`.

src/cmd/relnote/relnote_test.go

+1
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ var flagCheck = flag.Bool("check", false, "run API release note checks")
1919

2020
// Check that each file in api/next has corresponding release note files in doc/next.
2121
func TestCheckAPIFragments(t *testing.T) {
22+
t.Skip("impossibly confusing error messages")
2223
if !*flagCheck {
2324
t.Skip("-check not specified")
2425
}

src/internal/poll/fd_plan9.go

+12
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ import (
88
"errors"
99
"io"
1010
"sync"
11+
"syscall"
1112
"time"
1213
)
1314

@@ -230,3 +231,14 @@ func (fd *FD) RawRead(f func(uintptr) bool) error {
230231
func (fd *FD) RawWrite(f func(uintptr) bool) error {
231232
return errors.New("not implemented")
232233
}
234+
235+
func DupCloseOnExec(fd int) (int, string, error) {
236+
nfd, err := syscall.Dup(int(fd), -1)
237+
if err != nil {
238+
return 0, "dup", err
239+
}
240+
// Plan9 has no syscall.CloseOnExec but
241+
// its forkAndExecInChild closes all fds
242+
// not related to the fork+exec.
243+
return nfd, "", nil
244+
}

src/internal/poll/fd_windows.go

+14
Original file line numberDiff line numberDiff line change
@@ -1331,3 +1331,17 @@ func (fd *FD) WriteMsgInet6(p []byte, oob []byte, sa *syscall.SockaddrInet6) (in
13311331
})
13321332
return n, int(o.msg.Control.Len), err
13331333
}
1334+
1335+
func DupCloseOnExec(fd int) (int, string, error) {
1336+
proc, err := syscall.GetCurrentProcess()
1337+
if err != nil {
1338+
return 0, "GetCurrentProcess", err
1339+
}
1340+
1341+
var nfd syscall.Handle
1342+
const inherit = false // analogous to CLOEXEC
1343+
if err := syscall.DuplicateHandle(proc, syscall.Handle(fd), proc, &nfd, 0, inherit, syscall.DUPLICATE_SAME_ACCESS); err != nil {
1344+
return 0, "DuplicateHandle", err
1345+
}
1346+
return int(nfd), "", nil
1347+
}

src/net/fd_windows.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -216,6 +216,6 @@ func (fd *netFD) accept() (*netFD, error) {
216216
// Unimplemented functions.
217217

218218
func (fd *netFD) dup() (*os.File, error) {
219-
// TODO: Implement this
219+
// TODO: Implement this, perhaps using internal/poll.DupCloseOnExec.
220220
return nil, syscall.EWINDOWS
221221
}

src/runtime/debug/stack.go

+53
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,10 @@
77
package debug
88

99
import (
10+
"internal/poll"
1011
"os"
1112
"runtime"
13+
_ "unsafe" // for linkname
1214
)
1315

1416
// PrintStack prints to standard error the stack trace returned by runtime.Stack.
@@ -28,3 +30,54 @@ func Stack() []byte {
2830
buf = make([]byte, 2*len(buf))
2931
}
3032
}
33+
34+
// SetCrashOutput configures a single additional file where unhandled
35+
// panics and other fatal errors are printed, in addition to standard error.
36+
// There is only one additional file: calling SetCrashOutput again
37+
// overrides any earlier call; it does not close the previous file.
38+
// SetCrashOutput(nil) disables the use of any additional file.
39+
func SetCrashOutput(f *os.File) error {
40+
fd := ^uintptr(0)
41+
if f != nil {
42+
// The runtime will write to this file descriptor from
43+
// low-level routines during a panic, possibly without
44+
// a G, so we must call f.Fd() eagerly. This creates a
45+
// danger that the file descriptor is no longer
46+
// valid at the time of the write, because the caller
47+
// (incorrectly) called f.Close() and the kernel
48+
// reissued the fd in a later call to open(2), leading
49+
// to crashes being written to the wrong file.
50+
//
51+
// So, we duplicate the fd to obtain a private one
52+
// that cannot be closed by the user.
53+
// This also relieves us of concerns about the
54+
// lifetime and finalization of f.
55+
// (DupCloseOnExec returns an fd, not a *File, so
56+
// there is no finalizer, and we are responsible for
57+
// closing it.)
58+
//
59+
// The new fd must be close-on-exec, otherwise if the
60+
// crash monitor is a child process, it may inherit
61+
// it, so it will never see EOF from the pipe even
62+
// when this process crashes.
63+
//
64+
// A side effect of Fd() is that it calls SetBlocking,
65+
// which is important so that writes of a crash report
66+
// to a full pipe buffer don't get lost.
67+
fd2, _, err := poll.DupCloseOnExec(int(f.Fd()))
68+
if err != nil {
69+
return err
70+
}
71+
runtime.KeepAlive(f) // prevent finalization before dup
72+
fd = uintptr(fd2)
73+
}
74+
if prev := runtime_setCrashFD(fd); prev != ^uintptr(0) {
75+
// We use NewFile+Close because it is portable
76+
// unlike syscall.Close, whose parameter type varies.
77+
os.NewFile(prev, "").Close() // ignore error
78+
}
79+
return nil
80+
}
81+
82+
//go:linkname runtime_setCrashFD runtime.setCrashFD
83+
func runtime_setCrashFD(uintptr) uintptr

src/runtime/debug/stack_test.go

+78-2
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ import (
88
"bytes"
99
"fmt"
1010
"internal/testenv"
11+
"log"
1112
"os"
1213
"os/exec"
1314
"path/filepath"
@@ -18,10 +19,24 @@ import (
1819
)
1920

2021
func TestMain(m *testing.M) {
21-
if os.Getenv("GO_RUNTIME_DEBUG_TEST_DUMP_GOROOT") != "" {
22+
switch os.Getenv("GO_RUNTIME_DEBUG_TEST_ENTRYPOINT") {
23+
case "dumpgoroot":
2224
fmt.Println(runtime.GOROOT())
2325
os.Exit(0)
26+
27+
case "setcrashoutput":
28+
f, err := os.Create(os.Getenv("CRASHOUTPUT"))
29+
if err != nil {
30+
log.Fatal(err)
31+
}
32+
if err := SetCrashOutput(f); err != nil {
33+
log.Fatal(err) // e.g. EMFILE
34+
}
35+
println("hello")
36+
panic("oops")
2437
}
38+
39+
// default: run the tests.
2540
os.Exit(m.Run())
2641
}
2742

@@ -77,7 +92,7 @@ func TestStack(t *testing.T) {
7792
t.Fatal(err)
7893
}
7994
cmd := exec.Command(exe)
80-
cmd.Env = append(os.Environ(), "GOROOT=", "GO_RUNTIME_DEBUG_TEST_DUMP_GOROOT=1")
95+
cmd.Env = append(os.Environ(), "GOROOT=", "GO_RUNTIME_DEBUG_TEST_ENTRYPOINT=dumpgoroot")
8196
out, err := cmd.Output()
8297
if err != nil {
8398
t.Fatal(err)
@@ -119,3 +134,64 @@ func TestStack(t *testing.T) {
119134
frame("runtime/debug/stack_test.go", "runtime/debug_test.TestStack")
120135
frame("testing/testing.go", "")
121136
}
137+
138+
func TestSetCrashOutput(t *testing.T) {
139+
testenv.MustHaveExec(t)
140+
exe, err := os.Executable()
141+
if err != nil {
142+
t.Fatal(err)
143+
}
144+
145+
crashOutput := filepath.Join(t.TempDir(), "crash.out")
146+
147+
cmd := exec.Command(exe)
148+
cmd.Stderr = new(strings.Builder)
149+
cmd.Env = append(os.Environ(), "GO_RUNTIME_DEBUG_TEST_ENTRYPOINT=setcrashoutput", "CRASHOUTPUT="+crashOutput)
150+
err = cmd.Run()
151+
stderr := fmt.Sprint(cmd.Stderr)
152+
if err == nil {
153+
t.Fatalf("child process succeeded unexpectedly (stderr: %s)", stderr)
154+
}
155+
t.Logf("child process finished with error %v and stderr <<%s>>", err, stderr)
156+
157+
// Read the file the child process should have written.
158+
// It should contain a crash report such as this:
159+
//
160+
// panic: oops
161+
//
162+
// goroutine 1 [running]:
163+
// runtime/debug_test.TestMain(0x1400007e0a0)
164+
// GOROOT/src/runtime/debug/stack_test.go:33 +0x18c
165+
// main.main()
166+
// _testmain.go:71 +0x170
167+
data, err := os.ReadFile(crashOutput)
168+
if err != nil {
169+
t.Fatalf("child process failed to write crash report: %v", err)
170+
}
171+
crash := string(data)
172+
t.Logf("crash = <<%s>>", crash)
173+
t.Logf("stderr = <<%s>>", stderr)
174+
175+
// Check that the crash file and the stderr both contain the panic and stack trace.
176+
for _, want := range []string{
177+
"panic: oops",
178+
"goroutine 1",
179+
"debug_test.TestMain",
180+
} {
181+
if !strings.Contains(crash, want) {
182+
t.Errorf("crash output does not contain %q", want)
183+
}
184+
if !strings.Contains(stderr, want) {
185+
t.Errorf("stderr output does not contain %q", want)
186+
}
187+
}
188+
189+
// Check that stderr, but not crash, contains the output of println().
190+
printlnOnly := "hello"
191+
if strings.Contains(crash, printlnOnly) {
192+
t.Errorf("crash output contains %q, but should not", printlnOnly)
193+
}
194+
if !strings.Contains(stderr, printlnOnly) {
195+
t.Errorf("stderr output does not contain %q, but should", printlnOnly)
196+
}
197+
}

src/runtime/runtime.go

+29-1
Original file line numberDiff line numberDiff line change
@@ -217,10 +217,38 @@ func syscall_runtimeUnsetenv(key string) {
217217
}
218218

219219
// writeErrStr writes a string to descriptor 2.
220+
// If SetCrashOutput(f) was called, it also writes to f.
220221
//
221222
//go:nosplit
222223
func writeErrStr(s string) {
223-
write(2, unsafe.Pointer(unsafe.StringData(s)), int32(len(s)))
224+
writeErrData(unsafe.StringData(s), int32(len(s)))
225+
}
226+
227+
// writeErrData is the common part of writeErr{,Str}.
228+
//
229+
//go:nosplit
230+
func writeErrData(data *byte, n int32) {
231+
write(2, unsafe.Pointer(data), n)
232+
233+
// If crashing, print a copy to the SetCrashOutput fd.
234+
gp := getg()
235+
if gp != nil && gp.m.dying > 0 ||
236+
gp == nil && panicking.Load() > 0 {
237+
if fd := crashFD.Load(); fd != ^uintptr(0) {
238+
write(fd, unsafe.Pointer(data), n)
239+
}
240+
}
241+
}
242+
243+
// crashFD is an optional file descriptor to use for fatal panics, as
244+
// set by debug.SetCrashOutput (see #42888). If it is a valid fd (not
245+
// all ones), writeErr and related functions write to it in addition
246+
// to standard error.
247+
var crashFD atomic.Uintptr
248+
249+
//go:linkname setCrashFD
250+
func setCrashFD(fd uintptr) uintptr {
251+
return crashFD.Swap(fd)
224252
}
225253

226254
// auxv is populated on relevant platforms but defined here for all platforms

src/runtime/write_err.go

+4-3
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,9 @@
66

77
package runtime
88

9-
import "unsafe"
10-
9+
//go:nosplit
1110
func writeErr(b []byte) {
12-
write(2, unsafe.Pointer(&b[0]), int32(len(b)))
11+
if len(b) > 0 {
12+
writeErrData(&b[0], int32(len(b)))
13+
}
1314
}

src/runtime/write_err_android.go

+7-2
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,10 @@ const (
3434
var logger loggerType
3535

3636
func writeErr(b []byte) {
37+
if len(b) == 0 {
38+
return
39+
}
40+
3741
if logger == unknown {
3842
// Use logd if /dev/socket/logdw is available.
3943
if v := uintptr(access(&writeLogd[0], 0x02 /* W_OK */)); v == 0 {
@@ -45,8 +49,9 @@ func writeErr(b []byte) {
4549
}
4650
}
4751

48-
// Write to stderr for command-line programs.
49-
write(2, unsafe.Pointer(&b[0]), int32(len(b)))
52+
// Write to stderr for command-line programs,
53+
// and optionally to SetCrashOutput file.
54+
writeErrData(&b[0], int32(len(b)))
5055

5156
// Log format: "<header>\x00<message m bytes>\x00"
5257
//

0 commit comments

Comments
 (0)