-
Notifications
You must be signed in to change notification settings - Fork 4.1k
roachtest: use structured errors #88556
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
+141
−153
Merged
Changes from all commits
Commits
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -11,13 +11,10 @@ | |
| package main | ||
|
|
||
| import ( | ||
| "bytes" | ||
| "context" | ||
| "fmt" | ||
| "io" | ||
| // For the debug http handlers. | ||
| _ "net/http/pprof" | ||
| "runtime" | ||
| "os" | ||
| "strings" | ||
| "time" | ||
|
|
||
|
|
@@ -27,6 +24,7 @@ import ( | |
| "github.com/cockroachdb/cockroach/pkg/util/syncutil" | ||
| "github.com/cockroachdb/cockroach/pkg/util/timeutil" | ||
| "github.com/cockroachdb/cockroach/pkg/util/version" | ||
| "github.com/cockroachdb/errors" | ||
| "github.com/petermattis/goid" | ||
| ) | ||
|
|
||
|
|
@@ -71,23 +69,33 @@ type testImpl struct { | |
|
|
||
| mu struct { | ||
| syncutil.RWMutex | ||
| done bool | ||
| failed bool | ||
| timeout bool // if failed == true, this indicates whether the test timed out | ||
| // cancel, if set, is called from the t.Fatal() family of functions when the | ||
| // test is being marked as failed (i.e. when the failed field above is also | ||
| // set). This is used to cancel the context passed to t.spec.Run(), so async | ||
| // test goroutines can be notified. | ||
| done bool | ||
|
|
||
| cancel func() | ||
| failLoc struct { | ||
| file string | ||
| line int | ||
| } | ||
| failureMsg string | ||
|
|
||
| // Errors are all the errors passed to `addFailure`, in order of | ||
| // these calls. | ||
| // | ||
| // NB: the first failure is not always the relevant one due to: | ||
| // https://github.com/cockroachdb/cockroach/issues/44436 | ||
| errors []error | ||
| // If len(errors)>0, this indicates whether the test timed out | ||
| // cancel, if set, is called from the t.Fatal() family of functions when the | ||
| // test is being marked as failed (i.e. when the failed field above is also | ||
| // set). This is used to cancel the context passed to t.spec.Run(), so async | ||
| // test goroutines can be notified. | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This seems misplaced, shouldn't this be near the |
||
|
|
||
| // status is a map from goroutine id to status set by that goroutine. A | ||
| // special goroutine is indicated by runnerID; that one provides the test's | ||
| // "main status". | ||
| status map[int64]testStatus | ||
|
|
||
| // TODO(test-eng): this should just be an in-mem (ring) buffer attached to | ||
| // `t.L()`. | ||
| output []byte | ||
| } | ||
| // Map from version to path to the cockroach binary to be used when | ||
|
|
@@ -246,6 +254,23 @@ func (t *testImpl) Skipf(format string, args ...interface{}) { | |
| panic(errTestFatal) | ||
| } | ||
|
|
||
| // This creates an error from the first arg, and adds each subsequent arg | ||
| // as error detail | ||
| func argsToErr(depth int, args ...interface{}) error { | ||
| // NB: we'd probably not allow multiple arguments here and we'd want | ||
| // the one remaining arg to be an `error`, but we are trying to be | ||
| // compatible with `(*testing.T).Fatal`. | ||
| var err error | ||
| for _, arg := range args { | ||
| if err == nil { | ||
| err = errors.NewWithDepthf(depth+1, "%v", arg) | ||
| continue | ||
| } | ||
| err = errors.WithDetailf(err, "%v", arg) | ||
| } | ||
| return err | ||
| } | ||
|
|
||
| // Fatal marks the test as failed, prints the args to t.L(), and calls | ||
| // panic(errTestFatal). It can be called multiple times. | ||
| // | ||
|
|
@@ -255,162 +280,100 @@ func (t *testImpl) Skipf(format string, args ...interface{}) { | |
| // ATTENTION: Since this calls panic(errTestFatal), it should only be called | ||
| // from a test's closure. The test runner itself should never call this. | ||
| func (t *testImpl) Fatal(args ...interface{}) { | ||
| t.markFailedInner("" /* format */, args...) | ||
| t.addFailure(argsToErr(1, args...)) | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should |
||
| panic(errTestFatal) | ||
| } | ||
|
|
||
| // Fatalf is like Fatal, but takes a format string. | ||
| func (t *testImpl) Fatalf(format string, args ...interface{}) { | ||
| t.markFailedInner(format, args...) | ||
| t.addFailure(errors.NewWithDepthf(1, format, args...)) | ||
| panic(errTestFatal) | ||
| } | ||
|
|
||
| // FailNow implements the TestingT interface. | ||
| func (t *testImpl) FailNow() { | ||
| t.Fatal() | ||
| t.addFailure(errors.NewWithDepthf(1, "FailNow called")) | ||
| panic(errTestFatal) | ||
| } | ||
|
|
||
| // Errorf implements the TestingT interface. | ||
| func (t *testImpl) Errorf(format string, args ...interface{}) { | ||
| t.markFailedInner(format, args...) | ||
| func (t *testImpl) Error(args ...interface{}) { | ||
| t.addFailure(argsToErr(1, args...)) | ||
| } | ||
|
|
||
| func (t *testImpl) markFailedInner(format string, args ...interface{}) { | ||
| // Skip two frames: our own and the caller. | ||
| if format != "" { | ||
| t.printfAndFail(2 /* skip */, format, args...) | ||
| } else { | ||
| t.printAndFail(2 /* skip */, args...) | ||
| } | ||
| // Errorf implements the TestingT interface. | ||
| func (t *testImpl) Errorf(format string, args ...interface{}) { | ||
| t.addFailure(errors.NewWithDepthf(1, format, args...)) | ||
| } | ||
|
|
||
| func (t *testImpl) printAndFail(skip int, args ...interface{}) { | ||
| var msg string | ||
| if len(args) == 1 { | ||
| // If we were passed only an error, then format it with "%+v" in order to | ||
| // get any stack traces. | ||
| if err, ok := args[0].(error); ok { | ||
| msg = fmt.Sprintf("%+v", err) | ||
| func formatFailure(b *strings.Builder, errs ...error) { | ||
| for i, err := range errs { | ||
| if i > 0 { | ||
| fmt.Fprintln(b) | ||
| } | ||
| file, line, fn, ok := errors.GetOneLineSource(err) | ||
| if !ok { | ||
| file, line, fn = "<unknown>", 0, "unknown" | ||
| } | ||
| fmt.Fprintf(b, "(%s:%d).%s: %v", file, line, fn, err) | ||
| } | ||
| if msg == "" { | ||
| msg = fmt.Sprint(args...) | ||
| } | ||
| t.failWithMsg(t.decorate(skip+1, msg)) | ||
| } | ||
|
|
||
| func (t *testImpl) printfAndFail(skip int, format string, args ...interface{}) { | ||
| if format == "" { | ||
| panic(fmt.Sprintf("invalid empty format. args: %s", args)) | ||
| } | ||
| t.failWithMsg(t.decorate(skip+1, fmt.Sprintf(format, args...))) | ||
| } | ||
|
|
||
| func (t *testImpl) failWithMsg(msg string) { | ||
| func (t *testImpl) addFailure(reportErr error) { | ||
| t.mu.Lock() | ||
| defer t.mu.Unlock() | ||
|
|
||
| prefix := "" | ||
| if t.mu.failed { | ||
| prefix = "[not the first failure] " | ||
| // NB: the first failure is not always the relevant one due to: | ||
| // https://github.com/cockroachdb/cockroach/issues/44436 | ||
| // | ||
| // So we chain all failures together in the order in which we see | ||
| // them. | ||
| msg = "\n" + msg | ||
| t.mu.errors = append(t.mu.errors, reportErr) | ||
|
|
||
| var b strings.Builder | ||
| formatFailure(&b, reportErr) | ||
| msg := b.String() | ||
|
|
||
| t.L().Printf("test failure #%d: %s", len(t.mu.errors), msg) | ||
| // Also dump the verbose error (incl. all stack traces) to a log file, in case | ||
| // we need it. The stacks are sometimes helpful, but we don't want them in the | ||
| // main log as they are highly verbose. | ||
| { | ||
| cl, err := t.L().ChildLogger( | ||
| fmt.Sprintf("failure_%d", len(t.mu.errors)), | ||
| logger.QuietStderr, logger.QuietStdout, | ||
| ) | ||
| if err == nil { | ||
| // We don't actually log through this logger since it adds an unrelated | ||
| // file:line caller (namely ours). The error already has stack traces | ||
| // so it's better to write only it to the file to avoid confusion. | ||
| path := cl.File.Name() | ||
| cl.Close() // we just wanted the filename | ||
| _ = os.WriteFile(path, []byte(fmt.Sprintf("%+v", reportErr)), 0644) | ||
| } | ||
| } | ||
| t.L().Printf("%stest failure: %s", prefix, msg) | ||
|
|
||
| t.mu.failed = true | ||
| t.mu.failureMsg += msg | ||
| t.mu.output = append(t.mu.output, msg...) | ||
| t.mu.output = append(t.mu.output, '\n') | ||
| if t.mu.cancel != nil { | ||
| t.mu.cancel() | ||
| } | ||
| } | ||
|
|
||
| // Args: | ||
| // skip: The number of stack frames to exclude from the result. 0 means that | ||
| // | ||
| // the caller will be the first frame identified. 1 means the caller's caller | ||
| // will be the first, etc. | ||
| func (t *testImpl) decorate(skip int, s string) string { | ||
| // Skip two extra frames to account for this function and runtime.Callers | ||
| // itself. | ||
| var pc [50]uintptr | ||
| n := runtime.Callers(2+skip, pc[:]) | ||
| if n == 0 { | ||
| panic("zero callers found") | ||
| } | ||
|
|
||
| buf := new(bytes.Buffer) | ||
| frames := runtime.CallersFrames(pc[:n]) | ||
| sep := "\t" | ||
| runnerFound := false | ||
| for { | ||
| if runnerFound { | ||
| break | ||
| } | ||
|
|
||
| frame, more := frames.Next() | ||
| if !more { | ||
| break | ||
| } | ||
| if frame.Function == t.runner { | ||
| runnerFound = true | ||
|
|
||
| // Handle the special case of the runner function being the caller of | ||
| // t.Fatal(). In that case, that's the line to be used for issue creation. | ||
| if t.mu.failLoc.file == "" { | ||
| t.mu.failLoc.file = frame.File | ||
| t.mu.failLoc.line = frame.Line | ||
| } | ||
| } | ||
| if !t.mu.failed && !runnerFound { | ||
| // Keep track of the highest stack frame that is lower than the t.runner | ||
| // stack frame. This is used to determine the author of that line of code | ||
| // and issue assignment. | ||
| t.mu.failLoc.file = frame.File | ||
| t.mu.failLoc.line = frame.Line | ||
| } | ||
| file := frame.File | ||
| if index := strings.LastIndexByte(file, '/'); index >= 0 { | ||
| file = file[index+1:] | ||
| } | ||
| fmt.Fprintf(buf, "%s%s:%d", sep, file, frame.Line) | ||
| sep = "," | ||
| } | ||
| buf.WriteString(": ") | ||
|
|
||
| lines := strings.Split(s, "\n") | ||
| if l := len(lines); l > 1 && lines[l-1] == "" { | ||
| lines = lines[:l-1] | ||
| } | ||
| for i, line := range lines { | ||
| if i > 0 { | ||
| buf.WriteString("\n\t\t") | ||
| } | ||
| buf.WriteString(line) | ||
| } | ||
| buf.WriteByte('\n') | ||
| return buf.String() | ||
| } | ||
|
|
||
| func (t *testImpl) duration() time.Duration { | ||
| return t.end.Sub(t.start) | ||
| } | ||
|
|
||
| func (t *testImpl) Failed() bool { | ||
| t.mu.RLock() | ||
| defer t.mu.RUnlock() | ||
| return t.mu.failed | ||
| return t.failedRLocked() | ||
| } | ||
|
|
||
| func (t *testImpl) failedRLocked() bool { | ||
| return len(t.mu.errors) > 0 | ||
| } | ||
|
|
||
| func (t *testImpl) FailureMsg() string { | ||
| t.mu.RLock() | ||
| defer t.mu.RUnlock() | ||
| return t.mu.failureMsg | ||
| var b strings.Builder | ||
| formatFailure(&b, t.mu.errors...) | ||
| return b.String() | ||
| } | ||
|
|
||
| func (t *testImpl) ArtifactsDir() string { | ||
|
|
||
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This doesn't seem right?
len(errors) > 0indicates that the test timed out?