Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 1 addition & 17 deletions pkg/cmd/roachtest/cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -2065,29 +2065,13 @@ func (c *clusterImpl) Install(
return errors.Wrap(roachprod.Install(ctx, l, c.MakeNodes(nodes), software), "cluster.Install")
}

var reOnlyAlphanumeric = regexp.MustCompile(`[^a-zA-Z0-9]+`)

// cmdLogFileName comes up with a log file to use for the given argument string.
func cmdLogFileName(t time.Time, nodes option.NodeListOption, args ...string) string {
// Make sure we treat {"./cockroach start"} like {"./cockroach", "start"}.
args = strings.Split(strings.Join(args, " "), " ")
prefix := []string{reOnlyAlphanumeric.ReplaceAllString(args[0], "")}
for _, arg := range args[1:] {
if s := reOnlyAlphanumeric.ReplaceAllString(arg, ""); s != arg {
break
}
prefix = append(prefix, arg)
}
s := strings.Join(prefix, "_")
const maxLen = 70
if len(s) > maxLen {
s = s[:maxLen]
}
logFile := fmt.Sprintf(
"run_%s_n%s_%s",
t.Format(`150405.000000000`),
nodes.String()[1:],
s,
install.GenFilenameFromArgs(20, args...),
)
return logFile
}
Expand Down
2 changes: 1 addition & 1 deletion pkg/cmd/roachtest/cluster_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@ func TestClusterMachineType(t *testing.T) {
func TestCmdLogFileName(t *testing.T) {
ts := time.Date(2000, 1, 1, 15, 4, 12, 0, time.Local)

const exp = `run_150412.000000000_n1,3-4,9_cockroach_bla`
const exp = `run_150412.000000000_n1,3-4,9_cockroach-bla-foo-ba`
nodes := option.NodeListOption{1, 3, 4, 9}
assert.Equal(t,
exp,
Expand Down
58 changes: 32 additions & 26 deletions pkg/cmd/roachtest/test_impl.go
Original file line number Diff line number Diff line change
Expand Up @@ -272,52 +272,47 @@ func collectErrors(args []interface{}) []error {
// ATTENTION: Since this calls panic(errTestFatal), it should only be called
// from a test's closure. The test runner itself should never call this.
func (t *testImpl) Fatal(args ...interface{}) {
t.addFailure("", args...)
t.addFailureAndCancel(1, "", args...)
panic(errTestFatal)
}

// Fatalf is like Fatal, but takes a format string.
func (t *testImpl) Fatalf(format string, args ...interface{}) {
t.addFailure(format, args...)
t.addFailureAndCancel(1, format, args...)
panic(errTestFatal)
}

// FailNow implements the TestingT interface.
func (t *testImpl) FailNow() {
t.addFailure("FailNow called")
t.addFailureAndCancel(1, "FailNow called")
panic(errTestFatal)
}

// Error implements the TestingT interface
func (t *testImpl) Error(args ...interface{}) {
t.addFailure("", args...)
t.addFailureAndCancel(1, "", args...)
}

// Errorf implements the TestingT interface.
func (t *testImpl) Errorf(format string, args ...interface{}) {
t.addFailure(format, args...)
t.addFailureAndCancel(1, format, args...)
}

// We take the first error from each failure which is the
// "squashed" error that contains all information of a failure
func formatFailure(b *strings.Builder, reportFailures ...failure) {
for i, failure := range reportFailures {
if i > 0 {
fmt.Fprintln(b)
}
file, line, fn, ok := errors.GetOneLineSource(failure.squashedErr)
if !ok {
file, line, fn = "<unknown>", 0, "unknown"
}
fmt.Fprintf(b, "(%s:%d).%s: %v", file, line, fn, failure.squashedErr)
func (t *testImpl) addFailureAndCancel(depth int, format string, args ...interface{}) {
t.addFailure(depth+1, format, args...)
if t.mu.cancel != nil {
t.mu.cancel()
}
}

func (t *testImpl) addFailure(format string, args ...interface{}) {
// addFailure depth indicates how many stack frames to skip when reporting the
// site of the failure in logs. `0` will report the caller of addFailure, `1` the
// caller of the caller of addFailure, etc.
func (t *testImpl) addFailure(depth int, format string, args ...interface{}) {
if format == "" {
format = strings.Repeat(" %v", len(args))[1:]
}
reportFailure := newFailure(errors.NewWithDepthf(2, format, args...), collectErrors(args))
reportFailure := newFailure(errors.NewWithDepthf(depth+1, format, args...), collectErrors(args))

t.mu.Lock()
defer t.mu.Unlock()
Expand All @@ -328,15 +323,14 @@ func (t *testImpl) addFailure(format string, args ...interface{}) {
formatFailure(&b, reportFailure)
msg := b.String()

t.L().Printf("test failure #%d: %s", len(t.mu.failures), msg)
failureNum := len(t.mu.failures)
failureLog := fmt.Sprintf("failure_%d", failureNum)
t.L().Printf("test failure #%d: full stack retained in %s.log: %s", failureNum, failureLog, msg)
// Also dump the verbose error (incl. all stack traces) to a log file, in case
// we need it. The stacks are sometimes helpful, but we don't want them in the
// main log as they are highly verbose.
{
cl, err := t.L().ChildLogger(
fmt.Sprintf("failure_%d", len(t.mu.failures)),
logger.QuietStderr, logger.QuietStdout,
)
cl, err := t.L().ChildLogger(failureLog, logger.QuietStderr, logger.QuietStdout)
if err == nil {
// We don't actually log through this logger since it adds an unrelated
// file:line caller (namely ours). The error already has stack traces
Expand All @@ -349,8 +343,20 @@ func (t *testImpl) addFailure(format string, args ...interface{}) {

t.mu.output = append(t.mu.output, msg...)
t.mu.output = append(t.mu.output, '\n')
if t.mu.cancel != nil {
t.mu.cancel()
}

// We take the first error from each failure which is the
// "squashed" error that contains all information of a failure
func formatFailure(b *strings.Builder, reportFailures ...failure) {
for i, failure := range reportFailures {
if i > 0 {
fmt.Fprintln(b)
}
file, line, fn, ok := errors.GetOneLineSource(failure.squashedErr)
if !ok {
file, line, fn = "<unknown>", 0, "unknown"
}
fmt.Fprintf(b, "(%s:%d).%s: %v", file, line, fn, failure.squashedErr)
}
}

Expand Down
16 changes: 8 additions & 8 deletions pkg/cmd/roachtest/test_runner.go
Original file line number Diff line number Diff line change
Expand Up @@ -909,10 +909,9 @@ func (r *testRunner) runTest(
}
t.L().Printf("tearing down after %s; see teardown.log", s)
case <-time.After(timeout):
// NB: we're intentionally not failing the test if it hasn't
// already. This will be done at the very end of this method,
// after we've collected artifacts.
t.L().Printf("test timed out after %s; check __stacks.log and CRDB logs for goroutine dumps", timeout)
// NB: We're adding the timeout failure intentionally without cancelling the context
// to capture as much state as possible during artifact collection.
t.addFailure(0, "test timed out (%s)", timeout)
timedOut = true
}

Expand Down Expand Up @@ -1024,10 +1023,11 @@ func (r *testRunner) teardownTest(
// around so someone can poke at it.
_ = c.StopE(ctx, t.L(), option.DefaultStopOpts(), c.All())

// The hung test may, against all odds, still not have reported an error.
// We delayed it to improve artifacts collection, and now we ensure the test
// is marked as failing.
t.Errorf("test timed out (%s)", t.Spec().(*registry.TestSpec).Timeout)
// We previously added a timeout failure without cancellation, so we cancel here.
if t.mu.cancel != nil {
t.mu.cancel()
}
t.L().Printf("test timed out; check __stacks.log and CRDB logs for goroutine dumps")
}
return nil
}
Expand Down
Loading