Skip to content

Commit

Permalink
Merge #29499
Browse files Browse the repository at this point in the history
29499: kv/ui: add metric and graph for AsyncWriteFailure restarts r=nvanbenschoten a=nvanbenschoten

Informs #28876.

This change adds a new metric called `txn.restarts.asyncwritefailure`,
along with a corresponding series in the Admin UI's `KV Transaction
Restarts` graph. These transaction restarts are caused by async
consensus writes which fail while a transaction is attempting to
pipeline writes. The restart occurs when the transaction visits the
write's key while attempting to prove that the write succeeded and
finds a missing intent.

Release note: None

Co-authored-by: Nathan VanBenschoten <[email protected]>
  • Loading branch information
craig[bot] and nvanbenschoten committed Sep 4, 2018
2 parents 23dffae + b1f6e2c commit 1aa2063
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 14 deletions.
38 changes: 24 additions & 14 deletions pkg/kv/txn_coord_sender.go
Original file line number Diff line number Diff line change
Expand Up @@ -244,10 +244,11 @@ type TxnMetrics struct {
Restarts *metric.Histogram

// Counts of restart types.
RestartsWriteTooOld *metric.Counter
RestartsDeleteRange *metric.Counter
RestartsSerializable *metric.Counter
RestartsPossibleReplay *metric.Counter
RestartsWriteTooOld *metric.Counter
RestartsDeleteRange *metric.Counter
RestartsSerializable *metric.Counter
RestartsPossibleReplay *metric.Counter
RestartsAsyncWriteFailure *metric.Counter
}

var (
Expand Down Expand Up @@ -314,22 +315,29 @@ var (
Measurement: "Restarted Transactions",
Unit: metric.Unit_COUNT,
}
metaRestartsAsyncWriteFailure = metric.Metadata{
Name: "txn.restarts.asyncwritefailure",
Help: "Number of restarts due to async consensus writes that failed to leave intents",
Measurement: "Restarted Transactions",
Unit: metric.Unit_COUNT,
}
)

// MakeTxnMetrics returns a TxnMetrics struct that contains metrics whose
// windowed portions retain data for approximately histogramWindow.
func MakeTxnMetrics(histogramWindow time.Duration) TxnMetrics {
return TxnMetrics{
Aborts: metric.NewCounterWithRates(metaAbortsRates),
Commits: metric.NewCounterWithRates(metaCommitsRates),
Commits1PC: metric.NewCounterWithRates(metaCommits1PCRates),
AutoRetries: metric.NewCounterWithRates(metaAutoRetriesRates),
Durations: metric.NewLatency(metaDurationsHistograms, histogramWindow),
Restarts: metric.NewHistogram(metaRestartsHistogram, histogramWindow, 100, 3),
RestartsWriteTooOld: metric.NewCounter(metaRestartsWriteTooOld),
RestartsDeleteRange: metric.NewCounter(metaRestartsDeleteRange),
RestartsSerializable: metric.NewCounter(metaRestartsSerializable),
RestartsPossibleReplay: metric.NewCounter(metaRestartsPossibleReplay),
Aborts: metric.NewCounterWithRates(metaAbortsRates),
Commits: metric.NewCounterWithRates(metaCommitsRates),
Commits1PC: metric.NewCounterWithRates(metaCommits1PCRates),
AutoRetries: metric.NewCounterWithRates(metaAutoRetriesRates),
Durations: metric.NewLatency(metaDurationsHistograms, histogramWindow),
Restarts: metric.NewHistogram(metaRestartsHistogram, histogramWindow, 100, 3),
RestartsWriteTooOld: metric.NewCounter(metaRestartsWriteTooOld),
RestartsDeleteRange: metric.NewCounter(metaRestartsDeleteRange),
RestartsSerializable: metric.NewCounter(metaRestartsSerializable),
RestartsPossibleReplay: metric.NewCounter(metaRestartsPossibleReplay),
RestartsAsyncWriteFailure: metric.NewCounter(metaRestartsAsyncWriteFailure),
}
}

Expand Down Expand Up @@ -776,6 +784,8 @@ func (tc *TxnCoordSender) handleRetryableErrLocked(
tc.metrics.RestartsSerializable.Inc(1)
case roachpb.RETRY_POSSIBLE_REPLAY:
tc.metrics.RestartsPossibleReplay.Inc(1)
case roachpb.RETRY_ASYNC_WRITE_FAILURE:
tc.metrics.RestartsAsyncWriteFailure.Inc(1)
}
}
errTxnID := pErr.GetTxn().ID
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ export default function (props: GraphDashboardProps) {
<Metric name="cr.node.txn.restarts.deleterange" title="Forwarded Timestamp (delete range)" nonNegativeRate />
<Metric name="cr.node.txn.restarts.serializable" title="Forwarded Timestamp (iso=serializable)" nonNegativeRate />
<Metric name="cr.node.txn.restarts.possiblereplay" title="Possible Replay" nonNegativeRate />
<Metric name="cr.node.txn.restarts.asyncwritefailure" title="Async Consensus Failure" nonNegativeRate />
</Axis>
</LineGraph>,

Expand Down

0 comments on commit 1aa2063

Please sign in to comment.