From b1f6e2cdd3ca3cf106188cedd5d1c1863823063e Mon Sep 17 00:00:00 2001 From: Nathan VanBenschoten Date: Mon, 3 Sep 2018 23:57:03 -0400 Subject: [PATCH] kv/ui: add metric and graph for AsyncWriteFailure restarts Informs #28876. This change adds a new metric called `txn.restarts.asyncwritefailure`, along with a corresponding series to the admin ui's `KV Transaction Restarts` graph. These transaction restarts are caused by async consensus writes which fail while a transaction is attempting to pipeline writes. The restart occurs when the transaction visits the write's key while attempting to prove that the write succeeded and finds a missing intent. Release note: None --- pkg/kv/txn_coord_sender.go | 38 ++++++++++++------- .../nodeGraphs/dashboards/distributed.tsx | 1 + 2 files changed, 25 insertions(+), 14 deletions(-) diff --git a/pkg/kv/txn_coord_sender.go b/pkg/kv/txn_coord_sender.go index 3b727e759757..902d525813ca 100644 --- a/pkg/kv/txn_coord_sender.go +++ b/pkg/kv/txn_coord_sender.go @@ -244,10 +244,11 @@ type TxnMetrics struct { Restarts *metric.Histogram // Counts of restart types. - RestartsWriteTooOld *metric.Counter - RestartsDeleteRange *metric.Counter - RestartsSerializable *metric.Counter - RestartsPossibleReplay *metric.Counter + RestartsWriteTooOld *metric.Counter + RestartsDeleteRange *metric.Counter + RestartsSerializable *metric.Counter + RestartsPossibleReplay *metric.Counter + RestartsAsyncWriteFailure *metric.Counter } var ( @@ -314,22 +315,29 @@ var ( Measurement: "Restarted Transactions", Unit: metric.Unit_COUNT, } + metaRestartsAsyncWriteFailure = metric.Metadata{ + Name: "txn.restarts.asyncwritefailure", + Help: "Number of restarts due to async consensus writes that failed to leave intents", + Measurement: "Restarted Transactions", + Unit: metric.Unit_COUNT, + } ) // MakeTxnMetrics returns a TxnMetrics struct that contains metrics whose // windowed portions retain data for approximately histogramWindow. func MakeTxnMetrics(histogramWindow time.Duration) TxnMetrics { return TxnMetrics{ - Aborts: metric.NewCounterWithRates(metaAbortsRates), - Commits: metric.NewCounterWithRates(metaCommitsRates), - Commits1PC: metric.NewCounterWithRates(metaCommits1PCRates), - AutoRetries: metric.NewCounterWithRates(metaAutoRetriesRates), - Durations: metric.NewLatency(metaDurationsHistograms, histogramWindow), - Restarts: metric.NewHistogram(metaRestartsHistogram, histogramWindow, 100, 3), - RestartsWriteTooOld: metric.NewCounter(metaRestartsWriteTooOld), - RestartsDeleteRange: metric.NewCounter(metaRestartsDeleteRange), - RestartsSerializable: metric.NewCounter(metaRestartsSerializable), - RestartsPossibleReplay: metric.NewCounter(metaRestartsPossibleReplay), + Aborts: metric.NewCounterWithRates(metaAbortsRates), + Commits: metric.NewCounterWithRates(metaCommitsRates), + Commits1PC: metric.NewCounterWithRates(metaCommits1PCRates), + AutoRetries: metric.NewCounterWithRates(metaAutoRetriesRates), + Durations: metric.NewLatency(metaDurationsHistograms, histogramWindow), + Restarts: metric.NewHistogram(metaRestartsHistogram, histogramWindow, 100, 3), + RestartsWriteTooOld: metric.NewCounter(metaRestartsWriteTooOld), + RestartsDeleteRange: metric.NewCounter(metaRestartsDeleteRange), + RestartsSerializable: metric.NewCounter(metaRestartsSerializable), + RestartsPossibleReplay: metric.NewCounter(metaRestartsPossibleReplay), + RestartsAsyncWriteFailure: metric.NewCounter(metaRestartsAsyncWriteFailure), } } @@ -775,6 +783,8 @@ func (tc *TxnCoordSender) handleRetryableErrLocked( tc.metrics.RestartsSerializable.Inc(1) case roachpb.RETRY_POSSIBLE_REPLAY: tc.metrics.RestartsPossibleReplay.Inc(1) + case roachpb.RETRY_ASYNC_WRITE_FAILURE: + tc.metrics.RestartsAsyncWriteFailure.Inc(1) } } errTxnID := pErr.GetTxn().ID diff --git a/pkg/ui/src/views/cluster/containers/nodeGraphs/dashboards/distributed.tsx b/pkg/ui/src/views/cluster/containers/nodeGraphs/dashboards/distributed.tsx index a515b8988ec3..b9febc7174fb 100644 --- a/pkg/ui/src/views/cluster/containers/nodeGraphs/dashboards/distributed.tsx +++ b/pkg/ui/src/views/cluster/containers/nodeGraphs/dashboards/distributed.tsx @@ -46,6 +46,7 @@ export default function (props: GraphDashboardProps) { + ,