From 31c16e2a52c6750f185b0584b7f64e85bf3b2ecc Mon Sep 17 00:00:00 2001
From: Manish R Jain
Date: Tue, 21 Apr 2020 19:43:58 -0700
Subject: [PATCH] Avoid crashing live loader in case the network is interrupted. (#5268)

Live loader currently runs `x.Fatalf` the moment it has a connection interrupt. Instead, it should just retry indefinitely.

Also, remove a `%+v` error print for aborting transactions, which causes the entire error stack trace to be printed, which makes it look like a crash.
---
 dgraph/cmd/live/batch.go | 7 ++++++-
 worker/draft.go          | 2 +-
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/dgraph/cmd/live/batch.go b/dgraph/cmd/live/batch.go
index f30c0881551..7011a0e23dd 100644
--- a/dgraph/cmd/live/batch.go
+++ b/dgraph/cmd/live/batch.go
@@ -114,7 +114,12 @@ func handleError(err error, isRetry bool) {
 	s := status.Convert(err)
 	switch {
 	case s.Code() == codes.Internal, s.Code() == codes.Unavailable:
-		x.Fatalf(s.Message())
+		// Let us not crash live loader due to this. Instead, we should infinitely retry to
+		// reconnect and retry the request.
+		dur := time.Duration(1+rand.Intn(60)) * time.Second
+		fmt.Printf("Connection has been possibly interrupted. Got error: %v."+
+			" Will retry after %s.\n", err, dur.Round(time.Second))
+		time.Sleep(dur)
 	case strings.Contains(s.Message(), "x509"):
 		x.Fatalf(s.Message())
 	case s.Code() == codes.Aborted:
diff --git a/worker/draft.go b/worker/draft.go
index dcfa130a2ba..eaf00164822 100644
--- a/worker/draft.go
+++ b/worker/draft.go
@@ -1289,7 +1289,7 @@ func (n *node) abortOldTransactions() {
 	glog.Infof("Found %d old transactions. Acting to abort them.\n", len(starts))
 	req := &pb.TxnTimestamps{Ts: starts}
 	err := n.blockingAbort(req)
-	glog.Infof("Done abortOldTransactions for %d txns. Error: %+v\n", len(req.Ts), err)
+	glog.Infof("Done abortOldTransactions for %d txns. Error: %v\n", len(req.Ts), err)
 }
 
 // calculateSnapshot would calculate a snapshot index, considering these factors: