Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Simplify design and make tablet moves robust #2800

Merged
merged 22 commits into from
Jan 14, 2019
Merged
Show file tree
Hide file tree
Changes from 11 commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
edcb804
Start work on tablet move txn failures.
manishrjain Nov 25, 2018
34cbaef
Merge branch 'master' into mrjn/move-tablet
manishrjain Nov 28, 2018
cd82e30
Add some tracing as well.
manishrjain Nov 28, 2018
bc7a46a
Merge branch 'master' into mrjn/move-tablet
manishrjain Nov 29, 2018
471bf27
Start work on simplifying tablet move.
manishrjain Nov 29, 2018
d9020f9
Merge branch 'master' into mrjn/move-tablet
manishrjain Nov 30, 2018
f2c2244
More work on predicate movement in Zero. Need to remove moveTablet fu…
manishrjain Nov 30, 2018
09ff289
Remove moveTablet and custom logic in favor of runRecovery.
manishrjain Nov 30, 2018
eedb49c
Some cleanup. Think we can do away with Zero proposals during move.
manishrjain Nov 30, 2018
8be8e6a
Rewrote how predicate move is being handled at Zero. No need to propo…
manishrjain Nov 30, 2018
0b8dd83
Working move tablets.
manishrjain Nov 30, 2018
972359e
Remove the simulated error for Jepsen.
manishrjain Nov 30, 2018
4682cae
Add more spans.
manishrjain Nov 30, 2018
d70d3fe
Use a pointer to mutex to avoid a mutex copy during locking.
manishrjain Dec 13, 2018
c9d0b0b
Use x.TxnWriter to avoid a bug which was causing sync.WaitGroup to no…
manishrjain Dec 14, 2018
920d562
More tracing.
manishrjain Dec 14, 2018
68f2198
Merge with master. Has a lot of breaking changes.
manishrjain Jan 13, 2019
856289e
Wao. Build is fine.
manishrjain Jan 13, 2019
0643a67
Self review
manishrjain Jan 13, 2019
2e9312d
Remove TODOs by using move payload TxnTs.
manishrjain Jan 14, 2019
ceb8b92
Set glog logger for Badger logs, so the log output is consistent.
manishrjain Jan 14, 2019
314dda7
Rename tabletBlocked to isBlocked
manishrjain Jan 14, 2019
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions dgraph/cmd/zero/http.go
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,12 @@ func (st *state) moveTablet(w http.ResponseWriter, r *http.Request) {
return
}

if !st.node.AmLeader() {
w.WriteHeader(http.StatusBadRequest)
x.SetStatus(w, x.ErrorInvalidRequest, "This Zero server is not the leader. Re-run command on leader.")

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

line is 104 characters (from lll)

return
}

tablet := r.URL.Query().Get("tablet")
if len(tablet) == 0 {
w.WriteHeader(http.StatusBadRequest)
Expand Down Expand Up @@ -183,6 +189,8 @@ func (st *state) moveTablet(w http.ResponseWriter, r *http.Request) {
}

if err := st.zero.movePredicate(tablet, srcGroup, dstGroup); err != nil {
glog.Errorf("While moving predicate %s from %d -> %d. Error: %v",
tablet, srcGroup, dstGroup, err)
w.WriteHeader(http.StatusInternalServerError)
x.SetStatus(w, x.Error, err.Error())
return
Expand Down
77 changes: 51 additions & 26 deletions dgraph/cmd/zero/oracle.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,10 @@ package zero

import (
"errors"
"fmt"
"math/rand"
"strconv"
"strings"
"time"

"github.com/dgraph-io/badger/y"
Expand Down Expand Up @@ -328,33 +331,55 @@ func (s *Server) commit(ctx context.Context, src *api.TxnContext) error {
return s.proposeTxn(ctx, src)
}

// Check if any of these tablets is being moved. If so, abort the transaction.
preds := make(map[string]struct{})
// _predicate_ would never be part of conflict detection, so keys corresponding to any
// modifications to this predicate would not be sent to Zero. But, we still need to abort
// transactions which are coming in, while this predicate is being moved. This means that if
// _predicate_ expansion is enabled, and a move for this predicate is happening, NO transactions
// across the entire cluster would commit. Sorry! But if we don't do this, we might lose commits
// which sneaked in during the move.

// Ensure that we only consider checking _predicate_, if expand_edge flag is
// set to true, i.e. we are actually serving _predicate_. Otherwise, the
// code below, which checks if a tablet is present or is readonly, causes
// ALL txns to abort. See #2547.
if tablet := s.ServingTablet("_predicate_"); tablet != nil {
preds["_predicate_"] = struct{}{}
}

for _, k := range src.Preds {
preds[k] = struct{}{}
}
for pred := range preds {
tablet := s.ServingTablet(pred)
if tablet == nil || tablet.GetReadOnly() {
span.Annotate(nil, "Tablet is readonly. Aborting.")
src.Aborted = true
return s.proposeTxn(ctx, src)
checkPreds := func() error {
// Check if any of these tablets is being moved. If so, abort the transaction.
preds := make(map[string]struct{})
// _predicate_ would never be part of conflict detection, so keys corresponding to any
// modifications to this predicate would not be sent to Zero. But, we still need to abort
// transactions which are coming in, while this predicate is being moved. This means that if
// _predicate_ expansion is enabled, and a move for this predicate is happening, NO transactions
// across the entire cluster would commit. Sorry! But if we don't do this, we might lose commits
// which sneaked in during the move.

// Ensure that we only consider checking _predicate_, if expand_edge flag is
// set to true, i.e. we are actually serving _predicate_. Otherwise, the
// code below, which checks if a tablet is present or is readonly, causes
// ALL txns to abort. See #2547.
if tablet := s.ServingTablet("_predicate_"); tablet != nil {
pkey := fmt.Sprintf("%d-_predicate_", tablet.GroupId)
preds[pkey] = struct{}{}
}
for _, k := range src.Preds {
preds[k] = struct{}{}
}
for pkey := range preds {
splits := strings.Split(pkey, "-")
if len(splits) < 2 {
return x.Errorf("Unable to find group id in %s", pkey)
}
gid, err := strconv.Atoi(splits[0])
if err != nil {
return x.Errorf("Unable to parse group id from %s. Error: %v", pkey, err)
}
pred := strings.Join(splits[1:], "")
tablet := s.ServingTablet(pred)
if tablet == nil {
return x.Errorf("Tablet for %s is nil", pred)
}
if tablet.GroupId != uint32(gid) {
return x.Errorf("Mutation done in group: %d. Predicate %s assigned to %d",
gid, pred, tablet.GroupId)
}
if s.tabletBlocked(pred) {
return x.Errorf("Commits on predicate %s are blocked due to predicate move", pred)
}
}
return nil
}
if err := checkPreds(); err != nil {
span.Annotate(nil, err.Error())
src.Aborted = true
return s.proposeTxn(ctx, src)
}

num := pb.Num{Val: 1}
Expand Down
4 changes: 1 addition & 3 deletions dgraph/cmd/zero/raft.go
Original file line number Diff line number Diff line change
Expand Up @@ -246,7 +246,6 @@ func (n *node) handleTabletProposal(tablet *pb.Tablet) error {
// only the first one succeeds.
if prev := n.server.servingTablet(tablet.Predicate); prev != nil {
if tablet.Force {
// TODO: Try and remove this whole Force flag logic.
originalGroup := state.Groups[prev.GroupId]
delete(originalGroup.Tablets, tablet.Predicate)
} else {
Expand All @@ -256,10 +255,9 @@ func (n *node) handleTabletProposal(tablet *pb.Tablet) error {
prev.Predicate, tablet.GroupId, prev.GroupId)
return errTabletAlreadyServed
}
// This update can come from tablet size.
tablet.ReadOnly = prev.ReadOnly
}
}
tablet.Force = false
group.Tablets[tablet.Predicate] = tablet
return nil
}
Expand Down
Loading