Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 1 addition & 6 deletions go/vt/topo/tablet.go
Original file line number Diff line number Diff line change
Expand Up @@ -338,15 +338,10 @@ func (ts *Server) CreateTablet(ctx context.Context, tablet *topodatapb.Tablet) e
return err
}
tabletPath := path.Join(TabletsPath, topoproto.TabletAliasString(tablet.Alias), TabletFile)
if _, err = conn.Create(ctx, tabletPath, data); err != nil && !IsErrType(err, NodeExists) {
if _, err = conn.Create(ctx, tabletPath, data); err != nil {
return err
}

// Update ShardReplication in any case, to be sure. This is
// meant to fix the case when a Tablet record was created, but
// then the ShardReplication record was not (because for
// instance of a startup timeout). Upon running this code
// again, we want to fix ShardReplication.
if updateErr := UpdateTabletReplicationData(ctx, ts, tablet); updateErr != nil {
return updateErr
}
Expand Down
23 changes: 0 additions & 23 deletions go/vt/topo/topotests/tablet_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@ import (
"github.com/golang/protobuf/proto"
"golang.org/x/net/context"

"vitess.io/vitess/go/vt/topo"
"vitess.io/vitess/go/vt/topo/memorytopo"

topodatapb "vitess.io/vitess/go/vt/proto/topodata"
Expand Down Expand Up @@ -61,26 +60,4 @@ func TestCreateTablet(t *testing.T) {
if err != nil || len(sri.Nodes) != 1 || !proto.Equal(sri.Nodes[0].TabletAlias, alias) {
t.Fatalf("Created ShardReplication doesn't match: %v %v", sri, err)
}

// Create the same tablet again, make sure it fails with ErrNodeExists.
if err := ts.CreateTablet(ctx, tablet); !topo.IsErrType(err, topo.NodeExists) {
t.Fatalf("CreateTablet(again) returned: %v", err)
}

// Remove the ShardReplication record, try to create the
// tablets again, make sure it's fixed.
if err := topo.RemoveShardReplicationRecord(ctx, ts, cell, keyspace, shard, alias); err != nil {
t.Fatalf("RemoveShardReplicationRecord failed: %v", err)
}
sri, err = ts.GetShardReplication(ctx, cell, keyspace, shard)
if err != nil || len(sri.Nodes) != 0 {
t.Fatalf("Modifed ShardReplication doesn't match: %v %v", sri, err)
}
if err := ts.CreateTablet(ctx, tablet); !topo.IsErrType(err, topo.NodeExists) {
t.Fatalf("CreateTablet(again and again) returned: %v", err)
}
sri, err = ts.GetShardReplication(ctx, cell, keyspace, shard)
if err != nil || len(sri.Nodes) != 1 || !proto.Equal(sri.Nodes[0].TabletAlias, alias) {
t.Fatalf("Created ShardReplication doesn't match: %v %v", sri, err)
}
}
9 changes: 9 additions & 0 deletions go/vt/vttablet/tabletmanager/init_tablet.go
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,15 @@ func (agent *ActionAgent) InitTablet(port, gRPCPort int32) error {
return fmt.Errorf("InitTablet failed because existing tablet keyspace and shard %v/%v differ from the provided ones %v/%v", oldTablet.Keyspace, oldTablet.Shard, tablet.Keyspace, tablet.Shard)
}

// Update ShardReplication in any case, to be sure. This is
// meant to fix the case when a Tablet record was created, but
// then the ShardReplication record was not (because for
// instance of a startup timeout). Upon running this code
// again, we want to fix ShardReplication.
if updateErr := topo.UpdateTabletReplicationData(ctx, agent.TopoServer, tablet); updateErr != nil {
return updateErr
}

// Then overwrite everything, ignoring version mismatch.
if err := agent.TopoServer.UpdateTablet(ctx, topo.NewTabletInfo(tablet, nil)); err != nil {
return fmt.Errorf("UpdateTablet failed: %v", err)
Expand Down
129 changes: 129 additions & 0 deletions go/vt/vttablet/tabletmanager/init_tablet_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ import (

"golang.org/x/net/context"

"github.com/golang/protobuf/proto"

"vitess.io/vitess/go/history"
"vitess.io/vitess/go/vt/dbconfigs"
"vitess.io/vitess/go/vt/mysqlctl/fakemysqldaemon"
Expand All @@ -31,6 +33,133 @@ import (
topodatapb "vitess.io/vitess/go/vt/proto/topodata"
)

// Init tablet fixes replication data when safe
func TestInitTabletFixesReplicationData(t *testing.T) {
ctx := context.Background()
cell := "cell1"
ts := memorytopo.NewServer(cell, "cell2")
tabletAlias := &topodatapb.TabletAlias{
Cell: cell,
Uid: 1,
}

// start with a tablet record that doesn't exist
port := int32(1234)
gRPCPort := int32(3456)
mysqlDaemon := fakemysqldaemon.NewFakeMysqlDaemon(nil)
agent := &ActionAgent{
TopoServer: ts,
TabletAlias: tabletAlias,
MysqlDaemon: mysqlDaemon,
DBConfigs: &dbconfigs.DBConfigs{},
BinlogPlayerMap: nil,
batchCtx: ctx,
History: history.New(historyLength),
_healthy: fmt.Errorf("healthcheck not run yet"),
}

// 1. Initialize the tablet as REPLICA.
*tabletHostname = "localhost"
*initKeyspace = "test_keyspace"
*initShard = "-C0"
*initTabletType = "replica"
tabletAlias = &topodatapb.TabletAlias{
Cell: cell,
Uid: 2,
}
agent.TabletAlias = tabletAlias
if err := agent.InitTablet(port, gRPCPort); err != nil {
t.Fatalf("InitTablet(type) failed: %v", err)
}
sri, err := ts.GetShardReplication(ctx, cell, *initKeyspace, "-c0")
if err != nil || len(sri.Nodes) != 1 || !proto.Equal(sri.Nodes[0].TabletAlias, tabletAlias) {
t.Fatalf("Created ShardReplication doesn't match: %v %v", sri, err)
}

// Remove the ShardReplication record, try to create the
// tablets again, make sure it's fixed.
if err := topo.RemoveShardReplicationRecord(ctx, ts, cell, *initKeyspace, "-c0", tabletAlias); err != nil {
t.Fatalf("RemoveShardReplicationRecord failed: %v", err)
}
sri, err = ts.GetShardReplication(ctx, cell, *initKeyspace, "-c0")
if err != nil || len(sri.Nodes) != 0 {
t.Fatalf("Modifed ShardReplication doesn't match: %v %v", sri, err)
}

// Initialize the same tablet again, CreateTablet will fail, but it should recreate shard replication data
if err := agent.InitTablet(port, gRPCPort); err != nil {
t.Fatalf("InitTablet(type) failed: %v", err)
}

sri, err = ts.GetShardReplication(ctx, cell, *initKeyspace, "-c0")
if err != nil || len(sri.Nodes) != 1 || !proto.Equal(sri.Nodes[0].TabletAlias, tabletAlias) {
t.Fatalf("Created ShardReplication doesn't match: %v %v", sri, err)
}
}

// This is a test to make sure a regression does not happen in the future.
// There is code in InitTablet that updates replication data if tablet fails
// to be created due to a NodeExists error. During this particular error we were not doing
// the sanity checks that the provided tablet was the same in the topo.
func TestInitTabletDoesNotUpdateReplicationDataForTabletInWrongShard(t *testing.T) {
ctx := context.Background()
ts := memorytopo.NewServer("cell1", "cell2")
tabletAlias := &topodatapb.TabletAlias{
Cell: "cell1",
Uid: 1,
}

// start with a tablet record that doesn't exist
port := int32(1234)
gRPCPort := int32(3456)
mysqlDaemon := fakemysqldaemon.NewFakeMysqlDaemon(nil)
agent := &ActionAgent{
TopoServer: ts,
TabletAlias: tabletAlias,
MysqlDaemon: mysqlDaemon,
DBConfigs: &dbconfigs.DBConfigs{},
BinlogPlayerMap: nil,
batchCtx: ctx,
History: history.New(historyLength),
_healthy: fmt.Errorf("healthcheck not run yet"),
}

// 1. Initialize the tablet as REPLICA.
*tabletHostname = "localhost"
*initKeyspace = "test_keyspace"
*initShard = "-C0"
*initTabletType = "replica"
tabletAlias = &topodatapb.TabletAlias{
Cell: "cell1",
Uid: 2,
}
agent.TabletAlias = tabletAlias
if err := agent.InitTablet(port, gRPCPort); err != nil {
t.Fatalf("InitTablet(type) failed: %v", err)
}
tabletAliases, err := ts.FindAllTabletAliasesInShard(ctx, "test_keyspace", "-c0")
if err != nil {
t.Fatalf("Could not fetch tablet aliases for shard: %v", err)
}

if len(tabletAliases) != 1 {
t.Fatalf("Expected to have only one tablet alias, got: %v", len(tabletAliases))
}
if tabletAliases[0].Uid != 2 {
t.Fatalf("Expected table UID be equal to 2, got: %v", tabletAliases[0].Uid)
}

// Try to initialize a tablet with the same uid in a different shard.
*initShard = "-D0"
if err := agent.InitTablet(port, gRPCPort); err == nil {
t.Fatalf("InitTablet(type) should have failed, got nil")
}

if _, err = ts.FindAllTabletAliasesInShard(ctx, "test_keyspace", "-d0"); err == nil {
t.Fatalf("Tablet shouldn't be added to replication data")
}
}

// TestInitTablet will test the InitTablet code creates / updates the
// tablet node correctly. Note we modify global parameters (the flags)
// so this has to be in one test.
Expand Down