diff --git a/go/cmd/vtgr/main.go b/go/cmd/vtgr/main.go new file mode 100644 index 00000000000..fd5820a78bf --- /dev/null +++ b/go/cmd/vtgr/main.go @@ -0,0 +1,37 @@ +/* +Copyright 2021 The Vitess Authors. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package main + +import ( + "flag" + "strings" + + "golang.org/x/net/context" + + "vitess.io/vitess/go/vt/vtgr" +) + +func main() { + clustersToWatch := flag.String("clusters_to_watch", "", "Comma-separated list of keyspaces or keyspace/shards that this instance will monitor and repair. Defaults to all clusters in the topology. Example: \"ks1,ks2/-80\"") + flag.Parse() + + // openTabletDiscovery will open up a connection to topo server + // and populate the tablets in memory + vtgr := vtgr.OpenTabletDiscovery(context.Background(), nil, strings.Split(*clustersToWatch, ",")) + vtgr.RefreshCluster() + vtgr.ScanAndRepair() + + // block here so that we don't exit directly + select {} +} diff --git a/go/mysql/flavor_mysqlgr.go b/go/mysql/flavor_mysqlgr.go new file mode 100644 index 00000000000..e2ebe992fee --- /dev/null +++ b/go/mysql/flavor_mysqlgr.go @@ -0,0 +1,228 @@ +/* +Copyright 2021 The Vitess Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package mysql + +import ( + "errors" + "fmt" + "math" + + "vitess.io/vitess/go/vt/proto/vtrpc" + + "vitess.io/vitess/go/vt/vterrors" + + "vitess.io/vitess/go/sqltypes" +) + +// GRFlavorID is the string identifier for the MysqlGR flavor. +const GRFlavorID = "MysqlGR" + +// ErrNoGroupStatus means no status for group replication. +var ErrNoGroupStatus = errors.New("no group status") + +// mysqlGRFlavor implements the Flavor interface for Mysql. +type mysqlGRFlavor struct { + mysqlFlavor +} + +// newMysqlGRFlavor creates a new mysqlGR flavor. +func newMysqlGRFlavor() flavor { + return &mysqlGRFlavor{} +} + +// startReplicationCommand returns the command to start the replication. +// we return empty here since `START GROUP_REPLICATION` should be called by +// the external orchestrator +func (mysqlGRFlavor) startReplicationCommand() string { + return "" +} + +// restartReplicationCommands is disabled in mysqlGRFlavor +func (mysqlGRFlavor) restartReplicationCommands() []string { + return []string{} +} + +// startReplicationUntilAfter is disabled in mysqlGRFlavor +func (mysqlGRFlavor) startReplicationUntilAfter(pos Position) string { + return "" +} + +// stopReplicationCommand returns the command to stop the replication. 
+// we return empty here since `STOP GROUP_REPLICATION` should be called by +// the external orchestrator +func (mysqlGRFlavor) stopReplicationCommand() string { + return "" +} + +// stopIOThreadCommand is disabled in mysqlGRFlavor +func (mysqlGRFlavor) stopIOThreadCommand() string { + return "" +} + +// resetReplicationCommands is disabled in mysqlGRFlavor +func (mysqlGRFlavor) resetReplicationCommands(c *Conn) []string { + return []string{} +} + +// setReplicationPositionCommands is disabled in mysqlGRFlavor +func (mysqlGRFlavor) setReplicationPositionCommands(pos Position) []string { + return []string{} +} + +// status returns the result of the appropriate status command, +// with parsed replication position. +// +// Note: primary will skip this function, only replica will call it. +// TODO: Right now the GR's lag is defined as the lag between a node processing a txn +// and the time the txn was committed. We should consider reporting lag between current queueing txn timestamp +// from replication_connection_status and the current processing txn's commit timestamp +func (mysqlGRFlavor) status(c *Conn) (ReplicationStatus, error) { + res := ReplicationStatus{} + // Get master node information + query := `SELECT + MEMBER_HOST, + MEMBER_PORT + FROM + performance_schema.replication_group_members + WHERE + MEMBER_ROLE='PRIMARY' AND MEMBER_STATE='ONLINE'` + err := fetchStatusForGroupReplication(c, query, func(values []sqltypes.Value) error { + parsePrimaryGroupMember(&res, values) + return nil + }) + if err != nil { + return ReplicationStatus{}, err + } + + query = `SELECT + MEMBER_STATE + FROM + performance_schema.replication_group_members + WHERE + MEMBER_HOST=convert(@@hostname using ascii) AND MEMBER_PORT=@@port` + var chanel string + err = fetchStatusForGroupReplication(c, query, func(values []sqltypes.Value) error { + state := values[0].ToString() + if state == "ONLINE" { + chanel = "group_replication_applier" + } else if state == "RECOVERING" { + chanel = 
"group_replication_recovery" + } else { // OFFLINE, ERROR, UNREACHABLE + // If the member is not in healthy state, use max int as lag + res.SecondsBehindMaster = math.MaxUint32 + } + return nil + }) + if err != nil { + return ReplicationStatus{}, err + } + // if chanel is not set, it means the state is not ONLINE or RECOVERING + // return partial result early + if chanel == "" { + return res, nil + } + + // Populate IOThreadRunning from replication_connection_status + query = fmt.Sprintf(`SELECT SERVICE_STATE + FROM performance_schema.replication_connection_status + WHERE CHANNEL_NAME='%s'`, chanel) + var ioThreadRunning bool + err = fetchStatusForGroupReplication(c, query, func(values []sqltypes.Value) error { + ioThreadRunning = values[0].ToString() == "ON" + return nil + }) + if err != nil { + return ReplicationStatus{}, err + } + res.IOThreadRunning = ioThreadRunning + // Populate SQLThreadRunning from replication_connection_status + var sqlThreadRunning bool + query = fmt.Sprintf(`SELECT SERVICE_STATE + FROM performance_schema.replication_applier_status_by_coordinator + WHERE CHANNEL_NAME='%s'`, chanel) + err = fetchStatusForGroupReplication(c, query, func(values []sqltypes.Value) error { + sqlThreadRunning = values[0].ToString() == "ON" + return nil + }) + if err != nil { + return ReplicationStatus{}, err + } + res.SQLThreadRunning = sqlThreadRunning + + // Collect lag information + // we use the difference between the last processed transaction's commit time + // and the end buffer time as the proxy to the lag + query = fmt.Sprintf(`SELECT + TIMESTAMPDIFF(SECOND, LAST_PROCESSED_TRANSACTION_ORIGINAL_COMMIT_TIMESTAMP, LAST_PROCESSED_TRANSACTION_END_BUFFER_TIMESTAMP) + FROM + performance_schema.replication_applier_status_by_coordinator + WHERE + CHANNEL_NAME='%s'`, chanel) + err = fetchStatusForGroupReplication(c, query, func(values []sqltypes.Value) error { + parseReplicationApplierLag(&res, values) + return nil + }) + if err != nil { + return 
ReplicationStatus{}, err + } + return res, nil +} + +func parsePrimaryGroupMember(res *ReplicationStatus, row []sqltypes.Value) { + res.MasterHost = row[0].ToString() /* MEMBER_HOST */ + memberPort, _ := row[1].ToInt64() /* MEMBER_PORT */ + res.MasterPort = int(memberPort) +} + +func parseReplicationApplierLag(res *ReplicationStatus, row []sqltypes.Value) { + lagSec, err := row[0].ToInt64() + // if the error is not nil, SecondsBehindMaster will remain to be MaxUint32 + if err == nil { + // Only set where there is no error + // The value can be NULL when there is no replication applied yet + res.SecondsBehindMaster = uint(lagSec) + } +} + +func fetchStatusForGroupReplication(c *Conn, query string, onResult func([]sqltypes.Value) error) error { + qr, err := c.ExecuteFetch(query, 100, true /* wantfields */) + if err != nil { + return err + } + // if group replication related query returns 0 rows, it means the group replication is not set up + if len(qr.Rows) == 0 { + return ErrNoGroupStatus + } + if len(qr.Rows) > 1 { + return vterrors.Errorf(vtrpc.Code_INTERNAL, "unexpected results for %v: %v", query, qr.Rows) + } + return onResult(qr.Rows[0]) +} + +// primarymasterStatusStatus returns the result of 'SHOW MASTER STATUS', +// with parsed executed position. +func (mysqlGRFlavor) primaryStatus(c *Conn) (PrimaryStatus, error) { + return mysqlFlavor{}.primaryStatus(c) +} + +func (mysqlGRFlavor) baseShowTablesWithSizes() string { + return TablesWithSize80 +} + +func init() { + flavors[GRFlavorID] = newMysqlGRFlavor +} diff --git a/go/mysql/flavor_mysqlgr_test.go b/go/mysql/flavor_mysqlgr_test.go new file mode 100644 index 00000000000..22c2e5d2644 --- /dev/null +++ b/go/mysql/flavor_mysqlgr_test.go @@ -0,0 +1,53 @@ +/* +Copyright 2021 The Vitess Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +package mysql + +import ( + "testing" + + "gotest.tools/assert" + + "vitess.io/vitess/go/sqltypes" + querypb "vitess.io/vitess/go/vt/proto/query" +) + +func TestMysqlGRParsePrimaryGroupMember(t *testing.T) { + res := ReplicationStatus{} + rows := []sqltypes.Value{ + sqltypes.MakeTrusted(querypb.Type_VARCHAR, []byte("host1")), + sqltypes.MakeTrusted(querypb.Type_INT32, []byte("10")), + } + parsePrimaryGroupMember(&res, rows) + assert.Equal(t, "host1", res.MasterHost) + assert.Equal(t, 10, res.MasterPort) + assert.Equal(t, false, res.IOThreadRunning) + assert.Equal(t, false, res.SQLThreadRunning) +} + +func TestMysqlGRReplicationApplierLagParse(t *testing.T) { + res := ReplicationStatus{} + row := []sqltypes.Value{ + sqltypes.MakeTrusted(querypb.Type_INT32, []byte("NULL")), + } + parseReplicationApplierLag(&res, row) + // strconv.NumError will leave SecondsBehindMaster unset + assert.Equal(t, uint(0), res.SecondsBehindMaster) + row = []sqltypes.Value{ + sqltypes.MakeTrusted(querypb.Type_INT32, []byte("100")), + } + parseReplicationApplierLag(&res, row) + assert.Equal(t, uint(100), res.SecondsBehindMaster) +} diff --git a/go/test/endtoend/cluster/cluster_process.go b/go/test/endtoend/cluster/cluster_process.go index f0e1a9a8380..532792bdc9a 100644 --- a/go/test/endtoend/cluster/cluster_process.go +++ b/go/test/endtoend/cluster/cluster_process.go @@ -116,6 +116,7 @@ type Vttablet struct { MysqlctlProcess MysqlctlProcess MysqlctldProcess MysqlctldProcess VttabletProcess *VttabletProcess + VtgrProcess *VtgrProcess } // Keyspace : Cluster accepts keyspace to launch it @@ 
-724,6 +725,19 @@ func (cluster *LocalProcessCluster) NewOrcProcess(configFile string) *VtorcProce } } +// NewVtgrProcess creates a new VtgrProcess object +func (cluster *LocalProcessCluster) NewVtgrProcess(clusters []string, config string, grPort int) *VtgrProcess { + base := VtctlProcessInstance(cluster.TopoProcess.Port, cluster.Hostname) + base.Binary = "vtgr" + return &VtgrProcess{ + VtctlProcess: *base, + LogDir: cluster.TmpDirectory, + clusters: clusters, + config: config, + grPort: grPort, + } +} + // VtprocessInstanceFromVttablet creates a new vttablet object func (cluster *LocalProcessCluster) VtprocessInstanceFromVttablet(tablet *Vttablet, shardName string, ksName string) *VttabletProcess { return VttabletProcessInstance(tablet.HTTPPort, diff --git a/go/test/endtoend/cluster/vtgr_process.go b/go/test/endtoend/cluster/vtgr_process.go new file mode 100644 index 00000000000..362ea7cb75a --- /dev/null +++ b/go/test/endtoend/cluster/vtgr_process.go @@ -0,0 +1,104 @@ +/* +Copyright 2021 The Vitess Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package cluster + +import ( + "fmt" + "os" + "os/exec" + "path" + "strings" + "syscall" + "time" + + "vitess.io/vitess/go/vt/log" +) + +// VtgrProcess represents the vtgr process +type VtgrProcess struct { + VtctlProcess + LogDir string + ExtraArgs []string + clusters []string + config string + grPort int + proc *exec.Cmd + exit chan error +} + +// Start starts vtgr process with required arguements +func (vtgr *VtgrProcess) Start(alias string) (err error) { + /* minimal command line arguments: + $ vtgr -topo_implementation etcd2 \ + -topo_global_server_address localhost:2379 \ + -topo_global_root /vitess/global \ + -clusters_to_watch ks/0 + */ + vtgr.proc = exec.Command( + vtgr.Binary, + "-topo_implementation", vtgr.TopoImplementation, + "-topo_global_server_address", vtgr.TopoGlobalAddress, + "-topo_global_root", vtgr.TopoGlobalRoot, + "-tablet_manager_protocol", "grpc", + "-scan_repair_timeout", "50s", + "-clusters_to_watch", strings.Join(vtgr.clusters, ","), + ) + if vtgr.config != "" { + vtgr.proc.Args = append(vtgr.proc.Args, fmt.Sprintf("-config=%s", vtgr.config)) + } + if vtgr.grPort != 0 { + vtgr.proc.Args = append(vtgr.proc.Args, fmt.Sprintf("-gr_port=%d", vtgr.grPort)) + } + vtgr.proc.Args = append(vtgr.proc.Args, vtgr.ExtraArgs...) + errFile, _ := os.Create(path.Join(vtgr.LogDir, fmt.Sprintf("vtgr-stderr-%v.txt", alias))) + vtgr.proc.Stderr = errFile + vtgr.proc.Env = append(vtgr.proc.Env, os.Environ()...) 
+ log.Infof("Running vtgr with command: %v", strings.Join(vtgr.proc.Args, " ")) + err = vtgr.proc.Start() + if err != nil { + return + } + + vtgr.exit = make(chan error) + go func() { + if vtgr.proc != nil { + vtgr.exit <- vtgr.proc.Wait() + } + }() + + return nil +} + +// TearDown shuts down the running vtgr service +func (vtgr *VtgrProcess) TearDown() error { + if vtgr.proc == nil || vtgr.exit == nil { + return nil + } + // Attempt graceful shutdown with SIGTERM first + _ = vtgr.proc.Process.Signal(syscall.SIGTERM) + + select { + case <-vtgr.exit: + vtgr.proc = nil + return nil + + case <-time.After(10 * time.Second): + _ = vtgr.proc.Process.Kill() + vtgr.proc = nil + return <-vtgr.exit + } +} diff --git a/go/test/endtoend/cluster/vttablet_process.go b/go/test/endtoend/cluster/vttablet_process.go index 5b5d18a877b..e8f70afcb8b 100644 --- a/go/test/endtoend/cluster/vttablet_process.go +++ b/go/test/endtoend/cluster/vttablet_process.go @@ -71,6 +71,7 @@ type VttabletProcess struct { DbPassword string DbPort int VreplicationTabletType string + DbFlavor string //Extra Args to be set before starting the vttablet process ExtraArgs []string @@ -117,6 +118,9 @@ func (vttablet *VttabletProcess) Setup() (err error) { if vttablet.EnableSemiSync { vttablet.proc.Args = append(vttablet.proc.Args, "-enable_semi_sync") } + if vttablet.DbFlavor != "" { + vttablet.proc.Args = append(vttablet.proc.Args, fmt.Sprintf("-db_flavor=%s", vttablet.DbFlavor)) + } vttablet.proc.Args = append(vttablet.proc.Args, vttablet.ExtraArgs...) 
fname := path.Join(vttablet.LogDir, vttablet.TabletPath+"-vttablet-stderr.txt") diff --git a/go/test/endtoend/vtgr/my.cnf b/go/test/endtoend/vtgr/my.cnf new file mode 100644 index 00000000000..14185182e5a --- /dev/null +++ b/go/test/endtoend/vtgr/my.cnf @@ -0,0 +1,41 @@ +[mysqld] +innodb_log_file_size=4GB +innodb_flush_neighbors=0 +innodb_log_buffer_size=67108864 +innodb_buffer_pool_size=96GB +innodb_buffer_pool_instances=16 +innodb_io_capacity=100 + +log_error_verbosity=3 + +# binlog appliers +slave_parallel_type=LOGICAL_CLOCK +slave_preserve_commit_order=1 +binlog_transaction_dependency_tracking=WRITESET_SESSION +slave_parallel_workers=32 +sync_relay_log=0 +relay_log_recovery=1 + +plugin-load-add='mysql_clone.so' +plugin-load-add='group_replication.so' + +gtid_mode=ON +enforce_gtid_consistency=ON +log_slave_updates=ON +binlog_format=ROW + +# Group replication +loose_group_replication_start_on_boot=OFF +loose_group_replication_bootstrap_group=OFF +# use auto-rejoin instead of expel timeout so that we can remove the group member +# loose_group_replication_member_expel_timeout=0 +loose_group_replication_autorejoin_tries=3 +loose_group_replication_exit_state_action=OFFLINE_MODE +loose_group_replication_communication_debug_options='GCS_DEBUG_BASIC,XCOM_DEBUG_BASIC' +loose_group-replication-recovery-retry-count=3 +loose-group_replication_ssl_mode = REQUIRED +loose-group_replication_recovery_use_ssl = 1 +loose-group_replication_ip_whitelist = "0.0.0.0/0" + +# Set multi-primary mode +loose-group_replication_single_primary_mode = ON \ No newline at end of file diff --git a/go/test/endtoend/vtgr/test_config.json b/go/test/endtoend/vtgr/test_config.json new file mode 100644 index 00000000000..03cf0e49701 --- /dev/null +++ b/go/test/endtoend/vtgr/test_config.json @@ -0,0 +1,9 @@ +{ + "MySQLTopologyUser": "orc_client_user", + "MySQLTopologyPassword": "orc_client_user_password", + "MySQLReplicaUser": "vt_repl", + "MySQLReplicaPassword": "", + "InstancePollSeconds": 1, + 
"MySQLConnectTimeoutSeconds": 50, + "MySQLTopologyReadTimeoutSeconds": 50 +} diff --git a/go/test/endtoend/vtgr/vtgr_test.go b/go/test/endtoend/vtgr/vtgr_test.go new file mode 100644 index 00000000000..e239fdffd78 --- /dev/null +++ b/go/test/endtoend/vtgr/vtgr_test.go @@ -0,0 +1,366 @@ +/* +Copyright 2021 The Vitess Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package vtgr + +import ( + "fmt" + "os" + "os/exec" + "path" + "strconv" + "strings" + "testing" + "time" + + "vitess.io/vitess/go/sqltypes" + + "github.com/stretchr/testify/require" + "gotest.tools/assert" + + "vitess.io/vitess/go/json2" + "vitess.io/vitess/go/test/endtoend/cluster" + topodatapb "vitess.io/vitess/go/vt/proto/topodata" +) + +// To run this test locally on MacOS, set hostname to localhost first: +// $ sudo scutil --set HostName localhost + +func createCluster(t *testing.T, numReplicas int) *cluster.LocalProcessCluster { + keyspaceName := "ks" + shardName := "0" + keyspace := &cluster.Keyspace{Name: keyspaceName} + shard0 := &cluster.Shard{Name: shardName} + hostname := "localhost" + cell1 := "zone1" + tablets := []*cluster.Vttablet{} + clusterInstance := cluster.NewCluster(cell1, hostname) + + os.Setenv("EXTRA_MY_CNF", path.Join(os.Getenv("PWD"), "my.cnf")) + + // Start topo server + err := clusterInstance.StartTopo() + require.NoError(t, err) + + uidBase := 100 + for i := 0; i < numReplicas; i++ { + tablet := clusterInstance.NewVttabletInstance("replica", uidBase+i, cell1) + tablets = 
append(tablets, tablet) + } + + // Initialize Cluster + shard0.Vttablets = tablets + err = clusterInstance.SetupCluster(keyspace, []cluster.Shard{*shard0}) + require.NoError(t, err) + + // Start MySql + var mysqlCtlProcessList []*exec.Cmd + for _, tablet := range shard0.Vttablets { + proc, err := tablet.MysqlctlProcess.StartProcess() + require.NoError(t, err) + mysqlCtlProcessList = append(mysqlCtlProcessList, proc) + } + + // Wait for mysql processes to start + for _, proc := range mysqlCtlProcessList { + err := proc.Wait() + require.NoError(t, err) + } + for _, tablet := range shard0.Vttablets { + // Reset status, don't wait for the tablet status. We will check it later + tablet.VttabletProcess.ServingStatus = "" + tablet.VttabletProcess.DbFlavor = "MysqlGR" + // If we enable backup the GR setup is a bit wacky + tablet.VttabletProcess.SupportsBackup = false + // Start the tablet + err := tablet.VttabletProcess.Setup() + require.NoError(t, err) + } + + // Start vtgr - we deploy vtgr on the tablet node in the test + baseGrPort := 33061 + for i, tablet := range shard0.Vttablets { + tablet.VtgrProcess = clusterInstance.NewVtgrProcess( + []string{fmt.Sprintf("%s/%s", keyspaceName, shardName)}, + path.Join(os.Getenv("PWD"), "test_config.json"), + baseGrPort+i, + ) + } + + for _, tablet := range shard0.Vttablets { + err := tablet.VttabletProcess.WaitForTabletTypes([]string{"NOT_SERVING"}) + require.NoError(t, err) + } + return clusterInstance +} + +func killTablets(t *testing.T, shard *cluster.Shard) { + for _, tablet := range shard.Vttablets { + if tablet.VtgrProcess != nil { + err := tablet.VtgrProcess.TearDown() + require.NoError(t, err) + } + err := tablet.VttabletProcess.TearDown() + require.NoError(t, err) + } +} + +func TestBasicSetup(t *testing.T) { + defer cluster.PanicHandler(t) + clusterInstance := createCluster(t, 2) + keyspace := &clusterInstance.Keyspaces[0] + shard0 := &keyspace.Shards[0] + defer func() { + clusterInstance.Teardown() + killTablets(t, 
shard0) + }() + for _, tablet := range shard0.Vttablets { + // Until there is a primary, all tablets are replica and should all be NOT_SERVING status + tab := getTablet(t, clusterInstance, tablet.Alias) + assert.Equal(t, tab.Type.String(), "REPLICA") + assert.Equal(t, tablet.VttabletProcess.GetTabletStatus(), "NOT_SERVING") + } + _, err := getPrimaryTablet(t, clusterInstance, keyspace.Name, shard0.Name) + assert.ErrorContains(t, err, "timeout looking for primary tablet") + + tablet1 := shard0.Vttablets[0] + query := `select count(*) + from performance_schema.replication_group_members + where MEMBER_STATE='ONLINE'` + var count int + err = getSQLResult(t, tablet1, query, func(values []sqltypes.Value) bool { + cnt, err := values[0].ToInt64() + if err != nil { + return false + } + count = int(cnt) + return true + }) + require.NoError(t, err) + require.NoError(t, err) + // without vtgr, tablet process will not create a mysql group + // and all the nodes are replicas type in NOT_SERVING state + assert.Equal(t, 0, int(count)) +} + +func TestVTGRSetup(t *testing.T) { + defer cluster.PanicHandler(t) + clusterInstance := createCluster(t, 2) + keyspace := &clusterInstance.Keyspaces[0] + shard0 := &keyspace.Shards[0] + defer func() { + clusterInstance.Teardown() + killTablets(t, shard0) + }() + for _, tablet := range shard0.Vttablets { + // Until there is a primary, all tablets are replica and should all be NOT_SERVING status + tab := getTablet(t, clusterInstance, tablet.Alias) + assert.Equal(t, tab.Type.String(), "REPLICA") + assert.Equal(t, tablet.VttabletProcess.GetTabletStatus(), "NOT_SERVING") + } + + // start VTGR processes + for _, tablet := range shard0.Vttablets { + err := tablet.VtgrProcess.Start(tablet.Alias) + require.NoError(t, err) + } + + // VTGR will pick one tablet as the primary + primaryAlias, err := getPrimaryTablet(t, clusterInstance, keyspace.Name, shard0.Name) + require.NoError(t, err) + require.NotEqual(t, nil, primaryAlias) + + tablet1 := 
shard0.Vttablets[0] + query := `select count(*) + from performance_schema.replication_group_members + where MEMBER_STATE='ONLINE'` + err = getSQLResult(t, tablet1, query, func(values []sqltypes.Value) bool { + cnt, err := values[0].ToInt64() + if err != nil { + return false + } + // VTGR should bootstrap the group and put the replica into the group + return cnt == 2 + }) + require.NoError(t, err) +} + +func TestVTGRWrongPrimaryTablet(t *testing.T) { + defer cluster.PanicHandler(t) + clusterInstance := createCluster(t, 2) + keyspace := &clusterInstance.Keyspaces[0] + shard0 := &keyspace.Shards[0] + defer func() { + clusterInstance.Teardown() + killTablets(t, shard0) + }() + for _, tablet := range shard0.Vttablets { + // Until there is a primary, all tablets are replica and should all be NOT_SERVING status + tab := getTablet(t, clusterInstance, tablet.Alias) + assert.Equal(t, tab.Type.String(), "REPLICA") + assert.Equal(t, tablet.VttabletProcess.GetTabletStatus(), "NOT_SERVING") + } + // start VTGR processes + for _, tablet := range shard0.Vttablets { + err := tablet.VtgrProcess.Start(tablet.Alias) + require.NoError(t, err) + } + // VTGR will pick one tablet as the primary + primaryAlias, err := getPrimaryTablet(t, clusterInstance, keyspace.Name, shard0.Name) + require.NoError(t, err) + require.NotEqual(t, nil, primaryAlias) + tablet := shard0.Vttablets[0] + query := `select member_id + from performance_schema.replication_group_members + where member_role='SECONDARY' and member_state='ONLINE'` + var member string + err = getSQLResult(t, tablet, query, func(values []sqltypes.Value) bool { + member = values[0].ToString() + return true + }) + require.NoError(t, err) + query = fmt.Sprintf(`select group_replication_set_as_primary('%s')`, member) + _, err = tablet.VttabletProcess.QueryTabletWithDB(query, "") + require.NoError(t, err) + + // Verify the mysql primary changed, and also the primary tablet changed as well + query = fmt.Sprintf(`select member_role from 
performance_schema.replication_group_members where member_id='%s'`, member) + err = getSQLResult(t, tablet, query, func(values []sqltypes.Value) bool { + return values[0].ToString() == "PRIMARY" + }) + require.NoError(t, err) + err = verifyPrimaryChange(t, clusterInstance, keyspace.Name, shard0.Name, primaryAlias) + require.NoError(t, err) +} + +func TestVTGRFailover(t *testing.T) { + defer cluster.PanicHandler(t) + clusterInstance := createCluster(t, 3) + keyspace := &clusterInstance.Keyspaces[0] + shard0 := &keyspace.Shards[0] + defer func() { + clusterInstance.Teardown() + killTablets(t, shard0) + }() + for _, tablet := range shard0.Vttablets { + // Until there is a primary, all tablets are replica and should all be NOT_SERVING status + tab := getTablet(t, clusterInstance, tablet.Alias) + assert.Equal(t, tab.Type.String(), "REPLICA") + assert.Equal(t, tablet.VttabletProcess.GetTabletStatus(), "NOT_SERVING") + } + // start VTGR processes + for _, tablet := range shard0.Vttablets { + err := tablet.VtgrProcess.Start(tablet.Alias) + require.NoError(t, err) + } + primaryAlias, err := getPrimaryTablet(t, clusterInstance, keyspace.Name, shard0.Name) + require.NoError(t, err) + // VTGR has init the cluster + require.NotEqual(t, "", primaryAlias) + primaryTablet := findTabletByAlias(shard0.Vttablets, primaryAlias) + require.NotNil(t, primaryTablet) + // Wait until there are two nodes in the group + query := `select count(*) from + performance_schema.replication_group_members + where MEMBER_STATE='ONLINE'` + err = getSQLResult(t, primaryTablet, query, func(values []sqltypes.Value) bool { + return values[0].ToString() == "3" + }) + require.NoError(t, err) + + // Now kill the primary + // VTGR should move mysql primary to a different node and change failover primary tablet + err = primaryTablet.VttabletProcess.TearDown() + require.NoError(t, err) + err = verifyPrimaryChange(t, clusterInstance, keyspace.Name, shard0.Name, primaryAlias) + require.NoError(t, err) + // now the 
primary has changed + primaryAlias, err = getPrimaryTablet(t, clusterInstance, keyspace.Name, shard0.Name) + require.NoError(t, err) + // verify on the _new_ primary node, we are running the mysql primary as well + primaryTablet = findTabletByAlias(shard0.Vttablets, primaryAlias) + require.NotNil(t, primaryTablet) + query = `SELECT count(*) FROM + performance_schema.replication_group_members + WHERE MEMBER_STATE='ONLINE' AND MEMBER_ROLE='PRIMARY' AND MEMBER_PORT=@@port` + err = getSQLResult(t, primaryTablet, query, func(values []sqltypes.Value) bool { + return values[0].ToString() == "1" + }) + require.NoError(t, err) +} + +func getTablet(t *testing.T, cluster *cluster.LocalProcessCluster, alias string) *topodatapb.Tablet { + result, err := cluster.VtctlclientProcess.ExecuteCommandWithOutput("GetTablet", alias) + require.NoError(t, err) + var tabletInfo *topodatapb.Tablet + err = json2.Unmarshal([]byte(result), &tabletInfo) + require.NoError(t, err) + return tabletInfo +} + +func findTabletByAlias(tablets []*cluster.Vttablet, alias *topodatapb.TabletAlias) *cluster.Vttablet { + for _, tablet := range tablets { + if tablet.Cell == alias.Cell && strings.HasSuffix(tablet.Alias, strconv.Itoa(int(alias.Uid))) { + return tablet + } + } + return nil +} + +func verifyPrimaryChange(t *testing.T, cluster *cluster.LocalProcessCluster, ks, shard string, old *topodatapb.TabletAlias) error { + timeToWait := time.Now().Add(180 * time.Second) + for time.Now().Before(timeToWait) { + time.Sleep(1 * time.Second) + result, err := cluster.VtctlclientProcess.ExecuteCommandWithOutput("GetShard", fmt.Sprintf("%s/%s", ks, shard)) + require.NoError(t, err) + var shardInfo topodatapb.Shard + err = json2.Unmarshal([]byte(result), &shardInfo) + require.NoError(t, err) + if shardInfo.MasterAlias.String() != old.String() { + return nil + } + } + return fmt.Errorf("fail to verify primary change") +} + +func getPrimaryTablet(t *testing.T, cluster *cluster.LocalProcessCluster, ks, shard string) 
(*topodatapb.TabletAlias, error) { + timeToWait := time.Now().Add(180 * time.Second) + for time.Now().Before(timeToWait) { + time.Sleep(1 * time.Second) + result, err := cluster.VtctlclientProcess.ExecuteCommandWithOutput("GetShard", fmt.Sprintf("%s/%s", ks, shard)) + require.NoError(t, err) + var shardInfo topodatapb.Shard + err = json2.Unmarshal([]byte(result), &shardInfo) + require.NoError(t, err) + if shardInfo.MasterAlias != nil { + return shardInfo.MasterAlias, nil + } + } + return nil, fmt.Errorf("timeout looking for primary tablet") +} + +func getSQLResult(t *testing.T, tablet *cluster.Vttablet, query string, check func([]sqltypes.Value) bool) error { + timeToWait := time.Now().Add(180 * time.Second) + for time.Now().Before(timeToWait) { + time.Sleep(1 * time.Second) + qr, err := tablet.VttabletProcess.QueryTabletWithDB(query, "") + require.NoError(t, err) + if len(qr.Rows) == 1 && check(qr.Rows[0]) { + return nil + } + } + return fmt.Errorf("timeout waiting for sql result") +} diff --git a/go/vt/orchestrator/inst/instance_dao.go b/go/vt/orchestrator/inst/instance_dao.go index d972b6c62df..4fc9c363f48 100644 --- a/go/vt/orchestrator/inst/instance_dao.go +++ b/go/vt/orchestrator/inst/instance_dao.go @@ -76,10 +76,11 @@ const ( GroupReplicationMemberRolePrimary = "PRIMARY" GroupReplicationMemberRoleSecondary = "SECONDARY" // Group member states - GroupReplicationMemberStateOnline = "ONLINE" - GroupReplicationMemberStateRecovering = "RECOVERING" - GroupReplicationMemberStateOffline = "OFFLINE" - GroupReplicationMemberStateError = "ERROR" + GroupReplicationMemberStateOnline = "ONLINE" + GroupReplicationMemberStateRecovering = "RECOVERING" + GroupReplicationMemberStateUnreachable = "UNREACHABLE" + GroupReplicationMemberStateOffline = "OFFLINE" + GroupReplicationMemberStateError = "ERROR" ) // We use this map to identify whether the query failed because the server does not support group replication or due diff --git a/go/vt/vtgr/config/vtgr_config.go 
b/go/vt/vtgr/config/vtgr_config.go new file mode 100644 index 00000000000..7c7dca177f0 --- /dev/null +++ b/go/vt/vtgr/config/vtgr_config.go @@ -0,0 +1,58 @@ +/* +Copyright 2021 The Vitess Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package config + +import ( + "encoding/json" + "os" +) + +// VTGRConfig is the config for VTGR +type VTGRConfig struct { + DisableReadOnlyProtection bool + GroupSize int + MinNumReplica int + BackoffErrorWaitTimeSeconds int + BootstrapWaitTimeSeconds int +} + +var vtgrCfg = newVTGRConfig() + +func newVTGRConfig() *VTGRConfig { + config := &VTGRConfig{ + DisableReadOnlyProtection: false, + GroupSize: 5, + MinNumReplica: 3, + BackoffErrorWaitTimeSeconds: 10, + BootstrapWaitTimeSeconds: 10 * 60, + } + return config +} + +// ReadVTGRConfig reads config for VTGR +func ReadVTGRConfig(file string) (*VTGRConfig, error) { + vtgrFile, err := os.Open(file) + if err != nil { + return nil, err + } + decoder := json.NewDecoder(vtgrFile) + err = decoder.Decode(vtgrCfg) + if err != nil { + return nil, err + } + return vtgrCfg, nil +} diff --git a/go/vt/vtgr/config/vtgr_config.json b/go/vt/vtgr/config/vtgr_config.json new file mode 100644 index 00000000000..a14412afafd --- /dev/null +++ b/go/vt/vtgr/config/vtgr_config.json @@ -0,0 +1,4 @@ +{ + "BackoffErrorWaitTimeSeconds": 5, + "GroupSize": 3 +} \ No newline at end of file diff --git a/go/vt/vtgr/config/vtgr_config_test.go b/go/vt/vtgr/config/vtgr_config_test.go new file mode 100644 index 
00000000000..631b421ebe4
--- /dev/null
+++ b/go/vt/vtgr/config/vtgr_config_test.go
@@ -0,0 +1,37 @@
/*
Copyright 2021 The Vitess Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

	http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package config

import (
	"os"
	"path/filepath"
	"testing"

	"github.com/stretchr/testify/assert"
)

// TestReadConfig loads the checked-in vtgr_config.json and verifies that
// fields absent from the file keep their defaults while present fields are
// overridden.
func TestReadConfig(t *testing.T) {
	path, _ := os.Getwd()
	config, err := ReadVTGRConfig(filepath.Join(path, "vtgr_config.json"))
	assert.NoError(t, err)
	// Make sure VTGR config honors the default setting
	assert.Equal(t, false, config.DisableReadOnlyProtection)
	assert.Equal(t, 600, config.BootstrapWaitTimeSeconds)
	// Make sure the config is load correctly
	assert.Equal(t, 3, config.GroupSize)
	assert.Equal(t, 5, config.BackoffErrorWaitTimeSeconds)
}
diff --git a/go/vt/vtgr/controller/controller.go b/go/vt/vtgr/controller/controller.go
new file mode 100644
index 00000000000..2b2c36cd320
--- /dev/null
+++ b/go/vt/vtgr/controller/controller.go
@@ -0,0 +1,26 @@
/*
Copyright 2021 The Vitess Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

	http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package controller

import (
	"math/rand"
	"time"
)

// init seeds math/rand once per process so rand-based choices elsewhere in
// this package (e.g. the instance shuffle in disconnectedInstance) vary
// between runs.
func init() {
	rand.Seed(time.Now().UnixNano())
}
diff --git a/go/vt/vtgr/controller/diagnose.go b/go/vt/vtgr/controller/diagnose.go
new file mode 100644
index 00000000000..fa092b69f0b
--- /dev/null
+++ b/go/vt/vtgr/controller/diagnose.go
@@ -0,0 +1,574 @@
/*
Copyright 2021 The Vitess Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

	http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package controller

import (
	"errors"
	"flag"
	"fmt"
	"math/rand"
	"os"
	"sort"
	"strings"
	"sync"
	"time"

	"golang.org/x/net/context"

	"vitess.io/vitess/go/mysql"
	"vitess.io/vitess/go/vt/concurrency"
	"vitess.io/vitess/go/vt/log"
	topodatapb "vitess.io/vitess/go/vt/proto/topodata"
	"vitess.io/vitess/go/vt/topo"
	"vitess.io/vitess/go/vt/vterrors"
	"vitess.io/vitess/go/vt/vtgr/db"
)

var (
	// pingTabletTimeout bounds how long instanceReachable waits for a tablet ping.
	pingTabletTimeout = flag.Duration("ping_tablet_timeout", 2*time.Second, "time to wait when we ping a tablet")
)

// DiagnoseType is the types of Diagnose result
type DiagnoseType string

// instanceGTIDSet pairs an instance with the GTID set it reported.
type instanceGTIDSet struct {
	gtids    mysql.GTIDSet
	instance *grInstance
}

// groupGTIDRecorder is used to help us query all the instance in parallel and record the result
// it helps us to take care of the consistency / synchronization among go routines
type groupGTIDRecorder struct {
	name              string
	gtidWithInstances []*instanceGTIDSet
	hasActive         bool
	
	sync.Mutex
}

const (
	// DiagnoseTypeError represents an DiagnoseTypeError status
	DiagnoseTypeError DiagnoseType = "error"
	// DiagnoseTypeHealthy represents everything is DiagnoseTypeHealthy
	DiagnoseTypeHealthy = "Healthy"
	// DiagnoseTypeShardHasNoGroup represents the cluster has not init yet
	DiagnoseTypeShardHasNoGroup = "ShardHasNoGroup"
	// DiagnoseTypeShardHasInactiveGroup represents the status where we have a group name but no member in it
	DiagnoseTypeShardHasInactiveGroup = "ShardHasInactiveGroup"
	// DiagnoseTypeInsufficientGroupSize represents the cluster has insufficient group members
	DiagnoseTypeInsufficientGroupSize = "InsufficientGroupSize"
	// DiagnoseTypeReadOnlyShard represents the cluster who has a read only node
	DiagnoseTypeReadOnlyShard = "ReadOnlyShard"
	// DiagnoseTypeUnreachablePrimary represents the primary tablet is unreachable
	DiagnoseTypeUnreachablePrimary = "UnreachablePrimary"
	// DiagnoseTypeWrongPrimaryTablet represents the primary tablet is incorrect based on mysql group
	DiagnoseTypeWrongPrimaryTablet = "WrongPrimaryTablet"
	// DiagnoseTypeUnconnectedReplica represents cluster with primary tablet, but a node is not connected to it
	DiagnoseTypeUnconnectedReplica = "UnconnectedReplica"
	// DiagnoseTypeBackoffError represents a transient error e.g., the primary is unreachable
	DiagnoseTypeBackoffError = "BackoffError"
	// DiagnoseTypeBootstrapBackoff represents an ongoing bootstrap
	DiagnoseTypeBootstrapBackoff = "BootstrapBackoff"

	// diagnoseTypeUnknown represents an unclear intermediate diagnose state
	diagnoseTypeUnknown = "Unknown"
)

// ScanAndRepairShard scans a particular shard by first Diagnose the shard with info from grShard
// and then repair the problem if the shard is unhealthy
func (shard *GRShard) ScanAndRepairShard(ctx context.Context) {
	status, err := shard.Diagnose(ctx)
	if err != nil {
		log.Errorf("fail to scanAndRepairShard %v/%v because of Diagnose error: %v",
			shard.KeyspaceShard.Keyspace, shard.KeyspaceShard.Shard, err)
		return
	}
	// We are able to get Diagnose without error
	//
	// Note: all the recovery function should first try to grab a shard level lock
	// and check the trigger conditions before doing anything. This is to avoid
	// other VTGR instance try to do the same thing
	log.Infof("%v status is %v", formatKeyspaceShard(shard.KeyspaceShard), status)
	if _, err := shard.Repair(ctx, status); err != nil {
		log.Errorf("failed to repair %v: %v", status, err)
	}
}

// Diagnose the shard in the following order:
// TODO: use FSM to make sure the status transition is correct
// 1. if the shard has a group that every node agreed on
// 2. if the group has any active (online / recovering) member
// 3. if the shard has initialized a Vitess primary
// 4. if primary tablet is reachable
// 5. if Vitess primary and mysql primary reconciled
// 6. if we have enough group members
// 7. if the primary node has read_only=OFF
// 8. 
if there is a node that is not in Mysql group
func (shard *GRShard) Diagnose(ctx context.Context) (DiagnoseType, error) {
	shard.Lock()
	defer shard.Unlock()
	diagnoseResult, err := shard.diagnoseLocked(ctx)
	shard.shardStatusCollector.recordDiagnoseResult(diagnoseResult)
	shard.populateVTGRStatusLocked()
	if diagnoseResult != DiagnoseTypeHealthy {
		log.Warningf(`VTGR diagnose shard as unhealthy for %s/%s: result=%v | last_result=%v | instances=%v | primary=%v | primary_tablet=%v | problematics=%v | unreachables=%v | SQL group=%v`,
			shard.KeyspaceShard.Keyspace, shard.KeyspaceShard.Shard,
			shard.shardStatusCollector.status.DiagnoseResult,
			shard.lastDiagnoseResult,
			shard.shardStatusCollector.status.Instances,
			shard.shardStatusCollector.status.Primary,
			shard.primaryTabletAlias(),
			shard.shardStatusCollector.status.Problematics,
			shard.shardStatusCollector.status.Unreachables,
			shard.sqlGroup.ToString())
	}
	// Track when the diagnose result last changed so repairs can reason
	// about how long the shard has been in its current state.
	if diagnoseResult != shard.lastDiagnoseResult {
		shard.lastDiagnoseResult = diagnoseResult
		shard.lastDiagnoseSince = time.Now()
	}
	return diagnoseResult, err
}

// diagnoseLocked runs the actual diagnosis; callers must hold the shard lock.
func (shard *GRShard) diagnoseLocked(ctx context.Context) (DiagnoseType, error) {
	// Fast path: if a local mysqld port is configured, try to diagnose from
	// the local group view alone before fanning out to every node.
	if shard.localDbPort != 0 {
		localView := shard.getLocalView()
		if localView != nil {
			fastDiagnose := shard.fastPathDiagnose(ctx, localView)
			if fastDiagnose != diagnoseTypeUnknown {
				// If we can use local sql group info to diagnose
				// we should record the view as well. 
This view is all we need
				// later VTGR needs to find group name, primary etc from
				// SQLGroup for repairing instead of getting nil
				shard.sqlGroup.overrideView([]*db.GroupView{localView})
				log.Infof("Diagnose %v from fast path", fastDiagnose)
				return fastDiagnose, nil
			}
		}
	}
	// fast path is disabled or cannot diagnose the shard
	// fall back to the normal strategy where we fetch info from all the nodes
	err := shard.refreshSQLGroup()
	if err != nil {
		if errors.Is(err, db.ErrGroupBackoffError) {
			return DiagnoseTypeBackoffError, nil
		}
		if errors.Is(err, db.ErrGroupOngoingBootstrap) {
			return DiagnoseTypeBootstrapBackoff, nil
		}
		return DiagnoseTypeError, vterrors.Wrap(err, "fail to refreshSQLGroup")
	}
	// First, we check if there is any group in the shard
	// if no, we should bootstrap one
	mysqlGroup := shard.shardAgreedGroupName()
	if mysqlGroup == "" {
		return DiagnoseTypeShardHasNoGroup, nil
	}
	// We handle the case where the shard has an agreed group name but all nodes are offline
	// In this situation, instead of bootstrap a group, we should re-build the
	// old group for the shard
	if shard.isAllOfflineOrError() {
		log.Info("Found all members are OFFLINE or ERROR")
		return DiagnoseTypeShardHasInactiveGroup, nil
	}

	// Secondly, we check if there is a primary tablet.
	// If there is a group but we cannot find a primary tablet
	// we should set it based on mysql group
	hasWrongPrimary, err := shard.hasWrongPrimaryTablet(ctx)
	if err != nil {
		// errMissingGroup means we cannot find a mysql group for the shard
		// we are in DiagnoseTypeShardHasNoGroup state
		if err == errMissingGroup {
			log.Warning("Missing mysql group")
			return DiagnoseTypeShardHasNoGroup, nil
		}
		// errMissingPrimaryTablet means we cannot find a tablet based on mysql primary
		// which means the tablet disconnected from topo server and we cannot find it
		if err == errMissingPrimaryTablet {
			return DiagnoseTypeUnreachablePrimary, nil
		}
		return DiagnoseTypeError, vterrors.Wrap(err, "fail to diagnose shardNeedsInitialized")
	}
	if hasWrongPrimary {
		return DiagnoseTypeWrongPrimaryTablet, nil
	}

	// Thirdly, we check if primary tablet is reachable
	isPrimaryReachable, err := shard.isPrimaryReachable(ctx)
	if err != nil {
		return DiagnoseTypeError, vterrors.Wrap(err, "fail to diagnose isPrimaryReachable")
	}
	if !isPrimaryReachable {
		return DiagnoseTypeUnreachablePrimary, nil
	}

	// Then we check if we satisfy the minimum replica requirement
	if shard.minNumReplicas > 0 {
		// At this point, the primary tablet should be consistent with mysql primary
		// so the view from primary tablet should be accurate
		onlineMembers, isReadOnly := shard.getOnlineGroupInfo()
		if onlineMembers >= shard.minNumReplicas && isReadOnly {
			return DiagnoseTypeReadOnlyShard, nil
		}
		// If we disable readonly protection and still found we have a read only shard,
		// we should return DiagnoseTypeReadOnlyShard so that VTGR can turn off read only
		if shard.disableReadOnlyProtection && isReadOnly {
			return DiagnoseTypeReadOnlyShard, nil
		}
		if !shard.disableReadOnlyProtection && onlineMembers < shard.minNumReplicas && !isReadOnly {
			return DiagnoseTypeInsufficientGroupSize, nil
		}
	}

	// Lastly, we check if there is a replica that is not 
connected to primary node
	disconnectedInstance, err := shard.disconnectedInstance()
	if err != nil {
		return DiagnoseTypeError, vterrors.Wrap(err, "fail to diagnose disconnectedInstance")
	}
	if disconnectedInstance != nil {
		return DiagnoseTypeUnconnectedReplica, nil
	}

	// If we get here, shard is DiagnoseTypeHealthy
	return DiagnoseTypeHealthy, nil
}

// getLocalView fetches the group view from the mysqld that shares this host
// with VTGR, or nil if no matching local tablet is found.
func (shard *GRShard) getLocalView() *db.GroupView {
	localHostname, _ := os.Hostname()
	localInst := shard.findTabletByHostAndPort(localHostname, shard.localDbPort)
	if localInst == nil {
		return nil
	}
	// TODO: consider using -db_socket to read local info
	view, err := shard.dbAgent.FetchGroupView(localInst.alias, localInst.instanceKey)
	// We still have the fallback logic if this failed, therefore we don't raise error
	// but try to get local view with best effort
	if err != nil {
		log.Errorf("failed to fetch local group view: %v", err)
	}
	return view
}

// fastPathDiagnose attempts a diagnosis from the local group view alone; it
// returns diagnoseTypeUnknown whenever the local view is not sufficient to
// decide, which sends the caller down the full fan-out path.
func (shard *GRShard) fastPathDiagnose(ctx context.Context, view *db.GroupView) DiagnoseType {
	pHost, pPort, isOnline := view.GetPrimaryView()
	primaryTablet := shard.findShardPrimaryTablet()
	if !isOnline || pHost == "" || pPort == 0 || primaryTablet == nil {
		return diagnoseTypeUnknown
	}
	// VTGR will only bootstrap a group when it observes same number of views as group_size
	// it means if we can find an ONLINE primary, we should be able to trust the view reported locally
	// together with the primary tablet from topo server, we can determine:
	// - if we need to failover vitess
	// - if we need to failover mysql
	if primaryTablet.instanceKey.Hostname != pHost || primaryTablet.instanceKey.Port != pPort {
		// we find a mismatch but if the reported mysql primary is not in
		// topology we should consider it as unreachable.
		if shard.findTabletByHostAndPort(pHost, pPort) == nil {
			return DiagnoseTypeUnreachablePrimary
		}
		return DiagnoseTypeWrongPrimaryTablet
	}
	if !shard.instanceReachable(ctx, primaryTablet) {
		return DiagnoseTypeUnreachablePrimary
	}
	return diagnoseTypeUnknown
}

// shardAgreedGroupName returns the group name the shard agreed on, or ""
// when the shard has no instances.
func (shard *GRShard) shardAgreedGroupName() string {
	if len(shard.instances) == 0 {
		return ""
	}
	return shard.sqlGroup.GetGroupName()
}

// isAllOfflineOrError reports whether every recorded group member is in the
// OFFLINE or ERROR state.
func (shard *GRShard) isAllOfflineOrError() bool {
	return shard.sqlGroup.IsAllOfflineOrError()
}

// getOnlineGroupInfo returns the number of ONLINE members and whether the
// group is read only.
func (shard *GRShard) getOnlineGroupInfo() (int, bool) {
	return shard.sqlGroup.GetOnlineGroupInfo()
}

// hasWrongPrimaryTablet reports whether the Vitess primary tablet disagrees
// with the mysql group primary; it returns errMissingGroup or
// errMissingPrimaryTablet when the group primary cannot be resolved.
func (shard *GRShard) hasWrongPrimaryTablet(ctx context.Context) (bool, error) {
	// Find out the hostname and port of the primary in mysql group
	// we try to use local instance and then fallback to a random instance to check mysqld
	// in case the primary is unreachable
	host, port, _ := shard.sqlGroup.GetPrimary()
	if !isHostPortValid(host, port) {
		log.Warningf("Invalid address for primary %v:%v", host, port)
		return false, errMissingGroup
	}
	// Make sure we have a tablet available
	// findTabletByHostAndPort returns nil when we cannot find a tablet
	// that is running on host:port, which means the tablet get stuck
	// or when the tablet is not reachable
	// we return errMissingPrimaryTablet so that VTGR will trigger a failover
	tablet := shard.findTabletByHostAndPort(host, port)
	if tablet == nil || !shard.instanceReachable(ctx, tablet) {
		log.Errorf("Failed to find tablet that is running with mysql on %v:%v", host, port)
		return false, errMissingPrimaryTablet
	}
	// Now we know we have a valid mysql primary in the group
	// we should make sure tablets are aligned with it
	primary := shard.findShardPrimaryTablet()
	// If we failed to find primary for shard, it mostly means we are initializing the shard
	// return true directly so that VTGR will set primary tablet according to MySQL group
	if primary == nil {
		
log.Infof("unable to find primary tablet for %v", formatKeyspaceShard(shard.KeyspaceShard)) + return true, nil + } + return (host != primary.instanceKey.Hostname) || (port != primary.instanceKey.Port), nil +} + +func (shard *GRShard) isPrimaryReachable(ctx context.Context) (bool, error) { + primaryTablet := shard.findShardPrimaryTablet() + if primaryTablet == nil { + return false, fmt.Errorf("unable to find primary for %v", formatKeyspaceShard(shard.KeyspaceShard)) + } + return shard.instanceReachable(ctx, primaryTablet), nil +} + +func (shard *GRShard) instanceReachable(ctx context.Context, instance *grInstance) bool { + pingCtx, cancel := context.WithTimeout(context.Background(), *pingTabletTimeout) + defer cancel() + c := make(chan error, 1) + // tmc.Ping create grpc client connection first without timeout via dial + // then call the grpc endpoint using the context with timeout + // this is problematic if the host is really unreachable, we have to wait the + // all the retries inside grpc.dial with exponential backoff + go func() { c <- shard.tmc.Ping(pingCtx, instance.tablet) }() + select { + case <-pingCtx.Done(): + log.Errorf("Ping abort timeout %v", *pingTabletTimeout) + return false + case err := <-c: + if err != nil { + log.Errorf("Ping error host=%v: %v", instance.instanceKey.Hostname, err) + } + return err == nil + } +} + +// findShardPrimaryTablet iterates through the replicas stored in grShard and returns +// the one that's marked as primary +func (shard *GRShard) findShardPrimaryTablet() *grInstance { + var latestMasterTimestamp time.Time + var primaryInstance *grInstance + foundPrimary := false + for _, instance := range shard.instances { + if instance.tablet.Type == topodatapb.TabletType_MASTER { + foundPrimary = true + // It is possible that there are more than one master in topo server + // we should compare timestamp to pick the latest one + if latestMasterTimestamp.Before(instance.masterTimeStamp) { + latestMasterTimestamp = 
instance.masterTimeStamp
				primaryInstance = instance
			}
		}
	}
	if !foundPrimary {
		return nil
	}
	return primaryInstance
}

// primaryTabletAlias returns the alias of the current primary tablet, or
// "UNKNOWN" when the shard has no primary.
func (shard *GRShard) primaryTabletAlias() string {
	primary := shard.findShardPrimaryTablet()
	if primary == nil {
		return "UNKNOWN"
	}
	return primary.alias
}

// disconnectedInstance iterates all the known replica records
// and checks mysql to see if the group replication is setup on it
func (shard *GRShard) disconnectedInstance() (*grInstance, error) {
	primaryInstance := shard.findShardPrimaryTablet()
	// if there is no primary, we should recover from DiagnoseTypeWrongPrimaryTablet
	if primaryInstance == nil {
		return nil, fmt.Errorf("%v does not have primary", formatKeyspaceShard(shard.KeyspaceShard))
	}
	// Up to this check, we know:
	// - shard has an agreed group
	// - shard has a primary tablet
	// - shard primary tablet is running on the same node as mysql
	// Shuffle so successive scans do not always report the same instance first.
	rand.Shuffle(len(shard.instances), func(i, j int) {
		shard.instances[i], shard.instances[j] = shard.instances[j], shard.instances[i]
	})
	for _, instance := range shard.instances {
		// Skip master because VTGR always join group and then update tablet type
		// which means if a tablet has type master then it should have a group already
		if instance.tablet.Type == topodatapb.TabletType_MASTER {
			continue
		}
		// Skip instance without hostname because they are not up and running
		// also skip instances that raised unrecoverable errors
		if shard.shardStatusCollector.isUnreachable(instance) {
			log.Infof("Skip %v to check disconnectedInstance because it is unhealthy", instance.alias)
			continue
		}
		isUnconnected := shard.sqlGroup.IsUnconnectedReplica(instance.instanceKey)
		if isUnconnected {
			return instance, nil
		}
	}
	return nil, nil
}

// recordGroupStatus records the group name observed by one node and whether
// that node saw an active member; nodes disagreeing on the name is an error.
func (recorder *groupGTIDRecorder) recordGroupStatus(name string, isActive bool) error {
	recorder.Lock()
	defer recorder.Unlock()
	if recorder.name != "" && recorder.name != name {
		return fmt.Errorf("group has more than one group name")
	}
	recorder.name = name
	// hasActive records true if any node finds an active member
	if isActive {
		recorder.hasActive = true
	}
	return nil
}

// recordGroupGTIDs appends the GTID set reported by instance.
func (recorder *groupGTIDRecorder) recordGroupGTIDs(gtids mysql.GTIDSet, instance *grInstance) {
	recorder.Lock()
	defer recorder.Unlock()
	recorder.gtidWithInstances = append(recorder.gtidWithInstances, &instanceGTIDSet{gtids: gtids, instance: instance})
}

// sort orders the recorded GTID sets by instance alias for deterministic output.
func (recorder *groupGTIDRecorder) sort() {
	sort.SliceStable(recorder.gtidWithInstances, func(i, j int) bool {
		return recorder.gtidWithInstances[i].instance.alias < recorder.gtidWithInstances[j].instance.alias
	})
}

// recordDiagnoseResult stores the latest diagnose result on the collector.
func (collector *shardStatusCollector) recordDiagnoseResult(result DiagnoseType) {
	collector.Lock()
	defer collector.Unlock()
	collector.status.DiagnoseResult = result
}

// recordUnreachables adds instance to the unreachable list, deduplicated.
func (collector *shardStatusCollector) recordUnreachables(instance *grInstance) {
	collector.Lock()
	defer collector.Unlock()
	// dedup
	// the list size is at most same as number instances in a shard so iterate to dedup is not terrible
	for _, alias := range collector.status.Unreachables {
		if alias == instance.alias {
			return
		}
	}
	collector.status.Unreachables = append(collector.status.Unreachables, instance.alias)
}

// clear resets the unreachable and problematic lists ahead of a new scan.
func (collector *shardStatusCollector) clear() {
	collector.Lock()
	defer collector.Unlock()
	collector.status.Unreachables = nil
	collector.status.Problematics = nil
}

// recordProblematics adds instance to the problematic list, deduplicated.
func (collector *shardStatusCollector) recordProblematics(instance *grInstance) {
	collector.Lock()
	defer collector.Unlock()
	// dedup
	// the list size is at most same as number instances in a shard so iterate to dedup is not terrible
	for _, alias := range collector.status.Problematics {
		if alias == instance.alias {
			return
		}
	}
	collector.status.Problematics = append(collector.status.Problematics, instance.alias)
}

// formatKeyspaceShard renders "keyspace/shard" for logging.
func formatKeyspaceShard(keyspaceShard *topo.KeyspaceShard) string {
	
	return fmt.Sprintf("%v/%v", keyspaceShard.Keyspace, keyspaceShard.Shard)
}

// isHostPortValid reports whether host and port form a usable address.
func isHostPortValid(host string, port int) bool {
	return host != "" && port != 0
}

// We use forAllInstances in two cases:
// 1. FetchGroupView GTIDs to find a candidate for failover.
// If a node is not healthy it should not be considered as a failover candidate
//
// 2. FetchGroupView group member status to see if we need to bootstrap a group,
// either for the first time or rebuild a group after all the nodes are died.
//
// caller will be responsible to decide if they want to tolerate errors from the forAllInstances call
func (shard *GRShard) forAllInstances(task func(instance *grInstance, wg *sync.WaitGroup, er concurrency.ErrorRecorder)) *concurrency.AllErrorRecorder {
	errorRecord := concurrency.AllErrorRecorder{}
	shard.shardStatusCollector.clear()
	var wg sync.WaitGroup
	for _, instance := range shard.instances {
		wg.Add(1)
		go task(instance, &wg, &errorRecord)
	}
	wg.Wait()
	if len(errorRecord.Errors) > 0 {
		log.Errorf("get errors in forAllInstances call: %v", errorRecord.Error())
	}
	return &errorRecord
}

// unreachableError reports whether err indicates the host itself is
// unreachable, as opposed to a mysql-level failure.
func unreachableError(err error) bool {
	contains := []string{
		// "no such host"/"no route to host" is the error when a host is not reachable
		"no such host",
		"no route to host",
		// "connect: connection refused" is the error when a mysqld refused the connection
		"connect: connection refused",
		// "invalid mysql instance key" is the error when a tablet does not populate mysql hostname or port
		// this can happen if the tablet crashed. We keep them in the grShard.instances list to compute
		// quorum but consider it as an unreachable host.
		"invalid mysql instance key",
	}
	for _, k := range contains {
		if strings.Contains(err.Error(), k) {
			return true
		}
	}
	return false
}

// refreshSQLGroup hits all instances and renders a SQL group locally for later diagnoses
// the SQL group contains a list of "views" for the group from all the available nodes
func (shard *GRShard) refreshSQLGroup() error {
	// reset views in sql group
	shard.sqlGroup.clear()
	er := shard.forAllInstances(func(instance *grInstance, wg *sync.WaitGroup, er concurrency.ErrorRecorder) {
		defer wg.Done()
		view, err := shard.dbAgent.FetchGroupView(instance.alias, instance.instanceKey)
		// We just log error here because we rely on mysql tells us if it is happy or not
		// If the node is unreachable
		if err != nil {
			er.RecordError(err)
			shard.shardStatusCollector.recordProblematics(instance)
			if unreachableError(err) {
				shard.shardStatusCollector.recordUnreachables(instance)
			}
			log.Errorf("%v get error while fetch group info: %v", instance.alias, err)
			return
		}
		shard.sqlGroup.recordView(view)
	})
	// Only raise error if we failed to get any data from mysql
	// otherwise, we will use what we get from mysql directly
	if len(er.Errors) == len(shard.instances) {
		log.Errorf("fail to fetch any data for mysql")
		return db.ErrGroupBackoffError
	}
	return shard.sqlGroup.Resolve()
}
diff --git a/go/vt/vtgr/controller/diagnose_test.go b/go/vt/vtgr/controller/diagnose_test.go
new file mode 100644
index 00000000000..011dffb470e
--- /dev/null
+++ b/go/vt/vtgr/controller/diagnose_test.go
@@ -0,0 +1,610 @@
/*
Copyright 2021 The Vitess Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package controller + +import ( + "context" + "errors" + "fmt" + "os" + "strconv" + "strings" + "testing" + "time" + + "github.com/golang/mock/gomock" + "github.com/stretchr/testify/assert" + + "vitess.io/vitess/go/mysql" + "vitess.io/vitess/go/vt/orchestrator/inst" + topodatapb "vitess.io/vitess/go/vt/proto/topodata" + "vitess.io/vitess/go/vt/topo" + "vitess.io/vitess/go/vt/topo/memorytopo" + "vitess.io/vitess/go/vt/vtctl/grpcvtctldserver/testutil" + "vitess.io/vitess/go/vt/vtgr/config" + "vitess.io/vitess/go/vt/vtgr/db" +) + +const diagnoseGroupSize = 2 + +var ( + testHost, _ = os.Hostname() + alias0 = "test_cell-0000000000" + alias1 = "test_cell-0000000001" + alias2 = "test_cell-0000000002" + testPort0 = 17000 + testPort1 = 17001 + testPort2 = 17002 +) + +type testGroupInput struct { + groupName string + readOnly bool + groupState []db.TestGroupState + gtid mysql.GTIDSet +} + +func TestShardIsHealthy(t *testing.T) { + ctrl := gomock.NewController(t) + defer ctrl.Finish() + ctx := context.Background() + ts := memorytopo.NewServer("test_cell") + defer ts.Close() + ts.CreateKeyspace(ctx, "ks", &topodatapb.Keyspace{}) + ts.CreateShard(ctx, "ks", "0") + tmc := NewMockGRTmcClient(ctrl) + dbAgent := db.NewMockAgent(ctrl) + tablet1 := buildTabletInfo(uint32(testPort0), testHost, testPort0, topodatapb.TabletType_MASTER, time.Now()) + tablet2 := buildTabletInfo(uint32(testPort1), testHost, testPort1, topodatapb.TabletType_SPARE, time.Time{}) + tablet3 := buildTabletInfo(uint32(testPort2), testHost, testPort2, topodatapb.TabletType_REPLICA, time.Time{}) + 
testutil.AddTablet(ctx, t, ts, tablet1.Tablet, nil) + testutil.AddTablet(ctx, t, ts, tablet2.Tablet, nil) + testutil.AddTablet(ctx, t, ts, tablet3.Tablet, nil) + dbAgent. + EXPECT(). + FetchGroupView(gomock.Any(), gomock.Any()). + DoAndReturn(func(alias string, target *inst.InstanceKey) (*db.GroupView, error) { + return db.BuildGroupView(alias, "group", testHost, testPort0, false, []db.TestGroupState{ + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"}, + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "ONLINE", MemberRole: "SECONDARY"}, + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "ONLINE", MemberRole: "SECONDARY"}, + }), nil + }). + AnyTimes() + tmc.EXPECT().Ping(gomock.Any(), gomock.Any()).Return(nil).AnyTimes() + cfg := &config.VTGRConfig{GroupSize: 3, MinNumReplica: 2, BackoffErrorWaitTimeSeconds: 1, BootstrapWaitTimeSeconds: 1} + shard := NewGRShard("ks", "0", nil, tmc, ts, dbAgent, cfg, testPort0) + shard.refreshTabletsInShardLocked(ctx) + diagnose, _ := shard.Diagnose(ctx) + assert.Equal(t, DiagnoseTypeHealthy, string(diagnose)) +} + +func TestTabletIssueDiagnoses(t *testing.T) { + type data struct { + pingable bool + ttype topodatapb.TabletType + } + var tablettests = []struct { + name string + expected DiagnoseType + errMessage string + inputs []data + }{ + {name: "healthy shard", expected: DiagnoseTypeHealthy, errMessage: "", inputs: []data{ + {true, topodatapb.TabletType_MASTER}, + {true, topodatapb.TabletType_REPLICA}, + {true, topodatapb.TabletType_REPLICA}, + }}, + {name: "non primary tablet is not pingable", expected: DiagnoseTypeHealthy, errMessage: "", inputs: []data{ // vtgr should do nothing + {true, topodatapb.TabletType_MASTER}, + {false, topodatapb.TabletType_REPLICA}, + {false, topodatapb.TabletType_REPLICA}, + }}, + {name: "primary tablet is not pingable", expected: DiagnoseTypeUnreachablePrimary, errMessage: "", inputs: []data{ // 
vtgr should trigger a failover + {false, topodatapb.TabletType_MASTER}, + {true, topodatapb.TabletType_REPLICA}, + {true, topodatapb.TabletType_REPLICA}, + }}, + {name: "no primary tablet", expected: DiagnoseTypeWrongPrimaryTablet, errMessage: "", inputs: []data{ // vtgr should create one based on mysql + {true, topodatapb.TabletType_REPLICA}, + {true, topodatapb.TabletType_REPLICA}, + {true, topodatapb.TabletType_REPLICA}, + }}, + {name: "mysql and vttablet has different primary", expected: DiagnoseTypeWrongPrimaryTablet, errMessage: "", inputs: []data{ // vtgr should fix vttablet + {true, topodatapb.TabletType_REPLICA}, + {true, topodatapb.TabletType_MASTER}, + {true, topodatapb.TabletType_REPLICA}, + }}, + {name: "unreachable wrong vttablet primary", expected: DiagnoseTypeWrongPrimaryTablet, errMessage: "", inputs: []data{ // vtgr should fix vttablet + {true, topodatapb.TabletType_REPLICA}, + {false, topodatapb.TabletType_MASTER}, + {true, topodatapb.TabletType_REPLICA}, + }}, + {name: "unreachable uninitialized primary vttablet", expected: DiagnoseTypeUnreachablePrimary, errMessage: "", inputs: []data{ // vtgr should failover + {false, topodatapb.TabletType_REPLICA}, + {true, topodatapb.TabletType_REPLICA}, + {true, topodatapb.TabletType_REPLICA}, + }}, + } + for _, tt := range tablettests { + t.Run(tt.name, func(t *testing.T) { + expected := tt.expected + ctrl := gomock.NewController(t) + defer ctrl.Finish() + ts := NewMockGRTopo(ctrl) + tmc := NewMockGRTmcClient(ctrl) + dbAgent := db.NewMockAgent(ctrl) + tablets := make(map[string]*topo.TabletInfo) + for i, input := range tt.inputs { + id := uint32(testPort0 + i) + tablet := buildTabletInfo(id, testHost, testPort0+i, input.ttype, time.Now()) + tablets[fmt.Sprintf("cell-%d", id)] = tablet + var response = struct { + pingable bool + }{input.pingable} + dbAgent. + EXPECT(). + FetchGroupView(gomock.Any(), gomock.Any()). 
+ DoAndReturn(func(alias string, target *inst.InstanceKey) (*db.GroupView, error) { + if target.Hostname == "" || target.Port == 0 { + return nil, errors.New("invalid mysql instance key") + } + return db.BuildGroupView(alias, "group", testHost, testPort0, false, []db.TestGroupState{ + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"}, + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "ONLINE", MemberRole: "SECONDARY"}, + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "ONLINE", MemberRole: "SECONDARY"}, + }), nil + }). + AnyTimes() + tmc. + EXPECT(). + Ping(gomock.Any(), &topodatapb.Tablet{ + Alias: tablet.Alias, + Hostname: tablet.Hostname, + Keyspace: tablet.Keyspace, + Shard: tablet.Shard, + Type: tablet.Type, + Tags: tablet.Tags, + MysqlHostname: tablet.MysqlHostname, + MysqlPort: tablet.MysqlPort, + MasterTermStartTime: tablet.MasterTermStartTime, + }). + DoAndReturn(func(_ context.Context, t *topodatapb.Tablet) error { + if !response.pingable { + return errors.New("unreachable") + } + return nil + }). + AnyTimes() + } + ts. + EXPECT(). + GetTabletMapForShardByCell(gomock.Any(), gomock.Eq("ks"), gomock.Eq("0"), gomock.Any()). 
+ Return(tablets, nil) + + ctx := context.Background() + cfg := &config.VTGRConfig{GroupSize: diagnoseGroupSize, MinNumReplica: 2, BackoffErrorWaitTimeSeconds: 1, BootstrapWaitTimeSeconds: 1} + shard := NewGRShard("ks", "0", nil, tmc, ts, dbAgent, cfg, testPort0) + shard.refreshTabletsInShardLocked(ctx) + diagnose, err := shard.Diagnose(ctx) + assert.Equal(t, expected, diagnose) + if tt.errMessage == "" { + assert.NoError(t, err) + } else { + assert.Error(t, err) + assert.True(t, strings.Contains(err.Error(), tt.errMessage), err.Error()) + } + }) + } +} + +func TestMysqlIssueDiagnoses(t *testing.T) { + cfg := &config.VTGRConfig{GroupSize: diagnoseGroupSize, MinNumReplica: 2, BackoffErrorWaitTimeSeconds: 1, BootstrapWaitTimeSeconds: 1} + disableProtectionCfg := &config.VTGRConfig{GroupSize: diagnoseGroupSize, MinNumReplica: 2, DisableReadOnlyProtection: true, BackoffErrorWaitTimeSeconds: 1, BootstrapWaitTimeSeconds: 1} + type data struct { + alias string + groupName string + readOnly bool + groupInput []db.TestGroupState + ttype topodatapb.TabletType + } + var sqltests = []struct { + name string + expected DiagnoseType + errMessage string + config *config.VTGRConfig + inputs []data + removeTablets []string // to simulate missing tablet in topology + }{ + {name: "healthy shard", expected: DiagnoseTypeHealthy, errMessage: "", inputs: []data{ + {alias0, "group", false, []db.TestGroupState{ + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"}, + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "ONLINE", MemberRole: "SECONDARY"}, + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "ONLINE", MemberRole: "SECONDARY"}, + }, topodatapb.TabletType_MASTER}, + {alias1, "group", true, []db.TestGroupState{ + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"}, + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), 
MemberState: "ONLINE", MemberRole: "SECONDARY"}, + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "ONLINE", MemberRole: "SECONDARY"}, + }, topodatapb.TabletType_REPLICA}, + {alias2, "group", true, []db.TestGroupState{ + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"}, + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "ONLINE", MemberRole: "SECONDARY"}, + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "ONLINE", MemberRole: "SECONDARY"}, + }, topodatapb.TabletType_REPLICA}, + }}, + {name: "recovering primary shard", expected: DiagnoseTypeBackoffError, errMessage: "", inputs: []data{ + {alias0, "group", false, []db.TestGroupState{ + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "RECOVERING", MemberRole: "PRIMARY"}, + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "RECOVERING", MemberRole: "SECONDARY"}, + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "RECOVERING", MemberRole: "SECONDARY"}, + }, topodatapb.TabletType_MASTER}, + {alias1, "group", true, []db.TestGroupState{ + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "RECOVERING", MemberRole: "PRIMARY"}, + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "RECOVERING", MemberRole: "SECONDARY"}, + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "RECOVERING", MemberRole: "SECONDARY"}, + }, topodatapb.TabletType_REPLICA}, + {alias2, "group", true, []db.TestGroupState{ + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "RECOVERING", MemberRole: "PRIMARY"}, + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "RECOVERING", MemberRole: "SECONDARY"}, + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "RECOVERING", MemberRole: "SECONDARY"}, + }, 
topodatapb.TabletType_REPLICA}, + }}, + {name: "no group in shard", expected: DiagnoseTypeShardHasNoGroup, errMessage: "", inputs: []data{ + {alias0, "", true, []db.TestGroupState{ + {MemberHost: "", MemberPort: "", MemberState: "OFFLINE", MemberRole: ""}, + }, topodatapb.TabletType_REPLICA}, + {alias1, "", true, []db.TestGroupState{ + {MemberHost: "", MemberPort: "", MemberState: "OFFLINE", MemberRole: ""}, + }, topodatapb.TabletType_REPLICA}, + {alias2, "", true, []db.TestGroupState{ + {MemberHost: "", MemberPort: "", MemberState: "OFFLINE", MemberRole: ""}, + }, topodatapb.TabletType_REPLICA}, + }}, + {name: "unreachable node", expected: DiagnoseTypeBackoffError, errMessage: "", inputs: []data{ + {alias0, "group", false, []db.TestGroupState{ + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"}, + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "UNREACHABLE", MemberRole: "SECONDARY"}, + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "ONLINE", MemberRole: "SECONDARY"}, + }, topodatapb.TabletType_MASTER}, + {alias1, "group", true, []db.TestGroupState{ + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "UNREACHABLE", MemberRole: "SECONDARY"}, + }, topodatapb.TabletType_REPLICA}, + {alias2, "group", true, []db.TestGroupState{ + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"}, + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "UNREACHABLE", MemberRole: "SECONDARY"}, + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "ONLINE", MemberRole: "SECONDARY"}, + }, topodatapb.TabletType_REPLICA}, + }}, + {name: "mysql and tablet has different primary", expected: DiagnoseTypeWrongPrimaryTablet, errMessage: "", inputs: []data{ // vtgr should failover vttablet + {alias0, "group", false, []db.TestGroupState{ + {MemberHost: testHost, MemberPort: 
strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"}, + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "ONLINE", MemberRole: "SECONDARY"}, + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "ONLINE", MemberRole: "SECONDARY"}, + }, topodatapb.TabletType_REPLICA}, + {alias1, "group", true, []db.TestGroupState{ + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"}, + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "ONLINE", MemberRole: "SECONDARY"}, + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "ONLINE", MemberRole: "SECONDARY"}, + }, topodatapb.TabletType_MASTER}, + {alias2, "group", true, []db.TestGroupState{ + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"}, + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "ONLINE", MemberRole: "SECONDARY"}, + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "ONLINE", MemberRole: "SECONDARY"}, + }, topodatapb.TabletType_REPLICA}, + }}, + {name: "mysql primary out of topology", expected: DiagnoseTypeUnreachablePrimary, errMessage: "", inputs: []data{ // vtgr should failover mysql + {alias0, "group", false, []db.TestGroupState{ + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"}, + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "ONLINE", MemberRole: "SECONDARY"}, + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "ONLINE", MemberRole: "SECONDARY"}, + }, topodatapb.TabletType_REPLICA}, + {alias1, "group", true, []db.TestGroupState{ + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"}, + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "ONLINE", MemberRole: "SECONDARY"}, + 
{MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "ONLINE", MemberRole: "SECONDARY"}, + }, topodatapb.TabletType_MASTER}, + {alias2, "group", true, []db.TestGroupState{ + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"}, + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "ONLINE", MemberRole: "SECONDARY"}, + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "ONLINE", MemberRole: "SECONDARY"}, + }, topodatapb.TabletType_REPLICA}, + }, removeTablets: []string{alias0}}, + {name: "one error node", expected: DiagnoseTypeUnconnectedReplica, errMessage: "", inputs: []data{ + {alias0, "group", false, []db.TestGroupState{ + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"}, + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "ERROR", MemberRole: "SECONDARY"}, + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "ONLINE", MemberRole: "SECONDARY"}, + }, topodatapb.TabletType_MASTER}, + {alias1, "group", true, []db.TestGroupState{ + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "ERROR", MemberRole: "SECONDARY"}, + }, topodatapb.TabletType_REPLICA}, + {alias2, "group", true, []db.TestGroupState{ + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"}, + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "ERROR", MemberRole: "SECONDARY"}, + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "ONLINE", MemberRole: "SECONDARY"}, + }, topodatapb.TabletType_REPLICA}, + }}, + {name: "two error node", expected: DiagnoseTypeInsufficientGroupSize, errMessage: "", inputs: []data{ + {alias0, "group", false, []db.TestGroupState{ + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"}, + 
{MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "ERROR", MemberRole: "SECONDARY"}, + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "ERROR", MemberRole: "SECONDARY"}, + }, topodatapb.TabletType_MASTER}, + {alias1, "group", true, []db.TestGroupState{ + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "ERROR", MemberRole: "SECONDARY"}, + }, topodatapb.TabletType_REPLICA}, + {alias2, "group", true, []db.TestGroupState{ + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "ERROR", MemberRole: "SECONDARY"}, + }, topodatapb.TabletType_REPLICA}, + }}, + {name: "insufficient group member", expected: DiagnoseTypeInsufficientGroupSize, errMessage: "", inputs: []data{ + {alias0, "group", false, []db.TestGroupState{ + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"}, + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "RECOVERING", MemberRole: "SECONDARY"}, + }, topodatapb.TabletType_MASTER}, + {alias1, "group", true, []db.TestGroupState{}, topodatapb.TabletType_REPLICA}, + {alias2, "group", true, []db.TestGroupState{ + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"}, + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "RECOVERING", MemberRole: "SECONDARY"}, + }, topodatapb.TabletType_REPLICA}, + }}, + {name: "unconnected node", expected: DiagnoseTypeBackoffError, errMessage: "", inputs: []data{ + {alias0, "group", true, []db.TestGroupState{ + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"}, + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "UNREACHABLE", MemberRole: "SECONDARY"}, + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "RECOVERING", MemberRole: "SECONDARY"}, + }, topodatapb.TabletType_MASTER}, + {alias1, 
"group", true, []db.TestGroupState{ + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"}, + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "UNREACHABLE", MemberRole: "SECONDARY"}, + }, topodatapb.TabletType_REPLICA}, + {alias2, "group", true, []db.TestGroupState{ + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"}, + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "UNREACHABLE", MemberRole: "SECONDARY"}, + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "RECOVERING", MemberRole: "SECONDARY"}, + }, topodatapb.TabletType_REPLICA}, + }}, + {name: "unreachable primary", expected: DiagnoseTypeBackoffError, errMessage: "", inputs: []data{ + {alias0, "group", false, []db.TestGroupState{ + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "UNREACHABLE", MemberRole: "PRIMARY"}, + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "ONLINE", MemberRole: "SECONDARY"}, + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "RECOVERING", MemberRole: "SECONDARY"}, + }, topodatapb.TabletType_MASTER}, + {alias1, "group", true, []db.TestGroupState{ + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "UNREACHABLE", MemberRole: "PRIMARY"}, + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "ONLINE", MemberRole: "SECONDARY"}, + }, topodatapb.TabletType_REPLICA}, + {alias2, "group", true, []db.TestGroupState{ + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "UNREACHABLE", MemberRole: "PRIMARY"}, + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "ONLINE", MemberRole: "SECONDARY"}, + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "RECOVERING", MemberRole: "SECONDARY"}, + }, topodatapb.TabletType_REPLICA}, + }}, + {name: 
"more than one group name", expected: DiagnoseTypeError, errMessage: "fail to refreshSQLGroup: group has split brain", inputs: []data{ // vtgr should raise error + {alias0, "group", false, []db.TestGroupState{ + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"}, + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "ONLINE", MemberRole: "SECONDARY"}, + }, topodatapb.TabletType_MASTER}, + {alias1, "group_xxx", false, []db.TestGroupState{ + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "ONLINE", MemberRole: "PRIMARY"}, + }, topodatapb.TabletType_REPLICA}, + {alias2, "group", true, []db.TestGroupState{ + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"}, + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "ONLINE", MemberRole: "SECONDARY"}, + }, topodatapb.TabletType_REPLICA}, + }}, + {name: "different primary", expected: DiagnoseTypeError, errMessage: "fail to refreshSQLGroup: group has split brain", inputs: []data{ // vtgr should raise error + {alias0, "group", false, []db.TestGroupState{ + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"}, + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "ONLINE", MemberRole: "SECONDARY"}, + }, topodatapb.TabletType_MASTER}, + {alias1, "group", false, []db.TestGroupState{ + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "ONLINE", MemberRole: "PRIMARY"}, + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "ONLINE", MemberRole: "SECONDARY"}, + }, topodatapb.TabletType_REPLICA}, + {alias2, "group", true, []db.TestGroupState{ + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"}, + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "ONLINE", MemberRole: 
"SECONDARY"}, + }, topodatapb.TabletType_REPLICA}, + }}, + {name: "insufficient members in group", expected: DiagnoseTypeInsufficientGroupSize, errMessage: "", inputs: []data{ + {alias0, "group", false, []db.TestGroupState{ + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"}, + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "RECOVERING", MemberRole: "SECONDARY"}, + }, topodatapb.TabletType_MASTER}, + {alias1, "group", true, []db.TestGroupState{ + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"}, + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "RECOVERING", MemberRole: "SECONDARY"}, + }, topodatapb.TabletType_REPLICA}, + {alias2, "group", true, []db.TestGroupState{ + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "ERROR", MemberRole: "SECONDARY"}, + }, topodatapb.TabletType_REPLICA}, + }}, + // the shard has insufficient member, but the primary is already read_only + // we should try to connect the replica node + {name: "insufficient members in read only shard", expected: DiagnoseTypeUnconnectedReplica, errMessage: "", inputs: []data{ + {alias0, "group", true, []db.TestGroupState{ + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"}, + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "RECOVERING", MemberRole: "SECONDARY"}, + }, topodatapb.TabletType_MASTER}, + {alias1, "group", true, []db.TestGroupState{ + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"}, + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "RECOVERING", MemberRole: "SECONDARY"}, + }, topodatapb.TabletType_REPLICA}, + {alias2, "group", true, []db.TestGroupState{ + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "ERROR", MemberRole: 
"SECONDARY"}, + }, topodatapb.TabletType_REPLICA}, + }}, + {name: "insufficient members in group with disable read only protection", expected: DiagnoseTypeUnconnectedReplica, errMessage: "", config: disableProtectionCfg, inputs: []data{ + {alias0, "group", false, []db.TestGroupState{ + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"}, + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "RECOVERING", MemberRole: "SECONDARY"}, + }, topodatapb.TabletType_MASTER}, + {alias1, "group", true, []db.TestGroupState{ + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"}, + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "RECOVERING", MemberRole: "SECONDARY"}, + }, topodatapb.TabletType_REPLICA}, + {alias2, "group", true, []db.TestGroupState{ + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "ERROR", MemberRole: "SECONDARY"}, + }, topodatapb.TabletType_REPLICA}, + }}, + {name: "read only with disable read only protection", expected: DiagnoseTypeReadOnlyShard, errMessage: "", config: disableProtectionCfg, inputs: []data{ + {alias0, "group", true, []db.TestGroupState{ + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"}, + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "RECOVERING", MemberRole: "SECONDARY"}, + }, topodatapb.TabletType_MASTER}, + {alias1, "group", true, []db.TestGroupState{ + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"}, + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "RECOVERING", MemberRole: "SECONDARY"}, + }, topodatapb.TabletType_REPLICA}, + {alias2, "group", true, []db.TestGroupState{ + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "ERROR", MemberRole: "SECONDARY"}, + }, 
topodatapb.TabletType_REPLICA}, + }}, + {name: "read only healthy shard", expected: DiagnoseTypeReadOnlyShard, errMessage: "", inputs: []data{ + {alias0, "group", true, []db.TestGroupState{ + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"}, + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "ONLINE", MemberRole: "SECONDARY"}, + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "ONLINE", MemberRole: "SECONDARY"}, + }, topodatapb.TabletType_MASTER}, + {alias1, "group", true, []db.TestGroupState{ + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"}, + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "ONLINE", MemberRole: "SECONDARY"}, + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "ONLINE", MemberRole: "SECONDARY"}, + }, topodatapb.TabletType_REPLICA}, + {alias2, "group", true, []db.TestGroupState{ + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"}, + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "ONLINE", MemberRole: "SECONDARY"}, + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "ONLINE", MemberRole: "SECONDARY"}, + }, topodatapb.TabletType_REPLICA}, + }}, + {name: "network partition", expected: DiagnoseTypeBackoffError, errMessage: "", inputs: []data{ + {alias0, "group", true, []db.TestGroupState{ + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"}, + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "UNREACHABLE", MemberRole: "SECONDARY"}, + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "UNREACHABLE", MemberRole: "SECONDARY"}, + }, topodatapb.TabletType_MASTER}, + {alias1, "group", true, []db.TestGroupState{ + {MemberHost: testHost, MemberPort: 
strconv.Itoa(testPort1), MemberState: "OFFLINE", MemberRole: "SECONDARY"}, + }, topodatapb.TabletType_REPLICA}, + {alias2, "group", true, []db.TestGroupState{ + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "OFFLINE", MemberRole: "SECONDARY"}, + }, topodatapb.TabletType_REPLICA}, + }}, + {name: "start bootstrap in progress", expected: DiagnoseTypeBootstrapBackoff, errMessage: "", inputs: []data{ + {alias0, "group", true, []db.TestGroupState{ + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "RECOVERING", MemberRole: "SECONDARY"}, + }, topodatapb.TabletType_REPLICA}, + {alias1, "", true, []db.TestGroupState{}, topodatapb.TabletType_REPLICA}, + {alias2, "", true, []db.TestGroupState{ + {MemberHost: "", MemberPort: "", MemberState: "OFFLINE", MemberRole: ""}, + }, topodatapb.TabletType_REPLICA}, + }}, + } + for _, tt := range sqltests { + t.Run(tt.name, func(t *testing.T) { + ctrl := gomock.NewController(t) + defer ctrl.Finish() + ts := NewMockGRTopo(ctrl) + tmc := NewMockGRTmcClient(ctrl) + dbAgent := db.NewMockAgent(ctrl) + tablets := make(map[string]*topo.TabletInfo) + expected := tt.expected + inputMap := make(map[string]testGroupInput) + if tt.config == nil { + tt.config = cfg + } + conf := tt.config + for i, input := range tt.inputs { + id := uint32(testPort0 + i) + tablet := buildTabletInfo(id, testHost, testPort0+i, input.ttype, time.Now()) + tablets[input.alias] = tablet + inputMap[input.alias] = testGroupInput{ + input.groupName, + input.readOnly, + input.groupInput, + nil, + } + dbAgent. + EXPECT(). + FetchGroupView(gomock.Any(), gomock.Any()). + DoAndReturn(func(alias string, target *inst.InstanceKey) (*db.GroupView, error) { + if target.Hostname == "" || target.Port == 0 { + return nil, errors.New("invalid mysql instance key") + } + s := inputMap[alias] + view := db.BuildGroupView(alias, s.groupName, target.Hostname, target.Port, s.readOnly, s.groupState) + return view, nil + }). 
+ AnyTimes() + } + for _, tid := range tt.removeTablets { + delete(tablets, tid) + } + ts. + EXPECT(). + GetTabletMapForShardByCell(gomock.Any(), gomock.Eq("ks"), gomock.Eq("0"), gomock.Any()). + Return(tablets, nil) + tmc.EXPECT().Ping(gomock.Any(), gomock.Any()).Return(nil).AnyTimes() + + ctx := context.Background() + shard := NewGRShard("ks", "0", nil, tmc, ts, dbAgent, conf, testPort0) + shard.refreshTabletsInShardLocked(ctx) + diagnose, err := shard.Diagnose(ctx) + assert.Equal(t, expected, diagnose) + if tt.errMessage == "" { + assert.NoError(t, err) + } else { + assert.Error(t, err) + assert.True(t, strings.Contains(err.Error(), tt.errMessage), err.Error()) + } + }) + } +} + +func TestGroupStatusRecorder(t *testing.T) { + r := &groupGTIDRecorder{} + + err := r.recordGroupStatus("group1", true) + assert.NoError(t, err) + assert.Equal(t, r.name, "group1") + assert.Equal(t, r.hasActive, true) + + err = r.recordGroupStatus("group2", false) + assert.Error(t, err, "group has more than one group name") + assert.Equal(t, r.name, "group1") + + err = r.recordGroupStatus("group1", false) + assert.NoError(t, err) + assert.Equal(t, r.name, "group1") + assert.Equal(t, r.hasActive, true) + + pos1, err := mysql.ParsePosition(mysql.Mysql56FlavorID, "264a8230-67d2-11eb-acdd-0a8d91f24125:1-22:1000019-1000021") + assert.NoError(t, err) + inst1 := &grInstance{alias: "alias1"} + r.recordGroupGTIDs(pos1.GTIDSet, inst1) + pos2, err := mysql.ParsePosition(mysql.Mysql56FlavorID, "264a8230-67d2-11eb-acdd-0a8d91f24125:1-1000021") + assert.NoError(t, err) + inst2 := &grInstance{alias: "alias2"} + r.recordGroupGTIDs(pos2.GTIDSet, inst2) + assert.Equal(t, len(r.gtidWithInstances), 2) + assert.Equal(t, r.gtidWithInstances[0].instance, inst1) + assert.Equal(t, pos1.GTIDSet.Equal(r.gtidWithInstances[0].gtids), true) + assert.Equal(t, r.gtidWithInstances[1].instance, inst2) + assert.Equal(t, pos2.GTIDSet.Equal(r.gtidWithInstances[1].gtids), true) +} diff --git a/go/vt/vtgr/controller/error.go 
b/go/vt/vtgr/controller/error.go new file mode 100644 index 00000000000..5613c802524 --- /dev/null +++ b/go/vt/vtgr/controller/error.go @@ -0,0 +1,25 @@ +/* +Copyright 2021 The Vitess Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package controller + +import "errors" + +var ( + errMissingPrimaryTablet = errors.New("no primary tablet available") + errMissingGroup = errors.New("no mysql group") + errForceAbortBootstrap = errors.New("force abort bootstrap") +) diff --git a/go/vt/vtgr/controller/group.go b/go/vt/vtgr/controller/group.go new file mode 100644 index 00000000000..b9333c3c5f6 --- /dev/null +++ b/go/vt/vtgr/controller/group.go @@ -0,0 +1,375 @@ +/* +Copyright 2021 The Vitess Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package controller + +import ( + "fmt" + "sort" + "strings" + "sync" + + "vitess.io/vitess/go/stats" + "vitess.io/vitess/go/vt/log" + "vitess.io/vitess/go/vt/orchestrator/inst" + "vitess.io/vitess/go/vt/vtgr/db" +) + +var ( + groupOnlineSize = stats.NewGaugesWithMultiLabels("MysqlGroupOnlineSize", "Online MySQL server in the group", []string{"Keyspace", "Shard"}) + isLostQuorum = stats.NewGaugesWithMultiLabels("MysqlGroupLostQuorum", "If MySQL group lost quorum", []string{"Keyspace", "Shard"}) +) + +// SQLGroup contains views from all the nodes within the shard +type SQLGroup struct { + views []*db.GroupView + resolvedView *ResolvedView + size int + singlePrimary bool + statsTags []string + sync.Mutex +} + +// NewSQLGroup creates a new SQLGroup +func NewSQLGroup(size int, singlePrimary bool, keyspace, shard string) *SQLGroup { + return &SQLGroup{size: size, singlePrimary: singlePrimary, statsTags: []string{keyspace, shard}} +} + +// ResolvedView is the resolved view +type ResolvedView struct { + groupName string + view map[inst.InstanceKey]db.GroupMember +} + +// recordView adds a view to the group +func (group *SQLGroup) recordView(view *db.GroupView) { + group.Lock() + defer group.Unlock() + group.views = append(group.views, view) +} + +// overrideView overrides a view to the group +func (group *SQLGroup) overrideView(views []*db.GroupView) { + group.Lock() + defer group.Unlock() + group.views = views + group.resolveLocked() +} + +// clear reset the views +func (group *SQLGroup) clear() { + group.Lock() + defer group.Unlock() + group.views = nil + group.resolvedView = nil +} + +// GetViews returns views from everyone in the group +func (group *SQLGroup) GetViews() []*db.GroupView { + group.Lock() + defer group.Unlock() + return group.views +} + +// GetGroupName returns the group name +func (group *SQLGroup) GetGroupName() string { + group.Lock() + defer group.Unlock() + rv := group.resolvedView + return rv.groupName +} + +// GetOnlineGroupInfo returns 
number of online members in the group and also if the primary is read only +func (group *SQLGroup) GetOnlineGroupInfo() (int, bool) { + group.Lock() + defer group.Unlock() + rv := group.resolvedView + view := rv.view + onlineSize := 0 + isPrimaryReadOnly := false + for _, status := range view { + if status.State == db.ONLINE { + onlineSize++ + } + if status.Role == db.PRIMARY { + isPrimaryReadOnly = isPrimaryReadOnly || status.ReadOnly + } + } + return onlineSize, isPrimaryReadOnly +} + +// IsUnconnectedReplica returns true if the node is not part of the group view, or is in a state other than ONLINE or RECOVERING +func (group *SQLGroup) IsUnconnectedReplica(instanceKey *inst.InstanceKey) bool { + if instanceKey == nil { + return false + } + group.Lock() + defer group.Unlock() + rv := group.resolvedView + view := rv.view + status, ok := view[*instanceKey] + if !ok { + return true + } + return status.State != db.ONLINE && status.State != db.RECOVERING +} + +// IsAllOfflineOrError returns true if all the nodes are in OFFLINE or ERROR state +func (group *SQLGroup) IsAllOfflineOrError() bool { + group.Lock() + defer group.Unlock() + rv := group.resolvedView + view := rv.view + for _, status := range view { + if status.State != db.OFFLINE && status.State != db.ERROR { + return false + } + } + return true +} + +// GetStatus returns the GroupMember status for a given host +func (group *SQLGroup) GetStatus(instanceKey *inst.InstanceKey) *db.GroupMember { + if instanceKey == nil { + return nil + } + group.Lock() + defer group.Unlock() + rv := group.resolvedView + view := rv.view + status, ok := view[*instanceKey] + if !ok { + return nil + } + return &status +} + +// IsSafeToBootstrap checks if it is safe to bootstrap a mysql group +func (group *SQLGroup) IsSafeToBootstrap() bool { + group.Lock() + defer group.Unlock() + // for bootstrap we require group at least has quorum number of views + // this is to make sure we don't bootstrap a group improperly + if len(group.views) < group.size { + log.Errorf("[sql_group] cannot bootstrap because we only 
have %v views | expected %v", len(group.views), group.size)
		return false
	}
	// we think it is safe to bootstrap a group if all the views don't have a primary host
	host, port, _ := group.getPrimaryLocked()
	if host != "" || port != 0 {
		log.Warningf("not safe to bootstrap sql group because %v/%v might already be primary", host, port)
	}
	return host == "" && port == 0
}

// GetPrimary returns the hostname, port of the primary that everyone agreed on
// isActive bool indicates if there is any node in the group whose primary is "ONLINE"
func (group *SQLGroup) GetPrimary() (string, int, bool) {
	group.Lock()
	defer group.Unlock()
	return group.getPrimaryLocked()
}

// getPrimaryLocked is the lock-free version of GetPrimary.
// The caller must already hold group's mutex.
func (group *SQLGroup) getPrimaryLocked() (string, int, bool) {
	rv := group.resolvedView
	view := rv.view
	for instance, status := range view {
		if status.Role == db.PRIMARY {
			// third return value reports whether the primary is ONLINE
			return instance.Hostname, instance.Port, status.State == db.ONLINE
		}
	}
	return "", 0, false
}

// Resolve merges the views into a map
func (group *SQLGroup) Resolve() error {
	group.Lock()
	defer group.Unlock()
	return group.resolveLocked()
}

// resolveLocked merges all recorded per-node views into a single resolved view
// keyed by instance, then validates the result. Conflicting member reports are
// merged via mergeState/mergeRole (the larger/worse value wins) and ReadOnly is
// OR-ed. Divergent non-empty group names are treated as split brain.
// The caller must already hold group's mutex.
func (group *SQLGroup) resolveLocked() error {
	rv := &ResolvedView{}
	group.resolvedView = rv
	m := make(map[inst.InstanceKey]db.GroupMember)
	for _, view := range group.views {
		// adopt the first non-empty group name we see
		if rv.groupName == "" && view.GroupName != "" {
			rv.groupName = view.GroupName
		}
		if view.GroupName != "" && rv.groupName != view.GroupName {
			log.Errorf("previous group name %v found %v", rv.groupName, view.GroupName)
			return db.ErrGroupSplitBrain
		}
		for _, member := range view.UnresolvedMembers {
			instance := view.CreateInstanceKey(member)
			memberState := member.State
			memberRole := member.Role
			isReadOnly := member.ReadOnly
			st, ok := m[instance]
			if !ok {
				// first report for this instance: take it as-is
				m[instance] = db.GroupMember{
					HostName: instance.Hostname,
					Port:     instance.Port,
					State:    memberState,
					Role:     memberRole,
					ReadOnly: isReadOnly,
				}
				continue
			}
			if st.State == memberState && st.Role == memberRole && st.ReadOnly == isReadOnly {
				// identical report, nothing to merge
				continue
			}
			// conflicting reports: merge conservatively
			m[instance] = db.GroupMember{
				HostName: instance.Hostname,
				Port:     instance.Port,
				State:    group.mergeState(st.State, memberState),
				Role:     group.mergeRole(st.Role, memberRole),
				ReadOnly: st.ReadOnly || isReadOnly,
			}
		}
	}
	rv.view = m
	return group.resolvedView.validate(group.singlePrimary, group.statsTags)
}

// validate sanity-checks the resolved view: it detects multiple primaries
// (split brain when singlePrimary is set), lost quorum by unreachable nodes,
// an ongoing bootstrap, and a primary that is not ONLINE. It also updates the
// groupOnlineSize and isLostQuorum stats for the given statsTags.
func (rv *ResolvedView) validate(singlePrimary bool, statsTags []string) error {
	if !rv.hasGroup() {
		log.Info("Resolved view does not have a group")
		return nil
	}
	hasPrimary := false
	primaryState := db.UNKNOWNSTATE
	var onlineCount, recoveringCount, unreachableCount, offlineCount, errorCount int
	for _, status := range rv.view {
		if status.Role == db.PRIMARY {
			if singlePrimary && hasPrimary {
				log.Errorf("Found more than one primary in the group")
				return db.ErrGroupSplitBrain
			}
			hasPrimary = true
			primaryState = status.State
			if status.State != db.ONLINE {
				log.Warningf("Found a PRIMARY not ONLINE (%v)", status.State)
			}
		}
		switch status.State {
		case db.ONLINE:
			onlineCount++
		case db.UNREACHABLE:
			unreachableCount++
		case db.OFFLINE:
			offlineCount++
		case db.ERROR:
			errorCount++
		case db.RECOVERING:
			recoveringCount++
		}
	}
	groupOnlineSize.Set(statsTags, int64(onlineCount))
	if unreachableCount > 0 || errorCount > 0 || offlineCount > 0 {
		log.Warningf("Some of nodes are unconnected in the group. hasPrimary=%v (%v), online_count=%v, recovering_count=%v, unreachable_count=%v, offline_count=%v, error_count=%v", hasPrimary, primaryState, onlineCount, recoveringCount, unreachableCount, offlineCount, errorCount)
	}
	if unreachableCount >= len(rv.view)/2+1 {
		log.Errorf("Backoff error by quorum unreachable: found %v number of UNREACHABLE nodes while quorum is %v", unreachableCount, len(rv.view)/2+1)
		isLostQuorum.Set(statsTags, 1)
	} else {
		isLostQuorum.Set(statsTags, 0)
	}
	// In theory there should be no UNREACHABLE nodes
	// raise ErrGroupBackoffError to backoff and wait
	// If we lost quorum, then the group is not writable
	// If we still have a functioning group, we can backoff and wait
	// the unreachable node should either be expelled or we have a frozen view
	// Note: this means we should set group_replication_unreachable_majority_timeout
	// greater than 0. Otherwise VTGR can see all nodes are ONLINE when a single node
	// is partitioned and end up doing nothing.
	if unreachableCount > 0 {
		return db.ErrGroupBackoffError
	}
	// Ongoing bootstrap, we should backoff and wait
	if recoveringCount == 1 && (offlineCount+recoveringCount == len(rv.view)) {
		log.Warningf("Group has one recovery node with all others in offline mode")
		return db.ErrGroupOngoingBootstrap
	}
	// We don't have quorum number of unreachable, but the primary is not online
	// This most likely means there is a failover in the group we should back off and wait
	if hasPrimary && primaryState != db.ONLINE {
		log.Warningf("Found a PRIMARY that is not ONLINE (%v)", primaryState)
		return db.ErrGroupBackoffError
	}
	// If all the node in view are OFFLINE or ERROR, it is an inactive group
	// It is expected to have no primary in this case
	if !hasPrimary && (offlineCount+errorCount != len(rv.view)) {
		log.Warningf("Group is NOT all offline or error without a primary node")
		return db.ErrGroupBackoffError
	}
	return nil
}

// hasGroup reports whether the resolved view observed any non-empty group name.
func (rv *ResolvedView) hasGroup() bool {
	return rv.groupName != ""
}

// mergeState picks the "larger" of two member states by numeric value.
func (group *SQLGroup) mergeState(s1, s2 db.MemberState) db.MemberState {
	return db.MemberState(group.maxStatus(int(s1), int(s2)))
}

// mergeRole picks the "larger" of two member roles by numeric value.
func (group *SQLGroup) mergeRole(r1, r2 db.MemberRole) db.MemberRole {
	return db.MemberRole(group.maxStatus(int(r1), int(r2)))
}

// maxStatus returns the larger of two ints.
func (group *SQLGroup) maxStatus(a, b int) int {
	if a > b {
		return a
	}
	return b
}

// ToString returns a string representation of the sql group
func (group *SQLGroup) ToString() string {
	group.Lock()
	defer group.Unlock()
	var sb strings.Builder
	views := group.views
	for _, view := range views {
		sb.WriteString(fmt.Sprintf("[%s] SQLGroup group=%s", view.TabletAlias, view.GroupName))
		for _, member := range view.UnresolvedMembers {
			sb.WriteString(fmt.Sprintf(" | %s %s %s readonly=%v", member.HostName, member.Role, member.State, member.ReadOnly))
		}
		sb.WriteString("\n")
	}
	rv := group.resolvedView
	if rv != nil {
		sb.WriteString("[resolved_view]\n")
		sb.WriteString(fmt.Sprintf("group_name=%v\n", rv.groupName))
		// sort keys for a deterministic, diff-friendly output
		keys := make([]inst.InstanceKey, 0, len(rv.view))
		for k := range rv.view {
			keys = append(keys, k)
		}
		sort.Slice(keys, func(i, j int) bool {
			return keys[i].Hostname < keys[j].Hostname
		})
		for _, instance := range keys {
			status := rv.view[instance]
			sb.WriteString(fmt.Sprintf("[%s] state=%v role=%v readonly=%v\n", instance.Hostname, status.State, status.Role, status.ReadOnly))

		}
	}
	return sb.String()
}

// quorum returns the majority size (N/2+1) for the configured group size.
func (group *SQLGroup) quorum() int {
	return group.size/2 + 1
}
diff --git a/go/vt/vtgr/controller/group_test.go b/go/vt/vtgr/controller/group_test.go
new file mode 100644
index 00000000000..b8c2b5c487e
--- /dev/null
+++ b/go/vt/vtgr/controller/group_test.go
@@ -0,0 +1,349 @@
/*
Copyright 2021 The Vitess Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package controller

import (
	"testing"

	"vitess.io/vitess/go/vt/orchestrator/inst"

	"vitess.io/vitess/go/vt/vtgr/db"

	"github.com/stretchr/testify/assert"
)

// TestSQLGroupToString checks ToString output before and after Resolve.
func TestSQLGroupToString(t *testing.T) {
	group := NewSQLGroup(2, true, "ks", "0")
	v1 := db.NewGroupView("v1", "host1", 10)
	v1.GroupName = "group_name"
	var l1 []*db.GroupMember
	var l2 []*db.GroupMember
	m1 := db.NewGroupMember("ONLINE", "PRIMARY", "host1", 10, false)
	m2 := db.NewGroupMember("ONLINE", "SECONDARY", "host2", 10, true)
	m3 := db.NewGroupMember("OFFLINE", "SECONDARY", "host3", 10, true)
	l1 = append(l1, m1)
	l1 = append(l1, m2)
	v1.UnresolvedMembers = l1
	l2 = append(l2, m3)
	v2 := db.NewGroupView("v2", "host2", 10)
	v2.GroupName = "group_name"
	v2.UnresolvedMembers = l2
	group.recordView(v2)
	group.recordView(v1)
	assert.Equal(t, `[v2] SQLGroup group=group_name | host3 SECONDARY OFFLINE readonly=true
[v1] SQLGroup group=group_name | host1 PRIMARY ONLINE readonly=false | host2 SECONDARY ONLINE readonly=true
`, group.ToString())
	group.Resolve()
	assert.Equal(t, `[v2] SQLGroup group=group_name | host3 SECONDARY OFFLINE readonly=true
[v1] SQLGroup group=group_name | host1 PRIMARY ONLINE readonly=false | host2 SECONDARY ONLINE readonly=true
[resolved_view]
group_name=group_name
[host1] state=ONLINE role=PRIMARY readonly=false
[host2] state=ONLINE role=SECONDARY readonly=true
[host3] state=OFFLINE role=SECONDARY readonly=true
`, group.ToString())
}

// TestGetGroupName checks that the resolved group name sticks even after a
// divergent group name triggers a split brain error.
func TestGetGroupName(t *testing.T) {
	group := NewSQLGroup(3, true, "ks", "0")
	v1 := db.NewGroupView("v1", "host1", 10)
	v1.GroupName = "group"
	v1.UnresolvedMembers = []*db.GroupMember{
		db.NewGroupMember("OFFLINE", "", "host1", 10, true),
	}
	group.recordView(v1)
	v2 := db.NewGroupView("v2", "host2", 10)
	v2.GroupName = "group"
	v2.UnresolvedMembers = []*db.GroupMember{
		db.NewGroupMember("OFFLINE", "", "", 0, true),
	}
	group.recordView(v2)
	err := group.Resolve()
	assert.NoError(t, err)
	name := group.GetGroupName()
	assert.Equal(t, "group", name)
	v3 := db.NewGroupView("v3", "host3", 10)
	v3.GroupName = "group_foo"
	group.recordView(v3)
	err = group.Resolve()
	assert.Errorf(t, err, "group has split brain")
	name = group.GetGroupName()
	// group keeps the group name before finding a divergent group name
	assert.Equal(t, "group", name)
}

func TestIsActiveWithMultiplePrimary(t *testing.T) {
	group := NewSQLGroup(2, true, "ks", "0")
	v1 := db.NewGroupView("v1", "host1", 10)
	v1.GroupName = "group"
	v1.UnresolvedMembers = []*db.GroupMember{
		db.NewGroupMember("ONLINE", "PRIMARY", "host1", 10, false),
		db.NewGroupMember("ONLINE", "SECONDARY", "host2", 10, true),
	}
	group.recordView(v1)
	v2 := db.NewGroupView("v2", "host2", 10)
	v2.GroupName = "group"
	v2.UnresolvedMembers = []*db.GroupMember{
		db.NewGroupMember("ONLINE", "SECONDARY", "host1", 10, true),
		db.NewGroupMember("ONLINE", "PRIMARY", "host2", 10, false),
	}
	group.recordView(v2)
	err := group.Resolve()
	assert.Errorf(t, err, "group network partition")
}

func TestIsSafeToBootstrap(t *testing.T) {
	group := NewSQLGroup(1, true, "ks", "0")
	isSafe := group.IsSafeToBootstrap()
	assert.False(t, isSafe)
	v1 := db.NewGroupView("v1", "host1", 10)
	v1.GroupName = "group"
	v1.UnresolvedMembers = []*db.GroupMember{
		db.NewGroupMember("OFFLINE", "", "", 0, true),
		db.NewGroupMember("OFFLINE", "", "", 0, true),
	}
	group.recordView(v1)
	group.Resolve()
	isSafe = group.IsSafeToBootstrap()
	assert.True(t, isSafe)
}

func TestIsSafeToBootstrapWithPrimary(t *testing.T) {
	group := NewSQLGroup(1, true, "ks", "0")
	v1 := db.NewGroupView("v1", "host1", 10)
	v1.GroupName = "group"
	// it is not safe to bootstrap if we see a primary node in group
	v1.UnresolvedMembers = []*db.GroupMember{
		db.NewGroupMember("ONLINE", "PRIMARY", "host1", 0, false),
		db.NewGroupMember("OFFLINE", "", "", 0, true),
	}
	group.recordView(v1)
	group.Resolve()
	isSafe := group.IsSafeToBootstrap()
	assert.False(t, isSafe)
}

func TestIsUnconnectedReplica(t *testing.T) {
	group := NewSQLGroup(1, true, "ks", "0")
	isSafe := group.IsSafeToBootstrap()
	assert.False(t, isSafe)
	v1 := db.NewGroupView("v1", "host1", 10)
	v1.GroupName = "group"
	v1.UnresolvedMembers = []*db.GroupMember{
		db.NewGroupMember("ONLINE", "PRIMARY", "host1", 10, false),
		db.NewGroupMember("ONLINE", "SECONDARY", "host2", 10, true),
	}
	group.recordView(v1)
	group.Resolve()
	isUnconnected := group.IsUnconnectedReplica(&inst.InstanceKey{Hostname: "host2", Port: 10})
	assert.False(t, isUnconnected)
}

func TestGetOnlineGroupSizeFromPrimary(t *testing.T) {
	group := NewSQLGroup(1, true, "ks", "0")
	isSafe := group.IsSafeToBootstrap()
	assert.False(t, isSafe)
	v1 := db.NewGroupView("v1", "host1", 10)
	v1.GroupName = "group"
	v1.UnresolvedMembers = []*db.GroupMember{
		db.NewGroupMember("ONLINE", "PRIMARY", "host1", 10, false),
		db.NewGroupMember("ONLINE", "SECONDARY", "host2", 10, true),
		db.NewGroupMember("RECOVERING", "SECONDARY", "host3", 10, true),
	}
	v2 := db.NewGroupView("v2", "host2", 10)
	v2.GroupName = "group"
	v2.UnresolvedMembers = []*db.GroupMember{}
	group.recordView(v1)
	group.recordView(v2)
	group.Resolve()
	size, readOnly := group.GetOnlineGroupInfo()
	assert.Equal(t, 2, size)
	assert.False(t, readOnly)
}

// TestNetworkPartition checks that a majority of UNREACHABLE nodes yields a
// backoff error while the merged view is still populated.
func TestNetworkPartition(t *testing.T) {
	group := NewSQLGroup(3, true, "ks", "0")
	v1 := db.NewGroupView("v1", "host1", 10)
	v1.GroupName = "group"
	v1.UnresolvedMembers = []*db.GroupMember{
		db.NewGroupMember("ONLINE", "PRIMARY", "host1", 10, false),
		db.NewGroupMember("UNREACHABLE", "SECONDARY", "host2", 10, true),
		db.NewGroupMember("UNREACHABLE", "SECONDARY", "host3", 10, true),
	}
	v2 := db.NewGroupView("v2", "host2", 10)
	v2.GroupName = "group"
	v2.UnresolvedMembers = []*db.GroupMember{
		db.NewGroupMember("OFFLINE", "", "host2", 10, true),
	}
	v3 := db.NewGroupView("v3", "host3", 10)
	v3.GroupName = "group"
	v3.UnresolvedMembers = []*db.GroupMember{
		db.NewGroupMember("OFFLINE", "", "host3", 10, true),
	}
	group.recordView(v1)
	group.recordView(v2)
	group.recordView(v3)
	err := group.Resolve()
	assert.Errorf(t, err, "group backoff error")
	rv := group.resolvedView
	assert.Equal(t, "group", rv.groupName)
	assert.Equal(t, map[inst.InstanceKey]db.GroupMember{
		{Hostname: "host1", Port: 10}: {HostName: "host1", Port: 10, Role: db.PRIMARY, State: db.ONLINE, ReadOnly: false},
		{Hostname: "host2", Port: 10}: {HostName: "host2", Port: 10, Role: db.SECONDARY, State: db.UNREACHABLE, ReadOnly: true},
		{Hostname: "host3", Port: 10}: {HostName: "host3", Port: 10, Role: db.SECONDARY, State: db.UNREACHABLE, ReadOnly: true},
	}, rv.view)
}

func TestIsBootstrapInProcess(t *testing.T) {
	group := NewSQLGroup(3, true, "ks", "0")
	v1 := db.NewGroupView("v1", "host1", 10)
	v1.GroupName = "group"
	v1.UnresolvedMembers = []*db.GroupMember{
		db.NewGroupMember("RECOVERING", "SECONDARY", "host1", 10, false),
	}
	v2 := db.NewGroupView("v2", "host2", 10)
	v2.GroupName = "group"
	v2.UnresolvedMembers = []*db.GroupMember{
		db.NewGroupMember("OFFLINE", "", "host2", 10, false),
	}
	v3 := db.NewGroupView("v3", "host", 10)
	v3.GroupName = "group"
	v3.UnresolvedMembers = []*db.GroupMember{}
	group.recordView(v1)
	group.recordView(v2)
	group.recordView(v3)
	err := group.Resolve()
	assert.Errorf(t, err, "group transient error")
}

// TestResolve is a table-driven test covering the main Resolve/validate
// scenarios: healthy shard, conflicting read-only reports, split brain,
// empty hostnames, quorum loss and ongoing bootstrap.
func TestResolve(t *testing.T) {
	healthyView := []*db.GroupMember{
		{HostName: "host1", Port: 10, Role: db.PRIMARY, State: db.ONLINE, ReadOnly: false},
		{HostName: "host2", Port: 10, Role: db.SECONDARY, State: db.ONLINE, ReadOnly: true},
		{HostName: "host3", Port: 10, Role: db.SECONDARY, State: db.ONLINE, ReadOnly: true},
	}
	var testCases = []struct {
		testName string
		views    []*db.GroupView
		expected *ResolvedView
		errorMsg string
	}{
		{"test healthy shard", []*db.GroupView{
			{MySQLHost: "host1", MySQLPort: 10, GroupName: "group", UnresolvedMembers: healthyView},
			{MySQLHost: "host2", MySQLPort: 10, GroupName: "group", UnresolvedMembers: healthyView},
			{MySQLHost: "host3", MySQLPort: 10, GroupName: "group", UnresolvedMembers: healthyView},
		}, &ResolvedView{"group", map[inst.InstanceKey]db.GroupMember{
			{Hostname: "host1", Port: 10}: {HostName: "host1", Port: 10, Role: db.PRIMARY, State: db.ONLINE, ReadOnly: false},
			{Hostname: "host2", Port: 10}: {HostName: "host2", Port: 10, Role: db.SECONDARY, State: db.ONLINE, ReadOnly: true},
			{Hostname: "host3", Port: 10}: {HostName: "host3", Port: 10, Role: db.SECONDARY, State: db.ONLINE, ReadOnly: true},
		}}, ""},
		{"test readonly with unreachable primary", []*db.GroupView{ // host1 is unreachable
			{MySQLHost: "host2", MySQLPort: 10, GroupName: "group", UnresolvedMembers: []*db.GroupMember{
				{HostName: "host1", Port: 10, Role: db.PRIMARY, State: db.ONLINE, ReadOnly: false},
				{HostName: "host2", Port: 10, Role: db.SECONDARY, State: db.ONLINE, ReadOnly: true},
				{HostName: "host3", Port: 10, Role: db.SECONDARY, State: db.ONLINE, ReadOnly: false},
			}},
			{MySQLHost: "host3", MySQLPort: 10, GroupName: "group", UnresolvedMembers: []*db.GroupMember{
				{HostName: "host1", Port: 10, Role: db.PRIMARY, State: db.ONLINE, ReadOnly: false},
				{HostName: "host2", Port: 10, Role: db.SECONDARY, State: db.ONLINE, ReadOnly: false},
				{HostName: "host3", Port: 10, Role: db.SECONDARY, State: db.ONLINE, ReadOnly: true},
			}},
		}, &ResolvedView{"group", map[inst.InstanceKey]db.GroupMember{
			{Hostname: "host1", Port: 10}: {HostName: "host1", Port: 10, Role: db.PRIMARY, State: db.ONLINE, ReadOnly: false},
			{Hostname: "host2", Port: 10}: {HostName: "host2", Port: 10, Role: db.SECONDARY, State: db.ONLINE, ReadOnly: true},
			{Hostname: "host3", Port: 10}: {HostName: "host3", Port: 10, Role: db.SECONDARY, State: db.ONLINE, ReadOnly: true},
		}}, ""},
		{"test split brain by group name", []*db.GroupView{
			{MySQLHost: "host1", MySQLPort: 10, GroupName: "group", UnresolvedMembers: healthyView},
			{MySQLHost: "host2", MySQLPort: 10, GroupName: "group1", UnresolvedMembers: healthyView},
			{MySQLHost: "host3", MySQLPort: 10, GroupName: "group", UnresolvedMembers: healthyView},
		}, nil, "group has split brain"},
		{"test empty hostname", []*db.GroupView{
			{MySQLHost: "host1", MySQLPort: 10, GroupName: "group", UnresolvedMembers: []*db.GroupMember{
				{HostName: "", Port: 0, Role: db.UNKNOWNROLE, State: db.OFFLINE, ReadOnly: true},
			}},
			{MySQLHost: "host2", MySQLPort: 10, GroupName: "", UnresolvedMembers: []*db.GroupMember{
				{HostName: "host2", Port: 10, Role: db.UNKNOWNROLE, State: db.OFFLINE, ReadOnly: true},
			}},
			{MySQLHost: "host3", MySQLPort: 10, GroupName: "group", UnresolvedMembers: []*db.GroupMember{
				{HostName: "host3", Port: 10, Role: db.UNKNOWNROLE, State: db.OFFLINE, ReadOnly: true},
			}},
		}, &ResolvedView{"group", map[inst.InstanceKey]db.GroupMember{
			{Hostname: "host1", Port: 10}: {HostName: "host1", Port: 10, Role: db.UNKNOWNROLE, State: db.OFFLINE, ReadOnly: true},
			{Hostname: "host2", Port: 10}: {HostName: "host2", Port: 10, Role: db.UNKNOWNROLE, State: db.OFFLINE, ReadOnly: true},
			{Hostname: "host3", Port: 10}: {HostName: "host3", Port: 10, Role: db.UNKNOWNROLE, State: db.OFFLINE, ReadOnly: true},
		}}, ""},
		{"test network partition by majority unreachable", []*db.GroupView{
			{MySQLHost: "host1", MySQLPort: 10, GroupName: "group", UnresolvedMembers: []*db.GroupMember{
				{HostName: "host1", Port: 10, Role: db.PRIMARY, State: db.UNREACHABLE, ReadOnly: false},
				{HostName: "host2", Port: 10, Role: db.SECONDARY, State: db.ONLINE, ReadOnly: true},
				{HostName: "host3", Port: 10, Role: db.SECONDARY, State: db.UNREACHABLE, ReadOnly: true},
			}},
		}, nil, "group backoff error"},
		{"test no network partition with less then majority unreachable", []*db.GroupView{
			{MySQLHost: "host1", MySQLPort: 10, GroupName: "group", UnresolvedMembers: []*db.GroupMember{
				{HostName: "host1", Port: 10, Role: db.PRIMARY, State: db.ONLINE, ReadOnly: false},
				{HostName: "host2", Port: 10, Role: db.SECONDARY, State: db.ONLINE, ReadOnly: true},
				{HostName: "host3", Port: 10, Role: db.SECONDARY, State: db.UNREACHABLE, ReadOnly: false},
			}},
			{MySQLHost: "host2", MySQLPort: 10, GroupName: "group", UnresolvedMembers: []*db.GroupMember{
				{HostName: "host1", Port: 10, Role: db.PRIMARY, State: db.ONLINE, ReadOnly: false},
				{HostName: "host2", Port: 10, Role: db.SECONDARY, State: db.ONLINE, ReadOnly: true},
				{HostName: "host3", Port: 10, Role: db.SECONDARY, State: db.UNREACHABLE, ReadOnly: false},
			}},
		}, &ResolvedView{"group", map[inst.InstanceKey]db.GroupMember{
			{Hostname: "host1", Port: 10}: {HostName: "host1", Port: 10, Role: db.PRIMARY, State: db.ONLINE, ReadOnly: false},
			{Hostname: "host2", Port: 10}: {HostName: "host2", Port: 10, Role: db.SECONDARY, State: db.ONLINE, ReadOnly: true},
			{Hostname: "host3", Port: 10}: {HostName: "host3", Port: 10, Role: db.SECONDARY, State: db.UNREACHABLE, ReadOnly: false},
		}}, "group backoff error"},
		{"test network partition by unreachable primary", []*db.GroupView{
			{MySQLHost: "host2", MySQLPort: 10, GroupName: "group", UnresolvedMembers: []*db.GroupMember{
				{HostName: "host1", Port: 10, Role: db.PRIMARY, State: db.UNREACHABLE},
				{HostName: "host2", Port: 10, Role: db.SECONDARY, State: db.ONLINE},
				{HostName: "host3", Port: 10, Role: db.SECONDARY, State: db.ONLINE},
			}},
			{MySQLHost: "host3", MySQLPort: 10, GroupName: "group", UnresolvedMembers: []*db.GroupMember{
				{HostName: "host1", Port: 10, Role: db.PRIMARY, State: db.UNREACHABLE},
				{HostName: "host2", Port: 10, Role: db.SECONDARY, State: db.ONLINE},
				{HostName: "host3", Port: 10, Role: db.SECONDARY, State: db.ONLINE},
			}},
		}, nil, "group backoff error"},
		{"test bootstrap ongoing", []*db.GroupView{
			{MySQLHost: "host1", MySQLPort: 10, GroupName: "group", UnresolvedMembers: []*db.GroupMember{
				{HostName: "", Port: 0, Role: db.SECONDARY, State: db.RECOVERING, ReadOnly: true},
			}},
			{MySQLHost: "host2", MySQLPort: 10, GroupName: "group", UnresolvedMembers: []*db.GroupMember{}},
			{MySQLHost: "host3", MySQLPort: 10, GroupName: "group", UnresolvedMembers: []*db.GroupMember{}},
		}, nil, "group ongoing bootstrap"},
	}
	for _, testCase := range testCases {
		t.Run(testCase.testName, func(t *testing.T) {
			group := SQLGroup{views: testCase.views, statsTags: []string{"ks", "0"}}
			err := group.Resolve()
			if testCase.errorMsg != "" {
				assert.EqualError(t, err, testCase.errorMsg)
			} else {
				assert.NoError(t, err)
			}
			if testCase.expected != nil {
				rv := group.resolvedView
				expected := testCase.expected
				assert.Equal(t, expected, rv)
			}
		})
	}
}
diff --git a/go/vt/vtgr/controller/mock_refresh.go b/go/vt/vtgr/controller/mock_refresh.go
new file mode 100644
index 00000000000..7b3073e018a
--- /dev/null
+++ b/go/vt/vtgr/controller/mock_refresh.go
@@ -0,0 +1,135 @@
// Code generated by MockGen. DO NOT EDIT.
// Source: go/vt/vtgr/controller/refresh.go

// Package mock_controller is a generated GoMock package.
package controller

import (
	reflect "reflect"

	gomock "github.com/golang/mock/gomock"
	context "golang.org/x/net/context"

	topodata "vitess.io/vitess/go/vt/proto/topodata"
	topo "vitess.io/vitess/go/vt/topo"
)

// MockGRTopo is a mock of GRTopo interface
type MockGRTopo struct {
	ctrl     *gomock.Controller
	recorder *MockGRTopoMockRecorder
}

// MockGRTopoMockRecorder is the mock recorder for MockGRTopo
type MockGRTopoMockRecorder struct {
	mock *MockGRTopo
}

// NewMockGRTopo creates a new mock instance
func NewMockGRTopo(ctrl *gomock.Controller) *MockGRTopo {
	mock := &MockGRTopo{ctrl: ctrl}
	mock.recorder = &MockGRTopoMockRecorder{mock}
	return mock
}

// EXPECT returns an object that allows the caller to indicate expected use
func (m *MockGRTopo) EXPECT() *MockGRTopoMockRecorder {
	return m.recorder
}

// GetShardNames mocks base method
func (m *MockGRTopo) GetShardNames(ctx context.Context, keyspace string) ([]string, error) {
	m.ctrl.T.Helper()
	ret := m.ctrl.Call(m, "GetShardNames", ctx, keyspace)
	ret0, _ := ret[0].([]string)
	ret1, _ := ret[1].(error)
	return ret0, ret1
}

// GetShardNames indicates an expected call of GetShardNames
func (mr *MockGRTopoMockRecorder) GetShardNames(ctx, keyspace interface{}) *gomock.Call {
	mr.mock.ctrl.T.Helper()
	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetShardNames", reflect.TypeOf((*MockGRTopo)(nil).GetShardNames), ctx, keyspace)
}

// GetTabletMapForShardByCell mocks base method
func (m *MockGRTopo) GetTabletMapForShardByCell(ctx context.Context, keyspace, shard string, cells []string) (map[string]*topo.TabletInfo, error) {
	m.ctrl.T.Helper()
	ret := m.ctrl.Call(m, "GetTabletMapForShardByCell", ctx, keyspace, shard, cells)
	ret0, _ := ret[0].(map[string]*topo.TabletInfo)
	ret1, _ := ret[1].(error)
	return ret0, ret1
}

// GetTabletMapForShardByCell indicates an expected call of GetTabletMapForShardByCell
func (mr *MockGRTopoMockRecorder) GetTabletMapForShardByCell(ctx, keyspace, shard, cells interface{}) *gomock.Call {
	mr.mock.ctrl.T.Helper()
	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetTabletMapForShardByCell", reflect.TypeOf((*MockGRTopo)(nil).GetTabletMapForShardByCell), ctx, keyspace, shard, cells)
}

// LockShard mocks base method
func (m *MockGRTopo) LockShard(ctx context.Context, keyspace, shard, action string) (context.Context, func(*error), error) {
	m.ctrl.T.Helper()
	ret := m.ctrl.Call(m, "LockShard", ctx, keyspace, shard, action)
	ret0, _ := ret[0].(context.Context)
	ret1, _ := ret[1].(func(*error))
	ret2, _ := ret[2].(error)
	return ret0, ret1, ret2
}

// LockShard indicates an expected call of LockShard
func (mr *MockGRTopoMockRecorder) LockShard(ctx, keyspace, shard, action interface{}) *gomock.Call {
	mr.mock.ctrl.T.Helper()
	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "LockShard", reflect.TypeOf((*MockGRTopo)(nil).LockShard), ctx, keyspace, shard, action)
}

// MockGRTmcClient is a mock of GRTmcClient interface
type MockGRTmcClient struct {
	ctrl     *gomock.Controller
	recorder *MockGRTmcClientMockRecorder
}

// MockGRTmcClientMockRecorder is the mock recorder for MockGRTmcClient
type MockGRTmcClientMockRecorder struct {
	mock *MockGRTmcClient
}

// NewMockGRTmcClient creates a new mock instance
func NewMockGRTmcClient(ctrl *gomock.Controller) *MockGRTmcClient {
	mock := &MockGRTmcClient{ctrl: ctrl}
	mock.recorder = &MockGRTmcClientMockRecorder{mock}
	return mock
}

// EXPECT returns an object that allows the caller to indicate expected use
func (m *MockGRTmcClient) EXPECT() *MockGRTmcClientMockRecorder {
	return m.recorder
}

// ChangeType mocks base method
func (m *MockGRTmcClient) ChangeType(ctx context.Context, tablet *topodata.Tablet, dbType topodata.TabletType) error {
	m.ctrl.T.Helper()
	ret := m.ctrl.Call(m, "ChangeType", ctx, tablet, dbType)
	ret0, _ := ret[0].(error)
	return ret0
}

// ChangeType indicates an expected call of ChangeType
func (mr *MockGRTmcClientMockRecorder) ChangeType(ctx, tablet, dbType interface{}) *gomock.Call {
	mr.mock.ctrl.T.Helper()
	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "ChangeType", reflect.TypeOf((*MockGRTmcClient)(nil).ChangeType), ctx, tablet, dbType)
}

// Ping mocks base method
func (m *MockGRTmcClient) Ping(ctx context.Context, tablet *topodata.Tablet) error {
	m.ctrl.T.Helper()
	ret := m.ctrl.Call(m, "Ping", ctx, tablet)
	ret0, _ := ret[0].(error)
	return ret0
}

// Ping indicates an expected call of Ping
func (mr *MockGRTmcClientMockRecorder) Ping(ctx, tablet interface{}) *gomock.Call {
	mr.mock.ctrl.T.Helper()
	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Ping", reflect.TypeOf((*MockGRTmcClient)(nil).Ping), ctx, tablet)
}
diff --git a/go/vt/vtgr/controller/refresh.go b/go/vt/vtgr/controller/refresh.go
new file mode 100644
index 00000000000..e03f5bd36b0
--- /dev/null
+++ b/go/vt/vtgr/controller/refresh.go
@@ -0,0 +1,258 @@
/*
Copyright 2021 The Vitess Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package controller

import (
	"fmt"
	"strconv"
	"sync"
	"time"

	"vitess.io/vitess/go/vt/vtgr/config"

	"vitess.io/vitess/go/vt/vtgr/db"

	"vitess.io/vitess/go/stats"

	"golang.org/x/net/context"

	"vitess.io/vitess/go/vt/log"
	"vitess.io/vitess/go/vt/logutil"
	"vitess.io/vitess/go/vt/orchestrator/inst"
	topodatapb "vitess.io/vitess/go/vt/proto/topodata"
	"vitess.io/vitess/go/vt/topo"
)

var (
	lockShardTimingsMs = stats.NewMultiTimings("lockShard", "time vtgr takes to lock the shard", []string{"operation", "success"})
)

// grInstance represents an instance that's running MySQL GR
// it wraps a InstanceKey plus some tablet related information
type grInstance struct {
	instanceKey     *inst.InstanceKey
	tablet          *topodatapb.Tablet
	masterTimeStamp time.Time
	alias           string
}

// GRTopo is VTGR wrapper for topo server
type GRTopo interface {
	GetShardNames(ctx context.Context, keyspace string) ([]string, error)
	GetTabletMapForShardByCell(ctx context.Context, keyspace, shard string, cells []string) (map[string]*topo.TabletInfo, error)
	LockShard(ctx context.Context, keyspace, shard, action string) (context.Context, func(*error), error)
}

// GRTmcClient is VTGR wrapper for tmc client
type GRTmcClient interface {
	ChangeType(ctx context.Context, tablet *topodatapb.Tablet, dbType topodatapb.TabletType) error
	Ping(ctx context.Context, tablet *topodatapb.Tablet) error
}

// HostnameGetter is used to get local hostname
type HostnameGetter func() (string, error)

// GRShard stores the information about a Vitess shard that's running MySQL GR
type GRShard struct {
	KeyspaceShard        *topo.KeyspaceShard
	cells                []string
	instances            []*grInstance
	shardStatusCollector *shardStatusCollector
	sqlGroup             *SQLGroup
	ts                   GRTopo
	tmc                  GRTmcClient
	dbAgent              db.Agent

	// configuration
	minNumReplicas            int
	localDbPort               int
	disableReadOnlyProtection bool

	transientErrorWaitTime time.Duration
	bootstrapWaitTime      time.Duration

	lastDiagnoseResult DiagnoseType
	lastDiagnoseSince  time.Time

	// lock prevents multiple go routine fights with each other
	sync.Mutex
}

// shardStatusCollector is used for collecting shard status
type shardStatusCollector struct {
	status *ShardStatus
	sync.Mutex
}

// ShardStatus is used for debugging purpose to get current status of a shard
type ShardStatus struct {
	Keyspace       string
	Shard          string
	Instances      []string
	Unreachables   []string
	Problematics   []string
	Primary        string
	DiagnoseResult DiagnoseType
}

// newShardStatusCollector creates a collector pre-filled with the keyspace/shard identity.
func newShardStatusCollector(keyspace, shard string) *shardStatusCollector {
	return &shardStatusCollector{
		status: &ShardStatus{Keyspace: keyspace, Shard: shard},
	}
}

// NewGRShard creates a new GRShard
func NewGRShard(
	keyspace, shard string,
	cells []string,
	tmc GRTmcClient,
	ts GRTopo,
	dbAgent db.Agent,
	config *config.VTGRConfig,
	localDbPort int) *GRShard {
	return &GRShard{
		KeyspaceShard:             &topo.KeyspaceShard{Keyspace: keyspace, Shard: shard},
		cells:                     cells,
		shardStatusCollector:      newShardStatusCollector(keyspace, shard),
		tmc:                       tmc,
		ts:                        ts,
		dbAgent:                   dbAgent,
		sqlGroup:                  NewSQLGroup(config.GroupSize, true, keyspace, shard),
		minNumReplicas:            config.MinNumReplica,
		disableReadOnlyProtection: config.DisableReadOnlyProtection,
		localDbPort:               localDbPort,
		transientErrorWaitTime:    time.Duration(config.BackoffErrorWaitTimeSeconds) * time.Second,
		bootstrapWaitTime:         time.Duration(config.BootstrapWaitTimeSeconds) * time.Second,
	}
}

// refreshTabletsInShardLocked is called by repair to get a fresh view of the shard
// The caller is responsible to make sure the lock on GRShard
func (shard *GRShard) refreshTabletsInShardLocked(ctx context.Context) {
	instances, err := shard.refreshTabletsInShardInternal(ctx)
	if err == nil {
		shard.instances = instances
	}
}

// UpdateTabletsInShardWithLock updates the shard instances with a lock
func (shard *GRShard) UpdateTabletsInShardWithLock(ctx context.Context) {
	instances, err := shard.refreshTabletsInShardInternal(ctx)
	if err == nil {
		// Take a per shard lock here when we actually refresh the data to avoid
		// race conditions between controller and repair tasks
		shard.Lock()
		defer shard.Unlock()
		shard.instances = instances
	}
}

// refreshTabletsInShardInternal fetches the tablet map for this shard from the
// topo server and converts it into grInstance records. Errors are logged and
// returned; the cached instances are not modified here.
func (shard *GRShard) refreshTabletsInShardInternal(ctx context.Context) ([]*grInstance, error) {
	keyspace, shardName := shard.KeyspaceShard.Keyspace, shard.KeyspaceShard.Shard
	tablets, err := shard.ts.GetTabletMapForShardByCell(ctx, keyspace, shardName, shard.cells)
	if err != nil {
		log.Errorf("Error fetching tablets for keyspace/shardName %v/%v: %v", keyspace, shardName, err)
		return nil, err
	}
	return parseTabletInfos(tablets), nil
}

// parseTabletInfos replaces the replica reports for the shard key
// Note: this is not thread-safe
func parseTabletInfos(tablets map[string]*topo.TabletInfo) []*grInstance {
	// collect all replicas
	var newReplicas []*grInstance
	for alias, tabletInfo := range tablets {
		tablet := tabletInfo.Tablet
		// Only monitor master, replica and ronly tablet types
		switch tablet.Type {
		case topodatapb.TabletType_MASTER, topodatapb.TabletType_REPLICA, topodatapb.TabletType_RDONLY:
			// mysql hostname and port might be empty here if tablet is not running
			// we will treat them as unreachable
			instanceKey := inst.InstanceKey{
				Hostname: tablet.MysqlHostname,
				Port:     int(tablet.MysqlPort),
			}
			grInstance := grInstance{
				instanceKey:     &instanceKey,
				tablet:          tablet,
				masterTimeStamp: logutil.ProtoToTime(tablet.MasterTermStartTime),
				alias:           alias,
			}
			newReplicas = append(newReplicas, &grInstance)
		}
	}
	return newReplicas
}

// LockShard locks the keyspace-shard on topo server to prevent others from executing conflicting actions.
func (shard *GRShard) LockShard(ctx context.Context, action string) (context.Context, func(*error), error) {
	if shard.KeyspaceShard.Keyspace == "" || shard.KeyspaceShard.Shard == "" {
		return nil, nil, fmt.Errorf("try to grab lock with incomplete information: %v/%v", shard.KeyspaceShard.Keyspace, shard.KeyspaceShard.Shard)
	}
	start := time.Now()
	ctx, unlock, err := shard.ts.LockShard(ctx, shard.KeyspaceShard.Keyspace, shard.KeyspaceShard.Shard, fmt.Sprintf("VTGR repairing %s", action))
	// record lock latency tagged by action and whether the lock succeeded
	lockShardTimingsMs.Record([]string{action, strconv.FormatBool(err == nil)}, start)
	return ctx, unlock, err
}

// findTabletByHostAndPort returns the cached instance matching the given
// MySQL host/port, or nil when no such instance is known.
func (shard *GRShard) findTabletByHostAndPort(host string, port int) *grInstance {
	for _, instance := range shard.instances {
		if instance.instanceKey.Hostname == host && instance.instanceKey.Port == port {
			return instance
		}
	}
	return nil
}

// getToleratedNumError returns how many failed nodes the shard can tolerate
// while still keeping a quorum (N - (N/2+1)).
func (shard *GRShard) getToleratedNumError() int {
	quorum := len(shard.instances)/2 + 1
	return len(shard.instances) - quorum
}

// populateVTGRStatusLocked refreshes the instance list and primary alias in
// the status collector. The caller must hold the GRShard lock.
func (shard *GRShard) populateVTGRStatusLocked() {
	var instanceList []string
	for _, instance := range shard.instances {
		instanceList = append(instanceList, instance.alias)
	}
	shard.shardStatusCollector.status.Instances = instanceList
	if primary := shard.findShardPrimaryTablet(); primary != nil {
		shard.shardStatusCollector.status.Primary = primary.alias
	}
}

// GetCurrentShardStatuses returns the status collector has
func (shard *GRShard) GetCurrentShardStatuses() ShardStatus {
	shard.Lock()
	collector := shard.shardStatusCollector
	// dereference status so that we return a copy of the struct
	status := *collector.status
	shard.Unlock()
	return status
}

// isUnreachable reports whether the instance has no usable MySQL endpoint or
// was previously recorded in the Unreachables list.
func (collector *shardStatusCollector) isUnreachable(instance *grInstance) bool {
	if instance.instanceKey == nil || instance.instanceKey.Hostname == "" {
		return true
	}
	for _, alias := range collector.status.Unreachables {
		if instance.alias == alias {
			return true
		}
	}
	return false
}
diff --git a/go/vt/vtgr/controller/refresh_test.go b/go/vt/vtgr/controller/refresh_test.go new file mode 100644 index 00000000000..ec395bba3e4 --- /dev/null +++ b/go/vt/vtgr/controller/refresh_test.go @@ -0,0 +1,139 @@ +/* +Copyright 2021 The Vitess Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package controller + +import ( + "context" + "fmt" + "sort" + "testing" + "time" + + "github.com/golang/mock/gomock" + "github.com/stretchr/testify/assert" + + "vitess.io/vitess/go/vt/logutil" + topodatapb "vitess.io/vitess/go/vt/proto/topodata" + "vitess.io/vitess/go/vt/topo" + "vitess.io/vitess/go/vt/topo/memorytopo" + "vitess.io/vitess/go/vt/vtctl/grpcvtctldserver/testutil" + "vitess.io/vitess/go/vt/vtgr/config" +) + +func TestRefreshTabletsInShard(t *testing.T) { + ctrl := gomock.NewController(t) + defer ctrl.Finish() + ctx := context.Background() + ts := memorytopo.NewServer("test_cell") + defer ts.Close() + ts.CreateKeyspace(ctx, "ks", &topodatapb.Keyspace{}) + ts.CreateShard(ctx, "ks", "0") + tablet1 := buildTabletInfo(uint32(0), testHost, testPort0, topodatapb.TabletType_MASTER, time.Time{}) + tablet2 := buildTabletInfo(uint32(1), testHost, testPort1, topodatapb.TabletType_SPARE, time.Time{}) + tablet3 := buildTabletInfo(uint32(2), testHost, 0, topodatapb.TabletType_REPLICA, time.Time{}) + testutil.AddTablet(ctx, t, ts, tablet1.Tablet, nil) + testutil.AddTablet(ctx, t, ts, tablet2.Tablet, nil) + testutil.AddTablet(ctx, t, ts, tablet3.Tablet, nil) + cfg := 
&config.VTGRConfig{GroupSize: 3, MinNumReplica: 0, BackoffErrorWaitTimeSeconds: 1, BootstrapWaitTimeSeconds: 1} + shard := NewGRShard("ks", "0", nil, nil, ts, nil, cfg, testPort0) + assert.Equal(t, "ks", shard.shardStatusCollector.status.Keyspace) + assert.Equal(t, "0", shard.shardStatusCollector.status.Shard) + shard.refreshTabletsInShardLocked(context.Background()) + instances := shard.instances + // only have 2 instances here because we filter out the spare tablet + assert.Equal(t, 2, len(instances)) + sort.Slice(instances[:], func(i, j int) bool { + return instances[i].alias < instances[j].alias + }) + assert.Equal(t, testHost, instances[0].tablet.Hostname) + assert.Equal(t, int32(testPort0), instances[0].tablet.MysqlPort) + assert.Equal(t, topodatapb.TabletType_MASTER, instances[0].tablet.Type) + // host 3 is missing mysql host but we still put it in the instances list here + assert.Equal(t, testHost, instances[1].instanceKey.Hostname) + assert.Equal(t, int32(0), instances[1].tablet.MysqlPort) + assert.Equal(t, topodatapb.TabletType_REPLICA, instances[1].tablet.Type) +} + +func TestRefreshWithCells(t *testing.T) { + ctx := context.Background() + ts := memorytopo.NewServer("cell1", "cell2", "cell3") + defer ts.Close() + ts.CreateKeyspace(ctx, "ks", &topodatapb.Keyspace{}) + ts.CreateShard(ctx, "ks", "0") + tablet1 := buildTabletInfoWithCell(uint32(0), testHost, "cell1", testPort0, topodatapb.TabletType_REPLICA, time.Time{}) + tablet2 := buildTabletInfoWithCell(uint32(1), testHost, "cell2", testPort1, topodatapb.TabletType_REPLICA, time.Time{}) + tablet3 := buildTabletInfoWithCell(uint32(2), testHost, "cell3", testPort2, topodatapb.TabletType_REPLICA, time.Time{}) + testutil.AddTablet(ctx, t, ts, tablet1.Tablet, nil) + testutil.AddTablet(ctx, t, ts, tablet2.Tablet, nil) + testutil.AddTablet(ctx, t, ts, tablet3.Tablet, nil) + cfg := &config.VTGRConfig{GroupSize: 3, MinNumReplica: 0, BackoffErrorWaitTimeSeconds: 1, BootstrapWaitTimeSeconds: 1} + shard := 
NewGRShard("ks", "0", []string{"cell1", "cell3"}, nil, ts, nil, cfg, testPort0) + shard.refreshTabletsInShardLocked(context.Background()) + instances := shard.instances + // only have 2 instances here because we are not watching cell2 + assert.Equal(t, 2, len(instances)) + sort.Slice(instances[:], func(i, j int) bool { + return instances[i].alias < instances[j].alias + }) + assert.Equal(t, "cell1-0000000000", instances[0].alias) + assert.Equal(t, "cell3-0000000002", instances[1].alias) +} + +func TestRefreshWithEmptyCells(t *testing.T) { + ctx := context.Background() + ts := memorytopo.NewServer("cell1", "cell2", "cell3") + defer ts.Close() + ts.CreateKeyspace(ctx, "ks", &topodatapb.Keyspace{}) + ts.CreateShard(ctx, "ks", "0") + tablet1 := buildTabletInfoWithCell(uint32(0), testHost, "cell1", testPort0, topodatapb.TabletType_REPLICA, time.Time{}) + tablet2 := buildTabletInfoWithCell(uint32(1), testHost, "cell2", testPort1, topodatapb.TabletType_REPLICA, time.Time{}) + tablet3 := buildTabletInfoWithCell(uint32(2), testHost, "cell3", testPort2, topodatapb.TabletType_REPLICA, time.Time{}) + testutil.AddTablet(ctx, t, ts, tablet1.Tablet, nil) + testutil.AddTablet(ctx, t, ts, tablet2.Tablet, nil) + testutil.AddTablet(ctx, t, ts, tablet3.Tablet, nil) + cfg := &config.VTGRConfig{GroupSize: 3, MinNumReplica: 0, BackoffErrorWaitTimeSeconds: 1, BootstrapWaitTimeSeconds: 1} + shard := NewGRShard("ks", "0", nil, nil, ts, nil, cfg, testPort0) + shard.refreshTabletsInShardLocked(context.Background()) + instances := shard.instances + // nil cell will return everything + assert.Equal(t, 3, len(instances)) + sort.Slice(instances[:], func(i, j int) bool { + return instances[i].alias < instances[j].alias + }) + assert.Equal(t, "cell1-0000000000", instances[0].alias) + assert.Equal(t, "cell2-0000000001", instances[1].alias) + assert.Equal(t, "cell3-0000000002", instances[2].alias) +} + +func buildTabletInfo(id uint32, host string, mysqlPort int, ttype topodatapb.TabletType, 
masterTermTime time.Time) *topo.TabletInfo { + return buildTabletInfoWithCell(id, host, "test_cell", mysqlPort, ttype, masterTermTime) +} + +func buildTabletInfoWithCell(id uint32, host, cell string, mysqlPort int, ttype topodatapb.TabletType, masterTermTime time.Time) *topo.TabletInfo { + alias := &topodatapb.TabletAlias{Cell: cell, Uid: id} + return &topo.TabletInfo{Tablet: &topodatapb.Tablet{ + Alias: alias, + Hostname: host, + MysqlHostname: host, + MysqlPort: int32(mysqlPort), + Keyspace: "ks", + Shard: "0", + Type: ttype, + MasterTermStartTime: logutil.TimeToProto(masterTermTime), + Tags: map[string]string{"hostname": fmt.Sprintf("host_%d", id)}, + }} +} diff --git a/go/vt/vtgr/controller/repair.go b/go/vt/vtgr/controller/repair.go new file mode 100644 index 00000000000..8781cf08e3f --- /dev/null +++ b/go/vt/vtgr/controller/repair.go @@ -0,0 +1,740 @@ +/* +Copyright 2021 The Vitess Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package controller + +import ( + "errors" + "flag" + "fmt" + "strconv" + "sync" + "time" + + "golang.org/x/net/context" + + "vitess.io/vitess/go/mysql" + "vitess.io/vitess/go/stats" + "vitess.io/vitess/go/vt/concurrency" + "vitess.io/vitess/go/vt/log" + topodatapb "vitess.io/vitess/go/vt/proto/topodata" + "vitess.io/vitess/go/vt/topo" + "vitess.io/vitess/go/vt/vterrors" + "vitess.io/vitess/go/vt/vtgr/db" +) + +var ( + repairTimingsMs = stats.NewMultiTimings("repairTimingsMs", "time vtgr takes to repair", []string{"status", "success"}) + unexpectedLockLost = stats.NewCountersWithMultiLabels("unexpectedLockLost", "unexpected lost of the lock", []string{"Keyspace", "Shard"}) + + abortRebootstrap = flag.Bool("abort_rebootstrap", false, "don't allow vtgr to rebootstrap an existing group") +) + +// RepairResultCode is the code for repair +type RepairResultCode string + +const ( + // Success means successfully repaired + Success RepairResultCode = "Success" + // Fail means failed to repaire + Fail RepairResultCode = "Fail" + // Noop means do nothing + Noop RepairResultCode = "Noop" +) + +// Repair tries to fix shard based on the diagnose type +func (shard *GRShard) Repair(ctx context.Context, status DiagnoseType) (RepairResultCode, error) { + shard.Lock() + defer shard.Unlock() + var err error + code := Noop + switch status { + case DiagnoseTypeShardHasNoGroup: + code, err = shard.repairShardHasNoGroup(ctx) + case DiagnoseTypeShardHasInactiveGroup: + code, err = shard.repairShardHasInactiveGroup(ctx) + case DiagnoseTypeWrongPrimaryTablet: + code, err = shard.repairWrongPrimaryTablet(ctx) + case DiagnoseTypeUnconnectedReplica: + code, err = shard.repairUnconnectedReplica(ctx) + case DiagnoseTypeUnreachablePrimary: + code, err = shard.repairUnreachablePrimary(ctx) + case DiagnoseTypeInsufficientGroupSize: + code, err = shard.repairInsufficientGroupSize(ctx) + case DiagnoseTypeReadOnlyShard: + code, err = shard.repairReadOnlyShard(ctx) + case 
DiagnoseTypeBootstrapBackoff, DiagnoseTypeBackoffError: + code, err = shard.repairBackoffError(ctx, status) + case DiagnoseTypeError: + log.Errorf("%v is %v", formatKeyspaceShard(shard.KeyspaceShard), status) + case DiagnoseTypeHealthy: + start := time.Now() + repairTimingsMs.Record([]string{string(status), "true"}, start) + } + if status != DiagnoseTypeHealthy { + log.Infof("VTGR repaired %v status=%v | code=%v", formatKeyspaceShard(shard.KeyspaceShard), status, code) + } + return code, vterrors.Wrap(err, "vtgr repair") +} + +func (shard *GRShard) repairShardHasNoGroup(ctx context.Context) (RepairResultCode, error) { + ctx, unlock, err := shard.LockShard(ctx, "repairShardHasNoGroup") + if err != nil { + log.Warningf("repairShardHasNoPrimaryTablet fails to grab lock for the shard %v: %v", shard.KeyspaceShard, err) + return Noop, err + } + defer unlock(&err) + shard.refreshTabletsInShardLocked(ctx) + // Diagnose() will call shardAgreedGroup as the first thing + // which will update mysqlGroup stored in the shard + status, err := shard.diagnoseLocked(ctx) + if err != nil { + log.Errorf("Failed to diagnose: %v", err) + return Fail, err + } + if status != DiagnoseTypeShardHasNoGroup { + log.Infof("Shard %v is no longer in DiagnoseTypeShardHasNoGroup: %v", formatKeyspaceShard(shard.KeyspaceShard), status) + return Noop, nil + } + start := time.Now() + err = shard.repairShardHasNoGroupAction(ctx) + repairTimingsMs.Record([]string{DiagnoseTypeShardHasNoGroup, strconv.FormatBool(err == nil)}, start) + if err != nil { + return Fail, err + } + return Success, nil +} + +func (shard *GRShard) repairShardHasNoGroupAction(ctx context.Context) error { + // If group is not empty AND there is at least one active group member + // we don't need to bootstrap. 
Instead we should try to join the group + mysqlGroup := shard.shardAgreedGroupName() + isAllOffline := shard.isAllOfflineOrError() + if mysqlGroup != "" { + log.Infof("Shard %v already have a group %v", formatKeyspaceShard(shard.KeyspaceShard), mysqlGroup) + return nil + } + // This should not really happen in reality + if mysqlGroup == "" && !isAllOffline { + return fmt.Errorf("shard %v has empty group name but some node is not OFFLINE", formatKeyspaceShard(shard.KeyspaceShard)) + } + + // Now we know group is null and there is no active node + // we should bootstrap the group + replicas := shard.instances + // Sanity check to make sure there is at least one instance + if len(replicas) == 0 { + log.Warningf("Cannot find any instance for the shard %v", formatKeyspaceShard(shard.KeyspaceShard)) + return nil + } + if !shard.sqlGroup.IsSafeToBootstrap() { + return errors.New("unsafe to bootstrap group") + } + var candidate *grInstance + for _, replica := range replicas { + if !shard.shardStatusCollector.isUnreachable(replica) { + candidate = replica + break + } + } + if candidate == nil { + return errors.New("fail to find any candidate to bootstrap") + } + // Bootstrap the group + log.Infof("Bootstrapping the group for %v on host=%v", formatKeyspaceShard(shard.KeyspaceShard), candidate.instanceKey.Hostname) + // Make sure we still hold the topo server lock before moving on + if err := shard.checkShardLocked(ctx); err != nil { + return err + } + if err := shard.dbAgent.BootstrapGroupLocked(candidate.instanceKey); err != nil { + // if bootstrap failed, the next one that gets the lock will try to do it again + log.Errorf("Failed to bootstrap mysql group on %v: %v", candidate.instanceKey.Hostname, err) + return err + } + log.Infof("Bootstrapped the group for %v", formatKeyspaceShard(shard.KeyspaceShard)) + return nil +} + +func (shard *GRShard) repairShardHasInactiveGroup(ctx context.Context) (RepairResultCode, error) { + ctx, unlock, err := shard.LockShard(ctx, 
"repairShardHasInactiveGroup") + if err != nil { + log.Warningf("repairShardHasInactiveGroup fails to grab lock for the shard %v: %v", shard.KeyspaceShard, err) + return Noop, err + } + defer unlock(&err) + shard.refreshTabletsInShardLocked(ctx) + // Diagnose() will call shardAgreedGroup as the first thing + // which will update mysqlGroup stored in the shard + status, err := shard.diagnoseLocked(ctx) + if err != nil { + log.Errorf("Failed to diagnose: %v", err) + return Fail, err + } + if status != DiagnoseTypeShardHasInactiveGroup { + log.Infof("Shard %v is no longer in DiagnoseTypeShardHasInactiveGroup: %v", formatKeyspaceShard(shard.KeyspaceShard), status) + return Noop, nil + } + // Now we know the shard has an agreed group but no member in it + // We should find one with the largest GTID set as the + // new mysql primary to bootstrap the group + start := time.Now() + err = shard.stopAndRebootstrap(ctx) + repairTimingsMs.Record([]string{DiagnoseTypeShardHasInactiveGroup, strconv.FormatBool(err == nil)}, start) + if err != nil { + return Fail, err + } + return Success, nil +} + +func (shard *GRShard) repairBackoffError(ctx context.Context, diagnose DiagnoseType) (RepairResultCode, error) { + ctx, unlock, err := shard.LockShard(ctx, "repairBackoffError") + if err != nil { + log.Warningf("repairBackoffError fails to grab lock for the shard %v: %v", shard.KeyspaceShard, err) + return Noop, err + } + defer unlock(&err) + shard.refreshTabletsInShardLocked(ctx) + status, err := shard.diagnoseLocked(ctx) + if err != nil { + log.Errorf("Failed to diagnose: %v", err) + return Fail, err + } + if status != diagnose { + log.Infof("Shard %v is no longer in %v: %v", formatKeyspaceShard(shard.KeyspaceShard), diagnose, status) + return Noop, nil + } + if shard.lastDiagnoseResult != diagnose { + log.Infof("diagnose shard as %v but last diagnose result was %v", diagnose, shard.lastDiagnoseResult) + return Noop, nil + } + now := time.Now() + var waitTime time.Duration + switch 
diagnose { + case DiagnoseTypeBackoffError: + waitTime = shard.transientErrorWaitTime + case DiagnoseTypeBootstrapBackoff: + waitTime = shard.bootstrapWaitTime + default: + return Fail, fmt.Errorf("unsupported diagnose for repairBackoffError: %v", diagnose) + } + if now.Sub(shard.lastDiagnoseSince) < waitTime { + log.Infof("Detected %v at %v. In wait time for network partition", diagnose, shard.lastDiagnoseSince) + return Noop, nil + } + log.Infof("Detected %v at %v. Start repairing after %v", diagnose, shard.lastDiagnoseSince, shard.transientErrorWaitTime) + err = shard.stopAndRebootstrap(ctx) + repairTimingsMs.Record([]string{DiagnoseTypeBackoffError, strconv.FormatBool(err == nil)}, now) + if err != nil { + return Fail, err + } + return Success, nil +} + +func (shard *GRShard) stopAndRebootstrap(ctx context.Context) error { + // Make sure we still hold the topo server lock before moving on + if err := shard.checkShardLocked(ctx); err != nil { + return err + } + // Before bootstrap the group, we need to stop group first + // abort aggressively here as soon as we encounter an error + // StopGroupLocked will check if instance is NOT in "ONLINE"/"RECOVERING" state (i.e., UNREACHABLE, ERROR or OFFLINE) + errorRecorder := shard.forAllInstances(func(instance *grInstance, wg *sync.WaitGroup, er concurrency.ErrorRecorder) { + defer wg.Done() + status := shard.sqlGroup.GetStatus(instance.instanceKey) + if status != nil && status.State == db.OFFLINE { + log.Infof("stop group replication on %v skipped because it is already OFFLINE", instance.alias) + return + } + log.Infof("stop group replication on %v", instance.alias) + err := shard.dbAgent.StopGroupLocked(instance.instanceKey) + if err != nil { + if !unreachableError(err) { + er.RecordError(err) + } + log.Warningf("Error during stop group replication on %v: %v", instance.instanceKey.Hostname, err) + } + }) + if errorRecorder.HasErrors() { + log.Errorf("Failed to stop group replication %v", errorRecorder.Error()) + return 
errorRecorder.Error() + } + log.Infof("Stop the group for %v", formatKeyspaceShard(shard.KeyspaceShard)) + log.Info("Start find candidate to rebootstrap") + candidate, err := shard.findRebootstrapCandidate(ctx) + if err != nil { + log.Errorf("Failed to find rebootstrap candidate: %v", err) + return err + } + shard.refreshSQLGroup() + if !shard.sqlGroup.IsSafeToBootstrap() { + return errors.New("unsafe to bootstrap group") + } + if *abortRebootstrap { + log.Warningf("Abort stopAndRebootstrap because rebootstrap hook override") + return errForceAbortBootstrap + } + log.Infof("Rebootstrap %v on %v", formatKeyspaceShard(shard.KeyspaceShard), candidate.instanceKey.Hostname) + // Make sure we still hold the topo server lock before moving on + if err := shard.checkShardLocked(ctx); err != nil { + return err + } + return shard.dbAgent.BootstrapGroupLocked(candidate.instanceKey) +} + +func (shard *GRShard) getGTIDSetFromAll(skipMaster bool) (*groupGTIDRecorder, *concurrency.AllErrorRecorder, error) { + if len(shard.instances) == 0 { + return nil, nil, fmt.Errorf("%v has 0 instance", formatKeyspaceShard(shard.KeyspaceShard)) + } + // Before we do failover, we first verify if there is no one agreed group name. 
	// If not, VTGR is not smart enough to figure out how to failover
	// Note: the caller should make sure the mysqlGroup is refreshed after we grab a shard level lock
	mysqlGroup := shard.shardAgreedGroupName()
	if mysqlGroup == "" {
		return nil, nil, fmt.Errorf("unable to find an agreed group name in %v", formatKeyspaceShard(shard.KeyspaceShard))
	}
	primary := shard.findShardPrimaryTablet()
	var mysqlPrimaryHost string
	var mysqlPrimaryPort int
	// skipMaster is true when we manually failover or if there is an unreachable primary tablet
	// in both cases, there should be a reconciled primary tablet
	if skipMaster && primary != nil {
		status := shard.sqlGroup.GetStatus(primary.instanceKey)
		mysqlPrimaryHost, mysqlPrimaryPort = status.HostName, status.Port
		log.Infof("Found primary instance from MySQL on %v", mysqlPrimaryHost)
	}
	gtidRecorder := &groupGTIDRecorder{}
	// Iterate through all the instances in the shard and find the one with largest GTID set with best effort
	// We wrap it with forAllInstances so that the failover can continue if there is a host
	// that is unreachable
	errorRecorder := shard.forAllInstances(func(instance *grInstance, wg *sync.WaitGroup, er concurrency.ErrorRecorder) {
		defer wg.Done()
		if skipMaster && instance.instanceKey.Hostname == mysqlPrimaryHost && instance.instanceKey.Port == mysqlPrimaryPort {
			log.Infof("Skip %v to failover to a non-primary node", mysqlPrimaryHost)
			return
		}
		gtids, err := shard.dbAgent.FetchApplierGTIDSet(instance.instanceKey)
		if err != nil {
			er.RecordError(err)
			log.Errorf("%v get error while fetch applier GTIDs: %v", instance.alias, err)
			shard.shardStatusCollector.recordProblematics(instance)
			if unreachableError(err) {
				shard.shardStatusCollector.recordUnreachables(instance)
			}
			return
		}
		if gtids == nil {
			log.Warningf("[failover candidate] skip %s with empty gtid", instance.alias)
			return
		}
		gtidRecorder.recordGroupGTIDs(gtids, instance)
	})
	return gtidRecorder, errorRecorder, nil
}

// findRebootstrapCandidate returns the reachable instance with the largest
// GTID set; any error from fetching GTIDs aborts the rebootstrap
func (shard *GRShard) findRebootstrapCandidate(ctx context.Context) (*grInstance, error) {
	gtidRecorder, errorRecorder, err := shard.getGTIDSetFromAll(false)
	if err != nil {
		log.Errorf("Failed to get gtid from all: %v", err)
		return nil, err
	}
	err = errorRecorder.Error()
	// We cannot tolerate any error from mysql during a rebootstrap.
	if err != nil {
		log.Errorf("Failed to fetch all GTID with forAllInstances for rebootstrap: %v", err)
		return nil, err
	}
	candidate, err := shard.findFailoverCandidateFromRecorder(ctx, gtidRecorder, nil)
	if err != nil {
		log.Errorf("Failed to find rebootstrap candidate by GTID after forAllInstances: %v", err)
		return nil, err
	}
	if candidate == nil {
		return nil, fmt.Errorf("failed to find rebootstrap candidate for %v", formatKeyspaceShard(shard.KeyspaceShard))
	}
	if !shard.instanceReachable(ctx, candidate) {
		log.Errorf("rebootstrap candidate %v (%v) is not reachable via ping", candidate.alias, candidate.instanceKey.Hostname)
		return nil, fmt.Errorf("%v is unreachable", candidate.alias)
	}
	log.Infof("%v is the rebootstrap candidate", candidate.alias)
	return candidate, nil
}

// Caller of this function should make sure it gets the shard lock and it has the
// latest view of a shard. Otherwise, we might skip the wrong node when we locate the candidate
func (shard *GRShard) findFailoverCandidate(ctx context.Context) (*grInstance, error) {
	gtidRecorder, errorRecorder, err := shard.getGTIDSetFromAll(true)
	if err != nil {
		log.Errorf("Failed to get gtid from all: %v", err)
		return nil, err
	}
	err = errorRecorder.Error()
	// During the repair for unreachable primary we still have a mysql group.
	// Failover within the group is safe, finding the largest GTID is an optimization.
	// therefore we don't check error from errorRecorder just log it
	if err != nil {
		log.Warningf("Errors when fetch all GTID with forAllInstances for failover: %v", err)
	}
	// best-effort reachability probe: mark unreachable hosts so the
	// candidate filter below can exclude them
	shard.forAllInstances(func(instance *grInstance, wg *sync.WaitGroup, er concurrency.ErrorRecorder) {
		defer wg.Done()
		if !shard.instanceReachable(ctx, instance) {
			log.Errorf("%v is not reachable via ping", instance.alias)
			shard.shardStatusCollector.recordProblematics(instance)
			shard.shardStatusCollector.recordUnreachables(instance)
		}
	})
	var candidate *grInstance
	candidate, err = shard.findFailoverCandidateFromRecorder(ctx, gtidRecorder, func(c context.Context, instance *grInstance) bool {
		// reject any instance recorded as unreachable above
		for _, unreachable := range shard.shardStatusCollector.status.Unreachables {
			if unreachable == instance.alias {
				return false
			}
		}
		return true
	})
	if err != nil {
		log.Errorf("Failed to find failover candidate by GTID after forAllInstances: %v", err)
		return nil, err
	}
	if candidate == nil {
		return nil, fmt.Errorf("failed to find failover candidate for %v", formatKeyspaceShard(shard.KeyspaceShard))
	}
	log.Infof("%v is the failover candidate", candidate.alias)
	return candidate, nil
}

// repairWrongPrimaryTablet aligns the Vitess primary tablet with the mysql
// group primary while holding the shard lock
func (shard *GRShard) repairWrongPrimaryTablet(ctx context.Context) (RepairResultCode, error) {
	ctx, unlock, err := shard.LockShard(ctx, "repairWrongPrimaryTablet")
	if err != nil {
		log.Warningf("repairWrongPrimaryTablet fails to grab lock for the shard %v: %v", shard.KeyspaceShard, err)
		return Noop, err
	}
	defer unlock(&err)
	// We grab shard level lock and check again if there is no primary
	// to avoid race conditions
	shard.refreshTabletsInShardLocked(ctx)
	status, err := shard.diagnoseLocked(ctx)
	if err != nil {
		log.Errorf("Failed to diagnose: %v", err)
		return Fail, err
	}
	if status != DiagnoseTypeWrongPrimaryTablet {
		log.Infof("Shard %v is no longer in DiagnoseTypeWrongPrimaryTablet: %v", formatKeyspaceShard(shard.KeyspaceShard), status)
		return Noop, nil
	}
	start := time.Now()
	err = shard.fixPrimaryTabletLocked(ctx)
	repairTimingsMs.Record([]string{DiagnoseTypeWrongPrimaryTablet, strconv.FormatBool(err == nil)}, start)
	if err != nil {
		return Fail, err
	}
	return Success, nil
}

// fixPrimaryTabletLocked changes Vitess primary tablet based on mysql group
func (shard *GRShard) fixPrimaryTabletLocked(ctx context.Context) error {
	host, port, isActive := shard.sqlGroup.GetPrimary()
	if !isActive {
		return db.ErrGroupInactive
	}
	// Primary tablet does not run mysql primary, we need to change it accordingly
	candidate := shard.findTabletByHostAndPort(host, port)
	if candidate == nil {
		return errMissingPrimaryTablet
	}
	// Make sure we still hold the topo server lock before moving on
	if err := shard.checkShardLocked(ctx); err != nil {
		return err
	}
	err := shard.tmc.ChangeType(ctx, candidate.tablet, topodatapb.TabletType_MASTER)
	if err != nil {
		return fmt.Errorf("failed to change type to master on %v: %v", candidate.alias, err)
	}
	log.Infof("Successfully make %v the primary tablet", candidate.alias)
	return nil
}

// repairUnconnectedReplica usually handle the case when there is a DiagnoseTypeHealthy tablet and
// it is not connected to mysql primary node
func (shard *GRShard) repairUnconnectedReplica(ctx context.Context) (RepairResultCode, error) {
	ctx, unlock, err := shard.LockShard(ctx, "repairUnconnectedReplica")
	if err != nil {
		log.Warningf("repairUnconnectedReplica fails to grab lock for the shard %v: %v", formatKeyspaceShard(shard.KeyspaceShard), err)
		return Noop, err
	}
	defer unlock(&err)
	shard.refreshTabletsInShardLocked(ctx)
	status, err := shard.diagnoseLocked(ctx)
	if err != nil {
		log.Errorf("Failed to diagnose: %v", err)
		return Fail, err
	}
	if status != DiagnoseTypeUnconnectedReplica {
		log.Infof("Shard %v is no longer in DiagnoseTypeUnconnectedReplica: %v", formatKeyspaceShard(shard.KeyspaceShard), status)
		return Noop, nil
	}
+ start := time.Now() + err = shard.repairUnconnectedReplicaAction(ctx) + repairTimingsMs.Record([]string{DiagnoseTypeUnconnectedReplica, strconv.FormatBool(err == nil)}, start) + if err != nil { + return Fail, err + } + return Success, nil +} + +func (shard *GRShard) repairUnconnectedReplicaAction(ctx context.Context) error { + primaryInstance := shard.findShardPrimaryTablet() + target, err := shard.disconnectedInstance() + if err != nil { + return err + } + if target == nil { + log.Infof("there is no instance without group for %v", formatKeyspaceShard(shard.KeyspaceShard)) + return nil + } + log.Infof("Connecting replica %v to %v", target.instanceKey.Hostname, primaryInstance.instanceKey.Hostname) + status := shard.sqlGroup.GetStatus(target.instanceKey) + // Make sure we still hold the topo server lock before moving on + if err := shard.checkShardLocked(ctx); err != nil { + return err + } + if status != nil && status.State != db.OFFLINE { + log.Infof("stop group replication on %v ($v) before join the group", target.alias, status.State) + err := shard.dbAgent.StopGroupLocked(target.instanceKey) + if err != nil { + log.Errorf("Failed to stop group replication on %v: %v", target.instanceKey.Hostname, err) + return err + } + // Make sure we still hold the topo server lock before moving on + if err := shard.checkShardLocked(ctx); err != nil { + return err + } + } + return shard.dbAgent.JoinGroupLocked(target.instanceKey, primaryInstance.instanceKey) +} + +func (shard *GRShard) repairUnreachablePrimary(ctx context.Context) (RepairResultCode, error) { + ctx, unlock, err := shard.LockShard(ctx, "repairUnreachablePrimary") + if err != nil { + log.Warningf("repairUnreachablePrimary fails to grab lock for the shard %v: %v", formatKeyspaceShard(shard.KeyspaceShard), err) + return Noop, err + } + defer unlock(&err) + shard.refreshTabletsInShardLocked(ctx) + status, err := shard.diagnoseLocked(ctx) + if err != nil { + log.Errorf("Failed to diagnose: %v", err) + return Fail, 
err + } + if status != DiagnoseTypeUnreachablePrimary { + log.Infof("Shard %v is no longer in DiagnoseTypeUnreachablePrimary: %v", formatKeyspaceShard(shard.KeyspaceShard), status) + return Noop, nil + } + // We are here because either: + // 1. we have a primary tablet, but it's not reachable + // 2. we cannot find primary tablet but we do have a mysql group + // we need to failover mysql manually + // + // other case will be handled by different testGroupInput, e.g., + // has reachable primary tablet, but run on different node than mysql -> DiagnoseTypeWrongPrimaryTablet + start := time.Now() + err = shard.failoverLocked(ctx) + repairTimingsMs.Record([]string{DiagnoseTypeUnreachablePrimary, strconv.FormatBool(err == nil)}, start) + if err != nil { + return Fail, err + } + return Success, nil +} + +func (shard *GRShard) repairInsufficientGroupSize(ctx context.Context) (RepairResultCode, error) { + ctx, unlock, err := shard.LockShard(ctx, "repairInsufficientGroupSize") + if err != nil { + log.Warningf("repairInsufficientGroupSize fails to grab lock for the shard %v: %v", formatKeyspaceShard(shard.KeyspaceShard), err) + return Noop, err + } + defer unlock(&err) + shard.refreshTabletsInShardLocked(ctx) + status, err := shard.diagnoseLocked(ctx) + if err != nil { + log.Errorf("Failed to diagnose: %v", err) + return Fail, err + } + if status != DiagnoseTypeInsufficientGroupSize { + log.Infof("Shard %v is no longer in DiagnoseTypeInsufficientGroupSize: %v", formatKeyspaceShard(shard.KeyspaceShard), status) + return Noop, nil + } + // We check primary tablet is consistent with sql primary before InsufficientGroupSize + // therefore primary we found here is correct and healthy + primary := shard.findShardPrimaryTablet() + // Make sure we still hold the topo server lock before moving on + if err := shard.checkShardLocked(ctx); err != nil { + return Fail, err + } + // mysql group will set super_read_only properly automatically + // 
https://mysqlhighavailability.com/protecting-your-data-fail-safe-enhancements-to-group-replication/
+ // since Vitess only knows one writable node (primary tablet) if we want to make sure there is no write
+ // after there is insufficient members, we can just set primary mysql node to be read only
+ err = shard.dbAgent.SetReadOnly(primary.instanceKey, true)
+ if err != nil {
+ return Fail, err
+ }
+ return Success, nil
+}
+
+// repairReadOnlyShard clears the super_read_only protection on the shard primary
+// once the group has regained enough members to accept writes safely.
+func (shard *GRShard) repairReadOnlyShard(ctx context.Context) (RepairResultCode, error) {
+ ctx, unlock, err := shard.LockShard(ctx, "repairReadOnlyShard")
+ if err != nil {
+ log.Warningf("repairReadOnlyShard fails to grab lock for the shard %v: %v", formatKeyspaceShard(shard.KeyspaceShard), err)
+ return Noop, err
+ }
+ defer unlock(&err)
+ shard.refreshTabletsInShardLocked(ctx)
+ status, err := shard.diagnoseLocked(ctx)
+ if err != nil {
+ log.Errorf("Failed to diagnose: %v", err)
+ return Fail, err
+ }
+ // re-diagnose after acquiring the lock: the shard may have healed in the meantime
+ if status != DiagnoseTypeReadOnlyShard {
+ log.Infof("Shard %v is no longer in DiagnoseTypeReadOnlyShard: %v", formatKeyspaceShard(shard.KeyspaceShard), status)
+ return Noop, nil
+ }
+ primary := shard.findShardPrimaryTablet()
+ // Make sure we still hold the topo server lock before moving on
+ if err := shard.checkShardLocked(ctx); err != nil {
+ return Fail, err
+ }
+ // undo what we did in repairInsufficientGroupSize
+ err = shard.dbAgent.SetReadOnly(primary.instanceKey, false)
+ if err != nil {
+ return Fail, err
+ }
+ return Success, nil
+}
+
+// Failover takes a shard and finds the node with the largest GTID set as the mysql primary of the group
+func (shard *GRShard) Failover(ctx context.Context) error {
+ ctx, unlock, err := shard.LockShard(ctx, "Failover")
+ if err != nil {
+ log.Warningf("Failover fails to grab lock for the shard %v: %v", formatKeyspaceShard(shard.KeyspaceShard), err)
+ return err
+ }
+ defer unlock(&err)
+ shard.refreshTabletsInShardLocked(ctx)
+ return shard.failoverLocked(ctx)
+}
+
+func (shard *GRShard) 
failoverLocked(ctx context.Context) error { + candidate, err := shard.findFailoverCandidate(ctx) + if err != nil { + log.Errorf("Failed to find failover candidate: %v", err) + return err + } + // Make sure we still hold the topo server lock before moving on + if err := shard.checkShardLocked(ctx); err != nil { + return err + } + err = shard.dbAgent.Failover(candidate.instanceKey) + if err != nil { + log.Errorf("Failed to failover mysql to %v", candidate.alias) + return err + } + log.Infof("Successfully failover MySQL to %v for %v", candidate.instanceKey.Hostname, formatKeyspaceShard(shard.KeyspaceShard)) + // Make sure we still hold the topo server lock before moving on + if err := shard.checkShardLocked(ctx); err != nil { + return err + } + err = shard.tmc.ChangeType(ctx, candidate.tablet, topodatapb.TabletType_MASTER) + if err != nil { + log.Errorf("Failed to failover Vitess %v", candidate.alias) + return err + } + log.Infof("Successfully failover Vitess to %v for %v", candidate.alias, formatKeyspaceShard(shard.KeyspaceShard)) + return nil +} + +func (shard *GRShard) findFailoverCandidateFromRecorder(ctx context.Context, recorder *groupGTIDRecorder, check func(context.Context, *grInstance) bool) (*grInstance, error) { + if len(recorder.gtidWithInstances) == 0 { + return nil, fmt.Errorf("empty failover candidate list for %v", formatKeyspaceShard(shard.KeyspaceShard)) + } + // Sort the gtidWithInstances slice so that we have consistent candidate + // in case they have same gtid set + recorder.sort() + for _, gtidInst := range recorder.gtidWithInstances { + log.Infof("[failover candidates] %s gtid %s", gtidInst.instance.alias, gtidInst.gtids.String()) + } + var largestGTIDs mysql.GTIDSet + var candidate *grInstance + var divergentCandidates []string + // All the instances in the recorder have a reachable mysqld + // hence anyone is a valid failover candidate + for _, elem := range recorder.gtidWithInstances { + gtids := elem.gtids + inst := elem.instance + if check 
!= nil && !check(ctx, inst) {
+ log.Warningf("Skip %v as candidate with gtid %v because it failed the check", inst.alias, gtids.String())
+ continue
+ }
+ if largestGTIDs == nil {
+ largestGTIDs = gtids
+ candidate = inst
+ continue
+ }
+ // If largestGTIDs is subset of current gtids, it means instance has larger GTID than candidate
+ // we need to swap them out
+ isSubset, isSuperset := compareGTIDSet(largestGTIDs, gtids)
+ if isSubset {
+ largestGTIDs = gtids
+ candidate = inst
+ continue
+ }
+ // largestGTIDs is neither subset nor super set of gtids
+ // we log and append to candidates so that we know there is a problem in the group
+ // after the iteration
+ if !isSuperset {
+ log.Errorf("FetchGroupView divergent GTID set from host=%v GTIDSet=%v", inst.instanceKey.Hostname, gtids)
+ divergentCandidates = append(divergentCandidates, inst.alias)
+ }
+ }
+ // unless GTID set diverged, the candidates should be empty
+ if len(divergentCandidates) > 0 {
+ divergentCandidates = append(divergentCandidates, candidate.alias)
+ return nil, fmt.Errorf("found more than one failover candidates by GTID set for %v: %v", formatKeyspaceShard(shard.KeyspaceShard), divergentCandidates)
+ }
+ return candidate, nil
+}
+
+// compareGTIDSet reports (isSubset, isSuperset) of set1 relative to set2.
+// The two booleans are never both true.
+func compareGTIDSet(set1, set2 mysql.GTIDSet) (bool, bool) {
+ isSubset := set2.Contains(set1)
+ // If set1 is subset of set2 we find a GTID super set and just need to record it
+ if isSubset {
+ return true, false
+ }
+ // If set1 is not a subset of set2 we need to see if set1 is actually a super set of set2
+ // this is to detect GTID set divergence
+ isSubset = set1.Contains(set2)
+ // We know set1 is not a subset of set2; if set2 is also not a subset of set1, it means
+ // there is a divergence in the GTID sets
+ return false, isSubset
+}
+
+func (shard *GRShard) checkShardLocked(ctx context.Context) error {
+ if err := topo.CheckShardLocked(ctx, shard.KeyspaceShard.Keyspace, shard.KeyspaceShard.Shard); err != nil {
+ labels := []string{shard.KeyspaceShard.Keyspace,
shard.KeyspaceShard.Shard} + unexpectedLockLost.Add(labels, 1) + log.Errorf("lost topology lock; aborting") + return vterrors.Wrap(err, "lost topology lock; aborting") + } + return nil +} diff --git a/go/vt/vtgr/controller/repair_test.go b/go/vt/vtgr/controller/repair_test.go new file mode 100644 index 00000000000..d4344c3138e --- /dev/null +++ b/go/vt/vtgr/controller/repair_test.go @@ -0,0 +1,1235 @@ +/* +Copyright 2021 The Vitess Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package controller + +import ( + "context" + "errors" + "fmt" + "math/rand" + "strconv" + "strings" + "sync" + "testing" + "time" + + "vitess.io/vitess/go/mysql" + "vitess.io/vitess/go/vt/orchestrator/inst" + topodatapb "vitess.io/vitess/go/vt/proto/topodata" + "vitess.io/vitess/go/vt/topo" + "vitess.io/vitess/go/vt/topo/memorytopo" + "vitess.io/vitess/go/vt/vtctl/grpcvtctldserver/testutil" + "vitess.io/vitess/go/vt/vtgr/config" + "vitess.io/vitess/go/vt/vtgr/db" + + gomock "github.com/golang/mock/gomock" + "github.com/stretchr/testify/assert" +) + +const repairGroupSize = 2 + +func TestRepairShardHasNoGroup(t *testing.T) { + type data struct { + alias string + mysqlhost string + mysqlport int + groupName string + readOnly bool + groupInput []db.TestGroupState + ttype topodatapb.TabletType + } + var testcases = []struct { + name string + expectedCalls int + errorMsg string + inputs []data + }{ + {"shard without group", 1, "", []data{ + {alias0, testHost, testPort0, "", true, []db.TestGroupState{ + 
{MemberHost: "", MemberPort: "NULL", MemberState: "OFFLINE", MemberRole: ""}, + }, topodatapb.TabletType_REPLICA}, + {alias1, testHost, testPort1, "", true, []db.TestGroupState{ + {MemberHost: "", MemberPort: "NULL", MemberState: "OFFLINE", MemberRole: ""}, + }, topodatapb.TabletType_REPLICA}, + {alias2, testHost, testPort2, "", true, []db.TestGroupState{ + {MemberHost: "", MemberPort: "NULL", MemberState: "OFFLINE", MemberRole: ""}, + }, topodatapb.TabletType_REPLICA}, + }}, + {"healthy shard", 0, "", []data{ + {alias0, testHost, testPort0, "group", false, []db.TestGroupState{ + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"}, + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "ONLINE", MemberRole: "SECONDARY"}, + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "ONLINE", MemberRole: "SECONDARY"}, + }, topodatapb.TabletType_MASTER}, + {alias1, testHost, testPort1, "group", true, []db.TestGroupState{ + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"}, + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "ONLINE", MemberRole: "SECONDARY"}, + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "ONLINE", MemberRole: "SECONDARY"}, + }, topodatapb.TabletType_REPLICA}, + {alias2, testHost, testPort2, "group", true, []db.TestGroupState{ + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"}, + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "ONLINE", MemberRole: "SECONDARY"}, + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "ONLINE", MemberRole: "SECONDARY"}, + }, topodatapb.TabletType_REPLICA}, + }}, + {"no active member for group", 0, "", []data{ // this should rebootstrap a group by DiagnoseTypeShardHasInactiveGroup + {alias0, testHost, testPort0, "group", true, 
[]db.TestGroupState{ + {MemberHost: "", MemberPort: "NULL", MemberState: "OFFLINE", MemberRole: ""}, + }, topodatapb.TabletType_REPLICA}, + {alias1, testHost, testPort1, "", false, []db.TestGroupState{ + {MemberHost: "", MemberPort: "NULL", MemberState: "ERROR", MemberRole: "SECONDARY"}, + }, topodatapb.TabletType_REPLICA}, + {alias2, testHost, testPort2, "", true, []db.TestGroupState{ + {MemberHost: "", MemberPort: "NULL", MemberState: "OFFLINE", MemberRole: "SECONDARY"}, + }, topodatapb.TabletType_REPLICA}, + }}, + {"raise error for unreachable primary", 0, "", []data{ // shoud be ShardHasInactiveGroup + {alias0, testHost, testPort0, "group", true, []db.TestGroupState{ + {MemberHost: "", MemberPort: "NULL", MemberState: "OFFLINE", MemberRole: ""}, + }, topodatapb.TabletType_REPLICA}, + {alias1, testHost, testPort1, "", true, []db.TestGroupState{ + {MemberHost: "", MemberPort: "NULL", MemberState: "ERROR", MemberRole: "SECONDARY"}, + }, topodatapb.TabletType_REPLICA}, + {alias2, testHost, testPort2, "", true, []db.TestGroupState{ + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "UNREACHABLE", MemberRole: "PRIMARY"}, + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "ONLINE", MemberRole: "SECONDARY"}, + }, topodatapb.TabletType_REPLICA}, + }}, + {"raise error without bootstrap with only one reachable node", 0, "vtgr repair: unsafe to bootstrap group", []data{ + {alias0, "", 0, "group", true, []db.TestGroupState{ + {MemberHost: "", MemberPort: "NULL", MemberState: "OFFLINE", MemberRole: ""}, + }, topodatapb.TabletType_REPLICA}, + {alias1, testHost, testPort1, "", true, []db.TestGroupState{ + {MemberHost: "", MemberPort: "NULL", MemberState: "OFFLINE", MemberRole: ""}, + }, topodatapb.TabletType_REPLICA}, + {alias2, "", testPort2, "", true, []db.TestGroupState{ + {MemberHost: "", MemberPort: "NULL", MemberState: "OFFLINE", MemberRole: ""}, + }, topodatapb.TabletType_REPLICA}, + }}, + } + tablets := 
make(map[string]*topo.TabletInfo) + for _, tt := range testcases { + t.Run(tt.name, func(t *testing.T) { + ctx := context.Background() + ctrl := gomock.NewController(t) + defer ctrl.Finish() + ts := memorytopo.NewServer("test_cell") + defer ts.Close() + ts.CreateKeyspace(ctx, "ks", &topodatapb.Keyspace{}) + ts.CreateShard(ctx, "ks", "0") + tmc := NewMockGRTmcClient(ctrl) + dbAgent := db.NewMockAgent(ctrl) + inputMap := make(map[int]testGroupInput) + dbAgent. + EXPECT(). + // RepairShardHasNoGroup is fixed by calling BootstrapGroupLocked + BootstrapGroupLocked(gomock.Any()). + DoAndReturn(func(target *inst.InstanceKey) error { + if target.Hostname == "" || target.Port == 0 { + return errors.New("invalid mysql instance key") + } + input := inputMap[target.Port] + groupState := input.groupState + if len(groupState) == 1 && groupState[0].MemberState == "OFFLINE" { + groupState[0].MemberState = "ONLINE" + groupState[0].MemberRole = "PRIMARY" + groupState[0].MemberHost = target.Hostname + groupState[0].MemberPort = strconv.Itoa(target.Port) + input.groupState = groupState + } else { + for i, s := range groupState { + if s.MemberHost == target.Hostname { + s.MemberState = "ONLINE" + s.MemberRole = "PRIMARY" + groupState[i] = s + } + input.groupState = groupState + } + } + inputMap[target.Port] = input + return nil + }). + Times(tt.expectedCalls) + for i, input := range tt.inputs { + tablet := buildTabletInfo(uint32(i), input.mysqlhost, testPort0+i, input.ttype, time.Now()) + testutil.AddTablet(ctx, t, ts, tablet.Tablet, nil) + tablets[input.alias] = tablet + inputMap[input.mysqlport] = testGroupInput{ + input.groupName, + input.readOnly, + input.groupInput, + nil, + } + dbAgent. + EXPECT(). + FetchGroupView(gomock.Eq(input.alias), gomock.Any()). 
+ DoAndReturn(func(alias string, target *inst.InstanceKey) (*db.GroupView, error) { + if target.Hostname == "" || target.Port == 0 { + return nil, errors.New("invalid mysql instance key") + } + s := inputMap[target.Port] + view := db.BuildGroupView(alias, s.groupName, target.Hostname, target.Port, s.readOnly, s.groupState) + return view, nil + }). + AnyTimes() + } + tmc. + EXPECT(). + Ping(gomock.Any(), gomock.Any()). + Return(nil). + AnyTimes() + cfg := &config.VTGRConfig{GroupSize: repairGroupSize, MinNumReplica: 2, BackoffErrorWaitTimeSeconds: 1, BootstrapWaitTimeSeconds: 1} + shard := NewGRShard("ks", "0", nil, tmc, ts, dbAgent, cfg, testPort0) + shard.UpdateTabletsInShardWithLock(ctx) + _, err := shard.Repair(ctx, DiagnoseTypeShardHasNoGroup) + if tt.errorMsg == "" { + assert.NoError(t, err) + } else { + assert.EqualError(t, err, tt.errorMsg) + } + }) + } +} + +func TestRepairShardHasInactiveGroup(t *testing.T) { + type data struct { + alias string + mysqlhost string + mysqlport int + groupName string + groupInput []db.TestGroupState + pingable bool + gtid mysql.GTIDSet + ttype topodatapb.TabletType + } + sid1 := "3e11fa47-71ca-11e1-9e33-c80aa9429562" + var testcases = []struct { + name string + errorMsg string + expectedCandidatePort int + inputs []data + }{ + {"shard has inactive group", "", testPort0, []data{ + {alias0, testHost, testPort0, "group", []db.TestGroupState{ + {MemberHost: "", MemberPort: "NULL", MemberState: "OFFLINE", MemberRole: ""}, + }, true, getMysql56GTIDSet(sid1, "1-10"), topodatapb.TabletType_REPLICA}, + {alias1, testHost, testPort1, "group", []db.TestGroupState{ + {MemberHost: "", MemberPort: "NULL", MemberState: "OFFLINE", MemberRole: ""}, + }, true, getMysql56GTIDSet(sid1, "1-9"), topodatapb.TabletType_MASTER}, + {alias2, testHost, testPort2, "group", []db.TestGroupState{ + {MemberHost: "", MemberPort: "NULL", MemberState: "OFFLINE", MemberRole: ""}, + }, true, getMysql56GTIDSet(sid1, "1-9"), topodatapb.TabletType_REPLICA}, + }}, + 
{"unreachable rebootstrap candidate", "vtgr repair: test_cell-0000000000 is unreachable", 0, []data{ + {alias0, testHost, testPort0, "group", []db.TestGroupState{ + {MemberHost: "", MemberPort: "NULL", MemberState: "OFFLINE", MemberRole: ""}, + }, false, getMysql56GTIDSet(sid1, "1-10"), topodatapb.TabletType_REPLICA}, + {alias1, testHost, testPort1, "group", []db.TestGroupState{ + {MemberHost: "", MemberPort: "NULL", MemberState: "OFFLINE", MemberRole: ""}, + }, true, getMysql56GTIDSet(sid1, "1-9"), topodatapb.TabletType_MASTER}, + {alias2, testHost, testPort2, "group", []db.TestGroupState{ + {MemberHost: "", MemberPort: "NULL", MemberState: "OFFLINE", MemberRole: ""}, + }, true, getMysql56GTIDSet(sid1, "1-9"), topodatapb.TabletType_REPLICA}, + }}, + {"inactive shard with empty gtid", "", testPort0, []data{ + {alias0, testHost, testPort0, "group", []db.TestGroupState{ + {MemberHost: "", MemberPort: "NULL", MemberState: "OFFLINE", MemberRole: ""}, + }, true, getMysql56GTIDSet(sid1, "1-10"), topodatapb.TabletType_REPLICA}, + {alias1, testHost, testPort1, "group", []db.TestGroupState{ + {MemberHost: "", MemberPort: "NULL", MemberState: "OFFLINE", MemberRole: ""}, + }, true, getMysql56GTIDSet("", ""), topodatapb.TabletType_REPLICA}, + {alias2, testHost, testPort2, "group", []db.TestGroupState{ + {MemberHost: "", MemberPort: "NULL", MemberState: "OFFLINE", MemberRole: ""}, + }, true, getMysql56GTIDSet("", ""), topodatapb.TabletType_REPLICA}, + }}, + {"shard has more than one group", "vtgr repair: fail to refreshSQLGroup: group has split brain", 0, []data{ // vtgr raises error + {alias0, testHost, testPort0, "group1", []db.TestGroupState{ + {MemberHost: "", MemberPort: "NULL", MemberState: "OFFLINE", MemberRole: ""}, + }, true, getMysql56GTIDSet(sid1, "1-9"), topodatapb.TabletType_REPLICA}, + {alias1, testHost, testPort1, "group2", []db.TestGroupState{ + {MemberHost: "", MemberPort: "NULL", MemberState: "OFFLINE", MemberRole: ""}, + }, true, getMysql56GTIDSet(sid1, 
"1-10"), topodatapb.TabletType_REPLICA}, + {alias2, testHost, testPort2, "group1", []db.TestGroupState{ + {MemberHost: "", MemberPort: "NULL", MemberState: "OFFLINE", MemberRole: ""}, + }, true, getMysql56GTIDSet(sid1, "1-9"), topodatapb.TabletType_REPLICA}, + }}, + {"shard has inconsistent gtids", "vtgr repair: found more than one failover candidates by GTID set for ks/0", 0, []data{ // vtgr raises error + {alias0, testHost, testPort0, "group", []db.TestGroupState{ + {MemberHost: "", MemberPort: "NULL", MemberState: "OFFLINE", MemberRole: ""}, + }, true, getMysql56GTIDSet("264a8230-67d2-11eb-acdd-0a8d91f24125", "1-9"), topodatapb.TabletType_REPLICA}, + {alias1, testHost, testPort1, "group", []db.TestGroupState{ + {MemberHost: "", MemberPort: "NULL", MemberState: "OFFLINE", MemberRole: ""}, + }, true, getMysql56GTIDSet(sid1, "1-10"), topodatapb.TabletType_REPLICA}, + {alias2, testHost, testPort2, "group", []db.TestGroupState{ + {MemberHost: "", MemberPort: "NULL", MemberState: "OFFLINE", MemberRole: ""}, + }, true, getMysql56GTIDSet(sid1, "1-9"), topodatapb.TabletType_REPLICA}, + }}, + {"error on one unreachable mysql", "invalid mysql instance key", 0, []data{ + {alias0, "", 0, "group", []db.TestGroupState{ + {MemberHost: "", MemberPort: "NULL", MemberState: "OFFLINE", MemberRole: ""}, + }, true, getMysql56GTIDSet(sid1, "1-11"), topodatapb.TabletType_REPLICA}, + {alias1, testHost, testPort1, "group", []db.TestGroupState{ + {MemberHost: "", MemberPort: "NULL", MemberState: "OFFLINE", MemberRole: ""}, + }, true, getMysql56GTIDSet(sid1, "1-10"), topodatapb.TabletType_REPLICA}, + {alias2, testHost, testPort2, "group", []db.TestGroupState{ + {MemberHost: "", MemberPort: "NULL", MemberState: "OFFLINE", MemberRole: ""}, + }, true, getMysql56GTIDSet(sid1, "1-9"), topodatapb.TabletType_REPLICA}, + }}, + {"error on one unreachable tablet", "vtgr repair: test_cell-0000000000 is unreachable", 0, []data{ + {alias0, testHost, testPort0, "group", []db.TestGroupState{ + 
{MemberHost: "", MemberPort: "NULL", MemberState: "OFFLINE", MemberRole: ""}, + }, false, getMysql56GTIDSet(sid1, "1-10"), topodatapb.TabletType_REPLICA}, + {alias1, testHost, testPort1, "group", []db.TestGroupState{ + {MemberHost: "", MemberPort: "NULL", MemberState: "OFFLINE", MemberRole: ""}, + }, true, getMysql56GTIDSet(sid1, "1-9"), topodatapb.TabletType_REPLICA}, + {alias2, testHost, testPort2, "group", []db.TestGroupState{ + {MemberHost: "", MemberPort: "NULL", MemberState: "OFFLINE", MemberRole: ""}, + }, true, getMysql56GTIDSet(sid1, "1-9"), topodatapb.TabletType_REPLICA}, + }}, + {"shard has active member", "", 0, []data{ // vtgr sees an active node it should not try to bootstrap + {alias0, testHost, testPort0, "group", []db.TestGroupState{ + {MemberHost: "", MemberPort: "NULL", MemberState: "OFFLINE", MemberRole: ""}, + }, true, getMysql56GTIDSet(sid1, "1-10"), topodatapb.TabletType_REPLICA}, + {alias1, testHost, testPort1, "group", []db.TestGroupState{ + {MemberHost: "host_2", MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"}, + }, true, getMysql56GTIDSet(sid1, "1-9"), topodatapb.TabletType_REPLICA}, + {alias2, testHost, testPort2, "group", []db.TestGroupState{ + {MemberHost: "", MemberPort: "NULL", MemberState: "OFFLINE", MemberRole: ""}, + }, true, getMysql56GTIDSet(sid1, "1-9"), topodatapb.TabletType_REPLICA}, + }}, + {"shard has active member but more than one group", "vtgr repair: fail to refreshSQLGroup: group has split brain", 0, []data{ // split brain should overweight active member diagnose + {alias0, testHost, testPort0, "group1", []db.TestGroupState{ + {MemberHost: "", MemberPort: "NULL", MemberState: "OFFLINE", MemberRole: ""}, + }, true, getMysql56GTIDSet(sid1, "1-10"), topodatapb.TabletType_REPLICA}, + {alias1, testHost, testPort1, "group1", []db.TestGroupState{ + {MemberHost: "host_2", MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"}, + }, true, getMysql56GTIDSet(sid1, 
"1-9"), topodatapb.TabletType_REPLICA}, + {alias2, testHost, testPort2, "group2", []db.TestGroupState{ + {MemberHost: "", MemberPort: "NULL", MemberState: "OFFLINE", MemberRole: ""}, + }, true, getMysql56GTIDSet(sid1, "1-9"), topodatapb.TabletType_REPLICA}, + }}, + } + tablets := make(map[string]*topo.TabletInfo) + for _, tt := range testcases { + t.Run(tt.name, func(t *testing.T) { + ctrl := gomock.NewController(t) + defer ctrl.Finish() + ctx := context.Background() + ts := memorytopo.NewServer("test_cell") + defer ts.Close() + ts.CreateKeyspace(ctx, "ks", &topodatapb.Keyspace{}) + ts.CreateShard(ctx, "ks", "0") + tmc := NewMockGRTmcClient(ctrl) + dbAgent := db.NewMockAgent(ctrl) + expectedCalls := 0 + if tt.expectedCandidatePort != 0 { + expectedCalls = 1 + } + inputMap := make(map[int]testGroupInput) + pingable := make(map[string]bool) + var lock sync.Mutex + dbAgent. + EXPECT(). + // RepairShardHasNoGroup is fixed by calling BootstrapGroupLocked + BootstrapGroupLocked(&inst.InstanceKey{Hostname: testHost, Port: tt.expectedCandidatePort}). + DoAndReturn(func(target *inst.InstanceKey) error { + if target.Hostname == "" || target.Port == 0 { + return errors.New("invalid mysql instance key") + } + input := inputMap[target.Port] + groupState := input.groupState + if len(groupState) == 1 && groupState[0].MemberState == "OFFLINE" { + groupState[0].MemberState = "ONLINE" + groupState[0].MemberRole = "PRIMARY" + groupState[0].MemberHost = target.Hostname + groupState[0].MemberPort = strconv.Itoa(target.Port) + input.groupState = groupState + } else { + for i, s := range groupState { + if s.MemberHost == target.Hostname { + s.MemberState = "ONLINE" + s.MemberRole = "PRIMARY" + groupState[i] = s + } + input.groupState = groupState + } + } + inputMap[target.Port] = input + return nil + }). 
+ Times(expectedCalls) + for i, input := range tt.inputs { + tablet := buildTabletInfo(uint32(i), input.mysqlhost, input.mysqlport, input.ttype, time.Now()) + testutil.AddTablet(ctx, t, ts, tablet.Tablet, nil) + tablets[input.alias] = tablet + inputMap[input.mysqlport] = testGroupInput{ + input.groupName, + false, + input.groupInput, + input.gtid, + } + pingable[tablet.Alias.String()] = input.pingable + dbAgent. + EXPECT(). + FetchGroupView(gomock.Eq(input.alias), gomock.Any()). + DoAndReturn(func(alias string, target *inst.InstanceKey) (*db.GroupView, error) { + if target.Hostname == "" || target.Port == 0 { + return nil, errors.New("invalid mysql instance key") + } + s := inputMap[target.Port] + view := db.BuildGroupView(alias, s.groupName, target.Hostname, target.Port, s.readOnly, s.groupState) + return view, nil + }). + AnyTimes() + dbAgent. + EXPECT(). + FetchApplierGTIDSet(gomock.Any()). + DoAndReturn(func(target *inst.InstanceKey) (mysql.GTIDSet, error) { + if target.Hostname == "" || target.Port == 0 { + return nil, errors.New("invalid mysql instance key") + } + return inputMap[target.Port].gtid, nil + }). + AnyTimes() + dbAgent. + EXPECT(). + StopGroupLocked(gomock.Any()). + DoAndReturn(func(target *inst.InstanceKey) error { + if target.Hostname == "" || target.Port == 0 { + return errors.New("invalid mysql instance key") + } + lock.Lock() + view := inputMap[target.Port] + view.groupState = []db.TestGroupState{ + {MemberHost: testHost, MemberPort: strconv.Itoa(target.Port), MemberState: "OFFLINE", MemberRole: ""}, + } + inputMap[target.Port] = view + lock.Unlock() + return nil + }). + AnyTimes() + tmc. + EXPECT(). + Ping(gomock.Any(), gomock.Any()). + DoAndReturn(func(_ context.Context, t *topodatapb.Tablet) error { + if !pingable[t.Alias.String()] { + return errors.New("unreachable") + } + return nil + }). 
+ AnyTimes() + } + cfg := &config.VTGRConfig{GroupSize: repairGroupSize, MinNumReplica: 2, BackoffErrorWaitTimeSeconds: 1, BootstrapWaitTimeSeconds: 1} + shard := NewGRShard("ks", "0", nil, tmc, ts, dbAgent, cfg, testPort0) + _, err := shard.Repair(ctx, DiagnoseTypeShardHasInactiveGroup) + if tt.errorMsg == "" { + assert.NoError(t, err) + } else { + assert.Error(t, err, tt.errorMsg) + assert.True(t, strings.Contains(err.Error(), tt.errorMsg), err.Error()) + } + }) + } +} + +func TestRepairWrongPrimaryTablet(t *testing.T) { + type data struct { + alias string + mysqlport int + groupName string + groupInput []db.TestGroupState + ttype topodatapb.TabletType + } + + var testcases = []struct { + name string + errorMsg string + expectedCandidatePort int + inputs []data + }{ + {"fix no primary tablet in shard", "", testPort0, []data{ + {alias0, testPort0, "group", []db.TestGroupState{ + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"}, + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "ONLINE", MemberRole: "SECONDARY"}, + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "ONLINE", MemberRole: "SECONDARY"}, + }, topodatapb.TabletType_REPLICA}, + {alias1, testPort1, "group", []db.TestGroupState{ + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"}, + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "ONLINE", MemberRole: "SECONDARY"}, + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "ONLINE", MemberRole: "SECONDARY"}, + }, topodatapb.TabletType_REPLICA}, + {alias2, testPort2, "group", []db.TestGroupState{ + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"}, + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "ONLINE", MemberRole: "SECONDARY"}, + {MemberHost: testHost, MemberPort: 
strconv.Itoa(testPort2), MemberState: "ONLINE", MemberRole: "SECONDARY"}, + }, topodatapb.TabletType_REPLICA}, + }}, + {"fix wrong primary tablet", "", testPort0, []data{ + {alias0, testPort0, "group", []db.TestGroupState{ + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"}, + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "ONLINE", MemberRole: "SECONDARY"}, + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "ONLINE", MemberRole: "SECONDARY"}, + }, topodatapb.TabletType_REPLICA}, + {alias1, testPort1, "group", []db.TestGroupState{ + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"}, + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "ONLINE", MemberRole: "SECONDARY"}, + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "ONLINE", MemberRole: "SECONDARY"}, + }, topodatapb.TabletType_MASTER}, + {alias2, testPort2, "group", []db.TestGroupState{ + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"}, + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "ONLINE", MemberRole: "SECONDARY"}, + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "ONLINE", MemberRole: "SECONDARY"}, + }, topodatapb.TabletType_REPLICA}, + }}, + {"fix shard if there is an unreachable secondary", "", testPort0, []data{ + {alias0, testPort0, "group", []db.TestGroupState{ + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"}, + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "ONLINE", MemberRole: "SECONDARY"}, + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "UNREACHABLE", MemberRole: "SECONDARY"}, + }, topodatapb.TabletType_REPLICA}, + {alias1, testPort1, "group", []db.TestGroupState{ + 
{MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"}, + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "ONLINE", MemberRole: "SECONDARY"}, + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "UNREACHABLE", MemberRole: "SECONDARY"}, + }, topodatapb.TabletType_MASTER}, + {alias2, testPort2, "group", []db.TestGroupState{ + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"}, + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "UNREACHABLE", MemberRole: "SECONDARY"}, + }, topodatapb.TabletType_REPLICA}, + }}, + {"diagnose as ShardHasInactiveGroup if quorum number of not online", "", 0, []data{ + {alias0, testPort0, "group", []db.TestGroupState{ + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "UNREACHABLE", MemberRole: "PRIMARY"}, + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "UNREACHABLE", MemberRole: "SECONDARY"}, + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "ONLINE", MemberRole: "SECONDARY"}, + }, topodatapb.TabletType_REPLICA}, + {alias1, testPort1, "group", []db.TestGroupState{ + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "UNREACHABLE", MemberRole: "SECONDARY"}, + }, topodatapb.TabletType_MASTER}, + {alias2, testPort2, "group", []db.TestGroupState{ + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "UNREACHABLE", MemberRole: "PRIMARY"}, + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "ONLINE", MemberRole: "SECONDARY"}, + }, topodatapb.TabletType_REPLICA}, + }}, + {"tolerate failed nodes", "", testPort0, []data{ + {alias0, testPort0, "group", []db.TestGroupState{ + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"}, + {MemberHost: testHost, MemberPort: 
strconv.Itoa(testPort1), MemberState: "UNREACHABLE", MemberRole: "SECONDARY"}, + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "ERROR", MemberRole: "SECONDARY"}, + }, topodatapb.TabletType_REPLICA}, + {alias1, 0, "group", []db.TestGroupState{}, topodatapb.TabletType_MASTER}, + {alias2, 0, "group", []db.TestGroupState{}, topodatapb.TabletType_REPLICA}, + }}, + {"raise error if all nodes failed", "", 0, []data{ // diagnose as DiagnoseTypeShardNetworkPartition + {alias0, 0, "group", []db.TestGroupState{}, topodatapb.TabletType_REPLICA}, + {alias1, 0, "group", []db.TestGroupState{}, topodatapb.TabletType_MASTER}, + {alias2, 0, "group", []db.TestGroupState{}, topodatapb.TabletType_REPLICA}, + }}, + } + for _, tt := range testcases { + t.Run(tt.name, func(t *testing.T) { + ctrl := gomock.NewController(t) + defer ctrl.Finish() + ctx := context.Background() + ts := memorytopo.NewServer("test_cell") + defer ts.Close() + ts.CreateKeyspace(ctx, "ks", &topodatapb.Keyspace{}) + ts.CreateShard(ctx, "ks", "0") + tmc := NewMockGRTmcClient(ctrl) + dbAgent := db.NewMockAgent(ctrl) + tablets := make(map[string]*topo.TabletInfo) + tmc. + EXPECT(). + Ping(gomock.Any(), gomock.Any()). + Return(nil). + AnyTimes() + expectedCalls := 0 + if tt.expectedCandidatePort != 0 { + expectedCalls = 1 + } + var candidate *topo.TabletInfo + inputMap := make(map[string]testGroupInput) + for i, input := range tt.inputs { + tablet := buildTabletInfo(uint32(i), testHost, input.mysqlport, input.ttype, time.Now()) + testutil.AddTablet(ctx, t, ts, tablet.Tablet, nil) + tablets[input.alias] = tablet + inputMap[input.alias] = testGroupInput{ + input.groupName, + false, + input.groupInput, + nil, + } + if expectedCalls > 0 && input.mysqlport == tt.expectedCandidatePort { + candidate = tablet + } + dbAgent. + EXPECT(). + FetchGroupView(gomock.Eq(input.alias), gomock.Eq(&inst.InstanceKey{Hostname: testHost, Port: input.mysqlport})). 
+ DoAndReturn(func(alias string, target *inst.InstanceKey) (*db.GroupView, error) { + if target.Hostname == "" || target.Port == 0 { + return nil, errors.New("invalid mysql instance key") + } + s := inputMap[alias] + view := db.BuildGroupView(alias, s.groupName, target.Hostname, target.Port, s.readOnly, s.groupState) + return view, nil + }). + AnyTimes() + } + if candidate != nil { + tmc. + EXPECT(). + ChangeType(gomock.Any(), gomock.Any(), topodatapb.TabletType_MASTER). + Return(nil). + Times(expectedCalls) + } + cfg := &config.VTGRConfig{GroupSize: repairGroupSize, MinNumReplica: 2, BackoffErrorWaitTimeSeconds: 1, BootstrapWaitTimeSeconds: 1} + shard := NewGRShard("ks", "0", nil, tmc, ts, dbAgent, cfg, testPort0) + _, err := shard.Repair(ctx, DiagnoseTypeWrongPrimaryTablet) + if tt.errorMsg == "" { + assert.NoError(t, err) + } else { + assert.Error(t, err) + assert.True(t, strings.Contains(err.Error(), tt.errorMsg), err.Error()) + } + }) + } +} + +func TestRepairUnconnectedReplica(t *testing.T) { + type data struct { + alias string + port int + groupName string + readOnly bool + groupInput []db.TestGroupState + ttype topodatapb.TabletType + } + var testcases = []struct { + name string + errorMsg string + expectedCandidatePort int + inputs []data + }{ + {"fix unconnected replica tablet", "", testPort2, []data{ + {alias0, testPort0, "group", false, []db.TestGroupState{ + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"}, + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "ONLINE", MemberRole: "SECONDARY"}, + }, topodatapb.TabletType_MASTER}, + {alias1, testPort1, "group", true, []db.TestGroupState{ + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"}, + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "ONLINE", MemberRole: "SECONDARY"}, + }, topodatapb.TabletType_REPLICA}, + {alias2, testPort2, "", true, 
[]db.TestGroupState{ + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "OFFLINE", MemberRole: ""}, + }, topodatapb.TabletType_REPLICA}, + }}, + {"do nothing if shard has wrong primary tablet", "", 0, []data{ // this should be diagnosed as DiagnoseTypeWrongPrimaryTablet instead + {alias0, testPort0, "group", true, []db.TestGroupState{ + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "SECONDARY"}, + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "ONLINE", MemberRole: "PRIMARY"}, + }, topodatapb.TabletType_MASTER}, + {alias1, testPort1, "group", false, []db.TestGroupState{ + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "SECONDARY"}, + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "ONLINE", MemberRole: "PRIMARY"}, + }, topodatapb.TabletType_REPLICA}, + {alias2, testPort2, "", true, []db.TestGroupState{ + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "OFFLINE", MemberRole: ""}, + }, topodatapb.TabletType_REPLICA}, + }}, + {"fix replica in ERROR state", "", testPort2, []data{ + {alias0, testPort0, "group", false, []db.TestGroupState{ + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"}, + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "ONLINE", MemberRole: "SECONDARY"}, + }, topodatapb.TabletType_MASTER}, + {alias1, testPort1, "group", true, []db.TestGroupState{ + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"}, + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "ONLINE", MemberRole: "SECONDARY"}, + }, topodatapb.TabletType_REPLICA}, + {alias2, testPort2, "group", true, []db.TestGroupState{ + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "ERROR", MemberRole: ""}, + }, 
topodatapb.TabletType_REPLICA}, + }}, + {"fix replica with two nodes in ERROR state", "", 0, []data{ // InsufficientGroupSize + {alias0, testPort0, "group", false, []db.TestGroupState{ + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"}, + }, topodatapb.TabletType_MASTER}, + {alias1, testPort1, "group", true, []db.TestGroupState{ + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"}, + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "ERROR", MemberRole: "SECONDARY"}, + }, topodatapb.TabletType_REPLICA}, + {alias2, testPort2, "group", true, []db.TestGroupState{ + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "ERROR", MemberRole: "SECONDARY"}, + }, topodatapb.TabletType_REPLICA}, + }}, + } + for _, tt := range testcases { + t.Run(tt.name, func(t *testing.T) { + rand.Seed(1) + ctrl := gomock.NewController(t) + defer ctrl.Finish() + ctx := context.Background() + ts := memorytopo.NewServer("test_cell") + defer ts.Close() + ts.CreateKeyspace(ctx, "ks", &topodatapb.Keyspace{}) + ts.CreateShard(ctx, "ks", "0") + tmc := NewMockGRTmcClient(ctrl) + dbAgent := db.NewMockAgent(ctrl) + tablets := make(map[string]*topo.TabletInfo) + tmc. + EXPECT(). + Ping(gomock.Any(), gomock.Any()). + Return(nil). + AnyTimes() + if tt.expectedCandidatePort != 0 { + dbAgent. + EXPECT(). + StopGroupLocked(gomock.Eq(&inst.InstanceKey{Hostname: testHost, Port: tt.expectedCandidatePort})). + Return(nil). + AnyTimes() + dbAgent. + EXPECT(). + JoinGroupLocked(gomock.Eq(&inst.InstanceKey{Hostname: testHost, Port: tt.expectedCandidatePort}), gomock.Any()). + Return(nil). 
+ Times(1) + } + inputMap := make(map[string]testGroupInput) + for i, input := range tt.inputs { + tablet := buildTabletInfo(uint32(i), testHost, input.port, input.ttype, time.Now()) + testutil.AddTablet(ctx, t, ts, tablet.Tablet, nil) + tablets[input.alias] = tablet + inputMap[input.alias] = testGroupInput{ + input.groupName, + input.readOnly, + input.groupInput, + nil, + } + dbAgent. + EXPECT(). + FetchGroupView(gomock.Eq(input.alias), gomock.Eq(&inst.InstanceKey{Hostname: testHost, Port: input.port})). + DoAndReturn(func(alias string, target *inst.InstanceKey) (*db.GroupView, error) { + if target.Hostname == "" || target.Port == 0 { + return nil, errors.New("invalid mysql instance key") + } + s := inputMap[alias] + view := db.BuildGroupView(alias, s.groupName, target.Hostname, target.Port, s.readOnly, s.groupState) + return view, nil + }). + AnyTimes() + } + cfg := &config.VTGRConfig{GroupSize: repairGroupSize, MinNumReplica: 2, BackoffErrorWaitTimeSeconds: 1, BootstrapWaitTimeSeconds: 1} + shard := NewGRShard("ks", "0", nil, tmc, ts, dbAgent, cfg, testPort0) + _, err := shard.Repair(ctx, DiagnoseTypeUnconnectedReplica) + if tt.errorMsg == "" { + assert.NoError(t, err) + } else { + assert.Error(t, err) + assert.True(t, strings.Contains(err.Error(), tt.errorMsg), err.Error()) + } + }) + } +} + +func TestRepairUnreachablePrimary(t *testing.T) { + type data struct { + port int + pingalbe bool + gtid mysql.GTIDSet + ttype topodatapb.TabletType + } + sid := "3e11fa47-71ca-11e1-9e33-c80aa9429562" + var testcases = []struct { + name string + errorMsg string + expectedCandidatePort int + inputs []data + }{ + {"primary is unreachable", "", testPort1, []data{ + {testPort0, false, getMysql56GTIDSet(sid, "1-11"), topodatapb.TabletType_MASTER}, + {testPort1, true, getMysql56GTIDSet(sid, "1-10"), topodatapb.TabletType_REPLICA}, + {testPort2, true, getMysql56GTIDSet(sid, "1-9"), topodatapb.TabletType_REPLICA}, + }}, + {"failover to reachable node when primary is unreachable", 
"", testPort2, []data{ + {testPort0, false, getMysql56GTIDSet(sid, "1-11"), topodatapb.TabletType_MASTER}, + {testPort1, false, getMysql56GTIDSet(sid, "1-10"), topodatapb.TabletType_REPLICA}, + {testPort2, true, getMysql56GTIDSet(sid, "1-9"), topodatapb.TabletType_REPLICA}, + }}, + {"do nothing if replica is unreachable", "", 0, []data{ + {testPort0, true, getMysql56GTIDSet(sid, "1-10"), topodatapb.TabletType_MASTER}, + {testPort1, false, getMysql56GTIDSet(sid, "1-10"), topodatapb.TabletType_REPLICA}, + {testPort2, false, getMysql56GTIDSet(sid, "1-9"), topodatapb.TabletType_REPLICA}, + }}, + {"raise error if gtid divergence", "vtgr repair: found more than one failover candidates by GTID set for ks/0", 0, []data{ + {testPort0, false, getMysql56GTIDSet(sid, "1-10"), topodatapb.TabletType_MASTER}, + {testPort1, true, getMysql56GTIDSet("264a8230-67d2-11eb-acdd-0a8d91f24125", "1-10"), topodatapb.TabletType_REPLICA}, + {testPort2, true, getMysql56GTIDSet(sid, "1-9"), topodatapb.TabletType_REPLICA}, + }}, + } + for _, tt := range testcases { + t.Run(tt.name, func(t *testing.T) { + ctrl := gomock.NewController(t) + defer ctrl.Finish() + ctx := context.Background() + ts := memorytopo.NewServer("test_cell") + defer ts.Close() + ts.CreateKeyspace(ctx, "ks", &topodatapb.Keyspace{}) + ts.CreateShard(ctx, "ks", "0") + tmc := NewMockGRTmcClient(ctrl) + dbAgent := db.NewMockAgent(ctrl) + dbAgent. + EXPECT(). + FetchGroupView(gomock.Any(), gomock.Any()). + DoAndReturn(func(alias string, target *inst.InstanceKey) (*db.GroupView, error) { + return db.BuildGroupView(alias, "group", target.Hostname, target.Port, false, []db.TestGroupState{ + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"}, + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "ONLINE", MemberRole: "SECONDARY"}, + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "ONLINE", MemberRole: "SECONDARY"}, + }), nil + }). 
+ AnyTimes() + expectedCalls := 0 + if tt.expectedCandidatePort != 0 { + expectedCalls = 1 + } + dbAgent. + EXPECT(). + Failover(&inst.InstanceKey{Hostname: testHost, Port: tt.expectedCandidatePort}). + Return(nil). + Times(expectedCalls) + tmc. + EXPECT(). + ChangeType(gomock.Any(), gomock.Any(), topodatapb.TabletType_MASTER). + Return(nil). + Times(expectedCalls) + status := make(map[int32]struct { + pingalbe bool + gtid mysql.GTIDSet + }) + for i, input := range tt.inputs { + tablet := buildTabletInfo(uint32(i), testHost, input.port, input.ttype, time.Now()) + testutil.AddTablet(ctx, t, ts, tablet.Tablet, nil) + status[tablet.MysqlPort] = struct { + pingalbe bool + gtid mysql.GTIDSet + }{ + input.pingalbe, + input.gtid, + } + dbAgent. + EXPECT(). + FetchApplierGTIDSet(gomock.Eq(&inst.InstanceKey{Hostname: testHost, Port: input.port})). + DoAndReturn(func(target *inst.InstanceKey) (mysql.GTIDSet, error) { + if target.Hostname == "" || target.Port == 0 { + return nil, errors.New("invalid mysql instance key") + } + return status[int32(target.Port)].gtid, nil + }). + AnyTimes() + tmc. + EXPECT(). + Ping(gomock.Any(), gomock.Any()). + DoAndReturn(func(_ context.Context, t *topodatapb.Tablet) error { + if !status[t.MysqlPort].pingalbe { + return errors.New("unreachable") + } + return nil + }). 
+ AnyTimes() + } + cfg := &config.VTGRConfig{GroupSize: repairGroupSize, MinNumReplica: 2, BackoffErrorWaitTimeSeconds: 1, BootstrapWaitTimeSeconds: 1} + shard := NewGRShard("ks", "0", nil, tmc, ts, dbAgent, cfg, testPort0) + _, err := shard.Repair(ctx, DiagnoseTypeUnreachablePrimary) + if tt.errorMsg == "" { + assert.NoError(t, err) + } else { + assert.Error(t, err, tt.errorMsg) + assert.True(t, strings.Contains(err.Error(), tt.errorMsg)) + } + }) + } +} + +func TestRepairInsufficientGroupSize(t *testing.T) { + type data struct { + alias string + readOnly bool + groupInput []db.TestGroupState + ttype topodatapb.TabletType + } + var testcases = []struct { + name string + errorMsg string + expectedCandidatePort int + inputs []data + }{ + {"fix insufficient group size", "", testPort0, []data{ + {alias0, false, []db.TestGroupState{ + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"}, + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "RECOVERING", MemberRole: "SECONDARY"}, + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "RECOVERING", MemberRole: "SECONDARY"}, + }, topodatapb.TabletType_MASTER}, + {alias1, true, []db.TestGroupState{ + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"}, + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "RECOVERING", MemberRole: "SECONDARY"}, + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "RECOVERING", MemberRole: "SECONDARY"}, + }, topodatapb.TabletType_REPLICA}, + {alias2, true, []db.TestGroupState{ + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"}, + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "RECOVERING", MemberRole: "SECONDARY"}, + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "RECOVERING", MemberRole: 
"SECONDARY"}, + }, topodatapb.TabletType_REPLICA}, + }}, + } + for _, tt := range testcases { + t.Run(tt.name, func(t *testing.T) { + ctrl := gomock.NewController(t) + defer ctrl.Finish() + ctx := context.Background() + ts := memorytopo.NewServer("test_cell") + defer ts.Close() + ts.CreateKeyspace(ctx, "ks", &topodatapb.Keyspace{}) + ts.CreateShard(ctx, "ks", "0") + tmc := NewMockGRTmcClient(ctrl) + dbAgent := db.NewMockAgent(ctrl) + tablets := make(map[string]*topo.TabletInfo) + tmc. + EXPECT(). + Ping(gomock.Any(), gomock.Any()). + Return(nil). + AnyTimes() + if tt.expectedCandidatePort != 0 { + dbAgent. + EXPECT(). + SetSuperReadOnly(gomock.Eq(&inst.InstanceKey{Hostname: testHost, Port: tt.expectedCandidatePort}), true). + Return(nil). + Times(1) + } + inputMap := make(map[string]testGroupInput) + for i, input := range tt.inputs { + tablet := buildTabletInfo(uint32(i), testHost, testPort0+i, input.ttype, time.Now()) + testutil.AddTablet(ctx, t, ts, tablet.Tablet, nil) + tablets[input.alias] = tablet + inputMap[input.alias] = testGroupInput{ + "group", + input.readOnly, + input.groupInput, + nil, + } + dbAgent. + EXPECT(). + FetchGroupView(gomock.Any(), gomock.Any()). + DoAndReturn(func(alias string, target *inst.InstanceKey) (*db.GroupView, error) { + if target.Hostname == "" || target.Port == 0 { + return nil, errors.New("invalid mysql instance key") + } + s := inputMap[alias] + view := db.BuildGroupView(alias, s.groupName, target.Hostname, target.Port, s.readOnly, s.groupState) + return view, nil + }). 
+ AnyTimes() + } + cfg := &config.VTGRConfig{GroupSize: repairGroupSize, MinNumReplica: 2, BackoffErrorWaitTimeSeconds: 1, BootstrapWaitTimeSeconds: 1} + shard := NewGRShard("ks", "0", nil, tmc, ts, dbAgent, cfg, testPort0) + _, err := shard.Repair(ctx, DiagnoseTypeInsufficientGroupSize) + if tt.errorMsg == "" { + assert.NoError(t, err) + } else { + assert.Error(t, err) + assert.True(t, strings.Contains(err.Error(), tt.errorMsg), err.Error()) + } + }) + } +} + +func TestRepairReadOnlyShard(t *testing.T) { + type data struct { + alias string + port int + readOnly bool + groupInput []db.TestGroupState + ttype topodatapb.TabletType + } + var testcases = []struct { + name string + errorMsg string + expectedCandidatePort int + inputs []data + }{ + {"fix readonly shard", "", testPort0, []data{ + {alias0, testPort0, true, []db.TestGroupState{ + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"}, + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "ONLINE", MemberRole: "SECONDARY"}, + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "RECOVERING", MemberRole: "SECONDARY"}, + }, topodatapb.TabletType_MASTER}, + {alias1, testPort1, true, []db.TestGroupState{ + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"}, + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "ONLINE", MemberRole: "SECONDARY"}, + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "RECOVERING", MemberRole: "SECONDARY"}, + }, topodatapb.TabletType_REPLICA}, + {alias2, testPort2, true, []db.TestGroupState{ + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"}, + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "ONLINE", MemberRole: "SECONDARY"}, + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "RECOVERING", 
MemberRole: "SECONDARY"}, + }, topodatapb.TabletType_REPLICA}, + }}, + {"do nothing if primary is not read only", "", 0, []data{ + {alias0, testPort0, false, []db.TestGroupState{ + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"}, + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "ONLINE", MemberRole: "SECONDARY"}, + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "RECOVERING", MemberRole: "SECONDARY"}, + }, topodatapb.TabletType_MASTER}, + {alias1, testPort1, true, []db.TestGroupState{ + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"}, + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "ONLINE", MemberRole: "SECONDARY"}, + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "RECOVERING", MemberRole: "SECONDARY"}, + }, topodatapb.TabletType_REPLICA}, + {alias2, testPort2, true, []db.TestGroupState{ + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"}, + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "ONLINE", MemberRole: "SECONDARY"}, + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "RECOVERING", MemberRole: "SECONDARY"}, + }, topodatapb.TabletType_REPLICA}, + }}, + } + for _, tt := range testcases { + t.Run(tt.name, func(t *testing.T) { + ctrl := gomock.NewController(t) + defer ctrl.Finish() + ctx := context.Background() + ts := memorytopo.NewServer("test_cell") + defer ts.Close() + ts.CreateKeyspace(ctx, "ks", &topodatapb.Keyspace{}) + ts.CreateShard(ctx, "ks", "0") + tmc := NewMockGRTmcClient(ctrl) + dbAgent := db.NewMockAgent(ctrl) + tablets := make(map[string]*topo.TabletInfo) + tmc. + EXPECT(). + Ping(gomock.Any(), gomock.Any()). + Return(nil). + AnyTimes() + if tt.expectedCandidatePort != 0 { + dbAgent. + EXPECT(). 
+ SetSuperReadOnly(gomock.Eq(&inst.InstanceKey{Hostname: testHost, Port: tt.expectedCandidatePort}), false). + Return(nil). + Times(1) + } + inputMap := make(map[string]testGroupInput) + for i, input := range tt.inputs { + tablet := buildTabletInfo(uint32(i), testHost, input.port, input.ttype, time.Now()) + testutil.AddTablet(ctx, t, ts, tablet.Tablet, nil) + tablets[input.alias] = tablet + inputMap[input.alias] = testGroupInput{ + "group", + input.readOnly, + input.groupInput, + nil, + } + dbAgent. + EXPECT(). + FetchGroupView(gomock.Eq(input.alias), gomock.Any()). + DoAndReturn(func(alias string, target *inst.InstanceKey) (*db.GroupView, error) { + if target.Hostname == "" || target.Port == 0 { + return nil, errors.New("invalid mysql instance key") + } + s := inputMap[alias] + view := db.BuildGroupView(alias, s.groupName, target.Hostname, target.Port, s.readOnly, s.groupState) + return view, nil + }). + AnyTimes() + } + cfg := &config.VTGRConfig{GroupSize: repairGroupSize, MinNumReplica: 2, BackoffErrorWaitTimeSeconds: 1, BootstrapWaitTimeSeconds: 1} + shard := NewGRShard("ks", "0", nil, tmc, ts, dbAgent, cfg, testPort0) + _, err := shard.Repair(ctx, DiagnoseTypeReadOnlyShard) + if tt.errorMsg == "" { + assert.NoError(t, err) + } else { + assert.Error(t, err) + assert.True(t, strings.Contains(err.Error(), tt.errorMsg), err.Error()) + } + }) + } +} + +func TestRepairBackoffError(t *testing.T) { + type data struct { + alias string + mysqlhost string + mysqlport int + groupName string + groupInput []db.TestGroupState + pingable bool + gtid mysql.GTIDSet + ttype topodatapb.TabletType + } + sid := "3e11fa47-71ca-11e1-9e33-c80aa9429562" + var testcases = []struct { + name string + errorMsg string + expectedCandidatePort int + diagnose DiagnoseType + inputs []data + }{ + {"shard has network partition", "", testPort0, DiagnoseTypeBackoffError, []data{ + {alias0, testHost, testPort0, "group", []db.TestGroupState{ + {MemberHost: testHost, MemberPort: 
strconv.Itoa(testPort0), MemberState: "UNREACHABLE", MemberRole: "PRIMARY"}, + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "ONLINE", MemberRole: "SECONDARY"}, + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "UNREACHABLE", MemberRole: "SECONDARY"}, + }, true, getMysql56GTIDSet(sid, "1-10"), topodatapb.TabletType_REPLICA}, + {alias1, testHost, testPort1, "group", []db.TestGroupState{ + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "OFFLINE", MemberRole: ""}, + }, true, getMysql56GTIDSet(sid, "1-9"), topodatapb.TabletType_REPLICA}, + {alias2, testHost, testPort2, "group", []db.TestGroupState{ + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "OFFLINE", MemberRole: ""}, + }, true, getMysql56GTIDSet(sid, "1-9"), topodatapb.TabletType_REPLICA}, + }}, + {"shard bootstrap in progress", "", testPort0, DiagnoseTypeBootstrapBackoff, []data{ + {alias0, testHost, testPort0, "group", []db.TestGroupState{ + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "RECOVERING", MemberRole: "SECONDARY"}, + }, true, getMysql56GTIDSet(sid, "1-10"), topodatapb.TabletType_REPLICA}, + {alias1, testHost, testPort1, "group", []db.TestGroupState{ + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "OFFLINE", MemberRole: ""}, + }, true, getMysql56GTIDSet(sid, "1-9"), topodatapb.TabletType_REPLICA}, + {alias2, testHost, testPort2, "group", []db.TestGroupState{ + {MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "OFFLINE", MemberRole: ""}, + }, true, getMysql56GTIDSet(sid, "1-9"), topodatapb.TabletType_REPLICA}, + }}, + } + tablets := make(map[string]*topo.TabletInfo) + for _, tt := range testcases { + t.Run(tt.name, func(t *testing.T) { + ctrl := gomock.NewController(t) + defer ctrl.Finish() + ctx := context.Background() + ts := memorytopo.NewServer("test_cell") + defer ts.Close() + ts.CreateKeyspace(ctx, "ks", 
&topodatapb.Keyspace{}) + ts.CreateShard(ctx, "ks", "0") + tmc := NewMockGRTmcClient(ctrl) + dbAgent := db.NewMockAgent(ctrl) + expectedCalls := 0 + if tt.expectedCandidatePort != 0 { + expectedCalls = 1 + } + inputMap := make(map[int]testGroupInput) + pingable := make(map[string]bool) + var lock sync.Mutex + dbAgent. + EXPECT(). + BootstrapGroupLocked(&inst.InstanceKey{Hostname: testHost, Port: tt.expectedCandidatePort}). + DoAndReturn(func(target *inst.InstanceKey) error { + if target.Hostname == "" || target.Port == 0 { + return errors.New("invalid mysql instance key") + } + input := inputMap[target.Port] + groupState := input.groupState + if len(groupState) == 1 && groupState[0].MemberState == "OFFLINE" { + groupState[0].MemberState = "ONLINE" + groupState[0].MemberRole = "PRIMARY" + groupState[0].MemberHost = target.Hostname + groupState[0].MemberPort = strconv.Itoa(target.Port) + input.groupState = groupState + } else { + for i, s := range groupState { + if s.MemberHost == target.Hostname { + s.MemberState = "ONLINE" + s.MemberRole = "PRIMARY" + groupState[i] = s + } + input.groupState = groupState + } + } + inputMap[target.Port] = input + return nil + }). + Times(expectedCalls) + for i, input := range tt.inputs { + tablet := buildTabletInfo(uint32(i), input.mysqlhost, input.mysqlport, input.ttype, time.Now()) + testutil.AddTablet(ctx, t, ts, tablet.Tablet, nil) + tablets[input.alias] = tablet + inputMap[input.mysqlport] = testGroupInput{ + input.groupName, + false, + input.groupInput, + input.gtid, + } + pingable[input.alias] = input.pingable + dbAgent. + EXPECT(). + FetchGroupView(gomock.Eq(input.alias), gomock.Any()). 
+ DoAndReturn(func(alias string, target *inst.InstanceKey) (*db.GroupView, error) { + if target.Hostname == "" || target.Port == 0 { + return nil, errors.New("invalid mysql instance key") + } + s := inputMap[target.Port] + view := db.BuildGroupView(alias, s.groupName, target.Hostname, target.Port, s.readOnly, s.groupState) + return view, nil + }). + AnyTimes() + dbAgent. + EXPECT(). + FetchApplierGTIDSet(gomock.Any()). + DoAndReturn(func(target *inst.InstanceKey) (mysql.GTIDSet, error) { + if target.Hostname == "" || target.Port == 0 { + return nil, errors.New("invalid mysql instance key") + } + return inputMap[target.Port].gtid, nil + }). + AnyTimes() + dbAgent. + EXPECT(). + StopGroupLocked(gomock.Any()). + DoAndReturn(func(target *inst.InstanceKey) error { + lock.Lock() + view := inputMap[target.Port] + view.groupState = []db.TestGroupState{ + {MemberHost: testHost, MemberPort: strconv.Itoa(target.Port), MemberState: "OFFLINE", MemberRole: ""}, + } + inputMap[target.Port] = view + lock.Unlock() + return nil + }). + AnyTimes() + tmc. + EXPECT(). + Ping(gomock.Any(), gomock.Any()). + DoAndReturn(func(_ context.Context, t *topodatapb.Tablet) error { + if !pingable[input.alias] { + return errors.New("unreachable") + } + return nil + }). 
+ AnyTimes() + } + cfg := &config.VTGRConfig{GroupSize: repairGroupSize, MinNumReplica: 2, BackoffErrorWaitTimeSeconds: 1, BootstrapWaitTimeSeconds: 1} + shard := NewGRShard("ks", "0", nil, tmc, ts, dbAgent, cfg, testPort0) + shard.lastDiagnoseResult = tt.diagnose + _, err := shard.Repair(ctx, tt.diagnose) + if tt.errorMsg == "" { + assert.NoError(t, err) + } else { + assert.Error(t, err, tt.errorMsg) + assert.True(t, strings.Contains(err.Error(), tt.errorMsg), err.Error()) + } + }) + } +} + +func getMysql56GTIDSet(sid, interval string) mysql.GTIDSet { + input := fmt.Sprintf("%s:%s", sid, interval) + pos, _ := mysql.ParsePosition(mysql.Mysql56FlavorID, input) + return pos.GTIDSet +} diff --git a/go/vt/vtgr/db/mock_mysql.go b/go/vt/vtgr/db/mock_mysql.go new file mode 100644 index 00000000000..9d23bd4593f --- /dev/null +++ b/go/vt/vtgr/db/mock_mysql.go @@ -0,0 +1,176 @@ +/* +Copyright 2021 The Vitess Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package db + +import ( + reflect "reflect" + "strconv" + + gomock "github.com/golang/mock/gomock" + + mysql "vitess.io/vitess/go/mysql" + inst "vitess.io/vitess/go/vt/orchestrator/inst" +) + +// MockAgent is a mock of Agent interface +type MockAgent struct { + ctrl *gomock.Controller + recorder *MockAgentMockRecorder +} + +// MockAgentMockRecorder is the mock recorder for MockAgent +type MockAgentMockRecorder struct { + mock *MockAgent +} + +// NewMockAgent creates a new mock instance +func NewMockAgent(ctrl *gomock.Controller) *MockAgent { + mock := &MockAgent{ctrl: ctrl} + mock.recorder = &MockAgentMockRecorder{mock} + return mock +} + +// EXPECT returns an object that allows the caller to indicate expected use +func (m *MockAgent) EXPECT() *MockAgentMockRecorder { + return m.recorder +} + +// BootstrapGroupLocked mocks base method +func (m *MockAgent) BootstrapGroupLocked(instanceKey *inst.InstanceKey) error { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "BootstrapGroupLocked", instanceKey) + ret0, _ := ret[0].(error) + return ret0 +} + +// BootstrapGroupLocked indicates an expected call of BootstrapGroupLocked +func (mr *MockAgentMockRecorder) BootstrapGroupLocked(instanceKey interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "BootstrapGroupLocked", reflect.TypeOf((*MockAgent)(nil).BootstrapGroupLocked), instanceKey) +} + +// StopGroupLocked mocks base method +func (m *MockAgent) StopGroupLocked(instanceKey *inst.InstanceKey) error { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "StopGroupLocked", instanceKey) + ret0, _ := ret[0].(error) + return ret0 +} + +// StopGroupLocked indicates an expected call of StopGroupLocked +func (mr *MockAgentMockRecorder) StopGroupLocked(instanceKey interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "StopGroupLocked", reflect.TypeOf((*MockAgent)(nil).StopGroupLocked), instanceKey) +} + +// JoinGroupLocked 
mocks base method +func (m *MockAgent) JoinGroupLocked(instanceKey, primaryKey *inst.InstanceKey) error { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "JoinGroupLocked", instanceKey, primaryKey) + ret0, _ := ret[0].(error) + return ret0 +} + +// JoinGroupLocked indicates an expected call of JoinGroupLocked +func (mr *MockAgentMockRecorder) JoinGroupLocked(instanceKey, primaryKey interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "JoinGroupLocked", reflect.TypeOf((*MockAgent)(nil).JoinGroupLocked), instanceKey, primaryKey) +} + +// SetReadOnly mocks base method +func (m *MockAgent) SetReadOnly(instanceKey *inst.InstanceKey, readOnly bool) error { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "SetReadOnly", instanceKey, readOnly) + ret0, _ := ret[0].(error) + return ret0 +} + +// SetSuperReadOnly indicates an expected call of SetSuperReadOnly +func (mr *MockAgentMockRecorder) SetSuperReadOnly(instanceKey, readOnly interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "SetReadOnly", reflect.TypeOf((*MockAgent)(nil).SetReadOnly), instanceKey, readOnly) +} + +// FetchApplierGTIDSet mocks base method +func (m *MockAgent) FetchApplierGTIDSet(instanceKey *inst.InstanceKey) (mysql.GTIDSet, error) { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "FetchApplierGTIDSet", instanceKey) + ret0, _ := ret[0].(mysql.GTIDSet) + ret1, _ := ret[1].(error) + return ret0, ret1 +} + +// FetchApplierGTIDSet indicates an expected call of FetchApplierGTIDSet +func (mr *MockAgentMockRecorder) FetchApplierGTIDSet(instanceKey interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "FetchApplierGTIDSet", reflect.TypeOf((*MockAgent)(nil).FetchApplierGTIDSet), instanceKey) +} + +// Failover mocks base method +func (m *MockAgent) Failover(instance *inst.InstanceKey) error { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "Failover", instance) 
+ ret0, _ := ret[0].(error) + return ret0 +} + +// Failover indicates an expected call of Failover +func (mr *MockAgentMockRecorder) Failover(instance interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Failover", reflect.TypeOf((*MockAgent)(nil).Failover), instance) +} + +// FetchGroupView mocks base method +func (m *MockAgent) FetchGroupView(alias string, instanceKey *inst.InstanceKey) (*GroupView, error) { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "FetchGroupView", alias, instanceKey) + ret0, _ := ret[0].(*GroupView) + ret1, _ := ret[1].(error) + return ret0, ret1 +} + +// FetchGroupView indicates an expected call of FetchGroupView +func (mr *MockAgentMockRecorder) FetchGroupView(alias, instanceKey interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "FetchGroupView", reflect.TypeOf((*MockAgent)(nil).FetchGroupView), alias, instanceKey) +} + +// TestGroupState mocks a row from mysql +type TestGroupState struct { + MemberHost, MemberPort, MemberState, MemberRole string +} + +// BuildGroupView builds group view from input +func BuildGroupView(alias, groupName, host string, port int, readOnly bool, inputs []TestGroupState) *GroupView { + view := NewGroupView(alias, host, port) + view.GroupName = groupName + // group_name, member_host, member_port, member_state, member_role, is_local + for _, row := range inputs { + memberPort, _ := strconv.Atoi(row.MemberPort) + member := NewGroupMember( + row.MemberState, + row.MemberRole, + row.MemberHost, + memberPort, + false) + if host == row.MemberHost && port == memberPort { + member.ReadOnly = readOnly + } + view.UnresolvedMembers = append(view.UnresolvedMembers, member) + } + return view +} diff --git a/go/vt/vtgr/db/mysql.go b/go/vt/vtgr/db/mysql.go new file mode 100644 index 00000000000..06e856dc9bc --- /dev/null +++ b/go/vt/vtgr/db/mysql.go @@ -0,0 +1,535 @@ +/* +Copyright 2021 The Vitess Authors. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package db + +import ( + "errors" + "flag" + "fmt" + "strconv" + "strings" + + gouuid "github.com/google/uuid" + + "vitess.io/vitess/go/mysql" + "vitess.io/vitess/go/vt/log" + "vitess.io/vitess/go/vt/orchestrator/config" + "vitess.io/vitess/go/vt/orchestrator/db" + "vitess.io/vitess/go/vt/orchestrator/external/golib/sqlutils" + "vitess.io/vitess/go/vt/orchestrator/inst" +) + +var ( + configFilePath = flag.String("db_config", "", "full path to db config file that will be used by VTGR") + dbFlavor = flag.String("db_flavor", "MySQL56", "mysql flavor override") + mysqlGroupPort = flag.Int("gr_port", 33061, "port to bootstrap a mysql group") + + // ErrGroupSplitBrain is the error when mysql group is split-brain + ErrGroupSplitBrain = errors.New("group has split brain") + // ErrGroupBackoffError is either the transient error or network partition from the group + ErrGroupBackoffError = errors.New("group backoff error") + // ErrGroupOngoingBootstrap is the error when a bootstrap is in progress + ErrGroupOngoingBootstrap = errors.New("group ongoing bootstrap") + // ErrGroupInactive is the error when mysql group is inactive unexpectedly + ErrGroupInactive = errors.New("group is inactive") + // ErrInvalidInstance is the error when the instance key has empty hostname + ErrInvalidInstance = errors.New("invalid mysql instance key") +) + +// Agent is used by vtgr to interact with Mysql +type Agent interface { + // BootstrapGroupLocked bootstraps a 
mysql group
	// the caller should grab a lock before
	BootstrapGroupLocked(instanceKey *inst.InstanceKey) error

	// StopGroupLocked stops a mysql group
	StopGroupLocked(instanceKey *inst.InstanceKey) error

	// JoinGroupLocked puts an instance into a mysql group based on primary instance
	// the caller should grab a lock before
	JoinGroupLocked(instanceKey *inst.InstanceKey, primaryKey *inst.InstanceKey) error

	// SetReadOnly set super_read_only variable
	// https://dev.mysql.com/doc/refman/8.0/en/server-system-variables.html#sysvar_super_read_only
	SetReadOnly(instanceKey *inst.InstanceKey, readOnly bool) error

	// FetchApplierGTIDSet fetches the GTID set from group_replication_applier channel
	FetchApplierGTIDSet(instanceKey *inst.InstanceKey) (mysql.GTIDSet, error)

	// Failover move the mysql primary to the node defined by memberUUID
	Failover(instance *inst.InstanceKey) error

	// FetchGroupView fetches group related information
	FetchGroupView(alias string, instanceKey *inst.InstanceKey) (*GroupView, error)
}

// MemberState is member state
type MemberState int

// MemberRole is member role
type MemberRole int

// Member states, mapped from the MEMBER_STATE strings reported by
// performance_schema.replication_group_members (see toMemberState).
// UNKNOWNSTATE is the zero value used for any unrecognized state.
const (
	UNKNOWNSTATE MemberState = iota
	OFFLINE
	UNREACHABLE
	RECOVERING
	ONLINE
	ERROR
)

// Member roles, mapped from the MEMBER_ROLE strings reported by
// performance_schema.replication_group_members (see toMemberRole).
// UNKNOWNROLE is the zero value used for any unrecognized role.
const (
	UNKNOWNROLE MemberRole = iota
	SECONDARY
	PRIMARY
)

// GroupMember represents a ROW we get from performance_schema
type GroupMember struct {
	HostName string      // MEMBER_HOST; may be empty before the group is fully initialized
	Port     int         // MEMBER_PORT; may be 0 before the group is fully initialized
	Role     MemberRole  // parsed MEMBER_ROLE
	State    MemberState // parsed MEMBER_STATE
	ReadOnly bool        // local @@super_read_only, only populated for the reporting instance
}

// GroupView is an instance's view for the group
type GroupView struct {
	TabletAlias string // alias of the tablet this view was fetched from
	MySQLHost   string // host the view was fetched from
	MySQLPort   int    // port the view was fetched from
	GroupName   string // @@group_replication_group_name as seen by this instance
	// UnresolvedMembers are the raw rows from replication_group_members;
	// host/port may be empty for a freshly bootstrapped group (see CreateInstanceKey).
	UnresolvedMembers []*GroupMember
}

// SQLAgentImpl implements Agent
type SQLAgentImpl struct {
	config   *config.Configuration // orchestrator-style DB credentials/config
	dbFlavor string                // mysql flavor used when parsing GTID positions
}

// NewGroupView creates a new GroupView
func NewGroupView(alias, host string, port int) *GroupView {
	return &GroupView{TabletAlias: alias, MySQLHost: host, MySQLPort: port}
+} + +// NewGroupMember creates a new GroupMember +func NewGroupMember(state, role, host string, port int, readonly bool) *GroupMember { + return &GroupMember{ + State: toMemberState(state), + Role: toMemberRole(role), + HostName: host, + Port: port, + ReadOnly: readonly, + } +} + +// NewVTGRSqlAgent creates a SQLAgentImpl +func NewVTGRSqlAgent() *SQLAgentImpl { + var conf *config.Configuration + if (*configFilePath) != "" { + log.Infof("use config from %v", *configFilePath) + conf = config.ForceRead(*configFilePath) + } else { + log.Warningf("use default config") + conf = config.Config + } + agent := &SQLAgentImpl{ + config: conf, + dbFlavor: *dbFlavor, + } + return agent +} + +// BootstrapGroupLocked implements Agent interface +func (agent *SQLAgentImpl) BootstrapGroupLocked(instanceKey *inst.InstanceKey) error { + if instanceKey == nil { + return errors.New("nil instance key for bootstrap") + } + // Before bootstrap a group, double check locally there is really nothing running locally + uuid, state, err := agent.getGroupNameAndMemberState(instanceKey) + if err != nil { + return err + } + if state != "" && state != inst.GroupReplicationMemberStateOffline { + return fmt.Errorf("%v not OFFLINE mode %v [group_name=%v]", instanceKey.Hostname, state, uuid) + } + // If there is a group name stored locally, we should try to reuse it + // for port, we will override with a new one + if uuid == "" { + uuid = gouuid.New().String() + log.Infof("Try to bootstrap with a new uuid") + } + log.Infof("Bootstrap group on %v with %v", instanceKey.Hostname, uuid) + // Use persist to set group_replication_group_name + // so that the instance will persist the name after restart + cmds := []string{ + "set global offline_mode=0", + fmt.Sprintf("set @@persist.group_replication_group_name=\"%s\"", uuid), + fmt.Sprintf("set global group_replication_local_address=\"%s:%d\"", instanceKey.Hostname, *mysqlGroupPort), + fmt.Sprintf("set global group_replication_group_seeds=\"%s:%d\"", 
instanceKey.Hostname, *mysqlGroupPort), + "set global group_replication_bootstrap_group=ON", + fmt.Sprintf("start group_replication user='%s', password='%s'", agent.config.MySQLReplicaUser, agent.config.MySQLReplicaPassword), + "set global group_replication_bootstrap_group=OFF", + } + for _, cmd := range cmds { + if err := execInstanceWithTopo(instanceKey, cmd); err != nil { + return err + } + } + return nil +} + +// StopGroupLocked implements Agent interface +func (agent *SQLAgentImpl) StopGroupLocked(instanceKey *inst.InstanceKey) error { + cmd := "stop group_replication" + return execInstanceWithTopo(instanceKey, cmd) +} + +// SetReadOnly implements Agent interface +func (agent *SQLAgentImpl) SetReadOnly(instanceKey *inst.InstanceKey, readOnly bool) error { + // Setting super_read_only ON implicitly forces read_only ON + // Setting read_only OFF implicitly forces super_read_only OFF + // https://www.perconaicom/blog/2016/09/27/using-the-super_read_only-system-variable/ + if readOnly { + return execInstance(instanceKey, "set @@global.super_read_only=1") + } + return execInstance(instanceKey, "set @@global.read_only=0") +} + +// JoinGroupLocked implements Agent interface +// Note: caller should grab the lock before calling this +func (agent *SQLAgentImpl) JoinGroupLocked(instanceKey *inst.InstanceKey, primaryInstanceKey *inst.InstanceKey) error { + var numExistingMembers int + var uuid string + query := `select count(*) as count, @@group_replication_group_name as group_name + from performance_schema.replication_group_members where member_state='ONLINE'` + err := fetchInstance(primaryInstanceKey, query, func(m sqlutils.RowMap) error { + numExistingMembers = m.GetInt("count") + uuid = m.GetString("group_name") + return nil + }) + if err != nil { + return err + } + if numExistingMembers == 0 { + return fmt.Errorf("there is no group members found on %v:%v", primaryInstanceKey.Hostname, primaryInstanceKey.Port) + } + // The queries above are executed on the primary 
instance + // now let's do one more check with local information to make sure it's OK to join the primary + localGroup, state, err := agent.getGroupNameAndMemberState(instanceKey) + if err != nil { + return err + } + if localGroup != "" && localGroup != uuid { + return fmt.Errorf("%v has a different group name (%v) than primary %v (%v)", instanceKey.Hostname, localGroup, primaryInstanceKey.Hostname, uuid) + } + if state == inst.GroupReplicationMemberStateOnline || state == inst.GroupReplicationMemberStateRecovering { + return fmt.Errorf("%v [%v] is alredy in a group %v", instanceKey.Hostname, state, localGroup) + } + var primaryGrPort int + query = `select @@group_replication_local_address as address` + err = fetchInstance(primaryInstanceKey, query, func(m sqlutils.RowMap) error { + address := m.GetString("address") + arr := strings.Split(address, ":") + primaryGrPort, err = strconv.Atoi(arr[1]) + if err != nil { + log.Errorf("Failed to parse primary GR port: %v", err) + return err + } + return nil + }) + if primaryGrPort == 0 { + return fmt.Errorf("cannot find group replication port on %v", primaryInstanceKey.Hostname) + } + // Now it's safe to join the group + cmds := []string{ + "set global offline_mode=0", + fmt.Sprintf("set @@persist.group_replication_group_name=\"%s\"", uuid), + fmt.Sprintf("set global group_replication_group_seeds=\"%s:%d\"", primaryInstanceKey.Hostname, primaryGrPort), + fmt.Sprintf("set global group_replication_local_address=\"%s:%d\"", instanceKey.Hostname, *mysqlGroupPort), + fmt.Sprintf("start group_replication user='%s', password='%s'", agent.config.MySQLReplicaUser, agent.config.MySQLReplicaPassword), + } + for _, cmd := range cmds { + if err := execInstanceWithTopo(instanceKey, cmd); err != nil { + return err + } + } + return nil +} + +// Failover implements Agent interface +func (agent *SQLAgentImpl) Failover(instance *inst.InstanceKey) error { + var memberUUID string + query := `select member_id + from 
performance_schema.replication_group_members + where member_host=convert(@@hostname using ascii) and member_port=@@port and member_state='ONLINE'` + err := fetchInstance(instance, query, func(m sqlutils.RowMap) error { + memberUUID = m.GetString("member_id") + if memberUUID == "" { + return fmt.Errorf("unable to find member_id on %v", instance.Hostname) + } + return nil + }) + if err != nil { + return err + } + cmd := fmt.Sprintf(`select group_replication_set_as_primary('%s')`, memberUUID) + if err := execInstance(instance, cmd); err != nil { + return err + } + return nil +} + +// FetchGroupView implements Agent interface +func (agent *SQLAgentImpl) FetchGroupView(alias string, instanceKey *inst.InstanceKey) (*GroupView, error) { + view := NewGroupView(alias, instanceKey.Hostname, instanceKey.Port) + var groupName string + var isReadOnly bool + query := `select + @@group_replication_group_name as group_name, + @@super_read_only as read_only, + member_host, member_port, member_state, member_role + from performance_schema.replication_group_members` + err := fetchInstance(instanceKey, query, func(m sqlutils.RowMap) error { + if groupName == "" { + groupName = m.GetString("group_name") + } + host := m.GetString("member_host") + port := m.GetInt("member_port") + isReadOnly = m.GetBool("read_only") + unresolvedMember := NewGroupMember( + m.GetString("member_state"), + m.GetString("member_role"), + host, + port, + false) + // readOnly is used to re-enable write after we set primary to read_only to protect the shard when there is + // less than desired number of nodes + // the default value is false because if the node is reachable and read_only, it will get override by the OR op + // if the host is unreachable, we don't need to trigger the protection for it therefore assume the it's writable + if host == instanceKey.Hostname && port == instanceKey.Port && isReadOnly { + unresolvedMember.ReadOnly = true + } + view.UnresolvedMembers = append(view.UnresolvedMembers, 
unresolvedMember) + return nil + }) + view.GroupName = groupName + if err != nil { + return nil, err + } + return view, nil +} + +// GetPrimaryView returns the view of primary member +func (view *GroupView) GetPrimaryView() (string, int, bool) { + for _, member := range view.UnresolvedMembers { + if member.Role == PRIMARY { + return member.HostName, member.Port, member.State == ONLINE + } + } + return "", 0, false +} + +func (agent *SQLAgentImpl) getGroupNameAndMemberState(instanceKey *inst.InstanceKey) (string, string, error) { + // If there is an instance that is unreachable but we still have quorum, GR will remove it from + // the replication_group_members and Failover if it is the primary node + // If the state becomes UNREACHABLE it indicates there is a network partition inside the group + // https://dev.mysql.com/doc/refman/8.0/en/group-replication-network-partitioning.html + // And then eventually if the node does not recover, the group will transit into ERROR state + // VTGR cannot handle this case, therefore we raise error here + var name, state string + query := `select @@group_replication_group_name as group_name` + err := fetchInstance(instanceKey, query, func(m sqlutils.RowMap) error { + name = m.GetString("group_name") + return nil + }) + if err != nil { + return "", "", err + } + query = `select member_state + from performance_schema.replication_group_members + where member_host=convert(@@hostname using ascii) and member_port=@@port` + err = fetchInstance(instanceKey, query, func(m sqlutils.RowMap) error { + state = m.GetString("member_state") + if state == "" { + state = inst.GroupReplicationMemberStateOffline + } + return nil + }) + if err != nil { + return "", "", err + } + return name, state, nil +} + +// FetchApplierGTIDSet implements Agent interface +func (agent *SQLAgentImpl) FetchApplierGTIDSet(instanceKey *inst.InstanceKey) (mysql.GTIDSet, error) { + var gtidSet string + // TODO: should we also take group_replication_recovery as well? 
+ query := `select gtid_subtract(concat(received_transaction_set, ',', @@global.gtid_executed), '') as gtid_set + from performance_schema.replication_connection_status + where channel_name='group_replication_applier'` + err := fetchInstance(instanceKey, query, func(m sqlutils.RowMap) error { + // If the instance has no committed transaction, gtidSet will be empty string + gtidSet = m.GetString("gtid_set") + return nil + }) + if err != nil { + return nil, err + } + pos, err := mysql.ParsePosition(agent.dbFlavor, gtidSet) + if err != nil { + return nil, err + } + return pos.GTIDSet, nil +} + +// execInstance executes a given query on the given MySQL discovery instance +func execInstance(instanceKey *inst.InstanceKey, query string, args ...interface{}) error { + if err := verifyInstance(instanceKey); err != nil { + return err + } + sqlDb, err := db.OpenDiscovery(instanceKey.Hostname, instanceKey.Port) + if err != nil { + log.Errorf("error exec %v: %v", query, err) + return err + } + _, err = sqlutils.ExecNoPrepare(sqlDb, query, args...) + return err +} + +// execInstanceWithTopo executes a given query on the given MySQL topology instance +func execInstanceWithTopo(instanceKey *inst.InstanceKey, query string, args ...interface{}) error { + if err := verifyInstance(instanceKey); err != nil { + return err + } + sqlDb, err := db.OpenTopology(instanceKey.Hostname, instanceKey.Port) + if err != nil { + log.Errorf("error exec %v: %v", query, err) + return err + } + _, err = sqlutils.ExecNoPrepare(sqlDb, query, args...) 
+ return err +} + +// fetchInstance fetches result from mysql +func fetchInstance(instanceKey *inst.InstanceKey, query string, onRow func(sqlutils.RowMap) error) error { + if err := verifyInstance(instanceKey); err != nil { + return err + } + sqlDb, err := db.OpenDiscovery(instanceKey.Hostname, instanceKey.Port) + if err != nil { + return err + } + return sqlutils.QueryRowsMap(sqlDb, query, onRow) +} + +// The hostname and port can be empty if a tablet crashed and did not populate them in +// the topo server. We treat them as if the host is unreachable when we calculate the +// quorum for the shard. +func verifyInstance(instanceKey *inst.InstanceKey) error { + if instanceKey.Hostname == "" || instanceKey.Port == 0 { + return ErrInvalidInstance + } + return nil +} + +// CreateInstanceKey returns an InstanceKey based on group member input +// When the group is init for the first time, the hostname and port are not set, e.g., +// +---------------------------+-----------+-------------+-------------+--------------+-------------+ +// | CHANNEL_NAME | MEMBER_ID | MEMBER_HOST | MEMBER_PORT | MEMBER_STATE | MEMBER_ROLE | +// +---------------------------+-----------+-------------+-------------+--------------+-------------+ +// | group_replication_applier | | | NULL | OFFLINE | | +// +---------------------------+-----------+-------------+-------------+--------------+-------------+ +// therefore we substitute with view's local hostname and port +func (view *GroupView) CreateInstanceKey(member *GroupMember) inst.InstanceKey { + if member.HostName == "" && member.Port == 0 { + return inst.InstanceKey{ + Hostname: view.MySQLHost, + Port: view.MySQLPort, + } + } + return inst.InstanceKey{ + Hostname: member.HostName, + Port: member.Port, + } +} + +// ToString make string for group view +func (view *GroupView) ToString() string { + var sb strings.Builder + sb.WriteString(fmt.Sprintf("group_name:%v\n", view.GroupName)) + for _, m := range view.UnresolvedMembers { + 
sb.WriteString(fmt.Sprintf("host:%v:%v | role:%v | state:%v\n", m.HostName, m.Port, m.Role, m.State)) + } + return sb.String() +} + +func (state MemberState) String() string { + switch state { + case ONLINE: + return inst.GroupReplicationMemberStateOnline + case ERROR: + return inst.GroupReplicationMemberStateError + case RECOVERING: + return inst.GroupReplicationMemberStateRecovering + case OFFLINE: + return inst.GroupReplicationMemberStateOffline + case UNREACHABLE: + return inst.GroupReplicationMemberStateUnreachable + } + return "UNKNOWN" +} + +func toMemberState(state string) MemberState { + switch state { + case inst.GroupReplicationMemberStateOnline: + return ONLINE + case inst.GroupReplicationMemberStateError: + return ERROR + case inst.GroupReplicationMemberStateRecovering: + return RECOVERING + case inst.GroupReplicationMemberStateOffline: + return OFFLINE + case inst.GroupReplicationMemberStateUnreachable: + return UNREACHABLE + default: + return UNKNOWNSTATE + } +} + +func (role MemberRole) String() string { + switch role { + case PRIMARY: + return inst.GroupReplicationMemberRolePrimary + case SECONDARY: + return inst.GroupReplicationMemberRoleSecondary + } + return "UNKNOWN" +} + +func toMemberRole(role string) MemberRole { + switch role { + case inst.GroupReplicationMemberRolePrimary: + return PRIMARY + case inst.GroupReplicationMemberRoleSecondary: + return SECONDARY + default: + return UNKNOWNROLE + } +} diff --git a/go/vt/vtgr/plugin_consultopo.go b/go/vt/vtgr/plugin_consultopo.go new file mode 100644 index 00000000000..3786fd59c26 --- /dev/null +++ b/go/vt/vtgr/plugin_consultopo.go @@ -0,0 +1,23 @@ +/* +Copyright 2021 The Vitess Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
You may obtain a copy of the License at

   http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package vtgr

// This plugin imports consultopo to register the consul implementation of TopoServer.

import (
	_ "vitess.io/vitess/go/vt/topo/consultopo"
)
diff --git a/go/vt/vtgr/plugin_etcd2topo.go b/go/vt/vtgr/plugin_etcd2topo.go
new file mode 100644
index 00000000000..0f9c385f69b
--- /dev/null
+++ b/go/vt/vtgr/plugin_etcd2topo.go
@@ -0,0 +1,23 @@
/*
Copyright 2021 The Vitess Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

   http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package vtgr

// This plugin imports etcd2topo to register the etcd2 implementation of TopoServer.

import (
	_ "vitess.io/vitess/go/vt/topo/etcd2topo"
)
diff --git a/go/vt/vtgr/plugin_grpctmclient.go b/go/vt/vtgr/plugin_grpctmclient.go
new file mode 100644
index 00000000000..529c560c207
--- /dev/null
+++ b/go/vt/vtgr/plugin_grpctmclient.go
@@ -0,0 +1,23 @@
/*
Copyright 2021 The Vitess Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

   http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package vtgr

// Imports and register the gRPC tabletmanager client

import (
	_ "vitess.io/vitess/go/vt/vttablet/grpctmclient"
)
diff --git a/go/vt/vtgr/plugin_zk2topo.go b/go/vt/vtgr/plugin_zk2topo.go
new file mode 100644
index 00000000000..f524fd0e21a
--- /dev/null
+++ b/go/vt/vtgr/plugin_zk2topo.go
@@ -0,0 +1,23 @@
/*
Copyright 2021 The Vitess Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

   http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package vtgr

// Imports and register the zk2 TopologyServer

import (
	_ "vitess.io/vitess/go/vt/topo/zk2topo"
)
diff --git a/go/vt/vtgr/vtgr.go b/go/vt/vtgr/vtgr.go
new file mode 100644
index 00000000000..a520044d9fb
--- /dev/null
+++ b/go/vt/vtgr/vtgr.go
@@ -0,0 +1,174 @@
/*
Copyright 2021 The Vitess Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package vtgr + +import ( + "flag" + "strings" + "sync" + "time" + + "vitess.io/vitess/go/vt/vtgr/config" + + "vitess.io/vitess/go/vt/vtgr/db" + + "golang.org/x/net/context" + + "vitess.io/vitess/go/vt/log" + "vitess.io/vitess/go/vt/topo" + "vitess.io/vitess/go/vt/vtgr/controller" + "vitess.io/vitess/go/vt/vttablet/tmclient" +) + +var ( + refreshInterval = flag.Duration("refresh_interval", 10*time.Second, "refresh interval to load tablets") + scanInterval = flag.Duration("scan_interval", 3*time.Second, "scan interval to diagnose and repair") + scanAndRepairTimeout = flag.Duration("scan_repair_timeout", 3*time.Second, "time to wait for a Diagnose and repair operation") + vtgrConfigFile = flag.String("vtgr_config", "", "config file for vtgr") + + localDbPort = flag.Int("db_port", 0, "local mysql port, set this to enable local fast check") +) + +// VTGR is the interface to manage the component to set up group replication with Vitess. +// The main goal of it is to reconcile MySQL group and the Vitess topology. +// Caller should use OpenTabletDiscovery to create the VTGR instance. +type VTGR struct { + // Shards are all the shards that a VTGR is monitoring. + // Caller can choose to iterate the shards to scan and repair for more granular control (e.g., stats report) + // instead of calling ScanAndRepair() directly. 
+ Shards []*controller.GRShard + topo controller.GRTopo + tmc tmclient.TabletManagerClient + ctx context.Context +} + +func newVTGR(ctx context.Context, ts controller.GRTopo, tmc tmclient.TabletManagerClient) *VTGR { + return &VTGR{ + topo: ts, + tmc: tmc, + ctx: ctx, + } +} + +// OpenTabletDiscovery opens connection with topo server +// and triggers the first round of controller based on specified cells and keyspace/shards. +func OpenTabletDiscovery(ctx context.Context, cellsToWatch, clustersToWatch []string) *VTGR { + if *vtgrConfigFile == "" { + log.Fatal("vtgr_config is required") + } + config, err := config.ReadVTGRConfig(*vtgrConfigFile) + if err != nil { + log.Fatalf("Cannot load vtgr config file: %v", err) + } + vtgr := newVTGR( + ctx, + topo.Open(), + tmclient.NewTabletManagerClient(), + ) + var shards []*controller.GRShard + ctx, cancel := context.WithTimeout(vtgr.ctx, *topo.RemoteOperationTimeout) + defer cancel() + for _, ks := range clustersToWatch { + if strings.Contains(ks, "/") { + // This is a keyspace/shard specification + input := strings.Split(ks, "/") + shards = append(shards, controller.NewGRShard(input[0], input[1], cellsToWatch, vtgr.tmc, vtgr.topo, db.NewVTGRSqlAgent(), config, *localDbPort)) + } else { + // Assume this is a keyspace and find all shards in keyspace + shardNames, err := vtgr.topo.GetShardNames(ctx, ks) + if err != nil { + // Log the error and continue + log.Errorf("Error fetching shards for keyspace %v: %v", ks, err) + continue + } + if len(shardNames) == 0 { + log.Errorf("Topo has no shards for ks: %v", ks) + continue + } + for _, s := range shardNames { + shards = append(shards, controller.NewGRShard(ks, s, cellsToWatch, vtgr.tmc, vtgr.topo, db.NewVTGRSqlAgent(), config, *localDbPort)) + } + } + } + vtgr.Shards = shards + log.Infof("Monitoring shards size %v", len(vtgr.Shards)) + // Force refresh all tablet here to populate data for vtgr + var wg sync.WaitGroup + for _, shard := range vtgr.Shards { + wg.Add(1) + go 
func(shard *controller.GRShard) { + defer wg.Done() + shard.UpdateTabletsInShardWithLock(ctx) + }(shard) + } + wg.Wait() + log.Info("Ready to start VTGR") + return vtgr +} + +// RefreshCluster get the latest tablets from topo server +func (vtgr *VTGR) RefreshCluster() { + for _, shard := range vtgr.Shards { + go func(shard *controller.GRShard) { + ticker := time.Tick(*refreshInterval) + for range ticker { + ctx, cancel := context.WithTimeout(vtgr.ctx, *refreshInterval) + shard.UpdateTabletsInShardWithLock(ctx) + cancel() + } + }(shard) + } +} + +// ScanAndRepair starts the scanAndFix routine +func (vtgr *VTGR) ScanAndRepair() { + for _, shard := range vtgr.Shards { + go func(shard *controller.GRShard) { + ticker := time.Tick(*scanInterval) + for range ticker { + func() { + ctx, cancel := context.WithTimeout(vtgr.ctx, *scanAndRepairTimeout) + defer cancel() + log.Infof("Start scan and repair %v/%v", shard.KeyspaceShard.Keyspace, shard.KeyspaceShard.Shard) + shard.ScanAndRepairShard(ctx) + log.Infof("Finished scan and repair %v/%v", shard.KeyspaceShard.Keyspace, shard.KeyspaceShard.Shard) + }() + } + }(shard) + } +} + +// Diagnose exposes the endpoint to diagnose a particular shard +func (vtgr *VTGR) Diagnose(ctx context.Context, shard *controller.GRShard) (controller.DiagnoseType, error) { + return shard.Diagnose(ctx) +} + +// Repair exposes the endpoint to repair a particular shard +func (vtgr *VTGR) Repair(ctx context.Context, shard *controller.GRShard, diagnose controller.DiagnoseType) (controller.RepairResultCode, error) { + return shard.Repair(ctx, diagnose) +} + +// GetCurrentShardStatuses is used when we want to know what VTGR observes +// it contains information about a list of instances and primary tablet +func (vtgr *VTGR) GetCurrentShardStatuses() []controller.ShardStatus { + var result []controller.ShardStatus + for _, shard := range vtgr.Shards { + status := shard.GetCurrentShardStatuses() + result = append(result, status) + } + return result +}