From 96d5608f0b7b4f0a7ddc5e316f7731b1cb7cf1c4 Mon Sep 17 00:00:00 2001 From: Rohit Nayak Date: Thu, 19 Nov 2020 23:31:50 +0100 Subject: [PATCH 01/26] Initial commit for switching reads and writes independently Signed-off-by: Rohit Nayak --- go/vt/vtctl/vtctl.go | 53 +++++-- go/vt/wrangler/switcher.go | 8 +- go/vt/wrangler/switcher_dry_run.go | 4 +- go/vt/wrangler/switcher_interface.go | 4 +- go/vt/wrangler/traffic_switcher.go | 181 +++++++++++++++++++----- go/vt/wrangler/traffic_switcher_test.go | 114 +++++++-------- test/local_example.sh | 2 +- 7 files changed, 254 insertions(+), 112 deletions(-) diff --git a/go/vt/vtctl/vtctl.go b/go/vt/vtctl/vtctl.go index 28552317efd..f5db8915b75 100644 --- a/go/vt/vtctl/vtctl.go +++ b/go/vt/vtctl/vtctl.go @@ -340,7 +340,7 @@ var commands = []commandGroup{ "[-cells=c1,c2,...] [-reverse] -tablet_type={replica|rdonly} [-dry-run] ", "Switch read traffic for the specified workflow."}, {"SwitchWrites", commandSwitchWrites, - "[-filtered_replication_wait_time=30s] [-cancel] [-reverse_replication=true] [-dry-run] ", + "[-timeout=30s] [-reverse] [-reverse_replication=true] [-dry-run] ", "Switch write traffic for the specified workflow."}, {"CancelResharding", commandCancelResharding, "", @@ -2140,18 +2140,29 @@ func commandDropSources(ctx context.Context, wr *wrangler.Wrangler, subFlags *fl func commandSwitchReads(ctx context.Context, wr *wrangler.Wrangler, subFlags *flag.FlagSet, args []string) error { reverse := subFlags.Bool("reverse", false, "Moves the served tablet type backward instead of forward.") cellsStr := subFlags.String("cells", "", "Specifies a comma-separated list of cells to update") - tabletType := subFlags.String("tablet_type", "", "Tablet type (replica or rdonly)") + tabletTypes := subFlags.String("tablet_types", "rdonly,replica", "Tablet types to switch one or both or rdonly/replica") + deprecatedTabletType := subFlags.String("tablet_type", "", "(DEPRECATED) one of rdonly/replica") dryRun := subFlags.Bool("dry_run", 
false, "Does a dry run of SwitchReads and only reports the actions to be taken") if err := subFlags.Parse(args); err != nil { return err } - if *tabletType == "" { - return fmt.Errorf("-tablet_type must be specified") + if !(*deprecatedTabletType == "" || *deprecatedTabletType == "replica" || *deprecatedTabletType == "rdonly") { + return fmt.Errorf("invalid value specified for -tablet_type: %s", *deprecatedTabletType) } - servedType, err := parseTabletType(*tabletType, []topodatapb.TabletType{topodatapb.TabletType_REPLICA, topodatapb.TabletType_RDONLY}) - if err != nil { - return err + + if *deprecatedTabletType != "" { + *tabletTypes = *deprecatedTabletType + } + + tabletTypesArr := strings.Split(*tabletTypes, ",") + var servedTypes []topodatapb.TabletType + for _, tabletType := range tabletTypesArr { + servedType, err := parseTabletType(tabletType, []topodatapb.TabletType{topodatapb.TabletType_REPLICA, topodatapb.TabletType_RDONLY}) + if err != nil { + return err + } + servedTypes = append(servedTypes, servedType) } var cells []string if *cellsStr != "" { @@ -2168,8 +2179,13 @@ func commandSwitchReads(ctx context.Context, wr *wrangler.Wrangler, subFlags *fl if err != nil { return err } + /* + if strings.HasSuffix(workflow, "_reverse") { + return fmt.Errorf("workflow cannot end with _reverse, it is reserved for vreplication to create a reverse workflow") + } - dryRunResults, err := wr.SwitchReads(ctx, keyspace, workflow, servedType, cells, direction, *dryRun) + */ + dryRunResults, err := wr.SwitchReads(ctx, keyspace, workflow, servedTypes, cells, direction, *dryRun) if err != nil { return err } @@ -2181,9 +2197,11 @@ func commandSwitchReads(ctx context.Context, wr *wrangler.Wrangler, subFlags *fl } func commandSwitchWrites(ctx context.Context, wr *wrangler.Wrangler, subFlags *flag.FlagSet, args []string) error { - filteredReplicationWaitTime := subFlags.Duration("filtered_replication_wait_time", 30*time.Second, "Specifies the maximum time to wait, in seconds, for 
filtered replication to catch up on master migrations. The migration will be aborted on timeout.") + timeout := subFlags.Duration("timeout", 30*time.Second, "Specifies the maximum time to wait, in seconds, for vreplication to catch up on master migrations. The migration will be aborted on timeout.") + filteredReplicationWaitTime := subFlags.Duration("filtered_replication_wait_time", 30*time.Second, "DEPRECATED Specifies the maximum time to wait, in seconds, for vreplication to catch up on master migrations. The migration will be aborted on timeout.") reverseReplication := subFlags.Bool("reverse_replication", true, "Also reverse the replication") - cancelMigrate := subFlags.Bool("cancel", false, "Cancel the failed migration and serve from source") + cancel := subFlags.Bool("cancel", false, "Cancel the failed migration and serve from source") + reverse := subFlags.Bool("reverse", false, "Reverse a previous SwitchWrites serve from source") dryRun := subFlags.Bool("dry_run", false, "Does a dry run of SwitchWrites and only reports the actions to be taken") if err := subFlags.Parse(args); err != nil { return err @@ -2196,8 +2214,19 @@ func commandSwitchWrites(ctx context.Context, wr *wrangler.Wrangler, subFlags *f if err != nil { return err } + /* + TODO: uncomment for subsequent release + if strings.HasSuffix(workflow, "_reverse") { + return fmt.Errorf("workflow cannot end with _reverse, it is reserved for vreplication to create a reverse workflow") + } + + */ + + if filteredReplicationWaitTime != timeout { + timeout = filteredReplicationWaitTime + } - journalID, dryRunResults, err := wr.SwitchWrites(ctx, keyspace, workflow, *filteredReplicationWaitTime, *cancelMigrate, *reverseReplication, *dryRun) + journalID, dryRunResults, err := wr.SwitchWrites(ctx, keyspace, workflow, *timeout, *cancel, *reverse, *reverseReplication, *dryRun) if err != nil { return err } @@ -3035,7 +3064,7 @@ func commandWorkflow(ctx context.Context, wr *wrangler.Wrangler, subFlags *flag. 
return err } if subFlags.NArg() != 2 { - return fmt.Errorf("usage: Workflow --dry-run keyspace.workflow start/stop/delete/list/listall") + return fmt.Errorf("usage: Workflow --dry-run keyspace[.workflow] start/stop/delete/list/listall") } keyspace := subFlags.Arg(0) action := strings.ToLower(subFlags.Arg(1)) diff --git a/go/vt/wrangler/switcher.go b/go/vt/wrangler/switcher.go index e721211bf5f..a19c084c433 100644 --- a/go/vt/wrangler/switcher.go +++ b/go/vt/wrangler/switcher.go @@ -49,12 +49,12 @@ func (r *switcher) dropSourceShards(ctx context.Context) error { return r.ts.dropSourceShards(ctx) } -func (r *switcher) switchShardReads(ctx context.Context, cells []string, servedType topodatapb.TabletType, direction TrafficSwitchDirection) error { - return r.ts.switchShardReads(ctx, cells, servedType, direction) +func (r *switcher) switchShardReads(ctx context.Context, cells []string, servedTypes []topodatapb.TabletType, direction TrafficSwitchDirection) error { + return r.ts.switchShardReads(ctx, cells, servedTypes, direction) } -func (r *switcher) switchTableReads(ctx context.Context, cells []string, servedType topodatapb.TabletType, direction TrafficSwitchDirection) error { - return r.ts.switchTableReads(ctx, cells, servedType, direction) +func (r *switcher) switchTableReads(ctx context.Context, cells []string, servedTypes []topodatapb.TabletType, direction TrafficSwitchDirection) error { + return r.ts.switchTableReads(ctx, cells, servedTypes, direction) } func (r *switcher) startReverseVReplication(ctx context.Context) error { diff --git a/go/vt/wrangler/switcher_dry_run.go b/go/vt/wrangler/switcher_dry_run.go index f80245bfd23..b29ddc4e280 100644 --- a/go/vt/wrangler/switcher_dry_run.go +++ b/go/vt/wrangler/switcher_dry_run.go @@ -37,7 +37,7 @@ type switcherDryRun struct { ts *trafficSwitcher } -func (dr *switcherDryRun) switchShardReads(ctx context.Context, cells []string, servedType topodatapb.TabletType, direction TrafficSwitchDirection) error { +func (dr 
*switcherDryRun) switchShardReads(ctx context.Context, cells []string, servedTypes []topodatapb.TabletType, direction TrafficSwitchDirection) error { sourceShards := make([]string, 0) targetShards := make([]string, 0) for _, source := range dr.ts.sources { @@ -58,7 +58,7 @@ func (dr *switcherDryRun) switchShardReads(ctx context.Context, cells []string, return nil } -func (dr *switcherDryRun) switchTableReads(ctx context.Context, cells []string, servedType topodatapb.TabletType, direction TrafficSwitchDirection) error { +func (dr *switcherDryRun) switchTableReads(ctx context.Context, cells []string, servedTypes []topodatapb.TabletType, direction TrafficSwitchDirection) error { ks := dr.ts.targetKeyspace if direction == DirectionBackward { ks = dr.ts.sourceKeyspace diff --git a/go/vt/wrangler/switcher_interface.go b/go/vt/wrangler/switcher_interface.go index 77f40904e92..2c09fad9e1c 100644 --- a/go/vt/wrangler/switcher_interface.go +++ b/go/vt/wrangler/switcher_interface.go @@ -37,8 +37,8 @@ type iswitcher interface { changeRouting(ctx context.Context) error streamMigraterfinalize(ctx context.Context, ts *trafficSwitcher, workflows []string) error startReverseVReplication(ctx context.Context) error - switchTableReads(ctx context.Context, cells []string, servedType topodatapb.TabletType, direction TrafficSwitchDirection) error - switchShardReads(ctx context.Context, cells []string, servedType topodatapb.TabletType, direction TrafficSwitchDirection) error + switchTableReads(ctx context.Context, cells []string, servedType []topodatapb.TabletType, direction TrafficSwitchDirection) error + switchShardReads(ctx context.Context, cells []string, servedType []topodatapb.TabletType, direction TrafficSwitchDirection) error validateWorkflowHasCompleted(ctx context.Context) error removeSourceTables(ctx context.Context, removalType TableRemovalType) error dropSourceShards(ctx context.Context) error diff --git a/go/vt/wrangler/traffic_switcher.go 
b/go/vt/wrangler/traffic_switcher.go index a1ed6bc447e..4f2469831fb 100644 --- a/go/vt/wrangler/traffic_switcher.go +++ b/go/vt/wrangler/traffic_switcher.go @@ -126,17 +126,115 @@ type tsSource struct { journaled bool } -// SwitchReads is a generic way of switching read traffic for a resharding workflow. -func (wr *Wrangler) SwitchReads(ctx context.Context, targetKeyspace, workflow string, servedType topodatapb.TabletType, cells []string, direction TrafficSwitchDirection, dryRun bool) (*[]string, error) { - if servedType != topodatapb.TabletType_REPLICA && servedType != topodatapb.TabletType_RDONLY { - return nil, fmt.Errorf("tablet type must be REPLICA or RDONLY: %v", servedType) - } +const ( + workflowTypeReshard = "Reshard" + workflowTypeMoveTables = "MoveTables" + workflowTypeMaterialize = "Materialize" +) + +type workflowState struct { + Workflow string + SourceKeyspace string + TargetKeyspace string + WorkflowType string + ReplicaReadsSwitched bool + RdonlyReadsSwitched bool + WritesSwitched bool +} + +func (wr *Wrangler) getWorkflowState(ctx context.Context, targetKeyspace, workflow string) (*trafficSwitcher, *workflowState, error) { + ws := &workflowState{Workflow: workflow, TargetKeyspace: targetKeyspace} ts, err := wr.buildTrafficSwitcher(ctx, targetKeyspace, workflow) if err != nil { wr.Logger().Errorf("buildTrafficSwitcher failed: %v", err) + return nil, nil, err + } + ws.SourceKeyspace = ts.sourceKeyspace + if ts.frozen { + ws.WritesSwitched = true + } + if ts.migrationType == binlogdatapb.MigrationType_TABLES { + ws.WorkflowType = workflowTypeMoveTables + rules, err := wr.getRoutingRules(ctx) + if err != nil { + return nil, nil, err + } + getKeyspace := func(ruleTarget string) (string, error) { + arr := strings.Split(ruleTarget, ".") + if len(arr) != 2 { + return "", fmt.Errorf("rule target is not correctly formatted: %s", ruleTarget) + } + return arr[0], nil + } + // we assume a consistent state, so only choose routing rule for one table for 
replica/rdonly + if len(ts.tables) == 0 { + return nil, nil, fmt.Errorf("no tables in workflow %s.%s", targetKeyspace, workflow) + + } + table := ts.tables[0] + + ruleTargets, ok := rules[table] + if len(ruleTargets) == 0 || !ok { + return nil, nil, fmt.Errorf("no rule defined for table %s", table) + } + tableKs, err := getKeyspace(ruleTargets[0]) + if err != nil { + return nil, nil, err + } + var replicaKs, rdonlyKs string + ruleTargets, ok = rules[table+"@replica"] + if !ok { + replicaKs = tableKs + } else { + replicaKs, err = getKeyspace(ruleTargets[0]) + if err != nil { + return nil, nil, err + } + } + ruleTargets, ok = rules[table+"@rdonly"] + if !ok { + rdonlyKs = tableKs + } else { + rdonlyKs, err = getKeyspace(ruleTargets[0]) + if err != nil { + return nil, nil, err + } + } + ws.RdonlyReadsSwitched = rdonlyKs == ts.targetKeyspace + ws.ReplicaReadsSwitched = replicaKs == ts.targetKeyspace + } else { + ws.WorkflowType = workflowTypeReshard + tks, err := wr.ts.GetKeyspace(ctx, targetKeyspace) + if err != nil { + return nil, nil, err + } + ws.ReplicaReadsSwitched = tks.GetServedFrom(topodatapb.TabletType_REPLICA) != nil + ws.RdonlyReadsSwitched = tks.GetServedFrom(topodatapb.TabletType_RDONLY) != nil + } + + return ts, ws, nil +} + +// SwitchReads is a generic way of switching read traffic for a resharding workflow. 
+func (wr *Wrangler) SwitchReads(ctx context.Context, targetKeyspace, workflow string, servedTypes []topodatapb.TabletType, cells []string, direction TrafficSwitchDirection, dryRun bool) (*[]string, error) { + ts, ws, err := wr.getWorkflowState(ctx, targetKeyspace, workflow) + if err != nil { + wr.Logger().Errorf("getWorkflowState failed: %v", err) return nil, err } + for _, servedType := range servedTypes { + if servedType != topodatapb.TabletType_REPLICA && servedType != topodatapb.TabletType_RDONLY { + return nil, fmt.Errorf("tablet type must be REPLICA or RDONLY: %v", servedType) + } + if direction == DirectionBackward && servedType == topodatapb.TabletType_REPLICA && !ws.ReplicaReadsSwitched { + return nil, fmt.Errorf("requesting reversal of SwitchReads for REPLICAs but REPLICA reads have not been switched") + } + if direction == DirectionBackward && servedType == topodatapb.TabletType_RDONLY && !ws.RdonlyReadsSwitched { + return nil, fmt.Errorf("requesting reversal of SwitchReads for RDONLYs but RDONLY reads have not been switched") + } + } + //If journals exist notify user and fail journalsExist, _, err := ts.checkJournals(ctx) if err != nil { @@ -145,7 +243,7 @@ func (wr *Wrangler) SwitchReads(ctx context.Context, targetKeyspace, workflow st } if journalsExist { wr.Logger().Errorf("Found a previous journal entry for %d", ts.id) - return nil, fmt.Errorf("found an entry from a previous run for migration id %d in _vt.resharding_journal, please review and delete it before proceeding", ts.id) + //return nil, fmt.Errorf("found an entry from a previous run for migration id %d in _vt.resharding_journal, please review and delete it before proceeding", ts.id) } var sw iswitcher @@ -172,13 +270,13 @@ func (wr *Wrangler) SwitchReads(ctx context.Context, targetKeyspace, workflow st defer unlock(&err) if ts.migrationType == binlogdatapb.MigrationType_TABLES { - if err := sw.switchTableReads(ctx, cells, servedType, direction); err != nil { + if err := 
sw.switchTableReads(ctx, cells, servedTypes, direction); err != nil { ts.wr.Logger().Errorf("switchTableReads failed: %v", err) return nil, err } return sw.logs(), nil } - if err := ts.switchShardReads(ctx, cells, servedType, direction); err != nil { + if err := ts.switchShardReads(ctx, cells, servedTypes, direction); err != nil { ts.wr.Logger().Errorf("switchShardReads failed: %v", err) return nil, err } @@ -186,13 +284,21 @@ func (wr *Wrangler) SwitchReads(ctx context.Context, targetKeyspace, workflow st } // SwitchWrites is a generic way of migrating write traffic for a resharding workflow. -func (wr *Wrangler) SwitchWrites(ctx context.Context, targetKeyspace, workflow string, filteredReplicationWaitTime time.Duration, cancelMigrate, reverseReplication bool, dryRun bool) (journalID int64, dryRunResults *[]string, err error) { - ts, err := wr.buildTrafficSwitcher(ctx, targetKeyspace, workflow) +func (wr *Wrangler) SwitchWrites(ctx context.Context, targetKeyspace, workflow string, timeout time.Duration, cancel, reverse, reverseReplication bool, dryRun bool) (journalID int64, dryRunResults *[]string, err error) { + ts, ws, err := wr.getWorkflowState(ctx, targetKeyspace, workflow) + _ = ws if err != nil { - wr.Logger().Errorf("buildTrafficSwitcher failed: %v", err) + wr.Logger().Errorf("getWorkflowState failed: %v", err) return 0, nil, err } + if reverse { + if !ws.WritesSwitched { + return 0, nil, fmt.Errorf("-reverse was passed but writes have not yet been switched for %s.%s", ws.TargetKeyspace, ws.Workflow) + } + workflow = reverseName(workflow) + } + var sw iswitcher if dryRun { sw = &switcherDryRun{ts: ts, drLog: NewLogRecorder()} @@ -237,12 +343,12 @@ func (wr *Wrangler) SwitchWrites(ctx context.Context, targetKeyspace, workflow s } if !journalsExist { ts.wr.Logger().Infof("No previous journals were found. 
Proceeding normally.") - sm, err := buildStreamMigrater(ctx, ts, cancelMigrate) + sm, err := buildStreamMigrater(ctx, ts, cancel) if err != nil { ts.wr.Logger().Errorf("buildStreamMigrater failed: %v", err) return 0, nil, err } - if cancelMigrate { + if cancel { sw.cancelMigration(ctx, sm) return 0, sw.logs(), nil } @@ -266,7 +372,7 @@ func (wr *Wrangler) SwitchWrites(ctx context.Context, targetKeyspace, workflow s } ts.wr.Logger().Infof("Waiting for streams to catchup") - if err := sw.waitForCatchup(ctx, filteredReplicationWaitTime); err != nil { + if err := sw.waitForCatchup(ctx, timeout); err != nil { ts.wr.Logger().Errorf("waitForCatchup failed: %v", err) sw.cancelMigration(ctx, sm) return 0, nil, err @@ -286,7 +392,7 @@ func (wr *Wrangler) SwitchWrites(ctx context.Context, targetKeyspace, workflow s return 0, nil, err } } else { - if cancelMigrate { + if cancel { err := fmt.Errorf("traffic switching has reached the point of no return, cannot cancel") ts.wr.Logger().Errorf("%v", err) return 0, nil, err @@ -648,7 +754,7 @@ func (ts *trafficSwitcher) compareShards(ctx context.Context, keyspace string, s return nil } -func (ts *trafficSwitcher) switchTableReads(ctx context.Context, cells []string, servedType topodatapb.TabletType, direction TrafficSwitchDirection) error { +func (ts *trafficSwitcher) switchTableReads(ctx context.Context, cells []string, servedTypes []topodatapb.TabletType, direction TrafficSwitchDirection) error { rules, err := ts.wr.getRoutingRules(ctx) if err != nil { return err @@ -658,16 +764,18 @@ func (ts *trafficSwitcher) switchTableReads(ctx context.Context, cells []string, // targetKeyspace.table -> sourceKeyspace.table // For forward migration, we add tablet type specific rules to redirect traffic to the target. // For backward, we delete them. - tt := strings.ToLower(servedType.String()) - for _, table := range ts.tables { - if direction == DirectionForward { - rules[table+"@"+tt] = []string{ts.targetKeyspace + "." 
+ table} - rules[ts.targetKeyspace+"."+table+"@"+tt] = []string{ts.targetKeyspace + "." + table} - rules[ts.sourceKeyspace+"."+table+"@"+tt] = []string{ts.targetKeyspace + "." + table} - } else { - delete(rules, table+"@"+tt) - delete(rules, ts.targetKeyspace+"."+table+"@"+tt) - delete(rules, ts.sourceKeyspace+"."+table+"@"+tt) + for _, servedType := range servedTypes { + tt := strings.ToLower(servedType.String()) + for _, table := range ts.tables { + if direction == DirectionForward { + rules[table+"@"+tt] = []string{ts.targetKeyspace + "." + table} + rules[ts.targetKeyspace+"."+table+"@"+tt] = []string{ts.targetKeyspace + "." + table} + rules[ts.sourceKeyspace+"."+table+"@"+tt] = []string{ts.targetKeyspace + "." + table} + } else { + delete(rules, table+"@"+tt) + delete(rules, ts.targetKeyspace+"."+table+"@"+tt) + delete(rules, ts.sourceKeyspace+"."+table+"@"+tt) + } } } if err := ts.wr.saveRoutingRules(ctx, rules); err != nil { @@ -676,21 +784,26 @@ func (ts *trafficSwitcher) switchTableReads(ctx context.Context, cells []string, return ts.wr.ts.RebuildSrvVSchema(ctx, cells) } -func (ts *trafficSwitcher) switchShardReads(ctx context.Context, cells []string, servedType topodatapb.TabletType, direction TrafficSwitchDirection) error { +func (ts *trafficSwitcher) switchShardReads(ctx context.Context, cells []string, servedTypes []topodatapb.TabletType, direction TrafficSwitchDirection) error { var fromShards, toShards []*topo.ShardInfo if direction == DirectionForward { fromShards, toShards = ts.sourceShards(), ts.targetShards() } else { fromShards, toShards = ts.targetShards(), ts.sourceShards() } - - if err := ts.wr.updateShardRecords(ctx, ts.sourceKeyspace, fromShards, cells, servedType, true /* isFrom */, false /* clearSourceShards */); err != nil { - return err - } - if err := ts.wr.updateShardRecords(ctx, ts.sourceKeyspace, toShards, cells, servedType, false, false); err != nil { - return err + for _, servedType := range servedTypes { + if err := 
ts.wr.updateShardRecords(ctx, ts.sourceKeyspace, fromShards, cells, servedType, true /* isFrom */, false /* clearSourceShards */); err != nil { + return err + } + if err := ts.wr.updateShardRecords(ctx, ts.sourceKeyspace, toShards, cells, servedType, false, false); err != nil { + return err + } + err := ts.wr.ts.MigrateServedType(ctx, ts.sourceKeyspace, toShards, fromShards, servedType, cells) + if err != nil { + return err + } } - return ts.wr.ts.MigrateServedType(ctx, ts.sourceKeyspace, toShards, fromShards, servedType, cells) + return nil } func (wr *Wrangler) checkIfJournalExistsOnTablet(ctx context.Context, tablet *topodatapb.Tablet, migrationID int64) (*binlogdatapb.Journal, bool, error) { diff --git a/go/vt/wrangler/traffic_switcher_test.go b/go/vt/wrangler/traffic_switcher_test.go index 8e80c03a362..7b76b1fe612 100644 --- a/go/vt/wrangler/traffic_switcher_test.go +++ b/go/vt/wrangler/traffic_switcher_test.go @@ -86,7 +86,7 @@ func TestTableMigrateMainflow(t *testing.T) { tme.expectNoPreviousJournals() //------------------------------------------------------------------------------------------------------------------- // Single cell RDONLY migration. - _, err := tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", topodatapb.TabletType_RDONLY, []string{"cell1"}, DirectionForward, false) + _, err := tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", []topodatapb.TabletType{topodatapb.TabletType_RDONLY}, []string{"cell1"}, DirectionForward, false) if err != nil { t.Fatal(err) } @@ -117,7 +117,7 @@ func TestTableMigrateMainflow(t *testing.T) { // So, adding routes for replica and deploying to cell2 will also cause // cell2 to switch rdonly. This is a quirk that can be fixed later if necessary. // TODO(sougou): check if it's worth fixing, or clearly document the quirk. 
- _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", topodatapb.TabletType_REPLICA, []string{"cell2"}, DirectionForward, false) + _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", []topodatapb.TabletType{topodatapb.TabletType_REPLICA}, []string{"cell2"}, DirectionForward, false) if err != nil { t.Fatal(err) } @@ -156,7 +156,7 @@ func TestTableMigrateMainflow(t *testing.T) { tme.expectNoPreviousJournals() //------------------------------------------------------------------------------------------------------------------- // Single cell backward REPLICA migration. - _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", topodatapb.TabletType_REPLICA, []string{"cell2"}, DirectionBackward, false) + _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", []topodatapb.TabletType{topodatapb.TabletType_REPLICA}, []string{"cell2"}, DirectionBackward, false) if err != nil { t.Fatal(err) } @@ -177,7 +177,7 @@ func TestTableMigrateMainflow(t *testing.T) { tme.expectNoPreviousJournals() //------------------------------------------------------------------------------------------------------------------- // Switch all REPLICA. - _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", topodatapb.TabletType_REPLICA, nil, DirectionForward, false) + _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", []topodatapb.TabletType{topodatapb.TabletType_REPLICA}, nil, DirectionForward, false) if err != nil { t.Fatal(err) } @@ -204,7 +204,7 @@ func TestTableMigrateMainflow(t *testing.T) { tme.expectNoPreviousJournals() //------------------------------------------------------------------------------------------------------------------- // All cells RDONLY backward migration. 
- _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", topodatapb.TabletType_RDONLY, nil, DirectionBackward, false) + _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", []topodatapb.TabletType{topodatapb.TabletType_RDONLY}, nil, DirectionBackward, false) if err != nil { t.Fatal(err) } @@ -225,7 +225,7 @@ func TestTableMigrateMainflow(t *testing.T) { tme.expectNoPreviousJournals() //------------------------------------------------------------------------------------------------------------------- // All cells RDONLY backward migration. - _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", topodatapb.TabletType_REPLICA, nil, DirectionBackward, false) + _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", []topodatapb.TabletType{topodatapb.TabletType_REPLICA}, nil, DirectionBackward, false) if err != nil { t.Fatal(err) } @@ -239,7 +239,7 @@ func TestTableMigrateMainflow(t *testing.T) { //------------------------------------------------------------------------------------------------------------------- // Can't switch master with SwitchReads. - _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", topodatapb.TabletType_MASTER, nil, DirectionForward, false) + _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", []topodatapb.TabletType{topodatapb.TabletType_MASTER}, nil, DirectionForward, false) want := "tablet type must be REPLICA or RDONLY: MASTER" if err == nil || err.Error() != want { t.Errorf("SwitchReads(master) err: %v, want %v", err, want) @@ -248,7 +248,7 @@ func TestTableMigrateMainflow(t *testing.T) { //------------------------------------------------------------------------------------------------------------------- // Can't switch writes if REPLICA and RDONLY have not fully switched yet. 
- _, _, err = tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, false, true, false) + _, _, err = tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, false, false, true, false) want = "missing tablet type specific routing, read-only traffic must be switched before switching writes" if err == nil || !strings.Contains(err.Error(), want) { t.Errorf("SwitchWrites err: %v, want %v", err, want) @@ -260,12 +260,12 @@ func TestTableMigrateMainflow(t *testing.T) { tme.expectNoPreviousJournals() // Switch all the reads first. - _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", topodatapb.TabletType_RDONLY, nil, DirectionForward, false) + _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", []topodatapb.TabletType{topodatapb.TabletType_RDONLY}, nil, DirectionForward, false) if err != nil { t.Fatal(err) } tme.expectNoPreviousJournals() - _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", topodatapb.TabletType_REPLICA, nil, DirectionForward, false) + _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", []topodatapb.TabletType{topodatapb.TabletType_REPLICA}, nil, DirectionForward, false) if err != nil { t.Fatal(err) } @@ -316,7 +316,7 @@ func TestTableMigrateMainflow(t *testing.T) { } cancelMigration() - _, _, err = tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 0*time.Second, false, true, false) + _, _, err = tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 0*time.Second, false, false, true, false) want = "DeadlineExceeded" if err == nil || !strings.Contains(err.Error(), want) { t.Errorf("SwitchWrites(0 timeout) err: %v, must contain %v", err, want) @@ -432,7 +432,7 @@ func TestTableMigrateMainflow(t *testing.T) { } deleteTargetVReplication() - journalID, _, err := tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, false, true, false) + journalID, _, err := tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, false, false, true, false) if err != nil { t.Fatal(err) } 
@@ -470,7 +470,7 @@ func TestShardMigrateMainflow(t *testing.T) { tme.expectNoPreviousJournals() //------------------------------------------------------------------------------------------------------------------- // Single cell RDONLY migration. - _, err := tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", topodatapb.TabletType_RDONLY, []string{"cell1"}, DirectionForward, false) + _, err := tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", []topodatapb.TabletType{topodatapb.TabletType_RDONLY}, []string{"cell1"}, DirectionForward, false) if err != nil { t.Fatal(err) } @@ -487,7 +487,7 @@ func TestShardMigrateMainflow(t *testing.T) { tme.expectNoPreviousJournals() //------------------------------------------------------------------------------------------------------------------- // Other cell REPLICA migration. - _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", topodatapb.TabletType_REPLICA, []string{"cell2"}, DirectionForward, false) + _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", []topodatapb.TabletType{topodatapb.TabletType_REPLICA}, []string{"cell2"}, DirectionForward, false) if err != nil { t.Fatal(err) } @@ -504,7 +504,7 @@ func TestShardMigrateMainflow(t *testing.T) { tme.expectNoPreviousJournals() //------------------------------------------------------------------------------------------------------------------- // Single cell backward REPLICA migration. - _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", topodatapb.TabletType_REPLICA, []string{"cell2"}, DirectionBackward, false) + _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", []topodatapb.TabletType{topodatapb.TabletType_REPLICA}, []string{"cell2"}, DirectionBackward, false) if err != nil { t.Fatal(err) } @@ -524,7 +524,7 @@ func TestShardMigrateMainflow(t *testing.T) { // This is an extra step that does not exist in the tables test. // The per-cell migration mechanism is different for tables. 
So, this // extra step is needed to bring things in sync. - _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", topodatapb.TabletType_RDONLY, nil, DirectionForward, false) + _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", []topodatapb.TabletType{topodatapb.TabletType_RDONLY}, nil, DirectionForward, false) if err != nil { t.Fatal(err) } @@ -537,7 +537,7 @@ func TestShardMigrateMainflow(t *testing.T) { tme.expectNoPreviousJournals() //------------------------------------------------------------------------------------------------------------------- // Switch all REPLICA. - _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", topodatapb.TabletType_REPLICA, nil, DirectionForward, false) + _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", []topodatapb.TabletType{topodatapb.TabletType_REPLICA}, nil, DirectionForward, false) if err != nil { t.Fatal(err) } @@ -550,7 +550,7 @@ func TestShardMigrateMainflow(t *testing.T) { tme.expectNoPreviousJournals() //------------------------------------------------------------------------------------------------------------------- // All cells RDONLY backward migration. - _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", topodatapb.TabletType_RDONLY, nil, DirectionBackward, false) + _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", []topodatapb.TabletType{topodatapb.TabletType_RDONLY}, nil, DirectionBackward, false) if err != nil { t.Fatal(err) } @@ -562,7 +562,7 @@ func TestShardMigrateMainflow(t *testing.T) { //------------------------------------------------------------------------------------------------------------------- // Can't switch master with SwitchReads. 
- _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", topodatapb.TabletType_MASTER, nil, DirectionForward, false) + _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", []topodatapb.TabletType{topodatapb.TabletType_MASTER}, nil, DirectionForward, false) want := "tablet type must be REPLICA or RDONLY: MASTER" if err == nil || err.Error() != want { t.Errorf("SwitchReads(master) err: %v, want %v", err, want) @@ -571,7 +571,7 @@ func TestShardMigrateMainflow(t *testing.T) { //------------------------------------------------------------------------------------------------------------------- // Can't switch writes if REPLICA and RDONLY have not fully switched yet. - _, _, err = tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, false, true, false) + _, _, err = tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, false, false, true, false) want = "cannot switch MASTER away" if err == nil || !strings.Contains(err.Error(), want) { t.Errorf("SwitchWrites err: %v, want %v", err, want) @@ -582,7 +582,7 @@ func TestShardMigrateMainflow(t *testing.T) { tme.expectNoPreviousJournals() // Switch all the reads first. 
- _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", topodatapb.TabletType_RDONLY, nil, DirectionForward, false) + _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", []topodatapb.TabletType{topodatapb.TabletType_RDONLY}, nil, DirectionForward, false) if err != nil { t.Fatal(err) } @@ -633,7 +633,7 @@ func TestShardMigrateMainflow(t *testing.T) { } cancelMigration() - _, _, err = tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 0*time.Second, false, true, false) + _, _, err = tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 0*time.Second, false, false, true, false) want = "DeadlineExceeded" if err == nil || !strings.Contains(err.Error(), want) { t.Errorf("SwitchWrites(0 timeout) err: %v, must contain %v", err, want) @@ -722,7 +722,7 @@ func TestShardMigrateMainflow(t *testing.T) { } freezeTargetVReplication() - journalID, _, err := tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, false, true, false) + journalID, _, err := tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, false, false, true, false) if err != nil { t.Fatal(err) } @@ -751,12 +751,12 @@ func TestTableMigrateOneToMany(t *testing.T) { defer tme.stopTablets(t) tme.expectNoPreviousJournals() - _, err := tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", topodatapb.TabletType_RDONLY, nil, DirectionForward, false) + _, err := tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", []topodatapb.TabletType{topodatapb.TabletType_RDONLY}, nil, DirectionForward, false) if err != nil { t.Fatal(err) } tme.expectNoPreviousJournals() - _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", topodatapb.TabletType_REPLICA, nil, DirectionForward, false) + _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", []topodatapb.TabletType{topodatapb.TabletType_REPLICA}, nil, DirectionForward, false) if err != nil { t.Fatal(err) } @@ -822,7 +822,7 @@ func TestTableMigrateOneToMany(t *testing.T) { require.Error(t, err, "Workflow has not completed, cannot 
DropSources") tme.dbSourceClients[0].addQueryRE(tsCheckJournals, &sqltypes.Result{}, nil) - _, _, err = tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, false, false, false) + _, _, err = tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, false, false, false, false) if err != nil { t.Fatal(err) } @@ -946,15 +946,15 @@ func TestTableMigrateOneToManyDryRun(t *testing.T) { "Unlock keyspace ks1", } tme.expectNoPreviousJournals() - dryRunResults, err := tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", topodatapb.TabletType_RDONLY, nil, DirectionForward, true) + dryRunResults, err := tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", []topodatapb.TabletType{topodatapb.TabletType_RDONLY}, nil, DirectionForward, true) require.NoError(t, err) require.Empty(t, cmp.Diff(wantdryRunReads, *dryRunResults)) tme.expectNoPreviousJournals() - _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", topodatapb.TabletType_RDONLY, nil, DirectionForward, false) + _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", []topodatapb.TabletType{topodatapb.TabletType_RDONLY}, nil, DirectionForward, false) require.NoError(t, err) tme.expectNoPreviousJournals() - _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", topodatapb.TabletType_REPLICA, nil, DirectionForward, false) + _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", []topodatapb.TabletType{topodatapb.TabletType_REPLICA}, nil, DirectionForward, false) require.NoError(t, err) verifyQueries(t, tme.allDBClients) @@ -1021,7 +1021,7 @@ func TestTableMigrateOneToManyDryRun(t *testing.T) { } deleteTargetVReplication() - _, results, err := tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, false, false, true) + _, results, err := tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, false, false, false, true) require.NoError(t, err) require.Empty(t, cmp.Diff(wantdryRunWrites, *results)) } @@ -1034,12 +1034,12 @@ func TestMigrateFailJournal(t 
*testing.T) { defer tme.stopTablets(t) tme.expectNoPreviousJournals() - _, err := tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", topodatapb.TabletType_RDONLY, nil, DirectionForward, false) + _, err := tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", []topodatapb.TabletType{topodatapb.TabletType_RDONLY}, nil, DirectionForward, false) if err != nil { t.Fatal(err) } tme.expectNoPreviousJournals() - _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", topodatapb.TabletType_REPLICA, nil, DirectionForward, false) + _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", []topodatapb.TabletType{topodatapb.TabletType_REPLICA}, nil, DirectionForward, false) require.NoError(t, err) // mi.checkJournals @@ -1106,7 +1106,7 @@ func TestMigrateFailJournal(t *testing.T) { tme.dbSourceClients[0].addQueryRE("insert into _vt.resharding_journal", nil, errors.New("journaling intentionally failed")) tme.dbSourceClients[1].addQueryRE("insert into _vt.resharding_journal", nil, errors.New("journaling intentionally failed")) - _, _, err = tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, false, true, false) + _, _, err = tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, false, false, true, false) want := "journaling intentionally failed" if err == nil || !strings.Contains(err.Error(), want) { t.Errorf("SwitchWrites(0 timeout) err: %v, must contain %v", err, want) @@ -1130,12 +1130,12 @@ func TestTableMigrateJournalExists(t *testing.T) { defer tme.stopTablets(t) tme.expectNoPreviousJournals() - _, err := tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", topodatapb.TabletType_RDONLY, nil, DirectionForward, false) + _, err := tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", []topodatapb.TabletType{topodatapb.TabletType_RDONLY}, nil, DirectionForward, false) if err != nil { t.Fatal(err) } tme.expectNoPreviousJournals() - _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", topodatapb.TabletType_REPLICA, nil, DirectionForward, 
false) + _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", []topodatapb.TabletType{topodatapb.TabletType_REPLICA}, nil, DirectionForward, false) if err != nil { t.Fatal(err) } @@ -1167,7 +1167,7 @@ func TestTableMigrateJournalExists(t *testing.T) { tme.dbTargetClients[1].addQuery("select * from _vt.vreplication where id = 1", stoppedResult(1), nil) tme.dbTargetClients[1].addQuery("select * from _vt.vreplication where id = 2", stoppedResult(2), nil) - _, _, err = tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, false, true, false) + _, _, err = tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, false, false, true, false) if err != nil { t.Fatal(err) } @@ -1194,12 +1194,12 @@ func TestShardMigrateJournalExists(t *testing.T) { defer tme.stopTablets(t) tme.expectNoPreviousJournals() - _, err := tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", topodatapb.TabletType_RDONLY, nil, DirectionForward, false) + _, err := tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", []topodatapb.TabletType{topodatapb.TabletType_RDONLY}, nil, DirectionForward, false) if err != nil { t.Fatal(err) } tme.expectNoPreviousJournals() - _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", topodatapb.TabletType_REPLICA, nil, DirectionForward, false) + _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", []topodatapb.TabletType{topodatapb.TabletType_REPLICA}, nil, DirectionForward, false) if err != nil { t.Fatal(err) } @@ -1231,7 +1231,7 @@ func TestShardMigrateJournalExists(t *testing.T) { tme.dbTargetClients[1].addQuery("update _vt.vreplication set message = 'FROZEN' where id in (2)", &sqltypes.Result{}, nil) tme.dbTargetClients[1].addQuery("select * from _vt.vreplication where id = 2", stoppedResult(2), nil) - _, _, err = tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, false, true, false) + _, _, err = tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, false, false, true, false) if err != nil 
{ t.Fatal(err) } @@ -1254,12 +1254,12 @@ func TestTableMigrateCancel(t *testing.T) { defer tme.stopTablets(t) tme.expectNoPreviousJournals() - _, err := tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", topodatapb.TabletType_RDONLY, nil, DirectionForward, false) + _, err := tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", []topodatapb.TabletType{topodatapb.TabletType_RDONLY}, nil, DirectionForward, false) if err != nil { t.Fatal(err) } tme.expectNoPreviousJournals() - _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", topodatapb.TabletType_REPLICA, nil, DirectionForward, false) + _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", []topodatapb.TabletType{topodatapb.TabletType_REPLICA}, nil, DirectionForward, false) if err != nil { t.Fatal(err) } @@ -1292,7 +1292,7 @@ func TestTableMigrateCancel(t *testing.T) { } cancelMigration() - _, _, err = tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, true, false, false) + _, _, err = tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, true, false, false, false) if err != nil { t.Fatal(err) } @@ -1313,12 +1313,12 @@ func TestTableMigrateCancelDryRun(t *testing.T) { } tme.expectNoPreviousJournals() - _, err := tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", topodatapb.TabletType_RDONLY, nil, DirectionForward, false) + _, err := tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", []topodatapb.TabletType{topodatapb.TabletType_RDONLY}, nil, DirectionForward, false) if err != nil { t.Fatal(err) } tme.expectNoPreviousJournals() - _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", topodatapb.TabletType_REPLICA, nil, DirectionForward, false) + _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", []topodatapb.TabletType{topodatapb.TabletType_REPLICA}, nil, DirectionForward, false) if err != nil { t.Fatal(err) } @@ -1351,7 +1351,7 @@ func TestTableMigrateCancelDryRun(t *testing.T) { } cancelMigration() - _, dryRunResults, err := tme.wr.SwitchWrites(ctx, 
tme.targetKeyspace, "test", 1*time.Second, true, false, true) + _, dryRunResults, err := tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, true, false, false, true) require.NoError(t, err) require.Empty(t, cmp.Diff(want, *dryRunResults)) } @@ -1362,12 +1362,12 @@ func TestTableMigrateNoReverse(t *testing.T) { defer tme.stopTablets(t) tme.expectNoPreviousJournals() - _, err := tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", topodatapb.TabletType_RDONLY, nil, DirectionForward, false) + _, err := tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", []topodatapb.TabletType{topodatapb.TabletType_RDONLY}, nil, DirectionForward, false) if err != nil { t.Fatal(err) } tme.expectNoPreviousJournals() - _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", topodatapb.TabletType_REPLICA, nil, DirectionForward, false) + _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", []topodatapb.TabletType{topodatapb.TabletType_REPLICA}, nil, DirectionForward, false) if err != nil { t.Fatal(err) } @@ -1449,7 +1449,7 @@ func TestTableMigrateNoReverse(t *testing.T) { } deleteTargetVReplication() - _, _, err = tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, false, false, false) + _, _, err = tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, false, false, false, false) if err != nil { t.Fatal(err) } @@ -1462,13 +1462,13 @@ func TestMigrateFrozen(t *testing.T) { defer tme.stopTablets(t) tme.expectNoPreviousJournals() - _, err := tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", topodatapb.TabletType_RDONLY, nil, DirectionForward, false) + _, err := tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", []topodatapb.TabletType{topodatapb.TabletType_RDONLY}, nil, DirectionForward, false) if err != nil { t.Fatal(err) } tme.expectNoPreviousJournals() - _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", topodatapb.TabletType_REPLICA, nil, DirectionForward, false) + _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, 
"test", []topodatapb.TabletType{topodatapb.TabletType_REPLICA}, nil, DirectionForward, false) if err != nil { t.Fatal(err) } @@ -1491,7 +1491,7 @@ func TestMigrateFrozen(t *testing.T) { tme.dbTargetClients[1].addQuery(vreplQueryks2, &sqltypes.Result{}, nil) tme.dbSourceClients[0].addQueryRE(tsCheckJournals, &sqltypes.Result{}, nil) - _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", topodatapb.TabletType_REPLICA, nil, DirectionForward, false) + _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", []topodatapb.TabletType{topodatapb.TabletType_REPLICA}, nil, DirectionForward, false) want := "cannot switch reads while SwitchWrites is in progress" if err == nil || err.Error() != want { t.Errorf("SwitchReads(frozen) err: %v, want %v", err, want) @@ -1504,7 +1504,7 @@ func TestMigrateFrozen(t *testing.T) { ), nil) tme.dbTargetClients[1].addQuery(vreplQueryks2, &sqltypes.Result{}, nil) - _, _, err = tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 0*time.Second, false, true, false) + _, _, err = tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 0*time.Second, false, false, true, false) if err != nil { t.Fatal(err) } @@ -1520,7 +1520,7 @@ func TestMigrateNoStreamsFound(t *testing.T) { tme.dbTargetClients[1].addQuery(vreplQueryks2, &sqltypes.Result{}, nil) tme.expectNoPreviousJournals() - _, err := tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", topodatapb.TabletType_RDONLY, nil, DirectionForward, false) + _, err := tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", []topodatapb.TabletType{topodatapb.TabletType_RDONLY}, nil, DirectionForward, false) want := "no streams found in keyspace ks2 for: test" if err == nil || !strings.Contains(err.Error(), want) { t.Errorf("SwitchReads: %v, must contain %v", err, want) @@ -1552,7 +1552,7 @@ func TestMigrateDistinctSources(t *testing.T) { ), nil) tme.expectNoPreviousJournals() - _, err := tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", topodatapb.TabletType_RDONLY, nil, DirectionForward, false) + 
_, err := tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", []topodatapb.TabletType{topodatapb.TabletType_RDONLY}, nil, DirectionForward, false) want := "source keyspaces are mismatched across streams" if err == nil || !strings.Contains(err.Error(), want) { t.Errorf("SwitchReads: %v, must contain %v", err, want) @@ -1582,7 +1582,7 @@ func TestMigrateMismatchedTables(t *testing.T) { ) tme.expectNoPreviousJournals() - _, err := tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", topodatapb.TabletType_RDONLY, nil, DirectionForward, false) + _, err := tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", []topodatapb.TabletType{topodatapb.TabletType_RDONLY}, nil, DirectionForward, false) want := "table lists are mismatched across streams" if err == nil || !strings.Contains(err.Error(), want) { t.Errorf("SwitchReads: %v, must contain %v", err, want) @@ -1597,7 +1597,7 @@ func TestTableMigrateAllShardsNotPresent(t *testing.T) { tme.dbTargetClients[0].addQuery(vreplQueryks2, &sqltypes.Result{}, nil) tme.expectNoPreviousJournals() - _, err := tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", topodatapb.TabletType_RDONLY, nil, DirectionForward, false) + _, err := tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", []topodatapb.TabletType{topodatapb.TabletType_RDONLY}, nil, DirectionForward, false) want := "mismatched shards for keyspace" if err == nil || !strings.Contains(err.Error(), want) { t.Errorf("SwitchReads: %v, must contain %v", err, want) @@ -1655,7 +1655,7 @@ func TestMigrateNoTableWildcards(t *testing.T) { fmt.Sprintf("1|%v|||", bls3), ), nil) tme.expectNoPreviousJournals() - _, err := tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", topodatapb.TabletType_RDONLY, nil, DirectionForward, false) + _, err := tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", []topodatapb.TabletType{topodatapb.TabletType_RDONLY}, nil, DirectionForward, false) want := "cannot migrate streams with wild card table names" if err == nil || !strings.Contains(err.Error(), want) { 
t.Errorf("SwitchReads: %v, must contain %v", err, want) diff --git a/test/local_example.sh b/test/local_example.sh index 43ca3d0e6e8..daf0df71990 100755 --- a/test/local_example.sh +++ b/test/local_example.sh @@ -44,7 +44,7 @@ for shard in "customer/0"; do done; ./202_move_tables.sh - +exit sleep 3 # required for now ./203_switch_reads.sh From 8c1d6d131f37c985f9482be140a715c9349d9c00 Mon Sep 17 00:00:00 2001 From: Rohit Nayak Date: Mon, 23 Nov 2020 18:54:33 +0100 Subject: [PATCH 02/26] Added tests for new sr/sw flows. Remove validations no longer applicable. Setup routing rules for new flow Signed-off-by: Rohit Nayak --- go/test/endtoend/vreplication/cluster.go | 2 +- go/test/endtoend/vreplication/helper.go | 17 ++ .../uniform_switch_read_writes_test.go | 227 ++++++++++++++++++ .../vreplication/vreplication_test.go | 24 +- go/vt/wrangler/materializer.go | 4 + go/vt/wrangler/traffic_switcher.go | 35 +-- 6 files changed, 279 insertions(+), 30 deletions(-) create mode 100644 go/test/endtoend/vreplication/uniform_switch_read_writes_test.go diff --git a/go/test/endtoend/vreplication/cluster.go b/go/test/endtoend/vreplication/cluster.go index 7dec3d56a2c..7c7eb073937 100644 --- a/go/test/endtoend/vreplication/cluster.go +++ b/go/test/endtoend/vreplication/cluster.go @@ -21,7 +21,7 @@ import ( ) var ( - debug = false // set to true to always use local env vtdataroot for local debugging + debug = true // set to true to always use local env vtdataroot for local debugging originalVtdataroot string vtdataroot string ) diff --git a/go/test/endtoend/vreplication/helper.go b/go/test/endtoend/vreplication/helper.go index 31856bc46bf..78867cb182a 100644 --- a/go/test/endtoend/vreplication/helper.go +++ b/go/test/endtoend/vreplication/helper.go @@ -239,3 +239,20 @@ func printShardPositions(vc *VitessCluster, ksShards []string) { } } } + +func clearRoutingRules(t *testing.T, vc *VitessCluster) error { + if _, err := vc.VtctlClient.ExecuteCommandWithOutput("ApplyRoutingRules", 
"-rules={}"); err != nil { + return err + } + return nil +} + +func printRoutingRules(t *testing.T, vc *VitessCluster, msg string) error { + var output string + var err error + if output, err = vc.VtctlClient.ExecuteCommandWithOutput("GetRoutingRules"); err != nil { + return err + } + fmt.Printf("Routing Rules::%s:\n%s\n", msg, output) + return nil +} diff --git a/go/test/endtoend/vreplication/uniform_switch_read_writes_test.go b/go/test/endtoend/vreplication/uniform_switch_read_writes_test.go new file mode 100644 index 00000000000..e90b0a6a494 --- /dev/null +++ b/go/test/endtoend/vreplication/uniform_switch_read_writes_test.go @@ -0,0 +1,227 @@ +/* +Copyright 2020 The Vitess Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package vreplication + +import ( + "fmt" + "testing" + + "github.com/stretchr/testify/require" +) + +func TestSwitchReadsWritesInAnyOrder(t *testing.T) { + cells := []string{"zone1"} + + vc = InitCluster(t, cells) + require.NotNil(t, vc) + defaultCellName := "zone1" + allCellNames = defaultCellName + defaultCell = vc.Cells[defaultCellName] + + //defer vc.TearDown() + + cell1 := vc.Cells["zone1"] + vc.AddKeyspace(t, []*Cell{cell1}, "product", "0", initialProductVSchema, initialProductSchema, defaultReplicas, defaultRdonly, 100) + + vtgate = cell1.Vtgates[0] + require.NotNil(t, vtgate) + vtgate.WaitForStatusOfTabletInShard(fmt.Sprintf("%s.%s.master", "product", "0"), 1) + vtgate.WaitForStatusOfTabletInShard(fmt.Sprintf("%s.%s.replica", "product", "0"), 2) + + vtgateConn = getConnection(t, globalConfig.vtgateMySQLPort) + defer vtgateConn.Close() + verifyClusterHealth(t) + insertInitialData(t) + moveCustomerTableSwitchFlows(t, []*Cell{cell1}, "zone1") +} + +func switchReadsNew(t *testing.T, cells, ksWorkflow string, reverse bool) { + output, err := vc.VtctlClient.ExecuteCommandWithOutput("SwitchReads", "-cells="+cells, + "-tablet_types=rdonly,replica", fmt.Sprintf("-reverse=%t", reverse), ksWorkflow) + require.NoError(t, err, fmt.Sprintf("SwitchReads Error: %s: %s", err, output)) + if output != "" { + fmt.Printf("SwitchReads output: %s\n", output) + } +} + +func moveCustomerTableSwitchFlows(t *testing.T, cells []*Cell, sourceCellOrAlias string) { + workflow := "p2c" + sourceKs := "product" + targetKs := "customer" + ksWorkflow := fmt.Sprintf("%s.%s", targetKs, workflow) + reverseKsWorkflow := fmt.Sprintf("%s.%s_reverse", sourceKs, workflow) + + if _, err := vc.AddKeyspace(t, cells, "customer", "-80,80-", customerVSchema, customerSchema, defaultReplicas, defaultRdonly, 200); err != nil { + t.Fatal(err) + } + if err := vtgate.WaitForStatusOfTabletInShard(fmt.Sprintf("%s.%s.master", "customer", "-80"), 1); err != nil { + t.Fatal(err) + } + if err := 
vtgate.WaitForStatusOfTabletInShard(fmt.Sprintf("%s.%s.master", "customer", "80-"), 1); err != nil { + t.Fatal(err) + } + if err := vtgate.WaitForStatusOfTabletInShard(fmt.Sprintf("%s.%s.replica", "customer", "-80"), 1); err != nil { + t.Fatal(err) + } + if err := vtgate.WaitForStatusOfTabletInShard(fmt.Sprintf("%s.%s.replica", "customer", "80-"), 1); err != nil { + t.Fatal(err) + } + tables := "customer" + + // Assume we are operating on first cell + defaultCell := cells[0] + custKs := vc.Cells[defaultCell.Name].Keyspaces["customer"] + customerTab1 := custKs.Shards["-80"].Tablets["zone1-200"].Vttablet + customerReplicaTab1 := custKs.Shards["-80"].Tablets["zone1-201"].Vttablet + customerTab2 := custKs.Shards["80-"].Tablets["zone1-300"].Vttablet + + productTab := vc.Cells[defaultCell.Name].Keyspaces["product"].Shards["0"].Tablets["zone1-100"].Vttablet + productReplicaTab := vc.Cells[defaultCell.Name].Keyspaces["product"].Shards["0"].Tablets["zone1-101"].Vttablet + query := "select * from customer" + + validateReadsRouteToSource := func() { + require.True(t, validateThatQueryExecutesOnTablet(t, vtgateConn, productReplicaTab, "product@replica", query, query)) + } + + validateReadsRouteToTarget := func() { + require.True(t, validateThatQueryExecutesOnTablet(t, vtgateConn, customerReplicaTab1, "product@replica", query, query)) + } + + validateWritesRouteToSource := func() { + insertQuery := "insert into customer(name, cid) values('tempCustomer2', 200)" + matchInsertQuery := "insert into customer(name, cid) values (:vtg1, :_cid0)" + require.False(t, validateThatQueryExecutesOnTablet(t, vtgateConn, productTab, "customer", insertQuery, matchInsertQuery)) + execVtgateQuery(t, vtgateConn, "customer", "delete from customer where cid > 100") + } + validateWritesRouteToTarget := func() { + insertQuery := "insert into customer(name, cid) values('tempCustomer3', 101)" + matchInsertQuery := "insert into customer(name, cid) values (:vtg1, :_cid0)" + require.True(t, 
validateThatQueryExecutesOnTablet(t, vtgateConn, customerTab2, "customer", insertQuery, matchInsertQuery)) + insertQuery = "insert into customer(name, cid) values('tempCustomer3', 102)" + require.True(t, validateThatQueryExecutesOnTablet(t, vtgateConn, customerTab1, "customer", insertQuery, matchInsertQuery)) + execVtgateQuery(t, vtgateConn, "customer", "delete from customer where cid > 100") + } + + revert := func() { + switchWrites(t, reverseKsWorkflow, false) + validateWritesRouteToSource() + switchReadsNew(t, allCellNames, ksWorkflow, true) + validateReadsRouteToSource() + queries := []string{ + "delete from _vt.vreplication", + "delete from _vt.resharding_journal", + } + + for _, query := range queries { + customerTab1.QueryTablet(query, "customer", true) + customerTab2.QueryTablet(query, "customer", true) + productTab.QueryTablet(query, "product", true) + } + customerTab1.QueryTablet("drop table vt_customer.customer", "customer", true) + customerTab2.QueryTablet("drop table vt_customer.customer", "customer", true) + + clearRoutingRules(t, vc) + } + + var moveTables = func() { + moveTables(t, sourceCellOrAlias, workflow, sourceKs, targetKs, tables) + catchup(t, customerTab1, workflow, "MoveTables") + catchup(t, customerTab2, workflow, "MoveTables") + vdiff(t, ksWorkflow) + //TODO: delete from target tables directly! 
+ //time.Sleep(100*time.Millisecond) + } + + var switchReadsFollowedBySwitchWrites = func() { + moveTables() + + validateReadsRouteToSource() + switchReadsNew(t, allCellNames, ksWorkflow, false) + validateReadsRouteToTarget() + + validateWritesRouteToSource() + switchWrites(t, ksWorkflow, false) + validateWritesRouteToTarget() + + revert() + } + var switchWritesFollowedBySwitchReads = func() { + moveTables() + + validateWritesRouteToSource() + switchWrites(t, ksWorkflow, false) + validateWritesRouteToTarget() + + validateReadsRouteToSource() + switchReadsNew(t, allCellNames, ksWorkflow, false) + validateReadsRouteToTarget() + + revert() + } + + var switchReadsReverseSwitchWritesSwitchReads = func() { + moveTables() + + validateReadsRouteToSource() + switchReadsNew(t, allCellNames, ksWorkflow, false) + validateReadsRouteToTarget() + + switchReadsNew(t, allCellNames, ksWorkflow, true) + validateReadsRouteToSource() + printRoutingRules(t, vc, "After reversing SwitchReads") + + validateWritesRouteToSource() + switchWrites(t, ksWorkflow, false) + validateWritesRouteToTarget() + + printRoutingRules(t, vc, "After SwitchWrites and reversing SwitchReads") + validateReadsRouteToSource() + switchReadsNew(t, allCellNames, ksWorkflow, false) + validateReadsRouteToTarget() + + revert() + } + + var switchWritesReverseSwitchReadsSwitchWrites = func() { + moveTables() + + validateWritesRouteToSource() + switchWrites(t, ksWorkflow, false) + validateWritesRouteToTarget() + + switchWrites(t, ksWorkflow, true) + validateWritesRouteToSource() + + validateReadsRouteToSource() + switchReadsNew(t, allCellNames, ksWorkflow, false) + validateReadsRouteToTarget() + + validateWritesRouteToSource() + switchWrites(t, ksWorkflow, false) + validateWritesRouteToTarget() + + revert() + } + _ = switchReadsFollowedBySwitchWrites + _ = switchWritesFollowedBySwitchReads + _ = switchReadsReverseSwitchWritesSwitchReads + _ = switchWritesReverseSwitchReadsSwitchWrites + switchReadsFollowedBySwitchWrites() 
+ //switchWritesFollowedBySwitchReads() + //switchReadsReverseSwitchWritesSwitchReads() + //switchWritesReverseSwitchReadsSwitchWrites() +} diff --git a/go/test/endtoend/vreplication/vreplication_test.go b/go/test/endtoend/vreplication/vreplication_test.go index 43aaecc0b85..176a10e1af7 100644 --- a/go/test/endtoend/vreplication/vreplication_test.go +++ b/go/test/endtoend/vreplication/vreplication_test.go @@ -220,7 +220,7 @@ func shardCustomer(t *testing.T, testReverse bool, cells []*Cell, sourceCellOrAl switchReads(t, allCellNames, ksWorkflow) require.True(t, validateThatQueryExecutesOnTablet(t, vtgateConn, productTab, "customer", query, query)) switchWritesDryRun(t, ksWorkflow, dryRunResultsSwitchWritesCustomerShard) - switchWrites(t, ksWorkflow) + switchWrites(t, ksWorkflow, false) ksShards := []string{"product/0", "customer/-80", "customer/80-"} printShardPositions(vc, ksShards) insertQuery2 := "insert into customer(name, cid) values('tempCustomer2', 100)" @@ -236,7 +236,8 @@ func shardCustomer(t *testing.T, testReverse bool, cells []*Cell, sourceCellOrAl if testReverse { //Reverse Replicate switchReads(t, allCellNames, reverseKsWorkflow) - switchWrites(t, reverseKsWorkflow) + printShardPositions(vc, ksShards) + switchWrites(t, reverseKsWorkflow, false) insertQuery1 = "insert into customer(cid, name) values(1002, 'tempCustomer5')" require.True(t, validateThatQueryExecutesOnTablet(t, vtgateConn, productTab, "product", insertQuery1, matchInsertQuery1)) @@ -248,8 +249,7 @@ func shardCustomer(t *testing.T, testReverse bool, cells []*Cell, sourceCellOrAl //Go forward again switchReads(t, allCellNames, ksWorkflow) - switchWrites(t, ksWorkflow) - + switchWrites(t, ksWorkflow, false) dropSourcesDryRun(t, ksWorkflow, false, dryRunResultsDropSourcesDropCustomerShard) dropSourcesDryRun(t, ksWorkflow, true, dryRunResultsDropSourcesRenameCustomerShard) @@ -417,7 +417,7 @@ func reshard(t *testing.T, ksName string, tableName string, workflow string, sou if 
dryRunResultswitchWrites != nil { switchWritesDryRun(t, ksWorkflow, dryRunResultswitchWrites) } - switchWrites(t, ksWorkflow) + switchWrites(t, ksWorkflow, false) dropSources(t, ksWorkflow) for tabletName, count := range counts { @@ -445,7 +445,7 @@ func shardOrders(t *testing.T) { catchup(t, customerTab2, workflow, "MoveTables") vdiff(t, ksWorkflow) switchReads(t, allCellNames, ksWorkflow) - switchWrites(t, ksWorkflow) + switchWrites(t, ksWorkflow, false) dropSources(t, ksWorkflow) validateCountInTablet(t, customerTab1, "customer", "orders", 1) validateCountInTablet(t, customerTab2, "customer", "orders", 2) @@ -477,7 +477,7 @@ func shardMerchant(t *testing.T) { vdiff(t, "merchant.p2m") switchReads(t, allCellNames, ksWorkflow) - switchWrites(t, ksWorkflow) + switchWrites(t, ksWorkflow, false) dropSources(t, ksWorkflow) validateCountInTablet(t, merchantTab1, "merchant", "merchant", 1) @@ -615,7 +615,7 @@ func verifyClusterHealth(t *testing.T) { func catchup(t *testing.T, vttablet *cluster.VttabletProcess, workflow, info string) { const MaxWait = 10 * time.Second err := vc.WaitForVReplicationToCatchup(vttablet, workflow, fmt.Sprintf("vt_%s", vttablet.Keyspace), MaxWait) - require.NoError(nil, err, fmt.Sprintf("%s timed out for workflow %s on tablet %s.%s.%s", info, workflow, vttablet.Keyspace, vttablet.Shard, vttablet.Name)) + require.NoError(t, err, fmt.Sprintf("%s timed out for workflow %s on tablet %s.%s.%s", info, workflow, vttablet.Keyspace, vttablet.Shard, vttablet.Name)) } func moveTables(t *testing.T, cell, workflow, sourceKs, targetKs, tables string) { @@ -679,18 +679,16 @@ func printSwitchWritesExtraDebug(t *testing.T, ksWorkflow, msg string) { } } -func switchWrites(t *testing.T, ksWorkflow string) { +func switchWrites(t *testing.T, ksWorkflow string, reverse bool) { const SwitchWritesTimeout = "91s" // max: 3 tablet picker 30s waits + 1 output, err := vc.VtctlClient.ExecuteCommandWithOutput("SwitchWrites", - 
"-filtered_replication_wait_time="+SwitchWritesTimeout, ksWorkflow) + "-filtered_replication_wait_time="+SwitchWritesTimeout, fmt.Sprintf("-reverse=%t", reverse), ksWorkflow) if output != "" { fmt.Printf("Output of SwitchWrites for %s:\n++++++\n%s\n--------\n", ksWorkflow, output) } //printSwitchWritesExtraDebug is useful when debugging failures in SwitchWrites due to corner cases/races _ = printSwitchWritesExtraDebug - if err != nil { - require.FailNow(t, fmt.Sprintf("SwitchWrites Error: %s: %s", err, output)) - } + require.NoError(t, err, fmt.Sprintf("SwitchWrites Error: %s: %s", err, output)) } func dropSourcesDryRun(t *testing.T, ksWorkflow string, renameTables bool, dryRunResults []string) { diff --git a/go/vt/wrangler/materializer.go b/go/vt/wrangler/materializer.go index cce22b66fe3..179e2a5605a 100644 --- a/go/vt/wrangler/materializer.go +++ b/go/vt/wrangler/materializer.go @@ -157,6 +157,10 @@ func (wr *Wrangler) MoveTables(ctx context.Context, workflow, sourceKeyspace, ta for _, table := range tables { rules[table] = []string{sourceKeyspace + "." + table} rules[targetKeyspace+"."+table] = []string{sourceKeyspace + "." + table} + rules[targetKeyspace+"."+table+"@replica"] = []string{sourceKeyspace + "." + table} + rules[targetKeyspace+"."+table+"@rdonly"] = []string{sourceKeyspace + "." + table} + rules[sourceKeyspace+"."+table+"@replica"] = []string{sourceKeyspace + "." + table} + rules[sourceKeyspace+"."+table+"@rdonly"] = []string{sourceKeyspace + "." 
+ table} } if err := wr.saveRoutingRules(ctx, rules); err != nil { return err diff --git a/go/vt/wrangler/traffic_switcher.go b/go/vt/wrangler/traffic_switcher.go index 4f2469831fb..8fd314df8d8 100644 --- a/go/vt/wrangler/traffic_switcher.go +++ b/go/vt/wrangler/traffic_switcher.go @@ -245,7 +245,6 @@ func (wr *Wrangler) SwitchReads(ctx context.Context, targetKeyspace, workflow st wr.Logger().Errorf("Found a previous journal entry for %d", ts.id) //return nil, fmt.Errorf("found an entry from a previous run for migration id %d in _vt.resharding_journal, please review and delete it before proceeding", ts.id) } - var sw iswitcher if dryRun { sw = &switcherDryRun{ts: ts, drLog: NewLogRecorder()} @@ -254,7 +253,7 @@ func (wr *Wrangler) SwitchReads(ctx context.Context, targetKeyspace, workflow st } if ts.frozen { - return nil, fmt.Errorf("cannot switch reads while SwitchWrites is in progress") + //return nil, fmt.Errorf("cannot switch reads while SwitchWrites is in progress") } if err := ts.validate(ctx, false /* isWrite */); err != nil { ts.wr.Logger().Errorf("validate failed: %v", err) @@ -679,11 +678,11 @@ func (ts *trafficSwitcher) validate(ctx context.Context, isWrite bool) error { } } if isWrite { - return ts.validateTableForWrite(ctx) + //return ts.validateTableForWrite(ctx) } } else { // binlogdatapb.MigrationType_SHARDS if isWrite { - return ts.validateShardForWrite(ctx) + //return ts.validateShardForWrite(ctx) } } return nil @@ -755,6 +754,7 @@ func (ts *trafficSwitcher) compareShards(ctx context.Context, keyspace string, s } func (ts *trafficSwitcher) switchTableReads(ctx context.Context, cells []string, servedTypes []topodatapb.TabletType, direction TrafficSwitchDirection) error { + log.Infof("switchTableReads: servedTypes: %+v, direction %t", servedTypes, direction) rules, err := ts.wr.getRoutingRules(ctx) if err != nil { return err @@ -772,9 +772,12 @@ func (ts *trafficSwitcher) switchTableReads(ctx context.Context, cells []string, 
rules[ts.targetKeyspace+"."+table+"@"+tt] = []string{ts.targetKeyspace + "." + table} rules[ts.sourceKeyspace+"."+table+"@"+tt] = []string{ts.targetKeyspace + "." + table} } else { - delete(rules, table+"@"+tt) - delete(rules, ts.targetKeyspace+"."+table+"@"+tt) - delete(rules, ts.sourceKeyspace+"."+table+"@"+tt) + //delete(rules, table+"@"+tt) + //delete(rules, ts.targetKeyspace+"."+table+"@"+tt) + //delete(rules, ts.sourceKeyspace+"."+table+"@"+tt) + rules[table+"@"+tt] = []string{ts.sourceKeyspace + "." + table} + rules[ts.targetKeyspace+"."+table+"@"+tt] = []string{ts.sourceKeyspace + "." + table} + rules[ts.sourceKeyspace+"."+table+"@"+tt] = []string{ts.sourceKeyspace + "." + table} } } } @@ -1121,12 +1124,12 @@ func (ts *trafficSwitcher) allowTableTargetWrites(ctx context.Context) error { func (ts *trafficSwitcher) changeRouting(ctx context.Context) error { if ts.migrationType == binlogdatapb.MigrationType_TABLES { - return ts.changeTableRouting(ctx) + return ts.changeWriteRoute(ctx) } return ts.changeShardRouting(ctx) } -func (ts *trafficSwitcher) changeTableRouting(ctx context.Context) error { +func (ts *trafficSwitcher) changeWriteRoute(ctx context.Context) error { rules, err := ts.wr.getRoutingRules(ctx) if err != nil { return err @@ -1141,13 +1144,13 @@ func (ts *trafficSwitcher) changeTableRouting(ctx context.Context) error { // table -> targetKeyspace.table // sourceKeyspace.table -> targetKeyspace.table for _, table := range ts.tables { - for _, tabletType := range []topodatapb.TabletType{topodatapb.TabletType_REPLICA, topodatapb.TabletType_RDONLY} { - tt := strings.ToLower(tabletType.String()) - delete(rules, table+"@"+tt) - delete(rules, ts.targetKeyspace+"."+table+"@"+tt) - delete(rules, ts.sourceKeyspace+"."+table+"@"+tt) - ts.wr.Logger().Infof("Delete routing: %v %v %v", table+"@"+tt, ts.targetKeyspace+"."+table+"@"+tt, ts.sourceKeyspace+"."+table+"@"+tt) - } + //for _, tabletType := range []topodatapb.TabletType{topodatapb.TabletType_REPLICA, 
topodatapb.TabletType_RDONLY} { + // tt := strings.ToLower(tabletType.String()) + // delete(rules, table+"@"+tt) + // delete(rules, ts.targetKeyspace+"."+table+"@"+tt) + // delete(rules, ts.sourceKeyspace+"."+table+"@"+tt) + // ts.wr.Logger().Infof("Delete routing: %v %v %v", table+"@"+tt, ts.targetKeyspace+"."+table+"@"+tt, ts.sourceKeyspace+"."+table+"@"+tt) + //} delete(rules, ts.targetKeyspace+"."+table) ts.wr.Logger().Infof("Delete routing: %v", ts.targetKeyspace+"."+table) rules[table] = []string{ts.targetKeyspace + "." + table} From 88c7a95f65d08245167bf5f81b4d9c5e2347b46a Mon Sep 17 00:00:00 2001 From: Rohit Nayak Date: Fri, 27 Nov 2020 20:23:30 +0100 Subject: [PATCH 03/26] Initial commit of a state-machine based MoveTables workflow Signed-off-by: Rohit Nayak --- go.mod | 1 + go.sum | 2 + .../uniform_switch_read_writes_test.go | 76 ++++++++-- .../vreplication/vreplication_test.go | 10 +- go/vt/vtctl/vtctl.go | 93 +++++++++--- go/vt/wrangler/workflow.go | 136 ++++++++++++++++++ 6 files changed, 286 insertions(+), 32 deletions(-) create mode 100644 go/vt/wrangler/workflow.go diff --git a/go.mod b/go.mod index d5e6020f8da..93894813e95 100644 --- a/go.mod +++ b/go.mod @@ -52,6 +52,7 @@ require ( github.com/klauspost/pgzip v1.2.4 github.com/konsorten/go-windows-terminal-sequences v1.0.2 // indirect github.com/krishicks/yaml-patch v0.0.10 + github.com/looplab/fsm v0.2.0 github.com/magiconair/properties v1.8.1 github.com/martini-contrib/auth v0.0.0-20150219114609-fa62c19b7ae8 github.com/martini-contrib/gzip v0.0.0-20151124214156-6c035326b43f diff --git a/go.sum b/go.sum index 6b1485cb833..dedaa3e54fc 100644 --- a/go.sum +++ b/go.sum @@ -425,6 +425,8 @@ github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE= github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= github.com/krishicks/yaml-patch v0.0.10 h1:H4FcHpnNwVmw8u0MjPRjWyIXtco6zM2F78t+57oNM3E= github.com/krishicks/yaml-patch v0.0.10/go.mod 
h1:Sm5TchwZS6sm7RJoyg87tzxm2ZcKzdRE4Q7TjNhPrME= +github.com/looplab/fsm v0.2.0 h1:M8hf5EF4AYLcT1FNKVUX8nu7D0xfp291iGeuigSxfrw= +github.com/looplab/fsm v0.2.0/go.mod h1:p+IElwgCnAByqr2DWMuNbPjgMwqcHvTRZZn3dvKEke0= github.com/magiconair/properties v1.8.0 h1:LLgXmsheXeRoUOBOjtwPQCWIYqM/LU1ayDtDePerRcY= github.com/magiconair/properties v1.8.0/go.mod h1:PppfXfuXeibc/6YijjN8zIbojt8czPbwD3XqdrwzmxQ= github.com/magiconair/properties v1.8.1 h1:ZC2Vc7/ZFkGmsVC9KvOjumD+G5lXy2RtTKyzRKO2BQ4= diff --git a/go/test/endtoend/vreplication/uniform_switch_read_writes_test.go b/go/test/endtoend/vreplication/uniform_switch_read_writes_test.go index e90b0a6a494..04f8e255e32 100644 --- a/go/test/endtoend/vreplication/uniform_switch_read_writes_test.go +++ b/go/test/endtoend/vreplication/uniform_switch_read_writes_test.go @@ -20,10 +20,20 @@ import ( "fmt" "testing" + "vitess.io/vitess/go/test/endtoend/cluster" + "vitess.io/vitess/go/vt/wrangler" + "github.com/stretchr/testify/require" ) -func TestSwitchReadsWritesInAnyOrder(t *testing.T) { +func TestNewMoveTablesWorkflow(t *testing.T) { + vc = setupCluster(t) + setupCustomerKeyspace(t) + moveTablesNew(t) //moveTables(t, defaultCellName, moveTablesWorkflowName, sourceKs, targetKs, tablesToMove, wrangler.WorkflowEventStart) + +} + +func setupCluster(t *testing.T) *VitessCluster { cells := []string{"zone1"} vc = InitCluster(t, cells) @@ -46,7 +56,48 @@ func TestSwitchReadsWritesInAnyOrder(t *testing.T) { defer vtgateConn.Close() verifyClusterHealth(t) insertInitialData(t) - moveCustomerTableSwitchFlows(t, []*Cell{cell1}, "zone1") + return vc +} + +const ( + moveTablesWorkflowName = "p2c" + sourceKs = "product" + targetKs = "customer" + ksWorkflow = targetKs + "." + moveTablesWorkflowName + reverseKsWorkflow = sourceKs + "." 
+ moveTablesWorkflowName + "_reverse" + tablesToMove = "customer" + defaultCellName = "zone1" +) + +var ( + customerTab1, customerTab2 *cluster.VttabletProcess +) + +func setupCustomerKeyspace(t *testing.T) { + if _, err := vc.AddKeyspace(t, []*Cell{vc.Cells[defaultCellName]}, "customer", "-80,80-", + customerVSchema, customerSchema, defaultReplicas, defaultRdonly, 200); err != nil { + t.Fatal(err) + } + if err := vtgate.WaitForStatusOfTabletInShard(fmt.Sprintf("%s.%s.master", "customer", "-80"), 1); err != nil { + t.Fatal(err) + } + if err := vtgate.WaitForStatusOfTabletInShard(fmt.Sprintf("%s.%s.master", "customer", "80-"), 1); err != nil { + t.Fatal(err) + } + if err := vtgate.WaitForStatusOfTabletInShard(fmt.Sprintf("%s.%s.replica", "customer", "-80"), 1); err != nil { + t.Fatal(err) + } + if err := vtgate.WaitForStatusOfTabletInShard(fmt.Sprintf("%s.%s.replica", "customer", "80-"), 1); err != nil { + t.Fatal(err) + } + custKs := vc.Cells[defaultCell.Name].Keyspaces["customer"] + customerTab1 = custKs.Shards["-80"].Tablets["zone1-200"].Vttablet + customerTab2 = custKs.Shards["80-"].Tablets["zone1-300"].Vttablet +} + +func TestSwitchReadsWritesInAnyOrder(t *testing.T) { + vc = setupCluster(t) + moveCustomerTableSwitchFlows(t, []*Cell{vc.Cells["zone1"]}, "zone1") } func switchReadsNew(t *testing.T, cells, ksWorkflow string, reverse bool) { @@ -58,6 +109,13 @@ func switchReadsNew(t *testing.T, cells, ksWorkflow string, reverse bool) { } } +var moveTablesNew = func(t *testing.T) { + moveTables(t, defaultCellName, moveTablesWorkflowName, sourceKs, targetKs, tablesToMove, wrangler.WorkflowEventStart) + catchup(t, customerTab1, moveTablesWorkflowName, "MoveTables") + catchup(t, customerTab2, moveTablesWorkflowName, "MoveTables") + vdiff(t, ksWorkflow) +} + func moveCustomerTableSwitchFlows(t *testing.T, cells []*Cell, sourceCellOrAlias string) { workflow := "p2c" sourceKs := "product" @@ -137,17 +195,15 @@ func moveCustomerTableSwitchFlows(t *testing.T, cells []*Cell, 
sourceCellOrAlias clearRoutingRules(t, vc) } - var moveTables = func() { - moveTables(t, sourceCellOrAlias, workflow, sourceKs, targetKs, tables) + var moveTablesNew = func() { + moveTables(t, sourceCellOrAlias, workflow, sourceKs, targetKs, tables, wrangler.WorkflowEventStart) catchup(t, customerTab1, workflow, "MoveTables") catchup(t, customerTab2, workflow, "MoveTables") vdiff(t, ksWorkflow) - //TODO: delete from target tables directly! - //time.Sleep(100*time.Millisecond) } var switchReadsFollowedBySwitchWrites = func() { - moveTables() + moveTablesNew() validateReadsRouteToSource() switchReadsNew(t, allCellNames, ksWorkflow, false) @@ -160,7 +216,7 @@ func moveCustomerTableSwitchFlows(t *testing.T, cells []*Cell, sourceCellOrAlias revert() } var switchWritesFollowedBySwitchReads = func() { - moveTables() + moveTablesNew() validateWritesRouteToSource() switchWrites(t, ksWorkflow, false) @@ -174,7 +230,7 @@ func moveCustomerTableSwitchFlows(t *testing.T, cells []*Cell, sourceCellOrAlias } var switchReadsReverseSwitchWritesSwitchReads = func() { - moveTables() + moveTablesNew() validateReadsRouteToSource() switchReadsNew(t, allCellNames, ksWorkflow, false) @@ -197,7 +253,7 @@ func moveCustomerTableSwitchFlows(t *testing.T, cells []*Cell, sourceCellOrAlias } var switchWritesReverseSwitchReadsSwitchWrites = func() { - moveTables() + moveTablesNew() validateWritesRouteToSource() switchWrites(t, ksWorkflow, false) diff --git a/go/test/endtoend/vreplication/vreplication_test.go b/go/test/endtoend/vreplication/vreplication_test.go index 176a10e1af7..f6233acbc89 100644 --- a/go/test/endtoend/vreplication/vreplication_test.go +++ b/go/test/endtoend/vreplication/vreplication_test.go @@ -198,7 +198,7 @@ func shardCustomer(t *testing.T, testReverse bool, cells []*Cell, sourceCellOrAl t.Fatal(err) } tables := "customer" - moveTables(t, sourceCellOrAlias, workflow, sourceKs, targetKs, tables) + moveTables(t, sourceCellOrAlias, workflow, sourceKs, targetKs, tables, "") // 
Assume we are operating on first cell defaultCell := cells[0] @@ -436,7 +436,7 @@ func shardOrders(t *testing.T) { tables := "orders" ksWorkflow := fmt.Sprintf("%s.%s", targetKs, workflow) applyVSchema(t, ordersVSchema, targetKs) - moveTables(t, cell, workflow, sourceKs, targetKs, tables) + moveTables(t, cell, workflow, sourceKs, targetKs, tables, "") custKs := vc.Cells[defaultCell.Name].Keyspaces["customer"] customerTab1 := custKs.Shards["-80"].Tablets["zone1-200"].Vttablet @@ -468,7 +468,7 @@ func shardMerchant(t *testing.T) { if err := vtgate.WaitForStatusOfTabletInShard(fmt.Sprintf("%s.%s.master", "merchant", "80-"), 1); err != nil { t.Fatal(err) } - moveTables(t, cell, workflow, sourceKs, targetKs, tables) + moveTables(t, cell, workflow, sourceKs, targetKs, tables, "") merchantKs := vc.Cells[defaultCell.Name].Keyspaces["merchant"] merchantTab1 := merchantKs.Shards["-80"].Tablets["zone1-400"].Vttablet merchantTab2 := merchantKs.Shards["80-"].Tablets["zone1-500"].Vttablet @@ -618,9 +618,9 @@ func catchup(t *testing.T, vttablet *cluster.VttabletProcess, workflow, info str require.NoError(t, err, fmt.Sprintf("%s timed out for workflow %s on tablet %s.%s.%s", info, workflow, vttablet.Keyspace, vttablet.Shard, vttablet.Name)) } -func moveTables(t *testing.T, cell, workflow, sourceKs, targetKs, tables string) { +func moveTables(t *testing.T, cell, workflow, sourceKs, targetKs, tables, action string) { if err := vc.VtctlClient.ExecuteCommand("MoveTables", "-cells="+cell, "-workflow="+workflow, - "-tablet_types="+"master,replica,rdonly", sourceKs, targetKs, tables); err != nil { + "-tablet_types="+"master,replica,rdonly", sourceKs, targetKs, tables, action); err != nil { t.Fatalf("MoveTables command failed with %+v\n", err) } } diff --git a/go/vt/vtctl/vtctl.go b/go/vt/vtctl/vtctl.go index f5db8915b75..33bd84097fc 100644 --- a/go/vt/vtctl/vtctl.go +++ b/go/vt/vtctl/vtctl.go @@ -1909,8 +1909,59 @@ func commandReshard(ctx context.Context, wr *wrangler.Wrangler, subFlags 
*flag.F return wr.Reshard(ctx, keyspace, workflow, source, target, *skipSchemaCopy, *cells, *tabletTypes) } +func parseMoveTablesArgs(ctx context.Context, wr *wrangler.Wrangler, subFlags *flag.FlagSet, allTables *bool) ( + action, source, target, tableSpecs, workflow string, err error) { + nArgs := subFlags.NArg() + if *allTables { + switch nArgs { + case 2: + case 3: + action = subFlags.Arg(2) + default: + return "", "", "", "", "", + fmt.Errorf("following arguments are required: source_keyspace, target_keyspace [,action]") + } + source = subFlags.Arg(0) + target = subFlags.Arg(1) + } else { + if nArgs == 2 { //expect MoveTables targetKeyspace.workflow [SwitchReads|SwitchReads|Complete|Abort] + ksWorkflow := subFlags.Arg(0) + action = subFlags.Arg(1) + if action == wrangler.WorkflowEventStart || !wr.IsUserFacingEvent(action) { + return "", "", "", "", "", + fmt.Errorf("unexpected workflow action %s", action) + } + target, workflow, err = splitKeyspaceWorkflow(ksWorkflow) + if err != nil { + return "", "", "", "", "", err + } + _, err = wr.TopoServer().GetKeyspace(ctx, target) + if err != nil { + wr.Logger().Errorf("keyspace %s not found", target) + } + } else { + switch nArgs { + case 3: + case 4: + action = subFlags.Arg(3) + default: + return "", "", "", "", "", + fmt.Errorf("following arguments are required: source_keyspace, target_keyspace, tableSpecs [,action]") + } + source = subFlags.Arg(0) + target = subFlags.Arg(1) + tableSpecs = subFlags.Arg(2) + if action != wrangler.WorkflowEventStart { + return "", "", "", "", "", + fmt.Errorf("expected workflow action %s", wrangler.WorkflowEventStart) + } + } + } + return action, source, target, tableSpecs, workflow, nil +} + func commandMoveTables(ctx context.Context, wr *wrangler.Wrangler, subFlags *flag.FlagSet, args []string) error { - workflow := subFlags.String("workflow", "", "Workflow name. Can be any descriptive string. 
Will be used to later migrate traffic via SwitchReads/SwitchWrites.") + workflowName := subFlags.String("workflow", "", "Workflow name. Can be any descriptive string. Will be used to later migrate traffic via SwitchReads/SwitchWrites.") cells := subFlags.String("cells", "", "Cell(s) or CellAlias(es) (comma-separated) to replicate from.") tabletTypes := subFlags.String("tablet_types", "", "Source tablet types to replicate from (e.g. master, replica, rdonly). Defaults to -vreplication_tablet_type parameter value for the tablet, which has the default value of replica.") allTables := subFlags.Bool("all", false, "Move all tables from the source keyspace") @@ -1919,26 +1970,34 @@ func commandMoveTables(ctx context.Context, wr *wrangler.Wrangler, subFlags *fla if err := subFlags.Parse(args); err != nil { return err } - if *workflow == "" { - return fmt.Errorf("a workflow name must be specified") - } - if !*allTables && len(*excludes) > 0 { - return fmt.Errorf("you can only specify tables to exclude if all tables are to be moved (with -all)") + action, source, target, tableSpecs, wfName, err := parseMoveTablesArgs(ctx, wr, subFlags, allTables) + if err != nil { + return err } - if *allTables { - if subFlags.NArg() != 2 { - return fmt.Errorf("two arguments are required: source_keyspace, target_keyspace") + if action == "" { + log.Infof("Calling deprecated MoveTables") + if *workflowName == "" { + return fmt.Errorf("a workflow name must be specified") } - } else { - if subFlags.NArg() != 3 { - return fmt.Errorf("three arguments are required: source_keyspace, target_keyspace, tableSpecs") + if !*allTables && len(*excludes) > 0 { + return fmt.Errorf("you can only specify tables to exclude if all tables are to be moved (with -all)") } + return wr.MoveTables(ctx, *workflowName, source, target, tableSpecs, *cells, *tabletTypes, *allTables, *excludes) } - - source := subFlags.Arg(0) - target := subFlags.Arg(1) - tableSpecs := subFlags.Arg(2) - return wr.MoveTables(ctx, *workflow, 
source, target, tableSpecs, *cells, *tabletTypes, *allTables, *excludes) + log.Infof("Calling new MoveTables workflow") + if action == wrangler.WorkflowEventStart { + wfName = *workflowName + } + wf, err := wr.NewMoveTablesWorkflow(ctx, wfName, source, target, tableSpecs, *cells, *tabletTypes, *allTables, *excludes) + if err != nil { + log.Warningf("NewMoveTablesWorkflow returned error %+v", wf) + return err + } + if err := wf.Start(); err != nil { + log.Warningf("NewMoveTablesWorkflow Start error %+v", wf) + return err + } + return nil } func commandCreateLookupVindex(ctx context.Context, wr *wrangler.Wrangler, subFlags *flag.FlagSet, args []string) error { diff --git a/go/vt/wrangler/workflow.go b/go/vt/wrangler/workflow.go new file mode 100644 index 00000000000..4cf67a89a04 --- /dev/null +++ b/go/vt/wrangler/workflow.go @@ -0,0 +1,136 @@ +package wrangler + +import ( + "context" + + "github.com/looplab/fsm" + "vitess.io/vitess/go/vt/log" +) + +const ( + WorkflowStateNotStarted = "Not Started" + WorkflowStateCopying = "Copying" + WorkflowStateReplicating = "Replicating" + WorkflowStateOnlyReadsSwitched = "Reads Switched" + WorkflowStateOnlyWritesSwitched = "Writes Switched" + WorkflowStateReadsAndWritesSwitched = "Both Reads and Writes Switched" + WorkflowStateCompleted = "Completed" + WorkflowStateAborted = "Aborted" +) + +const ( + WorkflowEventStart = "Start" + WorkflowEventCopyCompleted = "CopyCompleted" + WorkflowEventSwitchReads = "SwitchReads" + WorkflowEventSwitchWrites = "SwitchWrites" + WorkflowEventComplete = "Complete" + WorkflowEventAbort = "Abort" + WorkflowEventReverseReads = "ReverseReads" + WorkflowEventReverseWrites = "ReverseWrites" +) + +type Workflow struct { + name string + wsm *fsm.FSM + typ string + isReplicating bool + isRunning bool + hasErrors bool +} + +func init() { +} + +func (wr *Wrangler) IsUserFacingEvent(ev string) bool { + allUserFacingEvents := []string{WorkflowEventStart, WorkflowEventSwitchReads, WorkflowEventSwitchWrites, + 
WorkflowEventComplete, WorkflowEventAbort} + for _, ev2 := range allUserFacingEvents { + if ev2 == ev { + return true + } + } + return false +} + +func getWorkflowTransitions() []fsm.EventDesc { + return []fsm.EventDesc{ + {Name: WorkflowEventStart, Src: []string{WorkflowStateNotStarted}, Dst: WorkflowStateCopying}, + {Name: WorkflowEventCopyCompleted, Src: []string{WorkflowStateCopying}, Dst: WorkflowStateReplicating}, + {Name: WorkflowEventSwitchReads, Src: []string{WorkflowStateReplicating}, Dst: WorkflowStateOnlyReadsSwitched}, + {Name: WorkflowEventSwitchReads, Src: []string{WorkflowStateOnlyWritesSwitched}, Dst: WorkflowStateReadsAndWritesSwitched}, + {Name: WorkflowEventSwitchWrites, Src: []string{WorkflowStateReplicating}, Dst: WorkflowStateOnlyWritesSwitched}, + {Name: WorkflowEventSwitchWrites, Src: []string{WorkflowStateOnlyReadsSwitched}, Dst: WorkflowStateReadsAndWritesSwitched}, + {Name: WorkflowEventComplete, Src: []string{WorkflowStateReadsAndWritesSwitched}, Dst: WorkflowStateCompleted}, + {Name: WorkflowEventAbort, Src: []string{WorkflowStateNotStarted, WorkflowStateCopying, + WorkflowStateReplicating, WorkflowStateOnlyReadsSwitched, WorkflowStateOnlyWritesSwitched}, Dst: WorkflowStateAborted}, + {Name: WorkflowEventReverseReads, Src: []string{WorkflowStateOnlyReadsSwitched}, Dst: WorkflowStateReplicating}, + {Name: WorkflowEventReverseReads, Src: []string{WorkflowStateReadsAndWritesSwitched}, Dst: WorkflowStateOnlyWritesSwitched}, + {Name: WorkflowEventReverseWrites, Src: []string{WorkflowStateOnlyWritesSwitched}, Dst: WorkflowStateReplicating}, + {Name: WorkflowEventReverseWrites, Src: []string{WorkflowStateReadsAndWritesSwitched}, Dst: WorkflowStateOnlyReadsSwitched}, + } +} + +func NewWorkflow(name, typ string, callbacks map[string]fsm.Callback) (*Workflow, error) { + wf := &Workflow{ + name: name, typ: typ, + } + + wf.wsm = fsm.NewFSM(WorkflowStateNotStarted, getWorkflowTransitions(), callbacks) + return wf, nil +} + +type MoveTablesWorkflow 
struct { + ctx context.Context + wf *Workflow + allTables bool + wr *Wrangler + + sourceKeyspace, targetKeyspace, tableSpecs, cell, tabletTypes, excludeTables string +} + +func (wr *Wrangler) NewMoveTablesWorkflow(ctx context.Context, workflow, sourceKeyspace, targetKeyspace, tableSpecs, + cell, tabletTypes string, allTables bool, excludeTables string) (*MoveTablesWorkflow, error) { + callbacks := make(map[string]fsm.Callback) + mtwf := &MoveTablesWorkflow{wr: wr, ctx: ctx, sourceKeyspace: sourceKeyspace, targetKeyspace: targetKeyspace, + tabletTypes: tabletTypes, tableSpecs: tableSpecs, cell: cell, + allTables: allTables, excludeTables: excludeTables} + callbacks["before_Start"] = func(e *fsm.Event) { mtwf.initMoveTables() } + wf, err := NewWorkflow(workflow, "MoveTables", callbacks) + if err != nil { + return nil, err + } + mtwf.wf = wf + return mtwf, nil +} + +func (mtwf *MoveTablesWorkflow) Start() error { + log.Infof("In MoveTablesWorkflow.Start() for %+v", mtwf) + mtwf.wf.wsm.Event(WorkflowEventStart) + return nil +} + +func (mtwf *MoveTablesWorkflow) initMoveTables() error { + log.Infof("In MoveTablesWorkflow.initMoveTables() for %+v", mtwf) + return mtwf.wr.MoveTables(mtwf.ctx, mtwf.wf.name, mtwf.sourceKeyspace, mtwf.targetKeyspace, mtwf.tableSpecs, + mtwf.cell, mtwf.tabletTypes, mtwf.allTables, mtwf.excludeTables) +} + +/* + +New +GetState + +Start +Pause +Restart + +SwitchReads +ResetReads +SwitchWrites +ResetWrites + +GetProgress +Abort +Finalize + +*/ From 6bb0454c55ebc70f81cbae598c13ccd6c57dea7b Mon Sep 17 00:00:00 2001 From: Rohit Nayak Date: Mon, 30 Nov 2020 09:16:03 +0100 Subject: [PATCH 04/26] Minor fixes/cleanup Signed-off-by: Rohit Nayak --- .../uniform_switch_read_writes_test.go | 224 ++++++++++-------- .../vreplication/vreplication_test.go | 11 +- go/vt/vtctl/vtctl.go | 9 +- go/vt/wrangler/traffic_switcher.go | 60 +++-- go/vt/wrangler/workflow.go | 137 ++++++++--- 5 files changed, 286 insertions(+), 155 deletions(-) diff --git 
a/go/test/endtoend/vreplication/uniform_switch_read_writes_test.go b/go/test/endtoend/vreplication/uniform_switch_read_writes_test.go index 04f8e255e32..bc4643feead 100644 --- a/go/test/endtoend/vreplication/uniform_switch_read_writes_test.go +++ b/go/test/endtoend/vreplication/uniform_switch_read_writes_test.go @@ -26,11 +26,95 @@ import ( "github.com/stretchr/testify/require" ) +const ( + moveTablesWorkflowName = "p2c" + sourceKs = "product" + targetKs = "customer" + ksWorkflow = targetKs + "." + moveTablesWorkflowName + reverseKsWorkflow = sourceKs + "." + moveTablesWorkflowName + "_reverse" + tablesToMove = "customer" + defaultCellName = "zone1" + query = "select * from customer" +) + +var ( + customerTab1, customerTab2, productReplicaTab, customerReplicaTab1, productTab *cluster.VttabletProcess +) + +func moveTablesStart(t *testing.T) { + moveTables(t, defaultCellName, moveTablesWorkflowName, sourceKs, targetKs, tablesToMove, wrangler.WorkflowEventStart) + catchup(t, customerTab1, moveTablesWorkflowName, "MoveTables") + catchup(t, customerTab2, moveTablesWorkflowName, "MoveTables") + vdiff(t, ksWorkflow) +} + +func moveTablesSwitchReads(t *testing.T) { + moveTables(t, defaultCellName, moveTablesWorkflowName, "", targetKs, "", wrangler.WorkflowEventSwitchReads) +} + +func moveTablesSwitchWrites(t *testing.T) { + moveTables(t, defaultCellName, moveTablesWorkflowName, "", targetKs, "", wrangler.WorkflowEventSwitchWrites) +} + +func validateReadsRouteToSource(t *testing.T) { + require.True(t, validateThatQueryExecutesOnTablet(t, vtgateConn, productReplicaTab, "product@replica", query, query)) +} + +func validateReadsRouteToTarget(t *testing.T) { + require.True(t, validateThatQueryExecutesOnTablet(t, vtgateConn, customerReplicaTab1, "product@replica", query, query)) +} + +func validateWritesRouteToSource(t *testing.T) { + insertQuery := "insert into customer(name, cid) values('tempCustomer2', 200)" + matchInsertQuery := "insert into customer(name, cid) values 
(:vtg1, :_cid0)" + require.False(t, validateThatQueryExecutesOnTablet(t, vtgateConn, productTab, "customer", insertQuery, matchInsertQuery)) + execVtgateQuery(t, vtgateConn, "customer", "delete from customer where cid > 100") +} +func validateWritesRouteToTarget(t *testing.T) { + insertQuery := "insert into customer(name, cid) values('tempCustomer3', 101)" + matchInsertQuery := "insert into customer(name, cid) values (:vtg1, :_cid0)" + require.True(t, validateThatQueryExecutesOnTablet(t, vtgateConn, customerTab2, "customer", insertQuery, matchInsertQuery)) + insertQuery = "insert into customer(name, cid) values('tempCustomer3', 102)" + require.True(t, validateThatQueryExecutesOnTablet(t, vtgateConn, customerTab1, "customer", insertQuery, matchInsertQuery)) + execVtgateQuery(t, vtgateConn, "customer", "delete from customer where cid > 100") +} + +func revert(t *testing.T) { + switchWrites(t, reverseKsWorkflow, false) + validateWritesRouteToSource(t) + switchReadsNew(t, allCellNames, ksWorkflow, true) + validateReadsRouteToSource(t) + queries := []string{ + "delete from _vt.vreplication", + "delete from _vt.resharding_journal", + } + + for _, query := range queries { + customerTab1.QueryTablet(query, "customer", true) + customerTab2.QueryTablet(query, "customer", true) + productTab.QueryTablet(query, "product", true) + } + customerTab1.QueryTablet("drop table vt_customer.customer", "customer", true) + customerTab2.QueryTablet("drop table vt_customer.customer", "customer", true) + + clearRoutingRules(t, vc) +} + func TestNewMoveTablesWorkflow(t *testing.T) { vc = setupCluster(t) + defer vtgateConn.Close() + //defer vc.TearDown() + setupCustomerKeyspace(t) - moveTablesNew(t) //moveTables(t, defaultCellName, moveTablesWorkflowName, sourceKs, targetKs, tablesToMove, wrangler.WorkflowEventStart) + moveTablesStart(t) + + validateReadsRouteToSource(t) + moveTablesSwitchReads(t) + validateReadsRouteToTarget(t) + validateWritesRouteToSource(t) + moveTablesSwitchWrites(t) + 
validateReadsRouteToTarget(t) } func setupCluster(t *testing.T) *VitessCluster { @@ -42,8 +126,6 @@ func setupCluster(t *testing.T) *VitessCluster { allCellNames = defaultCellName defaultCell = vc.Cells[defaultCellName] - //defer vc.TearDown() - cell1 := vc.Cells["zone1"] vc.AddKeyspace(t, []*Cell{cell1}, "product", "0", initialProductVSchema, initialProductSchema, defaultReplicas, defaultRdonly, 100) @@ -53,25 +135,14 @@ func setupCluster(t *testing.T) *VitessCluster { vtgate.WaitForStatusOfTabletInShard(fmt.Sprintf("%s.%s.replica", "product", "0"), 2) vtgateConn = getConnection(t, globalConfig.vtgateMySQLPort) - defer vtgateConn.Close() verifyClusterHealth(t) insertInitialData(t) - return vc -} -const ( - moveTablesWorkflowName = "p2c" - sourceKs = "product" - targetKs = "customer" - ksWorkflow = targetKs + "." + moveTablesWorkflowName - reverseKsWorkflow = sourceKs + "." + moveTablesWorkflowName + "_reverse" - tablesToMove = "customer" - defaultCellName = "zone1" -) + productReplicaTab = vc.Cells[defaultCell.Name].Keyspaces["product"].Shards["0"].Tablets["zone1-101"].Vttablet + productTab = vc.Cells[defaultCell.Name].Keyspaces["product"].Shards["0"].Tablets["zone1-100"].Vttablet -var ( - customerTab1, customerTab2 *cluster.VttabletProcess -) + return vc +} func setupCustomerKeyspace(t *testing.T) { if _, err := vc.AddKeyspace(t, []*Cell{vc.Cells[defaultCellName]}, "customer", "-80,80-", @@ -93,6 +164,8 @@ func setupCustomerKeyspace(t *testing.T) { custKs := vc.Cells[defaultCell.Name].Keyspaces["customer"] customerTab1 = custKs.Shards["-80"].Tablets["zone1-200"].Vttablet customerTab2 = custKs.Shards["80-"].Tablets["zone1-300"].Vttablet + customerReplicaTab1 = custKs.Shards["-80"].Tablets["zone1-201"].Vttablet + } func TestSwitchReadsWritesInAnyOrder(t *testing.T) { @@ -109,19 +182,11 @@ func switchReadsNew(t *testing.T, cells, ksWorkflow string, reverse bool) { } } -var moveTablesNew = func(t *testing.T) { - moveTables(t, defaultCellName, moveTablesWorkflowName, 
sourceKs, targetKs, tablesToMove, wrangler.WorkflowEventStart) - catchup(t, customerTab1, moveTablesWorkflowName, "MoveTables") - catchup(t, customerTab2, moveTablesWorkflowName, "MoveTables") - vdiff(t, ksWorkflow) -} - func moveCustomerTableSwitchFlows(t *testing.T, cells []*Cell, sourceCellOrAlias string) { workflow := "p2c" sourceKs := "product" targetKs := "customer" ksWorkflow := fmt.Sprintf("%s.%s", targetKs, workflow) - reverseKsWorkflow := fmt.Sprintf("%s.%s_reverse", sourceKs, workflow) if _, err := vc.AddKeyspace(t, cells, "customer", "-80,80-", customerVSchema, customerSchema, defaultReplicas, defaultRdonly, 200); err != nil { t.Fatal(err) @@ -144,57 +209,8 @@ func moveCustomerTableSwitchFlows(t *testing.T, cells []*Cell, sourceCellOrAlias defaultCell := cells[0] custKs := vc.Cells[defaultCell.Name].Keyspaces["customer"] customerTab1 := custKs.Shards["-80"].Tablets["zone1-200"].Vttablet - customerReplicaTab1 := custKs.Shards["-80"].Tablets["zone1-201"].Vttablet customerTab2 := custKs.Shards["80-"].Tablets["zone1-300"].Vttablet - productTab := vc.Cells[defaultCell.Name].Keyspaces["product"].Shards["0"].Tablets["zone1-100"].Vttablet - productReplicaTab := vc.Cells[defaultCell.Name].Keyspaces["product"].Shards["0"].Tablets["zone1-101"].Vttablet - query := "select * from customer" - - validateReadsRouteToSource := func() { - require.True(t, validateThatQueryExecutesOnTablet(t, vtgateConn, productReplicaTab, "product@replica", query, query)) - } - - validateReadsRouteToTarget := func() { - require.True(t, validateThatQueryExecutesOnTablet(t, vtgateConn, customerReplicaTab1, "product@replica", query, query)) - } - - validateWritesRouteToSource := func() { - insertQuery := "insert into customer(name, cid) values('tempCustomer2', 200)" - matchInsertQuery := "insert into customer(name, cid) values (:vtg1, :_cid0)" - require.False(t, validateThatQueryExecutesOnTablet(t, vtgateConn, productTab, "customer", insertQuery, matchInsertQuery)) - execVtgateQuery(t, 
vtgateConn, "customer", "delete from customer where cid > 100") - } - validateWritesRouteToTarget := func() { - insertQuery := "insert into customer(name, cid) values('tempCustomer3', 101)" - matchInsertQuery := "insert into customer(name, cid) values (:vtg1, :_cid0)" - require.True(t, validateThatQueryExecutesOnTablet(t, vtgateConn, customerTab2, "customer", insertQuery, matchInsertQuery)) - insertQuery = "insert into customer(name, cid) values('tempCustomer3', 102)" - require.True(t, validateThatQueryExecutesOnTablet(t, vtgateConn, customerTab1, "customer", insertQuery, matchInsertQuery)) - execVtgateQuery(t, vtgateConn, "customer", "delete from customer where cid > 100") - } - - revert := func() { - switchWrites(t, reverseKsWorkflow, false) - validateWritesRouteToSource() - switchReadsNew(t, allCellNames, ksWorkflow, true) - validateReadsRouteToSource() - queries := []string{ - "delete from _vt.vreplication", - "delete from _vt.resharding_journal", - } - - for _, query := range queries { - customerTab1.QueryTablet(query, "customer", true) - customerTab2.QueryTablet(query, "customer", true) - productTab.QueryTablet(query, "product", true) - } - customerTab1.QueryTablet("drop table vt_customer.customer", "customer", true) - customerTab2.QueryTablet("drop table vt_customer.customer", "customer", true) - - clearRoutingRules(t, vc) - } - var moveTablesNew = func() { moveTables(t, sourceCellOrAlias, workflow, sourceKs, targetKs, tables, wrangler.WorkflowEventStart) catchup(t, customerTab1, workflow, "MoveTables") @@ -205,79 +221,79 @@ func moveCustomerTableSwitchFlows(t *testing.T, cells []*Cell, sourceCellOrAlias var switchReadsFollowedBySwitchWrites = func() { moveTablesNew() - validateReadsRouteToSource() + validateReadsRouteToSource(t) switchReadsNew(t, allCellNames, ksWorkflow, false) - validateReadsRouteToTarget() + validateReadsRouteToTarget(t) - validateWritesRouteToSource() + validateWritesRouteToSource(t) switchWrites(t, ksWorkflow, false) - 
validateWritesRouteToTarget() + validateWritesRouteToTarget(t) - revert() + revert(t) } var switchWritesFollowedBySwitchReads = func() { moveTablesNew() - validateWritesRouteToSource() + validateWritesRouteToSource(t) switchWrites(t, ksWorkflow, false) - validateWritesRouteToTarget() + validateWritesRouteToTarget(t) - validateReadsRouteToSource() + validateReadsRouteToSource(t) switchReadsNew(t, allCellNames, ksWorkflow, false) - validateReadsRouteToTarget() + validateReadsRouteToTarget(t) - revert() + revert(t) } var switchReadsReverseSwitchWritesSwitchReads = func() { moveTablesNew() - validateReadsRouteToSource() + validateReadsRouteToSource(t) switchReadsNew(t, allCellNames, ksWorkflow, false) - validateReadsRouteToTarget() + validateReadsRouteToTarget(t) switchReadsNew(t, allCellNames, ksWorkflow, true) - validateReadsRouteToSource() + validateReadsRouteToSource(t) printRoutingRules(t, vc, "After reversing SwitchReads") - validateWritesRouteToSource() + validateWritesRouteToSource(t) switchWrites(t, ksWorkflow, false) - validateWritesRouteToTarget() + validateWritesRouteToTarget(t) printRoutingRules(t, vc, "After SwitchWrites and reversing SwitchReads") - validateReadsRouteToSource() + validateReadsRouteToSource(t) switchReadsNew(t, allCellNames, ksWorkflow, false) - validateReadsRouteToTarget() + validateReadsRouteToTarget(t) - revert() + revert(t) } var switchWritesReverseSwitchReadsSwitchWrites = func() { moveTablesNew() - validateWritesRouteToSource() + validateWritesRouteToSource(t) switchWrites(t, ksWorkflow, false) - validateWritesRouteToTarget() + validateWritesRouteToTarget(t) switchWrites(t, ksWorkflow, true) - validateWritesRouteToSource() + validateWritesRouteToSource(t) - validateReadsRouteToSource() + validateReadsRouteToSource(t) switchReadsNew(t, allCellNames, ksWorkflow, false) - validateReadsRouteToTarget() + validateReadsRouteToTarget(t) - validateWritesRouteToSource() + validateWritesRouteToSource(t) switchWrites(t, ksWorkflow, false) - 
validateWritesRouteToTarget() + validateWritesRouteToTarget(t) - revert() + revert(t) } _ = switchReadsFollowedBySwitchWrites _ = switchWritesFollowedBySwitchReads _ = switchReadsReverseSwitchWritesSwitchReads _ = switchWritesReverseSwitchReadsSwitchWrites switchReadsFollowedBySwitchWrites() - //switchWritesFollowedBySwitchReads() - //switchReadsReverseSwitchWritesSwitchReads() - //switchWritesReverseSwitchReadsSwitchWrites() + switchWritesFollowedBySwitchReads() + switchReadsReverseSwitchWritesSwitchReads() + switchWritesReverseSwitchReadsSwitchWrites() } diff --git a/go/test/endtoend/vreplication/vreplication_test.go b/go/test/endtoend/vreplication/vreplication_test.go index f6233acbc89..462627fbed9 100644 --- a/go/test/endtoend/vreplication/vreplication_test.go +++ b/go/test/endtoend/vreplication/vreplication_test.go @@ -619,8 +619,15 @@ func catchup(t *testing.T, vttablet *cluster.VttabletProcess, workflow, info str } func moveTables(t *testing.T, cell, workflow, sourceKs, targetKs, tables, action string) { - if err := vc.VtctlClient.ExecuteCommand("MoveTables", "-cells="+cell, "-workflow="+workflow, - "-tablet_types="+"master,replica,rdonly", sourceKs, targetKs, tables, action); err != nil { + if action == "" || action == "Start" { + if err := vc.VtctlClient.ExecuteCommand("MoveTables", "-cells="+cell, "-workflow="+workflow, + "-tablet_types="+"master,replica,rdonly", sourceKs, targetKs, tables, action); err != nil { + t.Fatalf("MoveTables command failed with %+v\n", err) + } + return + } + if err := vc.VtctlClient.ExecuteCommand("MoveTables", "-cells="+cell, + fmt.Sprintf("%s.%s", targetKs, workflow), action); err != nil { t.Fatalf("MoveTables command failed with %+v\n", err) } } diff --git a/go/vt/vtctl/vtctl.go b/go/vt/vtctl/vtctl.go index 33bd84097fc..7b8ffbb9c25 100644 --- a/go/vt/vtctl/vtctl.go +++ b/go/vt/vtctl/vtctl.go @@ -1988,15 +1988,16 @@ func commandMoveTables(ctx context.Context, wr *wrangler.Wrangler, subFlags *fla if action == 
wrangler.WorkflowEventStart { wfName = *workflowName } - wf, err := wr.NewMoveTablesWorkflow(ctx, wfName, source, target, tableSpecs, *cells, *tabletTypes, *allTables, *excludes) + mtwf, err := wr.NewMoveTablesWorkflow(ctx, wfName, source, target, tableSpecs, *cells, *tabletTypes, *allTables, *excludes) if err != nil { - log.Warningf("NewMoveTablesWorkflow returned error %+v", wf) + log.Warningf("NewMoveTablesWorkflow returned error %+v", mtwf) return err } - if err := wf.Start(); err != nil { - log.Warningf("NewMoveTablesWorkflow Start error %+v", wf) + if err := mtwf.FireEvent(action); err != nil { + log.Warningf("NewMoveTablesWorkflow %s error: %+v", action, mtwf) return err } + wr.Logger().Printf("MoveTables %s was successful\n\n%s", action, mtwf) return nil } diff --git a/go/vt/wrangler/traffic_switcher.go b/go/vt/wrangler/traffic_switcher.go index 8fd314df8d8..cd3a2550dd9 100644 --- a/go/vt/wrangler/traffic_switcher.go +++ b/go/vt/wrangler/traffic_switcher.go @@ -47,7 +47,8 @@ import ( ) const ( - frozenStr = "FROZEN" + frozenStr = "FROZEN" + ErrorNoStreams = "no streams found in keyspace %s for: %s" ) // TrafficSwitchDirection specifies the switching direction. 
@@ -143,12 +144,17 @@ type workflowState struct { } func (wr *Wrangler) getWorkflowState(ctx context.Context, targetKeyspace, workflow string) (*trafficSwitcher, *workflowState, error) { - ws := &workflowState{Workflow: workflow, TargetKeyspace: targetKeyspace} ts, err := wr.buildTrafficSwitcher(ctx, targetKeyspace, workflow) + if err != nil { + if err.Error() == fmt.Sprintf(ErrorNoStreams, targetKeyspace, workflow) { + return nil, nil, nil + } wr.Logger().Errorf("buildTrafficSwitcher failed: %v", err) return nil, nil, err } + + ws := &workflowState{Workflow: workflow, TargetKeyspace: targetKeyspace} ws.SourceKeyspace = ts.sourceKeyspace if ts.frozen { ws.WritesSwitched = true @@ -222,6 +228,11 @@ func (wr *Wrangler) SwitchReads(ctx context.Context, targetKeyspace, workflow st wr.Logger().Errorf("getWorkflowState failed: %v", err) return nil, err } + if ts == nil { + errorMsg := fmt.Sprintf("workflow %s not found in keyspace", workflow, targetKeyspace) + wr.Logger().Errorf(errorMsg) + return nil, fmt.Errorf(errorMsg) + } for _, servedType := range servedTypes { if servedType != topodatapb.TabletType_REPLICA && servedType != topodatapb.TabletType_RDONLY { @@ -285,11 +296,17 @@ func (wr *Wrangler) SwitchReads(ctx context.Context, targetKeyspace, workflow st // SwitchWrites is a generic way of migrating write traffic for a resharding workflow. 
func (wr *Wrangler) SwitchWrites(ctx context.Context, targetKeyspace, workflow string, timeout time.Duration, cancel, reverse, reverseReplication bool, dryRun bool) (journalID int64, dryRunResults *[]string, err error) { ts, ws, err := wr.getWorkflowState(ctx, targetKeyspace, workflow) - _ = ws if err != nil { wr.Logger().Errorf("getWorkflowState failed: %v", err) return 0, nil, err } + if ts == nil { + if ts == nil { + errorMsg := fmt.Sprintf("workflow %s not found in keyspace", workflow, targetKeyspace) + wr.Logger().Errorf(errorMsg) + return 0, nil, fmt.Errorf(errorMsg) + } + } if reverse { if !ws.WritesSwitched { @@ -495,10 +512,11 @@ func (wr *Wrangler) DropSources(ctx context.Context, targetKeyspace, workflow st } func (wr *Wrangler) buildTrafficSwitcher(ctx context.Context, targetKeyspace, workflow string) (*trafficSwitcher, error) { - targets, frozen, optCells, optTabletTypes, err := wr.buildTargets(ctx, targetKeyspace, workflow) + tgtInfo, err := wr.buildTargets(ctx, targetKeyspace, workflow) if err != nil { return nil, err } + targets, frozen, optCells, optTabletTypes := tgtInfo.targets, tgtInfo.frozen, tgtInfo.optCells, tgtInfo.optTabletTypes ts := &trafficSwitcher{ wr: wr, @@ -581,11 +599,21 @@ func (wr *Wrangler) buildTrafficSwitcher(ctx context.Context, targetKeyspace, wo return ts, nil } -func (wr *Wrangler) buildTargets(ctx context.Context, targetKeyspace, workflow string) (targets map[string]*tsTarget, frozen bool, optCells string, optTabletTypes string, err error) { - targets = make(map[string]*tsTarget) +type targetInfo struct { + targets map[string]*tsTarget + frozen bool + optCells string + optTabletTypes string +} + +func (wr *Wrangler) buildTargets(ctx context.Context, targetKeyspace, workflow string) (*targetInfo, error) { + var err error + var frozen bool + var optCells, optTabletTypes string + targets := make(map[string]*tsTarget) targetShards, err := wr.ts.GetShardNames(ctx, targetKeyspace) if err != nil { - return nil, false, "", "", err 
+ return nil, err } // We check all target shards. All of them may not have a stream. // For example, if we're splitting -80 to -40,40-80, only those @@ -593,19 +621,19 @@ func (wr *Wrangler) buildTargets(ctx context.Context, targetKeyspace, workflow s for _, targetShard := range targetShards { targetsi, err := wr.ts.GetShard(ctx, targetKeyspace, targetShard) if err != nil { - return nil, false, "", "", err + return nil, err } if targetsi.MasterAlias == nil { // This can happen if bad inputs are given. - return nil, false, "", "", fmt.Errorf("shard %v:%v doesn't have a master set", targetKeyspace, targetShard) + return nil, fmt.Errorf("shard %v:%v doesn't have a master set", targetKeyspace, targetShard) } targetMaster, err := wr.ts.GetTablet(ctx, targetsi.MasterAlias) if err != nil { - return nil, false, "", "", err + return nil, err } p3qr, err := wr.tmc.VReplicationExec(ctx, targetMaster.Tablet, fmt.Sprintf("select id, source, message, cell, tablet_types from _vt.vreplication where workflow=%s and db_name=%s", encodeString(workflow), encodeString(targetMaster.DbName()))) if err != nil { - return nil, false, "", "", err + return nil, err } // If there's no vreplication stream, check the next target. 
if len(p3qr.Rows) < 1 { @@ -621,12 +649,12 @@ func (wr *Wrangler) buildTargets(ctx context.Context, targetKeyspace, workflow s for _, row := range qr.Rows { id, err := evalengine.ToInt64(row[0]) if err != nil { - return nil, false, "", "", err + return nil, err } var bls binlogdatapb.BinlogSource if err := proto.UnmarshalText(row[1].ToString(), &bls); err != nil { - return nil, false, "", "", err + return nil, err } targets[targetShard].sources[uint32(id)] = &bls @@ -638,9 +666,11 @@ func (wr *Wrangler) buildTargets(ctx context.Context, targetKeyspace, workflow s } } if len(targets) == 0 { - return nil, false, "", "", fmt.Errorf("no streams found in keyspace %s for: %s", targetKeyspace, workflow) + err2 := fmt.Errorf(ErrorNoStreams, targetKeyspace, workflow) + return nil, err2 } - return targets, frozen, optCells, optTabletTypes, nil + tinfo := &targetInfo{targets: targets, frozen: frozen, optCells: optCells, optTabletTypes: optTabletTypes} + return tinfo, nil } // hashStreams produces a reproducible hash based on the input parameters. 
diff --git a/go/vt/wrangler/workflow.go b/go/vt/wrangler/workflow.go index 4cf67a89a04..6b0537df14d 100644 --- a/go/vt/wrangler/workflow.go +++ b/go/vt/wrangler/workflow.go @@ -2,31 +2,41 @@ package wrangler import ( "context" + "fmt" + + topodatapb "vitess.io/vitess/go/vt/proto/topodata" "github.com/looplab/fsm" "vitess.io/vitess/go/vt/log" ) const ( - WorkflowStateNotStarted = "Not Started" - WorkflowStateCopying = "Copying" - WorkflowStateReplicating = "Replicating" - WorkflowStateOnlyReadsSwitched = "Reads Switched" - WorkflowStateOnlyWritesSwitched = "Writes Switched" - WorkflowStateReadsAndWritesSwitched = "Both Reads and Writes Switched" - WorkflowStateCompleted = "Completed" - WorkflowStateAborted = "Aborted" + WorkflowStateNotStarted = "Not Started" + WorkflowStateCopying = "Copying" + WorkflowStateReplicating = "Replicating" + WorkflowStateReplicaReadsSwitched = "Replica Reads Switched" + WorkflowStateRdonlyReadsSwitched = "Rdonly Reads Switched" + WorkflowStateReadsSwitched = "Reads Switched" + WorkflowStateWritesSwitched = "Writes Switched" + WorkflowStateReplicaReadsAndWritesSwitched = "Replica Reads and Writes Switched" + WorkflowStateRdonlyReadsAndWritesSwitched = "Rdonly Reads and Writes Switched" + WorkflowStateReadsAndWritesSwitched = "Both Reads and Writes Switched" + WorkflowStateCompleted = "Completed" + WorkflowStateAborted = "Aborted" + WorkflowStateError = "Error" ) const ( - WorkflowEventStart = "Start" - WorkflowEventCopyCompleted = "CopyCompleted" - WorkflowEventSwitchReads = "SwitchReads" - WorkflowEventSwitchWrites = "SwitchWrites" - WorkflowEventComplete = "Complete" - WorkflowEventAbort = "Abort" - WorkflowEventReverseReads = "ReverseReads" - WorkflowEventReverseWrites = "ReverseWrites" + WorkflowEventStart = "Start" + WorkflowEventCopyCompleted = "CopyCompleted" + WorkflowEventSwitchReads = "SwitchReads" + WorkflowEventSwitchReplicaReads = "SwitchReplicaReads" + WorkflowEventSwitchRdonlyReads = "SwitchRdonlyReads" + 
WorkflowEventSwitchWrites = "SwitchWrites" + WorkflowEventComplete = "Complete" + WorkflowEventAbort = "Abort" + WorkflowEventReverseReads = "ReverseReads" + WorkflowEventReverseWrites = "ReverseWrites" ) type Workflow struct { @@ -38,9 +48,6 @@ type Workflow struct { hasErrors bool } -func init() { -} - func (wr *Wrangler) IsUserFacingEvent(ev string) bool { allUserFacingEvents := []string{WorkflowEventStart, WorkflowEventSwitchReads, WorkflowEventSwitchWrites, WorkflowEventComplete, WorkflowEventAbort} @@ -56,17 +63,28 @@ func getWorkflowTransitions() []fsm.EventDesc { return []fsm.EventDesc{ {Name: WorkflowEventStart, Src: []string{WorkflowStateNotStarted}, Dst: WorkflowStateCopying}, {Name: WorkflowEventCopyCompleted, Src: []string{WorkflowStateCopying}, Dst: WorkflowStateReplicating}, - {Name: WorkflowEventSwitchReads, Src: []string{WorkflowStateReplicating}, Dst: WorkflowStateOnlyReadsSwitched}, - {Name: WorkflowEventSwitchReads, Src: []string{WorkflowStateOnlyWritesSwitched}, Dst: WorkflowStateReadsAndWritesSwitched}, - {Name: WorkflowEventSwitchWrites, Src: []string{WorkflowStateReplicating}, Dst: WorkflowStateOnlyWritesSwitched}, - {Name: WorkflowEventSwitchWrites, Src: []string{WorkflowStateOnlyReadsSwitched}, Dst: WorkflowStateReadsAndWritesSwitched}, + + {Name: WorkflowEventSwitchReplicaReads, Src: []string{WorkflowStateReplicating}, Dst: WorkflowStateReplicaReadsSwitched}, + {Name: WorkflowEventSwitchReplicaReads, Src: []string{WorkflowStateRdonlyReadsSwitched}, Dst: WorkflowStateReadsSwitched}, + {Name: WorkflowEventSwitchReplicaReads, Src: []string{WorkflowStateWritesSwitched}, Dst: WorkflowStateReplicaReadsAndWritesSwitched}, + + {Name: WorkflowEventSwitchRdonlyReads, Src: []string{WorkflowStateReplicating}, Dst: WorkflowStateRdonlyReadsSwitched}, + {Name: WorkflowEventSwitchRdonlyReads, Src: []string{WorkflowStateReplicaReadsSwitched}, Dst: WorkflowStateReadsSwitched}, + {Name: WorkflowEventSwitchRdonlyReads, Src: 
[]string{WorkflowStateWritesSwitched}, Dst: WorkflowStateRdonlyReadsAndWritesSwitched}, + + {Name: WorkflowEventSwitchReads, Src: []string{WorkflowStateReplicating}, Dst: WorkflowStateReadsSwitched}, + {Name: WorkflowEventSwitchReads, Src: []string{WorkflowStateWritesSwitched}, Dst: WorkflowStateReadsAndWritesSwitched}, + + {Name: WorkflowEventSwitchWrites, Src: []string{WorkflowStateReplicating}, Dst: WorkflowStateWritesSwitched}, + {Name: WorkflowEventSwitchWrites, Src: []string{WorkflowStateReadsSwitched}, Dst: WorkflowStateReadsAndWritesSwitched}, + {Name: WorkflowEventComplete, Src: []string{WorkflowStateReadsAndWritesSwitched}, Dst: WorkflowStateCompleted}, {Name: WorkflowEventAbort, Src: []string{WorkflowStateNotStarted, WorkflowStateCopying, - WorkflowStateReplicating, WorkflowStateOnlyReadsSwitched, WorkflowStateOnlyWritesSwitched}, Dst: WorkflowStateAborted}, - {Name: WorkflowEventReverseReads, Src: []string{WorkflowStateOnlyReadsSwitched}, Dst: WorkflowStateReplicating}, - {Name: WorkflowEventReverseReads, Src: []string{WorkflowStateReadsAndWritesSwitched}, Dst: WorkflowStateOnlyWritesSwitched}, - {Name: WorkflowEventReverseWrites, Src: []string{WorkflowStateOnlyWritesSwitched}, Dst: WorkflowStateReplicating}, - {Name: WorkflowEventReverseWrites, Src: []string{WorkflowStateReadsAndWritesSwitched}, Dst: WorkflowStateOnlyReadsSwitched}, + WorkflowStateReplicating, WorkflowStateReadsSwitched, WorkflowStateWritesSwitched}, Dst: WorkflowStateAborted}, + {Name: WorkflowEventReverseReads, Src: []string{WorkflowStateReadsSwitched}, Dst: WorkflowStateReplicating}, + {Name: WorkflowEventReverseReads, Src: []string{WorkflowStateReadsAndWritesSwitched}, Dst: WorkflowStateWritesSwitched}, + {Name: WorkflowEventReverseWrites, Src: []string{WorkflowStateWritesSwitched}, Dst: WorkflowStateReplicating}, + {Name: WorkflowEventReverseWrites, Src: []string{WorkflowStateReadsAndWritesSwitched}, Dst: WorkflowStateReadsSwitched}, } } @@ -88,24 +106,62 @@ type 
MoveTablesWorkflow struct { sourceKeyspace, targetKeyspace, tableSpecs, cell, tabletTypes, excludeTables string } +func (mtwf *MoveTablesWorkflow) String() string { + s := fmt.Sprintf("%s Workflow %s from keyspace %s to keyspace %s. Current State: %s\n", + mtwf.wf.typ, mtwf.wf.name, mtwf.targetKeyspace, mtwf.sourceKeyspace, mtwf.wf.wsm.Current()) + return s +} + func (wr *Wrangler) NewMoveTablesWorkflow(ctx context.Context, workflow, sourceKeyspace, targetKeyspace, tableSpecs, cell, tabletTypes string, allTables bool, excludeTables string) (*MoveTablesWorkflow, error) { callbacks := make(map[string]fsm.Callback) mtwf := &MoveTablesWorkflow{wr: wr, ctx: ctx, sourceKeyspace: sourceKeyspace, targetKeyspace: targetKeyspace, tabletTypes: tabletTypes, tableSpecs: tableSpecs, cell: cell, allTables: allTables, excludeTables: excludeTables} - callbacks["before_Start"] = func(e *fsm.Event) { mtwf.initMoveTables() } + + callbacks["before_"+WorkflowEventStart] = func(e *fsm.Event) { mtwf.initMoveTables() } + callbacks["before_"+WorkflowEventSwitchReads] = func(e *fsm.Event) { mtwf.switchReads() } + callbacks["before_"+WorkflowEventSwitchWrites] = func(e *fsm.Event) { mtwf.switchWrites() } + + ts, ws, err := wr.getWorkflowState(ctx, targetKeyspace, workflow) + if err != nil { + return nil, err + } wf, err := NewWorkflow(workflow, "MoveTables", callbacks) if err != nil { return nil, err } + mtwf.sourceKeyspace = ts.sourceKeyspace + + state := "" + if ts == nil { + state = WorkflowStateNotStarted + } else if ws.RdonlyReadsSwitched && ws.ReplicaReadsSwitched { + state = WorkflowStateReadsSwitched + } else if ws.WritesSwitched { + state = WorkflowStateWritesSwitched + } else { + state = WorkflowStateReplicating //FIXME: copying, error, ... 
+ } + if state == "" { + return nil, fmt.Errorf("workflow is in an inconsistent state: %+v", mtwf) + } + wf.wsm.SetState(state) mtwf.wf = wf return mtwf, nil } +func (mtwf *MoveTablesWorkflow) FireEvent(ev string) error { + return mtwf.wf.wsm.Event(ev) + +} + func (mtwf *MoveTablesWorkflow) Start() error { log.Infof("In MoveTablesWorkflow.Start() for %+v", mtwf) - mtwf.wf.wsm.Event(WorkflowEventStart) + err := mtwf.wf.wsm.Event(WorkflowEventStart) + if err != nil { + return err + } return nil } @@ -115,6 +171,27 @@ func (mtwf *MoveTablesWorkflow) initMoveTables() error { mtwf.cell, mtwf.tabletTypes, mtwf.allTables, mtwf.excludeTables) } +func (mtwf *MoveTablesWorkflow) switchReads() error { + log.Infof("In MoveTablesWorkflow.switchReads() for %+v", mtwf) + _, err := mtwf.wr.SwitchReads(mtwf.ctx, mtwf.targetKeyspace, mtwf.wf.name, + []topodatapb.TabletType{topodatapb.TabletType_REPLICA, topodatapb.TabletType_RDONLY}, nil, DirectionForward, false) + if err != nil { + return err + } + return nil +} + +func (mtwf *MoveTablesWorkflow) switchWrites() error { + log.Infof("In MoveTablesWorkflow.switchWrites() for %+v", mtwf) + journalId, _, err := mtwf.wr.SwitchWrites(mtwf.ctx, mtwf.targetKeyspace, mtwf.wf.name, DefaultFilteredReplicationWaitTime, + false, false, true, false) + if err != nil { + return err + } + log.Infof("switchWrites succeeded with journal id %s", journalId) + return nil +} + /* New From 601182c24917dd17091f76d173bb3e6da4128a1b Mon Sep 17 00:00:00 2001 From: Rohit Nayak Date: Sun, 6 Dec 2020 10:54:52 +0100 Subject: [PATCH 05/26] MoveTablesv2: -v2 flag, Refactored tests/code. 
Added code for Workflow Progress, ReverseReads/Writes Signed-off-by: Rohit Nayak --- ...est.go => resharding_workflows_v2_test.go} | 65 +-- .../vreplication/vreplication_test.go | 20 +- go/vt/vtctl/vtctl.go | 154 ++++--- go/vt/wrangler/traffic_switcher.go | 25 +- go/vt/wrangler/workflow.go | 389 ++++++++++++++---- 5 files changed, 451 insertions(+), 202 deletions(-) rename go/test/endtoend/vreplication/{uniform_switch_read_writes_test.go => resharding_workflows_v2_test.go} (85%) diff --git a/go/test/endtoend/vreplication/uniform_switch_read_writes_test.go b/go/test/endtoend/vreplication/resharding_workflows_v2_test.go similarity index 85% rename from go/test/endtoend/vreplication/uniform_switch_read_writes_test.go rename to go/test/endtoend/vreplication/resharding_workflows_v2_test.go index bc4643feead..b5af5ecd460 100644 --- a/go/test/endtoend/vreplication/uniform_switch_read_writes_test.go +++ b/go/test/endtoend/vreplication/resharding_workflows_v2_test.go @@ -18,6 +18,7 @@ package vreplication import ( "fmt" + "strings" "testing" "vitess.io/vitess/go/test/endtoend/cluster" @@ -42,18 +43,39 @@ var ( ) func moveTablesStart(t *testing.T) { - moveTables(t, defaultCellName, moveTablesWorkflowName, sourceKs, targetKs, tablesToMove, wrangler.WorkflowEventStart) + moveTables2(t, defaultCellName, moveTablesWorkflowName, sourceKs, targetKs, tablesToMove, wrangler.WorkflowEventStart) catchup(t, customerTab1, moveTablesWorkflowName, "MoveTables") catchup(t, customerTab2, moveTablesWorkflowName, "MoveTables") vdiff(t, ksWorkflow) } +func moveTables2(t *testing.T, cells, workflow, sourceKs, targetKs, tables, action string) { + var args []string + args = append(args, "MoveTables", "-v2") + action = strings.ToLower(action) + switch action { + case "start": + args = append(args, "-source", sourceKs, "-tables", tables) + case "switchreads": + case "switchwrites": + } + if cells != "" { + args = append(args, "-cells", cells) + } + ksWorkflow := fmt.Sprintf("%s.%s", targetKs, 
workflow) + args = append(args, ksWorkflow, action) + if err := vc.VtctlClient.ExecuteCommand(args...) + err != nil { + t.Fatalf("MoveTables command failed with %+v\n", err) + } +} + func moveTablesSwitchReads(t *testing.T) { - moveTables(t, defaultCellName, moveTablesWorkflowName, "", targetKs, "", wrangler.WorkflowEventSwitchReads) + moveTables2(t, defaultCellName, moveTablesWorkflowName, "", targetKs, "", wrangler.WorkflowEventSwitchReads) } func moveTablesSwitchWrites(t *testing.T) { - moveTables(t, defaultCellName, moveTablesWorkflowName, "", targetKs, "", wrangler.WorkflowEventSwitchWrites) + moveTables2(t, defaultCellName, moveTablesWorkflowName, "", targetKs, "", wrangler.WorkflowEventSwitchWrites) } func validateReadsRouteToSource(t *testing.T) { @@ -100,7 +122,7 @@ func revert(t *testing.T) { clearRoutingRules(t, vc) } -func TestNewMoveTablesWorkflow(t *testing.T) { +func TestMoveTablesV2Workflow(t *testing.T) { vc = setupCluster(t) defer vtgateConn.Close() //defer vc.TearDown() @@ -187,39 +209,18 @@ func moveCustomerTableSwitchFlows(t *testing.T, cells []*Cell, sourceCellOrAlias sourceKs := "product" targetKs := "customer" ksWorkflow := fmt.Sprintf("%s.%s", targetKs, workflow) - - if _, err := vc.AddKeyspace(t, cells, "customer", "-80,80-", customerVSchema, customerSchema, defaultReplicas, defaultRdonly, 200); err != nil { - t.Fatal(err) - } - if err := vtgate.WaitForStatusOfTabletInShard(fmt.Sprintf("%s.%s.master", "customer", "-80"), 1); err != nil { - t.Fatal(err) - } - if err := vtgate.WaitForStatusOfTabletInShard(fmt.Sprintf("%s.%s.master", "customer", "80-"), 1); err != nil { - t.Fatal(err) - } - if err := vtgate.WaitForStatusOfTabletInShard(fmt.Sprintf("%s.%s.replica", "customer", "-80"), 1); err != nil { - t.Fatal(err) - } - if err := vtgate.WaitForStatusOfTabletInShard(fmt.Sprintf("%s.%s.replica", "customer", "80-"), 1); err != nil { - t.Fatal(err) - } tables := "customer" + setupCustomerKeyspace(t) - // Assume we are operating on first cell - 
defaultCell := cells[0] - custKs := vc.Cells[defaultCell.Name].Keyspaces["customer"] - customerTab1 := custKs.Shards["-80"].Tablets["zone1-200"].Vttablet - customerTab2 := custKs.Shards["80-"].Tablets["zone1-300"].Vttablet - - var moveTablesNew = func() { - moveTables(t, sourceCellOrAlias, workflow, sourceKs, targetKs, tables, wrangler.WorkflowEventStart) + var moveTablesAndWait = func() { + moveTables(t, sourceCellOrAlias, workflow, sourceKs, targetKs, tables) catchup(t, customerTab1, workflow, "MoveTables") catchup(t, customerTab2, workflow, "MoveTables") vdiff(t, ksWorkflow) } var switchReadsFollowedBySwitchWrites = func() { - moveTablesNew() + moveTablesAndWait() validateReadsRouteToSource(t) switchReadsNew(t, allCellNames, ksWorkflow, false) @@ -232,7 +233,7 @@ func moveCustomerTableSwitchFlows(t *testing.T, cells []*Cell, sourceCellOrAlias revert(t) } var switchWritesFollowedBySwitchReads = func() { - moveTablesNew() + moveTablesAndWait() validateWritesRouteToSource(t) switchWrites(t, ksWorkflow, false) @@ -246,7 +247,7 @@ func moveCustomerTableSwitchFlows(t *testing.T, cells []*Cell, sourceCellOrAlias } var switchReadsReverseSwitchWritesSwitchReads = func() { - moveTablesNew() + moveTablesAndWait() validateReadsRouteToSource(t) switchReadsNew(t, allCellNames, ksWorkflow, false) @@ -269,7 +270,7 @@ func moveCustomerTableSwitchFlows(t *testing.T, cells []*Cell, sourceCellOrAlias } var switchWritesReverseSwitchReadsSwitchWrites = func() { - moveTablesNew() + moveTablesAndWait() validateWritesRouteToSource(t) switchWrites(t, ksWorkflow, false) diff --git a/go/test/endtoend/vreplication/vreplication_test.go b/go/test/endtoend/vreplication/vreplication_test.go index 462627fbed9..48cdaf4d853 100644 --- a/go/test/endtoend/vreplication/vreplication_test.go +++ b/go/test/endtoend/vreplication/vreplication_test.go @@ -198,7 +198,7 @@ func shardCustomer(t *testing.T, testReverse bool, cells []*Cell, sourceCellOrAl t.Fatal(err) } tables := "customer" - moveTables(t, 
sourceCellOrAlias, workflow, sourceKs, targetKs, tables, "") + moveTables(t, sourceCellOrAlias, workflow, sourceKs, targetKs, tables) // Assume we are operating on first cell defaultCell := cells[0] @@ -436,7 +436,7 @@ func shardOrders(t *testing.T) { tables := "orders" ksWorkflow := fmt.Sprintf("%s.%s", targetKs, workflow) applyVSchema(t, ordersVSchema, targetKs) - moveTables(t, cell, workflow, sourceKs, targetKs, tables, "") + moveTables(t, cell, workflow, sourceKs, targetKs, tables) custKs := vc.Cells[defaultCell.Name].Keyspaces["customer"] customerTab1 := custKs.Shards["-80"].Tablets["zone1-200"].Vttablet @@ -468,7 +468,7 @@ func shardMerchant(t *testing.T) { if err := vtgate.WaitForStatusOfTabletInShard(fmt.Sprintf("%s.%s.master", "merchant", "80-"), 1); err != nil { t.Fatal(err) } - moveTables(t, cell, workflow, sourceKs, targetKs, tables, "") + moveTables(t, cell, workflow, sourceKs, targetKs, tables) merchantKs := vc.Cells[defaultCell.Name].Keyspaces["merchant"] merchantTab1 := merchantKs.Shards["-80"].Tablets["zone1-400"].Vttablet merchantTab2 := merchantKs.Shards["80-"].Tablets["zone1-500"].Vttablet @@ -618,20 +618,12 @@ func catchup(t *testing.T, vttablet *cluster.VttabletProcess, workflow, info str require.NoError(t, err, fmt.Sprintf("%s timed out for workflow %s on tablet %s.%s.%s", info, workflow, vttablet.Keyspace, vttablet.Shard, vttablet.Name)) } -func moveTables(t *testing.T, cell, workflow, sourceKs, targetKs, tables, action string) { - if action == "" || action == "Start" { - if err := vc.VtctlClient.ExecuteCommand("MoveTables", "-cells="+cell, "-workflow="+workflow, - "-tablet_types="+"master,replica,rdonly", sourceKs, targetKs, tables, action); err != nil { - t.Fatalf("MoveTables command failed with %+v\n", err) - } - return - } - if err := vc.VtctlClient.ExecuteCommand("MoveTables", "-cells="+cell, - fmt.Sprintf("%s.%s", targetKs, workflow), action); err != nil { +func moveTables(t *testing.T, cell, workflow, sourceKs, targetKs, tables 
string) { + if err := vc.VtctlClient.ExecuteCommand("MoveTables", "-cells="+cell, "-workflow="+workflow, + "-tablet_types="+"master,replica,rdonly", sourceKs, targetKs, tables); err != nil { t.Fatalf("MoveTables command failed with %+v\n", err) } } - func applyVSchema(t *testing.T, vschema, keyspace string) { err := vc.VtctlClient.ExecuteCommand("ApplyVSchema", "-vschema", vschema, keyspace) require.NoError(t, err) diff --git a/go/vt/vtctl/vtctl.go b/go/vt/vtctl/vtctl.go index 7b8ffbb9c25..4d73fb66e2b 100644 --- a/go/vt/vtctl/vtctl.go +++ b/go/vt/vtctl/vtctl.go @@ -1909,95 +1909,123 @@ func commandReshard(ctx context.Context, wr *wrangler.Wrangler, subFlags *flag.F return wr.Reshard(ctx, keyspace, workflow, source, target, *skipSchemaCopy, *cells, *tabletTypes) } -func parseMoveTablesArgs(ctx context.Context, wr *wrangler.Wrangler, subFlags *flag.FlagSet, allTables *bool) ( - action, source, target, tableSpecs, workflow string, err error) { - nArgs := subFlags.NArg() +func commandMoveTables(ctx context.Context, wr *wrangler.Wrangler, subFlags *flag.FlagSet, args []string) error { + for _, arg := range args { + if arg == "-v2" { + wr.Logger().Infof("*** Using MoveTables v2 flow ***") + return commandMoveTables2(ctx, wr, subFlags, args) + } + } + workflow := subFlags.String("workflow", "", "Workflow name. Can be any descriptive string. Will be used to later migrate traffic via SwitchReads/SwitchWrites.") + cells := subFlags.String("cells", "", "Cell(s) or CellAlias(es) (comma-separated) to replicate from.") + tabletTypes := subFlags.String("tablet_types", "", "Source tablet types to replicate from (e.g. master, replica, rdonly). 
Defaults to -vreplication_tablet_type parameter value for the tablet, which has the default value of replica.") + allTables := subFlags.Bool("all", false, "Move all tables from the source keyspace") + excludes := subFlags.String("exclude", "", "Tables to exclude (comma-separated) if -all is specified") + + if err := subFlags.Parse(args); err != nil { + return err + } + if *workflow == "" { + return fmt.Errorf("a workflow name must be specified") + } + if !*allTables && len(*excludes) > 0 { + return fmt.Errorf("you can only specify tables to exclude if all tables are to be moved (with -all)") + } if *allTables { - switch nArgs { - case 2: - case 3: - action = subFlags.Arg(2) - default: - return "", "", "", "", "", - fmt.Errorf("following arguments are required: source_keyspace, target_keyspace [,action]") + if subFlags.NArg() != 2 { + return fmt.Errorf("two arguments are required: source_keyspace, target_keyspace") } - source = subFlags.Arg(0) - target = subFlags.Arg(1) } else { - if nArgs == 2 { //expect MoveTables targetKeyspace.workflow [SwitchReads|SwitchReads|Complete|Abort] - ksWorkflow := subFlags.Arg(0) - action = subFlags.Arg(1) - if action == wrangler.WorkflowEventStart || !wr.IsUserFacingEvent(action) { - return "", "", "", "", "", - fmt.Errorf("unexpected workflow action %s", action) - } - target, workflow, err = splitKeyspaceWorkflow(ksWorkflow) - if err != nil { - return "", "", "", "", "", err - } - _, err = wr.TopoServer().GetKeyspace(ctx, target) - if err != nil { - wr.Logger().Errorf("keyspace %s not found", target) - } - } else { - switch nArgs { - case 3: - case 4: - action = subFlags.Arg(3) - default: - return "", "", "", "", "", - fmt.Errorf("following arguments are required: source_keyspace, target_keyspace, tableSpecs [,action]") - } - source = subFlags.Arg(0) - target = subFlags.Arg(1) - tableSpecs = subFlags.Arg(2) - if action != wrangler.WorkflowEventStart { - return "", "", "", "", "", - fmt.Errorf("expected workflow action %s", 
wrangler.WorkflowEventStart) - } + if subFlags.NArg() != 3 { + return fmt.Errorf("three arguments are required: source_keyspace, target_keyspace, tableSpecs") } } - return action, source, target, tableSpecs, workflow, nil + + source := subFlags.Arg(0) + target := subFlags.Arg(1) + tableSpecs := subFlags.Arg(2) + return wr.MoveTables(ctx, *workflow, source, target, tableSpecs, *cells, *tabletTypes, *allTables, *excludes) } -func commandMoveTables(ctx context.Context, wr *wrangler.Wrangler, subFlags *flag.FlagSet, args []string) error { - workflowName := subFlags.String("workflow", "", "Workflow name. Can be any descriptive string. Will be used to later migrate traffic via SwitchReads/SwitchWrites.") +func commandMoveTables2(ctx context.Context, wr *wrangler.Wrangler, subFlags *flag.FlagSet, args []string) error { + sourceKeyspace := subFlags.String("source", "", "Source keyspace") + tables := subFlags.String("tables", "", "A table spec or a list of tables") cells := subFlags.String("cells", "", "Cell(s) or CellAlias(es) (comma-separated) to replicate from.") tabletTypes := subFlags.String("tablet_types", "", "Source tablet types to replicate from (e.g. master, replica, rdonly). Defaults to -vreplication_tablet_type parameter value for the tablet, which has the default value of replica.") allTables := subFlags.Bool("all", false, "Move all tables from the source keyspace") excludes := subFlags.String("exclude", "", "Tables to exclude (comma-separated) if -all is specified") + dryRun := subFlags.Bool("dry_run", false, "Does a dry run of SwitchReads and only reports the actions to be taken") + timeout := subFlags.Duration("timeout", 30*time.Second, "Specifies the maximum time to wait, in seconds, for vreplication to catch up on master migrations. 
The migration will be aborted on timeout.") + reverseReplication := subFlags.Bool("reverse_replication", true, "Also reverse the replication") + _ = subFlags.Bool("v2", true, "") + _, _, _ = dryRun, timeout, reverseReplication + _, _, _ = cells, tabletTypes, excludes if err := subFlags.Parse(args); err != nil { return err } - action, source, target, tableSpecs, wfName, err := parseMoveTablesArgs(ctx, wr, subFlags, allTables) + if subFlags.NArg() != 2 { + return fmt.Errorf("two arguments are needed: action, keyspace.workflow") + } + action := subFlags.Arg(0) // TODO: actions will be SubCommands in the new cobra based vtctld implementation + ksWorkflow := subFlags.Arg(1) + target, workflow, err := splitKeyspaceWorkflow(ksWorkflow) if err != nil { return err } - if action == "" { - log.Infof("Calling deprecated MoveTables") - if *workflowName == "" { - return fmt.Errorf("a workflow name must be specified") + _, err = wr.TopoServer().GetKeyspace(ctx, target) + if err != nil { + wr.Logger().Errorf("keyspace %s not found", target) + } + + mtp := &wrangler.MoveTablesParams{ + TargetKeyspace: target, + Workflow: workflow, + DryRun: *dryRun, + } + + //TODO: check if invalid parameters were passed in that do not apply to this action + originalAction := action + action = strings.ToLower(action) // allow users to input action in a case-insensitive manner + switch action { + case "start": + if *sourceKeyspace == "" { + return fmt.Errorf("source keyspace is not specified") } - if !*allTables && len(*excludes) > 0 { - return fmt.Errorf("you can only specify tables to exclude if all tables are to be moved (with -all)") + if !*allTables && *tables == "" { + return fmt.Errorf("no tables specified to move") } - return wr.MoveTables(ctx, *workflowName, source, target, tableSpecs, *cells, *tabletTypes, *allTables, *excludes) + mtp.SourceKeyspace = *sourceKeyspace + mtp.Tables = *tables + mtp.AllTables = *allTables + mtp.ExcludeTables = *excludes + mtp.TabletTypes = *tabletTypes + case 
"switchreads", "switchrdonlyreads", "switchreplicareads": + mtp.Cells = *cells + mtp.TabletTypes = *tabletTypes + case "switchwrites", "reversewrites": + mtp.Timeout = *timeout + mtp.EnableReverseReplication = *reverseReplication } - log.Infof("Calling new MoveTables workflow") - if action == wrangler.WorkflowEventStart { - wfName = *workflowName - } - mtwf, err := wr.NewMoveTablesWorkflow(ctx, wfName, source, target, tableSpecs, *cells, *tabletTypes, *allTables, *excludes) + + wf, err := wr.NewMoveTablesWorkflow(ctx, mtp) if err != nil { - log.Warningf("NewMoveTablesWorkflow returned error %+v", mtwf) + log.Warningf("NewMoveTablesWorkflow returned error %+v", wf) return err } - if err := mtwf.FireEvent(action); err != nil { - log.Warningf("NewMoveTablesWorkflow %s error: %+v", action, mtwf) + if action == "visualize" { // outputs a GraphViz of the workflow's state machine. Remove? + wr.Logger().Printf("%s", wf.Visualize()) + return nil + } + if !wf.IsActionValid(action) { + return fmt.Errorf("invalid Action: %s. Workflow %s.%s is currently in state: %s", + originalAction, target, workflow, wf.CurrentState()) + } + if err := wf.FireEvent(action); err != nil { + log.Warningf("NewMoveTablesWorkflow %s error: %+v", action, wf) return err } - wr.Logger().Printf("MoveTables %s was successful\n\n%s", action, mtwf) + wr.Logger().Printf("MoveTables %s was successful\n\n%s", action, wf) return nil } diff --git a/go/vt/wrangler/traffic_switcher.go b/go/vt/wrangler/traffic_switcher.go index cd3a2550dd9..3e059c480b8 100644 --- a/go/vt/wrangler/traffic_switcher.go +++ b/go/vt/wrangler/traffic_switcher.go @@ -48,7 +48,7 @@ import ( const ( frozenStr = "FROZEN" - ErrorNoStreams = "no streams found in keyspace %s for: %s" + errorNoStreams = "no streams found in keyspace %s for: %s" ) // TrafficSwitchDirection specifies the switching direction. 
@@ -147,7 +147,7 @@ func (wr *Wrangler) getWorkflowState(ctx context.Context, targetKeyspace, workfl ts, err := wr.buildTrafficSwitcher(ctx, targetKeyspace, workflow) if err != nil { - if err.Error() == fmt.Sprintf(ErrorNoStreams, targetKeyspace, workflow) { + if err.Error() == fmt.Sprintf(errorNoStreams, targetKeyspace, workflow) { return nil, nil, nil } wr.Logger().Errorf("buildTrafficSwitcher failed: %v", err) @@ -222,14 +222,17 @@ func (wr *Wrangler) getWorkflowState(ctx context.Context, targetKeyspace, workfl } // SwitchReads is a generic way of switching read traffic for a resharding workflow. -func (wr *Wrangler) SwitchReads(ctx context.Context, targetKeyspace, workflow string, servedTypes []topodatapb.TabletType, cells []string, direction TrafficSwitchDirection, dryRun bool) (*[]string, error) { +func (wr *Wrangler) SwitchReads(ctx context.Context, targetKeyspace, workflow string, servedTypes []topodatapb.TabletType, + cells []string, direction TrafficSwitchDirection, dryRun bool) (*[]string, error) { + + log.Infof("SwitchReads: targetKeyspace %s, direction %d", targetKeyspace, direction) ts, ws, err := wr.getWorkflowState(ctx, targetKeyspace, workflow) if err != nil { wr.Logger().Errorf("getWorkflowState failed: %v", err) return nil, err } if ts == nil { - errorMsg := fmt.Sprintf("workflow %s not found in keyspace", workflow, targetKeyspace) + errorMsg := fmt.Sprintf("workflow %s not found in keyspace %s", workflow, targetKeyspace) wr.Logger().Errorf(errorMsg) return nil, fmt.Errorf(errorMsg) } @@ -296,25 +299,19 @@ func (wr *Wrangler) SwitchReads(ctx context.Context, targetKeyspace, workflow st // SwitchWrites is a generic way of migrating write traffic for a resharding workflow. 
func (wr *Wrangler) SwitchWrites(ctx context.Context, targetKeyspace, workflow string, timeout time.Duration, cancel, reverse, reverseReplication bool, dryRun bool) (journalID int64, dryRunResults *[]string, err error) { ts, ws, err := wr.getWorkflowState(ctx, targetKeyspace, workflow) + _ = ws if err != nil { wr.Logger().Errorf("getWorkflowState failed: %v", err) return 0, nil, err } if ts == nil { if ts == nil { - errorMsg := fmt.Sprintf("workflow %s not found in keyspace", workflow, targetKeyspace) + errorMsg := fmt.Sprintf("workflow %s not found in keyspace %s", workflow, targetKeyspace) wr.Logger().Errorf(errorMsg) return 0, nil, fmt.Errorf(errorMsg) } } - if reverse { - if !ws.WritesSwitched { - return 0, nil, fmt.Errorf("-reverse was passed but writes have not yet been switched for %s.%s", ws.TargetKeyspace, ws.Workflow) - } - workflow = reverseName(workflow) - } - var sw iswitcher if dryRun { sw = &switcherDryRun{ts: ts, drLog: NewLogRecorder()} @@ -666,7 +663,7 @@ func (wr *Wrangler) buildTargets(ctx context.Context, targetKeyspace, workflow s } } if len(targets) == 0 { - err2 := fmt.Errorf(ErrorNoStreams, targetKeyspace, workflow) + err2 := fmt.Errorf(errorNoStreams, targetKeyspace, workflow) return nil, err2 } tinfo := &targetInfo{targets: targets, frozen: frozen, optCells: optCells, optTabletTypes: optTabletTypes} @@ -798,10 +795,12 @@ func (ts *trafficSwitcher) switchTableReads(ctx context.Context, cells []string, tt := strings.ToLower(servedType.String()) for _, table := range ts.tables { if direction == DirectionForward { + log.Infof("Route direction forward") rules[table+"@"+tt] = []string{ts.targetKeyspace + "." + table} rules[ts.targetKeyspace+"."+table+"@"+tt] = []string{ts.targetKeyspace + "." + table} rules[ts.sourceKeyspace+"."+table+"@"+tt] = []string{ts.targetKeyspace + "." 
+ table} } else { + log.Infof("Route direction backwards") //delete(rules, table+"@"+tt) //delete(rules, ts.targetKeyspace+"."+table+"@"+tt) //delete(rules, ts.sourceKeyspace+"."+table+"@"+tt) diff --git a/go/vt/wrangler/workflow.go b/go/vt/wrangler/workflow.go index 6b0537df14d..5bb74e6c7c7 100644 --- a/go/vt/wrangler/workflow.go +++ b/go/vt/wrangler/workflow.go @@ -3,32 +3,46 @@ package wrangler import ( "context" "fmt" + "strings" + "time" + "vitess.io/vitess/go/sqltypes" topodatapb "vitess.io/vitess/go/vt/proto/topodata" + "vitess.io/vitess/go/vt/topo/topoproto" + "vitess.io/vitess/go/vt/vtgate/evalengine" "github.com/looplab/fsm" "vitess.io/vitess/go/vt/log" ) +/* + TODO + * Actions: Abort, Complete, Status + * Unit Tests (lots of!) + * expand e2e for testing all possible transitions + + * implement Reshard same as MoveTables! + +*/ + +// Possible workflow states const ( WorkflowStateNotStarted = "Not Started" - WorkflowStateCopying = "Copying" - WorkflowStateReplicating = "Replicating" + WorkflowStateStarted = "Replicating, Reads and Writes Not Switched" WorkflowStateReplicaReadsSwitched = "Replica Reads Switched" WorkflowStateRdonlyReadsSwitched = "Rdonly Reads Switched" WorkflowStateReadsSwitched = "Reads Switched" WorkflowStateWritesSwitched = "Writes Switched" - WorkflowStateReplicaReadsAndWritesSwitched = "Replica Reads and Writes Switched" - WorkflowStateRdonlyReadsAndWritesSwitched = "Rdonly Reads and Writes Switched" + WorkflowStateReplicaReadsAndWritesSwitched = "Writes and Replica Reads Switched" + WorkflowStateRdonlyReadsAndWritesSwitched = "Writes and Rdonly Reads Switched" WorkflowStateReadsAndWritesSwitched = "Both Reads and Writes Switched" WorkflowStateCompleted = "Completed" WorkflowStateAborted = "Aborted" - WorkflowStateError = "Error" ) +// Possible events that cause workflow state transitions const ( WorkflowEventStart = "Start" - WorkflowEventCopyCompleted = "CopyCompleted" WorkflowEventSwitchReads = "SwitchReads" 
WorkflowEventSwitchReplicaReads = "SwitchReplicaReads" WorkflowEventSwitchRdonlyReads = "SwitchRdonlyReads" @@ -40,54 +54,117 @@ const ( ) type Workflow struct { - name string - wsm *fsm.FSM - typ string - isReplicating bool - isRunning bool - hasErrors bool + name string + wsm *fsm.FSM + typ string + hasErrors bool } -func (wr *Wrangler) IsUserFacingEvent(ev string) bool { - allUserFacingEvents := []string{WorkflowEventStart, WorkflowEventSwitchReads, WorkflowEventSwitchWrites, - WorkflowEventComplete, WorkflowEventAbort} - for _, ev2 := range allUserFacingEvents { - if ev2 == ev { - return true - } +var eventNameMap map[string]string + +func init() { + eventNameMap = make(map[string]string) + transitions := getWorkflowTransitions() + for _, transition := range transitions { + eventNameMap[strings.ToLower(transition.Name)] = transition.Name } - return false } +// region FSM setup + func getWorkflowTransitions() []fsm.EventDesc { return []fsm.EventDesc{ - {Name: WorkflowEventStart, Src: []string{WorkflowStateNotStarted}, Dst: WorkflowStateCopying}, - {Name: WorkflowEventCopyCompleted, Src: []string{WorkflowStateCopying}, Dst: WorkflowStateReplicating}, + {Name: WorkflowEventStart, Src: []string{WorkflowStateNotStarted}, Dst: WorkflowStateStarted}, - {Name: WorkflowEventSwitchReplicaReads, Src: []string{WorkflowStateReplicating}, Dst: WorkflowStateReplicaReadsSwitched}, + {Name: WorkflowEventSwitchReplicaReads, Src: []string{WorkflowStateStarted}, Dst: WorkflowStateReplicaReadsSwitched}, {Name: WorkflowEventSwitchReplicaReads, Src: []string{WorkflowStateRdonlyReadsSwitched}, Dst: WorkflowStateReadsSwitched}, {Name: WorkflowEventSwitchReplicaReads, Src: []string{WorkflowStateWritesSwitched}, Dst: WorkflowStateReplicaReadsAndWritesSwitched}, - {Name: WorkflowEventSwitchRdonlyReads, Src: []string{WorkflowStateReplicating}, Dst: WorkflowStateRdonlyReadsSwitched}, + {Name: WorkflowEventSwitchRdonlyReads, Src: []string{WorkflowStateStarted}, Dst: 
WorkflowStateRdonlyReadsSwitched}, {Name: WorkflowEventSwitchRdonlyReads, Src: []string{WorkflowStateReplicaReadsSwitched}, Dst: WorkflowStateReadsSwitched}, {Name: WorkflowEventSwitchRdonlyReads, Src: []string{WorkflowStateWritesSwitched}, Dst: WorkflowStateRdonlyReadsAndWritesSwitched}, - {Name: WorkflowEventSwitchReads, Src: []string{WorkflowStateReplicating}, Dst: WorkflowStateReadsSwitched}, + {Name: WorkflowEventSwitchReads, Src: []string{WorkflowStateStarted}, Dst: WorkflowStateReadsSwitched}, {Name: WorkflowEventSwitchReads, Src: []string{WorkflowStateWritesSwitched}, Dst: WorkflowStateReadsAndWritesSwitched}, - {Name: WorkflowEventSwitchWrites, Src: []string{WorkflowStateReplicating}, Dst: WorkflowStateWritesSwitched}, + {Name: WorkflowEventSwitchWrites, Src: []string{WorkflowStateStarted}, Dst: WorkflowStateWritesSwitched}, {Name: WorkflowEventSwitchWrites, Src: []string{WorkflowStateReadsSwitched}, Dst: WorkflowStateReadsAndWritesSwitched}, + {Name: WorkflowEventSwitchWrites, Src: []string{WorkflowStateReplicaReadsSwitched}, Dst: WorkflowStateReplicaReadsAndWritesSwitched}, + {Name: WorkflowEventSwitchWrites, Src: []string{WorkflowStateRdonlyReadsSwitched}, Dst: WorkflowStateRdonlyReadsAndWritesSwitched}, {Name: WorkflowEventComplete, Src: []string{WorkflowStateReadsAndWritesSwitched}, Dst: WorkflowStateCompleted}, - {Name: WorkflowEventAbort, Src: []string{WorkflowStateNotStarted, WorkflowStateCopying, - WorkflowStateReplicating, WorkflowStateReadsSwitched, WorkflowStateWritesSwitched}, Dst: WorkflowStateAborted}, - {Name: WorkflowEventReverseReads, Src: []string{WorkflowStateReadsSwitched}, Dst: WorkflowStateReplicating}, - {Name: WorkflowEventReverseReads, Src: []string{WorkflowStateReadsAndWritesSwitched}, Dst: WorkflowStateWritesSwitched}, - {Name: WorkflowEventReverseWrites, Src: []string{WorkflowStateWritesSwitched}, Dst: WorkflowStateReplicating}, + {Name: WorkflowEventAbort, Src: []string{WorkflowStateNotStarted, + WorkflowStateStarted, 
WorkflowStateReadsSwitched, WorkflowStateWritesSwitched}, Dst: WorkflowStateAborted}, + + {Name: WorkflowEventReverseReads, Src: []string{WorkflowStateReadsSwitched, + WorkflowStateReplicaReadsSwitched, WorkflowStateRdonlyReadsSwitched}, Dst: WorkflowStateStarted}, + {Name: WorkflowEventReverseReads, Src: []string{WorkflowStateReadsAndWritesSwitched, + WorkflowStateReplicaReadsAndWritesSwitched, WorkflowStateRdonlyReadsAndWritesSwitched}, Dst: WorkflowStateWritesSwitched}, + + {Name: WorkflowEventReverseWrites, Src: []string{WorkflowStateWritesSwitched}, Dst: WorkflowStateStarted}, {Name: WorkflowEventReverseWrites, Src: []string{WorkflowStateReadsAndWritesSwitched}, Dst: WorkflowStateReadsSwitched}, } } +func (mtwf *MoveTablesWorkflow) getCallbacks() map[string]fsm.Callback { + callbacks := make(map[string]fsm.Callback) + callbacks["before_"+WorkflowEventStart] = func(e *fsm.Event) { + if err := mtwf.initMoveTables(); err != nil { + e.Cancel(err) + } + } + callbacks["before_"+WorkflowEventSwitchReads] = func(e *fsm.Event) { + mtwf.params.TabletTypes = "replica,rdonly" + if err := mtwf.switchReads(); err != nil { + e.Cancel(err) + } + } + callbacks["before_"+WorkflowEventSwitchReplicaReads] = func(e *fsm.Event) { + mtwf.params.TabletTypes = "replica" + if err := mtwf.switchReads(); err != nil { + e.Cancel(err) + } + } + callbacks["before_"+WorkflowEventSwitchRdonlyReads] = func(e *fsm.Event) { + mtwf.params.TabletTypes = "rdonly" + if err := mtwf.switchReads(); err != nil { + e.Cancel(err) + } + } + callbacks["before_"+WorkflowEventSwitchWrites] = func(e *fsm.Event) { + if err := mtwf.switchWrites(); err != nil { + e.Cancel(err) + } + } + callbacks["before_"+WorkflowEventReverseReads] = func(e *fsm.Event) { + var tabletTypes []string + if mtwf.ws.ReplicaReadsSwitched && mtwf.ws.RdonlyReadsSwitched { + tabletTypes = append(tabletTypes, "replica", "rdonly") + } else if mtwf.ws.ReplicaReadsSwitched { + tabletTypes = append(tabletTypes, "replica") + } else if 
mtwf.ws.RdonlyReadsSwitched { + tabletTypes = append(tabletTypes, "rdonly") + + } else { + e.Cancel(fmt.Errorf("reads have not been switched for %s.%s", mtwf.params.TargetKeyspace, mtwf.params.Workflow)) + return + } + mtwf.params.TabletTypes = strings.Join(tabletTypes, ",") + mtwf.params.Direction = DirectionBackward + if err := mtwf.switchReads(); err != nil { + e.Cancel(err) + } + } + callbacks["before_"+WorkflowEventReverseWrites] = func(e *fsm.Event) { + mtwf.params.Direction = DirectionBackward + if err := mtwf.switchWrites(); err != nil { + e.Cancel(err) + } + } + + return callbacks +} + func NewWorkflow(name, typ string, callbacks map[string]fsm.Callback) (*Workflow, error) { wf := &Workflow{ name: name, typ: typ, @@ -97,84 +174,139 @@ func NewWorkflow(name, typ string, callbacks map[string]fsm.Callback) (*Workflow return wf, nil } -type MoveTablesWorkflow struct { - ctx context.Context - wf *Workflow - allTables bool - wr *Wrangler +// endregion + +// region Move Tables Public API - sourceKeyspace, targetKeyspace, tableSpecs, cell, tabletTypes, excludeTables string +type MoveTablesWorkflow struct { + ctx context.Context + wf *Workflow + wr *Wrangler + action string + params *MoveTablesParams + ts *trafficSwitcher + ws *workflowState } func (mtwf *MoveTablesWorkflow) String() string { s := fmt.Sprintf("%s Workflow %s from keyspace %s to keyspace %s. 
Current State: %s\n", - mtwf.wf.typ, mtwf.wf.name, mtwf.targetKeyspace, mtwf.sourceKeyspace, mtwf.wf.wsm.Current()) + mtwf.wf.typ, mtwf.wf.name, mtwf.params.SourceKeyspace, mtwf.params.TargetKeyspace, mtwf.wf.wsm.Current()) return s } -func (wr *Wrangler) NewMoveTablesWorkflow(ctx context.Context, workflow, sourceKeyspace, targetKeyspace, tableSpecs, - cell, tabletTypes string, allTables bool, excludeTables string) (*MoveTablesWorkflow, error) { - callbacks := make(map[string]fsm.Callback) - mtwf := &MoveTablesWorkflow{wr: wr, ctx: ctx, sourceKeyspace: sourceKeyspace, targetKeyspace: targetKeyspace, - tabletTypes: tabletTypes, tableSpecs: tableSpecs, cell: cell, - allTables: allTables, excludeTables: excludeTables} +type MoveTablesParams struct { + Workflow, SourceKeyspace, TargetKeyspace, Tables string + Cells, TabletTypes, ExcludeTables string + EnableReverseReplication, DryRun, AllTables bool - callbacks["before_"+WorkflowEventStart] = func(e *fsm.Event) { mtwf.initMoveTables() } - callbacks["before_"+WorkflowEventSwitchReads] = func(e *fsm.Event) { mtwf.switchReads() } - callbacks["before_"+WorkflowEventSwitchWrites] = func(e *fsm.Event) { mtwf.switchWrites() } + Timeout time.Duration + Direction TrafficSwitchDirection +} - ts, ws, err := wr.getWorkflowState(ctx, targetKeyspace, workflow) +func (wr *Wrangler) NewMoveTablesWorkflow(ctx context.Context, params *MoveTablesParams) (*MoveTablesWorkflow, error) { + log.Infof("NewMoveTablesWorkflow with params %+v", params) + mtwf := &MoveTablesWorkflow{wr: wr, ctx: ctx, params: params} + ts, ws, err := wr.getWorkflowState(ctx, params.TargetKeyspace, params.Workflow) if err != nil { return nil, err } - wf, err := NewWorkflow(workflow, "MoveTables", callbacks) + log.Infof("Workflow state is %+v", ws) + wf, err := NewWorkflow(params.Workflow, "MoveTables", mtwf.getCallbacks()) if err != nil { return nil, err } - mtwf.sourceKeyspace = ts.sourceKeyspace - + if ts != nil { //Other than on Start we need to get 
SourceKeyspace from the workflow + mtwf.params.SourceKeyspace = ts.sourceKeyspace + mtwf.ts = ts + } + mtwf.ws = ws state := "" if ts == nil { state = WorkflowStateNotStarted + } else if ws.WritesSwitched { + if ws.ReplicaReadsSwitched && ws.RdonlyReadsSwitched { + state = WorkflowStateReadsAndWritesSwitched + } else if ws.RdonlyReadsSwitched { + state = WorkflowStateRdonlyReadsAndWritesSwitched + } else if ws.ReplicaReadsSwitched { + state = WorkflowStateReplicaReadsAndWritesSwitched + } else { + state = WorkflowStateWritesSwitched + } } else if ws.RdonlyReadsSwitched && ws.ReplicaReadsSwitched { state = WorkflowStateReadsSwitched - } else if ws.WritesSwitched { - state = WorkflowStateWritesSwitched + } else if ws.RdonlyReadsSwitched { + state = WorkflowStateRdonlyReadsSwitched + } else if ws.ReplicaReadsSwitched { + state = WorkflowStateReplicaReadsSwitched } else { - state = WorkflowStateReplicating //FIXME: copying, error, ... + state = WorkflowStateStarted } if state == "" { return nil, fmt.Errorf("workflow is in an inconsistent state: %+v", mtwf) } + log.Infof("Setting workflow state to %s", state) wf.wsm.SetState(state) mtwf.wf = wf return mtwf, nil } func (mtwf *MoveTablesWorkflow) FireEvent(ev string) error { + log.Infof("Firing Event %s", ev) + ev = eventNameMap[strings.ToLower(ev)] + log.Infof("Firing Event, converted %s", ev) return mtwf.wf.wsm.Event(ev) +} +func (mtwf *MoveTablesWorkflow) IsActionValid(ev string) bool { + ev = eventNameMap[strings.ToLower(ev)] + log.Infof("IsActionValid for %s, %t", ev, mtwf.wf.wsm.Can(ev)) + return mtwf.wf.wsm.Can(ev) } -func (mtwf *MoveTablesWorkflow) Start() error { - log.Infof("In MoveTablesWorkflow.Start() for %+v", mtwf) - err := mtwf.wf.wsm.Event(WorkflowEventStart) - if err != nil { - return err +func (mtwf *MoveTablesWorkflow) CurrentState() string { + return mtwf.wf.wsm.Current() +} + +func (mtwf *MoveTablesWorkflow) Visualize() string { + return fsm.Visualize(mtwf.wf.wsm) +} + +// endregion + +// region 
Helpers + +func (mtwf *MoveTablesWorkflow) getCellsAsArray() []string { + if mtwf.params.Cells != "" { + return strings.Split(mtwf.params.Cells, ",") } return nil } +func (mtwf *MoveTablesWorkflow) getTabletTypes() []topodatapb.TabletType { + tabletTypesArr := strings.Split(mtwf.params.TabletTypes, ",") + var tabletTypes []topodatapb.TabletType + for _, tabletType := range tabletTypesArr { + servedType, _ := topoproto.ParseTabletType(tabletType) + tabletTypes = append(tabletTypes, servedType) + } + return tabletTypes +} + +// endregion + +// region Core Actions + func (mtwf *MoveTablesWorkflow) initMoveTables() error { log.Infof("In MoveTablesWorkflow.initMoveTables() for %+v", mtwf) - return mtwf.wr.MoveTables(mtwf.ctx, mtwf.wf.name, mtwf.sourceKeyspace, mtwf.targetKeyspace, mtwf.tableSpecs, - mtwf.cell, mtwf.tabletTypes, mtwf.allTables, mtwf.excludeTables) + return mtwf.wr.MoveTables(mtwf.ctx, mtwf.wf.name, mtwf.params.SourceKeyspace, mtwf.params.TargetKeyspace, mtwf.params.Tables, + mtwf.params.Cells, mtwf.params.TabletTypes, mtwf.params.AllTables, mtwf.params.ExcludeTables) } func (mtwf *MoveTablesWorkflow) switchReads() error { log.Infof("In MoveTablesWorkflow.switchReads() for %+v", mtwf) - _, err := mtwf.wr.SwitchReads(mtwf.ctx, mtwf.targetKeyspace, mtwf.wf.name, - []topodatapb.TabletType{topodatapb.TabletType_REPLICA, topodatapb.TabletType_RDONLY}, nil, DirectionForward, false) + _, err := mtwf.wr.SwitchReads(mtwf.ctx, mtwf.params.TargetKeyspace, mtwf.wf.name, mtwf.getTabletTypes(), + mtwf.getCellsAsArray(), mtwf.params.Direction, false) if err != nil { return err } @@ -183,8 +315,15 @@ func (mtwf *MoveTablesWorkflow) switchReads() error { func (mtwf *MoveTablesWorkflow) switchWrites() error { log.Infof("In MoveTablesWorkflow.switchWrites() for %+v", mtwf) - journalId, _, err := mtwf.wr.SwitchWrites(mtwf.ctx, mtwf.targetKeyspace, mtwf.wf.name, DefaultFilteredReplicationWaitTime, - false, false, true, false) + if mtwf.params.Direction == DirectionBackward { 
+ keyspace := mtwf.params.SourceKeyspace + mtwf.params.SourceKeyspace = mtwf.params.TargetKeyspace + mtwf.params.TargetKeyspace = keyspace + mtwf.params.Workflow = reverseName(mtwf.params.Workflow) + log.Infof("In MoveTablesWorkflow.switchWrites(reverse) for %+v", mtwf) + } + journalId, _, err := mtwf.wr.SwitchWrites(mtwf.ctx, mtwf.params.TargetKeyspace, mtwf.params.Workflow, mtwf.params.Timeout, + false, mtwf.params.Direction == DirectionBackward, mtwf.params.EnableReverseReplication, false) if err != nil { return err } @@ -192,22 +331,112 @@ func (mtwf *MoveTablesWorkflow) switchWrites() error { return nil } -/* - -New -GetState +// endregion -Start -Pause -Restart +// region Copy Progress +type TableCopyProgress struct { + targetRowCount, targetTableSize int64 + sourceRowCount, sourceTableSize int64 +} -SwitchReads -ResetReads -SwitchWrites -ResetWrites +type CopyProgress map[string]*TableCopyProgress + +func (mtwf *MoveTablesWorkflow) GetCopyProgress() (*CopyProgress, error) { + ctx := context.Background() + getTablesQuery := "select table_name from _vt.copy_state cs, _vt.vreplication vr where vr.id == cs.vrepl_id and vr.id = %d" + getRowCountQuery := "select table_name, table_rows, data_length from information_schema.tables where table_schema = database() and table_name in (%s)" + tables := make(map[string]bool) + sourceMasters := make(map[*topodatapb.TabletAlias]bool) + for shard, target := range mtwf.ts.targets { + log.Infof("GCP: %s, %v", shard, target) + for id, bls := range target.sources { + query := fmt.Sprintf(getTablesQuery, id) + p3qr, err := mtwf.wr.tmc.VReplicationExec(ctx, target.master.Tablet, query) + if err != nil { + return nil, err + } + if len(p3qr.Rows) < 1 { + continue + } + qr := sqltypes.Proto3ToResult(p3qr) + for i := 0; i < len(p3qr.Rows); i++ { + tables[qr.Rows[0][0].String()] = true + } + sourcesi, err := mtwf.wr.ts.GetShard(ctx, bls.Keyspace, bls.Shard) + if err != nil { + return nil, err + } + sourceMasters[sourcesi.MasterAlias] = 
true + } + } + tableList := "" + targetRowCounts := make(map[string]int64) + sourceRowCounts := make(map[string]int64) + targetTableSizes := make(map[string]int64) + sourceTableSizes := make(map[string]int64) + + for table := range tables { + if tableList != "" { + tableList += "," + } + tableList += encodeString(table) + targetRowCounts[table] = 0 + sourceRowCounts[table] = 0 + targetTableSizes[table] = 0 + sourceTableSizes[table] = 0 + } + log.Infof("table list is %s", tableList) + query := fmt.Sprintf(getRowCountQuery, tableList) + log.Infof("query is %s", query) + + var getTableMetrics = func(tablet *topodatapb.Tablet, rowCounts *map[string]int64, tableSizes *map[string]int64) error { + p3qr, err := mtwf.wr.tmc.ExecuteFetchAsDba(ctx, tablet, true, []byte(query), len(tables), false, false) + if err != nil { + return err + } + qr := sqltypes.Proto3ToResult(p3qr) + for i := 0; i < len(qr.Rows); i++ { + table := qr.Rows[0][0].String() + rowCount, err := evalengine.ToInt64(qr.Rows[0][1]) + if err != nil { + return err + } + tableSize, err := evalengine.ToInt64(qr.Rows[0][2]) + if err != nil { + return err + } + targetRowCounts[table] += rowCount + targetTableSizes[table] += tableSize + } + return nil + } + for _, target := range mtwf.ts.targets { + tablet := target.master.Tablet + if err := getTableMetrics(tablet, &targetRowCounts, &targetTableSizes); err != nil { + return nil, err + } + } + for source := range sourceMasters { + ti, err := mtwf.wr.ts.GetTablet(ctx, source) + tablet := ti.Tablet + if err != nil { + return nil, err + } + if err := getTableMetrics(tablet, &sourceRowCounts, &sourceTableSizes); err != nil { + return nil, err + } + } -GetProgress -Abort -Finalize + copyProgress := CopyProgress{} + for table, rowCount := range targetRowCounts { + copyProgress[table] = &TableCopyProgress{ + targetRowCount: rowCount, + targetTableSize: targetTableSizes[table], + sourceRowCount: sourceRowCounts[table], + sourceTableSize: sourceTableSizes[table], + } + } + 
return &copyProgress, nil +} -*/ +// endregion From 0e2016a20fad041ae09779508b9d103b86066392 Mon Sep 17 00:00:00 2001 From: Rohit Nayak Date: Tue, 15 Dec 2020 19:03:53 +0100 Subject: [PATCH 06/26] Test reverse flows, show copy progress Signed-off-by: Rohit Nayak --- go/cmd/vtctlclient/main.go | 6 +- .../resharding_workflows_v2_test.go | 61 +++++++++-- go/vt/vtctl/vtctl.go | 68 +++++++++++- go/vt/wrangler/keyspace.go | 4 +- go/vt/wrangler/traffic_switcher.go | 4 +- go/vt/wrangler/workflow.go | 102 ++++++++++++------ test/local_example.sh | 3 +- 7 files changed, 197 insertions(+), 51 deletions(-) diff --git a/go/cmd/vtctlclient/main.go b/go/cmd/vtctlclient/main.go index 339be1ac84d..7e44d2313b4 100644 --- a/go/cmd/vtctlclient/main.go +++ b/go/cmd/vtctlclient/main.go @@ -19,7 +19,9 @@ package main import ( "errors" "flag" + "fmt" "os" + "strings" "time" "golang.org/x/net/context" @@ -64,7 +66,9 @@ func main() { logutil.LogEvent(logger, e) }) if err != nil { - log.Error(err) + errStr := strings.Replace(err.Error(), "remote error: ", "", -1) + fmt.Printf("%s Error: %s\n", flag.Arg(0), errStr) + //log.Error(err) os.Exit(1) } } diff --git a/go/test/endtoend/vreplication/resharding_workflows_v2_test.go b/go/test/endtoend/vreplication/resharding_workflows_v2_test.go index b5af5ecd460..25efd2b1b02 100644 --- a/go/test/endtoend/vreplication/resharding_workflows_v2_test.go +++ b/go/test/endtoend/vreplication/resharding_workflows_v2_test.go @@ -63,21 +63,37 @@ func moveTables2(t *testing.T, cells, workflow, sourceKs, targetKs, tables, acti args = append(args, "-cells", cells) } ksWorkflow := fmt.Sprintf("%s.%s", targetKs, workflow) - args = append(args, ksWorkflow, action) - if err := vc.VtctlClient.ExecuteCommand(args...)
- err != nil { + args = append(args, action, ksWorkflow) + if err := vc.VtctlClient.ExecuteCommand(args...); err != nil { t.Fatalf("MoveTables command failed with %+v\n", err) } } -func moveTablesSwitchReads(t *testing.T) { - moveTables2(t, defaultCellName, moveTablesWorkflowName, "", targetKs, "", wrangler.WorkflowEventSwitchReads) +func moveTablesSwitchReads(t *testing.T, typ string) { + var action string + switch typ { + case "replica": + action = wrangler.WorkflowEventSwitchReplicaReads + case "rdonly": + action = wrangler.WorkflowEventSwitchRdonlyReads + default: + action = wrangler.WorkflowEventSwitchReads + } + moveTables2(t, defaultCellName, moveTablesWorkflowName, "", targetKs, "", action) } func moveTablesSwitchWrites(t *testing.T) { moveTables2(t, defaultCellName, moveTablesWorkflowName, "", targetKs, "", wrangler.WorkflowEventSwitchWrites) } +func moveTablesReverseWrites(t *testing.T) { + moveTables2(t, defaultCellName, moveTablesWorkflowName, "", targetKs, "", wrangler.WorkflowEventReverseWrites) +} + +func moveTablesReverseReads(t *testing.T) { + moveTables2(t, defaultCellName, moveTablesWorkflowName, "", targetKs, "", wrangler.WorkflowEventReverseReads) +} + func validateReadsRouteToSource(t *testing.T) { require.True(t, validateThatQueryExecutesOnTablet(t, vtgateConn, productReplicaTab, "product@replica", query, query)) } @@ -129,14 +145,45 @@ func TestMoveTablesV2Workflow(t *testing.T) { setupCustomerKeyspace(t) moveTablesStart(t) - + printRoutingRules(t, vc, "After MoveTables Started") validateReadsRouteToSource(t) - moveTablesSwitchReads(t) + validateWritesRouteToSource(t) + + moveTablesSwitchReads(t, "") + printRoutingRules(t, vc, "After SwitchReads") + validateReadsRouteToTarget(t) + validateWritesRouteToSource(t) + + moveTablesSwitchWrites(t) + printRoutingRules(t, vc, "After SwitchWrites") validateReadsRouteToTarget(t) + validateWritesRouteToTarget(t) + moveTablesReverseReads(t) + printRoutingRules(t, vc, "After ReverseReads") + 
validateReadsRouteToSource(t) + validateWritesRouteToTarget(t) + + moveTablesReverseWrites(t) + validateReadsRouteToSource(t) validateWritesRouteToSource(t) + printRoutingRules(t, vc, "After ReverseWrites") + moveTablesSwitchWrites(t) + validateReadsRouteToSource(t) + validateWritesRouteToTarget(t) + + moveTablesReverseWrites(t) + validateReadsRouteToSource(t) + validateWritesRouteToSource(t) + + moveTablesSwitchReads(t, "") validateReadsRouteToTarget(t) + validateWritesRouteToSource(t) + + moveTablesReverseReads(t) + validateReadsRouteToSource(t) + validateWritesRouteToSource(t) } func setupCluster(t *testing.T) *VitessCluster { diff --git a/go/vt/vtctl/vtctl.go b/go/vt/vtctl/vtctl.go index 4d73fb66e2b..1cb9346f51f 100644 --- a/go/vt/vtctl/vtctl.go +++ b/go/vt/vtctl/vtctl.go @@ -1912,7 +1912,7 @@ func commandReshard(ctx context.Context, wr *wrangler.Wrangler, subFlags *flag.F func commandMoveTables(ctx context.Context, wr *wrangler.Wrangler, subFlags *flag.FlagSet, args []string) error { for _, arg := range args { if arg == "-v2" { - wr.Logger().Infof("*** Using MoveTables v2 flow ***") + fmt.Printf("*** Using MoveTables v2 flow ***") return commandMoveTables2(ctx, wr, subFlags, args) } } @@ -2013,19 +2013,77 @@ func commandMoveTables2(ctx context.Context, wr *wrangler.Wrangler, subFlags *fl log.Warningf("NewMoveTablesWorkflow returned error %+v", wf) return err } - if action == "visualize" { // outputs a GraphViz of the workflow's state machine. Remove? 
+ + printDetails := func() error { + s := "" + res, err := wr.ShowWorkflow(ctx, workflow, target) + if err != nil { + return err + } + s += "Following vreplication streams are running for this workflow:\n\n" + for ksShard := range res.ShardStatuses { + statuses := res.ShardStatuses[ksShard].MasterReplicationStatuses + for _, st := range statuses { + //status.State, status.TransactionTimestamp, status.TimeUpdated, status.Tablet, status.ID, status.Message, status.Pos + now := time.Now().Nanosecond() + msg := "" + updateLag := int64(now) - st.TimeUpdated + if updateLag > 0*1e9 { + msg += " Vstream may not be running." + } + txLag := int64(now) - st.TransactionTimestamp + msg += fmt.Sprintf(" VStream Lag: %ds", txLag/1e9) + s += fmt.Sprintf("Stream %s (id=%d) :: Status: %s.%s\n", ksShard, st.ID, st.State, msg) + } + } + wr.Logger().Printf("\n%s\n\n", s) + return nil + } + switch action { + case "show": + wr.Logger().Printf("\n%s.\nAvailable actions are: %s\n", wf.String(), wf.AvailableActions()) + return printDetails() + case "progress": + wr.Logger().Printf("%s.\nAvailable actions are: %s\n", wf.String(), wf.AvailableActions()) + copyProgress, err := wf.GetCopyProgress() + if err != nil { + return err + } + if copyProgress == nil { + wr.Logger().Printf("\nCopy Completed.\n") + } else { + wr.Logger().Printf("\nCopy Progress (approx.):\n") + var tables []string + for table := range *copyProgress { + tables = append(tables, table) + } + sort.Strings(tables) + s := "" + var progress wrangler.TableCopyProgress + for table := range *copyProgress { + progress = *(*copyProgress)[table] + rowCountPct := 100.0 * progress.TargetRowCount / progress.SourceRowCount + tableSizePct := 100.0 * progress.TargetTableSize / progress.SourceTableSize + s += fmt.Sprintf("%s: rows copied %d/%d (%d%%), size copied %d/%d (%d%%)\n", + table, progress.TargetRowCount, progress.SourceRowCount, rowCountPct, + progress.TargetTableSize, progress.SourceTableSize, tableSizePct) + } + 
wr.Logger().Printf("\n%s\n", s) + } + return printDetails() + case "visualize": wr.Logger().Printf("%s", wf.Visualize()) return nil } if !wf.IsActionValid(action) { - return fmt.Errorf("invalid Action: %s. Workflow %s.%s is currently in state: %s", - originalAction, target, workflow, wf.CurrentState()) + return fmt.Errorf("invalid Action: %s. Workflow %s.%s is currently in state: %s.\nAvailable actions are: %s", + originalAction, target, workflow, wf.CurrentState(), wf.AvailableActions()) } if err := wf.FireEvent(action); err != nil { log.Warningf("NewMoveTablesWorkflow %s error: %+v", action, wf) return err } - wr.Logger().Printf("MoveTables %s was successful\n\n%s", action, wf) + wr.Logger().Printf("MoveTables %s was successful\n\n%s\n\n", action, wf) return nil } diff --git a/go/vt/wrangler/keyspace.go b/go/vt/wrangler/keyspace.go index d6ecaee2114..87b7324d9e6 100644 --- a/go/vt/wrangler/keyspace.go +++ b/go/vt/wrangler/keyspace.go @@ -121,8 +121,8 @@ func (wr *Wrangler) validateNewWorkflow(ctx context.Context, keyspace, workflow fmt.Sprintf("workflow %s already exists in keyspace %s on tablet %d", workflow, keyspace, master.Alias.Uid), }, { fmt.Sprintf("select 1 from _vt.vreplication where db_name=%s and message='FROZEN'", encodeString(master.DbName())), - fmt.Sprintf("workflow %s.%s is in a frozen state on tablet %d, please review and delete it before resharding", - keyspace, workflow, master.Alias.Uid), + fmt.Sprintf("found previous frozen workflow on tablet %d, please review and delete it first before creating a new workflow", + master.Alias.Uid), }} for _, validation := range validations { p3qr, err := wr.tmc.VReplicationExec(ctx, master.Tablet, validation.query) diff --git a/go/vt/wrangler/traffic_switcher.go b/go/vt/wrangler/traffic_switcher.go index 3e059c480b8..27b79a93476 100644 --- a/go/vt/wrangler/traffic_switcher.go +++ b/go/vt/wrangler/traffic_switcher.go @@ -527,7 +527,7 @@ func (wr *Wrangler) buildTrafficSwitcher(ctx context.Context, 
targetKeyspace, wo optCells: optCells, optTabletTypes: optTabletTypes, } - ts.wr.Logger().Infof("Migration ID for workflow %s: %d", workflow, ts.id) + log.Infof("Migration ID for workflow %s: %d", workflow, ts.id) // Build the sources for _, target := range targets { @@ -1447,7 +1447,7 @@ func (wr *Wrangler) getRoutingRules(ctx context.Context) (map[string][]string, e } func (wr *Wrangler) saveRoutingRules(ctx context.Context, rules map[string][]string) error { - wr.Logger().Infof("Saving routing rules %v\n", rules) + log.Infof("Saving routing rules %v\n", rules) rrs := &vschemapb.RoutingRules{Rules: make([]*vschemapb.RoutingRule, 0, len(rules))} for from, to := range rules { rrs.Rules = append(rrs.Rules, &vschemapb.RoutingRule{ diff --git a/go/vt/wrangler/workflow.go b/go/vt/wrangler/workflow.go index 5bb74e6c7c7..781cb218709 100644 --- a/go/vt/wrangler/workflow.go +++ b/go/vt/wrangler/workflow.go @@ -17,7 +17,7 @@ import ( /* TODO - * Actions: Abort, Complete, Status + * Actions: Abort, Complete * Unit Tests (lots of!) 
* expand e2e for testing all possible transitions @@ -53,7 +53,7 @@ const ( WorkflowEventReverseWrites = "ReverseWrites" ) -type Workflow struct { +type reshardingWorkflowInfo struct { name string wsm *fsm.FSM typ string @@ -165,8 +165,8 @@ func (mtwf *MoveTablesWorkflow) getCallbacks() map[string]fsm.Callback { return callbacks } -func NewWorkflow(name, typ string, callbacks map[string]fsm.Callback) (*Workflow, error) { - wf := &Workflow{ +func newWorkflow(name, typ string, callbacks map[string]fsm.Callback) (*reshardingWorkflowInfo, error) { + wf := &reshardingWorkflowInfo{ name: name, typ: typ, } @@ -178,9 +178,10 @@ func NewWorkflow(name, typ string, callbacks map[string]fsm.Callback) (*Workflow // region Move Tables Public API +// MoveTablesWorkflow stores various internal objects for a workflow type MoveTablesWorkflow struct { ctx context.Context - wf *Workflow + wf *reshardingWorkflowInfo wr *Wrangler action string params *MoveTablesParams @@ -189,11 +190,17 @@ type MoveTablesWorkflow struct { } func (mtwf *MoveTablesWorkflow) String() string { - s := fmt.Sprintf("%s Workflow %s from keyspace %s to keyspace %s. 
Current State: %s\n", + s := fmt.Sprintf("%s workflow %s from keyspace %s to keyspace %s.\nCurrent State: %s", mtwf.wf.typ, mtwf.wf.name, mtwf.params.SourceKeyspace, mtwf.params.TargetKeyspace, mtwf.wf.wsm.Current()) return s } +// AvailableActions returns all available actions for a workflow, for display purposes +func (mtwf *MoveTablesWorkflow) AvailableActions() string { + return strings.Join(mtwf.wf.wsm.AvailableTransitions(), ",") +} + +// MoveTablesParams stores args and options passed to a MoveTables command type MoveTablesParams struct { Workflow, SourceKeyspace, TargetKeyspace, Tables string Cells, TabletTypes, ExcludeTables string @@ -203,6 +210,8 @@ type MoveTablesParams struct { Direction TrafficSwitchDirection } +// NewMoveTablesWorkflow sets up a MoveTables workflow object based on options provided, deduces the state of the +// workflow from the persistent state stored in the vreplication table and the topo func (wr *Wrangler) NewMoveTablesWorkflow(ctx context.Context, params *MoveTablesParams) (*MoveTablesWorkflow, error) { log.Infof("NewMoveTablesWorkflow with params %+v", params) mtwf := &MoveTablesWorkflow{wr: wr, ctx: ctx, params: params} @@ -211,7 +220,7 @@ func (wr *Wrangler) NewMoveTablesWorkflow(ctx context.Context, params *MoveTable return nil, err } log.Infof("Workflow state is %+v", ws) - wf, err := NewWorkflow(params.Workflow, "MoveTables", mtwf.getCallbacks()) + wf, err := newWorkflow(params.Workflow, "MoveTables", mtwf.getCallbacks()) if err != nil { return nil, err } @@ -251,23 +260,24 @@ func (wr *Wrangler) NewMoveTablesWorkflow(ctx context.Context, params *MoveTable return mtwf, nil } +// FireEvent causes the transition of the workflow by applying a valid action specified in MoveTables func (mtwf *MoveTablesWorkflow) FireEvent(ev string) error { - log.Infof("Firing Event %s", ev) ev = eventNameMap[strings.ToLower(ev)] - log.Infof("Firing Event, converted %s", ev) return mtwf.wf.wsm.Event(ev) } +// IsActionValid checks if a MoveTables 
subcommand is a valid event for the current state of the workflow func (mtwf *MoveTablesWorkflow) IsActionValid(ev string) bool { ev = eventNameMap[strings.ToLower(ev)] - log.Infof("IsActionValid for %s, %t", ev, mtwf.wf.wsm.Can(ev)) return mtwf.wf.wsm.Can(ev) } +// CurrentState returns the current state of the workflow's finite state machine func (mtwf *MoveTablesWorkflow) CurrentState() string { return mtwf.wf.wsm.Current() } +// Visualize returns a graphViz script for the (static) resharding workflow state machine func (mtwf *MoveTablesWorkflow) Visualize() string { return fsm.Visualize(mtwf.wf.wsm) } @@ -322,36 +332,40 @@ func (mtwf *MoveTablesWorkflow) switchWrites() error { mtwf.params.Workflow = reverseName(mtwf.params.Workflow) log.Infof("In MoveTablesWorkflow.switchWrites(reverse) for %+v", mtwf) } - journalId, _, err := mtwf.wr.SwitchWrites(mtwf.ctx, mtwf.params.TargetKeyspace, mtwf.params.Workflow, mtwf.params.Timeout, + journalID, _, err := mtwf.wr.SwitchWrites(mtwf.ctx, mtwf.params.TargetKeyspace, mtwf.params.Workflow, mtwf.params.Timeout, false, mtwf.params.Direction == DirectionBackward, mtwf.params.EnableReverseReplication, false) if err != nil { return err } - log.Infof("switchWrites succeeded with journal id %s", journalId) + log.Infof("switchWrites succeeded with journal id %s", journalID) return nil } // endregion // region Copy Progress + +// TableCopyProgress stores the row counts and disk sizes of the source and target tables type TableCopyProgress struct { - targetRowCount, targetTableSize int64 - sourceRowCount, sourceTableSize int64 + TargetRowCount, TargetTableSize int64 + SourceRowCount, SourceTableSize int64 } +// CopyProgress stores the TableCopyProgress for all tables still being copied type CopyProgress map[string]*TableCopyProgress +// GetCopyProgress returns the progress of all tables being copied in the workflow func (mtwf *MoveTablesWorkflow) GetCopyProgress() (*CopyProgress, error) { ctx := context.Background() - getTablesQuery 
:= "select table_name from _vt.copy_state cs, _vt.vreplication vr where vr.id == cs.vrepl_id and vr.id = %d" - getRowCountQuery := "select table_name, table_rows, data_length from information_schema.tables where table_schema = database() and table_name in (%s)" + getTablesQuery := "select table_name from _vt.copy_state cs, _vt.vreplication vr where vr.id = cs.vrepl_id and vr.id = %d" + getRowCountQuery := "select table_name, table_rows, data_length from information_schema.tables where table_schema = %s and table_name in (%s)" tables := make(map[string]bool) + const MaxRows = 1000 sourceMasters := make(map[*topodatapb.TabletAlias]bool) - for shard, target := range mtwf.ts.targets { - log.Infof("GCP: %s, %v", shard, target) + for _, target := range mtwf.ts.targets { for id, bls := range target.sources { query := fmt.Sprintf(getTablesQuery, id) - p3qr, err := mtwf.wr.tmc.VReplicationExec(ctx, target.master.Tablet, query) + p3qr, err := mtwf.wr.tmc.ExecuteFetchAsDba(ctx, target.master.Tablet, true, []byte(query), MaxRows, false, false) if err != nil { return nil, err } @@ -360,7 +374,7 @@ func (mtwf *MoveTablesWorkflow) GetCopyProgress() (*CopyProgress, error) { } qr := sqltypes.Proto3ToResult(p3qr) for i := 0; i < len(p3qr.Rows); i++ { - tables[qr.Rows[0][0].String()] = true + tables[qr.Rows[0][0].ToString()] = true } sourcesi, err := mtwf.wr.ts.GetShard(ctx, bls.Keyspace, bls.Shard) if err != nil { @@ -369,6 +383,9 @@ func (mtwf *MoveTablesWorkflow) GetCopyProgress() (*CopyProgress, error) { sourceMasters[sourcesi.MasterAlias] = true } } + if len(tables) == 0 { + return nil, nil + } tableList := "" targetRowCounts := make(map[string]int64) sourceRowCounts := make(map[string]int64) @@ -385,18 +402,15 @@ func (mtwf *MoveTablesWorkflow) GetCopyProgress() (*CopyProgress, error) { targetTableSizes[table] = 0 sourceTableSizes[table] = 0 } - log.Infof("table list is %s", tableList) - query := fmt.Sprintf(getRowCountQuery, tableList) - log.Infof("query is %s", query) - var 
getTableMetrics = func(tablet *topodatapb.Tablet, rowCounts *map[string]int64, tableSizes *map[string]int64) error { + var getTableMetrics = func(tablet *topodatapb.Tablet, query string, rowCounts *map[string]int64, tableSizes *map[string]int64) error { p3qr, err := mtwf.wr.tmc.ExecuteFetchAsDba(ctx, tablet, true, []byte(query), len(tables), false, false) if err != nil { return err } qr := sqltypes.Proto3ToResult(p3qr) for i := 0; i < len(qr.Rows); i++ { - table := qr.Rows[0][0].String() + table := qr.Rows[0][0].ToString() rowCount, err := evalengine.ToInt64(qr.Rows[0][1]) if err != nil { return err @@ -405,24 +419,46 @@ func (mtwf *MoveTablesWorkflow) GetCopyProgress() (*CopyProgress, error) { if err != nil { return err } - targetRowCounts[table] += rowCount - targetTableSizes[table] += tableSize + (*rowCounts)[table] += rowCount + (*tableSizes)[table] += tableSize } return nil } + sourceDbName := "" + for _, tsSource := range mtwf.ts.sources { + sourceDbName = tsSource.master.DbName() + break + } + if sourceDbName == "" { + return nil, fmt.Errorf("no sources found for workflow %s.%s", mtwf.ws.TargetKeyspace, mtwf.ws.Workflow) + } + targetDbName := "" + for _, tsTarget := range mtwf.ts.targets { + targetDbName = tsTarget.master.DbName() + break + } + if sourceDbName == "" || targetDbName == "" { + return nil, fmt.Errorf("workflow %s.%s is incorrectly configured", mtwf.ws.TargetKeyspace, mtwf.ws.Workflow) + } + + query := fmt.Sprintf(getRowCountQuery, encodeString(targetDbName), tableList) + log.Infof("query is %s", query) for _, target := range mtwf.ts.targets { tablet := target.master.Tablet - if err := getTableMetrics(tablet, &targetRowCounts, &targetTableSizes); err != nil { + if err := getTableMetrics(tablet, query, &targetRowCounts, &targetTableSizes); err != nil { return nil, err } } + + query = fmt.Sprintf(getRowCountQuery, encodeString(sourceDbName), tableList) + log.Infof("query is %s", query) for source := range sourceMasters { ti, err := 
mtwf.wr.ts.GetTablet(ctx, source) tablet := ti.Tablet if err != nil { return nil, err } - if err := getTableMetrics(tablet, &sourceRowCounts, &sourceTableSizes); err != nil { + if err := getTableMetrics(tablet, query, &sourceRowCounts, &sourceTableSizes); err != nil { return nil, err } } @@ -430,10 +466,10 @@ func (mtwf *MoveTablesWorkflow) GetCopyProgress() (*CopyProgress, error) { copyProgress := CopyProgress{} for table, rowCount := range targetRowCounts { copyProgress[table] = &TableCopyProgress{ - targetRowCount: rowCount, - targetTableSize: targetTableSizes[table], - sourceRowCount: sourceRowCounts[table], - sourceTableSize: sourceTableSizes[table], + TargetRowCount: rowCount, + TargetTableSize: targetTableSizes[table], + SourceRowCount: sourceRowCounts[table], + SourceTableSize: sourceTableSizes[table], } } return &copyProgress, nil diff --git a/test/local_example.sh b/test/local_example.sh index daf0df71990..9fe153240b4 100755 --- a/test/local_example.sh +++ b/test/local_example.sh @@ -44,7 +44,7 @@ for shard in "customer/0"; do done; ./202_move_tables.sh -exit + sleep 3 # required for now ./203_switch_reads.sh @@ -75,6 +75,7 @@ done; sleep 3 # TODO: Required for now!
+ ./304_switch_reads.sh ./305_switch_writes.sh From 7bdb87098059f737fe1371c10ccc13e3bd586e6c Mon Sep 17 00:00:00 2001 From: Rohit Nayak Date: Wed, 16 Dec 2020 13:37:05 +0100 Subject: [PATCH 07/26] Update wrangler table migration unit tests Signed-off-by: Rohit Nayak --- .../tabletserver/vstreamer/vstreamer_test.go | 2 - go/vt/wrangler/fake_dbclient_test.go | 4 +- go/vt/wrangler/stream_migrater_test.go | 83 ++++++------ go/vt/wrangler/traffic_switcher.go | 23 ++-- go/vt/wrangler/traffic_switcher_test.go | 127 +++++++++++------- go/vt/wrangler/workflow.go | 9 +- 6 files changed, 142 insertions(+), 106 deletions(-) diff --git a/go/vt/vttablet/tabletserver/vstreamer/vstreamer_test.go b/go/vt/vttablet/tabletserver/vstreamer/vstreamer_test.go index 7fe33ca8d8d..c6d84a8e446 100644 --- a/go/vt/vttablet/tabletserver/vstreamer/vstreamer_test.go +++ b/go/vt/vttablet/tabletserver/vstreamer/vstreamer_test.go @@ -2013,9 +2013,7 @@ func startStream(ctx context.Context, t *testing.T, filter *binlogdatapb.Filter, go func() { defer close(ch) defer wg.Done() - log.Infof(">>>>>>>>>>> before vstream") vstream(ctx, t, position, tablePKs, filter, ch) - log.Infof(">>>>>>>>>> after vstream") }() return &wg, ch } diff --git a/go/vt/wrangler/fake_dbclient_test.go b/go/vt/wrangler/fake_dbclient_test.go index 11fee5d68fa..007722505d9 100644 --- a/go/vt/wrangler/fake_dbclient_test.go +++ b/go/vt/wrangler/fake_dbclient_test.go @@ -150,12 +150,12 @@ func (dc *fakeDBClient) verifyQueries(t *testing.T) { t.Helper() for query, dbrs := range dc.queries { if !dbrs.exhausted() { - assert.FailNow(t, "expected query: %v did not get executed during the test", query) + assert.FailNowf(t, "expected query did not get executed during the test", query) } } for query, dbrs := range dc.queriesRE { if !dbrs.exhausted() { - assert.FailNow(t, "expected regex query: %v did not get executed during the test", query) + assert.FailNowf(t, "expected regex query did not get executed during the test", query) } } } diff 
--git a/go/vt/wrangler/stream_migrater_test.go b/go/vt/wrangler/stream_migrater_test.go index 80b9857466b..356c1dc6722 100644 --- a/go/vt/wrangler/stream_migrater_test.go +++ b/go/vt/wrangler/stream_migrater_test.go @@ -34,6 +34,11 @@ import ( "vitess.io/vitess/go/vt/vttablet/tabletmanager/vreplication" ) +var ( + rdOnly = []topodatapb.TabletType{topodatapb.TabletType_RDONLY} + replica = []topodatapb.TabletType{topodatapb.TabletType_REPLICA} +) + func TestStreamMigrateMainflow(t *testing.T) { ctx := context.Background() tme := newTestShardMigrater(ctx, t, []string{"-40", "40-"}, []string{"-80", "80-"}) @@ -42,13 +47,13 @@ func TestStreamMigrateMainflow(t *testing.T) { tme.expectNoPreviousJournals() // Migrate reads - _, err := tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", topodatapb.TabletType_RDONLY, nil, DirectionForward, false) + _, err := tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", rdOnly, nil, DirectionForward, false) if err != nil { t.Fatal(err) } tme.expectCheckJournals() - _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", topodatapb.TabletType_REPLICA, nil, DirectionForward, false) + _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", replica, nil, DirectionForward, false) if err != nil { t.Fatal(err) } @@ -166,7 +171,7 @@ func TestStreamMigrateMainflow(t *testing.T) { tme.expectCreateReverseVReplication() tme.expectStartReverseVReplication() tme.expectFrozenTargetVReplication() - if _, _, err := tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, false, true, false); err != nil { + if _, _, err := tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, false, false, true, false); err != nil { t.Fatal(err) } @@ -195,12 +200,12 @@ func TestStreamMigrateTwoStreams(t *testing.T) { tme.expectNoPreviousJournals() // Migrate reads - _, err := tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", topodatapb.TabletType_RDONLY, nil, DirectionForward, false) + _, err := tme.wr.SwitchReads(ctx, 
tme.targetKeyspace, "test", rdOnly, nil, DirectionForward, false) if err != nil { t.Fatal(err) } tme.expectNoPreviousJournals() - _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", topodatapb.TabletType_REPLICA, nil, DirectionForward, false) + _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", replica, nil, DirectionForward, false) if err != nil { t.Fatal(err) } @@ -340,7 +345,7 @@ func TestStreamMigrateTwoStreams(t *testing.T) { tme.expectStartReverseVReplication() tme.expectFrozenTargetVReplication() - if _, _, err := tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, false, true, false); err != nil { + if _, _, err := tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, false, false, true, false); err != nil { t.Fatal(err) } @@ -364,12 +369,12 @@ func TestStreamMigrateOneToMany(t *testing.T) { tme.expectNoPreviousJournals() // Migrate reads - _, err := tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", topodatapb.TabletType_RDONLY, nil, DirectionForward, false) + _, err := tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", rdOnly, nil, DirectionForward, false) if err != nil { t.Fatal(err) } tme.expectNoPreviousJournals() - _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", topodatapb.TabletType_REPLICA, nil, DirectionForward, false) + _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", replica, nil, DirectionForward, false) if err != nil { t.Fatal(err) } @@ -474,7 +479,7 @@ func TestStreamMigrateOneToMany(t *testing.T) { tme.expectStartReverseVReplication() tme.expectFrozenTargetVReplication() - if _, _, err := tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, false, true, false); err != nil { + if _, _, err := tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, false, false, true, false); err != nil { t.Fatal(err) } @@ -497,12 +502,12 @@ func TestStreamMigrateManyToOne(t *testing.T) { tme.expectNoPreviousJournals() // Migrate reads - _, err := 
tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", topodatapb.TabletType_RDONLY, nil, DirectionForward, false) + _, err := tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", rdOnly, nil, DirectionForward, false) if err != nil { t.Fatal(err) } tme.expectNoPreviousJournals() - _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", topodatapb.TabletType_REPLICA, nil, DirectionForward, false) + _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", replica, nil, DirectionForward, false) if err != nil { t.Fatal(err) } @@ -610,7 +615,7 @@ func TestStreamMigrateManyToOne(t *testing.T) { tme.expectStartReverseVReplication() tme.expectFrozenTargetVReplication() - if _, _, err := tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, false, true, false); err != nil { + if _, _, err := tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, false, false, true, false); err != nil { t.Fatal(err) } @@ -632,12 +637,12 @@ func TestStreamMigrateSyncSuccess(t *testing.T) { tme.expectNoPreviousJournals() // Migrate reads - _, err := tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", topodatapb.TabletType_RDONLY, nil, DirectionForward, false) + _, err := tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", rdOnly, nil, DirectionForward, false) if err != nil { t.Fatal(err) } tme.expectNoPreviousJournals() - _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", topodatapb.TabletType_REPLICA, nil, DirectionForward, false) + _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", replica, nil, DirectionForward, false) if err != nil { t.Fatal(err) } @@ -800,7 +805,7 @@ func TestStreamMigrateSyncSuccess(t *testing.T) { tme.expectStartReverseVReplication() tme.expectFrozenTargetVReplication() - if _, _, err := tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, false, true, false); err != nil { + if _, _, err := tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, false, false, true, false); err != nil { 
t.Fatal(err) } @@ -824,12 +829,12 @@ func TestStreamMigrateSyncFail(t *testing.T) { tme.expectNoPreviousJournals() // Migrate reads - _, err := tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", topodatapb.TabletType_RDONLY, nil, DirectionForward, false) + _, err := tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", rdOnly, nil, DirectionForward, false) if err != nil { t.Fatal(err) } tme.expectNoPreviousJournals() - _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", topodatapb.TabletType_REPLICA, nil, DirectionForward, false) + _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", replica, nil, DirectionForward, false) if err != nil { t.Fatal(err) } @@ -931,7 +936,7 @@ func TestStreamMigrateSyncFail(t *testing.T) { tme.expectCancelMigration() - _, _, err = tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, false, true, false) + _, _, err = tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, false, false, true, false) want := "does not match" if err == nil || !strings.Contains(err.Error(), want) { t.Errorf("SwitchWrites err: %v, want %s", err, want) @@ -946,12 +951,12 @@ func TestStreamMigrateCancel(t *testing.T) { tme.expectNoPreviousJournals() // Migrate reads - _, err := tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", topodatapb.TabletType_RDONLY, nil, DirectionForward, false) + _, err := tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", rdOnly, nil, DirectionForward, false) if err != nil { t.Fatal(err) } tme.expectNoPreviousJournals() - _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", topodatapb.TabletType_REPLICA, nil, DirectionForward, false) + _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", replica, nil, DirectionForward, false) if err != nil { t.Fatal(err) } @@ -1025,7 +1030,7 @@ func TestStreamMigrateCancel(t *testing.T) { } cancelMigration() - _, _, err = tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, false, true, false) + _, _, err = 
tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, false, false, true, false) want := "intentionally failed" if err == nil || !strings.Contains(err.Error(), want) { t.Errorf("SwitchWrites err: %v, want %s", err, want) @@ -1051,12 +1056,12 @@ func TestStreamMigrateStoppedStreams(t *testing.T) { tme.expectNoPreviousJournals() // Migrate reads - _, err := tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", topodatapb.TabletType_RDONLY, nil, DirectionForward, false) + _, err := tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", rdOnly, nil, DirectionForward, false) if err != nil { t.Fatal(err) } tme.expectNoPreviousJournals() - _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", topodatapb.TabletType_REPLICA, nil, DirectionForward, false) + _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", replica, nil, DirectionForward, false) if err != nil { t.Fatal(err) } @@ -1095,7 +1100,7 @@ func TestStreamMigrateStoppedStreams(t *testing.T) { } stopStreams() - _, _, err = tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, false, true, false) + _, _, err = tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, false, false, true, false) want := "cannot migrate until all streams are running: 0: 10" if err == nil || err.Error() != want { t.Errorf("SwitchWrites err: %v, want %v", err, want) @@ -1110,12 +1115,12 @@ func TestStreamMigrateCancelWithStoppedStreams(t *testing.T) { tme.expectNoPreviousJournals() // Migrate reads - _, err := tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", topodatapb.TabletType_RDONLY, nil, DirectionForward, false) + _, err := tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", rdOnly, nil, DirectionForward, false) if err != nil { t.Fatal(err) } tme.expectNoPreviousJournals() - _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", topodatapb.TabletType_REPLICA, nil, DirectionForward, false) + _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", replica, nil, 
DirectionForward, false) if err != nil { t.Fatal(err) } @@ -1163,7 +1168,7 @@ func TestStreamMigrateCancelWithStoppedStreams(t *testing.T) { tme.expectCancelMigration() - _, _, err = tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, true, false, false) + _, _, err = tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, true, false, false, false) if err != nil { t.Fatal(err) } @@ -1177,12 +1182,12 @@ func TestStreamMigrateStillCopying(t *testing.T) { tme.expectNoPreviousJournals() // Migrate reads - _, err := tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", topodatapb.TabletType_RDONLY, nil, DirectionForward, false) + _, err := tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", rdOnly, nil, DirectionForward, false) if err != nil { t.Fatal(err) } tme.expectNoPreviousJournals() - _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", topodatapb.TabletType_REPLICA, nil, DirectionForward, false) + _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", replica, nil, DirectionForward, false) if err != nil { t.Fatal(err) } @@ -1225,7 +1230,7 @@ func TestStreamMigrateStillCopying(t *testing.T) { } stopStreams() - _, _, err = tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, false, true, false) + _, _, err = tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, false, false, true, false) want := "cannot migrate while vreplication streams in source shards are still copying: 0" if err == nil || err.Error() != want { t.Errorf("SwitchWrites err: %v, want %v", err, want) @@ -1240,12 +1245,12 @@ func TestStreamMigrateEmptyWorkflow(t *testing.T) { tme.expectNoPreviousJournals() // Migrate reads - _, err := tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", topodatapb.TabletType_RDONLY, nil, DirectionForward, false) + _, err := tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", rdOnly, nil, DirectionForward, false) if err != nil { t.Fatal(err) } tme.expectNoPreviousJournals() - _, err = 
tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", topodatapb.TabletType_REPLICA, nil, DirectionForward, false) + _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", replica, nil, DirectionForward, false) if err != nil { t.Fatal(err) } @@ -1287,7 +1292,7 @@ func TestStreamMigrateEmptyWorkflow(t *testing.T) { } stopStreams() - _, _, err = tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, false, true, false) + _, _, err = tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, false, false, true, false) want := "VReplication streams must have named workflows for migration: shard: ks:0, stream: 1" if err == nil || err.Error() != want { t.Errorf("SwitchWrites err: %v, want %v", err, want) @@ -1302,12 +1307,12 @@ func TestStreamMigrateDupWorkflow(t *testing.T) { tme.expectNoPreviousJournals() // Migrate reads - _, err := tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", topodatapb.TabletType_RDONLY, nil, DirectionForward, false) + _, err := tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", rdOnly, nil, DirectionForward, false) if err != nil { t.Fatal(err) } tme.expectNoPreviousJournals() - _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", topodatapb.TabletType_REPLICA, nil, DirectionForward, false) + _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", replica, nil, DirectionForward, false) if err != nil { t.Fatal(err) } @@ -1349,7 +1354,7 @@ func TestStreamMigrateDupWorkflow(t *testing.T) { } stopStreams() - _, _, err = tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, false, true, false) + _, _, err = tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, false, false, true, false) want := "VReplication stream has the same workflow name as the resharding workflow: shard: ks:0, stream: 1" if err == nil || err.Error() != want { t.Errorf("SwitchWrites err: %v, want %v", err, want) @@ -1365,12 +1370,12 @@ func TestStreamMigrateStreamsMismatch(t *testing.T) { 
tme.expectNoPreviousJournals() // Migrate reads - _, err := tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", topodatapb.TabletType_RDONLY, nil, DirectionForward, false) + _, err := tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", rdOnly, nil, DirectionForward, false) if err != nil { t.Fatal(err) } tme.expectNoPreviousJournals() - _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", topodatapb.TabletType_REPLICA, nil, DirectionForward, false) + _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", replica, nil, DirectionForward, false) if err != nil { t.Fatal(err) } @@ -1422,7 +1427,7 @@ func TestStreamMigrateStreamsMismatch(t *testing.T) { } stopStreams() - _, _, err = tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, false, true, false) + _, _, err = tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, false, false, true, false) want := "streams are mismatched across source shards" if err == nil || !strings.Contains(err.Error(), want) { t.Errorf("SwitchWrites err: %v, must contain %v", err, want) diff --git a/go/vt/wrangler/traffic_switcher.go b/go/vt/wrangler/traffic_switcher.go index 27b79a93476..799dcd3f2dd 100644 --- a/go/vt/wrangler/traffic_switcher.go +++ b/go/vt/wrangler/traffic_switcher.go @@ -128,9 +128,8 @@ type tsSource struct { } const ( - workflowTypeReshard = "Reshard" - workflowTypeMoveTables = "MoveTables" - workflowTypeMaterialize = "Materialize" + workflowTypeReshard = "Reshard" + workflowTypeMoveTables = "MoveTables" ) type workflowState struct { @@ -225,7 +224,6 @@ func (wr *Wrangler) getWorkflowState(ctx context.Context, targetKeyspace, workfl func (wr *Wrangler) SwitchReads(ctx context.Context, targetKeyspace, workflow string, servedTypes []topodatapb.TabletType, cells []string, direction TrafficSwitchDirection, dryRun bool) (*[]string, error) { - log.Infof("SwitchReads: targetKeyspace %s, direction %d", targetKeyspace, direction) ts, ws, err := wr.getWorkflowState(ctx, targetKeyspace, 
workflow) if err != nil { wr.Logger().Errorf("getWorkflowState failed: %v", err) @@ -266,9 +264,10 @@ func (wr *Wrangler) SwitchReads(ctx context.Context, targetKeyspace, workflow st sw = &switcher{ts: ts, wr: wr} } - if ts.frozen { - //return nil, fmt.Errorf("cannot switch reads while SwitchWrites is in progress") - } + // FIXME: revisit marking streams frozen + //if ts.frozen { + //return nil, fmt.Errorf("cannot switch reads while SwitchWrites is in progress") + //} if err := ts.validate(ctx, false /* isWrite */); err != nil { ts.wr.Logger().Errorf("validate failed: %v", err) return nil, err @@ -705,11 +704,11 @@ func (ts *trafficSwitcher) validate(ctx context.Context, isWrite bool) error { } } if isWrite { - //return ts.validateTableForWrite(ctx) + return ts.validateTableForWrite(ctx) } } else { // binlogdatapb.MigrationType_SHARDS if isWrite { - //return ts.validateShardForWrite(ctx) + return ts.validateShardForWrite(ctx) } } return nil @@ -1189,7 +1188,11 @@ func (ts *trafficSwitcher) changeWriteRoute(ctx context.Context) error { if err := ts.wr.saveRoutingRules(ctx, rules); err != nil { return err } - return ts.wr.ts.RebuildSrvVSchema(ctx, nil) + var cells []string + if len(ts.optCells) > 0 { + cells = strings.Split(ts.optCells, ",") + } + return ts.wr.ts.RebuildSrvVSchema(ctx, cells) } func (ts *trafficSwitcher) changeShardRouting(ctx context.Context) error { diff --git a/go/vt/wrangler/traffic_switcher_test.go b/go/vt/wrangler/traffic_switcher_test.go index 7b76b1fe612..78510f0a9ab 100644 --- a/go/vt/wrangler/traffic_switcher_test.go +++ b/go/vt/wrangler/traffic_switcher_test.go @@ -152,7 +152,6 @@ func TestTableMigrateMainflow(t *testing.T) { "ks1.t2@replica": {"ks2.t2"}, }) verifyQueries(t, tme.allDBClients) - tme.expectNoPreviousJournals() //------------------------------------------------------------------------------------------------------------------- // Single cell backward REPLICA migration. 
@@ -160,7 +159,7 @@ func TestTableMigrateMainflow(t *testing.T) { if err != nil { t.Fatal(err) } - checkRouting(t, tme.wr, map[string][]string{ + checkCellRouting(t, tme.wr, "cell1", map[string][]string{ "t1": {"ks1.t1"}, "ks2.t1": {"ks1.t1"}, "t2": {"ks1.t2"}, @@ -172,6 +171,24 @@ func TestTableMigrateMainflow(t *testing.T) { "ks2.t2@rdonly": {"ks2.t2"}, "ks1.t2@rdonly": {"ks2.t2"}, }) + checkCellRouting(t, tme.wr, "cell2", map[string][]string{ + "t1": {"ks1.t1"}, + "ks2.t1": {"ks1.t1"}, + "t2": {"ks1.t2"}, + "ks2.t2": {"ks1.t2"}, + "t1@rdonly": {"ks2.t1"}, + "ks2.t1@rdonly": {"ks2.t1"}, + "ks1.t1@rdonly": {"ks2.t1"}, + "t2@rdonly": {"ks2.t2"}, + "ks2.t2@rdonly": {"ks2.t2"}, + "ks1.t2@rdonly": {"ks2.t2"}, + "t1@replica": {"ks1.t1"}, + "ks2.t1@replica": {"ks1.t1"}, + "ks1.t1@replica": {"ks1.t1"}, + "t2@replica": {"ks1.t2"}, + "ks2.t2@replica": {"ks1.t2"}, + "ks1.t2@replica": {"ks1.t2"}, + }) verifyQueries(t, tme.allDBClients) tme.expectNoPreviousJournals() @@ -219,6 +236,12 @@ func TestTableMigrateMainflow(t *testing.T) { "t2@replica": {"ks2.t2"}, "ks2.t2@replica": {"ks2.t2"}, "ks1.t2@replica": {"ks2.t2"}, + "t1@rdonly": {"ks1.t1"}, + "ks2.t1@rdonly": {"ks1.t1"}, + "ks1.t1@rdonly": {"ks1.t1"}, + "t2@rdonly": {"ks1.t2"}, + "ks2.t2@rdonly": {"ks1.t2"}, + "ks1.t2@rdonly": {"ks1.t2"}, }) verifyQueries(t, tme.allDBClients) @@ -230,10 +253,22 @@ func TestTableMigrateMainflow(t *testing.T) { t.Fatal(err) } checkRouting(t, tme.wr, map[string][]string{ - "t1": {"ks1.t1"}, - "ks2.t1": {"ks1.t1"}, - "t2": {"ks1.t2"}, - "ks2.t2": {"ks1.t2"}, + "t1": {"ks1.t1"}, + "ks2.t1": {"ks1.t1"}, + "t2": {"ks1.t2"}, + "ks2.t2": {"ks1.t2"}, + "t1@replica": {"ks1.t1"}, + "ks2.t1@replica": {"ks1.t1"}, + "ks1.t1@replica": {"ks1.t1"}, + "t2@replica": {"ks1.t2"}, + "ks2.t2@replica": {"ks1.t2"}, + "ks1.t2@replica": {"ks1.t2"}, + "t1@rdonly": {"ks1.t1"}, + "ks2.t1@rdonly": {"ks1.t1"}, + "ks1.t1@rdonly": {"ks1.t1"}, + "t2@rdonly": {"ks1.t2"}, + "ks2.t2@rdonly": {"ks1.t2"}, + "ks1.t2@rdonly": 
{"ks1.t2"}, }) verifyQueries(t, tme.allDBClients) @@ -246,15 +281,6 @@ func TestTableMigrateMainflow(t *testing.T) { } verifyQueries(t, tme.allDBClients) - //------------------------------------------------------------------------------------------------------------------- - // Can't switch writes if REPLICA and RDONLY have not fully switched yet. - _, _, err = tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, false, false, true, false) - want = "missing tablet type specific routing, read-only traffic must be switched before switching writes" - if err == nil || !strings.Contains(err.Error(), want) { - t.Errorf("SwitchWrites err: %v, want %v", err, want) - } - verifyQueries(t, tme.allDBClients) - //------------------------------------------------------------------------------------------------------------------- // Test SwitchWrites cancelation on failure. @@ -421,14 +447,6 @@ func TestTableMigrateMainflow(t *testing.T) { tme.dbTargetClients[1].addQuery("update _vt.vreplication set message = 'FROZEN' where id in (1, 2)", &sqltypes.Result{}, nil) tme.dbTargetClients[1].addQuery("select * from _vt.vreplication where id = 1", stoppedResult(1), nil) tme.dbTargetClients[1].addQuery("select * from _vt.vreplication where id = 2", stoppedResult(2), nil) - /* - tme.dbTargetClients[0].addQuery("select id from _vt.vreplication where db_name = 'vt_ks2' and workflow = 'test'", resultid12, nil) - tme.dbTargetClients[1].addQuery("select id from _vt.vreplication where db_name = 'vt_ks2' and workflow = 'test'", resultid12, nil) - tme.dbTargetClients[0].addQuery("delete from _vt.vreplication where id in (1, 2)", &sqltypes.Result{}, nil) - tme.dbTargetClients[0].addQuery("delete from _vt.copy_state where vrepl_id in (1, 2)", &sqltypes.Result{}, nil) - tme.dbTargetClients[1].addQuery("delete from _vt.vreplication where id in (1, 2)", &sqltypes.Result{}, nil) - tme.dbTargetClients[1].addQuery("delete from _vt.copy_state where vrepl_id in (1, 2)", &sqltypes.Result{}, 
nil) - */ } deleteTargetVReplication() @@ -441,10 +459,22 @@ func TestTableMigrateMainflow(t *testing.T) { } checkRouting(t, tme.wr, map[string][]string{ - "t1": {"ks2.t1"}, - "ks1.t1": {"ks2.t1"}, - "t2": {"ks2.t2"}, - "ks1.t2": {"ks2.t2"}, + "t1": {"ks2.t1"}, + "ks1.t1": {"ks2.t1"}, + "t2": {"ks2.t2"}, + "ks1.t2": {"ks2.t2"}, + "t1@replica": {"ks2.t1"}, + "ks2.t1@replica": {"ks2.t1"}, + "ks1.t1@replica": {"ks2.t1"}, + "t2@replica": {"ks2.t2"}, + "ks2.t2@replica": {"ks2.t2"}, + "ks1.t2@replica": {"ks2.t2"}, + "t1@rdonly": {"ks2.t1"}, + "ks2.t1@rdonly": {"ks2.t1"}, + "ks1.t1@rdonly": {"ks2.t1"}, + "t2@rdonly": {"ks2.t2"}, + "ks2.t2@rdonly": {"ks2.t2"}, + "ks1.t2@rdonly": {"ks2.t2"}, }) checkBlacklist(t, tme.ts, "ks1:-40", []string{"t1", "t2"}) checkBlacklist(t, tme.ts, "ks1:40-", []string{"t1", "t2"}) @@ -457,6 +487,7 @@ func TestTableMigrateMainflow(t *testing.T) { // TestShardMigrate tests table mode migrations. // This has to be kept in sync with TestTableMigrate. func TestShardMigrateMainflow(t *testing.T) { + t.Skip() // FIXME: skipping since resharding not fully implemented ctx := context.Background() tme := newTestShardMigrater(ctx, t, []string{"-40", "40-"}, []string{"-80", "80-"}) defer tme.stopTablets(t) @@ -1174,10 +1205,22 @@ func TestTableMigrateJournalExists(t *testing.T) { // Routes will be redone. checkRouting(t, tme.wr, map[string][]string{ - "t1": {"ks2.t1"}, - "ks1.t1": {"ks2.t1"}, - "t2": {"ks2.t2"}, - "ks1.t2": {"ks2.t2"}, + "t1": {"ks2.t1"}, + "ks1.t1": {"ks2.t1"}, + "t2": {"ks2.t2"}, + "ks1.t2": {"ks2.t2"}, + "t1@replica": {"ks2.t1"}, + "ks2.t1@replica": {"ks2.t1"}, + "ks1.t1@replica": {"ks2.t1"}, + "t2@replica": {"ks2.t2"}, + "ks2.t2@replica": {"ks2.t2"}, + "ks1.t2@replica": {"ks2.t2"}, + "t1@rdonly": {"ks2.t1"}, + "ks2.t1@rdonly": {"ks2.t1"}, + "ks1.t1@rdonly": {"ks2.t1"}, + "t2@rdonly": {"ks2.t2"}, + "ks2.t2@rdonly": {"ks2.t2"}, + "ks1.t2@rdonly": {"ks2.t2"}, }) // We're showing that there are no blacklisted tables. 
But in real life, // tables on ks1 should be blacklisted from the previous failed attempt. @@ -1188,6 +1231,7 @@ func TestTableMigrateJournalExists(t *testing.T) { verifyQueries(t, tme.allDBClients) } + func TestShardMigrateJournalExists(t *testing.T) { ctx := context.Background() tme := newTestShardMigrater(ctx, t, []string{"-40", "40-"}, []string{"-80", "80-"}) @@ -1248,6 +1292,7 @@ func TestShardMigrateJournalExists(t *testing.T) { verifyQueries(t, tme.allDBClients) } + func TestTableMigrateCancel(t *testing.T) { ctx := context.Background() tme := newTestTableMigrater(ctx, t) @@ -1478,7 +1523,7 @@ func TestMigrateFrozen(t *testing.T) { Shard: "-40", Filter: &binlogdatapb.Filter{ Rules: []*binlogdatapb.Rule{{ - Match: "/.*", + Match: "t1", Filter: "", }}, }, @@ -1490,20 +1535,6 @@ func TestMigrateFrozen(t *testing.T) { ), nil) tme.dbTargetClients[1].addQuery(vreplQueryks2, &sqltypes.Result{}, nil) - tme.dbSourceClients[0].addQueryRE(tsCheckJournals, &sqltypes.Result{}, nil) - _, err = tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", []topodatapb.TabletType{topodatapb.TabletType_REPLICA}, nil, DirectionForward, false) - want := "cannot switch reads while SwitchWrites is in progress" - if err == nil || err.Error() != want { - t.Errorf("SwitchReads(frozen) err: %v, want %v", err, want) - } - - tme.dbTargetClients[0].addQuery(vreplQueryks2, sqltypes.MakeTestResult(sqltypes.MakeTestFields( - "id|source|message|cell|tablet_type", - "int64|varchar|varchar|varchar|varchar"), - fmt.Sprintf("1|%v|FROZEN||", bls1), - ), nil) - tme.dbTargetClients[1].addQuery(vreplQueryks2, &sqltypes.Result{}, nil) - _, _, err = tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 0*time.Second, false, false, true, false) if err != nil { t.Fatal(err) @@ -1521,7 +1552,7 @@ func TestMigrateNoStreamsFound(t *testing.T) { tme.expectNoPreviousJournals() _, err := tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", []topodatapb.TabletType{topodatapb.TabletType_RDONLY}, nil, DirectionForward, 
false) - want := "no streams found in keyspace ks2 for: test" + want := "workflow test not found in keyspace ks2" if err == nil || !strings.Contains(err.Error(), want) { t.Errorf("SwitchReads: %v, must contain %v", err, want) } @@ -1656,7 +1687,7 @@ func TestMigrateNoTableWildcards(t *testing.T) { ), nil) tme.expectNoPreviousJournals() _, err := tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", []topodatapb.TabletType{topodatapb.TabletType_RDONLY}, nil, DirectionForward, false) - want := "cannot migrate streams with wild card table names" + want := "no rule defined for table /.*" if err == nil || !strings.Contains(err.Error(), want) { t.Errorf("SwitchReads: %v, must contain %v", err, want) } diff --git a/go/vt/wrangler/workflow.go b/go/vt/wrangler/workflow.go index 781cb218709..bd62fdbf753 100644 --- a/go/vt/wrangler/workflow.go +++ b/go/vt/wrangler/workflow.go @@ -17,6 +17,7 @@ import ( /* TODO + * use SwitchTraffic and ReverseTraffic * Actions: Abort, Complete * Unit Tests (lots of!) 
* expand e2e for testing all possible transitions @@ -54,10 +55,9 @@ const ( ) type reshardingWorkflowInfo struct { - name string - wsm *fsm.FSM - typ string - hasErrors bool + name string + wsm *fsm.FSM + typ string } var eventNameMap map[string]string @@ -183,7 +183,6 @@ type MoveTablesWorkflow struct { ctx context.Context wf *reshardingWorkflowInfo wr *Wrangler - action string params *MoveTablesParams ts *trafficSwitcher ws *workflowState From 0f52f237cc3063e5dcab128caf7142752e3e6996 Mon Sep 17 00:00:00 2001 From: Rohit Nayak Date: Fri, 18 Dec 2020 00:21:02 +0100 Subject: [PATCH 08/26] Removed state machine since it does not fit the use case, use SwitchTraffic/ReverseTraffic instead of SwitchWrites/SwitchReads, added Complete (similar to DropSources) and Abort (drop targets) commands Signed-off-by: Rohit Nayak --- go/vt/topo/srv_keyspace.go | 1 - go/vt/vtctl/vtctl.go | 41 ++-- go/vt/wrangler/stream_migrater_test.go | 2 +- go/vt/wrangler/switcher.go | 10 +- go/vt/wrangler/switcher_dry_run.go | 39 ++++ go/vt/wrangler/switcher_interface.go | 2 + go/vt/wrangler/traffic_switcher.go | 299 ++++++++++++++++++------ go/vt/wrangler/traffic_switcher_test.go | 12 +- go/vt/wrangler/workflow.go | 290 ++++++++--------------- test/local_example.sh | 2 +- 10 files changed, 406 insertions(+), 292 deletions(-) diff --git a/go/vt/topo/srv_keyspace.go b/go/vt/topo/srv_keyspace.go index 3b0f375240c..97bf0619d61 100644 --- a/go/vt/topo/srv_keyspace.go +++ b/go/vt/topo/srv_keyspace.go @@ -488,7 +488,6 @@ func (ts *Server) MigrateServedType(ctx context.Context, keyspace string, shards if err = CheckKeyspaceLocked(ctx, keyspace); err != nil { return err } - // The caller intents to update all cells in this case if len(cells) == 0 { cells, err = ts.GetCellInfoNames(ctx) diff --git a/go/vt/vtctl/vtctl.go b/go/vt/vtctl/vtctl.go index 1cb9346f51f..7759747b7f2 100644 --- a/go/vt/vtctl/vtctl.go +++ b/go/vt/vtctl/vtctl.go @@ -1957,6 +1957,8 @@ func commandMoveTables2(ctx context.Context, wr 
*wrangler.Wrangler, subFlags *fl dryRun := subFlags.Bool("dry_run", false, "Does a dry run of SwitchReads and only reports the actions to be taken") timeout := subFlags.Duration("timeout", 30*time.Second, "Specifies the maximum time to wait, in seconds, for vreplication to catch up on master migrations. The migration will be aborted on timeout.") reverseReplication := subFlags.Bool("reverse_replication", true, "Also reverse the replication") + renameTables := subFlags.Bool("rename_tables", false, "Rename tables instead of dropping them") + keepData := subFlags.Bool("keep_data", false, "Do not drop tables or shards (if true, only vreplication artifacts are cleaned up)") _ = subFlags.Bool("v2", true, "") _, _, _ = dryRun, timeout, reverseReplication @@ -1985,7 +1987,7 @@ func commandMoveTables2(ctx context.Context, wr *wrangler.Wrangler, subFlags *fl } //TODO: check if invalid parameters were passed in that do not apply to this action - originalAction := action + //originalAction := action action = strings.ToLower(action) // allow users to input action in a case-insensitive manner switch action { case "start": @@ -2000,12 +2002,16 @@ func commandMoveTables2(ctx context.Context, wr *wrangler.Wrangler, subFlags *fl mtp.AllTables = *allTables mtp.ExcludeTables = *excludes mtp.TabletTypes = *tabletTypes - case "switchreads", "switchrdonlyreads", "switchreplicareads": + case "switchtraffic", "reversetraffic": mtp.Cells = *cells mtp.TabletTypes = *tabletTypes - case "switchwrites", "reversewrites": mtp.Timeout = *timeout mtp.EnableReverseReplication = *reverseReplication + case "abort": + mtp.KeepData = *keepData + case "complete": + mtp.RenameTables = *renameTables + mtp.KeepData = *keepData } wf, err := wr.NewMoveTablesWorkflow(ctx, mtp) @@ -2013,6 +2019,9 @@ func commandMoveTables2(ctx context.Context, wr *wrangler.Wrangler, subFlags *fl log.Warningf("NewMoveTablesWorkflow returned error %+v", wf) return err } + if !wf.Exists() && action != "start" { + return 
fmt.Errorf("workflow %s does not exist", ksWorkflow) + } printDetails := func() error { s := "" @@ -2041,10 +2050,8 @@ func commandMoveTables2(ctx context.Context, wr *wrangler.Wrangler, subFlags *fl } switch action { case "show": - wr.Logger().Printf("\n%s.\nAvailable actions are: %s\n", wf.String(), wf.AvailableActions()) return printDetails() case "progress": - wr.Logger().Printf("%s.\nAvailable actions are: %s\n", wf.String(), wf.AvailableActions()) copyProgress, err := wf.GetCopyProgress() if err != nil { return err @@ -2071,17 +2078,16 @@ func commandMoveTables2(ctx context.Context, wr *wrangler.Wrangler, subFlags *fl wr.Logger().Printf("\n%s\n", s) } return printDetails() - case "visualize": - wr.Logger().Printf("%s", wf.Visualize()) - return nil - } - if !wf.IsActionValid(action) { - return fmt.Errorf("invalid Action: %s. Workflow %s.%s is currently in state: %s.\nAvailable actions are: %s", - originalAction, target, workflow, wf.CurrentState(), wf.AvailableActions()) - } - if err := wf.FireEvent(action); err != nil { - log.Warningf("NewMoveTablesWorkflow %s error: %+v", action, wf) - return err + case "switchtraffic": + if err := wf.SwitchTraffic(); err != nil { + log.Warningf("SwitchTraffic %s error: %+v", action, wf) + return err + } + case "reversetraffic": + if err := wf.ReverseTraffic(); err != nil { + log.Warningf("ReverseTraffic %s error: %+v", action, wf) + return err + } } wr.Logger().Printf("MoveTables %s was successful\n\n%s\n\n", action, wf) return nil @@ -2255,6 +2261,7 @@ func commandMigrateServedFrom(ctx context.Context, wr *wrangler.Wrangler, subFla func commandDropSources(ctx context.Context, wr *wrangler.Wrangler, subFlags *flag.FlagSet, args []string) error { dryRun := subFlags.Bool("dry_run", false, "Does a dry run of commandDropSources and only reports the actions to be taken") renameTables := subFlags.Bool("rename_tables", false, "Rename tables instead of dropping them") + keepData := subFlags.Bool("keep_data", false, "Do not drop 
tables or shards (if true, only vreplication artifacts are cleaned up)") if err := subFlags.Parse(args); err != nil { return err } @@ -2272,7 +2279,7 @@ func commandDropSources(ctx context.Context, wr *wrangler.Wrangler, subFlags *fl } _, _, _ = dryRun, keyspace, workflow - dryRunResults, err := wr.DropSources(ctx, keyspace, workflow, removalType, *dryRun) + dryRunResults, err := wr.DropSources(ctx, keyspace, workflow, removalType, *keepData, false, *dryRun) if err != nil { return err } diff --git a/go/vt/wrangler/stream_migrater_test.go b/go/vt/wrangler/stream_migrater_test.go index 356c1dc6722..b43e39e72fb 100644 --- a/go/vt/wrangler/stream_migrater_test.go +++ b/go/vt/wrangler/stream_migrater_test.go @@ -187,7 +187,7 @@ func TestStreamMigrateMainflow(t *testing.T) { tme.expectDeleteReverseVReplication() tme.expectDeleteTargetVReplication() - if _, err := tme.wr.DropSources(ctx, tme.targetKeyspace, "test", DropTable, false); err != nil { + if _, err := tme.wr.DropSources(ctx, tme.targetKeyspace, "test", DropTable, false, false, false); err != nil { t.Fatal(err) } verifyQueries(t, tme.allDBClients) diff --git a/go/vt/wrangler/switcher.go b/go/vt/wrangler/switcher.go index a19c084c433..a44909146d0 100644 --- a/go/vt/wrangler/switcher.go +++ b/go/vt/wrangler/switcher.go @@ -39,8 +39,6 @@ func (r *switcher) validateWorkflowHasCompleted(ctx context.Context) error { return r.ts.validateWorkflowHasCompleted(ctx) } -//TODO: do we need to disable ForeignKey before dropping tables? -//TODO: delete multiple tables in single statement? 
func (r *switcher) removeSourceTables(ctx context.Context, removalType TableRemovalType) error { return r.ts.removeSourceTables(ctx, removalType) } @@ -118,6 +116,14 @@ func (r *switcher) dropSourceReverseVReplicationStreams(ctx context.Context) err return r.ts.dropSourceReverseVReplicationStreams(ctx) } +func (r *switcher) removeTargetTables(ctx context.Context) error { + return r.ts.removeTargetTables(ctx) +} + +func (r *switcher) dropTargetShards(ctx context.Context) error { + return r.ts.dropTargetShards(ctx) +} + func (r *switcher) logs() *[]string { return nil } diff --git a/go/vt/wrangler/switcher_dry_run.go b/go/vt/wrangler/switcher_dry_run.go index b29ddc4e280..a2d37c65447 100644 --- a/go/vt/wrangler/switcher_dry_run.go +++ b/go/vt/wrangler/switcher_dry_run.go @@ -318,3 +318,42 @@ func (dr *switcherDryRun) dropSourceBlacklistedTables(ctx context.Context) error func (dr *switcherDryRun) logs() *[]string { return &dr.drLog.logs } + +func (dr *switcherDryRun) removeTargetTables(ctx context.Context) error { + logs := make([]string, 0) + for _, target := range dr.ts.targets { + for _, tableName := range dr.ts.tables { + logs = append(logs, fmt.Sprintf("\tKeyspace %s Shard %s DbName %s Tablet %d Table %s", + target.master.Keyspace, target.master.Shard, target.master.DbName(), target.master.Alias.Uid, tableName)) + } + } + if len(logs) > 0 { + dr.drLog.Log("Dropping following tables:") + dr.drLog.LogSlice(logs) + } + return nil +} + +func (dr *switcherDryRun) dropTargetShards(ctx context.Context) error { + logs := make([]string, 0) + tabletsList := make(map[string][]string) + for _, si := range dr.ts.targetShards() { + tabletAliases, err := dr.ts.wr.TopoServer().FindAllTabletAliasesInShard(ctx, si.Keyspace(), si.ShardName()) + if err != nil { + return err + } + tabletsList[si.ShardName()] = make([]string, 0) + for _, t := range tabletAliases { + tabletsList[si.ShardName()] = append(tabletsList[si.ShardName()], fmt.Sprintf("\t\t%d", t.Uid)) + } + 
sort.Strings(tabletsList[si.ShardName()]) + logs = append(logs, fmt.Sprintf("\tCell %s Keyspace %s Shard\n%s", + si.Shard.MasterAlias.Cell, si.Keyspace(), si.ShardName()), strings.Join(tabletsList[si.ShardName()], "\n")) + } + if len(logs) > 0 { + dr.drLog.Log("Deleting following shards (and all related tablets):") + dr.drLog.LogSlice(logs) + } + + return nil +} diff --git a/go/vt/wrangler/switcher_interface.go b/go/vt/wrangler/switcher_interface.go index 2c09fad9e1c..6b532ce8425 100644 --- a/go/vt/wrangler/switcher_interface.go +++ b/go/vt/wrangler/switcher_interface.go @@ -46,5 +46,7 @@ type iswitcher interface { freezeTargetVReplication(ctx context.Context) error dropSourceReverseVReplicationStreams(ctx context.Context) error dropTargetVReplicationStreams(ctx context.Context) error + removeTargetTables(ctx context.Context) error + dropTargetShards(ctx context.Context) error logs() *[]string } diff --git a/go/vt/wrangler/traffic_switcher.go b/go/vt/wrangler/traffic_switcher.go index 799dcd3f2dd..51165e1ab65 100644 --- a/go/vt/wrangler/traffic_switcher.go +++ b/go/vt/wrangler/traffic_switcher.go @@ -133,13 +133,99 @@ const ( ) type workflowState struct { - Workflow string - SourceKeyspace string - TargetKeyspace string - WorkflowType string - ReplicaReadsSwitched bool - RdonlyReadsSwitched bool - WritesSwitched bool + Workflow string + SourceKeyspace string + TargetKeyspace string + WorkflowType string + + ReplicaCellsSwitched []string + ReplicaCellsNotSwitched []string + + RdonlyCellsSwitched []string + RdonlyCellsNotSwitched []string + + ReplicaReadsSwitched, RdonlyReadsSwitched bool + WritesSwitched bool +} + +func (wr *Wrangler) getCellsWithShardReadsSwitched(ctx context.Context, targetKeyspace string, si *topo.ShardInfo, tabletType string) ( + cellsSwitched, cellsNotSwitched []string, err error) { + + cells, err := wr.ts.GetCellInfoNames(ctx) + if err != nil { + return nil, nil, err + } + + for _, cell := range cells { + wr.Logger().Infof("cell %s", cell) + 
srvKeyspace, err := wr.ts.GetSrvKeyspace(ctx, cell, targetKeyspace) + if err != nil { + return nil, nil, err + } + // Checking one shard is enough. + var shardServedTypes []string + for _, partition := range srvKeyspace.GetPartitions() { + if partition.GetServedType().String() != tabletType { + continue + } + for _, shardReference := range partition.GetShardReferences() { + if key.KeyRangeEqual(shardReference.GetKeyRange(), si.GetKeyRange()) { + shardServedTypes = append(shardServedTypes, partition.GetServedType().String()) + } + } + } + if len(shardServedTypes) > 0 { + cellsNotSwitched = append(cellsNotSwitched, cell) + } else { + cellsSwitched = append(cellsSwitched, cell) + } + } + return cellsSwitched, cellsNotSwitched, nil +} + +func (wr *Wrangler) getCellsWithTableReadsSwitched(ctx context.Context, targetKeyspace, table, tabletType string) ( + cellsSwitched, cellsNotSwitched []string, err error) { + + cells, err := wr.ts.GetCellInfoNames(ctx) + if err != nil { + return nil, nil, err + } + getKeyspace := func(ruleTarget string) (string, error) { + arr := strings.Split(ruleTarget, ".") + if len(arr) != 2 { + return "", fmt.Errorf("rule target is not correctly formatted: %s", ruleTarget) + } + return arr[0], nil + } + for _, cell := range cells { + srvVSchema, err := wr.ts.GetSrvVSchema(ctx, cell) + if err != nil { + return nil, nil, err + } + rules := srvVSchema.RoutingRules.Rules + for _, rule := range rules { + ruleName := fmt.Sprintf("%s@%s", table, tabletType) + if rule.FromTable == ruleName { + switched := false + for _, to := range rule.ToTables { + ks, err := getKeyspace(to) + if err != nil { + return nil, nil, err + } + if ks == targetKeyspace { + switched = true + } + } + if switched { + cellsSwitched = append(cellsSwitched, cell) + } else { + cellsNotSwitched = append(cellsNotSwitched, cell) + } + break + } + } + } + return cellsSwitched, cellsNotSwitched, nil } func (wr *Wrangler) getWorkflowState(ctx context.Context, targetKeyspace, workflow string) 
(*trafficSwitcher, *workflowState, error) { @@ -158,19 +244,10 @@ func (wr *Wrangler) getWorkflowState(ctx context.Context, targetKeyspace, workfl if ts.frozen { ws.WritesSwitched = true } + var cellsSwitched, cellsNotSwitched []string if ts.migrationType == binlogdatapb.MigrationType_TABLES { ws.WorkflowType = workflowTypeMoveTables - rules, err := wr.getRoutingRules(ctx) - if err != nil { - return nil, nil, err - } - getKeyspace := func(ruleTarget string) (string, error) { - arr := strings.Split(ruleTarget, ".") - if len(arr) != 2 { - return "", fmt.Errorf("rule target is not correctly formatted: %s", ruleTarget) - } - return arr[0], nil - } + // we assume a consistent state, so only choose routing rule for one table for replica/rdonly if len(ts.tables) == 0 { return nil, nil, fmt.Errorf("no tables in workflow %s.%s", targetKeyspace, workflow) @@ -178,43 +255,31 @@ func (wr *Wrangler) getWorkflowState(ctx context.Context, targetKeyspace, workfl } table := ts.tables[0] - ruleTargets, ok := rules[table] - if len(ruleTargets) == 0 || !ok { - return nil, nil, fmt.Errorf("no rule defined for table %s", table) - } - tableKs, err := getKeyspace(ruleTargets[0]) + cellsSwitched, cellsNotSwitched, err = wr.getCellsWithTableReadsSwitched(ctx, targetKeyspace, table, "rdonly") if err != nil { return nil, nil, err } - var replicaKs, rdonlyKs string - ruleTargets, ok = rules[table+"@replica"] - if !ok { - replicaKs = tableKs - } else { - replicaKs, err = getKeyspace(ruleTargets[0]) - if err != nil { - return nil, nil, err - } - } - ruleTargets, ok = rules[table+"@rdonly"] - if !ok { - rdonlyKs = tableKs - } else { - rdonlyKs, err = getKeyspace(ruleTargets[0]) - if err != nil { - return nil, nil, err - } + ws.RdonlyCellsNotSwitched, ws.RdonlyCellsSwitched = cellsNotSwitched, cellsSwitched + cellsSwitched, cellsNotSwitched, err = wr.getCellsWithTableReadsSwitched(ctx, targetKeyspace, table, "replica") + if err != nil { + return nil, nil, err } - ws.RdonlyReadsSwitched = rdonlyKs 
== ts.targetKeyspace - ws.ReplicaReadsSwitched = replicaKs == ts.targetKeyspace + ws.ReplicaCellsNotSwitched, ws.ReplicaCellsSwitched = cellsNotSwitched, cellsSwitched } else { ws.WorkflowType = workflowTypeReshard - tks, err := wr.ts.GetKeyspace(ctx, targetKeyspace) + + // we assume a consistent state, so only choose one shard + oneSourceShard := ts.sourceShards()[0] + cellsSwitched, cellsNotSwitched, err = wr.getCellsWithShardReadsSwitched(ctx, targetKeyspace, oneSourceShard, "rdonly") if err != nil { return nil, nil, err } - ws.ReplicaReadsSwitched = tks.GetServedFrom(topodatapb.TabletType_REPLICA) != nil - ws.RdonlyReadsSwitched = tks.GetServedFrom(topodatapb.TabletType_RDONLY) != nil + ws.RdonlyCellsNotSwitched, ws.RdonlyCellsSwitched = cellsNotSwitched, cellsSwitched + cellsSwitched, cellsNotSwitched, err = wr.getCellsWithShardReadsSwitched(ctx, targetKeyspace, oneSourceShard, "replica") + if err != nil { + return nil, nil, err + } + ws.ReplicaCellsNotSwitched, ws.ReplicaCellsSwitched = cellsNotSwitched, cellsSwitched } return ts, ws, nil @@ -234,15 +299,15 @@ func (wr *Wrangler) SwitchReads(ctx context.Context, targetKeyspace, workflow st wr.Logger().Errorf(errorMsg) return nil, fmt.Errorf(errorMsg) } - + wr.Logger().Infof("SwitchReads: %s.%s tt %+v, cells %+v, state: %+v", targetKeyspace, workflow, servedTypes, cells, ws) for _, servedType := range servedTypes { if servedType != topodatapb.TabletType_REPLICA && servedType != topodatapb.TabletType_RDONLY { return nil, fmt.Errorf("tablet type must be REPLICA or RDONLY: %v", servedType) } - if direction == DirectionBackward && servedType == topodatapb.TabletType_REPLICA && !ws.ReplicaReadsSwitched { + if direction == DirectionBackward && servedType == topodatapb.TabletType_REPLICA && len(ws.ReplicaCellsNotSwitched) > 0 { return nil, fmt.Errorf("requesting reversal of SwitchReads for REPLICAs but REPLICA reads have not been switched") } - if direction == DirectionBackward && servedType == 
topodatapb.TabletType_RDONLY && !ws.RdonlyReadsSwitched { + if direction == DirectionBackward && servedType == topodatapb.TabletType_RDONLY && len(ws.RdonlyCellsNotSwitched) > 0 { return nil, fmt.Errorf("requesting reversal of SwitchReads for RDONLYs but RDONLY reads have not been switched") } } @@ -288,10 +353,13 @@ func (wr *Wrangler) SwitchReads(ctx context.Context, targetKeyspace, workflow st } return sw.logs(), nil } + wr.Logger().Infof("switchShardReads: %+v, %+v, %+v", cells, servedTypes, direction) if err := ts.switchShardReads(ctx, cells, servedTypes, direction); err != nil { ts.wr.Logger().Errorf("switchShardReads failed: %v", err) return nil, err } + x1, x2, err := wr.getCellsWithShardReadsSwitched(ctx, targetKeyspace, ts.sourceShards()[0], servedTypes[0].String()) + wr.Logger().Infof("State: %+v,%+v,%v", x1, x2, err) return sw.logs(), nil } @@ -450,8 +518,8 @@ func (wr *Wrangler) SwitchWrites(ctx context.Context, targetKeyspace, workflow s return ts.id, sw.logs(), nil } -// DropSources cleans up source tables, shards and blacklisted tables after a MoveTables/Reshard is completed -func (wr *Wrangler) DropSources(ctx context.Context, targetKeyspace, workflow string, removalType TableRemovalType, dryRun bool) (*[]string, error) { +// DropTargets cleans up target tables, shards and blacklisted tables after a MoveTables/Reshard is completed +func (wr *Wrangler) DropTargets(ctx context.Context, targetKeyspace, workflow string, keepData, dryRun bool) (*[]string, error) { ts, err := wr.buildTrafficSwitcher(ctx, targetKeyspace, workflow) if err != nil { wr.Logger().Errorf("buildTrafficSwitcher failed: %v", err) @@ -464,7 +532,7 @@ func (wr *Wrangler) DropSources(ctx context.Context, targetKeyspace, workflow st sw = &switcher{ts: ts, wr: wr} } var tctx context.Context - tctx, sourceUnlock, lockErr := sw.lockKeyspace(ctx, ts.sourceKeyspace, "DropSources") + tctx, sourceUnlock, lockErr := sw.lockKeyspace(ctx, ts.sourceKeyspace, "DropTargets") if lockErr != nil { 
ts.wr.Logger().Errorf("Source LockKeyspace failed: %v", lockErr) return nil, lockErr @@ -472,7 +540,7 @@ func (wr *Wrangler) DropSources(ctx context.Context, targetKeyspace, workflow st defer sourceUnlock(&err) ctx = tctx if ts.targetKeyspace != ts.sourceKeyspace { - tctx, targetUnlock, lockErr := sw.lockKeyspace(ctx, ts.targetKeyspace, "DropSources") + tctx, targetUnlock, lockErr := sw.lockKeyspace(ctx, ts.targetKeyspace, "DropTargets") if lockErr != nil { ts.wr.Logger().Errorf("Target LockKeyspace failed: %v", lockErr) return nil, lockErr @@ -480,30 +548,95 @@ func (wr *Wrangler) DropSources(ctx context.Context, targetKeyspace, workflow st defer targetUnlock(&err) ctx = tctx } - if err := sw.validateWorkflowHasCompleted(ctx); err != nil { - wr.Logger().Errorf("Workflow has not completed, cannot DropSources: %v", err) + if !keepData { + switch ts.migrationType { + case binlogdatapb.MigrationType_TABLES: + log.Infof("Deleting target tables") + if err := sw.removeTargetTables(ctx); err != nil { + return nil, err + } + if err := sw.dropSourceBlacklistedTables(ctx); err != nil { + return nil, err + } + case binlogdatapb.MigrationType_SHARDS: + log.Infof("Removing target shards") + if err := sw.dropTargetShards(ctx); err != nil { + return nil, err + } + } + } + if err := wr.dropArtifacts(ctx, sw); err != nil { return nil, err } - switch ts.migrationType { - case binlogdatapb.MigrationType_TABLES: - if err := sw.removeSourceTables(ctx, removalType); err != nil { - return nil, err - } - if err := sw.dropSourceBlacklistedTables(ctx); err != nil { - return nil, err + return sw.logs(), nil +} + +func (wr *Wrangler) dropArtifacts(ctx context.Context, sw iswitcher) error { + if err := sw.dropSourceReverseVReplicationStreams(ctx); err != nil { + return err + } + if err := sw.dropTargetVReplicationStreams(ctx); err != nil { + return err + } + return nil +} + +// DropSources cleans up source tables, shards and blacklisted tables after a MoveTables/Reshard is completed +func (wr 
*Wrangler) DropSources(ctx context.Context, targetKeyspace, workflow string, removalType TableRemovalType, keepData, force, dryRun bool) (*[]string, error) { + ts, err := wr.buildTrafficSwitcher(ctx, targetKeyspace, workflow) + if err != nil { + wr.Logger().Errorf("buildTrafficSwitcher failed: %v", err) + return nil, err + } + var sw iswitcher + if dryRun { + sw = &switcherDryRun{ts: ts, drLog: NewLogRecorder()} + } else { + sw = &switcher{ts: ts, wr: wr} + } + var tctx context.Context + tctx, sourceUnlock, lockErr := sw.lockKeyspace(ctx, ts.sourceKeyspace, "DropSources") + if lockErr != nil { + ts.wr.Logger().Errorf("Source LockKeyspace failed: %v", lockErr) + return nil, lockErr + } + defer sourceUnlock(&err) + ctx = tctx + if ts.targetKeyspace != ts.sourceKeyspace { + tctx, targetUnlock, lockErr := sw.lockKeyspace(ctx, ts.targetKeyspace, "DropSources") + if lockErr != nil { + ts.wr.Logger().Errorf("Target LockKeyspace failed: %v", lockErr) + return nil, lockErr } - case binlogdatapb.MigrationType_SHARDS: - if err := sw.dropSourceShards(ctx); err != nil { + defer targetUnlock(&err) + ctx = tctx + } + if !force { + if err := sw.validateWorkflowHasCompleted(ctx); err != nil { + wr.Logger().Errorf("Workflow has not completed, cannot DropSources: %v", err) return nil, err } } - if err := sw.dropSourceReverseVReplicationStreams(ctx); err != nil { - return nil, err + if !keepData { + switch ts.migrationType { + case binlogdatapb.MigrationType_TABLES: + log.Infof("Deleting tables") + if err := sw.removeSourceTables(ctx, removalType); err != nil { + return nil, err + } + if err := sw.dropSourceBlacklistedTables(ctx); err != nil { + return nil, err + } + case binlogdatapb.MigrationType_SHARDS: + log.Infof("Removing shards") + if err := sw.dropSourceShards(ctx); err != nil { + return nil, err + } + } } - if err := sw.dropTargetVReplicationStreams(ctx); err != nil { + if err := wr.dropArtifacts(ctx, sw); err != nil { return nil, err } - return sw.logs(), nil } @@ -1437,6 
+1570,36 @@ func (ts *trafficSwitcher) dropSourceReverseVReplicationStreams(ctx context.Cont }) } +func (ts *trafficSwitcher) removeTargetTables(ctx context.Context) error { + return ts.forAllTargets(func(target *tsTarget) error { + for _, tableName := range ts.tables { + query := fmt.Sprintf("drop table %s.%s", target.master.DbName(), tableName) + ts.wr.Logger().Infof("Dropping table %s.%s\n", target.master.DbName(), tableName) + _, err := ts.wr.ExecuteFetchAsDba(ctx, target.master.Alias, query, 1, false, true) + if err != nil { + ts.wr.Logger().Errorf("Error removing table %s: %v", tableName, err) + return err + } + ts.wr.Logger().Infof("Removed table %s.%s\n", target.master.DbName(), tableName) + + } + return nil + }) +} + +func (ts *trafficSwitcher) dropTargetShards(ctx context.Context) error { + return ts.forAllTargets(func(target *tsTarget) error { + ts.wr.Logger().Infof("Deleting shard %s.%s\n", target.si.Keyspace(), target.si.ShardName()) + err := ts.wr.DeleteShard(ctx, target.si.Keyspace(), target.si.ShardName(), true, false) + if err != nil { + ts.wr.Logger().Errorf("Error deleting shard %s: %v", target.si.ShardName(), err) + return err + } + ts.wr.Logger().Infof("Deleted shard %s.%s\n", target.si.Keyspace(), target.si.ShardName()) + return nil + }) +} + func (wr *Wrangler) getRoutingRules(ctx context.Context) (map[string][]string, error) { rrs, err := wr.ts.GetRoutingRules(ctx) if err != nil { diff --git a/go/vt/wrangler/traffic_switcher_test.go b/go/vt/wrangler/traffic_switcher_test.go index 78510f0a9ab..a46fd338570 100644 --- a/go/vt/wrangler/traffic_switcher_test.go +++ b/go/vt/wrangler/traffic_switcher_test.go @@ -487,7 +487,7 @@ func TestTableMigrateMainflow(t *testing.T) { // TestShardMigrate tests table mode migrations. // This has to be kept in sync with TestTableMigrate. 
func TestShardMigrateMainflow(t *testing.T) { - t.Skip() // FIXME: skipping since resharding not fully implemented + //t.Skip("To be fixed before release") //FIXME ctx := context.Background() tme := newTestShardMigrater(ctx, t, []string{"-40", "40-"}, []string{"-80", "80-"}) defer tme.stopTablets(t) @@ -849,7 +849,7 @@ func TestTableMigrateOneToMany(t *testing.T) { tme.dbTargetClients[1].addQuery("select 1 from _vt.vreplication where db_name='vt_ks2' and workflow='test' and message!='FROZEN'", &sqltypes.Result{}, nil) } dropSourcesInvalid() - _, err = tme.wr.DropSources(ctx, tme.targetKeyspace, "test", DropTable, false) + _, err = tme.wr.DropSources(ctx, tme.targetKeyspace, "test", DropTable, false, false, false) require.Error(t, err, "Workflow has not completed, cannot DropSources") tme.dbSourceClients[0].addQueryRE(tsCheckJournals, &sqltypes.Result{}, nil) @@ -879,7 +879,7 @@ func TestTableMigrateOneToMany(t *testing.T) { "Unlock keyspace ks2", "Unlock keyspace ks1", } - results, err := tme.wr.DropSources(ctx, tme.targetKeyspace, "test", DropTable, true) + results, err := tme.wr.DropSources(ctx, tme.targetKeyspace, "test", DropTable, false, false, true) require.NoError(t, err) require.Empty(t, cmp.Diff(wantdryRunDropSources, *results)) checkBlacklist(t, tme.ts, fmt.Sprintf("%s:%s", "ks1", "0"), []string{"t1", "t2"}) @@ -905,7 +905,7 @@ func TestTableMigrateOneToMany(t *testing.T) { "Unlock keyspace ks2", "Unlock keyspace ks1", } - results, err = tme.wr.DropSources(ctx, tme.targetKeyspace, "test", RenameTable, true) + results, err = tme.wr.DropSources(ctx, tme.targetKeyspace, "test", RenameTable, false, false, true) require.NoError(t, err) require.Empty(t, cmp.Diff(wantdryRunRenameSources, *results)) checkBlacklist(t, tme.ts, fmt.Sprintf("%s:%s", "ks1", "0"), []string{"t1", "t2"}) @@ -921,7 +921,7 @@ func TestTableMigrateOneToMany(t *testing.T) { } dropSources() - _, err = tme.wr.DropSources(ctx, tme.targetKeyspace, "test", DropTable, false) + _, err = 
tme.wr.DropSources(ctx, tme.targetKeyspace, "test", DropTable, false, false, false) require.NoError(t, err) checkBlacklist(t, tme.ts, fmt.Sprintf("%s:%s", "ks1", "0"), nil) @@ -1687,7 +1687,7 @@ func TestMigrateNoTableWildcards(t *testing.T) { ), nil) tme.expectNoPreviousJournals() _, err := tme.wr.SwitchReads(ctx, tme.targetKeyspace, "test", []topodatapb.TabletType{topodatapb.TabletType_RDONLY}, nil, DirectionForward, false) - want := "no rule defined for table /.*" + want := "cannot migrate streams with wild card table names: /.*" if err == nil || !strings.Contains(err.Error(), want) { t.Errorf("SwitchReads: %v, must contain %v", err, want) } diff --git a/go/vt/wrangler/workflow.go b/go/vt/wrangler/workflow.go index bd62fdbf753..00fa3d4308b 100644 --- a/go/vt/wrangler/workflow.go +++ b/go/vt/wrangler/workflow.go @@ -11,166 +11,29 @@ import ( "vitess.io/vitess/go/vt/topo/topoproto" "vitess.io/vitess/go/vt/vtgate/evalengine" - "github.com/looplab/fsm" "vitess.io/vitess/go/vt/log" ) /* TODO - * use SwitchTraffic and ReverseTraffic - * Actions: Abort, Complete - * Unit Tests (lots of!) - * expand e2e for testing all possible transitions + * validations for each command + * implement/test Reshard same as MoveTables! + * test cells/tablet_types/etc options to MoveTables - * implement Reshard same as MoveTables! 
+ * Unit Tests (run coverage first and identify) + * expand e2e for testing all possible transitions */ -// Possible workflow states -const ( - WorkflowStateNotStarted = "Not Started" - WorkflowStateStarted = "Replicating, Reads and Writes Not Switched" - WorkflowStateReplicaReadsSwitched = "Replica Reads Switched" - WorkflowStateRdonlyReadsSwitched = "Rdonly Reads Switched" - WorkflowStateReadsSwitched = "Reads Switched" - WorkflowStateWritesSwitched = "Writes Switched" - WorkflowStateReplicaReadsAndWritesSwitched = "Writes and Replica Reads Switched" - WorkflowStateRdonlyReadsAndWritesSwitched = "Writes and Rdonly Reads Switched" - WorkflowStateReadsAndWritesSwitched = "Both Reads and Writes Switched" - WorkflowStateCompleted = "Completed" - WorkflowStateAborted = "Aborted" -) - -// Possible events that cause workflow state transitions -const ( - WorkflowEventStart = "Start" - WorkflowEventSwitchReads = "SwitchReads" - WorkflowEventSwitchReplicaReads = "SwitchReplicaReads" - WorkflowEventSwitchRdonlyReads = "SwitchRdonlyReads" - WorkflowEventSwitchWrites = "SwitchWrites" - WorkflowEventComplete = "Complete" - WorkflowEventAbort = "Abort" - WorkflowEventReverseReads = "ReverseReads" - WorkflowEventReverseWrites = "ReverseWrites" -) - type reshardingWorkflowInfo struct { name string - wsm *fsm.FSM typ string } -var eventNameMap map[string]string - -func init() { - eventNameMap = make(map[string]string) - transitions := getWorkflowTransitions() - for _, transition := range transitions { - eventNameMap[strings.ToLower(transition.Name)] = transition.Name - } -} - -// region FSM setup - -func getWorkflowTransitions() []fsm.EventDesc { - return []fsm.EventDesc{ - {Name: WorkflowEventStart, Src: []string{WorkflowStateNotStarted}, Dst: WorkflowStateStarted}, - - {Name: WorkflowEventSwitchReplicaReads, Src: []string{WorkflowStateStarted}, Dst: WorkflowStateReplicaReadsSwitched}, - {Name: WorkflowEventSwitchReplicaReads, Src: []string{WorkflowStateRdonlyReadsSwitched}, Dst: 
WorkflowStateReadsSwitched}, - {Name: WorkflowEventSwitchReplicaReads, Src: []string{WorkflowStateWritesSwitched}, Dst: WorkflowStateReplicaReadsAndWritesSwitched}, - - {Name: WorkflowEventSwitchRdonlyReads, Src: []string{WorkflowStateStarted}, Dst: WorkflowStateRdonlyReadsSwitched}, - {Name: WorkflowEventSwitchRdonlyReads, Src: []string{WorkflowStateReplicaReadsSwitched}, Dst: WorkflowStateReadsSwitched}, - {Name: WorkflowEventSwitchRdonlyReads, Src: []string{WorkflowStateWritesSwitched}, Dst: WorkflowStateRdonlyReadsAndWritesSwitched}, - - {Name: WorkflowEventSwitchReads, Src: []string{WorkflowStateStarted}, Dst: WorkflowStateReadsSwitched}, - {Name: WorkflowEventSwitchReads, Src: []string{WorkflowStateWritesSwitched}, Dst: WorkflowStateReadsAndWritesSwitched}, - - {Name: WorkflowEventSwitchWrites, Src: []string{WorkflowStateStarted}, Dst: WorkflowStateWritesSwitched}, - {Name: WorkflowEventSwitchWrites, Src: []string{WorkflowStateReadsSwitched}, Dst: WorkflowStateReadsAndWritesSwitched}, - {Name: WorkflowEventSwitchWrites, Src: []string{WorkflowStateReplicaReadsSwitched}, Dst: WorkflowStateReplicaReadsAndWritesSwitched}, - {Name: WorkflowEventSwitchWrites, Src: []string{WorkflowStateRdonlyReadsSwitched}, Dst: WorkflowStateRdonlyReadsAndWritesSwitched}, - - {Name: WorkflowEventComplete, Src: []string{WorkflowStateReadsAndWritesSwitched}, Dst: WorkflowStateCompleted}, - {Name: WorkflowEventAbort, Src: []string{WorkflowStateNotStarted, - WorkflowStateStarted, WorkflowStateReadsSwitched, WorkflowStateWritesSwitched}, Dst: WorkflowStateAborted}, - - {Name: WorkflowEventReverseReads, Src: []string{WorkflowStateReadsSwitched, - WorkflowStateReplicaReadsSwitched, WorkflowStateRdonlyReadsSwitched}, Dst: WorkflowStateStarted}, - {Name: WorkflowEventReverseReads, Src: []string{WorkflowStateReadsAndWritesSwitched, - WorkflowStateReplicaReadsAndWritesSwitched, WorkflowStateRdonlyReadsAndWritesSwitched}, Dst: WorkflowStateWritesSwitched}, - - {Name: 
WorkflowEventReverseWrites, Src: []string{WorkflowStateWritesSwitched}, Dst: WorkflowStateStarted}, - {Name: WorkflowEventReverseWrites, Src: []string{WorkflowStateReadsAndWritesSwitched}, Dst: WorkflowStateReadsSwitched}, - } -} - -func (mtwf *MoveTablesWorkflow) getCallbacks() map[string]fsm.Callback { - callbacks := make(map[string]fsm.Callback) - callbacks["before_"+WorkflowEventStart] = func(e *fsm.Event) { - if err := mtwf.initMoveTables(); err != nil { - e.Cancel(err) - } - } - callbacks["before_"+WorkflowEventSwitchReads] = func(e *fsm.Event) { - mtwf.params.TabletTypes = "replica,rdonly" - if err := mtwf.switchReads(); err != nil { - e.Cancel(err) - } - } - callbacks["before_"+WorkflowEventSwitchReplicaReads] = func(e *fsm.Event) { - mtwf.params.TabletTypes = "replica" - if err := mtwf.switchReads(); err != nil { - e.Cancel(err) - } - } - callbacks["before_"+WorkflowEventSwitchRdonlyReads] = func(e *fsm.Event) { - mtwf.params.TabletTypes = "rdonly" - if err := mtwf.switchReads(); err != nil { - e.Cancel(err) - } - } - callbacks["before_"+WorkflowEventSwitchWrites] = func(e *fsm.Event) { - if err := mtwf.switchWrites(); err != nil { - e.Cancel(err) - } - } - callbacks["before_"+WorkflowEventReverseReads] = func(e *fsm.Event) { - var tabletTypes []string - if mtwf.ws.ReplicaReadsSwitched && mtwf.ws.RdonlyReadsSwitched { - tabletTypes = append(tabletTypes, "replica", "rdonly") - } else if mtwf.ws.ReplicaReadsSwitched { - tabletTypes = append(tabletTypes, "replica") - } else if mtwf.ws.RdonlyReadsSwitched { - tabletTypes = append(tabletTypes, "rdonly") - - } else { - e.Cancel(fmt.Errorf("reads have not been switched for %s.%s", mtwf.params.TargetKeyspace, mtwf.params.Workflow)) - return - } - mtwf.params.TabletTypes = strings.Join(tabletTypes, ",") - mtwf.params.Direction = DirectionBackward - if err := mtwf.switchReads(); err != nil { - e.Cancel(err) - } - } - callbacks["before_"+WorkflowEventReverseWrites] = func(e *fsm.Event) { - mtwf.params.Direction = 
DirectionBackward - if err := mtwf.switchWrites(); err != nil { - e.Cancel(err) - } - } - - return callbacks -} - -func newWorkflow(name, typ string, callbacks map[string]fsm.Callback) (*reshardingWorkflowInfo, error) { +func newWorkflow(name, typ string) (*reshardingWorkflowInfo, error) { wf := &reshardingWorkflowInfo{ name: name, typ: typ, } - - wf.wsm = fsm.NewFSM(WorkflowStateNotStarted, getWorkflowTransitions(), callbacks) return wf, nil } @@ -189,24 +52,18 @@ type MoveTablesWorkflow struct { } func (mtwf *MoveTablesWorkflow) String() string { - s := fmt.Sprintf("%s workflow %s from keyspace %s to keyspace %s.\nCurrent State: %s", - mtwf.wf.typ, mtwf.wf.name, mtwf.params.SourceKeyspace, mtwf.params.TargetKeyspace, mtwf.wf.wsm.Current()) + s := "" return s } -// AvailableActions returns all available actions for a workflow, for display purposes -func (mtwf *MoveTablesWorkflow) AvailableActions() string { - return strings.Join(mtwf.wf.wsm.AvailableTransitions(), ",") -} - // MoveTablesParams stores args and options passed to a MoveTables command type MoveTablesParams struct { Workflow, SourceKeyspace, TargetKeyspace, Tables string Cells, TabletTypes, ExcludeTables string EnableReverseReplication, DryRun, AllTables bool - - Timeout time.Duration - Direction TrafficSwitchDirection + RenameTables, KeepData bool + Timeout time.Duration + Direction TrafficSwitchDirection } // NewMoveTablesWorkflow sets up a MoveTables workflow object based on options provided, deduces the state of the @@ -219,66 +76,83 @@ func (wr *Wrangler) NewMoveTablesWorkflow(ctx context.Context, params *MoveTable return nil, err } log.Infof("Workflow state is %+v", ws) - wf, err := newWorkflow(params.Workflow, "MoveTables", mtwf.getCallbacks()) + wf, err := newWorkflow(params.Workflow, "MoveTables") if err != nil { return nil, err } if ts != nil { //Other than on Start we need to get SourceKeyspace from the workflow + mtwf.params.TargetKeyspace = ts.targetKeyspace + mtwf.params.Workflow = 
ts.workflow mtwf.params.SourceKeyspace = ts.sourceKeyspace mtwf.ts = ts } mtwf.ws = ws - state := "" - if ts == nil { - state = WorkflowStateNotStarted - } else if ws.WritesSwitched { - if ws.ReplicaReadsSwitched && ws.RdonlyReadsSwitched { - state = WorkflowStateReadsAndWritesSwitched - } else if ws.RdonlyReadsSwitched { - state = WorkflowStateRdonlyReadsAndWritesSwitched - } else if ws.ReplicaReadsSwitched { - state = WorkflowStateReplicaReadsAndWritesSwitched - } else { - state = WorkflowStateWritesSwitched - } - } else if ws.RdonlyReadsSwitched && ws.ReplicaReadsSwitched { - state = WorkflowStateReadsSwitched - } else if ws.RdonlyReadsSwitched { - state = WorkflowStateRdonlyReadsSwitched - } else if ws.ReplicaReadsSwitched { - state = WorkflowStateReplicaReadsSwitched - } else { - state = WorkflowStateStarted - } - if state == "" { - return nil, fmt.Errorf("workflow is in an inconsistent state: %+v", mtwf) - } - log.Infof("Setting workflow state to %s", state) - wf.wsm.SetState(state) mtwf.wf = wf return mtwf, nil } -// FireEvent causes the transition of the workflow by applying a valid action specified in MoveTables -func (mtwf *MoveTablesWorkflow) FireEvent(ev string) error { - ev = eventNameMap[strings.ToLower(ev)] - return mtwf.wf.wsm.Event(ev) -} - -// IsActionValid checks if a MoveTables subcommand is a valid event for the current state of the workflow -func (mtwf *MoveTablesWorkflow) IsActionValid(ev string) bool { - ev = eventNameMap[strings.ToLower(ev)] - return mtwf.wf.wsm.Can(ev) +// Exists checks if the workflow has already been initiated +func (mtwf *MoveTablesWorkflow) Exists() bool { + return mtwf.ts == nil } // CurrentState returns the current state of the workflow's finite state machine func (mtwf *MoveTablesWorkflow) CurrentState() string { - return mtwf.wf.wsm.Current() + return "" //FIXME +} + +// Start initiates a workflow +func (mtwf *MoveTablesWorkflow) Start() error { + return mtwf.initMoveTables() } -// Visualize returns a graphViz 
script for the (static) resharding workflow state machine -func (mtwf *MoveTablesWorkflow) Visualize() string { - return fsm.Visualize(mtwf.wf.wsm) +// SwitchTraffic switches traffic forward for tablet_types passed +func (mtwf *MoveTablesWorkflow) SwitchTraffic() error { + mtwf.params.Direction = DirectionForward + + hasReplica, hasRdonly, hasMaster, err := mtwf.parseTabletTypes() + if err != nil { + return err + } + if hasReplica || hasRdonly { + if err := mtwf.switchReads(); err != nil { + return err + } + } + if hasMaster { + if err := mtwf.switchWrites(); err != nil { + return err + } + } + return nil +} + +// ReverseTraffic switches traffic backwards for tablet_types passed +func (mtwf *MoveTablesWorkflow) ReverseTraffic() error { + mtwf.params.Direction = DirectionBackward + return mtwf.SwitchTraffic() +} + +// Complete cleans up a successful workflow +func (mtwf *MoveTablesWorkflow) Complete() error { + var renameTable TableRemovalType + if mtwf.params.RenameTables { + renameTable = RenameTable + } else { + renameTable = DropTable + } + if _, err := mtwf.wr.DropSources(mtwf.ctx, mtwf.ws.TargetKeyspace, mtwf.ws.Workflow, renameTable, mtwf.params.KeepData, false, false); err != nil { + return err + } + return nil +} + +// Abort deletes all artifacts from a workflow which has not yet been switched +func (mtwf *MoveTablesWorkflow) Abort() error { + if _, err := mtwf.wr.DropTargets(mtwf.ctx, mtwf.ws.TargetKeyspace, mtwf.ws.Workflow, mtwf.params.KeepData, false); err != nil { + return err + } + return nil } // endregion @@ -302,6 +176,23 @@ func (mtwf *MoveTablesWorkflow) getTabletTypes() []topodatapb.TabletType { return tabletTypes } +func (mtwf *MoveTablesWorkflow) parseTabletTypes() (hasReplica, hasRdonly, hasMaster bool, err error) { + tabletTypesArr := strings.Split(mtwf.params.TabletTypes, ",") + for _, tabletType := range tabletTypesArr { + switch tabletType { + case "replica": + hasReplica = true + case "rdonly": + hasRdonly = true + case "master": + 
hasMaster = true + default: + return false, false, false, fmt.Errorf("invalid tablet type passed %s", tabletType) + } + } + return hasReplica, hasRdonly, hasMaster, nil +} + // endregion // region Core Actions @@ -314,7 +205,14 @@ func (mtwf *MoveTablesWorkflow) initMoveTables() error { func (mtwf *MoveTablesWorkflow) switchReads() error { log.Infof("In MoveTablesWorkflow.switchReads() for %+v", mtwf) - _, err := mtwf.wr.SwitchReads(mtwf.ctx, mtwf.params.TargetKeyspace, mtwf.wf.name, mtwf.getTabletTypes(), + var tabletTypes []topodatapb.TabletType + for _, tt := range mtwf.getTabletTypes() { + if tt != topodatapb.TabletType_MASTER { + tabletTypes = append(tabletTypes, tt) + } + } + + _, err := mtwf.wr.SwitchReads(mtwf.ctx, mtwf.params.TargetKeyspace, mtwf.wf.name, tabletTypes, mtwf.getCellsAsArray(), mtwf.params.Direction, false) if err != nil { return err diff --git a/test/local_example.sh b/test/local_example.sh index 9fe153240b4..a6938f71923 100755 --- a/test/local_example.sh +++ b/test/local_example.sh @@ -44,7 +44,7 @@ for shard in "customer/0"; do done; ./202_move_tables.sh - +exit sleep 3 # required for now ./203_switch_reads.sh From 583035d51242646cae78ce7d5d080630f5050427 Mon Sep 17 00:00:00 2001 From: Rohit Nayak Date: Fri, 18 Dec 2020 12:38:16 +0100 Subject: [PATCH 09/26] Add more validations, implement workflow state Signed-off-by: Rohit Nayak --- go/vt/vtctl/vtctl.go | 66 +++++++++++++++++------------- go/vt/wrangler/traffic_switcher.go | 3 +- go/vt/wrangler/workflow.go | 50 +++++++++++++++++++++- 3 files changed, 88 insertions(+), 31 deletions(-) diff --git a/go/vt/vtctl/vtctl.go b/go/vt/vtctl/vtctl.go index 7759747b7f2..f4324a73088 100644 --- a/go/vt/vtctl/vtctl.go +++ b/go/vt/vtctl/vtctl.go @@ -1986,6 +1986,41 @@ func commandMoveTables2(ctx context.Context, wr *wrangler.Wrangler, subFlags *fl DryRun: *dryRun, } + printDetails := func() error { + s := "" + res, err := wr.ShowWorkflow(ctx, workflow, target) + if err != nil { + return err + } + s += 
"Following vreplication streams are running for this workflow:\n\n" + for ksShard := range res.ShardStatuses { + statuses := res.ShardStatuses[ksShard].MasterReplicationStatuses + for _, st := range statuses { + //status.State, status.TransactionTimestamp, status.TimeUpdated, status.Tablet, status.ID, status.Message, status.Pos + now := time.Now().Nanosecond() + msg := "" + updateLag := int64(now) - st.TimeUpdated + if updateLag > 0*1e9 { + msg += " Vstream may not be running." + } + txLag := int64(now) - st.TransactionTimestamp + msg += fmt.Sprintf(" VStream Lag: %ds", txLag/1e9) + s += fmt.Sprintf("Stream %s (id=%d) :: Status: %s.%s\n", ksShard, st.ID, st.State, msg) + } + } + wr.Logger().Printf("\n%s\n\n", s) + return nil + } + + wrapError := func(wf *wrangler.MoveTablesWorkflow, err error) error { + wr.Logger().Errorf("\n%s\n", err.Error()) + wr.Logger().Infof("Workflow Status: %s\n", wf.CurrentState()) + if wf.Exists() { + printDetails() + } + return err + } + //TODO: check if invalid parameters were passed in that do not apply to this action //originalAction := action action = strings.ToLower(action) // allow users to input action in a case-insensitive manner @@ -2023,31 +2058,6 @@ func commandMoveTables2(ctx context.Context, wr *wrangler.Wrangler, subFlags *fl return fmt.Errorf("workflow %s does not exist", ksWorkflow) } - printDetails := func() error { - s := "" - res, err := wr.ShowWorkflow(ctx, workflow, target) - if err != nil { - return err - } - s += "Following vreplication streams are running for this workflow:\n\n" - for ksShard := range res.ShardStatuses { - statuses := res.ShardStatuses[ksShard].MasterReplicationStatuses - for _, st := range statuses { - //status.State, status.TransactionTimestamp, status.TimeUpdated, status.Tablet, status.ID, status.Message, status.Pos - now := time.Now().Nanosecond() - msg := "" - updateLag := int64(now) - st.TimeUpdated - if updateLag > 0*1e9 { - msg += " Vstream may not be running." 
- } - txLag := int64(now) - st.TransactionTimestamp - msg += fmt.Sprintf(" VStream Lag: %ds", txLag/1e9) - s += fmt.Sprintf("Stream %s (id=%d) :: Status: %s.%s\n", ksShard, st.ID, st.State, msg) - } - } - wr.Logger().Printf("\n%s\n\n", s) - return nil - } switch action { case "show": return printDetails() @@ -2081,15 +2091,15 @@ func commandMoveTables2(ctx context.Context, wr *wrangler.Wrangler, subFlags *fl case "switchtraffic": if err := wf.SwitchTraffic(); err != nil { log.Warningf("SwitchTraffic %s error: %+v", action, wf) - return err + return wrapError(wf, err) } case "reversetraffic": if err := wf.ReverseTraffic(); err != nil { log.Warningf("ReverseTraffic %s error: %+v", action, wf) - return err + return wrapError(wf, err) } } - wr.Logger().Printf("MoveTables %s was successful\n\n%s\n\n", action, wf) + wr.Logger().Printf("MoveTables %s was successful\n\nCurrent State: %s\n\n", action, wf.CurrentState()) return nil } diff --git a/go/vt/wrangler/traffic_switcher.go b/go/vt/wrangler/traffic_switcher.go index 51165e1ab65..fa6421ffe06 100644 --- a/go/vt/wrangler/traffic_switcher.go +++ b/go/vt/wrangler/traffic_switcher.go @@ -144,8 +144,7 @@ type workflowState struct { RdonlyCellsSwitched []string RdonlyCellsNotSwitched []string - ReplicaReadsSwitched, RdonlyReadsSwitched bool - WritesSwitched bool + WritesSwitched bool } func (wr *Wrangler) getCellsWithShardReadsSwitched(ctx context.Context, targetKeyspace string, si *topo.ShardInfo, tabletType string) ( diff --git a/go/vt/wrangler/workflow.go b/go/vt/wrangler/workflow.go index 00fa3d4308b..cb0e98e7ebc 100644 --- a/go/vt/wrangler/workflow.go +++ b/go/vt/wrangler/workflow.go @@ -98,16 +98,52 @@ func (mtwf *MoveTablesWorkflow) Exists() bool { // CurrentState returns the current state of the workflow's finite state machine func (mtwf *MoveTablesWorkflow) CurrentState() string { - return "" //FIXME + var stateInfo []string + ws := mtwf.ws + s := "" + if !mtwf.Exists() { + stateInfo = append(stateInfo, "Not 
Started") + } else { + if len(ws.RdonlyCellsNotSwitched) == 0 && len(ws.ReplicaCellsNotSwitched) == 0 { + s = "All Reads Switched" + } else if len(ws.RdonlyCellsSwitched) == 0 && len(ws.ReplicaCellsSwitched) == 0 { + s = "Reads Not Switched" + } else { + s = "Reads Partially Switched: " + if len(ws.ReplicaCellsNotSwitched) == 0 { + s += "All Replica Reads Switched" + } else { + s += "Replicas switched in cells: " + strings.Join(ws.ReplicaCellsSwitched, ",") + } + if len(ws.RdonlyCellsNotSwitched) == 0 { + s += "All Rdonly Reads Switched" + } else { + s += "Rdonly switched in cells: " + strings.Join(ws.RdonlyCellsSwitched, ",") + } + } + stateInfo = append(stateInfo, s) + if ws.WritesSwitched { + stateInfo = append(stateInfo, "Writes Switched") + } else { + stateInfo = append(stateInfo, "Writes Not Switched") + } + } + return strings.Join(stateInfo, ". ") } // Start initiates a workflow func (mtwf *MoveTablesWorkflow) Start() error { + if mtwf.Exists() { + return fmt.Errorf("workflow has already been started") + } return mtwf.initMoveTables() } // SwitchTraffic switches traffic forward for tablet_types passed func (mtwf *MoveTablesWorkflow) SwitchTraffic() error { + if !mtwf.Exists() { + return fmt.Errorf("workflow has not yet been started") + } mtwf.params.Direction = DirectionForward hasReplica, hasRdonly, hasMaster, err := mtwf.parseTabletTypes() @@ -129,12 +165,20 @@ func (mtwf *MoveTablesWorkflow) SwitchTraffic() error { // ReverseTraffic switches traffic backwards for tablet_types passed func (mtwf *MoveTablesWorkflow) ReverseTraffic() error { + if !mtwf.Exists() { + return fmt.Errorf("workflow has not yet been started") + } + mtwf.params.Direction = DirectionBackward return mtwf.SwitchTraffic() } // Complete cleans up a successful workflow func (mtwf *MoveTablesWorkflow) Complete() error { + ws := mtwf.ws + if !ws.WritesSwitched || len(ws.ReplicaCellsNotSwitched) > 0 || len(ws.RdonlyCellsNotSwitched) > 0 { + return fmt.Errorf("cannot complete workflow because 
you have not yet switched all read and write traffic") + } var renameTable TableRemovalType if mtwf.params.RenameTables { renameTable = RenameTable @@ -149,6 +193,10 @@ func (mtwf *MoveTablesWorkflow) Complete() error { // Abort deletes all artifacts from a workflow which has not yet been switched func (mtwf *MoveTablesWorkflow) Abort() error { + ws := mtwf.ws + if ws.WritesSwitched || len(ws.ReplicaCellsSwitched) > 0 || len(ws.RdonlyCellsSwitched) > 0 { + return fmt.Errorf("cannot abort workflow because you have already switched some or all read and write traffic") + } if _, err := mtwf.wr.DropTargets(mtwf.ctx, mtwf.ws.TargetKeyspace, mtwf.ws.Workflow, mtwf.params.KeepData, false); err != nil { return err } From b990a3fc18b1eaf64c0c9252ba89d76784dda376 Mon Sep 17 00:00:00 2001 From: Rohit Nayak Date: Fri, 18 Dec 2020 20:26:38 +0100 Subject: [PATCH 10/26] Add single step switch/reverse traffic test, get existing tests working Signed-off-by: Rohit Nayak --- .../resharding_workflows_v2_test.go | 122 ++++++++++-------- go/vt/vtctl/vtctl.go | 13 +- go/vt/wrangler/workflow.go | 30 +++-- 3 files changed, 99 insertions(+), 66 deletions(-) diff --git a/go/test/endtoend/vreplication/resharding_workflows_v2_test.go b/go/test/endtoend/vreplication/resharding_workflows_v2_test.go index 25efd2b1b02..1fe88f8c02e 100644 --- a/go/test/endtoend/vreplication/resharding_workflows_v2_test.go +++ b/go/test/endtoend/vreplication/resharding_workflows_v2_test.go @@ -35,33 +35,35 @@ const ( reverseKsWorkflow = sourceKs + "." 
+ moveTablesWorkflowName + "_reverse" tablesToMove = "customer" defaultCellName = "zone1" - query = "select * from customer" + readQuery = "select * from customer" ) var ( customerTab1, customerTab2, productReplicaTab, customerReplicaTab1, productTab *cluster.VttabletProcess ) -func moveTablesStart(t *testing.T) { - moveTables2(t, defaultCellName, moveTablesWorkflowName, sourceKs, targetKs, tablesToMove, wrangler.WorkflowEventStart) +func moveTables2Start(t *testing.T) { + moveTables2(t, defaultCellName, moveTablesWorkflowName, sourceKs, targetKs, tablesToMove, wrangler.WorkflowActionStart, "") catchup(t, customerTab1, moveTablesWorkflowName, "MoveTables") catchup(t, customerTab2, moveTablesWorkflowName, "MoveTables") vdiff(t, ksWorkflow) } -func moveTables2(t *testing.T, cells, workflow, sourceKs, targetKs, tables, action string) { +func moveTables2(t *testing.T, cells, workflow, sourceKs, targetKs, tables, action, tabletTypes string) { var args []string args = append(args, "MoveTables", "-v2") - action = strings.ToLower(action) switch action { - case "start": + case wrangler.WorkflowActionStart: args = append(args, "-source", sourceKs, "-tables", tables) - case "switchreads": - case "switchwrites": + case wrangler.WorkflowActionSwitchTraffic: + case wrangler.WorkflowActionReverseTraffic: } if cells != "" { args = append(args, "-cells", cells) } + if tabletTypes != "" { + args = append(args, "-tablet_types", tabletTypes) + } ksWorkflow := fmt.Sprintf("%s.%s", targetKs, workflow) args = append(args, action, ksWorkflow) if err := vc.VtctlClient.ExecuteCommand(args...); err != nil { @@ -69,37 +71,47 @@ func moveTables2(t *testing.T, cells, workflow, sourceKs, targetKs, tables, acti } } -func moveTablesSwitchReads(t *testing.T, typ string) { - var action string - switch typ { - case "replica": - action = wrangler.WorkflowEventSwitchReplicaReads - case "rdonly": - action = wrangler.WorkflowEventSwitchRdonlyReads - default: - action = wrangler.WorkflowEventSwitchReads - 
} - moveTables2(t, defaultCellName, moveTablesWorkflowName, "", targetKs, "", action) +func moveTablesSwitchReads(t *testing.T, tabletTypes string) { + moveTables2(t, defaultCellName, moveTablesWorkflowName, "", targetKs, "", wrangler.WorkflowActionSwitchTraffic, "replica,rdonly") +} + +func moveTablesReverseReads(t *testing.T, tabletTypes string) { + moveTables2(t, defaultCellName, moveTablesWorkflowName, "", targetKs, "", wrangler.WorkflowActionReverseTraffic, "replica,rdonly") } func moveTablesSwitchWrites(t *testing.T) { - moveTables2(t, defaultCellName, moveTablesWorkflowName, "", targetKs, "", wrangler.WorkflowEventSwitchWrites) + moveTables2(t, defaultCellName, moveTablesWorkflowName, "", targetKs, "", wrangler.WorkflowActionSwitchTraffic, "master") } func moveTablesReverseWrites(t *testing.T) { - moveTables2(t, defaultCellName, moveTablesWorkflowName, "", targetKs, "", wrangler.WorkflowEventReverseWrites) + moveTables2(t, defaultCellName, moveTablesWorkflowName, "", targetKs, "", wrangler.WorkflowActionReverseTraffic, "master") +} + +func moveTablesSwitchReadsAndWrites(t *testing.T) { + moveTables2(t, defaultCellName, moveTablesWorkflowName, "", targetKs, "", wrangler.WorkflowActionSwitchTraffic, "replica,rdonly,master") +} + +func moveTablesReverseReadsAndWrites(t *testing.T) { + moveTables2(t, defaultCellName, moveTablesWorkflowName, "", targetKs, "", wrangler.WorkflowActionReverseTraffic, "replica,rdonly,master") } -func moveTablesReverseReads(t *testing.T) { - moveTables2(t, defaultCellName, moveTablesWorkflowName, "", targetKs, "", wrangler.WorkflowEventReverseReads) +func validateReadsRoute(t *testing.T, tabletTypes string, tablet *cluster.VttabletProcess) { + if tabletTypes == "" { + tabletTypes = "replica,rdonly" + } + for _, tt := range []string{"replica", "rdonly"} { + if strings.Contains(tabletTypes, tt) { + require.True(t, validateThatQueryExecutesOnTablet(t, vtgateConn, tablet, "product@"+tt, readQuery, readQuery)) + } + } } -func 
validateReadsRouteToSource(t *testing.T) { - require.True(t, validateThatQueryExecutesOnTablet(t, vtgateConn, productReplicaTab, "product@replica", query, query)) +func validateReadsRouteToSource(t *testing.T, tabletTypes string) { + validateReadsRoute(t, tabletTypes, productReplicaTab) } -func validateReadsRouteToTarget(t *testing.T) { - require.True(t, validateThatQueryExecutesOnTablet(t, vtgateConn, customerReplicaTab1, "product@replica", query, query)) +func validateReadsRouteToTarget(t *testing.T, tabletTypes string) { + validateReadsRoute(t, tabletTypes, customerReplicaTab1) } func validateWritesRouteToSource(t *testing.T) { @@ -121,7 +133,7 @@ func revert(t *testing.T) { switchWrites(t, reverseKsWorkflow, false) validateWritesRouteToSource(t) switchReadsNew(t, allCellNames, ksWorkflow, true) - validateReadsRouteToSource(t) + validateReadsRouteToSource(t, "replica") queries := []string{ "delete from _vt.vreplication", "delete from _vt.resharding_journal", @@ -144,46 +156,54 @@ func TestMoveTablesV2Workflow(t *testing.T) { //defer vc.TearDown() setupCustomerKeyspace(t) - moveTablesStart(t) + moveTables2Start(t) printRoutingRules(t, vc, "After MoveTables Started") - validateReadsRouteToSource(t) + validateReadsRouteToSource(t, "replica") validateWritesRouteToSource(t) moveTablesSwitchReads(t, "") printRoutingRules(t, vc, "After SwitchReads") - validateReadsRouteToTarget(t) + validateReadsRouteToTarget(t, "replica") validateWritesRouteToSource(t) moveTablesSwitchWrites(t) printRoutingRules(t, vc, "After SwitchWrites") - validateReadsRouteToTarget(t) + validateReadsRouteToTarget(t, "replica") validateWritesRouteToTarget(t) - moveTablesReverseReads(t) + moveTablesReverseReads(t, "") printRoutingRules(t, vc, "After ReverseReads") - validateReadsRouteToSource(t) + validateReadsRouteToSource(t, "replica") validateWritesRouteToTarget(t) moveTablesReverseWrites(t) - validateReadsRouteToSource(t) + validateReadsRouteToSource(t, "replica") validateWritesRouteToSource(t) 
printRoutingRules(t, vc, "After ReverseWrites") moveTablesSwitchWrites(t) - validateReadsRouteToSource(t) + validateReadsRouteToSource(t, "replica") validateWritesRouteToTarget(t) moveTablesReverseWrites(t) - validateReadsRouteToSource(t) + validateReadsRouteToSource(t, "replica") validateWritesRouteToSource(t) moveTablesSwitchReads(t, "") - validateReadsRouteToTarget(t) + validateReadsRouteToTarget(t, "replica") validateWritesRouteToSource(t) - moveTablesReverseReads(t) - validateReadsRouteToSource(t) + moveTablesReverseReads(t, "") + validateReadsRouteToSource(t, "replica") validateWritesRouteToSource(t) + + moveTablesSwitchReadsAndWrites(t) + validateReadsRouteToTarget(t, "replica") + validateWritesRouteToTarget(t) + moveTablesReverseReadsAndWrites(t) + validateReadsRouteToSource(t, "replica") + validateWritesRouteToSource(t) + } func setupCluster(t *testing.T) *VitessCluster { @@ -269,9 +289,9 @@ func moveCustomerTableSwitchFlows(t *testing.T, cells []*Cell, sourceCellOrAlias var switchReadsFollowedBySwitchWrites = func() { moveTablesAndWait() - validateReadsRouteToSource(t) + validateReadsRouteToSource(t, "replica") switchReadsNew(t, allCellNames, ksWorkflow, false) - validateReadsRouteToTarget(t) + validateReadsRouteToTarget(t, "replica") validateWritesRouteToSource(t) switchWrites(t, ksWorkflow, false) @@ -286,9 +306,9 @@ func moveCustomerTableSwitchFlows(t *testing.T, cells []*Cell, sourceCellOrAlias switchWrites(t, ksWorkflow, false) validateWritesRouteToTarget(t) - validateReadsRouteToSource(t) + validateReadsRouteToSource(t, "replica") switchReadsNew(t, allCellNames, ksWorkflow, false) - validateReadsRouteToTarget(t) + validateReadsRouteToTarget(t, "replica") revert(t) } @@ -296,12 +316,12 @@ func moveCustomerTableSwitchFlows(t *testing.T, cells []*Cell, sourceCellOrAlias var switchReadsReverseSwitchWritesSwitchReads = func() { moveTablesAndWait() - validateReadsRouteToSource(t) + validateReadsRouteToSource(t, "replica") switchReadsNew(t, allCellNames, 
ksWorkflow, false) - validateReadsRouteToTarget(t) + validateReadsRouteToTarget(t, "replica") switchReadsNew(t, allCellNames, ksWorkflow, true) - validateReadsRouteToSource(t) + validateReadsRouteToSource(t, "replica") printRoutingRules(t, vc, "After reversing SwitchReads") validateWritesRouteToSource(t) @@ -309,9 +329,9 @@ func moveCustomerTableSwitchFlows(t *testing.T, cells []*Cell, sourceCellOrAlias validateWritesRouteToTarget(t) printRoutingRules(t, vc, "After SwitchWrites and reversing SwitchReads") - validateReadsRouteToSource(t) + validateReadsRouteToSource(t, "replica") switchReadsNew(t, allCellNames, ksWorkflow, false) - validateReadsRouteToTarget(t) + validateReadsRouteToTarget(t, "replica") revert(t) } @@ -326,9 +346,9 @@ func moveCustomerTableSwitchFlows(t *testing.T, cells []*Cell, sourceCellOrAlias switchWrites(t, ksWorkflow, true) validateWritesRouteToSource(t) - validateReadsRouteToSource(t) + validateReadsRouteToSource(t, "replica") switchReadsNew(t, allCellNames, ksWorkflow, false) - validateReadsRouteToTarget(t) + validateReadsRouteToTarget(t, "replica") validateWritesRouteToSource(t) switchWrites(t, ksWorkflow, false) diff --git a/go/vt/vtctl/vtctl.go b/go/vt/vtctl/vtctl.go index f4324a73088..aa40efa955d 100644 --- a/go/vt/vtctl/vtctl.go +++ b/go/vt/vtctl/vtctl.go @@ -2014,6 +2014,7 @@ func commandMoveTables2(ctx context.Context, wr *wrangler.Wrangler, subFlags *fl wrapError := func(wf *wrangler.MoveTablesWorkflow, err error) error { wr.Logger().Errorf("\n%s\n", err.Error()) + log.Infof("In wrapError wf is %+v", wf) wr.Logger().Infof("Workflow Status: %s\n", wf.CurrentState()) if wf.Exists() { printDetails() @@ -2088,14 +2089,20 @@ func commandMoveTables2(ctx context.Context, wr *wrangler.Wrangler, subFlags *fl wr.Logger().Printf("\n%s\n", s) } return printDetails() + case "start": + if err := wf.Start(); err != nil { + log.Warningf("Start %s error: %v", action, wf) + + return wrapError(wf, err) + } case "switchtraffic": - if err := 
wf.SwitchTraffic(); err != nil { - log.Warningf("SwitchTraffic %s error: %+v", action, wf) + if err := wf.SwitchTraffic(wrangler.DirectionForward); err != nil { + log.Warningf("SwitchTraffic %s error: %v", action, wf) return wrapError(wf, err) } case "reversetraffic": if err := wf.ReverseTraffic(); err != nil { - log.Warningf("ReverseTraffic %s error: %+v", action, wf) + log.Warningf("ReverseTraffic %s error: %v", action, wf) return wrapError(wf, err) } } diff --git a/go/vt/wrangler/workflow.go b/go/vt/wrangler/workflow.go index cb0e98e7ebc..2db6aa7f087 100644 --- a/go/vt/wrangler/workflow.go +++ b/go/vt/wrangler/workflow.go @@ -16,15 +16,21 @@ import ( /* TODO - * validations for each command - * implement/test Reshard same as MoveTables! - * test cells/tablet_types/etc options to MoveTables + * expand e2e for testing all possible transitions * Unit Tests (run coverage first and identify) - * expand e2e for testing all possible transitions + * implement/test Reshard same as MoveTables! 
*/ +const ( + WorkflowActionStart = "Start" + WorkflowActionSwitchTraffic = "SwitchTraffic" + WorkflowActionReverseTraffic = "ReverseTraffic" + WorkflowActionComplete = "Complete" + WorkflowActionAbort = "Abort" +) + type reshardingWorkflowInfo struct { name string typ string @@ -84,8 +90,8 @@ func (wr *Wrangler) NewMoveTablesWorkflow(ctx context.Context, params *MoveTable mtwf.params.TargetKeyspace = ts.targetKeyspace mtwf.params.Workflow = ts.workflow mtwf.params.SourceKeyspace = ts.sourceKeyspace - mtwf.ts = ts } + mtwf.ts = ts mtwf.ws = ws mtwf.wf = wf return mtwf, nil @@ -93,11 +99,14 @@ func (wr *Wrangler) NewMoveTablesWorkflow(ctx context.Context, params *MoveTable // Exists checks if the workflow has already been initiated func (mtwf *MoveTablesWorkflow) Exists() bool { - return mtwf.ts == nil + log.Infof("mtwf %v", mtwf) + + return mtwf.ws != nil } // CurrentState returns the current state of the workflow's finite state machine func (mtwf *MoveTablesWorkflow) CurrentState() string { + log.Infof("mtwf %v", mtwf) var stateInfo []string ws := mtwf.ws s := "" @@ -140,12 +149,11 @@ func (mtwf *MoveTablesWorkflow) Start() error { } // SwitchTraffic switches traffic forward for tablet_types passed -func (mtwf *MoveTablesWorkflow) SwitchTraffic() error { +func (mtwf *MoveTablesWorkflow) SwitchTraffic(direction TrafficSwitchDirection) error { if !mtwf.Exists() { return fmt.Errorf("workflow has not yet been started") } - mtwf.params.Direction = DirectionForward - + mtwf.params.Direction = direction hasReplica, hasRdonly, hasMaster, err := mtwf.parseTabletTypes() if err != nil { return err @@ -168,9 +176,7 @@ func (mtwf *MoveTablesWorkflow) ReverseTraffic() error { if !mtwf.Exists() { return fmt.Errorf("workflow has not yet been started") } - - mtwf.params.Direction = DirectionBackward - return mtwf.SwitchTraffic() + return mtwf.SwitchTraffic(DirectionBackward) } // Complete cleans up a successful workflow From b64025d9204beb9e709a6e08fe0abd8e26d552e9 Mon Sep 17 
00:00:00 2001 From: Rohit Nayak Date: Sat, 19 Dec 2020 14:50:34 +0100 Subject: [PATCH 11/26] Fix more tests Signed-off-by: Rohit Nayak --- .../resharding_workflows_v2_test.go | 3 +- .../tabletmanager/vreplication/controller.go | 2 +- go/vt/wrangler/traffic_switcher.go | 39 +--------- go/vt/wrangler/traffic_switcher_test.go | 8 -- go/vt/wrangler/vdiff.go | 2 +- go/vt/wrangler/workflow.go | 16 +++- go/vt/wrangler/workflow_test.go | 76 +++++++++++++++++++ 7 files changed, 96 insertions(+), 50 deletions(-) create mode 100644 go/vt/wrangler/workflow_test.go diff --git a/go/test/endtoend/vreplication/resharding_workflows_v2_test.go b/go/test/endtoend/vreplication/resharding_workflows_v2_test.go index 1fe88f8c02e..ee5ca41e753 100644 --- a/go/test/endtoend/vreplication/resharding_workflows_v2_test.go +++ b/go/test/endtoend/vreplication/resharding_workflows_v2_test.go @@ -153,7 +153,7 @@ func revert(t *testing.T) { func TestMoveTablesV2Workflow(t *testing.T) { vc = setupCluster(t) defer vtgateConn.Close() - //defer vc.TearDown() + defer vc.TearDown() setupCustomerKeyspace(t) moveTables2Start(t) @@ -259,6 +259,7 @@ func setupCustomerKeyspace(t *testing.T) { func TestSwitchReadsWritesInAnyOrder(t *testing.T) { vc = setupCluster(t) + defer vc.TearDown() moveCustomerTableSwitchFlows(t, []*Cell{vc.Cells["zone1"]}, "zone1") } diff --git a/go/vt/vttablet/tabletmanager/vreplication/controller.go b/go/vt/vttablet/tabletmanager/vreplication/controller.go index dbc8f6c6170..e92c3304afc 100644 --- a/go/vt/vttablet/tabletmanager/vreplication/controller.go +++ b/go/vt/vttablet/tabletmanager/vreplication/controller.go @@ -155,7 +155,7 @@ func (ct *controller) run(ctx context.Context) { timer := time.NewTimer(*retryDelay) select { case <-ctx.Done(): - log.Warningf("context canceleld: %s", err.Error()) + log.Warningf("context canceled: %s", err.Error()) timer.Stop() return case <-timer.C: diff --git a/go/vt/wrangler/traffic_switcher.go b/go/vt/wrangler/traffic_switcher.go index 
fa6421ffe06..7988b8d6d91 100644 --- a/go/vt/wrangler/traffic_switcher.go +++ b/go/vt/wrangler/traffic_switcher.go @@ -328,11 +328,7 @@ func (wr *Wrangler) SwitchReads(ctx context.Context, targetKeyspace, workflow st sw = &switcher{ts: ts, wr: wr} } - // FIXME: revisit marking streams frozen - //if ts.frozen { - //return nil, fmt.Errorf("cannot switch reads while SwitchWrites is in progress") - //} - if err := ts.validate(ctx, false /* isWrite */); err != nil { + if err := ts.validate(ctx); err != nil { ts.wr.Logger().Errorf("validate failed: %v", err) return nil, err } @@ -391,7 +387,7 @@ func (wr *Wrangler) SwitchWrites(ctx context.Context, targetKeyspace, workflow s } ts.wr.Logger().Infof("Built switching metadata: %+v", ts) - if err := ts.validate(ctx, true /* isWrite */); err != nil { + if err := ts.validate(ctx); err != nil { ts.wr.Logger().Errorf("validate failed: %v", err) return 0, nil, err } @@ -820,7 +816,7 @@ func hashStreams(targetKeyspace string, targets map[string]*tsTarget) int64 { return int64(hasher.Sum64() & math.MaxInt64) } -func (ts *trafficSwitcher) validate(ctx context.Context, isWrite bool) error { +func (ts *trafficSwitcher) validate(ctx context.Context) error { if ts.migrationType == binlogdatapb.MigrationType_TABLES { // All shards must be present. 
if err := ts.compareShards(ctx, ts.sourceKeyspace, ts.sourceShards()); err != nil { @@ -835,13 +831,6 @@ func (ts *trafficSwitcher) validate(ctx context.Context, isWrite bool) error { return fmt.Errorf("cannot migrate streams with wild card table names: %v", table) } } - if isWrite { - return ts.validateTableForWrite(ctx) - } - } else { // binlogdatapb.MigrationType_SHARDS - if isWrite { - return ts.validateShardForWrite(ctx) - } } return nil } @@ -1294,23 +1283,7 @@ func (ts *trafficSwitcher) changeWriteRoute(ctx context.Context) error { if err != nil { return err } - // We assume that the following rules were setup when the targets were created: - // table -> sourceKeyspace.table - // targetKeyspace.table -> sourceKeyspace.table - // Additionally, SwitchReads would have added rules like this: - // table@replica -> targetKeyspace.table - // targetKeyspace.table@replica -> targetKeyspace.table - // After this step, only the following rules will be left: - // table -> targetKeyspace.table - // sourceKeyspace.table -> targetKeyspace.table for _, table := range ts.tables { - //for _, tabletType := range []topodatapb.TabletType{topodatapb.TabletType_REPLICA, topodatapb.TabletType_RDONLY} { - // tt := strings.ToLower(tabletType.String()) - // delete(rules, table+"@"+tt) - // delete(rules, ts.targetKeyspace+"."+table+"@"+tt) - // delete(rules, ts.sourceKeyspace+"."+table+"@"+tt) - // ts.wr.Logger().Infof("Delete routing: %v %v %v", table+"@"+tt, ts.targetKeyspace+"."+table+"@"+tt, ts.sourceKeyspace+"."+table+"@"+tt) - //} delete(rules, ts.targetKeyspace+"."+table) ts.wr.Logger().Infof("Delete routing: %v", ts.targetKeyspace+"."+table) rules[table] = []string{ts.targetKeyspace + "." 
+ table} @@ -1320,11 +1293,7 @@ func (ts *trafficSwitcher) changeWriteRoute(ctx context.Context) error { if err := ts.wr.saveRoutingRules(ctx, rules); err != nil { return err } - var cells []string - if len(ts.optCells) > 0 { - cells = strings.Split(ts.optCells, ",") - } - return ts.wr.ts.RebuildSrvVSchema(ctx, cells) + return ts.wr.ts.RebuildSrvVSchema(ctx, nil) } func (ts *trafficSwitcher) changeShardRouting(ctx context.Context) error { diff --git a/go/vt/wrangler/traffic_switcher_test.go b/go/vt/wrangler/traffic_switcher_test.go index a46fd338570..043d08230b6 100644 --- a/go/vt/wrangler/traffic_switcher_test.go +++ b/go/vt/wrangler/traffic_switcher_test.go @@ -600,14 +600,6 @@ func TestShardMigrateMainflow(t *testing.T) { } verifyQueries(t, tme.allDBClients) - //------------------------------------------------------------------------------------------------------------------- - // Can't switch writes if REPLICA and RDONLY have not fully switched yet. - _, _, err = tme.wr.SwitchWrites(ctx, tme.targetKeyspace, "test", 1*time.Second, false, false, true, false) - want = "cannot switch MASTER away" - if err == nil || !strings.Contains(err.Error(), want) { - t.Errorf("SwitchWrites err: %v, want %v", err, want) - } - verifyQueries(t, tme.allDBClients) //------------------------------------------------------------------------------------------------------------------- // Test SwitchWrites cancelation on failure. 
diff --git a/go/vt/wrangler/vdiff.go b/go/vt/wrangler/vdiff.go index b232dc25a8e..4e72ab30bb8 100644 --- a/go/vt/wrangler/vdiff.go +++ b/go/vt/wrangler/vdiff.go @@ -146,7 +146,7 @@ func (wr *Wrangler) VDiff(ctx context.Context, targetKeyspace, workflow, sourceC wr.Logger().Errorf("buildTrafficSwitcher: %v", err) return nil, err } - if err := ts.validate(ctx, false /* isWrite */); err != nil { + if err := ts.validate(ctx); err != nil { ts.wr.Logger().Errorf("validate: %v", err) return nil, err } diff --git a/go/vt/wrangler/workflow.go b/go/vt/wrangler/workflow.go index 2db6aa7f087..c97c67fb4f4 100644 --- a/go/vt/wrangler/workflow.go +++ b/go/vt/wrangler/workflow.go @@ -17,8 +17,11 @@ import ( /* TODO * expand e2e for testing all possible transitions + (Complete/Abort Switch/Reverse Replica/Rdonly) * Unit Tests (run coverage first and identify) + (CurrentState()) + * dry run * implement/test Reshard same as MoveTables! */ @@ -58,7 +61,7 @@ type MoveTablesWorkflow struct { } func (mtwf *MoveTablesWorkflow) String() string { - s := "" + s := "" //FIXME return s } @@ -99,7 +102,7 @@ func (wr *Wrangler) NewMoveTablesWorkflow(ctx context.Context, params *MoveTable // Exists checks if the workflow has already been initiated func (mtwf *MoveTablesWorkflow) Exists() bool { - log.Infof("mtwf %v", mtwf) + log.Infof("mtwf %+v", *mtwf) return mtwf.ws != nil } @@ -179,11 +182,16 @@ func (mtwf *MoveTablesWorkflow) ReverseTraffic() error { return mtwf.SwitchTraffic(DirectionBackward) } +const ( + ErrWorkflowNotFullySwitched = "cannot complete workflow because you have not yet switched all read and write traffic" + ErrWorkflowPartiallySwitched = "cannot abort workflow because you have already switched some or all read and write traffic" +) + // Complete cleans up a successful workflow func (mtwf *MoveTablesWorkflow) Complete() error { ws := mtwf.ws if !ws.WritesSwitched || len(ws.ReplicaCellsNotSwitched) > 0 || len(ws.RdonlyCellsNotSwitched) > 0 { - return fmt.Errorf("cannot 
complete workflow because you have not yet switched all read and write traffic") + return fmt.Errorf(ErrWorkflowNotFullySwitched) } var renameTable TableRemovalType if mtwf.params.RenameTables { @@ -201,7 +209,7 @@ func (mtwf *MoveTablesWorkflow) Complete() error { func (mtwf *MoveTablesWorkflow) Abort() error { ws := mtwf.ws if ws.WritesSwitched || len(ws.ReplicaCellsSwitched) > 0 || len(ws.RdonlyCellsSwitched) > 0 { - return fmt.Errorf("cannot abort workflow because you have already switched some or all read and write traffic") + return fmt.Errorf(ErrWorkflowPartiallySwitched) } if _, err := mtwf.wr.DropTargets(mtwf.ctx, mtwf.ws.TargetKeyspace, mtwf.ws.Workflow, mtwf.params.KeepData, false); err != nil { return err diff --git a/go/vt/wrangler/workflow_test.go b/go/vt/wrangler/workflow_test.go new file mode 100644 index 00000000000..0eac7f0165d --- /dev/null +++ b/go/vt/wrangler/workflow_test.go @@ -0,0 +1,76 @@ +/* +Copyright 2020 The Vitess Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package wrangler + +import ( + "testing" + + "github.com/stretchr/testify/require" + "golang.org/x/net/context" + "vitess.io/vitess/go/vt/proto/topodata" +) + +func getMoveTablesWorkflow(t *testing.T, cells, tabletTypes string) *MoveTablesWorkflow { + mtp := &MoveTablesParams{ + Workflow: "wf1", + SourceKeyspace: "sourceks", + TargetKeyspace: "targetks", + Tables: "customer,corder", + Cells: cells, + TabletTypes: tabletTypes, + } + wf, _ := newWorkflow("wf1", "MoveTables") + mtwf := &MoveTablesWorkflow{ + ctx: context.Background(), + wf: wf, + wr: nil, + params: mtp, + ts: nil, + ws: nil, + } + return mtwf +} + +func TestReshardingWorkflowErrorsAndMisc(t *testing.T) { + mtwf := getMoveTablesWorkflow(t, "cell1,cell2", "replica,rdonly") + require.False(t, mtwf.Exists()) + mtwf.ws = &workflowState{} + require.True(t, mtwf.Exists()) + require.Errorf(t, mtwf.Complete(), ErrWorkflowNotFullySwitched) + mtwf.ws.WritesSwitched = true + require.Errorf(t, mtwf.Abort(), ErrWorkflowPartiallySwitched) + + require.ElementsMatch(t, mtwf.getCellsAsArray(), []string{"cell1", "cell2"}) + require.ElementsMatch(t, mtwf.getTabletTypes(), []topodata.TabletType{topodata.TabletType_REPLICA, topodata.TabletType_RDONLY}) + hasReplica, hasRdonly, hasMaster, err := mtwf.parseTabletTypes() + require.NoError(t, err) + require.True(t, hasReplica) + require.True(t, hasRdonly) + require.False(t, hasMaster) + + mtwf.params.TabletTypes = "replica,rdonly,master" + require.ElementsMatch(t, mtwf.getTabletTypes(), []topodata.TabletType{topodata.TabletType_REPLICA, topodata.TabletType_RDONLY, topodata.TabletType_MASTER}) + + hasReplica, hasRdonly, hasMaster, err = mtwf.parseTabletTypes() + require.NoError(t, err) + require.True(t, hasReplica) + require.True(t, hasRdonly) + require.True(t, hasMaster) +} + +func TestReshardingWorkflowCurrentState(t *testing.T) { +} From 820e6e94d4bf0a60e9981dcf0b1edd80d757b559 Mon Sep 17 00:00:00 2001 From: Rohit Nayak Date: Sat, 19 Dec 2020 22:14:59 +0100 Subject: 
[PATCH 12/26] Add tests for Complete/Abort, fix check for switched writes Signed-off-by: Rohit Nayak --- go/test/endtoend/vreplication/config.go | 22 ++++- .../resharding_workflows_v2_test.go | 99 ++++++++++++++----- .../vreplication/unsharded_init_data.sql | 5 +- go/vt/vtctl/vtctl.go | 32 +++--- go/vt/wrangler/traffic_switcher.go | 68 +++---------- go/vt/wrangler/vexec.go | 2 +- go/vt/wrangler/workflow.go | 40 ++++---- go/vt/wrangler/workflow_test.go | 4 +- 8 files changed, 158 insertions(+), 114 deletions(-) diff --git a/go/test/endtoend/vreplication/config.go b/go/test/endtoend/vreplication/config.go index ee5dbc349d2..d937b7a4948 100644 --- a/go/test/endtoend/vreplication/config.go +++ b/go/test/endtoend/vreplication/config.go @@ -4,22 +4,28 @@ var ( initialProductSchema = ` create table product(pid int, description varbinary(128), primary key(pid)); create table customer(cid int, name varbinary(128), typ enum('individual','soho','enterprise'), sport set('football','cricket','baseball'),ts timestamp not null default current_timestamp, primary key(cid)); +create table customer_seq(id int, next_id bigint, cache bigint, primary key(id)) comment 'vitess_sequence'; create table merchant(mname varchar(128), category varchar(128), primary key(mname)) DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci; create table orders(oid int, cid int, pid int, mname varchar(128), price int, primary key(oid)); -create table customer_seq(id int, next_id bigint, cache bigint, primary key(id)) comment 'vitess_sequence'; create table order_seq(id int, next_id bigint, cache bigint, primary key(id)) comment 'vitess_sequence'; +create table customer2(cid int, name varbinary(128), typ enum('individual','soho','enterprise'), sport set('football','cricket','baseball'),ts timestamp not null default current_timestamp, primary key(cid)); +create table customer_seq2(id int, next_id bigint, cache bigint, primary key(id)) comment 'vitess_sequence'; ` initialProductVSchema = ` { "tables": { 
"product": {}, - "customer": {}, "merchant": {}, "orders": {}, + "customer": {}, "customer_seq": { "type": "sequence" }, + "customer2": {}, + "customer_seq2": { + "type": "sequence" + }, "order_seq": { "type": "sequence" } @@ -47,6 +53,18 @@ create table order_seq(id int, next_id bigint, cache bigint, primary key(id)) co "column": "cid", "sequence": "customer_seq" } + }, + "customer2": { + "column_vindexes": [ + { + "column": "cid", + "name": "reverse_bits" + } + ], + "auto_increment": { + "column": "cid", + "sequence": "customer_seq2" + } } } diff --git a/go/test/endtoend/vreplication/resharding_workflows_v2_test.go b/go/test/endtoend/vreplication/resharding_workflows_v2_test.go index ee5ca41e753..966ec3975b1 100644 --- a/go/test/endtoend/vreplication/resharding_workflows_v2_test.go +++ b/go/test/endtoend/vreplication/resharding_workflows_v2_test.go @@ -21,10 +21,8 @@ import ( "strings" "testing" - "vitess.io/vitess/go/test/endtoend/cluster" - "vitess.io/vitess/go/vt/wrangler" - "github.com/stretchr/testify/require" + "vitess.io/vitess/go/test/endtoend/cluster" ) const ( @@ -38,25 +36,40 @@ const ( readQuery = "select * from customer" ) +const ( + workflowActionStart = "Start" + workflowActionSwitchTraffic = "SwitchTraffic" + workflowActionReverseTraffic = "ReverseTraffic" + workflowActionComplete = "Complete" + workflowActionAbort = "Abort" +) + var ( customerTab1, customerTab2, productReplicaTab, customerReplicaTab1, productTab *cluster.VttabletProcess ) -func moveTables2Start(t *testing.T) { - moveTables2(t, defaultCellName, moveTablesWorkflowName, sourceKs, targetKs, tablesToMove, wrangler.WorkflowActionStart, "") +func moveTables2Start(t *testing.T, tables string) error { + if tables == "" { + tables = tablesToMove + } + err := moveTables2(t, defaultCellName, moveTablesWorkflowName, sourceKs, targetKs, tables, workflowActionStart, "") + require.NoError(t, err) catchup(t, customerTab1, moveTablesWorkflowName, "MoveTables") catchup(t, customerTab2, 
moveTablesWorkflowName, "MoveTables") vdiff(t, ksWorkflow) + return nil } -func moveTables2(t *testing.T, cells, workflow, sourceKs, targetKs, tables, action, tabletTypes string) { +func moveTables2Action(t *testing.T, action, tabletTypes, cells string) error { + return moveTables2(t, cells, moveTablesWorkflowName, sourceKs, targetKs, tablesToMove, action, tabletTypes) +} + +func moveTables2(t *testing.T, cells, workflow, sourceKs, targetKs, tables, action, tabletTypes string) error { var args []string args = append(args, "MoveTables", "-v2") switch action { - case wrangler.WorkflowActionStart: + case workflowActionStart: args = append(args, "-source", sourceKs, "-tables", tables) - case wrangler.WorkflowActionSwitchTraffic: - case wrangler.WorkflowActionReverseTraffic: } if cells != "" { args = append(args, "-cells", cells) @@ -66,33 +79,43 @@ func moveTables2(t *testing.T, cells, workflow, sourceKs, targetKs, tables, acti } ksWorkflow := fmt.Sprintf("%s.%s", targetKs, workflow) args = append(args, action, ksWorkflow) - if err := vc.VtctlClient.ExecuteCommand(args...); err != nil { - t.Fatalf("MoveTables command failed with %+v\n", err) + if output, err := vc.VtctlClient.ExecuteCommandWithOutput(args...); err != nil { + t.Logf("MoveTables command failed with %+v\n", err) + return fmt.Errorf("%s: %s", err, output) } + return nil } func moveTablesSwitchReads(t *testing.T, tabletTypes string) { - moveTables2(t, defaultCellName, moveTablesWorkflowName, "", targetKs, "", wrangler.WorkflowActionSwitchTraffic, "replica,rdonly") + require.NoError(t, moveTables2Action(t, workflowActionSwitchTraffic, "replica,rdonly", "")) } func moveTablesReverseReads(t *testing.T, tabletTypes string) { - moveTables2(t, defaultCellName, moveTablesWorkflowName, "", targetKs, "", wrangler.WorkflowActionReverseTraffic, "replica,rdonly") + require.NoError(t, moveTables2Action(t, workflowActionReverseTraffic, "replica,rdonly", "")) } func moveTablesSwitchWrites(t *testing.T) { - moveTables2(t, 
defaultCellName, moveTablesWorkflowName, "", targetKs, "", wrangler.WorkflowActionSwitchTraffic, "master") + require.NoError(t, moveTables2Action(t, workflowActionSwitchTraffic, "master", "")) } func moveTablesReverseWrites(t *testing.T) { - moveTables2(t, defaultCellName, moveTablesWorkflowName, "", targetKs, "", wrangler.WorkflowActionReverseTraffic, "master") + require.NoError(t, moveTables2Action(t, workflowActionReverseTraffic, "master", "")) } func moveTablesSwitchReadsAndWrites(t *testing.T) { - moveTables2(t, defaultCellName, moveTablesWorkflowName, "", targetKs, "", wrangler.WorkflowActionSwitchTraffic, "replica,rdonly,master") + require.NoError(t, moveTables2Action(t, workflowActionSwitchTraffic, "replica,rdonly,master", "")) } func moveTablesReverseReadsAndWrites(t *testing.T) { - moveTables2(t, defaultCellName, moveTablesWorkflowName, "", targetKs, "", wrangler.WorkflowActionReverseTraffic, "replica,rdonly,master") + require.NoError(t, moveTables2Action(t, workflowActionReverseTraffic, "replica,rdonly,master", "")) +} + +func moveTablesComplete(t *testing.T) error { + return moveTables2Action(t, workflowActionComplete, "", "") +} + +func moveTablesAbort(t *testing.T) error { + return moveTables2Action(t, workflowActionAbort, "", "") } func validateReadsRoute(t *testing.T, tabletTypes string, tablet *cluster.VttabletProcess) { @@ -153,33 +176,33 @@ func revert(t *testing.T) { func TestMoveTablesV2Workflow(t *testing.T) { vc = setupCluster(t) defer vtgateConn.Close() - defer vc.TearDown() + //defer vc.TearDown() setupCustomerKeyspace(t) - moveTables2Start(t) - printRoutingRules(t, vc, "After MoveTables Started") + moveTables2Start(t, "customer") + //printRoutingRules(t, vc, "After MoveTables Started") validateReadsRouteToSource(t, "replica") validateWritesRouteToSource(t) moveTablesSwitchReads(t, "") - printRoutingRules(t, vc, "After SwitchReads") + //printRoutingRules(t, vc, "After SwitchReads") validateReadsRouteToTarget(t, "replica") 
validateWritesRouteToSource(t) moveTablesSwitchWrites(t) - printRoutingRules(t, vc, "After SwitchWrites") + //printRoutingRules(t, vc, "After SwitchWrites") validateReadsRouteToTarget(t, "replica") validateWritesRouteToTarget(t) moveTablesReverseReads(t, "") - printRoutingRules(t, vc, "After ReverseReads") + //printRoutingRules(t, vc, "After ReverseReads") validateReadsRouteToSource(t, "replica") validateWritesRouteToTarget(t) moveTablesReverseWrites(t) validateReadsRouteToSource(t, "replica") validateWritesRouteToSource(t) - printRoutingRules(t, vc, "After ReverseWrites") + //printRoutingRules(t, vc, "After ReverseWrites") moveTablesSwitchWrites(t) validateReadsRouteToSource(t, "replica") @@ -204,6 +227,33 @@ func TestMoveTablesV2Workflow(t *testing.T) { validateReadsRouteToSource(t, "replica") validateWritesRouteToSource(t) + var err error + var output string + + err = moveTablesComplete(t) + require.Error(t, err) + require.Contains(t, err.Error(), "cannot complete workflow because you have not yet switched all read and write traffic") + + moveTablesSwitchReadsAndWrites(t) + validateReadsRouteToTarget(t, "replica") + validateWritesRouteToTarget(t) + + err = moveTablesComplete(t) + require.NoError(t, err) + + listAllArgs := []string{"workflow", "customer", "listall"} + output, _ = vc.VtctlClient.ExecuteCommandWithOutput(listAllArgs...) + require.Contains(t, output, "No workflows found in keyspace customer") + + moveTables2Start(t, "customer2") + output, _ = vc.VtctlClient.ExecuteCommandWithOutput(listAllArgs...) + require.Contains(t, output, "Following workflow(s) found in keyspace customer: p2c") + + err = moveTablesAbort(t) + require.NoError(t, err) + + output, _ = vc.VtctlClient.ExecuteCommandWithOutput(listAllArgs...) 
+ require.Contains(t, output, "No workflows found in keyspace customer") } func setupCluster(t *testing.T) *VitessCluster { @@ -356,6 +406,7 @@ func moveCustomerTableSwitchFlows(t *testing.T, cells []*Cell, sourceCellOrAlias validateWritesRouteToTarget(t) revert(t) + } _ = switchReadsFollowedBySwitchWrites _ = switchWritesFollowedBySwitchReads diff --git a/go/test/endtoend/vreplication/unsharded_init_data.sql b/go/test/endtoend/vreplication/unsharded_init_data.sql index 9a13e5bb22f..f8e0cc5d86f 100644 --- a/go/test/endtoend/vreplication/unsharded_init_data.sql +++ b/go/test/endtoend/vreplication/unsharded_init_data.sql @@ -7,4 +7,7 @@ insert into product(pid, description) values(1, 'keyboard'); insert into product(pid, description) values(2, 'monitor'); insert into orders(oid, cid, mname, pid, price) values(1, 1, 'monoprice', 1, 10); insert into orders(oid, cid, mname, pid, price) values(2, 1, 'newegg', 2, 15); -insert into orders(oid, cid, mname, pid, price) values(3, 2, 'monoprice', 2, 20); \ No newline at end of file +insert into orders(oid, cid, mname, pid, price) values(3, 2, 'monoprice', 2, 20); +insert into customer2(cid, name, typ, sport) values(1, 'john',1,'football,baseball'); +insert into customer2(cid, name, typ, sport) values(2, 'paul','soho','cricket'); +insert into customer2(cid, name, typ, sport) values(3, 'ringo','enterprise',''); diff --git a/go/vt/vtctl/vtctl.go b/go/vt/vtctl/vtctl.go index aa40efa955d..36870433a70 100644 --- a/go/vt/vtctl/vtctl.go +++ b/go/vt/vtctl/vtctl.go @@ -2023,7 +2023,7 @@ func commandMoveTables2(ctx context.Context, wr *wrangler.Wrangler, subFlags *fl } //TODO: check if invalid parameters were passed in that do not apply to this action - //originalAction := action + originalAction := action action = strings.ToLower(action) // allow users to input action in a case-insensitive manner switch action { case "start": @@ -2059,6 +2059,8 @@ func commandMoveTables2(ctx context.Context, wr *wrangler.Wrangler, subFlags *fl return 
fmt.Errorf("workflow %s does not exist", ksWorkflow) } + startState := wf.CachedState() + wr.Logger().Printf("\nCachedState: %s\n", startState) switch action { case "show": return printDetails() @@ -2090,23 +2092,23 @@ func commandMoveTables2(ctx context.Context, wr *wrangler.Wrangler, subFlags *fl } return printDetails() case "start": - if err := wf.Start(); err != nil { - log.Warningf("Start %s error: %v", action, wf) - - return wrapError(wf, err) - } + err = wf.Start() case "switchtraffic": - if err := wf.SwitchTraffic(wrangler.DirectionForward); err != nil { - log.Warningf("SwitchTraffic %s error: %v", action, wf) - return wrapError(wf, err) - } + err = wf.SwitchTraffic(wrangler.DirectionForward) case "reversetraffic": - if err := wf.ReverseTraffic(); err != nil { - log.Warningf("ReverseTraffic %s error: %v", action, wf) - return wrapError(wf, err) - } + err = wf.ReverseTraffic() + case "complete": + err = wf.Complete() + case "abort": + err = wf.Abort() + default: + return fmt.Errorf("found unsupported action %s", originalAction) + } + if err != nil { + log.Warningf(" %s error: %v", originalAction, wf) + return wrapError(wf, err) } - wr.Logger().Printf("MoveTables %s was successful\n\nCurrent State: %s\n\n", action, wf.CurrentState()) + wr.Logger().Printf("MoveTables %s was successful\nStart State: %s\n\nCurrent State: %s\n\n", action, startState, wf.CurrentState()) return nil } diff --git a/go/vt/wrangler/traffic_switcher.go b/go/vt/wrangler/traffic_switcher.go index 7988b8d6d91..d9bf926400e 100644 --- a/go/vt/wrangler/traffic_switcher.go +++ b/go/vt/wrangler/traffic_switcher.go @@ -230,7 +230,7 @@ func (wr *Wrangler) getCellsWithTableReadsSwitched(ctx context.Context, targetKe func (wr *Wrangler) getWorkflowState(ctx context.Context, targetKeyspace, workflow string) (*trafficSwitcher, *workflowState, error) { ts, err := wr.buildTrafficSwitcher(ctx, targetKeyspace, workflow) - if err != nil { + if ts == nil || err != nil { if err.Error() == 
fmt.Sprintf(errorNoStreams, targetKeyspace, workflow) { return nil, nil, nil } @@ -240,9 +240,6 @@ func (wr *Wrangler) getWorkflowState(ctx context.Context, targetKeyspace, workfl ws := &workflowState{Workflow: workflow, TargetKeyspace: targetKeyspace} ws.SourceKeyspace = ts.sourceKeyspace - if ts.frozen { - ws.WritesSwitched = true - } var cellsSwitched, cellsNotSwitched []string if ts.migrationType == binlogdatapb.MigrationType_TABLES { ws.WorkflowType = workflowTypeMoveTables @@ -264,6 +261,18 @@ func (wr *Wrangler) getWorkflowState(ctx context.Context, targetKeyspace, workfl return nil, nil, err } ws.ReplicaCellsNotSwitched, ws.ReplicaCellsSwitched = cellsNotSwitched, cellsSwitched + + rules, err := ts.wr.getRoutingRules(ctx) + if err != nil { + return nil, nil, err + } + for _, table := range ts.tables { + rr := rules[table] + // if a rule exists for the table and points to the target keyspace, writes have been switched + if len(rr) > 0 && rr[0] == fmt.Sprintf("%s.%s", ts.targetKeyspace, table) { + ws.WritesSwitched = true + } + } } else { ws.WorkflowType = workflowTypeReshard @@ -279,6 +288,9 @@ func (wr *Wrangler) getWorkflowState(ctx context.Context, targetKeyspace, workfl return nil, nil, err } ws.ReplicaCellsNotSwitched, ws.ReplicaCellsSwitched = cellsNotSwitched, cellsSwitched + if ts.targetShards()[0].IsMasterServing { + ws.WritesSwitched = true + } } return ts, ws, nil @@ -835,54 +847,6 @@ func (ts *trafficSwitcher) validate(ctx context.Context) error { return nil } -func (ts *trafficSwitcher) validateTableForWrite(ctx context.Context) error { - rules, err := ts.wr.getRoutingRules(ctx) - if err != nil { - return err - } - for _, table := range ts.tables { - for _, tabletType := range []topodatapb.TabletType{topodatapb.TabletType_REPLICA, topodatapb.TabletType_RDONLY} { - tt := strings.ToLower(tabletType.String()) - if rules[table+"@"+tt] == nil || rules[ts.targetKeyspace+"."+table+"@"+tt] == nil { - return fmt.Errorf("missing tablet type specific 
routing, read-only traffic must be switched before switching writes: %v", table) - } - } - } - return nil -} - -func (ts *trafficSwitcher) validateShardForWrite(ctx context.Context) error { - srvKeyspaces, err := ts.wr.ts.GetSrvKeyspaceAllCells(ctx, ts.sourceKeyspace) - if err != nil { - return err - } - - // Checking one shard is enough. - var si *topo.ShardInfo - for _, source := range ts.sources { - si = source.si - break - } - - for _, srvKeyspace := range srvKeyspaces { - var shardServedTypes []string - for _, partition := range srvKeyspace.GetPartitions() { - if partition.GetServedType() == topodatapb.TabletType_MASTER { - continue - } - for _, shardReference := range partition.GetShardReferences() { - if key.KeyRangeEqual(shardReference.GetKeyRange(), si.GetKeyRange()) { - shardServedTypes = append(shardServedTypes, partition.GetServedType().String()) - } - } - } - if len(shardServedTypes) > 0 { - return fmt.Errorf("cannot switch MASTER away from %v/%v until everything else is switched. 
Make sure that the following types are switched first: %v", si.Keyspace(), si.ShardName(), strings.Join(shardServedTypes, ", ")) - } - } - return nil -} - func (ts *trafficSwitcher) compareShards(ctx context.Context, keyspace string, sis []*topo.ShardInfo) error { var shards []string for _, si := range sis { diff --git a/go/vt/wrangler/vexec.go b/go/vt/wrangler/vexec.go index eeaa9e3a822..5c7c9eb1d21 100644 --- a/go/vt/wrangler/vexec.go +++ b/go/vt/wrangler/vexec.go @@ -575,7 +575,7 @@ func dumpStreamListAsJSON(replStatus *ReplicationStatusResult, wr *Wrangler) err func (wr *Wrangler) printWorkflowList(keyspace string, workflows []string) { list := strings.Join(workflows, ", ") if list == "" { - wr.Logger().Printf("No workflows found in keyspace %s", keyspace) + wr.Logger().Printf("No workflows found in keyspace %s\n", keyspace) return } wr.Logger().Printf("Following workflow(s) found in keyspace %s: %v\n", keyspace, list) diff --git a/go/vt/wrangler/workflow.go b/go/vt/wrangler/workflow.go index c97c67fb4f4..33196b4fd29 100644 --- a/go/vt/wrangler/workflow.go +++ b/go/vt/wrangler/workflow.go @@ -26,14 +26,6 @@ import ( * implement/test Reshard same as MoveTables! 
*/ -const ( - WorkflowActionStart = "Start" - WorkflowActionSwitchTraffic = "SwitchTraffic" - WorkflowActionReverseTraffic = "ReverseTraffic" - WorkflowActionComplete = "Complete" - WorkflowActionAbort = "Abort" -) - type reshardingWorkflowInfo struct { name string typ string @@ -100,6 +92,23 @@ func (wr *Wrangler) NewMoveTablesWorkflow(ctx context.Context, params *MoveTable return mtwf, nil } +// CurrentState reloads and returns a human readable workflow state +func (mtwf *MoveTablesWorkflow) CurrentState() string { + _, ws, err := mtwf.wr.getWorkflowState(mtwf.ctx, mtwf.params.TargetKeyspace, mtwf.params.Workflow) + if err != nil { + return err.Error() + } + if ws == nil { + return "Workflow Not Found" + } + return mtwf.stateAsString(ws) +} + +// CachedState returns a human readable workflow state +func (mtwf *MoveTablesWorkflow) CachedState() string { + return mtwf.stateAsString(mtwf.ws) +} + // Exists checks if the workflow has already been initiated func (mtwf *MoveTablesWorkflow) Exists() bool { log.Infof("mtwf %+v", *mtwf) @@ -107,16 +116,13 @@ func (mtwf *MoveTablesWorkflow) Exists() bool { return mtwf.ws != nil } -// CurrentState returns the current state of the workflow's finite state machine -func (mtwf *MoveTablesWorkflow) CurrentState() string { - log.Infof("mtwf %v", mtwf) +func (mtwf *MoveTablesWorkflow) stateAsString(ws *workflowState) string { var stateInfo []string - ws := mtwf.ws s := "" if !mtwf.Exists() { stateInfo = append(stateInfo, "Not Started") } else { - if len(ws.RdonlyCellsNotSwitched) == 0 && len(ws.ReplicaCellsNotSwitched) == 0 { + if len(ws.RdonlyCellsNotSwitched) == 0 && len(ws.ReplicaCellsNotSwitched) == 0 && len(ws.ReplicaCellsSwitched) > 0 { s = "All Reads Switched" } else if len(ws.RdonlyCellsSwitched) == 0 && len(ws.ReplicaCellsSwitched) == 0 { s = "Reads Not Switched" @@ -183,15 +189,15 @@ func (mtwf *MoveTablesWorkflow) ReverseTraffic() error { } const ( - ErrWorkflowNotFullySwitched = "cannot complete workflow because you 
have not yet switched all read and write traffic" - ErrWorkflowPartiallySwitched = "cannot abort workflow because you have already switched some or all read and write traffic" + errWorkflowNotFullySwitched = "cannot complete workflow because you have not yet switched all read and write traffic" + errWorkflowPartiallySwitched = "cannot abort workflow because you have already switched some or all read and write traffic" ) // Complete cleans up a successful workflow func (mtwf *MoveTablesWorkflow) Complete() error { ws := mtwf.ws if !ws.WritesSwitched || len(ws.ReplicaCellsNotSwitched) > 0 || len(ws.RdonlyCellsNotSwitched) > 0 { - return fmt.Errorf(ErrWorkflowNotFullySwitched) + return fmt.Errorf(errWorkflowNotFullySwitched) } var renameTable TableRemovalType if mtwf.params.RenameTables { @@ -209,7 +215,7 @@ func (mtwf *MoveTablesWorkflow) Complete() error { func (mtwf *MoveTablesWorkflow) Abort() error { ws := mtwf.ws if ws.WritesSwitched || len(ws.ReplicaCellsSwitched) > 0 || len(ws.RdonlyCellsSwitched) > 0 { - return fmt.Errorf(ErrWorkflowPartiallySwitched) + return fmt.Errorf(errWorkflowPartiallySwitched) } if _, err := mtwf.wr.DropTargets(mtwf.ctx, mtwf.ws.TargetKeyspace, mtwf.ws.Workflow, mtwf.params.KeepData, false); err != nil { return err diff --git a/go/vt/wrangler/workflow_test.go b/go/vt/wrangler/workflow_test.go index 0eac7f0165d..c9491ef059a 100644 --- a/go/vt/wrangler/workflow_test.go +++ b/go/vt/wrangler/workflow_test.go @@ -50,9 +50,9 @@ func TestReshardingWorkflowErrorsAndMisc(t *testing.T) { require.False(t, mtwf.Exists()) mtwf.ws = &workflowState{} require.True(t, mtwf.Exists()) - require.Errorf(t, mtwf.Complete(), ErrWorkflowNotFullySwitched) + require.Errorf(t, mtwf.Complete(), errWorkflowNotFullySwitched) mtwf.ws.WritesSwitched = true - require.Errorf(t, mtwf.Abort(), ErrWorkflowPartiallySwitched) + require.Errorf(t, mtwf.Abort(), errWorkflowPartiallySwitched) require.ElementsMatch(t, mtwf.getCellsAsArray(), []string{"cell1", "cell2"}) 
require.ElementsMatch(t, mtwf.getTabletTypes(), []topodata.TabletType{topodata.TabletType_REPLICA, topodata.TabletType_RDONLY}) From c2883341ea13d6a101c05b9cc8abb9d37e3240d9 Mon Sep 17 00:00:00 2001 From: Rohit Nayak Date: Mon, 21 Dec 2020 00:14:17 +0100 Subject: [PATCH 13/26] Fix state computation for reverse writes, add tests for states pre/post actions Signed-off-by: Rohit Nayak --- .../resharding_workflows_v2_test.go | 27 ++++++++++++---- go/vt/wrangler/traffic_switcher.go | 32 +++++++++++++------ go/vt/wrangler/workflow.go | 6 ++-- 3 files changed, 48 insertions(+), 17 deletions(-) diff --git a/go/test/endtoend/vreplication/resharding_workflows_v2_test.go b/go/test/endtoend/vreplication/resharding_workflows_v2_test.go index 966ec3975b1..cb85bd267ea 100644 --- a/go/test/endtoend/vreplication/resharding_workflows_v2_test.go +++ b/go/test/endtoend/vreplication/resharding_workflows_v2_test.go @@ -46,6 +46,7 @@ const ( var ( customerTab1, customerTab2, productReplicaTab, customerReplicaTab1, productTab *cluster.VttabletProcess + lastOutput string ) func moveTables2Start(t *testing.T, tables string) error { @@ -79,10 +80,13 @@ func moveTables2(t *testing.T, cells, workflow, sourceKs, targetKs, tables, acti } ksWorkflow := fmt.Sprintf("%s.%s", targetKs, workflow) args = append(args, action, ksWorkflow) - if output, err := vc.VtctlClient.ExecuteCommandWithOutput(args...); err != nil { + output, err := vc.VtctlClient.ExecuteCommandWithOutput(args...) 
+ lastOutput = output + if err != nil { t.Logf("MoveTables command failed with %+v\n", err) return fmt.Errorf("%s: %s", err, output) } + fmt.Printf("----------\n%+v\n%s\n----------\n", args, output) return nil } @@ -173,38 +177,47 @@ func revert(t *testing.T) { clearRoutingRules(t, vc) } +func checkStates(t *testing.T, startState, endState string) { + require.Contains(t, lastOutput, fmt.Sprintf("Start State: %s", startState)) + require.Contains(t, lastOutput, fmt.Sprintf("Current State: %s", endState)) +} + +// ideally this should be broken up into multiple tests for full flow, replica/rdonly flow, reverse flows etc +// but CI currently fails on creating multiple clusters even after the previous ones are torn down func TestMoveTablesV2Workflow(t *testing.T) { vc = setupCluster(t) defer vtgateConn.Close() //defer vc.TearDown() + // test basic forward and reverse flows setupCustomerKeyspace(t) moveTables2Start(t, "customer") - //printRoutingRules(t, vc, "After MoveTables Started") + checkStates(t, "Not Started", "Not Started") validateReadsRouteToSource(t, "replica") validateWritesRouteToSource(t) moveTablesSwitchReads(t, "") - //printRoutingRules(t, vc, "After SwitchReads") + checkStates(t, "Reads Not Switched. Writes Not Switched", "All Reads Switched. Writes Not Switched") validateReadsRouteToTarget(t, "replica") validateWritesRouteToSource(t) moveTablesSwitchWrites(t) - //printRoutingRules(t, vc, "After SwitchWrites") + checkStates(t, "All Reads Switched. Writes Not Switched", "All Reads Switched. Writes Switched") validateReadsRouteToTarget(t, "replica") validateWritesRouteToTarget(t) moveTablesReverseReads(t, "") - //printRoutingRules(t, vc, "After ReverseReads") + checkStates(t, "All Reads Switched. Writes Switched", "Reads Not Switched. Writes Switched") validateReadsRouteToSource(t, "replica") validateWritesRouteToTarget(t) moveTablesReverseWrites(t) + checkStates(t, "Reads Not Switched. Writes Switched", "Reads Not Switched. 
Writes Not Switched") validateReadsRouteToSource(t, "replica") validateWritesRouteToSource(t) - //printRoutingRules(t, vc, "After ReverseWrites") moveTablesSwitchWrites(t) + checkStates(t, "Reads Not Switched. Writes Not Switched", "Reads Not Switched. Writes Switched") validateReadsRouteToSource(t, "replica") validateWritesRouteToTarget(t) @@ -227,6 +240,7 @@ func TestMoveTablesV2Workflow(t *testing.T) { validateReadsRouteToSource(t, "replica") validateWritesRouteToSource(t) + // test complete and abort var err error var output string @@ -254,6 +268,7 @@ func TestMoveTablesV2Workflow(t *testing.T) { output, _ = vc.VtctlClient.ExecuteCommandWithOutput(listAllArgs...) require.Contains(t, output, "No workflows found in keyspace customer") + } func setupCluster(t *testing.T) *VitessCluster { diff --git a/go/vt/wrangler/traffic_switcher.go b/go/vt/wrangler/traffic_switcher.go index d9bf926400e..ea2758061a0 100644 --- a/go/vt/wrangler/traffic_switcher.go +++ b/go/vt/wrangler/traffic_switcher.go @@ -241,22 +241,31 @@ func (wr *Wrangler) getWorkflowState(ctx context.Context, targetKeyspace, workfl ws := &workflowState{Workflow: workflow, TargetKeyspace: targetKeyspace} ws.SourceKeyspace = ts.sourceKeyspace var cellsSwitched, cellsNotSwitched []string + var keyspace string + var reverse bool + if strings.HasSuffix(workflow, "_reverse") { + reverse = true + keyspace = ws.SourceKeyspace + workflow = reverseName(workflow) + } else { + keyspace = targetKeyspace + } if ts.migrationType == binlogdatapb.MigrationType_TABLES { ws.WorkflowType = workflowTypeMoveTables // we assume a consistent state, so only choose routing rule for one table for replica/rdonly if len(ts.tables) == 0 { - return nil, nil, fmt.Errorf("no tables in workflow %s.%s", targetKeyspace, workflow) + return nil, nil, fmt.Errorf("no tables in workflow %s.%s", keyspace, workflow) } table := ts.tables[0] - cellsSwitched, cellsNotSwitched, err = wr.getCellsWithTableReadsSwitched(ctx, targetKeyspace, table, 
"rdonly") + cellsSwitched, cellsNotSwitched, err = wr.getCellsWithTableReadsSwitched(ctx, keyspace, table, "rdonly") if err != nil { return nil, nil, err } ws.RdonlyCellsNotSwitched, ws.RdonlyCellsSwitched = cellsNotSwitched, cellsSwitched - cellsSwitched, cellsNotSwitched, err = wr.getCellsWithTableReadsSwitched(ctx, targetKeyspace, table, "replica") + cellsSwitched, cellsNotSwitched, err = wr.getCellsWithTableReadsSwitched(ctx, keyspace, table, "replica") if err != nil { return nil, nil, err } @@ -269,7 +278,7 @@ func (wr *Wrangler) getWorkflowState(ctx context.Context, targetKeyspace, workfl for _, table := range ts.tables { rr := rules[table] // if a rule exists for the table and points to the target keyspace, writes have been switched - if len(rr) > 0 && rr[0] == fmt.Sprintf("%s.%s", ts.targetKeyspace, table) { + if len(rr) > 0 && rr[0] == fmt.Sprintf("%s.%s", keyspace, table) { ws.WritesSwitched = true } } @@ -277,19 +286,24 @@ func (wr *Wrangler) getWorkflowState(ctx context.Context, targetKeyspace, workfl ws.WorkflowType = workflowTypeReshard // we assume a consistent state, so only choose one shard - oneSourceShard := ts.sourceShards()[0] - cellsSwitched, cellsNotSwitched, err = wr.getCellsWithShardReadsSwitched(ctx, targetKeyspace, oneSourceShard, "rdonly") + var shard *topo.ShardInfo + if reverse { + shard = ts.targetShards()[0] + } else { + shard = ts.sourceShards()[0] + } + cellsSwitched, cellsNotSwitched, err = wr.getCellsWithShardReadsSwitched(ctx, keyspace, shard, "rdonly") if err != nil { return nil, nil, err } ws.RdonlyCellsNotSwitched, ws.RdonlyCellsSwitched = cellsNotSwitched, cellsSwitched - cellsSwitched, cellsNotSwitched, err = wr.getCellsWithShardReadsSwitched(ctx, targetKeyspace, oneSourceShard, "replica") + cellsSwitched, cellsNotSwitched, err = wr.getCellsWithShardReadsSwitched(ctx, keyspace, shard, "replica") if err != nil { return nil, nil, err } ws.ReplicaCellsNotSwitched, ws.ReplicaCellsSwitched = cellsNotSwitched, cellsSwitched - if 
ts.targetShards()[0].IsMasterServing { - ws.WritesSwitched = true + if shard.IsMasterServing { + ws.WritesSwitched = false } } diff --git a/go/vt/wrangler/workflow.go b/go/vt/wrangler/workflow.go index 33196b4fd29..99cb5478a32 100644 --- a/go/vt/wrangler/workflow.go +++ b/go/vt/wrangler/workflow.go @@ -17,7 +17,7 @@ import ( /* TODO * expand e2e for testing all possible transitions - (Complete/Abort Switch/Reverse Replica/Rdonly) + (Switch/Reverse Replica/Rdonly) * Unit Tests (run coverage first and identify) (CurrentState()) @@ -53,7 +53,9 @@ type MoveTablesWorkflow struct { } func (mtwf *MoveTablesWorkflow) String() string { - s := "" //FIXME + s := "" + s += fmt.Sprintf("Parameters: %+v\n", mtwf.params) + s += fmt.Sprintf("State: %+v", mtwf.CachedState()) return s } From 8df1ca7c1a29719ab64732d5028dc8a5355ef802 Mon Sep 17 00:00:00 2001 From: Rohit Nayak Date: Mon, 21 Dec 2020 15:26:12 +0100 Subject: [PATCH 14/26] Common VReplicationWorkflow abstraction for both MoveTables and Reshard Signed-off-by: Rohit Nayak --- go/vt/vtctl/vtctl.go | 117 +++++++++------ go/vt/wrangler/workflow.go | 252 ++++++++++++++++++-------------- go/vt/wrangler/workflow_test.go | 6 +- 3 files changed, 216 insertions(+), 159 deletions(-) diff --git a/go/vt/vtctl/vtctl.go b/go/vt/vtctl/vtctl.go index 36870433a70..c5d89bdc91b 100644 --- a/go/vt/vtctl/vtctl.go +++ b/go/vt/vtctl/vtctl.go @@ -1891,6 +1891,12 @@ func commandValidateKeyspace(ctx context.Context, wr *wrangler.Wrangler, subFlag } func commandReshard(ctx context.Context, wr *wrangler.Wrangler, subFlags *flag.FlagSet, args []string) error { + for _, arg := range args { + if arg == "-v2" { + fmt.Printf("*** Using Reshard v2 flow ***") + return commandVRWorkflow(ctx, wr, subFlags, args, wrangler.ReshardWorkflow) + } + } cells := subFlags.String("cells", "", "Cell(s) or CellAlias(es) (comma-separated) to replicate from.") tabletTypes := subFlags.String("tablet_types", "", "Source tablet types to replicate from.") skipSchemaCopy := 
subFlags.Bool("skip_schema_copy", false, "Skip copying of schema to targets") @@ -1913,7 +1919,7 @@ func commandMoveTables(ctx context.Context, wr *wrangler.Wrangler, subFlags *fla for _, arg := range args { if arg == "-v2" { fmt.Printf("*** Using MoveTables v2 flow ***") - return commandMoveTables2(ctx, wr, subFlags, args) + return commandVRWorkflow(ctx, wr, subFlags, args, wrangler.MoveTablesWorkflow) } } workflow := subFlags.String("workflow", "", "Workflow name. Can be any descriptive string. Will be used to later migrate traffic via SwitchReads/SwitchWrites.") @@ -1947,29 +1953,37 @@ func commandMoveTables(ctx context.Context, wr *wrangler.Wrangler, subFlags *fla return wr.MoveTables(ctx, *workflow, source, target, tableSpecs, *cells, *tabletTypes, *allTables, *excludes) } -func commandMoveTables2(ctx context.Context, wr *wrangler.Wrangler, subFlags *flag.FlagSet, args []string) error { - sourceKeyspace := subFlags.String("source", "", "Source keyspace") - tables := subFlags.String("tables", "", "A table spec or a list of tables") +func commandVRWorkflow(ctx context.Context, wr *wrangler.Wrangler, subFlags *flag.FlagSet, args []string, + workflowType wrangler.VReplicationWorkflowType) error { + cells := subFlags.String("cells", "", "Cell(s) or CellAlias(es) (comma-separated) to replicate from.") tabletTypes := subFlags.String("tablet_types", "", "Source tablet types to replicate from (e.g. master, replica, rdonly). 
Defaults to -vreplication_tablet_type parameter value for the tablet, which has the default value of replica.") - allTables := subFlags.Bool("all", false, "Move all tables from the source keyspace") - excludes := subFlags.String("exclude", "", "Tables to exclude (comma-separated) if -all is specified") dryRun := subFlags.Bool("dry_run", false, "Does a dry run of SwitchReads and only reports the actions to be taken") timeout := subFlags.Duration("timeout", 30*time.Second, "Specifies the maximum time to wait, in seconds, for vreplication to catch up on master migrations. The migration will be aborted on timeout.") reverseReplication := subFlags.Bool("reverse_replication", true, "Also reverse the replication") - renameTables := subFlags.Bool("rename_tables", false, "Rename tables instead of dropping them") keepData := subFlags.Bool("keep_data", false, "Do not drop tables or shards (if true, only vreplication artifacts are cleaned up)") + + sourceKeyspace := subFlags.String("source", "", "Source keyspace") + tables := subFlags.String("tables", "", "A table spec or a list of tables") + allTables := subFlags.Bool("all", false, "Move all tables from the source keyspace") + excludes := subFlags.String("exclude", "", "Tables to exclude (comma-separated) if -all is specified") + renameTables := subFlags.Bool("rename_tables", false, "Rename tables instead of dropping them") + + sourceShards := subFlags.String("source_shards", "", "Source shards") + targetShards := subFlags.String("target_shards", "", "Target shards") + skipSchemaCopy := subFlags.Bool("skip_schema_copy", false, "Skip copying of schema to target shards") + _ = subFlags.Bool("v2", true, "") - _, _, _ = dryRun, timeout, reverseReplication - _, _, _ = cells, tabletTypes, excludes + _, _, _ = dryRun, reverseReplication, skipSchemaCopy if err := subFlags.Parse(args); err != nil { return err } + if subFlags.NArg() != 2 { return fmt.Errorf("two arguments are needed: action, keyspace.workflow") } - action := 
subFlags.Arg(0) // TODO: actions will be SubCommands in the new cobra based vtctld implementation + action := subFlags.Arg(0) ksWorkflow := subFlags.Arg(1) target, workflow, err := splitKeyspaceWorkflow(ksWorkflow) if err != nil { @@ -1980,7 +1994,7 @@ func commandMoveTables2(ctx context.Context, wr *wrangler.Wrangler, subFlags *fl wr.Logger().Errorf("keyspace %s not found", target) } - mtp := &wrangler.MoveTablesParams{ + vrwp := &wrangler.VReplicationWorkflowParams{ TargetKeyspace: target, Workflow: workflow, DryRun: *dryRun, @@ -2012,7 +2026,7 @@ func commandMoveTables2(ctx context.Context, wr *wrangler.Wrangler, subFlags *fl return nil } - wrapError := func(wf *wrangler.MoveTablesWorkflow, err error) error { + wrapError := func(wf *wrangler.VReplicationWorkflow, err error) error { wr.Logger().Errorf("\n%s\n", err.Error()) log.Infof("In wrapError wf is %+v", wf) wr.Logger().Infof("Workflow Status: %s\n", wf.CurrentState()) @@ -2026,45 +2040,62 @@ func commandMoveTables2(ctx context.Context, wr *wrangler.Wrangler, subFlags *fl originalAction := action action = strings.ToLower(action) // allow users to input action in a case-insensitive manner switch action { - case "start": - if *sourceKeyspace == "" { - return fmt.Errorf("source keyspace is not specified") - } - if !*allTables && *tables == "" { - return fmt.Errorf("no tables specified to move") + case wrangler.VReplicationWorkflowActionStart: + switch workflowType { + case wrangler.MoveTablesWorkflow: + if *sourceKeyspace == "" { + return fmt.Errorf("source keyspace is not specified") + } + if !*allTables && *tables == "" { + return fmt.Errorf("no tables specified to move") + } + vrwp.SourceKeyspace = *sourceKeyspace + vrwp.Tables = *tables + vrwp.AllTables = *allTables + vrwp.ExcludeTables = *excludes + case wrangler.ReshardWorkflow: + if *sourceShards == "" || *targetShards == "" { + return fmt.Errorf("source and target shards are not specified") + } + vrwp.SkipSchemaCopy = *skipSchemaCopy + default: + return 
fmt.Errorf("unknown workflow type passed: %v", workflowType) + } + + vrwp.Cells = *cells + vrwp.TabletTypes = *tabletTypes + case wrangler.VReplicationWorkflowActionSwitchTraffic, wrangler.VReplicationWorkflowActionReverseTraffic: + vrwp.Cells = *cells + vrwp.TabletTypes = *tabletTypes + vrwp.Timeout = *timeout + vrwp.EnableReverseReplication = *reverseReplication + case wrangler.VReplicationWorkflowActionAbort: + vrwp.KeepData = *keepData + case wrangler.VReplicationWorkflowActionComplete: + switch workflowType { + case wrangler.MoveTablesWorkflow: + vrwp.RenameTables = *renameTables + default: + return fmt.Errorf("unknown workflow type passed: %v", workflowType) } - mtp.SourceKeyspace = *sourceKeyspace - mtp.Tables = *tables - mtp.AllTables = *allTables - mtp.ExcludeTables = *excludes - mtp.TabletTypes = *tabletTypes - case "switchtraffic", "reversetraffic": - mtp.Cells = *cells - mtp.TabletTypes = *tabletTypes - mtp.Timeout = *timeout - mtp.EnableReverseReplication = *reverseReplication - case "abort": - mtp.KeepData = *keepData - case "complete": - mtp.RenameTables = *renameTables - mtp.KeepData = *keepData + vrwp.KeepData = *keepData } - wf, err := wr.NewMoveTablesWorkflow(ctx, mtp) + wf, err := wr.NewVReplicationWorkflow(ctx, wrangler.MoveTablesWorkflow, vrwp) if err != nil { - log.Warningf("NewMoveTablesWorkflow returned error %+v", wf) + log.Warningf("NewVReplicationWorkflow returned error %+v", wf) return err } - if !wf.Exists() && action != "start" { + if !wf.Exists() && action != wrangler.VReplicationWorkflowActionStart { return fmt.Errorf("workflow %s does not exist", ksWorkflow) } startState := wf.CachedState() wr.Logger().Printf("\nCachedState: %s\n", startState) switch action { - case "show": + case wrangler.VReplicationWorkflowActionShow: return printDetails() - case "progress": + case wrangler.VReplicationWorkflowActionProgress: copyProgress, err := wf.GetCopyProgress() if err != nil { return err @@ -2091,15 +2122,15 @@ func commandMoveTables2(ctx 
context.Context, wr *wrangler.Wrangler, subFlags *fl wr.Logger().Printf("\n%s\n", s) } return printDetails() - case "start": + case wrangler.VReplicationWorkflowActionStart: err = wf.Start() - case "switchtraffic": + case wrangler.VReplicationWorkflowActionSwitchTraffic: err = wf.SwitchTraffic(wrangler.DirectionForward) - case "reversetraffic": + case wrangler.VReplicationWorkflowActionReverseTraffic: err = wf.ReverseTraffic() - case "complete": + case wrangler.VReplicationWorkflowActionComplete: err = wf.Complete() - case "abort": + case wrangler.VReplicationWorkflowActionAbort: err = wf.Abort() default: return fmt.Errorf("found unsupported action %s", originalAction) diff --git a/go/vt/wrangler/workflow.go b/go/vt/wrangler/workflow.go index 99cb5478a32..db903385614 100644 --- a/go/vt/wrangler/workflow.go +++ b/go/vt/wrangler/workflow.go @@ -24,104 +24,117 @@ import ( * dry run * implement/test Reshard same as MoveTables! + VReplicationWorkflow as common to both MoveTables/Reshard */ -type reshardingWorkflowInfo struct { - name string - typ string -} +// VReplicationWorkflowType specifies the switching direction. 
+type VReplicationWorkflowType int -func newWorkflow(name, typ string) (*reshardingWorkflowInfo, error) { - wf := &reshardingWorkflowInfo{ - name: name, typ: typ, - } - return wf, nil -} +const ( + MoveTablesWorkflow = VReplicationWorkflowType(iota) + ReshardWorkflow +) -// endregion +// VReplicationWorkflowAction defines subcommands passed to vtctl for movetables or reshard +type VReplicationWorkflowAction string + +const ( + VReplicationWorkflowActionStart = "start" + VReplicationWorkflowActionSwitchTraffic = "switchtraffic" + VReplicationWorkflowActionReverseTraffic = "reversetraffic" + VReplicationWorkflowActionComplete = "complete" + VReplicationWorkflowActionAbort = "abort" + VReplicationWorkflowActionShow = "show" + VReplicationWorkflowActionProgress = "progress" +) // region Move Tables Public API -// MoveTablesWorkflow stores various internal objects for a workflow -type MoveTablesWorkflow struct { - ctx context.Context - wf *reshardingWorkflowInfo - wr *Wrangler - params *MoveTablesParams - ts *trafficSwitcher - ws *workflowState +// VReplicationWorkflow stores various internal objects for a workflow +type VReplicationWorkflow struct { + workflowType VReplicationWorkflowType + ctx context.Context + wr *Wrangler + params *VReplicationWorkflowParams + ts *trafficSwitcher + ws *workflowState } -func (mtwf *MoveTablesWorkflow) String() string { +func (vrw *VReplicationWorkflow) String() string { s := "" - s += fmt.Sprintf("Parameters: %+v\n", mtwf.params) - s += fmt.Sprintf("State: %+v", mtwf.CachedState()) + s += fmt.Sprintf("Parameters: %+v\n", vrw.params) + s += fmt.Sprintf("State: %+v", vrw.CachedState()) return s } -// MoveTablesParams stores args and options passed to a MoveTables command -type MoveTablesParams struct { - Workflow, SourceKeyspace, TargetKeyspace, Tables string - Cells, TabletTypes, ExcludeTables string - EnableReverseReplication, DryRun, AllTables bool - RenameTables, KeepData bool - Timeout time.Duration - Direction 
TrafficSwitchDirection +// VReplicationWorkflowParams stores args and options passed to a VReplicationWorkflow command +type VReplicationWorkflowParams struct { + Workflow, TargetKeyspace string + Cells, TabletTypes, ExcludeTables string + EnableReverseReplication, DryRun bool + KeepData bool + Timeout time.Duration + Direction TrafficSwitchDirection + + // MoveTables specific + SourceKeyspace, Tables string + AllTables, RenameTables bool + + // Reshard specific + SourceShards, TargetShards []string + SkipSchemaCopy bool } -// NewMoveTablesWorkflow sets up a MoveTables workflow object based on options provided, deduces the state of the +// NewVReplicationWorkflow sets up a MoveTables or Reshard workflow based on options provided, deduces the state of the // workflow from the persistent state stored in the vreplication table and the topo -func (wr *Wrangler) NewMoveTablesWorkflow(ctx context.Context, params *MoveTablesParams) (*MoveTablesWorkflow, error) { - log.Infof("NewMoveTablesWorkflow with params %+v", params) - mtwf := &MoveTablesWorkflow{wr: wr, ctx: ctx, params: params} +func (wr *Wrangler) NewVReplicationWorkflow(ctx context.Context, workflowType VReplicationWorkflowType, + params *VReplicationWorkflowParams) (*VReplicationWorkflow, error) { + + log.Infof("NewVReplicationWorkflow with params %+v", params) + vrw := &VReplicationWorkflow{wr: wr, ctx: ctx, params: params, workflowType: workflowType} ts, ws, err := wr.getWorkflowState(ctx, params.TargetKeyspace, params.Workflow) if err != nil { return nil, err } log.Infof("Workflow state is %+v", ws) - wf, err := newWorkflow(params.Workflow, "MoveTables") - if err != nil { - return nil, err - } if ts != nil { //Other than on Start we need to get SourceKeyspace from the workflow - mtwf.params.TargetKeyspace = ts.targetKeyspace - mtwf.params.Workflow = ts.workflow - mtwf.params.SourceKeyspace = ts.sourceKeyspace - } - mtwf.ts = ts - mtwf.ws = ws - mtwf.wf = wf - return mtwf, nil + vrw.params.TargetKeyspace = 
ts.targetKeyspace + vrw.params.Workflow = ts.workflow + vrw.params.SourceKeyspace = ts.sourceKeyspace + } + vrw.ts = ts + vrw.ws = ws + return vrw, nil } // CurrentState reloads and returns a human readable workflow state -func (mtwf *MoveTablesWorkflow) CurrentState() string { - _, ws, err := mtwf.wr.getWorkflowState(mtwf.ctx, mtwf.params.TargetKeyspace, mtwf.params.Workflow) +func (vrw *VReplicationWorkflow) CurrentState() string { + _, ws, err := vrw.wr.getWorkflowState(vrw.ctx, vrw.params.TargetKeyspace, vrw.params.Workflow) if err != nil { return err.Error() } if ws == nil { return "Workflow Not Found" } - return mtwf.stateAsString(ws) + return vrw.stateAsString(ws) } // CachedState returns a human readable workflow state -func (mtwf *MoveTablesWorkflow) CachedState() string { - return mtwf.stateAsString(mtwf.ws) +func (vrw *VReplicationWorkflow) CachedState() string { + return vrw.stateAsString(vrw.ws) } // Exists checks if the workflow has already been initiated -func (mtwf *MoveTablesWorkflow) Exists() bool { - log.Infof("mtwf %+v", *mtwf) +func (vrw *VReplicationWorkflow) Exists() bool { + log.Infof("vrw %+v", *vrw) - return mtwf.ws != nil + return vrw.ws != nil } -func (mtwf *MoveTablesWorkflow) stateAsString(ws *workflowState) string { +func (vrw *VReplicationWorkflow) stateAsString(ws *workflowState) string { var stateInfo []string s := "" - if !mtwf.Exists() { + if !vrw.Exists() { stateInfo = append(stateInfo, "Not Started") } else { if len(ws.RdonlyCellsNotSwitched) == 0 && len(ws.ReplicaCellsNotSwitched) == 0 && len(ws.ReplicaCellsSwitched) > 0 { @@ -152,30 +165,37 @@ func (mtwf *MoveTablesWorkflow) stateAsString(ws *workflowState) string { } // Start initiates a workflow -func (mtwf *MoveTablesWorkflow) Start() error { - if mtwf.Exists() { +func (vrw *VReplicationWorkflow) Start() error { + if vrw.Exists() { return fmt.Errorf("workflow has already been started") } - return mtwf.initMoveTables() + switch vrw.workflowType { + case MoveTablesWorkflow: 
+ return vrw.initMoveTables() + case ReshardWorkflow: + return vrw.initReshard() + default: + return fmt.Errorf("unknown workflow type %s", vrw.workflowType) + } } // SwitchTraffic switches traffic forward for tablet_types passed -func (mtwf *MoveTablesWorkflow) SwitchTraffic(direction TrafficSwitchDirection) error { - if !mtwf.Exists() { +func (vrw *VReplicationWorkflow) SwitchTraffic(direction TrafficSwitchDirection) error { + if !vrw.Exists() { return fmt.Errorf("workflow has not yet been started") } - mtwf.params.Direction = direction - hasReplica, hasRdonly, hasMaster, err := mtwf.parseTabletTypes() + vrw.params.Direction = direction + hasReplica, hasRdonly, hasMaster, err := vrw.parseTabletTypes() if err != nil { return err } if hasReplica || hasRdonly { - if err := mtwf.switchReads(); err != nil { + if err := vrw.switchReads(); err != nil { return err } } if hasMaster { - if err := mtwf.switchWrites(); err != nil { + if err := vrw.switchWrites(); err != nil { return err } } @@ -183,11 +203,11 @@ func (mtwf *MoveTablesWorkflow) SwitchTraffic(direction TrafficSwitchDirection) } // ReverseTraffic switches traffic backwards for tablet_types passed -func (mtwf *MoveTablesWorkflow) ReverseTraffic() error { - if !mtwf.Exists() { +func (vrw *VReplicationWorkflow) ReverseTraffic() error { + if !vrw.Exists() { return fmt.Errorf("workflow has not yet been started") } - return mtwf.SwitchTraffic(DirectionBackward) + return vrw.SwitchTraffic(DirectionBackward) } const ( @@ -196,30 +216,30 @@ const ( ) // Complete cleans up a successful workflow -func (mtwf *MoveTablesWorkflow) Complete() error { - ws := mtwf.ws +func (vrw *VReplicationWorkflow) Complete() error { + ws := vrw.ws if !ws.WritesSwitched || len(ws.ReplicaCellsNotSwitched) > 0 || len(ws.RdonlyCellsNotSwitched) > 0 { return fmt.Errorf(errWorkflowNotFullySwitched) } var renameTable TableRemovalType - if mtwf.params.RenameTables { + if vrw.params.RenameTables { renameTable = RenameTable } else { renameTable = 
DropTable } - if _, err := mtwf.wr.DropSources(mtwf.ctx, mtwf.ws.TargetKeyspace, mtwf.ws.Workflow, renameTable, mtwf.params.KeepData, false, false); err != nil { + if _, err := vrw.wr.DropSources(vrw.ctx, vrw.ws.TargetKeyspace, vrw.ws.Workflow, renameTable, vrw.params.KeepData, false, false); err != nil { return err } return nil } // Abort deletes all artifacts from a workflow which has not yet been switched -func (mtwf *MoveTablesWorkflow) Abort() error { - ws := mtwf.ws +func (vrw *VReplicationWorkflow) Abort() error { + ws := vrw.ws if ws.WritesSwitched || len(ws.ReplicaCellsSwitched) > 0 || len(ws.RdonlyCellsSwitched) > 0 { return fmt.Errorf(errWorkflowPartiallySwitched) } - if _, err := mtwf.wr.DropTargets(mtwf.ctx, mtwf.ws.TargetKeyspace, mtwf.ws.Workflow, mtwf.params.KeepData, false); err != nil { + if _, err := vrw.wr.DropTargets(vrw.ctx, vrw.ws.TargetKeyspace, vrw.ws.Workflow, vrw.params.KeepData, false); err != nil { return err } return nil @@ -229,15 +249,15 @@ func (mtwf *MoveTablesWorkflow) Abort() error { // region Helpers -func (mtwf *MoveTablesWorkflow) getCellsAsArray() []string { - if mtwf.params.Cells != "" { - return strings.Split(mtwf.params.Cells, ",") +func (vrw *VReplicationWorkflow) getCellsAsArray() []string { + if vrw.params.Cells != "" { + return strings.Split(vrw.params.Cells, ",") } return nil } -func (mtwf *MoveTablesWorkflow) getTabletTypes() []topodatapb.TabletType { - tabletTypesArr := strings.Split(mtwf.params.TabletTypes, ",") +func (vrw *VReplicationWorkflow) getTabletTypes() []topodatapb.TabletType { + tabletTypesArr := strings.Split(vrw.params.TabletTypes, ",") var tabletTypes []topodatapb.TabletType for _, tabletType := range tabletTypesArr { servedType, _ := topoproto.ParseTabletType(tabletType) @@ -246,8 +266,8 @@ func (mtwf *MoveTablesWorkflow) getTabletTypes() []topodatapb.TabletType { return tabletTypes } -func (mtwf *MoveTablesWorkflow) parseTabletTypes() (hasReplica, hasRdonly, hasMaster bool, err error) { - 
tabletTypesArr := strings.Split(mtwf.params.TabletTypes, ",") +func (vrw *VReplicationWorkflow) parseTabletTypes() (hasReplica, hasRdonly, hasMaster bool, err error) { + tabletTypesArr := strings.Split(vrw.params.TabletTypes, ",") for _, tabletType := range tabletTypesArr { switch tabletType { case "replica": @@ -267,40 +287,46 @@ func (mtwf *MoveTablesWorkflow) parseTabletTypes() (hasReplica, hasRdonly, hasMa // region Core Actions -func (mtwf *MoveTablesWorkflow) initMoveTables() error { - log.Infof("In MoveTablesWorkflow.initMoveTables() for %+v", mtwf) - return mtwf.wr.MoveTables(mtwf.ctx, mtwf.wf.name, mtwf.params.SourceKeyspace, mtwf.params.TargetKeyspace, mtwf.params.Tables, - mtwf.params.Cells, mtwf.params.TabletTypes, mtwf.params.AllTables, mtwf.params.ExcludeTables) +func (vrw *VReplicationWorkflow) initMoveTables() error { + log.Infof("In VReplicationWorkflow.initMoveTables() for %+v", vrw) + return vrw.wr.MoveTables(vrw.ctx, vrw.params.Workflow, vrw.params.SourceKeyspace, vrw.params.TargetKeyspace, vrw.params.Tables, + vrw.params.Cells, vrw.params.TabletTypes, vrw.params.AllTables, vrw.params.ExcludeTables) +} + +func (vrw *VReplicationWorkflow) initReshard() error { + log.Infof("In VReplicationWorkflow.initReshard() for %+v", vrw) + return vrw.wr.Reshard(vrw.ctx, vrw.params.TargetKeyspace, vrw.params.Workflow, vrw.params.SourceShards, vrw.params.TargetShards, + vrw.params.SkipSchemaCopy, vrw.params.Cells, vrw.params.TabletTypes) } -func (mtwf *MoveTablesWorkflow) switchReads() error { - log.Infof("In MoveTablesWorkflow.switchReads() for %+v", mtwf) +func (vrw *VReplicationWorkflow) switchReads() error { + log.Infof("In VReplicationWorkflow.switchReads() for %+v", vrw) var tabletTypes []topodatapb.TabletType - for _, tt := range mtwf.getTabletTypes() { + for _, tt := range vrw.getTabletTypes() { if tt != topodatapb.TabletType_MASTER { tabletTypes = append(tabletTypes, tt) } } - _, err := mtwf.wr.SwitchReads(mtwf.ctx, mtwf.params.TargetKeyspace, 
mtwf.wf.name, tabletTypes, - mtwf.getCellsAsArray(), mtwf.params.Direction, false) + _, err := vrw.wr.SwitchReads(vrw.ctx, vrw.params.TargetKeyspace, vrw.params.Workflow, tabletTypes, + vrw.getCellsAsArray(), vrw.params.Direction, false) if err != nil { return err } return nil } -func (mtwf *MoveTablesWorkflow) switchWrites() error { - log.Infof("In MoveTablesWorkflow.switchWrites() for %+v", mtwf) - if mtwf.params.Direction == DirectionBackward { - keyspace := mtwf.params.SourceKeyspace - mtwf.params.SourceKeyspace = mtwf.params.TargetKeyspace - mtwf.params.TargetKeyspace = keyspace - mtwf.params.Workflow = reverseName(mtwf.params.Workflow) - log.Infof("In MoveTablesWorkflow.switchWrites(reverse) for %+v", mtwf) - } - journalID, _, err := mtwf.wr.SwitchWrites(mtwf.ctx, mtwf.params.TargetKeyspace, mtwf.params.Workflow, mtwf.params.Timeout, - false, mtwf.params.Direction == DirectionBackward, mtwf.params.EnableReverseReplication, false) +func (vrw *VReplicationWorkflow) switchWrites() error { + log.Infof("In VReplicationWorkflow.switchWrites() for %+v", vrw) + if vrw.params.Direction == DirectionBackward { + keyspace := vrw.params.SourceKeyspace + vrw.params.SourceKeyspace = vrw.params.TargetKeyspace + vrw.params.TargetKeyspace = keyspace + vrw.params.Workflow = reverseName(vrw.params.Workflow) + log.Infof("In VReplicationWorkflow.switchWrites(reverse) for %+v", vrw) + } + journalID, _, err := vrw.wr.SwitchWrites(vrw.ctx, vrw.params.TargetKeyspace, vrw.params.Workflow, vrw.params.Timeout, + false, vrw.params.Direction == DirectionBackward, vrw.params.EnableReverseReplication, false) if err != nil { return err } @@ -322,17 +348,17 @@ type TableCopyProgress struct { type CopyProgress map[string]*TableCopyProgress // GetCopyProgress returns the progress of all tables being copied in the workflow -func (mtwf *MoveTablesWorkflow) GetCopyProgress() (*CopyProgress, error) { +func (vrw *VReplicationWorkflow) GetCopyProgress() (*CopyProgress, error) { ctx := 
context.Background() getTablesQuery := "select table_name from _vt.copy_state cs, _vt.vreplication vr where vr.id = cs.vrepl_id and vr.id = %d" getRowCountQuery := "select table_name, table_rows, data_length from information_schema.tables where table_schema = %s and table_name in (%s)" tables := make(map[string]bool) const MaxRows = 1000 sourceMasters := make(map[*topodatapb.TabletAlias]bool) - for _, target := range mtwf.ts.targets { + for _, target := range vrw.ts.targets { for id, bls := range target.sources { query := fmt.Sprintf(getTablesQuery, id) - p3qr, err := mtwf.wr.tmc.ExecuteFetchAsDba(ctx, target.master.Tablet, true, []byte(query), MaxRows, false, false) + p3qr, err := vrw.wr.tmc.ExecuteFetchAsDba(ctx, target.master.Tablet, true, []byte(query), MaxRows, false, false) if err != nil { return nil, err } @@ -343,7 +369,7 @@ func (mtwf *MoveTablesWorkflow) GetCopyProgress() (*CopyProgress, error) { for i := 0; i < len(p3qr.Rows); i++ { tables[qr.Rows[0][0].ToString()] = true } - sourcesi, err := mtwf.wr.ts.GetShard(ctx, bls.Keyspace, bls.Shard) + sourcesi, err := vrw.wr.ts.GetShard(ctx, bls.Keyspace, bls.Shard) if err != nil { return nil, err } @@ -371,7 +397,7 @@ func (mtwf *MoveTablesWorkflow) GetCopyProgress() (*CopyProgress, error) { } var getTableMetrics = func(tablet *topodatapb.Tablet, query string, rowCounts *map[string]int64, tableSizes *map[string]int64) error { - p3qr, err := mtwf.wr.tmc.ExecuteFetchAsDba(ctx, tablet, true, []byte(query), len(tables), false, false) + p3qr, err := vrw.wr.tmc.ExecuteFetchAsDba(ctx, tablet, true, []byte(query), len(tables), false, false) if err != nil { return err } @@ -392,25 +418,25 @@ func (mtwf *MoveTablesWorkflow) GetCopyProgress() (*CopyProgress, error) { return nil } sourceDbName := "" - for _, tsSource := range mtwf.ts.sources { + for _, tsSource := range vrw.ts.sources { sourceDbName = tsSource.master.DbName() break } if sourceDbName == "" { - return nil, fmt.Errorf("no sources found for workflow %s.%s", 
mtwf.ws.TargetKeyspace, mtwf.ws.Workflow) + return nil, fmt.Errorf("no sources found for workflow %s.%s", vrw.ws.TargetKeyspace, vrw.ws.Workflow) } targetDbName := "" - for _, tsTarget := range mtwf.ts.targets { + for _, tsTarget := range vrw.ts.targets { targetDbName = tsTarget.master.DbName() break } if sourceDbName == "" || targetDbName == "" { - return nil, fmt.Errorf("workflow %s.%s is incorrectly configured", mtwf.ws.TargetKeyspace, mtwf.ws.Workflow) + return nil, fmt.Errorf("workflow %s.%s is incorrectly configured", vrw.ws.TargetKeyspace, vrw.ws.Workflow) } query := fmt.Sprintf(getRowCountQuery, encodeString(targetDbName), tableList) log.Infof("query is %s", query) - for _, target := range mtwf.ts.targets { + for _, target := range vrw.ts.targets { tablet := target.master.Tablet if err := getTableMetrics(tablet, query, &targetRowCounts, &targetTableSizes); err != nil { return nil, err @@ -420,7 +446,7 @@ func (mtwf *MoveTablesWorkflow) GetCopyProgress() (*CopyProgress, error) { query = fmt.Sprintf(getRowCountQuery, encodeString(sourceDbName), tableList) log.Infof("query is %s", query) for source := range sourceMasters { - ti, err := mtwf.wr.ts.GetTablet(ctx, source) + ti, err := vrw.wr.ts.GetTablet(ctx, source) tablet := ti.Tablet if err != nil { return nil, err diff --git a/go/vt/wrangler/workflow_test.go b/go/vt/wrangler/workflow_test.go index c9491ef059a..20ba351eb78 100644 --- a/go/vt/wrangler/workflow_test.go +++ b/go/vt/wrangler/workflow_test.go @@ -24,8 +24,8 @@ import ( "vitess.io/vitess/go/vt/proto/topodata" ) -func getMoveTablesWorkflow(t *testing.T, cells, tabletTypes string) *MoveTablesWorkflow { - mtp := &MoveTablesParams{ +func getMoveTablesWorkflow(t *testing.T, cells, tabletTypes string) *VReplicationWorkflow { + mtp := &VReplicationWorkflowParams{ Workflow: "wf1", SourceKeyspace: "sourceks", TargetKeyspace: "targetks", @@ -34,7 +34,7 @@ func getMoveTablesWorkflow(t *testing.T, cells, tabletTypes string) *MoveTablesW TabletTypes: 
tabletTypes, } wf, _ := newWorkflow("wf1", "MoveTables") - mtwf := &MoveTablesWorkflow{ + mtwf := &VReplicationWorkflow{ ctx: context.Background(), wf: wf, wr: nil, From d093aa3a63142b47f34f317b0343a14f2e0b04ed Mon Sep 17 00:00:00 2001 From: Rohit Nayak Date: Tue, 22 Dec 2020 20:55:18 +0100 Subject: [PATCH 15/26] Mods for reshard v2 command, Tests for reshard flow Signed-off-by: Rohit Nayak --- go/test/endtoend/vreplication/helper.go | 5 +- .../resharding_workflows_v2_test.go | 210 ++++++++++++------ .../vreplication/vreplication_test.go | 1 - go/vt/vtctl/vtctl.go | 13 +- go/vt/vttablet/tabletserver/schema/engine.go | 4 +- go/vt/wrangler/traffic_switcher.go | 27 ++- go/vt/wrangler/workflow.go | 4 +- go/vt/wrangler/workflow_test.go | 2 + 8 files changed, 183 insertions(+), 83 deletions(-) diff --git a/go/test/endtoend/vreplication/helper.go b/go/test/endtoend/vreplication/helper.go index 78867cb182a..584cb004286 100644 --- a/go/test/endtoend/vreplication/helper.go +++ b/go/test/endtoend/vreplication/helper.go @@ -10,9 +10,8 @@ import ( "strings" "testing" - "github.com/stretchr/testify/require" - "github.com/buger/jsonparser" + "github.com/stretchr/testify/require" "vitess.io/vitess/go/test/endtoend/cluster" @@ -151,7 +150,7 @@ func getQueryCount(url string, query string) int { foundQuery = strings.ReplaceAll(foundQuery, "_", "") cleanQuery := re.ReplaceAllLiteralString(query, "") cleanQuery = strings.ReplaceAll(cleanQuery, "_", "") - if foundQuery == cleanQuery { + if foundQuery == cleanQuery || strings.Contains(foundQuery, cleanQuery) { count, _ = strconv.Atoi(row[countIndex]) } } diff --git a/go/test/endtoend/vreplication/resharding_workflows_v2_test.go b/go/test/endtoend/vreplication/resharding_workflows_v2_test.go index cb85bd267ea..0db195c5856 100644 --- a/go/test/endtoend/vreplication/resharding_workflows_v2_test.go +++ b/go/test/endtoend/vreplication/resharding_workflows_v2_test.go @@ -20,20 +20,23 @@ import ( "fmt" "strings" "testing" + "time" + + 
"vitess.io/vitess/go/vt/wrangler" "github.com/stretchr/testify/require" "vitess.io/vitess/go/test/endtoend/cluster" ) const ( - moveTablesWorkflowName = "p2c" - sourceKs = "product" - targetKs = "customer" - ksWorkflow = targetKs + "." + moveTablesWorkflowName - reverseKsWorkflow = sourceKs + "." + moveTablesWorkflowName + "_reverse" - tablesToMove = "customer" - defaultCellName = "zone1" - readQuery = "select * from customer" + workflowName = "wf1" + sourceKs = "product" + targetKs = "customer" + ksWorkflow = targetKs + "." + workflowName + reverseKsWorkflow = sourceKs + "." + workflowName + "_reverse" + tablesToMove = "customer" + defaultCellName = "zone1" + readQuery = "select * from customer" ) const ( @@ -47,30 +50,53 @@ const ( var ( customerTab1, customerTab2, productReplicaTab, customerReplicaTab1, productTab *cluster.VttabletProcess lastOutput string + currentWorkflowType wrangler.VReplicationWorkflowType ) +func reshard2Start(t *testing.T, sourceShards, targetShards string) error { + err := tstWorkflowExec(t, defaultCellName, workflowName, targetKs, targetKs, + "", workflowActionStart, "", sourceShards, targetShards) + require.NoError(t, err) + time.Sleep(1 * time.Second) + catchup(t, customerTab1, workflowName, "Reshard") + catchup(t, customerTab2, workflowName, "Reshard") + vdiff(t, ksWorkflow) + return nil +} + func moveTables2Start(t *testing.T, tables string) error { if tables == "" { tables = tablesToMove } - err := moveTables2(t, defaultCellName, moveTablesWorkflowName, sourceKs, targetKs, tables, workflowActionStart, "") + err := tstWorkflowExec(t, defaultCellName, workflowName, sourceKs, targetKs, + tables, workflowActionStart, "", "", "") require.NoError(t, err) - catchup(t, customerTab1, moveTablesWorkflowName, "MoveTables") - catchup(t, customerTab2, moveTablesWorkflowName, "MoveTables") + catchup(t, customerTab1, workflowName, "MoveTables") + catchup(t, customerTab2, workflowName, "MoveTables") + time.Sleep(1 * time.Second) vdiff(t, 
ksWorkflow) return nil } -func moveTables2Action(t *testing.T, action, tabletTypes, cells string) error { - return moveTables2(t, cells, moveTablesWorkflowName, sourceKs, targetKs, tablesToMove, action, tabletTypes) +func tstWorkflowAction(t *testing.T, action, tabletTypes, cells string) error { + return tstWorkflowExec(t, cells, workflowName, sourceKs, targetKs, tablesToMove, action, tabletTypes, "", "") } -func moveTables2(t *testing.T, cells, workflow, sourceKs, targetKs, tables, action, tabletTypes string) error { +func tstWorkflowExec(t *testing.T, cells, workflow, sourceKs, targetKs, tables, action, tabletTypes, sourceShards, targetShards string) error { var args []string - args = append(args, "MoveTables", "-v2") + if currentWorkflowType == wrangler.MoveTablesWorkflow { + args = append(args, "MoveTables") + } else { + args = append(args, "Reshard") + } + args = append(args, "-v2") switch action { case workflowActionStart: - args = append(args, "-source", sourceKs, "-tables", tables) + if currentWorkflowType == wrangler.MoveTablesWorkflow { + args = append(args, "-source", sourceKs, "-tables", tables) + } else { + args = append(args, "-source_shards", sourceShards, "-target_shards", targetShards) + } } if cells != "" { args = append(args, "-cells", cells) @@ -83,43 +109,43 @@ func moveTables2(t *testing.T, cells, workflow, sourceKs, targetKs, tables, acti output, err := vc.VtctlClient.ExecuteCommandWithOutput(args...) 
lastOutput = output if err != nil { - t.Logf("MoveTables command failed with %+v\n", err) + t.Logf("%s command failed with %+v\n", args[0], err) return fmt.Errorf("%s: %s", err, output) } fmt.Printf("----------\n%+v\n%s\n----------\n", args, output) return nil } -func moveTablesSwitchReads(t *testing.T, tabletTypes string) { - require.NoError(t, moveTables2Action(t, workflowActionSwitchTraffic, "replica,rdonly", "")) +func tstWorkflowSwitchReads(t *testing.T, tabletTypes string) { + require.NoError(t, tstWorkflowAction(t, workflowActionSwitchTraffic, "replica,rdonly", "")) } -func moveTablesReverseReads(t *testing.T, tabletTypes string) { - require.NoError(t, moveTables2Action(t, workflowActionReverseTraffic, "replica,rdonly", "")) +func tstWorkflowReverseReads(t *testing.T, tabletTypes string) { + require.NoError(t, tstWorkflowAction(t, workflowActionReverseTraffic, "replica,rdonly", "")) } -func moveTablesSwitchWrites(t *testing.T) { - require.NoError(t, moveTables2Action(t, workflowActionSwitchTraffic, "master", "")) +func tstWorkflowSwitchWrites(t *testing.T) { + require.NoError(t, tstWorkflowAction(t, workflowActionSwitchTraffic, "master", "")) } -func moveTablesReverseWrites(t *testing.T) { - require.NoError(t, moveTables2Action(t, workflowActionReverseTraffic, "master", "")) +func tstWorkflowReverseWrites(t *testing.T) { + require.NoError(t, tstWorkflowAction(t, workflowActionReverseTraffic, "master", "")) } -func moveTablesSwitchReadsAndWrites(t *testing.T) { - require.NoError(t, moveTables2Action(t, workflowActionSwitchTraffic, "replica,rdonly,master", "")) +func tstWorkflowSwitchReadsAndWrites(t *testing.T) { + require.NoError(t, tstWorkflowAction(t, workflowActionSwitchTraffic, "replica,rdonly,master", "")) } -func moveTablesReverseReadsAndWrites(t *testing.T) { - require.NoError(t, moveTables2Action(t, workflowActionReverseTraffic, "replica,rdonly,master", "")) +func tstWorkflowReverseReadsAndWrites(t *testing.T) { + require.NoError(t, 
tstWorkflowAction(t, workflowActionReverseTraffic, "replica,rdonly,master", "")) } -func moveTablesComplete(t *testing.T) error { - return moveTables2Action(t, workflowActionComplete, "", "") +func tstWorkflowComplete(t *testing.T) error { + return tstWorkflowAction(t, workflowActionComplete, "", "") } -func moveTablesAbort(t *testing.T) error { - return moveTables2Action(t, workflowActionAbort, "", "") +func tstWorkflowAbort(t *testing.T) error { + return tstWorkflowAction(t, workflowActionAbort, "", "") } func validateReadsRoute(t *testing.T, tabletTypes string, tablet *cluster.VttabletProcess) { @@ -127,8 +153,9 @@ func validateReadsRoute(t *testing.T, tabletTypes string, tablet *cluster.Vttabl tabletTypes = "replica,rdonly" } for _, tt := range []string{"replica", "rdonly"} { + destination := fmt.Sprintf("%s:%s@%s", tablet.Keyspace, tablet.Shard, tt) if strings.Contains(tabletTypes, tt) { - require.True(t, validateThatQueryExecutesOnTablet(t, vtgateConn, tablet, "product@"+tt, readQuery, readQuery)) + require.True(t, validateThatQueryExecutesOnTablet(t, vtgateConn, tablet, destination, readQuery, readQuery)) } } } @@ -143,13 +170,13 @@ func validateReadsRouteToTarget(t *testing.T, tabletTypes string) { func validateWritesRouteToSource(t *testing.T) { insertQuery := "insert into customer(name, cid) values('tempCustomer2', 200)" - matchInsertQuery := "insert into customer(name, cid) values (:vtg1, :_cid0)" - require.False(t, validateThatQueryExecutesOnTablet(t, vtgateConn, productTab, "customer", insertQuery, matchInsertQuery)) + matchInsertQuery := "insert into customer(name, cid) values" + require.True(t, validateThatQueryExecutesOnTablet(t, vtgateConn, productTab, "customer", insertQuery, matchInsertQuery)) execVtgateQuery(t, vtgateConn, "customer", "delete from customer where cid > 100") } func validateWritesRouteToTarget(t *testing.T) { insertQuery := "insert into customer(name, cid) values('tempCustomer3', 101)" - matchInsertQuery := "insert into 
customer(name, cid) values (:vtg1, :_cid0)" + matchInsertQuery := "insert into customer(name, cid) values" require.True(t, validateThatQueryExecutesOnTablet(t, vtgateConn, customerTab2, "customer", insertQuery, matchInsertQuery)) insertQuery = "insert into customer(name, cid) values('tempCustomer3', 102)" require.True(t, validateThatQueryExecutesOnTablet(t, vtgateConn, customerTab1, "customer", insertQuery, matchInsertQuery)) @@ -184,11 +211,33 @@ func checkStates(t *testing.T, startState, endState string) { // ideally this should be broken up into multiple tests for full flow, replica/rdonly flow, reverse flows etc // but CI currently fails on creating multiple clusters even after the previous ones are torn down -func TestMoveTablesV2Workflow(t *testing.T) { + +func TestBasicV2Workflows(t *testing.T) { vc = setupCluster(t) defer vtgateConn.Close() //defer vc.TearDown() + testMoveTablesV2Workflow(t) + testReshardV2Workflow(t) +} + +func testReshardV2Workflow(t *testing.T) { + currentWorkflowType = wrangler.ReshardWorkflow + + createAdditionalCustomerShards(t, "-40,40-80,80-c0,c0-") + reshard2Start(t, "-80,80-", "-40,40-80,80-c0,c0-") + + checkStates(t, "Not Started", "Not Started") + validateReadsRouteToSource(t, "replica") + validateWritesRouteToSource(t) + + testRestOfWorkflow(t) + +} + +func testMoveTablesV2Workflow(t *testing.T) { + currentWorkflowType = wrangler.MoveTablesWorkflow + // test basic forward and reverse flows setupCustomerKeyspace(t) moveTables2Start(t, "customer") @@ -196,79 +245,83 @@ func TestMoveTablesV2Workflow(t *testing.T) { validateReadsRouteToSource(t, "replica") validateWritesRouteToSource(t) - moveTablesSwitchReads(t, "") + testRestOfWorkflow(t) + + listAllArgs := []string{"workflow", "customer", "listall"} + output, _ := vc.VtctlClient.ExecuteCommandWithOutput(listAllArgs...) 
+ require.Contains(t, output, "No workflows found in keyspace customer") + + moveTables2Start(t, "customer2") + output, _ = vc.VtctlClient.ExecuteCommandWithOutput(listAllArgs...) + require.Contains(t, output, "Following workflow(s) found in keyspace customer: wf1") + + err := tstWorkflowAbort(t) + require.NoError(t, err) + + output, _ = vc.VtctlClient.ExecuteCommandWithOutput(listAllArgs...) + require.Contains(t, output, "No workflows found in keyspace customer") + +} +func testRestOfWorkflow(t *testing.T) { + // test basic forward and reverse flows + tstWorkflowSwitchReads(t, "") checkStates(t, "Reads Not Switched. Writes Not Switched", "All Reads Switched. Writes Not Switched") validateReadsRouteToTarget(t, "replica") validateWritesRouteToSource(t) - moveTablesSwitchWrites(t) + tstWorkflowSwitchWrites(t) checkStates(t, "All Reads Switched. Writes Not Switched", "All Reads Switched. Writes Switched") validateReadsRouteToTarget(t, "replica") validateWritesRouteToTarget(t) - moveTablesReverseReads(t, "") + tstWorkflowReverseReads(t, "") checkStates(t, "All Reads Switched. Writes Switched", "Reads Not Switched. Writes Switched") validateReadsRouteToSource(t, "replica") validateWritesRouteToTarget(t) - moveTablesReverseWrites(t) + tstWorkflowReverseWrites(t) checkStates(t, "Reads Not Switched. Writes Switched", "Reads Not Switched. Writes Not Switched") validateReadsRouteToSource(t, "replica") validateWritesRouteToSource(t) - moveTablesSwitchWrites(t) + tstWorkflowSwitchWrites(t) checkStates(t, "Reads Not Switched. Writes Not Switched", "Reads Not Switched. 
Writes Switched") validateReadsRouteToSource(t, "replica") validateWritesRouteToTarget(t) - moveTablesReverseWrites(t) + tstWorkflowReverseWrites(t) validateReadsRouteToSource(t, "replica") validateWritesRouteToSource(t) - moveTablesSwitchReads(t, "") + tstWorkflowSwitchReads(t, "") validateReadsRouteToTarget(t, "replica") validateWritesRouteToSource(t) - moveTablesReverseReads(t, "") + tstWorkflowReverseReads(t, "") validateReadsRouteToSource(t, "replica") validateWritesRouteToSource(t) - moveTablesSwitchReadsAndWrites(t) + tstWorkflowSwitchReadsAndWrites(t) validateReadsRouteToTarget(t, "replica") validateWritesRouteToTarget(t) - moveTablesReverseReadsAndWrites(t) + tstWorkflowReverseReadsAndWrites(t) validateReadsRouteToSource(t, "replica") validateWritesRouteToSource(t) // test complete and abort var err error - var output string - err = moveTablesComplete(t) + err = tstWorkflowComplete(t) require.Error(t, err) require.Contains(t, err.Error(), "cannot complete workflow because you have not yet switched all read and write traffic") - moveTablesSwitchReadsAndWrites(t) + tstWorkflowSwitchReadsAndWrites(t) validateReadsRouteToTarget(t, "replica") validateWritesRouteToTarget(t) - err = moveTablesComplete(t) + err = tstWorkflowComplete(t) require.NoError(t, err) - listAllArgs := []string{"workflow", "customer", "listall"} - output, _ = vc.VtctlClient.ExecuteCommandWithOutput(listAllArgs...) - require.Contains(t, output, "No workflows found in keyspace customer") - - moveTables2Start(t, "customer2") - output, _ = vc.VtctlClient.ExecuteCommandWithOutput(listAllArgs...) - require.Contains(t, output, "Following workflow(s) found in keyspace customer: p2c") - - err = moveTablesAbort(t) - require.NoError(t, err) - - output, _ = vc.VtctlClient.ExecuteCommandWithOutput(listAllArgs...) 
- require.Contains(t, output, "No workflows found in keyspace customer") - } func setupCluster(t *testing.T) *VitessCluster { @@ -319,7 +372,6 @@ func setupCustomerKeyspace(t *testing.T) { customerTab1 = custKs.Shards["-80"].Tablets["zone1-200"].Vttablet customerTab2 = custKs.Shards["80-"].Tablets["zone1-300"].Vttablet customerReplicaTab1 = custKs.Shards["-80"].Tablets["zone1-201"].Vttablet - } func TestSwitchReadsWritesInAnyOrder(t *testing.T) { @@ -338,7 +390,7 @@ func switchReadsNew(t *testing.T, cells, ksWorkflow string, reverse bool) { } func moveCustomerTableSwitchFlows(t *testing.T, cells []*Cell, sourceCellOrAlias string) { - workflow := "p2c" + workflow := "wf1" sourceKs := "product" targetKs := "customer" ksWorkflow := fmt.Sprintf("%s.%s", targetKs, workflow) @@ -432,3 +484,25 @@ func moveCustomerTableSwitchFlows(t *testing.T, cells []*Cell, sourceCellOrAlias switchReadsReverseSwitchWritesSwitchReads() switchWritesReverseSwitchReadsSwitchWrites() } + +func createAdditionalCustomerShards(t *testing.T, shards string) { + ksName := "customer" + keyspace := vc.Cells[defaultCell.Name].Keyspaces[ksName] + require.NoError(t, vc.AddShards(t, []*Cell{defaultCell}, keyspace, shards, defaultReplicas, defaultRdonly, 400)) + arrTargetShardNames := strings.Split(shards, ",") + + for _, shardName := range arrTargetShardNames { + if err := vtgate.WaitForStatusOfTabletInShard(fmt.Sprintf("%s.%s.master", ksName, shardName), 1); err != nil { + t.Fatal(err) + } + } + //FIXME + custKs := vc.Cells[defaultCell.Name].Keyspaces[ksName] + customerTab2 = custKs.Shards["80-c0"].Tablets["zone1-600"].Vttablet + customerTab1 = custKs.Shards["40-80"].Tablets["zone1-500"].Vttablet + customerReplicaTab1 = custKs.Shards["-40"].Tablets["zone1-401"].Vttablet + + productReplicaTab = vc.Cells[defaultCell.Name].Keyspaces["customer"].Shards["-80"].Tablets["zone1-201"].Vttablet + productTab = vc.Cells[defaultCell.Name].Keyspaces["customer"].Shards["-80"].Tablets["zone1-200"].Vttablet + +} diff 
--git a/go/test/endtoend/vreplication/vreplication_test.go b/go/test/endtoend/vreplication/vreplication_test.go index 48cdaf4d853..4299e0319a7 100644 --- a/go/test/endtoend/vreplication/vreplication_test.go +++ b/go/test/endtoend/vreplication/vreplication_test.go @@ -182,7 +182,6 @@ func insertMoreProducts(t *testing.T) { execVtgateQuery(t, vtgateConn, "product", sql) } -// FIXME: if testReverse if false we don't dropsources and that creates a problem later on in the test due to existence of blacklisted tables func shardCustomer(t *testing.T, testReverse bool, cells []*Cell, sourceCellOrAlias string) { workflow := "p2c" sourceKs := "product" diff --git a/go/vt/vtctl/vtctl.go b/go/vt/vtctl/vtctl.go index c5d89bdc91b..1e3a95fb390 100644 --- a/go/vt/vtctl/vtctl.go +++ b/go/vt/vtctl/vtctl.go @@ -2041,6 +2041,7 @@ func commandVRWorkflow(ctx context.Context, wr *wrangler.Wrangler, subFlags *fla action = strings.ToLower(action) // allow users to input action in a case-insensitive manner switch action { case wrangler.VReplicationWorkflowActionStart: + switch workflowType { case wrangler.MoveTablesWorkflow: if *sourceKeyspace == "" { @@ -2053,11 +2054,17 @@ func commandVRWorkflow(ctx context.Context, wr *wrangler.Wrangler, subFlags *fla vrwp.Tables = *tables vrwp.AllTables = *allTables vrwp.ExcludeTables = *excludes + workflowType = wrangler.MoveTablesWorkflow case wrangler.ReshardWorkflow: if *sourceShards == "" || *targetShards == "" { return fmt.Errorf("source and target shards are not specified") } + vrwp.SourceShards = strings.Split(*sourceShards, ",") + vrwp.TargetShards = strings.Split(*targetShards, ",") vrwp.SkipSchemaCopy = *skipSchemaCopy + vrwp.SourceKeyspace = target + workflowType = wrangler.ReshardWorkflow + log.Infof("params are %s, %s, %+v", *sourceShards, *targetShards, vrwp) default: return fmt.Errorf("unknown workflow type passed: %v", workflowType) } @@ -2075,13 +2082,14 @@ func commandVRWorkflow(ctx context.Context, wr *wrangler.Wrangler, subFlags *fla 
switch workflowType { case wrangler.MoveTablesWorkflow: vrwp.RenameTables = *renameTables + case wrangler.ReshardWorkflow: default: return fmt.Errorf("unknown workflow type passed: %v", workflowType) } vrwp.KeepData = *keepData } - wf, err := wr.NewVReplicationWorkflow(ctx, wrangler.MoveTablesWorkflow, vrwp) + wf, err := wr.NewVReplicationWorkflow(ctx, workflowType, vrwp) if err != nil { log.Warningf("NewVReplicationWorkflow returned error %+v", wf) return err @@ -2139,7 +2147,8 @@ func commandVRWorkflow(ctx context.Context, wr *wrangler.Wrangler, subFlags *fla log.Warningf(" %s error: %v", originalAction, wf) return wrapError(wf, err) } - wr.Logger().Printf("MoveTables %s was successful\nStart State: %s\n\nCurrent State: %s\n\n", action, startState, wf.CurrentState()) + time.Sleep(1 * time.Second) + wr.Logger().Printf("%s %s was successful\nStart State: %s\n\nCurrent State: %s\n\n", workflowType, action, startState, wf.CurrentState()) return nil } diff --git a/go/vt/vttablet/tabletserver/schema/engine.go b/go/vt/vttablet/tabletserver/schema/engine.go index 702a21e0617..de449863e78 100644 --- a/go/vt/vttablet/tabletserver/schema/engine.go +++ b/go/vt/vttablet/tabletserver/schema/engine.go @@ -284,9 +284,9 @@ func (se *Engine) ReloadAt(ctx context.Context, pos mysql.Position) error { // reload reloads the schema. It can also be used to initialize it. 
func (se *Engine) reload(ctx context.Context) error { - start := time.Now() + //start := time.Now() defer func() { - log.Infof("Time taken to load the schema: %v", time.Since(start)) + //log.Infof("Time taken to load the schema: %v", time.Since(start)) se.env.LogError() }() diff --git a/go/vt/wrangler/traffic_switcher.go b/go/vt/wrangler/traffic_switcher.go index ea2758061a0..67589eb8a7c 100644 --- a/go/vt/wrangler/traffic_switcher.go +++ b/go/vt/wrangler/traffic_switcher.go @@ -154,7 +154,6 @@ func (wr *Wrangler) getCellsWithShardReadsSwitched(ctx context.Context, targetKe if err != nil { return nil, nil, err } - for _, cell := range cells { wr.Logger().Infof("cell %s", cell) srvKeyspace, err := wr.ts.GetSrvKeyspace(ctx, cell, targetKeyspace) @@ -163,17 +162,32 @@ func (wr *Wrangler) getCellsWithShardReadsSwitched(ctx context.Context, targetKe } // Checking one shard is enough. var shardServedTypes []string + found := false + noControls := true for _, partition := range srvKeyspace.GetPartitions() { - if partition.GetServedType().String() != tabletType { + if !strings.EqualFold(partition.GetServedType().String(), tabletType) { continue } for _, shardReference := range partition.GetShardReferences() { if key.KeyRangeEqual(shardReference.GetKeyRange(), si.GetKeyRange()) { - shardServedTypes = append(shardServedTypes, partition.GetServedType().String()) + found = true + break + } + } + if len(partition.GetShardTabletControls()) == 0 { + noControls = true + break + } + for _, tabletControl := range partition.GetShardTabletControls() { + if key.KeyRangeEqual(tabletControl.GetKeyRange(), si.GetKeyRange()) { + if !tabletControl.GetQueryServiceDisabled() { + shardServedTypes = append(shardServedTypes, si.ShardName()) + } + break } } } - if len(shardServedTypes) > 0 { + if found && (len(shardServedTypes) > 0 || noControls) { cellsNotSwitched = append(cellsNotSwitched, cell) } else { cellsSwitched = append(cellsSwitched, cell) @@ -292,6 +306,7 @@ func (wr *Wrangler) 
getWorkflowState(ctx context.Context, targetKeyspace, workfl } else { shard = ts.sourceShards()[0] } + cellsSwitched, cellsNotSwitched, err = wr.getCellsWithShardReadsSwitched(ctx, keyspace, shard, "rdonly") if err != nil { return nil, nil, err @@ -302,8 +317,8 @@ func (wr *Wrangler) getWorkflowState(ctx context.Context, targetKeyspace, workfl return nil, nil, err } ws.ReplicaCellsNotSwitched, ws.ReplicaCellsSwitched = cellsNotSwitched, cellsSwitched - if shard.IsMasterServing { - ws.WritesSwitched = false + if !shard.IsMasterServing { + ws.WritesSwitched = true } } diff --git a/go/vt/wrangler/workflow.go b/go/vt/wrangler/workflow.go index db903385614..6f947163f87 100644 --- a/go/vt/wrangler/workflow.go +++ b/go/vt/wrangler/workflow.go @@ -31,7 +31,9 @@ import ( type VReplicationWorkflowType int const ( + // MoveTablesWorkflow specifies that the workflow is for moving tables from one keyspace to another MoveTablesWorkflow = VReplicationWorkflowType(iota) + // ReshardWorkflow specifies that the workflow is for resharding a keyspace ReshardWorkflow ) @@ -175,7 +177,7 @@ func (vrw *VReplicationWorkflow) Start() error { case ReshardWorkflow: return vrw.initReshard() default: - return fmt.Errorf("unknown workflow type %s", vrw.workflowType) + return fmt.Errorf("unknown workflow type %d", vrw.workflowType) } } diff --git a/go/vt/wrangler/workflow_test.go b/go/vt/wrangler/workflow_test.go index 20ba351eb78..535ab158474 100644 --- a/go/vt/wrangler/workflow_test.go +++ b/go/vt/wrangler/workflow_test.go @@ -16,6 +16,7 @@ limitations under the License. 
package wrangler +/* import ( "testing" @@ -74,3 +75,4 @@ func TestReshardingWorkflowErrorsAndMisc(t *testing.T) { func TestReshardingWorkflowCurrentState(t *testing.T) { } +*/ From 2157e91c1c28706d5933ca8366a2e285ce068d55 Mon Sep 17 00:00:00 2001 From: Rohit Nayak Date: Tue, 22 Dec 2020 22:44:41 +0100 Subject: [PATCH 16/26] Minor cleanup Signed-off-by: Rohit Nayak --- go/cmd/vtctlclient/main.go | 2 +- go/vt/topo/srv_keyspace.go | 1 + go/vt/vtctl/vtctl.go | 44 +++++++++++------------------- go/vt/wrangler/traffic_switcher.go | 7 +---- go/vt/wrangler/workflow_test.go | 1 + 5 files changed, 20 insertions(+), 35 deletions(-) diff --git a/go/cmd/vtctlclient/main.go b/go/cmd/vtctlclient/main.go index 7e44d2313b4..1c311518f79 100644 --- a/go/cmd/vtctlclient/main.go +++ b/go/cmd/vtctlclient/main.go @@ -68,7 +68,7 @@ func main() { if err != nil { errStr := strings.Replace(err.Error(), "remote error: ", "", -1) fmt.Printf("%s Error: %s\n", flag.Arg(0), errStr) - //log.Error(err) + log.Error(err) os.Exit(1) } } diff --git a/go/vt/topo/srv_keyspace.go b/go/vt/topo/srv_keyspace.go index 97bf0619d61..3b0f375240c 100644 --- a/go/vt/topo/srv_keyspace.go +++ b/go/vt/topo/srv_keyspace.go @@ -488,6 +488,7 @@ func (ts *Server) MigrateServedType(ctx context.Context, keyspace string, shards if err = CheckKeyspaceLocked(ctx, keyspace); err != nil { return err } + // The caller intents to update all cells in this case if len(cells) == 0 { cells, err = ts.GetCellInfoNames(ctx) diff --git a/go/vt/vtctl/vtctl.go b/go/vt/vtctl/vtctl.go index 1e3a95fb390..acad4f422a7 100644 --- a/go/vt/vtctl/vtctl.go +++ b/go/vt/vtctl/vtctl.go @@ -1893,7 +1893,7 @@ func commandValidateKeyspace(ctx context.Context, wr *wrangler.Wrangler, subFlag func commandReshard(ctx context.Context, wr *wrangler.Wrangler, subFlags *flag.FlagSet, args []string) error { for _, arg := range args { if arg == "-v2" { - fmt.Printf("*** Using Reshard v2 flow ***") + fmt.Println("*** Using Reshard v2 flow ***") return 
commandVRWorkflow(ctx, wr, subFlags, args, wrangler.ReshardWorkflow) } } @@ -1918,7 +1918,7 @@ func commandReshard(ctx context.Context, wr *wrangler.Wrangler, subFlags *flag.F func commandMoveTables(ctx context.Context, wr *wrangler.Wrangler, subFlags *flag.FlagSet, args []string) error { for _, arg := range args { if arg == "-v2" { - fmt.Printf("*** Using MoveTables v2 flow ***") + fmt.Println("*** Using MoveTables v2 flow ***") return commandVRWorkflow(ctx, wr, subFlags, args, wrangler.MoveTablesWorkflow) } } @@ -1975,7 +1975,7 @@ func commandVRWorkflow(ctx context.Context, wr *wrangler.Wrangler, subFlags *fla _ = subFlags.Bool("v2", true, "") - _, _, _ = dryRun, reverseReplication, skipSchemaCopy + _ = dryRun //TODO: add dry run functionality if err := subFlags.Parse(args); err != nil { return err } @@ -2006,11 +2006,10 @@ func commandVRWorkflow(ctx context.Context, wr *wrangler.Wrangler, subFlags *fla if err != nil { return err } - s += "Following vreplication streams are running for this workflow:\n\n" + s += "Following vreplication streams are running in this workflow:\n\n" for ksShard := range res.ShardStatuses { statuses := res.ShardStatuses[ksShard].MasterReplicationStatuses for _, st := range statuses { - //status.State, status.TransactionTimestamp, status.TimeUpdated, status.Tablet, status.ID, status.Message, status.Pos now := time.Now().Nanosecond() msg := "" updateLag := int64(now) - st.TimeUpdated @@ -2041,7 +2040,6 @@ func commandVRWorkflow(ctx context.Context, wr *wrangler.Wrangler, subFlags *fla action = strings.ToLower(action) // allow users to input action in a case-insensitive manner switch action { case wrangler.VReplicationWorkflowActionStart: - switch workflowType { case wrangler.MoveTablesWorkflow: if *sourceKeyspace == "" { @@ -2068,7 +2066,6 @@ func commandVRWorkflow(ctx context.Context, wr *wrangler.Wrangler, subFlags *fla default: return fmt.Errorf("unknown workflow type passed: %v", workflowType) } - vrwp.Cells = *cells vrwp.TabletTypes 
= *tabletTypes case wrangler.VReplicationWorkflowActionSwitchTraffic, wrangler.VReplicationWorkflowActionReverseTraffic: @@ -2098,12 +2095,7 @@ func commandVRWorkflow(ctx context.Context, wr *wrangler.Wrangler, subFlags *fla return fmt.Errorf("workflow %s does not exist", ksWorkflow) } - startState := wf.CachedState() - wr.Logger().Printf("\nCachedState: %s\n", startState) - switch action { - case wrangler.VReplicationWorkflowActionShow: - return printDetails() - case wrangler.VReplicationWorkflowActionProgress: + printCopyProgress := func() error { copyProgress, err := wf.GetCopyProgress() if err != nil { return err @@ -2130,6 +2122,15 @@ func commandVRWorkflow(ctx context.Context, wr *wrangler.Wrangler, subFlags *fla wr.Logger().Printf("\n%s\n", s) } return printDetails() + + } + startState := wf.CachedState() + wr.Logger().Printf("\nCachedState: %s\n", startState) + switch action { + case wrangler.VReplicationWorkflowActionShow: + return printDetails() + case wrangler.VReplicationWorkflowActionProgress: + return printCopyProgress() case wrangler.VReplicationWorkflowActionStart: err = wf.Start() case wrangler.VReplicationWorkflowActionSwitchTraffic: @@ -2148,7 +2149,8 @@ func commandVRWorkflow(ctx context.Context, wr *wrangler.Wrangler, subFlags *fla return wrapError(wf, err) } time.Sleep(1 * time.Second) - wr.Logger().Printf("%s %s was successful\nStart State: %s\n\nCurrent State: %s\n\n", workflowType, action, startState, wf.CurrentState()) + wr.Logger().Printf("%s %s was successful\nStart State: %s\n\nCurrent State: %s\n\n", + workflowType, action, startState, wf.CurrentState()) return nil } @@ -2391,12 +2393,6 @@ func commandSwitchReads(ctx context.Context, wr *wrangler.Wrangler, subFlags *fl if err != nil { return err } - /* - if strings.HasSuffix(workflow, "_reverse") { - return fmt.Errorf("workflow cannot end with _reverse, it is reserved for vreplication to create a reverse workflow") - } - - */ dryRunResults, err := wr.SwitchReads(ctx, keyspace, 
workflow, servedTypes, cells, direction, *dryRun) if err != nil { return err @@ -2426,14 +2422,6 @@ func commandSwitchWrites(ctx context.Context, wr *wrangler.Wrangler, subFlags *f if err != nil { return err } - /* - TODO: uncomment for subsequent release - if strings.HasSuffix(workflow, "_reverse") { - return fmt.Errorf("workflow cannot end with _reverse, it is reserved for vreplication to create a reverse workflow") - } - - */ - if filteredReplicationWaitTime != timeout { timeout = filteredReplicationWaitTime } diff --git a/go/vt/wrangler/traffic_switcher.go b/go/vt/wrangler/traffic_switcher.go index 67589eb8a7c..b0ae4af4117 100644 --- a/go/vt/wrangler/traffic_switcher.go +++ b/go/vt/wrangler/traffic_switcher.go @@ -360,7 +360,6 @@ func (wr *Wrangler) SwitchReads(ctx context.Context, targetKeyspace, workflow st } if journalsExist { wr.Logger().Errorf("Found a previous journal entry for %d", ts.id) - //return nil, fmt.Errorf("found an entry from a previous run for migration id %d in _vt.resharding_journal, please review and delete it before proceeding", ts.id) } var sw iswitcher if dryRun { @@ -394,8 +393,6 @@ func (wr *Wrangler) SwitchReads(ctx context.Context, targetKeyspace, workflow st ts.wr.Logger().Errorf("switchShardReads failed: %v", err) return nil, err } - x1, x2, err := wr.getCellsWithShardReadsSwitched(ctx, targetKeyspace, ts.sourceShards()[0], servedTypes[0].String()) - wr.Logger().Infof("State: %+v,%+v,%v", x1, x2, err) return sw.logs(), nil } @@ -914,9 +911,6 @@ func (ts *trafficSwitcher) switchTableReads(ctx context.Context, cells []string, rules[ts.sourceKeyspace+"."+table+"@"+tt] = []string{ts.targetKeyspace + "." + table} } else { log.Infof("Route direction backwards") - //delete(rules, table+"@"+tt) - //delete(rules, ts.targetKeyspace+"."+table+"@"+tt) - //delete(rules, ts.sourceKeyspace+"."+table+"@"+tt) rules[table+"@"+tt] = []string{ts.sourceKeyspace + "." + table} rules[ts.targetKeyspace+"."+table+"@"+tt] = []string{ts.sourceKeyspace + "." 
+ table} rules[ts.sourceKeyspace+"."+table+"@"+tt] = []string{ts.sourceKeyspace + "." + table} @@ -1484,6 +1478,7 @@ func (ts *trafficSwitcher) removeSourceTables(ctx context.Context, removalType T }) } +// FIXME: even after dropSourceShards there are still entries in the topo, need to research and fix func (ts *trafficSwitcher) dropSourceShards(ctx context.Context) error { return ts.forAllSources(func(source *tsSource) error { ts.wr.Logger().Infof("Deleting shard %s.%s\n", source.si.Keyspace(), source.si.ShardName()) diff --git a/go/vt/wrangler/workflow_test.go b/go/vt/wrangler/workflow_test.go index 535ab158474..e75c55b643d 100644 --- a/go/vt/wrangler/workflow_test.go +++ b/go/vt/wrangler/workflow_test.go @@ -16,6 +16,7 @@ limitations under the License. package wrangler +//FIXME: update test for recent changes /* import ( "testing" From a40c31f6a55d9c7cdeb32947c079ead02cc2116f Mon Sep 17 00:00:00 2001 From: Rohit Nayak Date: Wed, 23 Dec 2020 23:40:37 +0100 Subject: [PATCH 17/26] Minor cleanup, some comments Signed-off-by: Rohit Nayak --- go/vt/vtctl/vtctl.go | 37 ++++++++++++++++++++---------- go/vt/wrangler/traffic_switcher.go | 15 +++++++++++- go/vt/wrangler/workflow.go | 34 +++++++-------------------- 3 files changed, 47 insertions(+), 39 deletions(-) diff --git a/go/vt/vtctl/vtctl.go b/go/vt/vtctl/vtctl.go index acad4f422a7..bee0b06095b 100644 --- a/go/vt/vtctl/vtctl.go +++ b/go/vt/vtctl/vtctl.go @@ -1953,6 +1953,19 @@ func commandMoveTables(ctx context.Context, wr *wrangler.Wrangler, subFlags *fla return wr.MoveTables(ctx, *workflow, source, target, tableSpecs, *cells, *tabletTypes, *allTables, *excludes) } +// VReplicationWorkflowAction defines subcommands passed to vtctl for movetables or reshard +type VReplicationWorkflowAction string + +const ( + vReplicationWorkflowActionStart = "start" + vReplicationWorkflowActionSwitchTraffic = "switchtraffic" + vReplicationWorkflowActionReverseTraffic = "reversetraffic" + vReplicationWorkflowActionComplete = 
"complete" + vReplicationWorkflowActionAbort = "abort" + vReplicationWorkflowActionShow = "show" + vReplicationWorkflowActionProgress = "progress" +) + func commandVRWorkflow(ctx context.Context, wr *wrangler.Wrangler, subFlags *flag.FlagSet, args []string, workflowType wrangler.VReplicationWorkflowType) error { @@ -2039,7 +2052,7 @@ func commandVRWorkflow(ctx context.Context, wr *wrangler.Wrangler, subFlags *fla originalAction := action action = strings.ToLower(action) // allow users to input action in a case-insensitive manner switch action { - case wrangler.VReplicationWorkflowActionStart: + case vReplicationWorkflowActionStart: switch workflowType { case wrangler.MoveTablesWorkflow: if *sourceKeyspace == "" { @@ -2068,14 +2081,14 @@ func commandVRWorkflow(ctx context.Context, wr *wrangler.Wrangler, subFlags *fla } vrwp.Cells = *cells vrwp.TabletTypes = *tabletTypes - case wrangler.VReplicationWorkflowActionSwitchTraffic, wrangler.VReplicationWorkflowActionReverseTraffic: + case vReplicationWorkflowActionSwitchTraffic, vReplicationWorkflowActionReverseTraffic: vrwp.Cells = *cells vrwp.TabletTypes = *tabletTypes vrwp.Timeout = *timeout vrwp.EnableReverseReplication = *reverseReplication - case wrangler.VReplicationWorkflowActionAbort: + case vReplicationWorkflowActionAbort: vrwp.KeepData = *keepData - case wrangler.VReplicationWorkflowActionComplete: + case vReplicationWorkflowActionComplete: switch workflowType { case wrangler.MoveTablesWorkflow: vrwp.RenameTables = *renameTables @@ -2091,7 +2104,7 @@ func commandVRWorkflow(ctx context.Context, wr *wrangler.Wrangler, subFlags *fla log.Warningf("NewVReplicationWorkflow returned error %+v", wf) return err } - if !wf.Exists() && action != wrangler.VReplicationWorkflowActionStart { + if !wf.Exists() && action != vReplicationWorkflowActionStart { return fmt.Errorf("workflow %s does not exist", ksWorkflow) } @@ -2127,19 +2140,19 @@ func commandVRWorkflow(ctx context.Context, wr *wrangler.Wrangler, subFlags *fla 
startState := wf.CachedState() wr.Logger().Printf("\nCachedState: %s\n", startState) switch action { - case wrangler.VReplicationWorkflowActionShow: + case vReplicationWorkflowActionShow: return printDetails() - case wrangler.VReplicationWorkflowActionProgress: + case vReplicationWorkflowActionProgress: return printCopyProgress() - case wrangler.VReplicationWorkflowActionStart: + case vReplicationWorkflowActionStart: err = wf.Start() - case wrangler.VReplicationWorkflowActionSwitchTraffic: + case vReplicationWorkflowActionSwitchTraffic: err = wf.SwitchTraffic(wrangler.DirectionForward) - case wrangler.VReplicationWorkflowActionReverseTraffic: + case vReplicationWorkflowActionReverseTraffic: err = wf.ReverseTraffic() - case wrangler.VReplicationWorkflowActionComplete: + case vReplicationWorkflowActionComplete: err = wf.Complete() - case wrangler.VReplicationWorkflowActionAbort: + case vReplicationWorkflowActionAbort: err = wf.Abort() default: return fmt.Errorf("found unsupported action %s", originalAction) diff --git a/go/vt/wrangler/traffic_switcher.go b/go/vt/wrangler/traffic_switcher.go index b0ae4af4117..669ba12acf3 100644 --- a/go/vt/wrangler/traffic_switcher.go +++ b/go/vt/wrangler/traffic_switcher.go @@ -147,6 +147,8 @@ type workflowState struct { WritesSwitched bool } +// For a Reshard, to check whether we have switched reads for a tablet type, we check if any one of the source shards has +// the query service disabled in its tablet control record func (wr *Wrangler) getCellsWithShardReadsSwitched(ctx context.Context, targetKeyspace string, si *topo.ShardInfo, tabletType string) ( cellsSwitched, cellsNotSwitched []string, err error) { @@ -168,12 +170,17 @@ func (wr *Wrangler) getCellsWithShardReadsSwitched(ctx context.Context, targetKe if !strings.EqualFold(partition.GetServedType().String(), tabletType) { continue } + + // If reads and writes are both switched it is possible that the shard is not in the partition table for _, shardReference := range 
partition.GetShardReferences() { if key.KeyRangeEqual(shardReference.GetKeyRange(), si.GetKeyRange()) { found = true break } } + + // It is possible that there are no tablet controls if the target shards are not yet serving + // or once reads and writes are both switched, if len(partition.GetShardTabletControls()) == 0 { noControls = true break @@ -196,6 +203,8 @@ func (wr *Wrangler) getCellsWithShardReadsSwitched(ctx context.Context, targetKe return cellsSwitched, cellsNotSwitched, nil } +// For MoveTables, to check whether we have switched reads for a tablet type, we check whether the routing rule +// for the tablet_type is pointing to the target keyspace func (wr *Wrangler) getCellsWithTableReadsSwitched(ctx context.Context, targetKeyspace, table, tabletType string) ( cellsSwitched, cellsNotSwitched []string, err error) { @@ -257,6 +266,10 @@ func (wr *Wrangler) getWorkflowState(ctx context.Context, targetKeyspace, workfl var cellsSwitched, cellsNotSwitched []string var keyspace string var reverse bool + + // we reverse writes by using the source_keyspace.workflowname_reverse workflow spec, so we need to use the + // source of the reverse workflow, which is the target of the workflow initiated by the user for checking routing rules + // Similarly we use a target shard of the reverse workflow as the original source to check if writes have been switched if strings.HasSuffix(workflow, "_reverse") { reverse = true keyspace = ws.SourceKeyspace @@ -352,7 +365,7 @@ func (wr *Wrangler) SwitchReads(ctx context.Context, targetKeyspace, workflow st } } - //If journals exist notify user and fail + // If journals exist notify user and fail journalsExist, _, err := ts.checkJournals(ctx) if err != nil { wr.Logger().Errorf("checkJournals failed: %v", err) diff --git a/go/vt/wrangler/workflow.go b/go/vt/wrangler/workflow.go index 6f947163f87..500f401cb2d 100644 --- a/go/vt/wrangler/workflow.go +++ b/go/vt/wrangler/workflow.go @@ -23,11 +23,9 @@ import ( (CurrentState()) * dry 
run - * implement/test Reshard same as MoveTables! - VReplicationWorkflow as common to both MoveTables/Reshard */ -// VReplicationWorkflowType specifies the switching direction. +// VReplicationWorkflowType specifies whether workflow is MoveTables or Reshard type VReplicationWorkflowType int const ( @@ -37,19 +35,6 @@ const ( ReshardWorkflow ) -// VReplicationWorkflowAction defines subcommands passed to vtctl for movetables or reshard -type VReplicationWorkflowAction string - -const ( - VReplicationWorkflowActionStart = "start" - VReplicationWorkflowActionSwitchTraffic = "switchtraffic" - VReplicationWorkflowActionReverseTraffic = "reversetraffic" - VReplicationWorkflowActionComplete = "complete" - VReplicationWorkflowActionAbort = "abort" - VReplicationWorkflowActionShow = "show" - VReplicationWorkflowActionProgress = "progress" -) - // region Move Tables Public API // VReplicationWorkflow stores various internal objects for a workflow @@ -121,15 +106,13 @@ func (vrw *VReplicationWorkflow) CurrentState() string { return vrw.stateAsString(ws) } -// CachedState returns a human readable workflow state +// CachedState returns a human readable workflow state at the time the workflow was created func (vrw *VReplicationWorkflow) CachedState() string { return vrw.stateAsString(vrw.ws) } // Exists checks if the workflow has already been initiated func (vrw *VReplicationWorkflow) Exists() bool { - log.Infof("vrw %+v", *vrw) - return vrw.ws != nil } @@ -229,7 +212,8 @@ func (vrw *VReplicationWorkflow) Complete() error { } else { renameTable = DropTable } - if _, err := vrw.wr.DropSources(vrw.ctx, vrw.ws.TargetKeyspace, vrw.ws.Workflow, renameTable, vrw.params.KeepData, false, false); err != nil { + if _, err := vrw.wr.DropSources(vrw.ctx, vrw.ws.TargetKeyspace, vrw.ws.Workflow, renameTable, vrw.params.KeepData, + false, false); err != nil { return err } return nil @@ -291,14 +275,14 @@ func (vrw *VReplicationWorkflow) parseTabletTypes() (hasReplica, hasRdonly, hasM func (vrw 
*VReplicationWorkflow) initMoveTables() error { log.Infof("In VReplicationWorkflow.initMoveTables() for %+v", vrw) - return vrw.wr.MoveTables(vrw.ctx, vrw.params.Workflow, vrw.params.SourceKeyspace, vrw.params.TargetKeyspace, vrw.params.Tables, - vrw.params.Cells, vrw.params.TabletTypes, vrw.params.AllTables, vrw.params.ExcludeTables) + return vrw.wr.MoveTables(vrw.ctx, vrw.params.Workflow, vrw.params.SourceKeyspace, vrw.params.TargetKeyspace, + vrw.params.Tables, vrw.params.Cells, vrw.params.TabletTypes, vrw.params.AllTables, vrw.params.ExcludeTables) } func (vrw *VReplicationWorkflow) initReshard() error { log.Infof("In VReplicationWorkflow.initReshard() for %+v", vrw) - return vrw.wr.Reshard(vrw.ctx, vrw.params.TargetKeyspace, vrw.params.Workflow, vrw.params.SourceShards, vrw.params.TargetShards, - vrw.params.SkipSchemaCopy, vrw.params.Cells, vrw.params.TabletTypes) + return vrw.wr.Reshard(vrw.ctx, vrw.params.TargetKeyspace, vrw.params.Workflow, vrw.params.SourceShards, + vrw.params.TargetShards, vrw.params.SkipSchemaCopy, vrw.params.Cells, vrw.params.TabletTypes) } func (vrw *VReplicationWorkflow) switchReads() error { @@ -437,7 +421,6 @@ func (vrw *VReplicationWorkflow) GetCopyProgress() (*CopyProgress, error) { } query := fmt.Sprintf(getRowCountQuery, encodeString(targetDbName), tableList) - log.Infof("query is %s", query) for _, target := range vrw.ts.targets { tablet := target.master.Tablet if err := getTableMetrics(tablet, query, &targetRowCounts, &targetTableSizes); err != nil { @@ -446,7 +429,6 @@ func (vrw *VReplicationWorkflow) GetCopyProgress() (*CopyProgress, error) { } query = fmt.Sprintf(getRowCountQuery, encodeString(sourceDbName), tableList) - log.Infof("query is %s", query) for source := range sourceMasters { ti, err := vrw.wr.ts.GetTablet(ctx, source) tablet := ti.Tablet From 6d8f21cd6927b42f515a847b8ebd3fab6798be12 Mon Sep 17 00:00:00 2001 From: Rohit Nayak Date: Thu, 24 Dec 2020 21:55:35 +0100 Subject: [PATCH 18/26] Tests for switching cells 
partially, add rdonly tablets to test Signed-off-by: Rohit Nayak --- .../resharding_workflows_v2_test.go | 201 ++++++++++++------ .../vreplication/vreplication_test.go | 3 +- go/vt/vtctl/vtctl.go | 11 +- go/vt/wrangler/materializer.go | 15 +- go/vt/wrangler/traffic_switcher.go | 45 ++-- go/vt/wrangler/workflow.go | 48 +++-- 6 files changed, 213 insertions(+), 110 deletions(-) diff --git a/go/test/endtoend/vreplication/resharding_workflows_v2_test.go b/go/test/endtoend/vreplication/resharding_workflows_v2_test.go index 0db195c5856..d4a7f885c57 100644 --- a/go/test/endtoend/vreplication/resharding_workflows_v2_test.go +++ b/go/test/endtoend/vreplication/resharding_workflows_v2_test.go @@ -22,6 +22,8 @@ import ( "testing" "time" + "vitess.io/vitess/go/vt/log" + "vitess.io/vitess/go/vt/wrangler" "github.com/stretchr/testify/require" @@ -48,9 +50,11 @@ const ( ) var ( - customerTab1, customerTab2, productReplicaTab, customerReplicaTab1, productTab *cluster.VttabletProcess - lastOutput string - currentWorkflowType wrangler.VReplicationWorkflowType + targetTab1, targetTab2, targetReplicaTab1 *cluster.VttabletProcess + sourceReplicaTab, sourceTab *cluster.VttabletProcess + + lastOutput string + currentWorkflowType wrangler.VReplicationWorkflowType ) func reshard2Start(t *testing.T, sourceShards, targetShards string) error { @@ -58,8 +62,8 @@ func reshard2Start(t *testing.T, sourceShards, targetShards string) error { "", workflowActionStart, "", sourceShards, targetShards) require.NoError(t, err) time.Sleep(1 * time.Second) - catchup(t, customerTab1, workflowName, "Reshard") - catchup(t, customerTab2, workflowName, "Reshard") + catchup(t, targetTab1, workflowName, "Reshard") + catchup(t, targetTab2, workflowName, "Reshard") vdiff(t, ksWorkflow) return nil } @@ -71,8 +75,8 @@ func moveTables2Start(t *testing.T, tables string) error { err := tstWorkflowExec(t, defaultCellName, workflowName, sourceKs, targetKs, tables, workflowActionStart, "", "", "") require.NoError(t, err) - 
catchup(t, customerTab1, workflowName, "MoveTables") - catchup(t, customerTab2, workflowName, "MoveTables") + catchup(t, targetTab1, workflowName, "MoveTables") + catchup(t, targetTab2, workflowName, "MoveTables") time.Sleep(1 * time.Second) vdiff(t, ksWorkflow) return nil @@ -109,19 +113,24 @@ func tstWorkflowExec(t *testing.T, cells, workflow, sourceKs, targetKs, tables, output, err := vc.VtctlClient.ExecuteCommandWithOutput(args...) lastOutput = output if err != nil { - t.Logf("%s command failed with %+v\n", args[0], err) return fmt.Errorf("%s: %s", err, output) } - fmt.Printf("----------\n%+v\n%s\n----------\n", args, output) + fmt.Printf("----------\n%+v\n%s----------\n", args, output) return nil } -func tstWorkflowSwitchReads(t *testing.T, tabletTypes string) { - require.NoError(t, tstWorkflowAction(t, workflowActionSwitchTraffic, "replica,rdonly", "")) +func tstWorkflowSwitchReads(t *testing.T, tabletTypes, cells string) { + if tabletTypes == "" { + tabletTypes = "replica,rdonly" + } + require.NoError(t, tstWorkflowAction(t, workflowActionSwitchTraffic, tabletTypes, cells)) } -func tstWorkflowReverseReads(t *testing.T, tabletTypes string) { - require.NoError(t, tstWorkflowAction(t, workflowActionReverseTraffic, "replica,rdonly", "")) +func tstWorkflowReverseReads(t *testing.T, tabletTypes, cells string) { + if tabletTypes == "" { + tabletTypes = "replica,rdonly" + } + require.NoError(t, tstWorkflowAction(t, workflowActionReverseTraffic, tabletTypes, cells)) } func tstWorkflowSwitchWrites(t *testing.T) { @@ -161,25 +170,26 @@ func validateReadsRoute(t *testing.T, tabletTypes string, tablet *cluster.Vttabl } func validateReadsRouteToSource(t *testing.T, tabletTypes string) { - validateReadsRoute(t, tabletTypes, productReplicaTab) + validateReadsRoute(t, tabletTypes, sourceReplicaTab) } func validateReadsRouteToTarget(t *testing.T, tabletTypes string) { - validateReadsRoute(t, tabletTypes, customerReplicaTab1) + validateReadsRoute(t, tabletTypes, 
targetReplicaTab1) } func validateWritesRouteToSource(t *testing.T) { insertQuery := "insert into customer(name, cid) values('tempCustomer2', 200)" matchInsertQuery := "insert into customer(name, cid) values" - require.True(t, validateThatQueryExecutesOnTablet(t, vtgateConn, productTab, "customer", insertQuery, matchInsertQuery)) + require.True(t, validateThatQueryExecutesOnTablet(t, vtgateConn, sourceTab, "customer", insertQuery, matchInsertQuery)) execVtgateQuery(t, vtgateConn, "customer", "delete from customer where cid > 100") } + func validateWritesRouteToTarget(t *testing.T) { insertQuery := "insert into customer(name, cid) values('tempCustomer3', 101)" matchInsertQuery := "insert into customer(name, cid) values" - require.True(t, validateThatQueryExecutesOnTablet(t, vtgateConn, customerTab2, "customer", insertQuery, matchInsertQuery)) + require.True(t, validateThatQueryExecutesOnTablet(t, vtgateConn, targetTab2, "customer", insertQuery, matchInsertQuery)) insertQuery = "insert into customer(name, cid) values('tempCustomer3', 102)" - require.True(t, validateThatQueryExecutesOnTablet(t, vtgateConn, customerTab1, "customer", insertQuery, matchInsertQuery)) + require.True(t, validateThatQueryExecutesOnTablet(t, vtgateConn, targetTab1, "customer", insertQuery, matchInsertQuery)) execVtgateQuery(t, vtgateConn, "customer", "delete from customer where cid > 100") } @@ -194,12 +204,12 @@ func revert(t *testing.T) { } for _, query := range queries { - customerTab1.QueryTablet(query, "customer", true) - customerTab2.QueryTablet(query, "customer", true) - productTab.QueryTablet(query, "product", true) + targetTab1.QueryTablet(query, "customer", true) + targetTab2.QueryTablet(query, "customer", true) + sourceTab.QueryTablet(query, "product", true) } - customerTab1.QueryTablet("drop table vt_customer.customer", "customer", true) - customerTab2.QueryTablet("drop table vt_customer.customer", "customer", true) + targetTab1.QueryTablet("drop table vt_customer.customer", 
"customer", true) + targetTab2.QueryTablet("drop table vt_customer.customer", "customer", true) clearRoutingRules(t, vc) } @@ -209,6 +219,13 @@ func checkStates(t *testing.T, startState, endState string) { require.Contains(t, lastOutput, fmt.Sprintf("Current State: %s", endState)) } +func getCurrentState(t *testing.T) string { + if err := tstWorkflowAction(t, "GetState", "", ""); err != nil { + return err.Error() + } + return strings.TrimSpace(strings.Trim(lastOutput, "\n")) +} + // ideally this should be broken up into multiple tests for full flow, replica/rdonly flow, reverse flows etc // but CI currently fails on creating multiple clusters even after the previous ones are torn down @@ -219,6 +236,7 @@ func TestBasicV2Workflows(t *testing.T) { testMoveTablesV2Workflow(t) testReshardV2Workflow(t) + log.Flush() } func testReshardV2Workflow(t *testing.T) { @@ -227,12 +245,11 @@ func testReshardV2Workflow(t *testing.T) { createAdditionalCustomerShards(t, "-40,40-80,80-c0,c0-") reshard2Start(t, "-80,80-", "-40,40-80,80-c0,c0-") - checkStates(t, "Not Started", "Not Started") + checkStates(t, wrangler.WorkflowStateNotStarted, wrangler.WorkflowStateNotSwitched) validateReadsRouteToSource(t, "replica") validateWritesRouteToSource(t) testRestOfWorkflow(t) - } func testMoveTablesV2Workflow(t *testing.T) { @@ -241,7 +258,7 @@ func testMoveTablesV2Workflow(t *testing.T) { // test basic forward and reverse flows setupCustomerKeyspace(t) moveTables2Start(t, "customer") - checkStates(t, "Not Started", "Not Started") + checkStates(t, wrangler.WorkflowStateNotStarted, wrangler.WorkflowStateNotSwitched) validateReadsRouteToSource(t, "replica") validateWritesRouteToSource(t) @@ -260,32 +277,73 @@ func testMoveTablesV2Workflow(t *testing.T) { output, _ = vc.VtctlClient.ExecuteCommandWithOutput(listAllArgs...) 
require.Contains(t, output, "No workflows found in keyspace customer") +} + +func testPartialSwitches(t *testing.T) { + //nothing switched + require.Equal(t, getCurrentState(t), wrangler.WorkflowStateNotSwitched) + tstWorkflowSwitchReads(t, "replica,rdonly", "zone1") + nextState := "Reads partially switched. Replica switched in cells: zone1. Rdonly switched in cells: zone1. Writes Not Switched" + checkStates(t, wrangler.WorkflowStateNotSwitched, nextState) + tstWorkflowSwitchReads(t, "replica,rdonly", "zone2") + currentState := nextState + nextState = wrangler.WorkflowStateReadsSwitched + checkStates(t, currentState, nextState) + + tstWorkflowSwitchReads(t, "", "") + checkStates(t, nextState, nextState) //idempotency + + tstWorkflowSwitchWrites(t) + currentState = nextState + nextState = wrangler.WorkflowStateAllSwitched + checkStates(t, currentState, nextState) + + tstWorkflowSwitchWrites(t) + checkStates(t, nextState, nextState) //idempotency + + tstWorkflowReverseReads(t, "replica,rdonly", "zone1") + currentState = nextState + nextState = "Reads partially switched. Replica switched in cells: zone2. Rdonly switched in cells: zone2. Writes Switched" + checkStates(t, currentState, nextState) + + tstWorkflowReverseReads(t, "replica,rdonly", "zone2") + currentState = nextState + nextState = wrangler.WorkflowStateWritesSwitched + checkStates(t, currentState, nextState) + + tstWorkflowReverseWrites(t) + currentState = nextState + nextState = wrangler.WorkflowStateNotSwitched + checkStates(t, currentState, nextState) } + func testRestOfWorkflow(t *testing.T) { + testPartialSwitches(t) + // test basic forward and reverse flows - tstWorkflowSwitchReads(t, "") - checkStates(t, "Reads Not Switched. Writes Not Switched", "All Reads Switched. 
Writes Not Switched") + tstWorkflowSwitchReads(t, "", "") + checkStates(t, wrangler.WorkflowStateNotSwitched, wrangler.WorkflowStateReadsSwitched) validateReadsRouteToTarget(t, "replica") validateWritesRouteToSource(t) tstWorkflowSwitchWrites(t) - checkStates(t, "All Reads Switched. Writes Not Switched", "All Reads Switched. Writes Switched") + checkStates(t, wrangler.WorkflowStateReadsSwitched, wrangler.WorkflowStateAllSwitched) validateReadsRouteToTarget(t, "replica") validateWritesRouteToTarget(t) - tstWorkflowReverseReads(t, "") - checkStates(t, "All Reads Switched. Writes Switched", "Reads Not Switched. Writes Switched") + tstWorkflowReverseReads(t, "", "") + checkStates(t, wrangler.WorkflowStateAllSwitched, wrangler.WorkflowStateWritesSwitched) validateReadsRouteToSource(t, "replica") validateWritesRouteToTarget(t) tstWorkflowReverseWrites(t) - checkStates(t, "Reads Not Switched. Writes Switched", "Reads Not Switched. Writes Not Switched") + checkStates(t, wrangler.WorkflowStateWritesSwitched, wrangler.WorkflowStateNotSwitched) validateReadsRouteToSource(t, "replica") validateWritesRouteToSource(t) tstWorkflowSwitchWrites(t) - checkStates(t, "Reads Not Switched. Writes Not Switched", "Reads Not Switched. 
Writes Switched") + checkStates(t, wrangler.WorkflowStateNotSwitched, wrangler.WorkflowStateWritesSwitched) validateReadsRouteToSource(t, "replica") validateWritesRouteToTarget(t) @@ -293,11 +351,11 @@ func testRestOfWorkflow(t *testing.T) { validateReadsRouteToSource(t, "replica") validateWritesRouteToSource(t) - tstWorkflowSwitchReads(t, "") + tstWorkflowSwitchReads(t, "", "") validateReadsRouteToTarget(t, "replica") validateWritesRouteToSource(t) - tstWorkflowReverseReads(t, "") + tstWorkflowReverseReads(t, "", "") validateReadsRouteToSource(t, "replica") validateWritesRouteToSource(t) @@ -308,24 +366,22 @@ func testRestOfWorkflow(t *testing.T) { validateReadsRouteToSource(t, "replica") validateWritesRouteToSource(t) - // test complete and abort - var err error - - err = tstWorkflowComplete(t) + // trying to complete an unswitched workflow should error + err := tstWorkflowComplete(t) require.Error(t, err) - require.Contains(t, err.Error(), "cannot complete workflow because you have not yet switched all read and write traffic") + require.Contains(t, err.Error(), wrangler.ErrWorkflowNotFullySwitched) + // fully switch and complete tstWorkflowSwitchReadsAndWrites(t) validateReadsRouteToTarget(t, "replica") validateWritesRouteToTarget(t) err = tstWorkflowComplete(t) require.NoError(t, err) - } func setupCluster(t *testing.T) *VitessCluster { - cells := []string{"zone1"} + cells := []string{"zone1", "zone2"} vc = InitCluster(t, cells) require.NotNil(t, vc) @@ -333,27 +389,30 @@ func setupCluster(t *testing.T) *VitessCluster { allCellNames = defaultCellName defaultCell = vc.Cells[defaultCellName] - cell1 := vc.Cells["zone1"] - vc.AddKeyspace(t, []*Cell{cell1}, "product", "0", initialProductVSchema, initialProductSchema, defaultReplicas, defaultRdonly, 100) + zone1 := vc.Cells["zone1"] + zone2 := vc.Cells["zone2"] + + vc.AddKeyspace(t, []*Cell{zone1, zone2}, "product", "0", initialProductVSchema, initialProductSchema, defaultReplicas, 1, 100) - vtgate = 
cell1.Vtgates[0] + vtgate = zone1.Vtgates[0] require.NotNil(t, vtgate) vtgate.WaitForStatusOfTabletInShard(fmt.Sprintf("%s.%s.master", "product", "0"), 1) vtgate.WaitForStatusOfTabletInShard(fmt.Sprintf("%s.%s.replica", "product", "0"), 2) + vtgate.WaitForStatusOfTabletInShard(fmt.Sprintf("%s.%s.rdonly", "product", "0"), 2) vtgateConn = getConnection(t, globalConfig.vtgateMySQLPort) verifyClusterHealth(t) insertInitialData(t) - productReplicaTab = vc.Cells[defaultCell.Name].Keyspaces["product"].Shards["0"].Tablets["zone1-101"].Vttablet - productTab = vc.Cells[defaultCell.Name].Keyspaces["product"].Shards["0"].Tablets["zone1-100"].Vttablet + sourceReplicaTab = vc.Cells[defaultCell.Name].Keyspaces["product"].Shards["0"].Tablets["zone1-101"].Vttablet + sourceTab = vc.Cells[defaultCell.Name].Keyspaces["product"].Shards["0"].Tablets["zone1-100"].Vttablet return vc } func setupCustomerKeyspace(t *testing.T) { - if _, err := vc.AddKeyspace(t, []*Cell{vc.Cells[defaultCellName]}, "customer", "-80,80-", - customerVSchema, customerSchema, defaultReplicas, defaultRdonly, 200); err != nil { + if _, err := vc.AddKeyspace(t, []*Cell{vc.Cells["zone1"], vc.Cells["zone2"]}, "customer", "-80,80-", + customerVSchema, customerSchema, defaultReplicas, 1, 200); err != nil { t.Fatal(err) } if err := vtgate.WaitForStatusOfTabletInShard(fmt.Sprintf("%s.%s.master", "customer", "-80"), 1); err != nil { @@ -362,16 +421,22 @@ func setupCustomerKeyspace(t *testing.T) { if err := vtgate.WaitForStatusOfTabletInShard(fmt.Sprintf("%s.%s.master", "customer", "80-"), 1); err != nil { t.Fatal(err) } - if err := vtgate.WaitForStatusOfTabletInShard(fmt.Sprintf("%s.%s.replica", "customer", "-80"), 1); err != nil { + if err := vtgate.WaitForStatusOfTabletInShard(fmt.Sprintf("%s.%s.replica", "customer", "-80"), 2); err != nil { t.Fatal(err) } - if err := vtgate.WaitForStatusOfTabletInShard(fmt.Sprintf("%s.%s.replica", "customer", "80-"), 1); err != nil { + if err := 
vtgate.WaitForStatusOfTabletInShard(fmt.Sprintf("%s.%s.replica", "customer", "80-"), 2); err != nil { + t.Fatal(err) + } + if err := vtgate.WaitForStatusOfTabletInShard(fmt.Sprintf("%s.%s.rdonly", "customer", "80-"), 2); err != nil { + t.Fatal(err) + } + if err := vtgate.WaitForStatusOfTabletInShard(fmt.Sprintf("%s.%s.rdonly", "customer", "-80"), 2); err != nil { t.Fatal(err) } custKs := vc.Cells[defaultCell.Name].Keyspaces["customer"] - customerTab1 = custKs.Shards["-80"].Tablets["zone1-200"].Vttablet - customerTab2 = custKs.Shards["80-"].Tablets["zone1-300"].Vttablet - customerReplicaTab1 = custKs.Shards["-80"].Tablets["zone1-201"].Vttablet + targetTab1 = custKs.Shards["-80"].Tablets["zone1-200"].Vttablet + targetTab2 = custKs.Shards["80-"].Tablets["zone1-300"].Vttablet + targetReplicaTab1 = custKs.Shards["-80"].Tablets["zone1-201"].Vttablet } func TestSwitchReadsWritesInAnyOrder(t *testing.T) { @@ -399,8 +464,8 @@ func moveCustomerTableSwitchFlows(t *testing.T, cells []*Cell, sourceCellOrAlias var moveTablesAndWait = func() { moveTables(t, sourceCellOrAlias, workflow, sourceKs, targetKs, tables) - catchup(t, customerTab1, workflow, "MoveTables") - catchup(t, customerTab2, workflow, "MoveTables") + catchup(t, targetTab1, workflow, "MoveTables") + catchup(t, targetTab2, workflow, "MoveTables") vdiff(t, ksWorkflow) } @@ -475,10 +540,6 @@ func moveCustomerTableSwitchFlows(t *testing.T, cells []*Cell, sourceCellOrAlias revert(t) } - _ = switchReadsFollowedBySwitchWrites - _ = switchWritesFollowedBySwitchReads - _ = switchReadsReverseSwitchWritesSwitchReads - _ = switchWritesReverseSwitchReadsSwitchWrites switchReadsFollowedBySwitchWrites() switchWritesFollowedBySwitchReads() switchReadsReverseSwitchWritesSwitchReads() @@ -488,21 +549,25 @@ func moveCustomerTableSwitchFlows(t *testing.T, cells []*Cell, sourceCellOrAlias func createAdditionalCustomerShards(t *testing.T, shards string) { ksName := "customer" keyspace := vc.Cells[defaultCell.Name].Keyspaces[ksName] - 
require.NoError(t, vc.AddShards(t, []*Cell{defaultCell}, keyspace, shards, defaultReplicas, defaultRdonly, 400)) + require.NoError(t, vc.AddShards(t, []*Cell{defaultCell, vc.Cells["zone2"]}, keyspace, shards, defaultReplicas, 1, 400)) arrTargetShardNames := strings.Split(shards, ",") for _, shardName := range arrTargetShardNames { if err := vtgate.WaitForStatusOfTabletInShard(fmt.Sprintf("%s.%s.master", ksName, shardName), 1); err != nil { t.Fatal(err) } + if err := vtgate.WaitForStatusOfTabletInShard(fmt.Sprintf("%s.%s.replica", ksName, shardName), 2); err != nil { + t.Fatal(err) + } + if err := vtgate.WaitForStatusOfTabletInShard(fmt.Sprintf("%s.%s.rdonly", ksName, shardName), 2); err != nil { + t.Fatal(err) + } } - //FIXME custKs := vc.Cells[defaultCell.Name].Keyspaces[ksName] - customerTab2 = custKs.Shards["80-c0"].Tablets["zone1-600"].Vttablet - customerTab1 = custKs.Shards["40-80"].Tablets["zone1-500"].Vttablet - customerReplicaTab1 = custKs.Shards["-40"].Tablets["zone1-401"].Vttablet - - productReplicaTab = vc.Cells[defaultCell.Name].Keyspaces["customer"].Shards["-80"].Tablets["zone1-201"].Vttablet - productTab = vc.Cells[defaultCell.Name].Keyspaces["customer"].Shards["-80"].Tablets["zone1-200"].Vttablet + targetTab2 = custKs.Shards["80-c0"].Tablets["zone1-600"].Vttablet + targetTab1 = custKs.Shards["40-80"].Tablets["zone1-500"].Vttablet + targetReplicaTab1 = custKs.Shards["-40"].Tablets["zone1-401"].Vttablet + sourceReplicaTab = custKs.Shards["-80"].Tablets["zone1-201"].Vttablet + sourceTab = custKs.Shards["-80"].Tablets["zone1-200"].Vttablet } diff --git a/go/test/endtoend/vreplication/vreplication_test.go b/go/test/endtoend/vreplication/vreplication_test.go index 4299e0319a7..adfdd93730e 100644 --- a/go/test/endtoend/vreplication/vreplication_test.go +++ b/go/test/endtoend/vreplication/vreplication_test.go @@ -49,8 +49,9 @@ func init() { func TestBasicVreplicationWorkflow(t *testing.T) { defaultCellName := "zone1" + allCells := []string{"zone1"} 
allCellNames = "zone1" - vc = InitCluster(t, []string{defaultCellName}) + vc = InitCluster(t, allCells) require.NotNil(t, vc) defaultReplicas = 0 // because of CI resource constraints we can only run this test with master tablets defer func() { defaultReplicas = 1 }() diff --git a/go/vt/vtctl/vtctl.go b/go/vt/vtctl/vtctl.go index bee0b06095b..3fd9585e53d 100644 --- a/go/vt/vtctl/vtctl.go +++ b/go/vt/vtctl/vtctl.go @@ -1964,6 +1964,7 @@ const ( vReplicationWorkflowActionAbort = "abort" vReplicationWorkflowActionShow = "show" vReplicationWorkflowActionProgress = "progress" + vReplicationWorkflowActionGetState = "getstate" ) func commandVRWorkflow(ctx context.Context, wr *wrangler.Wrangler, subFlags *flag.FlagSet, args []string, @@ -2138,7 +2139,6 @@ func commandVRWorkflow(ctx context.Context, wr *wrangler.Wrangler, subFlags *fla } startState := wf.CachedState() - wr.Logger().Printf("\nCachedState: %s\n", startState) switch action { case vReplicationWorkflowActionShow: return printDetails() @@ -2146,6 +2146,7 @@ func commandVRWorkflow(ctx context.Context, wr *wrangler.Wrangler, subFlags *fla return printCopyProgress() case vReplicationWorkflowActionStart: err = wf.Start() + //TODO: wait for streams to start or report error (pos != "", Message contains error, tx/update time recent) case vReplicationWorkflowActionSwitchTraffic: err = wf.SwitchTraffic(wrangler.DirectionForward) case vReplicationWorkflowActionReverseTraffic: @@ -2154,6 +2155,9 @@ func commandVRWorkflow(ctx context.Context, wr *wrangler.Wrangler, subFlags *fla err = wf.Complete() case vReplicationWorkflowActionAbort: err = wf.Abort() + case vReplicationWorkflowActionGetState: + wr.Logger().Printf(wf.CachedState() + "\n") + return nil default: return fmt.Errorf("found unsupported action %s", originalAction) } @@ -2161,9 +2165,8 @@ func commandVRWorkflow(ctx context.Context, wr *wrangler.Wrangler, subFlags *fla log.Warningf(" %s error: %v", originalAction, wf) return wrapError(wf, err) } - time.Sleep(1 * 
time.Second) - wr.Logger().Printf("%s %s was successful\nStart State: %s\n\nCurrent State: %s\n\n", - workflowType, action, startState, wf.CurrentState()) + wr.Logger().Printf("%s was successful\nStart State: %s\nCurrent State: %s\n\n", + originalAction, startState, wf.CurrentState()) return nil } diff --git a/go/vt/wrangler/materializer.go b/go/vt/wrangler/materializer.go index 179e2a5605a..9a19c918b16 100644 --- a/go/vt/wrangler/materializer.go +++ b/go/vt/wrangler/materializer.go @@ -155,12 +155,15 @@ func (wr *Wrangler) MoveTables(ctx context.Context, workflow, sourceKeyspace, ta return err } for _, table := range tables { - rules[table] = []string{sourceKeyspace + "." + table} - rules[targetKeyspace+"."+table] = []string{sourceKeyspace + "." + table} - rules[targetKeyspace+"."+table+"@replica"] = []string{sourceKeyspace + "." + table} - rules[targetKeyspace+"."+table+"@rdonly"] = []string{sourceKeyspace + "." + table} - rules[sourceKeyspace+"."+table+"@replica"] = []string{sourceKeyspace + "." + table} - rules[sourceKeyspace+"."+table+"@rdonly"] = []string{sourceKeyspace + "." + table} + toSource := []string{sourceKeyspace + "." 
+ table} + rules[table] = toSource + rules[table+"@replica"] = toSource + rules[table+"@rdonly"] = toSource + rules[targetKeyspace+"."+table] = toSource + rules[targetKeyspace+"."+table+"@replica"] = toSource + rules[targetKeyspace+"."+table+"@rdonly"] = toSource + rules[sourceKeyspace+"."+table+"@replica"] = toSource + rules[sourceKeyspace+"."+table+"@rdonly"] = toSource } if err := wr.saveRoutingRules(ctx, rules); err != nil { return err diff --git a/go/vt/wrangler/traffic_switcher.go b/go/vt/wrangler/traffic_switcher.go index 669ba12acf3..b88d89b2e3e 100644 --- a/go/vt/wrangler/traffic_switcher.go +++ b/go/vt/wrangler/traffic_switcher.go @@ -225,27 +225,34 @@ func (wr *Wrangler) getCellsWithTableReadsSwitched(ctx context.Context, targetKe return nil, nil, err } rules := srvVSchema.RoutingRules.Rules + log.Infof("Rules for srvVSchema for cell %s are %+v", cell, rules) + found := false + switched := false for _, rule := range rules { - ruleName := fmt.Sprintf("%s@%s", table, tabletType) + ruleName := fmt.Sprintf("%s.%s@%s", targetKeyspace, table, tabletType) if rule.FromTable == ruleName { - switched := false + found = true for _, to := range rule.ToTables { ks, err := getKeyspace(to) if err != nil { + log.Errorf(err.Error()) return nil, nil, err } if ks == targetKeyspace { switched = true + break // if one table in workflow is switched we are done } } - if switched { - cellsSwitched = append(cellsSwitched, cell) - } else { - cellsNotSwitched = append(cellsNotSwitched, cell) - } + } + if found { break } } + if switched { + cellsSwitched = append(cellsSwitched, cell) + } else { + cellsNotSwitched = append(cellsNotSwitched, cell) + } } return cellsSwitched, cellsNotSwitched, nil } @@ -297,7 +304,6 @@ func (wr *Wrangler) getWorkflowState(ctx context.Context, targetKeyspace, workfl return nil, nil, err } ws.ReplicaCellsNotSwitched, ws.ReplicaCellsSwitched = cellsNotSwitched, cellsSwitched - rules, err := ts.wr.getRoutingRules(ctx) if err != nil { return nil, nil, err 
@@ -352,15 +358,15 @@ func (wr *Wrangler) SwitchReads(ctx context.Context, targetKeyspace, workflow st wr.Logger().Errorf(errorMsg) return nil, fmt.Errorf(errorMsg) } - wr.Logger().Infof("SwitchReads: %s.%s tt %+v, cells %+v, state: %+v", targetKeyspace, workflow, servedTypes, cells, ws) + wr.Logger().Infof("SwitchReads: %s.%s tt %+v, cells %+v, workflow state: %+v", targetKeyspace, workflow, servedTypes, cells, ws) for _, servedType := range servedTypes { if servedType != topodatapb.TabletType_REPLICA && servedType != topodatapb.TabletType_RDONLY { return nil, fmt.Errorf("tablet type must be REPLICA or RDONLY: %v", servedType) } - if direction == DirectionBackward && servedType == topodatapb.TabletType_REPLICA && len(ws.ReplicaCellsNotSwitched) > 0 { + if direction == DirectionBackward && servedType == topodatapb.TabletType_REPLICA && len(ws.ReplicaCellsSwitched) == 0 { return nil, fmt.Errorf("requesting reversal of SwitchReads for REPLICAs but REPLICA reads have not been switched") } - if direction == DirectionBackward && servedType == topodatapb.TabletType_RDONLY && len(ws.RdonlyCellsNotSwitched) > 0 { + if direction == DirectionBackward && servedType == topodatapb.TabletType_RDONLY && len(ws.RdonlyCellsSwitched) == 0 { return nil, fmt.Errorf("requesting reversal of SwitchReads for RDONLYs but RDONLY reads have not been switched") } } @@ -689,6 +695,7 @@ func (wr *Wrangler) DropSources(ctx context.Context, targetKeyspace, workflow st func (wr *Wrangler) buildTrafficSwitcher(ctx context.Context, targetKeyspace, workflow string) (*trafficSwitcher, error) { tgtInfo, err := wr.buildTargets(ctx, targetKeyspace, workflow) if err != nil { + log.Infof("Error building targets: %s", err) return nil, err } targets, frozen, optCells, optTabletTypes := tgtInfo.targets, tgtInfo.frozen, tgtInfo.optCells, tgtInfo.optTabletTypes @@ -913,20 +920,22 @@ func (ts *trafficSwitcher) switchTableReads(ctx context.Context, cells []string, // table -> sourceKeyspace.table // 
targetKeyspace.table -> sourceKeyspace.table // For forward migration, we add tablet type specific rules to redirect traffic to the target. - // For backward, we delete them. + // For backward, we redirect to source for _, servedType := range servedTypes { tt := strings.ToLower(servedType.String()) for _, table := range ts.tables { if direction == DirectionForward { log.Infof("Route direction forward") - rules[table+"@"+tt] = []string{ts.targetKeyspace + "." + table} - rules[ts.targetKeyspace+"."+table+"@"+tt] = []string{ts.targetKeyspace + "." + table} - rules[ts.sourceKeyspace+"."+table+"@"+tt] = []string{ts.targetKeyspace + "." + table} + toTarget := []string{ts.targetKeyspace + "." + table} + rules[table+"@"+tt] = toTarget + rules[ts.targetKeyspace+"."+table+"@"+tt] = toTarget + rules[ts.sourceKeyspace+"."+table+"@"+tt] = toTarget } else { log.Infof("Route direction backwards") - rules[table+"@"+tt] = []string{ts.sourceKeyspace + "." + table} - rules[ts.targetKeyspace+"."+table+"@"+tt] = []string{ts.sourceKeyspace + "." + table} - rules[ts.sourceKeyspace+"."+table+"@"+tt] = []string{ts.sourceKeyspace + "." + table} + toSource := []string{ts.sourceKeyspace + "." 
+ table} + rules[table+"@"+tt] = toSource + rules[ts.targetKeyspace+"."+table+"@"+tt] = toSource + rules[ts.sourceKeyspace+"."+table+"@"+tt] = toSource } } } diff --git a/go/vt/wrangler/workflow.go b/go/vt/wrangler/workflow.go index 500f401cb2d..c0829ec3d5f 100644 --- a/go/vt/wrangler/workflow.go +++ b/go/vt/wrangler/workflow.go @@ -28,13 +28,21 @@ import ( // VReplicationWorkflowType specifies whether workflow is MoveTables or Reshard type VReplicationWorkflowType int +// VReplicationWorkflowType enums const ( - // MoveTablesWorkflow specifies that the workflow is for moving tables from one keyspace to another MoveTablesWorkflow = VReplicationWorkflowType(iota) - // ReshardWorkflow specifies that the workflow is for resharding a keyspace ReshardWorkflow ) +// Workflow state display strings +const ( + WorkflowStateNotStarted = "Not Started" + WorkflowStateNotSwitched = "Reads Not Switched. Writes Not Switched" + WorkflowStateReadsSwitched = "All Reads Switched. Writes Not Switched" + WorkflowStateWritesSwitched = "Reads Not Switched. Writes Switched" + WorkflowStateAllSwitched = "All Reads Switched. 
Writes Switched" +) + // region Move Tables Public API // VReplicationWorkflow stores various internal objects for a workflow @@ -96,14 +104,15 @@ func (wr *Wrangler) NewVReplicationWorkflow(ctx context.Context, workflowType VR // CurrentState reloads and returns a human readable workflow state func (vrw *VReplicationWorkflow) CurrentState() string { - _, ws, err := vrw.wr.getWorkflowState(vrw.ctx, vrw.params.TargetKeyspace, vrw.params.Workflow) + var err error + vrw.ts, vrw.ws, err = vrw.wr.getWorkflowState(vrw.ctx, vrw.params.TargetKeyspace, vrw.params.Workflow) if err != nil { return err.Error() } - if ws == nil { + if vrw.ws == nil { return "Workflow Not Found" } - return vrw.stateAsString(ws) + return vrw.stateAsString(vrw.ws) } // CachedState returns a human readable workflow state at the time the workflow was created @@ -117,6 +126,7 @@ func (vrw *VReplicationWorkflow) Exists() bool { } func (vrw *VReplicationWorkflow) stateAsString(ws *workflowState) string { + log.Infof("Workflow state is %+v", ws) var stateInfo []string s := "" if !vrw.Exists() { @@ -127,14 +137,20 @@ func (vrw *VReplicationWorkflow) stateAsString(ws *workflowState) string { } else if len(ws.RdonlyCellsSwitched) == 0 && len(ws.ReplicaCellsSwitched) == 0 { s = "Reads Not Switched" } else { - s = "Reads Partially Switched: " + stateInfo = append(stateInfo, "Reads partially switched") if len(ws.ReplicaCellsNotSwitched) == 0 { s += "All Replica Reads Switched" + } else if len(ws.ReplicaCellsSwitched) == 0 { + s += "Replica not switched" } else { - s += "Replicas switched in cells: " + strings.Join(ws.ReplicaCellsSwitched, ",") + s += "Replica switched in cells: " + strings.Join(ws.ReplicaCellsSwitched, ",") } + stateInfo = append(stateInfo, s) + s = "" if len(ws.RdonlyCellsNotSwitched) == 0 { s += "All Rdonly Reads Switched" + } else if len(ws.RdonlyCellsSwitched) == 0 { + s += "Rdonly not switched" } else { s += "Rdonly switched in cells: " + strings.Join(ws.RdonlyCellsSwitched, ",") } @@ 
-151,17 +167,22 @@ func (vrw *VReplicationWorkflow) stateAsString(ws *workflowState) string { // Start initiates a workflow func (vrw *VReplicationWorkflow) Start() error { + var err error if vrw.Exists() { return fmt.Errorf("workflow has already been started") } switch vrw.workflowType { case MoveTablesWorkflow: - return vrw.initMoveTables() + err = vrw.initMoveTables() case ReshardWorkflow: - return vrw.initReshard() + err = vrw.initReshard() default: return fmt.Errorf("unknown workflow type %d", vrw.workflowType) } + if err != nil { + return err + } + return nil } // SwitchTraffic switches traffic forward for tablet_types passed @@ -195,16 +216,17 @@ func (vrw *VReplicationWorkflow) ReverseTraffic() error { return vrw.SwitchTraffic(DirectionBackward) } +// Workflow errors const ( - errWorkflowNotFullySwitched = "cannot complete workflow because you have not yet switched all read and write traffic" - errWorkflowPartiallySwitched = "cannot abort workflow because you have already switched some or all read and write traffic" + ErrWorkflowNotFullySwitched = "cannot complete workflow because you have not yet switched all read and write traffic" + ErrWorkflowPartiallySwitched = "cannot abort workflow because you have already switched some or all read and write traffic" ) // Complete cleans up a successful workflow func (vrw *VReplicationWorkflow) Complete() error { ws := vrw.ws if !ws.WritesSwitched || len(ws.ReplicaCellsNotSwitched) > 0 || len(ws.RdonlyCellsNotSwitched) > 0 { - return fmt.Errorf(errWorkflowNotFullySwitched) + return fmt.Errorf(ErrWorkflowNotFullySwitched) } var renameTable TableRemovalType if vrw.params.RenameTables { @@ -223,7 +245,7 @@ func (vrw *VReplicationWorkflow) Complete() error { func (vrw *VReplicationWorkflow) Abort() error { ws := vrw.ws if ws.WritesSwitched || len(ws.ReplicaCellsSwitched) > 0 || len(ws.RdonlyCellsSwitched) > 0 { - return fmt.Errorf(errWorkflowPartiallySwitched) + return fmt.Errorf(ErrWorkflowPartiallySwitched) } if _, err 
:= vrw.wr.DropTargets(vrw.ctx, vrw.ws.TargetKeyspace, vrw.ws.Workflow, vrw.params.KeepData, false); err != nil { return err From 3fce881b2c10ba6c5739d84028731d8dc055feca Mon Sep 17 00:00:00 2001 From: Rohit Nayak Date: Fri, 25 Dec 2020 23:00:28 +0100 Subject: [PATCH 19/26] Unit tests for move tables and copy progress Signed-off-by: Rohit Nayak --- go/vt/wrangler/fake_dbclient_test.go | 10 ++ go/vt/wrangler/traffic_switcher.go | 1 - go/vt/wrangler/traffic_switcher_env_test.go | 3 + go/vt/wrangler/workflow.go | 51 +++--- go/vt/wrangler/workflow_test.go | 164 ++++++++++++++++++-- 5 files changed, 187 insertions(+), 42 deletions(-) diff --git a/go/vt/wrangler/fake_dbclient_test.go b/go/vt/wrangler/fake_dbclient_test.go index 007722505d9..5ed43622fb0 100644 --- a/go/vt/wrangler/fake_dbclient_test.go +++ b/go/vt/wrangler/fake_dbclient_test.go @@ -19,9 +19,11 @@ package wrangler import ( "fmt" "regexp" + "strings" "testing" "github.com/stretchr/testify/assert" + "vitess.io/vitess/go/vt/log" "vitess.io/vitess/go/sqltypes" ) @@ -46,6 +48,7 @@ type dbResult struct { func (dbrs *dbResults) next(query string) (*sqltypes.Result, error) { if dbrs.exhausted() { + log.Infof(fmt.Sprintf("Unexpected query >%s<", query)) return nil, fmt.Errorf("code executed this query, but the test did not expect it: %s", query) } i := dbrs.index @@ -143,6 +146,13 @@ func (dc *fakeDBClient) ExecuteFetch(query string, maxrows int) (qr *sqltypes.Re if result := dc.invariants[query]; result != nil { return result, nil } + for q, result := range dc.invariants { //supports allowing just a prefix of an expected query + if strings.Contains(query, q) { + return result, nil + } + } + + log.Infof("Missing query: >%s<" + query) return nil, fmt.Errorf("unexpected query: %s", query) } diff --git a/go/vt/wrangler/traffic_switcher.go b/go/vt/wrangler/traffic_switcher.go index b88d89b2e3e..07192b363a7 100644 --- a/go/vt/wrangler/traffic_switcher.go +++ b/go/vt/wrangler/traffic_switcher.go @@ -225,7 +225,6 @@ func 
(wr *Wrangler) getCellsWithTableReadsSwitched(ctx context.Context, targetKe return nil, nil, err } rules := srvVSchema.RoutingRules.Rules - log.Infof("Rules for srvVSchema for cell %s are %+v", cell, rules) found := false switched := false for _, rule := range rules { diff --git a/go/vt/wrangler/traffic_switcher_env_test.go b/go/vt/wrangler/traffic_switcher_env_test.go index 28d032b2d41..72fc035bd9d 100644 --- a/go/vt/wrangler/traffic_switcher_env_test.go +++ b/go/vt/wrangler/traffic_switcher_env_test.go @@ -21,6 +21,8 @@ import ( "testing" "time" + "vitess.io/vitess/go/vt/log" + "vitess.io/vitess/go/mysql/fakesqldb" "golang.org/x/net/context" @@ -361,6 +363,7 @@ func (tme *testMigraterEnv) createDBClients(ctx context.Context, t *testing.T) { master.TM.VREngine.Open(ctx) } for _, master := range tme.targetMasters { + log.Infof("Adding as targetMaster %s", master.Tablet.Alias) dbclient := newFakeDBClient() tme.dbTargetClients = append(tme.dbTargetClients, dbclient) dbClientFactory := func() binlogplayer.DBClient { return dbclient } diff --git a/go/vt/wrangler/workflow.go b/go/vt/wrangler/workflow.go index c0829ec3d5f..6d8c53950e8 100644 --- a/go/vt/wrangler/workflow.go +++ b/go/vt/wrangler/workflow.go @@ -3,6 +3,7 @@ package wrangler import ( "context" "fmt" + "sort" "strings" "time" @@ -14,17 +15,6 @@ import ( "vitess.io/vitess/go/vt/log" ) -/* - TODO - * expand e2e for testing all possible transitions - (Switch/Reverse Replica/Rdonly) - - * Unit Tests (run coverage first and identify) - (CurrentState()) - * dry run - -*/ - // VReplicationWorkflowType specifies whether workflow is MoveTables or Reshard type VReplicationWorkflowType int @@ -168,8 +158,11 @@ func (vrw *VReplicationWorkflow) stateAsString(ws *workflowState) string { // Start initiates a workflow func (vrw *VReplicationWorkflow) Start() error { var err error - if vrw.Exists() { - return fmt.Errorf("workflow has already been started") + if !vrw.Exists() { + return fmt.Errorf("workflow now found") + } + 
if vrw.CachedState() != WorkflowStateNotStarted { + return fmt.Errorf("workflow has already been started, state is %s", vrw.CachedState()) } switch vrw.workflowType { case MoveTablesWorkflow: @@ -375,29 +368,34 @@ func (vrw *VReplicationWorkflow) GetCopyProgress() (*CopyProgress, error) { } qr := sqltypes.Proto3ToResult(p3qr) for i := 0; i < len(p3qr.Rows); i++ { - tables[qr.Rows[0][0].ToString()] = true + tables[qr.Rows[i][0].ToString()] = true } sourcesi, err := vrw.wr.ts.GetShard(ctx, bls.Keyspace, bls.Shard) if err != nil { return nil, err } - sourceMasters[sourcesi.MasterAlias] = true + found := false + for existingSource := range sourceMasters { + if existingSource.Uid == sourcesi.MasterAlias.Uid { + found = true + } + } + if !found { + sourceMasters[sourcesi.MasterAlias] = true + } } } if len(tables) == 0 { return nil, nil } - tableList := "" + var tableList []string targetRowCounts := make(map[string]int64) sourceRowCounts := make(map[string]int64) targetTableSizes := make(map[string]int64) sourceTableSizes := make(map[string]int64) for table := range tables { - if tableList != "" { - tableList += "," - } - tableList += encodeString(table) + tableList = append(tableList, encodeString(table)) targetRowCounts[table] = 0 sourceRowCounts[table] = 0 targetTableSizes[table] = 0 @@ -411,12 +409,12 @@ func (vrw *VReplicationWorkflow) GetCopyProgress() (*CopyProgress, error) { } qr := sqltypes.Proto3ToResult(p3qr) for i := 0; i < len(qr.Rows); i++ { - table := qr.Rows[0][0].ToString() - rowCount, err := evalengine.ToInt64(qr.Rows[0][1]) + table := qr.Rows[i][0].ToString() + rowCount, err := evalengine.ToInt64(qr.Rows[i][1]) if err != nil { return err } - tableSize, err := evalengine.ToInt64(qr.Rows[0][2]) + tableSize, err := evalengine.ToInt64(qr.Rows[i][2]) if err != nil { return err } @@ -441,8 +439,9 @@ func (vrw *VReplicationWorkflow) GetCopyProgress() (*CopyProgress, error) { if sourceDbName == "" || targetDbName == "" { return nil, fmt.Errorf("workflow %s.%s 
is incorrectly configured", vrw.ws.TargetKeyspace, vrw.ws.Workflow) } - - query := fmt.Sprintf(getRowCountQuery, encodeString(targetDbName), tableList) + sort.Strings(tableList) // sort list for repeatability for mocking in tests + tablesStr := strings.Join(tableList, ",") + query := fmt.Sprintf(getRowCountQuery, encodeString(targetDbName), tablesStr) for _, target := range vrw.ts.targets { tablet := target.master.Tablet if err := getTableMetrics(tablet, query, &targetRowCounts, &targetTableSizes); err != nil { @@ -450,7 +449,7 @@ func (vrw *VReplicationWorkflow) GetCopyProgress() (*CopyProgress, error) { } } - query = fmt.Sprintf(getRowCountQuery, encodeString(sourceDbName), tableList) + query = fmt.Sprintf(getRowCountQuery, encodeString(sourceDbName), tablesStr) for source := range sourceMasters { ti, err := vrw.wr.ts.GetTablet(ctx, source) tablet := ti.Tablet diff --git a/go/vt/wrangler/workflow_test.go b/go/vt/wrangler/workflow_test.go index e75c55b643d..8a92cbaf55e 100644 --- a/go/vt/wrangler/workflow_test.go +++ b/go/vt/wrangler/workflow_test.go @@ -16,18 +16,18 @@ limitations under the License. 
package wrangler -//FIXME: update test for recent changes -/* import ( "testing" "github.com/stretchr/testify/require" "golang.org/x/net/context" + "vitess.io/vitess/go/sqltypes" + "vitess.io/vitess/go/vt/log" "vitess.io/vitess/go/vt/proto/topodata" ) func getMoveTablesWorkflow(t *testing.T, cells, tabletTypes string) *VReplicationWorkflow { - mtp := &VReplicationWorkflowParams{ + p := &VReplicationWorkflowParams{ Workflow: "wf1", SourceKeyspace: "sourceks", TargetKeyspace: "targetks", @@ -35,14 +35,13 @@ func getMoveTablesWorkflow(t *testing.T, cells, tabletTypes string) *VReplicatio Cells: cells, TabletTypes: tabletTypes, } - wf, _ := newWorkflow("wf1", "MoveTables") mtwf := &VReplicationWorkflow{ - ctx: context.Background(), - wf: wf, - wr: nil, - params: mtp, - ts: nil, - ws: nil, + workflowType: MoveTablesWorkflow, + ctx: context.Background(), + wr: nil, + params: p, + ts: nil, + ws: nil, } return mtwf } @@ -52,9 +51,9 @@ func TestReshardingWorkflowErrorsAndMisc(t *testing.T) { require.False(t, mtwf.Exists()) mtwf.ws = &workflowState{} require.True(t, mtwf.Exists()) - require.Errorf(t, mtwf.Complete(), errWorkflowNotFullySwitched) + require.Errorf(t, mtwf.Complete(), ErrWorkflowNotFullySwitched) mtwf.ws.WritesSwitched = true - require.Errorf(t, mtwf.Abort(), errWorkflowPartiallySwitched) + require.Errorf(t, mtwf.Abort(), ErrWorkflowPartiallySwitched) require.ElementsMatch(t, mtwf.getCellsAsArray(), []string{"cell1", "cell2"}) require.ElementsMatch(t, mtwf.getTabletTypes(), []topodata.TabletType{topodata.TabletType_REPLICA, topodata.TabletType_RDONLY}) @@ -65,7 +64,8 @@ func TestReshardingWorkflowErrorsAndMisc(t *testing.T) { require.False(t, hasMaster) mtwf.params.TabletTypes = "replica,rdonly,master" - require.ElementsMatch(t, mtwf.getTabletTypes(), []topodata.TabletType{topodata.TabletType_REPLICA, topodata.TabletType_RDONLY, topodata.TabletType_MASTER}) + require.ElementsMatch(t, mtwf.getTabletTypes(), + []topodata.TabletType{topodata.TabletType_REPLICA, 
topodata.TabletType_RDONLY, topodata.TabletType_MASTER}) hasReplica, hasRdonly, hasMaster, err = mtwf.parseTabletTypes() require.NoError(t, err) @@ -74,6 +74,140 @@ func TestReshardingWorkflowErrorsAndMisc(t *testing.T) { require.True(t, hasMaster) } -func TestReshardingWorkflowCurrentState(t *testing.T) { +func TestCopyProgress(t *testing.T) { + var err error + var wf *VReplicationWorkflow + ctx := context.Background() + p := &VReplicationWorkflowParams{ + Workflow: "test", + SourceKeyspace: "ks1", + TargetKeyspace: "ks2", + Tables: "t1,t2", + Cells: "cell1,cell2", + TabletTypes: "replica,rdonly,master", + Timeout: DefaultActionTimeout, + } + tme := newTestTableMigrater(ctx, t) + defer tme.stopTablets(t) + wf, err = tme.wr.NewVReplicationWorkflow(ctx, MoveTablesWorkflow, p) + require.NoError(t, err) + require.NotNil(t, wf) + require.Equal(t, WorkflowStateNotSwitched, wf.CurrentState()) + + expectCopyProgressQueries(t, tme) + + cp, err2 := wf.GetCopyProgress() + require.NoError(t, err2) + log.Infof("CopyProgress is %+v,%+v", (*cp)["t1"], (*cp)["t2"]) + + require.Equal(t, int64(800), (*cp)["t1"].SourceRowCount) + require.Equal(t, int64(200), (*cp)["t1"].TargetRowCount) + require.Equal(t, int64(4000), (*cp)["t1"].SourceTableSize) + require.Equal(t, int64(2000), (*cp)["t1"].TargetTableSize) + + require.Equal(t, int64(2000), (*cp)["t2"].SourceRowCount) + require.Equal(t, int64(400), (*cp)["t2"].TargetRowCount) + require.Equal(t, int64(4000), (*cp)["t2"].SourceTableSize) + require.Equal(t, int64(1000), (*cp)["t2"].TargetTableSize) +} + +func expectCopyProgressQueries(t *testing.T, tme *testMigraterEnv) { + db := tme.tmeDB + query := "select table_name from _vt.copy_state cs, _vt.vreplication vr where vr.id = cs.vrepl_id and vr.id = 1" + rows := []string{"t1", "t2"} + result := sqltypes.MakeTestResult(sqltypes.MakeTestFields( + "table_name", + "varchar"), + rows...) 
+ db.AddQuery(query, result) + query = "select table_name from _vt.copy_state cs, _vt.vreplication vr where vr.id = cs.vrepl_id and vr.id = 2" + db.AddQuery(query, result) + + query = "select table_name, table_rows, data_length from information_schema.tables where table_schema = 'vt_ks2' and table_name in ('t1','t2')" + result = sqltypes.MakeTestResult(sqltypes.MakeTestFields( + "table_name|table_rows|data_length", + "varchar|int64|int64"), + "t1|100|1000", + "t2|200|500") + db.AddQuery(query, result) + + query = "select table_name, table_rows, data_length from information_schema.tables where table_schema = 'vt_ks1' and table_name in ('t1','t2')" + result = sqltypes.MakeTestResult(sqltypes.MakeTestFields( + "table_name|table_rows|data_length", + "varchar|int64|int64"), + "t1|400|2000", + "t2|1000|2000") + db.AddQuery(query, result) + +} +func TestMoveTablesV2(t *testing.T) { + ctx := context.Background() + p := &VReplicationWorkflowParams{ + Workflow: "test", + SourceKeyspace: "ks1", + TargetKeyspace: "ks2", + Tables: "t1,t2", + Cells: "cell1,cell2", + TabletTypes: "replica,rdonly,master", + Timeout: DefaultActionTimeout, + } + tme := newTestTableMigrater(ctx, t) + defer tme.stopTablets(t) + wf, err := tme.wr.NewVReplicationWorkflow(ctx, MoveTablesWorkflow, p) + require.NoError(t, err) + require.NotNil(t, wf) + require.Equal(t, WorkflowStateNotSwitched, wf.CurrentState()) + tme.expectNoPreviousJournals() + expectMoveTablesQueries(t, tme) + tme.expectNoPreviousJournals() + require.NoError(t, wf.SwitchTraffic(DirectionForward)) + require.Equal(t, WorkflowStateAllSwitched, wf.CurrentState()) + require.NoError(t, wf.Complete()) +} + +func expectMoveTablesQueries(t *testing.T, tme *testMigraterEnv) { + var query string + //var result *sqltypes.Result + noResult := &sqltypes.Result{} + for _, dbclient := range tme.dbTargetClients { + query = "update _vt.vreplication set state = 'Running', message = '' where id in (1)" + dbclient.addInvariant(query, noResult) + 
dbclient.addInvariant("select id from _vt.vreplication where db_name = 'vt_ks2' and workflow = 'test'", resultid1) + dbclient.addInvariant("select * from _vt.vreplication where id = 1", runningResult(1)) + dbclient.addInvariant("select * from _vt.vreplication where id = 2", runningResult(2)) + query = "update _vt.vreplication set message='Picked source tablet: cell:\"cell1\" uid:10 ' where id=1" + dbclient.addInvariant(query, noResult) + dbclient.addInvariant("select id from _vt.vreplication where id = 1", resultid1) + dbclient.addInvariant("select id from _vt.vreplication where id = 2", resultid2) + dbclient.addInvariant("update _vt.vreplication set state = 'Stopped', message = 'stopped for cutover' where id in (1)", noResult) + dbclient.addInvariant("update _vt.vreplication set state = 'Stopped', message = 'stopped for cutover' where id in (2)", noResult) + dbclient.addInvariant("insert into _vt.vreplication (workflow, source, pos, max_tps, max_replication_lag, time_updated, transaction_timestamp, state, db_name)", &sqltypes.Result{InsertID: uint64(1)}) + dbclient.addInvariant("update _vt.vreplication set message = 'FROZEN'", noResult) + dbclient.addInvariant("select 1 from _vt.vreplication where db_name='vt_ks2' and workflow='test' and message!='FROZEN'", noResult) + dbclient.addInvariant("delete from _vt.vreplication where id in (1)", noResult) + dbclient.addInvariant("delete from _vt.copy_state where vrepl_id in (1)", noResult) + + // + } + + for _, dbclient := range tme.dbSourceClients { + dbclient.addInvariant("select id from _vt.vreplication where db_name = 'vt_ks1' and workflow = 'test_reverse'", resultid1) + dbclient.addInvariant("delete from _vt.vreplication where id in (1)", noResult) + dbclient.addInvariant("delete from _vt.copy_state where vrepl_id in (1)", noResult) + dbclient.addInvariant("insert into _vt.vreplication (workflow, source, pos, max_tps, max_replication_lag, time_updated, transaction_timestamp, state, db_name)", 
&sqltypes.Result{InsertID: uint64(1)}) + dbclient.addInvariant("select * from _vt.vreplication where id = 1", runningResult(1)) + dbclient.addInvariant("select * from _vt.vreplication where id = 2", runningResult(2)) + dbclient.addInvariant("insert into _vt.resharding_journal", noResult) + } + state := sqltypes.MakeTestResult(sqltypes.MakeTestFields( + "pos|state|message", + "varchar|varchar|varchar"), + "MariaDB/5-456-892|Running", + ) + tme.dbTargetClients[0].addQuery("select pos, state, message from _vt.vreplication where id=1", state, nil) + tme.dbTargetClients[0].addQuery("select pos, state, message from _vt.vreplication where id=2", state, nil) + tme.dbTargetClients[1].addQuery("select pos, state, message from _vt.vreplication where id=1", state, nil) + tme.dbTargetClients[1].addQuery("select pos, state, message from _vt.vreplication where id=2", state, nil) + tme.tmeDB.AddQueryPattern("drop table vt_ks1.t1", &sqltypes.Result{}) + tme.tmeDB.AddQueryPattern("drop table vt_ks1.t2", &sqltypes.Result{}) } -*/ From 9dcd560d7b2f723d93a76d7cce1a73baaf34a6b9 Mon Sep 17 00:00:00 2001 From: Rohit Nayak Date: Fri, 25 Dec 2020 23:46:37 +0100 Subject: [PATCH 20/26] Add tests for reverse and abort Signed-off-by: Rohit Nayak --- go/vt/wrangler/fake_dbclient_test.go | 2 +- go/vt/wrangler/traffic_switcher.go | 8 +- go/vt/wrangler/traffic_switcher_env_test.go | 33 +++++++ go/vt/wrangler/workflow_test.go | 95 +++++++++++++++++++-- 4 files changed, 124 insertions(+), 14 deletions(-) diff --git a/go/vt/wrangler/fake_dbclient_test.go b/go/vt/wrangler/fake_dbclient_test.go index 5ed43622fb0..d716e3ad79b 100644 --- a/go/vt/wrangler/fake_dbclient_test.go +++ b/go/vt/wrangler/fake_dbclient_test.go @@ -152,7 +152,7 @@ func (dc *fakeDBClient) ExecuteFetch(query string, maxrows int) (qr *sqltypes.Re } } - log.Infof("Missing query: >%s<" + query) + log.Infof("Missing query: >>>>>>>>>>>>>>>>>>%s<<<<<<<<<<<<<<<", query) return nil, fmt.Errorf("unexpected query: %s", query) } diff --git 
a/go/vt/wrangler/traffic_switcher.go b/go/vt/wrangler/traffic_switcher.go index 07192b363a7..6d909f535f7 100644 --- a/go/vt/wrangler/traffic_switcher.go +++ b/go/vt/wrangler/traffic_switcher.go @@ -423,11 +423,9 @@ func (wr *Wrangler) SwitchWrites(ctx context.Context, targetKeyspace, workflow s return 0, nil, err } if ts == nil { - if ts == nil { - errorMsg := fmt.Sprintf("workflow %s not found in keyspace %s", workflow, targetKeyspace) - wr.Logger().Errorf(errorMsg) - return 0, nil, fmt.Errorf(errorMsg) - } + errorMsg := fmt.Sprintf("workflow %s not found in keyspace %s", workflow, targetKeyspace) + wr.Logger().Errorf(errorMsg) + return 0, nil, fmt.Errorf(errorMsg) } var sw iswitcher diff --git a/go/vt/wrangler/traffic_switcher_env_test.go b/go/vt/wrangler/traffic_switcher_env_test.go index 72fc035bd9d..686ff2049f4 100644 --- a/go/vt/wrangler/traffic_switcher_env_test.go +++ b/go/vt/wrangler/traffic_switcher_env_test.go @@ -44,6 +44,7 @@ import ( const vreplQueryks = "select id, source, message, cell, tablet_types from _vt.vreplication where workflow='test' and db_name='vt_ks'" const vreplQueryks2 = "select id, source, message, cell, tablet_types from _vt.vreplication where workflow='test' and db_name='vt_ks2'" +const vreplQueryks1 = "select id, source, message, cell, tablet_types from _vt.vreplication where workflow='test_reverse' and db_name='vt_ks1'" type testMigraterEnv struct { ts *topo.Server @@ -193,6 +194,31 @@ func newTestTableMigraterCustom(ctx context.Context, t *testing.T, sourceShards, ) } + for i, sourceShard := range sourceShards { + var rows []string + for j, targetShard := range targetShards { + bls := &binlogdatapb.BinlogSource{ + Keyspace: "ks2", + Shard: targetShard, + Filter: &binlogdatapb.Filter{ + Rules: []*binlogdatapb.Rule{{ + Match: "t1", + Filter: fmt.Sprintf(fmtQuery, fmt.Sprintf("from t1 where in_keyrange('%s')", sourceShard)), + }, { + Match: "t2", + Filter: fmt.Sprintf(fmtQuery, fmt.Sprintf("from t2 where in_keyrange('%s')", 
sourceShard)), + }}, + }, + } + rows = append(rows, fmt.Sprintf("%d|%v|||", j+1, bls)) + } + tme.dbSourceClients[i].addInvariant(vreplQueryks1, sqltypes.MakeTestResult(sqltypes.MakeTestFields( + "id|source|message|cell|tablet_types", + "int64|varchar|varchar|varchar|varchar"), + rows...), + ) + } + if err := tme.wr.saveRoutingRules(ctx, map[string][]string{ "t1": {"ks1.t1"}, "ks2.t1": {"ks1.t1"}, @@ -406,6 +432,13 @@ func (tme *testMigraterEnv) expectNoPreviousJournals() { } } +func (tme *testMigraterEnv) expectNoPreviousReverseJournals() { + // validate that no previous journals exist + for _, dbclient := range tme.dbTargetClients { + dbclient.addQueryRE(tsCheckJournals, &sqltypes.Result{}, nil) + } +} + func (tme *testShardMigraterEnv) forAllStreams(f func(i, j int)) { for i := range tme.targetShards { for j := range tme.sourceShards { diff --git a/go/vt/wrangler/workflow_test.go b/go/vt/wrangler/workflow_test.go index 8a92cbaf55e..b661ceff555 100644 --- a/go/vt/wrangler/workflow_test.go +++ b/go/vt/wrangler/workflow_test.go @@ -140,6 +140,7 @@ func expectCopyProgressQueries(t *testing.T, tme *testMigraterEnv) { db.AddQuery(query, result) } + func TestMoveTablesV2(t *testing.T) { ctx := context.Background() p := &VReplicationWorkflowParams{ @@ -162,6 +163,57 @@ func TestMoveTablesV2(t *testing.T) { tme.expectNoPreviousJournals() require.NoError(t, wf.SwitchTraffic(DirectionForward)) require.Equal(t, WorkflowStateAllSwitched, wf.CurrentState()) + + tme.expectNoPreviousJournals() + tme.expectNoPreviousReverseJournals() + require.NoError(t, wf.ReverseTraffic()) + require.Equal(t, WorkflowStateNotSwitched, wf.CurrentState()) +} + +func TestAbortMoveTablesV2(t *testing.T) { + ctx := context.Background() + p := &VReplicationWorkflowParams{ + Workflow: "test", + SourceKeyspace: "ks1", + TargetKeyspace: "ks2", + Tables: "t1,t2", + Cells: "cell1,cell2", + TabletTypes: "replica,rdonly,master", + Timeout: DefaultActionTimeout, + } + tme := newTestTableMigrater(ctx, t) + 
defer tme.stopTablets(t) + wf, err := tme.wr.NewVReplicationWorkflow(ctx, MoveTablesWorkflow, p) + require.NoError(t, err) + require.NotNil(t, wf) + require.Equal(t, WorkflowStateNotSwitched, wf.CurrentState()) + expectMoveTablesQueries(t, tme) + require.NoError(t, wf.Abort()) +} + +func TestReshardV2(t *testing.T) { + ctx := context.Background() + p := &VReplicationWorkflowParams{ + Workflow: "test", + SourceKeyspace: "ks1", + TargetKeyspace: "ks2", + SourceShards: []string{"-40", "40-"}, + TargetShards: []string{"-80", "80-"}, + Cells: "cell1,cell2", + TabletTypes: "replica,rdonly,master", + Timeout: DefaultActionTimeout, + } + tme := newTestTableMigrater(ctx, t) + defer tme.stopTablets(t) + wf, err := tme.wr.NewVReplicationWorkflow(ctx, ReshardWorkflow, p) + require.NoError(t, err) + require.NotNil(t, wf) + require.Equal(t, WorkflowStateNotSwitched, wf.CurrentState()) + tme.expectNoPreviousJournals() + expectMoveTablesQueries(t, tme) + tme.expectNoPreviousJournals() + require.NoError(t, wf.SwitchTraffic(DirectionForward)) + require.Equal(t, WorkflowStateAllSwitched, wf.CurrentState()) require.NoError(t, wf.Complete()) } @@ -186,11 +238,25 @@ func expectMoveTablesQueries(t *testing.T, tme *testMigraterEnv) { dbclient.addInvariant("select 1 from _vt.vreplication where db_name='vt_ks2' and workflow='test' and message!='FROZEN'", noResult) dbclient.addInvariant("delete from _vt.vreplication where id in (1)", noResult) dbclient.addInvariant("delete from _vt.copy_state where vrepl_id in (1)", noResult) - - // + dbclient.addInvariant("insert into _vt.resharding_journal", noResult) + dbclient.addInvariant("select val from _vt.resharding_journal", noResult) + dbclient.addInvariant("select id, source, message, cell, tablet_types from _vt.vreplication where workflow='test_reverse' and db_name='vt_ks1'", + sqltypes.MakeTestResult(sqltypes.MakeTestFields( + "id|source|message|cell|tablet_types", + "int64|varchar|varchar|varchar|varchar"), + ""), + ) + //select pos, state, 
message from _vt.vreplication where id=1 } for _, dbclient := range tme.dbSourceClients { + dbclient.addInvariant("select val from _vt.resharding_journal", noResult) + dbclient.addInvariant("update _vt.vreplication set message = 'FROZEN'", noResult) + dbclient.addInvariant("insert into _vt.vreplication (workflow, source, pos, max_tps, max_replication_lag, time_updated, transaction_timestamp, state, db_name)", &sqltypes.Result{InsertID: uint64(1)}) + dbclient.addInvariant("update _vt.vreplication set state = 'Stopped', message = 'stopped for cutover' where id in (1)", noResult) + dbclient.addInvariant("update _vt.vreplication set state = 'Stopped', message = 'stopped for cutover' where id in (2)", noResult) + dbclient.addInvariant("select id from _vt.vreplication where id = 1", resultid1) + dbclient.addInvariant("select id from _vt.vreplication where id = 2", resultid2) dbclient.addInvariant("select id from _vt.vreplication where db_name = 'vt_ks1' and workflow = 'test_reverse'", resultid1) dbclient.addInvariant("delete from _vt.vreplication where id in (1)", noResult) dbclient.addInvariant("delete from _vt.copy_state where vrepl_id in (1)", noResult) @@ -204,10 +270,23 @@ func expectMoveTablesQueries(t *testing.T, tme *testMigraterEnv) { "varchar|varchar|varchar"), "MariaDB/5-456-892|Running", ) - tme.dbTargetClients[0].addQuery("select pos, state, message from _vt.vreplication where id=1", state, nil) - tme.dbTargetClients[0].addQuery("select pos, state, message from _vt.vreplication where id=2", state, nil) - tme.dbTargetClients[1].addQuery("select pos, state, message from _vt.vreplication where id=1", state, nil) - tme.dbTargetClients[1].addQuery("select pos, state, message from _vt.vreplication where id=2", state, nil) - tme.tmeDB.AddQueryPattern("drop table vt_ks1.t1", &sqltypes.Result{}) - tme.tmeDB.AddQueryPattern("drop table vt_ks1.t2", &sqltypes.Result{}) + tme.dbTargetClients[0].addInvariant("select pos, state, message from _vt.vreplication where id=1", 
state) + tme.dbTargetClients[0].addInvariant("select pos, state, message from _vt.vreplication where id=2", state) + tme.dbTargetClients[1].addInvariant("select pos, state, message from _vt.vreplication where id=1", state) + tme.dbTargetClients[1].addInvariant("select pos, state, message from _vt.vreplication where id=2", state) + + state = sqltypes.MakeTestResult(sqltypes.MakeTestFields( + "pos|state|message", + "varchar|varchar|varchar"), + "MariaDB/5-456-893|Running", + ) + tme.dbSourceClients[0].addInvariant("select pos, state, message from _vt.vreplication where id=1", state) + tme.dbSourceClients[0].addInvariant("select pos, state, message from _vt.vreplication where id=2", state) + tme.dbSourceClients[1].addInvariant("select pos, state, message from _vt.vreplication where id=1", state) + tme.dbSourceClients[1].addInvariant("select pos, state, message from _vt.vreplication where id=2", state) + tme.tmeDB.AddQuery("drop table vt_ks1.t1", noResult) + tme.tmeDB.AddQuery("drop table vt_ks1.t2", noResult) + tme.tmeDB.AddQuery("drop table vt_ks2.t1", noResult) + tme.tmeDB.AddQuery("drop table vt_ks2.t2", noResult) + tme.tmeDB.AddQuery("update _vt.vreplication set message='Picked source tablet: cell:\"cell1\" uid:10 ' where id=1", noResult) } From 2a7a4925887861904df09d6957337b2f60c01d32 Mon Sep 17 00:00:00 2001 From: Rohit Nayak Date: Sat, 26 Dec 2020 12:53:18 +0100 Subject: [PATCH 21/26] Fix Start validation Signed-off-by: Rohit Nayak --- go/vt/wrangler/workflow.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/go/vt/wrangler/workflow.go b/go/vt/wrangler/workflow.go index 6d8c53950e8..53bb53266fd 100644 --- a/go/vt/wrangler/workflow.go +++ b/go/vt/wrangler/workflow.go @@ -158,8 +158,8 @@ func (vrw *VReplicationWorkflow) stateAsString(ws *workflowState) string { // Start initiates a workflow func (vrw *VReplicationWorkflow) Start() error { var err error - if !vrw.Exists() { - return fmt.Errorf("workflow now found") + if vrw.Exists() { + 
return fmt.Errorf("workflow already exists found") } if vrw.CachedState() != WorkflowStateNotStarted { return fmt.Errorf("workflow has already been started, state is %s", vrw.CachedState()) From 962b81c7917ff2e17943be3555a9c497fb0227c8 Mon Sep 17 00:00:00 2001 From: Rohit Nayak Date: Sat, 26 Dec 2020 16:01:39 +0100 Subject: [PATCH 22/26] Filter out backquotes while checking if query is executed on a tablet: vtgate seems to be quoting some characters now Signed-off-by: Rohit Nayak --- go/test/endtoend/vreplication/helper.go | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/go/test/endtoend/vreplication/helper.go b/go/test/endtoend/vreplication/helper.go index 584cb004286..5c0794b0707 100644 --- a/go/test/endtoend/vreplication/helper.go +++ b/go/test/endtoend/vreplication/helper.go @@ -144,12 +144,15 @@ func getQueryCount(url string, query string) int { if len(row) != len(headings) { continue } + filterChars := []string{"_", "`"} //Queries seem to include non-printable characters at times and hence equality fails unless these are removed re := regexp.MustCompile("[[:^ascii:]]") foundQuery := re.ReplaceAllLiteralString(row[queryIndex], "") - foundQuery = strings.ReplaceAll(foundQuery, "_", "") cleanQuery := re.ReplaceAllLiteralString(query, "") - cleanQuery = strings.ReplaceAll(cleanQuery, "_", "") + for _, filterChar := range filterChars { + foundQuery = strings.ReplaceAll(foundQuery, filterChar, "") + cleanQuery = strings.ReplaceAll(cleanQuery, filterChar, "") + } if foundQuery == cleanQuery || strings.Contains(foundQuery, cleanQuery) { count, _ = strconv.Atoi(row[countIndex]) } From 62715147c98a9e8b4b823743618838098512fcb1 Mon Sep 17 00:00:00 2001 From: Rohit Nayak Date: Sat, 26 Dec 2020 17:23:51 +0100 Subject: [PATCH 23/26] Remove rdonly for CI and move V2 test to new test shard Signed-off-by: Rohit Nayak --- .../vreplication/resharding_workflows_v2_test.go | 16 +++------------- test/config.json | 11 ++++++++++- 2 files changed, 13 
insertions(+), 14 deletions(-) diff --git a/go/test/endtoend/vreplication/resharding_workflows_v2_test.go b/go/test/endtoend/vreplication/resharding_workflows_v2_test.go index d4a7f885c57..86e88fc08ff 100644 --- a/go/test/endtoend/vreplication/resharding_workflows_v2_test.go +++ b/go/test/endtoend/vreplication/resharding_workflows_v2_test.go @@ -392,13 +392,12 @@ func setupCluster(t *testing.T) *VitessCluster { zone1 := vc.Cells["zone1"] zone2 := vc.Cells["zone2"] - vc.AddKeyspace(t, []*Cell{zone1, zone2}, "product", "0", initialProductVSchema, initialProductSchema, defaultReplicas, 1, 100) + vc.AddKeyspace(t, []*Cell{zone1, zone2}, "product", "0", initialProductVSchema, initialProductSchema, defaultReplicas, defaultRdonly, 100) vtgate = zone1.Vtgates[0] require.NotNil(t, vtgate) vtgate.WaitForStatusOfTabletInShard(fmt.Sprintf("%s.%s.master", "product", "0"), 1) vtgate.WaitForStatusOfTabletInShard(fmt.Sprintf("%s.%s.replica", "product", "0"), 2) - vtgate.WaitForStatusOfTabletInShard(fmt.Sprintf("%s.%s.rdonly", "product", "0"), 2) vtgateConn = getConnection(t, globalConfig.vtgateMySQLPort) verifyClusterHealth(t) @@ -412,7 +411,7 @@ func setupCluster(t *testing.T) *VitessCluster { func setupCustomerKeyspace(t *testing.T) { if _, err := vc.AddKeyspace(t, []*Cell{vc.Cells["zone1"], vc.Cells["zone2"]}, "customer", "-80,80-", - customerVSchema, customerSchema, defaultReplicas, 1, 200); err != nil { + customerVSchema, customerSchema, defaultReplicas, defaultRdonly, 200); err != nil { t.Fatal(err) } if err := vtgate.WaitForStatusOfTabletInShard(fmt.Sprintf("%s.%s.master", "customer", "-80"), 1); err != nil { @@ -427,12 +426,6 @@ func setupCustomerKeyspace(t *testing.T) { if err := vtgate.WaitForStatusOfTabletInShard(fmt.Sprintf("%s.%s.replica", "customer", "80-"), 2); err != nil { t.Fatal(err) } - if err := vtgate.WaitForStatusOfTabletInShard(fmt.Sprintf("%s.%s.rdonly", "customer", "80-"), 2); err != nil { - t.Fatal(err) - } - if err := 
vtgate.WaitForStatusOfTabletInShard(fmt.Sprintf("%s.%s.rdonly", "customer", "-80"), 2); err != nil { - t.Fatal(err) - } custKs := vc.Cells[defaultCell.Name].Keyspaces["customer"] targetTab1 = custKs.Shards["-80"].Tablets["zone1-200"].Vttablet targetTab2 = custKs.Shards["80-"].Tablets["zone1-300"].Vttablet @@ -549,7 +542,7 @@ func moveCustomerTableSwitchFlows(t *testing.T, cells []*Cell, sourceCellOrAlias func createAdditionalCustomerShards(t *testing.T, shards string) { ksName := "customer" keyspace := vc.Cells[defaultCell.Name].Keyspaces[ksName] - require.NoError(t, vc.AddShards(t, []*Cell{defaultCell, vc.Cells["zone2"]}, keyspace, shards, defaultReplicas, 1, 400)) + require.NoError(t, vc.AddShards(t, []*Cell{defaultCell, vc.Cells["zone2"]}, keyspace, shards, defaultReplicas, defaultRdonly, 400)) arrTargetShardNames := strings.Split(shards, ",") for _, shardName := range arrTargetShardNames { @@ -559,9 +552,6 @@ func createAdditionalCustomerShards(t *testing.T, shards string) { if err := vtgate.WaitForStatusOfTabletInShard(fmt.Sprintf("%s.%s.replica", ksName, shardName), 2); err != nil { t.Fatal(err) } - if err := vtgate.WaitForStatusOfTabletInShard(fmt.Sprintf("%s.%s.rdonly", ksName, shardName), 2); err != nil { - t.Fatal(err) - } } custKs := vc.Cells[defaultCell.Name].Keyspaces[ksName] targetTab2 = custKs.Shards["80-c0"].Tablets["zone1-600"].Vttablet diff --git a/test/config.json b/test/config.json index 4ac0cd5cc0c..a7a48e23a1b 100644 --- a/test/config.json +++ b/test/config.json @@ -600,7 +600,7 @@ }, "vreplication_basic": { "File": "unused.go", - "Args": ["vitess.io/vitess/go/test/endtoend/vreplication", "-run", "Basic"], + "Args": ["vitess.io/vitess/go/test/endtoend/vreplication", "-run", "TestBasicVreplicationWorkflow"], "Command": [], "Manual": false, "Shard": 24, @@ -615,6 +615,15 @@ "Shard": 22, "RetryMax": 0, "Tags": [] + }, + "vreplication_v2": { + "File": "unused.go", + "Args": ["vitess.io/vitess/go/test/endtoend/vreplication", "-run", 
"TestBasicV2Workflows"], + "Command": [], + "Manual": false, + "Shard": 21, + "RetryMax": 3, + "Tags": [] } } } From b3c1d7b0dcb4c8d0c753aaf3b609adf342b7a1c6 Mon Sep 17 00:00:00 2001 From: Rohit Nayak Date: Sat, 26 Dec 2020 19:39:18 +0100 Subject: [PATCH 24/26] Revert local changes to local example script Signed-off-by: Rohit Nayak --- test/local_example.sh | 1 - 1 file changed, 1 deletion(-) diff --git a/test/local_example.sh b/test/local_example.sh index a6938f71923..a72a7bde0bb 100755 --- a/test/local_example.sh +++ b/test/local_example.sh @@ -44,7 +44,6 @@ for shard in "customer/0"; do done; ./202_move_tables.sh -exit sleep 3 # required for now ./203_switch_reads.sh From 35cd400a12b8a01c51cf2b51920188dcbfcf2c59 Mon Sep 17 00:00:00 2001 From: Rohit Nayak Date: Wed, 30 Dec 2020 22:39:29 +0100 Subject: [PATCH 25/26] Improve test coverage, fix reshard test Signed-off-by: Rohit Nayak --- go/vt/wrangler/workflow.go | 1 + go/vt/wrangler/workflow_test.go | 155 ++++++++++++++++++++++++++++++-- 2 files changed, 149 insertions(+), 7 deletions(-) diff --git a/go/vt/wrangler/workflow.go b/go/vt/wrangler/workflow.go index 53bb53266fd..44ac91d6561 100644 --- a/go/vt/wrangler/workflow.go +++ b/go/vt/wrangler/workflow.go @@ -243,6 +243,7 @@ func (vrw *VReplicationWorkflow) Abort() error { if _, err := vrw.wr.DropTargets(vrw.ctx, vrw.ws.TargetKeyspace, vrw.ws.Workflow, vrw.params.KeepData, false); err != nil { return err } + vrw.ts = nil return nil } diff --git a/go/vt/wrangler/workflow_test.go b/go/vt/wrangler/workflow_test.go index b661ceff555..71cd2d9133f 100644 --- a/go/vt/wrangler/workflow_test.go +++ b/go/vt/wrangler/workflow_test.go @@ -170,7 +170,7 @@ func TestMoveTablesV2(t *testing.T) { require.Equal(t, WorkflowStateNotSwitched, wf.CurrentState()) } -func TestAbortMoveTablesV2(t *testing.T) { +func TestMoveTablesV2Partial(t *testing.T) { ctx := context.Background() p := &VReplicationWorkflowParams{ Workflow: "test", @@ -187,39 +187,180 @@ func TestAbortMoveTablesV2(t 
*testing.T) { require.NoError(t, err) require.NotNil(t, wf) require.Equal(t, WorkflowStateNotSwitched, wf.CurrentState()) + tme.expectNoPreviousJournals() expectMoveTablesQueries(t, tme) - require.NoError(t, wf.Abort()) + + tme.expectNoPreviousJournals() + wf.params.TabletTypes = "replica" + wf.params.Cells = "cell1" + require.NoError(t, wf.SwitchTraffic(DirectionForward)) + require.Equal(t, "Reads partially switched. Replica switched in cells: cell1. Rdonly not switched. Writes Not Switched", wf.CurrentState()) + + tme.expectNoPreviousJournals() + wf.params.TabletTypes = "replica" + wf.params.Cells = "cell2" + require.NoError(t, wf.SwitchTraffic(DirectionForward)) + require.Equal(t, "Reads partially switched. All Replica Reads Switched. Rdonly not switched. Writes Not Switched", wf.CurrentState()) + + tme.expectNoPreviousJournals() + wf.params.TabletTypes = "rdonly" + wf.params.Cells = "cell1,cell2" + require.NoError(t, wf.SwitchTraffic(DirectionForward)) + require.Equal(t, WorkflowStateReadsSwitched, wf.CurrentState()) + + tme.expectNoPreviousJournals() + wf.params.TabletTypes = "replica,rdonly" + require.NoError(t, wf.SwitchTraffic(DirectionBackward)) + require.Equal(t, WorkflowStateNotSwitched, wf.CurrentState()) + + tme.expectNoPreviousJournals() + wf.params.TabletTypes = "rdonly" + wf.params.Cells = "cell1" + require.NoError(t, wf.SwitchTraffic(DirectionForward)) + require.Equal(t, "Reads partially switched. Replica not switched. Rdonly switched in cells: cell1. Writes Not Switched", wf.CurrentState()) + + tme.expectNoPreviousJournals() + wf.params.TabletTypes = "rdonly" + wf.params.Cells = "cell2" + require.NoError(t, wf.SwitchTraffic(DirectionForward)) + require.Equal(t, "Reads partially switched. Replica not switched. All Rdonly Reads Switched. 
Writes Not Switched", wf.CurrentState()) + } -func TestReshardV2(t *testing.T) { +func TestMoveTablesV2Abort(t *testing.T) { ctx := context.Background() p := &VReplicationWorkflowParams{ Workflow: "test", SourceKeyspace: "ks1", TargetKeyspace: "ks2", - SourceShards: []string{"-40", "40-"}, - TargetShards: []string{"-80", "80-"}, + Tables: "t1,t2", Cells: "cell1,cell2", TabletTypes: "replica,rdonly,master", Timeout: DefaultActionTimeout, } tme := newTestTableMigrater(ctx, t) defer tme.stopTablets(t) + expectMoveTablesQueries(t, tme) + wf, err := tme.wr.NewVReplicationWorkflow(ctx, MoveTablesWorkflow, p) + require.NoError(t, err) + require.NotNil(t, wf) + require.Equal(t, WorkflowStateNotSwitched, wf.CurrentState()) + expectMoveTablesQueries(t, tme) + require.NoError(t, wf.Abort()) +} + +func TestReshardV2(t *testing.T) { + ctx := context.Background() + sourceShards := []string{"-40", "40-"} + targetShards := []string{"-80", "80-"} + p := &VReplicationWorkflowParams{ + Workflow: "test", + SourceKeyspace: "ks", + TargetKeyspace: "ks", + SourceShards: sourceShards, + TargetShards: targetShards, + Cells: "cell1,cell2", + TabletTypes: "replica,rdonly,master", + Timeout: DefaultActionTimeout, + } + tme := newTestShardMigrater(ctx, t, sourceShards, targetShards) + defer tme.stopTablets(t) wf, err := tme.wr.NewVReplicationWorkflow(ctx, ReshardWorkflow, p) require.NoError(t, err) require.NotNil(t, wf) require.Equal(t, WorkflowStateNotSwitched, wf.CurrentState()) tme.expectNoPreviousJournals() - expectMoveTablesQueries(t, tme) + expectReshardQueries(t, tme) tme.expectNoPreviousJournals() require.NoError(t, wf.SwitchTraffic(DirectionForward)) require.Equal(t, WorkflowStateAllSwitched, wf.CurrentState()) require.NoError(t, wf.Complete()) + si, err := wf.wr.ts.GetShard(ctx, "ks", "-40") + require.Contains(t, err.Error(), "node doesn't exist") + require.Nil(t, si) + si, err = wf.wr.ts.GetShard(ctx, "ks", "-80") + require.NoError(t, err) + require.NotNil(t, si) +} + +func 
TestReshardV2Abort(t *testing.T) { + ctx := context.Background() + sourceShards := []string{"-40", "40-"} + targetShards := []string{"-80", "80-"} + p := &VReplicationWorkflowParams{ + Workflow: "test", + SourceKeyspace: "ks", + TargetKeyspace: "ks", + SourceShards: sourceShards, + TargetShards: targetShards, + Cells: "cell1,cell2", + TabletTypes: "replica,rdonly,master", + Timeout: DefaultActionTimeout, + } + tme := newTestShardMigrater(ctx, t, sourceShards, targetShards) + defer tme.stopTablets(t) + wf, err := tme.wr.NewVReplicationWorkflow(ctx, ReshardWorkflow, p) + require.NoError(t, err) + require.NotNil(t, wf) + require.Equal(t, WorkflowStateNotSwitched, wf.CurrentState()) + tme.expectNoPreviousJournals() + expectReshardQueries(t, tme) + require.NoError(t, wf.Abort()) +} + +func expectReshardQueries(t *testing.T, tme *testShardMigraterEnv) { + + sourceQueries := []string{ + "select id, workflow, source, pos from _vt.vreplication where db_name='vt_ks' and workflow != 'test_reverse' and state = 'Stopped' and message != 'FROZEN'", + "select id, workflow, source, pos from _vt.vreplication where db_name='vt_ks' and workflow != 'test_reverse'", + } + noResult := &sqltypes.Result{} + for _, dbclient := range tme.dbSourceClients { + for _, query := range sourceQueries { + dbclient.addInvariant(query, noResult) + } + dbclient.addInvariant("select id from _vt.vreplication where db_name = 'vt_ks' and workflow = 'test_reverse'", resultid1) + dbclient.addInvariant("delete from _vt.vreplication where id in (1)", noResult) + dbclient.addInvariant("delete from _vt.copy_state where vrepl_id in (1)", noResult) + dbclient.addInvariant("insert into _vt.vreplication (workflow, source, pos, max_tps, max_replication_lag, time_updated, transaction_timestamp, state, db_name)", &sqltypes.Result{InsertID: uint64(1)}) + dbclient.addInvariant("select id from _vt.vreplication where id = 1", resultid1) + dbclient.addInvariant("select id from _vt.vreplication where id = 2", resultid2) + 
dbclient.addInvariant("select * from _vt.vreplication where id = 1", runningResult(1)) + dbclient.addInvariant("select * from _vt.vreplication where id = 2", runningResult(2)) + dbclient.addInvariant("insert into _vt.resharding_journal", noResult) + + } + + targetQueries := []string{ + "select id, workflow, source, pos from _vt.vreplication where db_name='vt_ks' and workflow != 'test_reverse' and state = 'Stopped' and message != 'FROZEN'", + } + + for _, dbclient := range tme.dbTargetClients { + for _, query := range targetQueries { + dbclient.addInvariant(query, noResult) + } + dbclient.addInvariant("select id from _vt.vreplication where id = 1", resultid1) + dbclient.addInvariant("select id from _vt.vreplication where id = 2", resultid2) + dbclient.addInvariant("update _vt.vreplication set state = 'Stopped', message = 'stopped for cutover' where id in (1)", noResult) + dbclient.addInvariant("update _vt.vreplication set state = 'Stopped', message = 'stopped for cutover' where id in (2)", noResult) + dbclient.addInvariant("select * from _vt.vreplication where id = 1", runningResult(1)) + dbclient.addInvariant("select * from _vt.vreplication where id = 2", runningResult(2)) + state := sqltypes.MakeTestResult(sqltypes.MakeTestFields( + "pos|state|message", + "varchar|varchar|varchar"), + "MariaDB/5-456-892|Running") + dbclient.addInvariant("select pos, state, message from _vt.vreplication where id=2", state) + dbclient.addInvariant("select pos, state, message from _vt.vreplication where id=1", state) + dbclient.addInvariant("select id from _vt.vreplication where db_name = 'vt_ks' and workflow = 'test'", resultid1) + dbclient.addInvariant("update _vt.vreplication set message = 'FROZEN'", noResult) + dbclient.addInvariant("delete from _vt.vreplication where id in (1)", noResult) + dbclient.addInvariant("delete from _vt.copy_state where vrepl_id in (1)", noResult) + + } } func expectMoveTablesQueries(t *testing.T, tme *testMigraterEnv) { var query string - //var result 
*sqltypes.Result noResult := &sqltypes.Result{} for _, dbclient := range tme.dbTargetClients { query = "update _vt.vreplication set state = 'Running', message = '' where id in (1)" From 4b8eb3902f9fba9deb7c798e3a33abf5891cfa68 Mon Sep 17 00:00:00 2001 From: Rohit Nayak Date: Sun, 3 Jan 2021 09:42:02 +0100 Subject: [PATCH 26/26] Fix debug flag Signed-off-by: Rohit Nayak --- go/test/endtoend/vreplication/cluster.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/go/test/endtoend/vreplication/cluster.go b/go/test/endtoend/vreplication/cluster.go index 7c7eb073937..e1e2b8820bd 100644 --- a/go/test/endtoend/vreplication/cluster.go +++ b/go/test/endtoend/vreplication/cluster.go @@ -21,7 +21,8 @@ import ( ) var ( - debug = true // set to true to always use local env vtdataroot for local debugging + debug = false // set to true to always use local env vtdataroot for local debugging + originalVtdataroot string vtdataroot string )