Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions go/vt/mysqlctl/backupengine.go
Original file line number Diff line number Diff line change
Expand Up @@ -217,8 +217,8 @@ func FindBackupToRestore(ctx context.Context, params RestoreParams, bhs []backup
continue
}
}
if !checkBackupTime /* not snapshot */ || backupTime.Equal(params.StartTime) || backupTime.Before(params.StartTime) {
params.Logger.Infof("Restore: found backup %v %v to restore", bh.Directory(), bh.Name())
if !checkBackupTime || backupTime.Equal(params.StartTime) || backupTime.Before(params.StartTime) {
params.Logger.Infof("Restore: found backup %v %v to restore using as start timestamp %v", bh.Directory(), bh.Name(), params.StartTime.Format(BackupTimestampFormat))
break
}
}
Expand Down
26 changes: 21 additions & 5 deletions go/vt/vttablet/tabletmanager/restore.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,9 @@ import (
// It is only enabled if restore_from_backup is set.

var (
restoreFromBackup = flag.Bool("restore_from_backup", false, "(init restore parameter) will check BackupStorage for a recent backup at startup and start there")
restoreFromBackup = flag.Bool("restore_from_backup", false, "(init restore parameter) will check BackupStorage for a recent backup at startup and start there")
restoreFromBackupTs = flag.String("restore_from_backup_ts", "", "(init restore parameter) if set, restore the last backup taken at or before this timestamp. Example: '2021-04-29.133050'")

restoreConcurrency = flag.Int("restore_concurrency", 4, "(init restore parameter) how many concurrent files to restore at once")
waitForBackupInterval = flag.Duration("wait_for_backup_interval", 0, "(init restore parameter) if this is greater than 0, instead of starting up empty when no backups are found, keep checking at this interval for a backup to appear")

Expand All @@ -65,7 +67,7 @@ var (
// It will either work, fail gracefully, or return
// an error in case of a non-recoverable error.
// It takes the action lock so no RPC interferes.
func (tm *TabletManager) RestoreData(ctx context.Context, logger logutil.Logger, waitForBackupInterval time.Duration, deleteBeforeRestore bool) error {
func (tm *TabletManager) RestoreData(ctx context.Context, logger logutil.Logger, waitForBackupInterval time.Duration, deleteBeforeRestore bool, restoreFromBackupTs string) error {
if err := tm.lock(ctx); err != nil {
return err
}
Expand Down Expand Up @@ -119,7 +121,7 @@ func (tm *TabletManager) RestoreData(ctx context.Context, logger logutil.Logger,

startTime = time.Now()

err = tm.restoreDataLocked(ctx, logger, waitForBackupInterval, deleteBeforeRestore)
err = tm.restoreDataLocked(ctx, logger, waitForBackupInterval, deleteBeforeRestore, restoreFromBackupTs)
if err != nil {
return err
}
Expand All @@ -137,7 +139,7 @@ func (tm *TabletManager) RestoreData(ctx context.Context, logger logutil.Logger,
return nil
}

func (tm *TabletManager) restoreDataLocked(ctx context.Context, logger logutil.Logger, waitForBackupInterval time.Duration, deleteBeforeRestore bool) error {
func (tm *TabletManager) restoreDataLocked(ctx context.Context, logger logutil.Logger, waitForBackupInterval time.Duration, deleteBeforeRestore bool, restoreFromBackupTs string) error {

tablet := tm.Tablet()
originalType := tablet.Type
Expand All @@ -152,6 +154,17 @@ func (tm *TabletManager) restoreDataLocked(ctx context.Context, logger logutil.L
if err != nil {
return err
}

// Check whether we need to use the latest backup or a custom backup timestamp for the restore
var startTime time.Time

if restoreFromBackupTs != "" {
startTime, err = time.Parse(mysqlctl.BackupTimestampFormat, restoreFromBackupTs)
if err != nil {
return vterrors.New(vtrpcpb.Code_INVALID_ARGUMENT, fmt.Sprintf("unable to parse the timestamp passed via -restore_from_backup_ts: %v", err))
}
}

// For a SNAPSHOT keyspace, we have to look for backups of BaseKeyspace
// so we will pass the BaseKeyspace in RestoreParams instead of tablet.Keyspace
if keyspaceInfo.KeyspaceType == topodatapb.KeyspaceType_SNAPSHOT {
Expand All @@ -160,6 +173,9 @@ func (tm *TabletManager) restoreDataLocked(ctx context.Context, logger logutil.L
}
keyspace = keyspaceInfo.BaseKeyspace
log.Infof("Using base_keyspace %v to restore keyspace %v", keyspace, tablet.Keyspace)

startTime = logutil.ProtoToTime(keyspaceInfo.SnapshotTime)
log.Infof("Using %v as backup time", startTime)
}

params := mysqlctl.RestoreParams{
Expand All @@ -173,7 +189,7 @@ func (tm *TabletManager) restoreDataLocked(ctx context.Context, logger logutil.L
DbName: topoproto.TabletDbName(tablet),
Keyspace: keyspace,
Shard: tablet.Shard,
StartTime: logutil.ProtoToTime(keyspaceInfo.SnapshotTime),
StartTime: startTime,
}

// Check whether we're going to restore before changing to RESTORE type,
Expand Down
4 changes: 2 additions & 2 deletions go/vt/vttablet/tabletmanager/rpc_backup.go
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@ func (tm *TabletManager) Backup(ctx context.Context, concurrency int, logger log
return returnErr
}

// RestoreFromBackup deletes all local data and restores anew from the latest backup.
// RestoreFromBackup deletes all local data and restores anew from the latest backup.
func (tm *TabletManager) RestoreFromBackup(ctx context.Context, logger logutil.Logger) error {
if err := tm.lock(ctx); err != nil {
return err
Expand All @@ -169,7 +169,7 @@ func (tm *TabletManager) RestoreFromBackup(ctx context.Context, logger logutil.L
l := logutil.NewTeeLogger(logutil.NewConsoleLogger(), logger)

// now we can run restore
err = tm.restoreDataLocked(ctx, l, 0 /* waitForBackupInterval */, true /* deleteBeforeRestore */)
err = tm.restoreDataLocked(ctx, l, 0 /* waitForBackupInterval */, true /* deleteBeforeRestore */, "" /*restoreFromBackupTs */)

// re-run health check to be sure to capture any replication delay
tm.QueryServiceControl.BroadcastHealth()
Expand Down
2 changes: 1 addition & 1 deletion go/vt/vttablet/tabletmanager/tm_init.go
Original file line number Diff line number Diff line change
Expand Up @@ -644,7 +644,7 @@ func (tm *TabletManager) handleRestore(ctx context.Context) (bool, error) {

// restoreFromBackup will just be a regular action
// (same as if it was triggered remotely)
if err := tm.RestoreData(ctx, logutil.NewConsoleLogger(), *waitForBackupInterval, false /* deleteBeforeRestore */); err != nil {
if err := tm.RestoreData(ctx, logutil.NewConsoleLogger(), *waitForBackupInterval, false /* deleteBeforeRestore */, *restoreFromBackupTs); err != nil {
log.Exitf("RestoreFromBackup failed: %v", err)
}
}()
Expand Down
12 changes: 6 additions & 6 deletions go/vt/wrangler/testlib/backup_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -190,7 +190,7 @@ func TestBackupRestore(t *testing.T) {
RelayLogInfoPath: path.Join(root, "relay-log.info"),
}

require.NoError(t, destTablet.TM.RestoreData(ctx, logutil.NewConsoleLogger(), 0 /* waitForBackupInterval */, false /* deleteBeforeRestore */))
require.NoError(t, destTablet.TM.RestoreData(ctx, logutil.NewConsoleLogger(), 0 /* waitForBackupInterval */, false /* deleteBeforeRestore */, "" /* restoreFromBackupTs */))
// verify the full status
require.NoError(t, destTablet.FakeMysqlDaemon.CheckSuperQueryList(), "destTablet.FakeMysqlDaemon.CheckSuperQueryList failed")
assert.True(t, destTablet.FakeMysqlDaemon.Replicating)
Expand Down Expand Up @@ -224,7 +224,7 @@ func TestBackupRestore(t *testing.T) {
master.FakeMysqlDaemon.SetReplicationPositionPos = master.FakeMysqlDaemon.CurrentMasterPosition

// restore master from backup
require.NoError(t, master.TM.RestoreData(ctx, logutil.NewConsoleLogger(), 0 /* waitForBackupInterval */, false /* deleteBeforeRestore */), "RestoreData failed")
require.NoError(t, master.TM.RestoreData(ctx, logutil.NewConsoleLogger(), 0 /* waitForBackupInterval */, false /* deleteBeforeRestore */, "" /* restoreFromBackupTs */), "RestoreData failed")
// tablet was created as MASTER, so its baseTabletType is MASTER
assert.Equal(t, topodatapb.TabletType_MASTER, master.Tablet.Type)
assert.False(t, master.FakeMysqlDaemon.Replicating)
Expand All @@ -238,7 +238,7 @@ func TestBackupRestore(t *testing.T) {
"SHOW TABLES FROM `vt_test_keyspace`": {Rows: [][]sqltypes.Value{{sqltypes.NewVarBinary("a")}}},
}

require.NoError(t, master.TM.RestoreData(ctx, logutil.NewConsoleLogger(), 0 /* waitForBackupInterval */, false /* deleteBeforeRestore */), "RestoreData failed")
require.NoError(t, master.TM.RestoreData(ctx, logutil.NewConsoleLogger(), 0 /* waitForBackupInterval */, false /* deleteBeforeRestore */, "" /* restoreFromBackupTs */), "RestoreData failed")
// Tablet type should not change
assert.Equal(t, topodatapb.TabletType_MASTER, master.Tablet.Type)
assert.False(t, master.FakeMysqlDaemon.Replicating)
Expand Down Expand Up @@ -416,7 +416,7 @@ func TestBackupRestoreLagged(t *testing.T) {

errCh = make(chan error, 1)
go func(ctx context.Context, tablet *FakeTablet) {
errCh <- tablet.TM.RestoreData(ctx, logutil.NewConsoleLogger(), 0 /* waitForBackupInterval */, false /* deleteBeforeRestore */)
errCh <- tablet.TM.RestoreData(ctx, logutil.NewConsoleLogger(), 0 /* waitForBackupInterval */, false /* deleteBeforeRestore */, "" /* restoreFromBackupTs */)
}(ctx, destTablet)

timer = time.NewTicker(1 * time.Second)
Expand Down Expand Up @@ -588,7 +588,7 @@ func TestRestoreUnreachableMaster(t *testing.T) {
// set a short timeout so that we don't have to wait 30 seconds
*topo.RemoteOperationTimeout = 2 * time.Second
// Restore should still succeed
require.NoError(t, destTablet.TM.RestoreData(ctx, logutil.NewConsoleLogger(), 0 /* waitForBackupInterval */, false /* deleteBeforeRestore */))
require.NoError(t, destTablet.TM.RestoreData(ctx, logutil.NewConsoleLogger(), 0 /* waitForBackupInterval */, false /* deleteBeforeRestore */, "" /* restoreFromBackupTs */))
// verify the full status
require.NoError(t, destTablet.FakeMysqlDaemon.CheckSuperQueryList(), "destTablet.FakeMysqlDaemon.CheckSuperQueryList failed")
assert.True(t, destTablet.FakeMysqlDaemon.Replicating)
Expand Down Expand Up @@ -739,7 +739,7 @@ func TestDisableActiveReparents(t *testing.T) {
RelayLogInfoPath: path.Join(root, "relay-log.info"),
}

require.NoError(t, destTablet.TM.RestoreData(ctx, logutil.NewConsoleLogger(), 0 /* waitForBackupInterval */, false /* deleteBeforeRestore */))
require.NoError(t, destTablet.TM.RestoreData(ctx, logutil.NewConsoleLogger(), 0 /* waitForBackupInterval */, false /* deleteBeforeRestore */, "" /* restoreFromBackupTs */))
// verify the full status
require.NoError(t, destTablet.FakeMysqlDaemon.CheckSuperQueryList(), "destTablet.FakeMysqlDaemon.CheckSuperQueryList failed")
assert.False(t, destTablet.FakeMysqlDaemon.Replicating)
Expand Down