Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions go/cmd/vtbackup/vtbackup.go
Original file line number Diff line number Diff line change
Expand Up @@ -306,8 +306,6 @@ func takeBackup(ctx context.Context, topoServer *topo.Server, backupStorage back
return fmt.Errorf("no backup found; not starting up empty since -initial_backup flag was not enabled")
}
restorePos = mysql.Position{}
case mysqlctl.ErrExistingDB:
return fmt.Errorf("can't run vtbackup because data directory is not empty")
default:
return fmt.Errorf("can't restore from backup: %v", err)
}
Expand Down
2 changes: 2 additions & 0 deletions go/test/endtoend/backup/vtbackup/backup_only_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,8 @@ func firstBackupTest(t *testing.T, tabletType string) {
// check that the restored replica has the right local_metadata
result, err := replica2.VttabletProcess.QueryTabletWithDB("select * from local_metadata", "_vt")
require.Nil(t, err)
require.NotNil(t, result)
require.NotEmpty(t, result.Rows)
assert.Equal(t, replica2.Alias, result.Rows[0][1].ToString(), "Alias")
assert.Equal(t, "ks.0", result.Rows[1][1].ToString(), "ClusterAlias")
assert.Equal(t, cell, result.Rows[2][1].ToString(), "DataCenter")
Expand Down
42 changes: 15 additions & 27 deletions go/vt/mysqlctl/backup.go
Original file line number Diff line number Diff line change
Expand Up @@ -64,9 +64,6 @@ var (
// but none of them are complete.
ErrNoCompleteBackup = errors.New("backup(s) found but none are complete")

// ErrExistingDB is returned when there's already an active DB.
ErrExistingDB = errors.New("skipping restore due to existing database")

// backupStorageHook contains the hook name to use to process
// backup files. If not set, we will not process the files. It is
// only used at backup time. Then it is put in the manifest,
Expand Down Expand Up @@ -218,34 +215,25 @@ func removeExistingFiles(cnf *Mycnf) error {
return nil
}

// ShouldRestore checks whether a database with tables already exists
// and returns whether a restore action should be performed
func ShouldRestore(ctx context.Context, params RestoreParams) (bool, error) {
if params.DeleteBeforeRestore || RestoreWasInterrupted(params.Cnf) {
return true, nil
}
params.Logger.Infof("Restore: No %v file found, checking no existing data is present", RestoreState)
// Wait for mysqld to be ready, in case it was launched in parallel with us.
// If this doesn't succeed, we should not attempt a restore
if err := params.Mysqld.Wait(ctx, params.Cnf); err != nil {
return false, err
}
return checkNoDB(ctx, params.Mysqld, params.DbName)
}

// Restore is the main entry point for backup restore. If there is no
// appropriate backup on the BackupStorage, Restore logs an error
// and returns ErrNoBackup. Any other error is returned.
func Restore(ctx context.Context, params RestoreParams) (*BackupManifest, error) {

if !params.DeleteBeforeRestore {
params.Logger.Infof("Restore: Checking if a restore is in progress")
if !RestoreWasInterrupted(params.Cnf) {
params.Logger.Infof("Restore: No %v file found, checking no existing data is present", RestoreState)
// Wait for mysqld to be ready, in case it was launched in parallel with us.
if err := params.Mysqld.Wait(ctx, params.Cnf); err != nil {
return nil, err
}

ok, err := checkNoDB(ctx, params.Mysqld, params.DbName)
if err != nil {
return nil, err
}
if !ok {
params.Logger.Infof("Auto-restore is enabled, but mysqld already contains data. Assuming vttablet was just restarted.")
if err = PopulateMetadataTables(params.Mysqld, params.LocalMetadata, params.DbName); err == nil {
err = ErrExistingDB
}
return nil, err
}
}
}

// find the right backup handle: most recent one, with a MANIFEST
params.Logger.Infof("Restore: looking for a suitable backup to restore")
bs, err := backupstorage.GetBackupStorage()
Expand Down
28 changes: 19 additions & 9 deletions go/vt/vttablet/tabletmanager/restore.go
Original file line number Diff line number Diff line change
Expand Up @@ -76,12 +76,9 @@ func (tm *TabletManager) RestoreData(ctx context.Context, logger logutil.Logger,
}

func (tm *TabletManager) restoreDataLocked(ctx context.Context, logger logutil.Logger, waitForBackupInterval time.Duration, deleteBeforeRestore bool) error {

tablet := tm.Tablet()
originalType := tablet.Type
if err := tm.tmState.ChangeTabletType(ctx, topodatapb.TabletType_RESTORE); err != nil {
return err
}

// Try to restore. Depending on the reason for failure, we may be ok.
// If we're not ok, return an error and the tm will log.Fatalf,
// causing the process to be restarted and the restore retried.
Expand Down Expand Up @@ -117,6 +114,24 @@ func (tm *TabletManager) restoreDataLocked(ctx context.Context, logger logutil.L
StartTime: logutil.ProtoToTime(keyspaceInfo.SnapshotTime),
}

// Check whether we're going to restore before changing to RESTORE type,
// so we keep our MasterTermStartTime (if any) if we aren't actually restoring.
ok, err := mysqlctl.ShouldRestore(ctx, params)
if err != nil {
return err
}
if !ok {
params.Logger.Infof("Attempting to restore, but mysqld already contains data. Assuming vttablet was just restarted.")
return mysqlctl.PopulateMetadataTables(params.Mysqld, params.LocalMetadata, params.DbName)
}
// We should not become master after restore, because that would incorrectly
// start a new master term, and it's likely our data dir will be out of date.
if originalType == topodatapb.TabletType_MASTER {
originalType = tm.baseTabletType
}
if err := tm.tmState.ChangeTabletType(ctx, topodatapb.TabletType_RESTORE); err != nil {
return err
}
// Loop until a backup exists, unless we were told to give up immediately.
var backupManifest *mysqlctl.BackupManifest
for {
Expand Down Expand Up @@ -161,11 +176,6 @@ func (tm *TabletManager) restoreDataLocked(ctx context.Context, logger logutil.L
}
case mysqlctl.ErrNoBackup:
// No-op, starting with empty database.
case mysqlctl.ErrExistingDB:
// No-op, assuming we've just restarted. Note the
// replication reporter may restart replication at the
// next health check if it thinks it should. We do not
// alter replication here.
default:
// If anything failed, we should reset the original tablet type
if err := tm.tmState.ChangeTabletType(ctx, originalType); err != nil {
Expand Down
138 changes: 70 additions & 68 deletions go/vt/wrangler/testlib/backup_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,10 @@ import (
"testing"
"time"

"github.com/stretchr/testify/require"

"github.com/stretchr/testify/assert"

"vitess.io/vitess/go/vt/discovery"

"golang.org/x/net/context"
Expand Down Expand Up @@ -74,9 +78,7 @@ func TestBackupRestore(t *testing.T) {

// Initialize our temp dirs
root, err := ioutil.TempDir("", "backuptest")
if err != nil {
t.Fatalf("os.TempDir failed: %v", err)
}
require.NoError(t, err)
defer os.RemoveAll(root)

// Initialize BackupStorage
Expand All @@ -90,19 +92,11 @@ func TestBackupRestore(t *testing.T) {
sourceDataDir := path.Join(root, "source_data")
sourceDataDbDir := path.Join(sourceDataDir, "vt_db")
for _, s := range []string{sourceInnodbDataDir, sourceInnodbLogDir, sourceDataDbDir} {
if err := os.MkdirAll(s, os.ModePerm); err != nil {
t.Fatalf("failed to create directory %v: %v", s, err)
}
}
if err := ioutil.WriteFile(path.Join(sourceInnodbDataDir, "innodb_data_1"), []byte("innodb data 1 contents"), os.ModePerm); err != nil {
t.Fatalf("failed to write file innodb_data_1: %v", err)
}
if err := ioutil.WriteFile(path.Join(sourceInnodbLogDir, "innodb_log_1"), []byte("innodb log 1 contents"), os.ModePerm); err != nil {
t.Fatalf("failed to write file innodb_log_1: %v", err)
}
if err := ioutil.WriteFile(path.Join(sourceDataDbDir, "db.opt"), []byte("db opt file"), os.ModePerm); err != nil {
t.Fatalf("failed to write file db.opt: %v", err)
require.NoError(t, os.MkdirAll(s, os.ModePerm))
}
require.NoError(t, ioutil.WriteFile(path.Join(sourceInnodbDataDir, "innodb_data_1"), []byte("innodb data 1 contents"), os.ModePerm))
require.NoError(t, ioutil.WriteFile(path.Join(sourceInnodbLogDir, "innodb_log_1"), []byte("innodb log 1 contents"), os.ModePerm))
require.NoError(t, ioutil.WriteFile(path.Join(sourceDataDbDir, "db.opt"), []byte("db opt file"), os.ModePerm))

// create a master tablet, set its master position
master := NewFakeTablet(t, wr, "cell1", 0, topodatapb.TabletType_MASTER, db)
Expand Down Expand Up @@ -150,20 +144,12 @@ func TestBackupRestore(t *testing.T) {
}

// run the backup
if err := vp.Run([]string{"Backup", topoproto.TabletAliasString(sourceTablet.Tablet.Alias)}); err != nil {
t.Fatalf("Backup failed: %v", err)
}
require.NoError(t, vp.Run([]string{"Backup", topoproto.TabletAliasString(sourceTablet.Tablet.Alias)}))

// verify the full status
if err := sourceTablet.FakeMysqlDaemon.CheckSuperQueryList(); err != nil {
t.Errorf("sourceTablet.FakeMysqlDaemon.CheckSuperQueryList failed: %v", err)
}
if !sourceTablet.FakeMysqlDaemon.Replicating {
t.Errorf("sourceTablet.FakeMysqlDaemon.Replicating not set")
}
if !sourceTablet.FakeMysqlDaemon.Running {
t.Errorf("sourceTablet.FakeMysqlDaemon.Running not set")
}
require.NoError(t, sourceTablet.FakeMysqlDaemon.CheckSuperQueryList())
assert.True(t, sourceTablet.FakeMysqlDaemon.Replicating)
assert.True(t, sourceTablet.FakeMysqlDaemon.Running)

// create a destination tablet, set it up so we can do restores
destTablet := NewFakeTablet(t, wr, "cell1", 2, topodatapb.TabletType_REPLICA, db)
Expand Down Expand Up @@ -204,21 +190,59 @@ func TestBackupRestore(t *testing.T) {
RelayLogInfoPath: path.Join(root, "relay-log.info"),
}

if err := destTablet.TM.RestoreData(ctx, logutil.NewConsoleLogger(), 0 /* waitForBackupInterval */, false /* deleteBeforeRestore */); err != nil {
t.Fatalf("RestoreData failed: %v", err)
require.NoError(t, destTablet.TM.RestoreData(ctx, logutil.NewConsoleLogger(), 0 /* waitForBackupInterval */, false /* deleteBeforeRestore */))
// verify the full status
require.NoError(t, destTablet.FakeMysqlDaemon.CheckSuperQueryList(), "destTablet.FakeMysqlDaemon.CheckSuperQueryList failed")
assert.True(t, destTablet.FakeMysqlDaemon.Replicating)
assert.True(t, destTablet.FakeMysqlDaemon.Running)

// Initialize mycnf, required for restore
masterInnodbDataDir := path.Join(root, "master_innodb_data")
masterInnodbLogDir := path.Join(root, "master_innodb_log")
masterDataDir := path.Join(root, "master_data")
master.TM.Cnf = &mysqlctl.Mycnf{
DataDir: masterDataDir,
InnodbDataHomeDir: masterInnodbDataDir,
InnodbLogGroupHomeDir: masterInnodbLogDir,
BinLogPath: path.Join(root, "bin-logs/filename_prefix"),
RelayLogPath: path.Join(root, "relay-logs/filename_prefix"),
RelayLogIndexPath: path.Join(root, "relay-log.index"),
RelayLogInfoPath: path.Join(root, "relay-log.info"),
}

// verify the full status
if err := destTablet.FakeMysqlDaemon.CheckSuperQueryList(); err != nil {
t.Errorf("destTablet.FakeMysqlDaemon.CheckSuperQueryList failed: %v", err)
master.FakeMysqlDaemon.FetchSuperQueryMap = map[string]*sqltypes.Result{
"SHOW DATABASES": {},
}
if !destTablet.FakeMysqlDaemon.Replicating {
t.Errorf("destTablet.FakeMysqlDaemon.Replicating not set")
master.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{
"STOP SLAVE",
"RESET SLAVE ALL",
"FAKE SET SLAVE POSITION",
"FAKE SET MASTER",
"START SLAVE",
}
if !destTablet.FakeMysqlDaemon.Running {
t.Errorf("destTablet.FakeMysqlDaemon.Running not set")

master.FakeMysqlDaemon.SetReplicationPositionPos = master.FakeMysqlDaemon.CurrentMasterPosition

// restore master from backup
require.NoError(t, master.TM.RestoreData(ctx, logutil.NewConsoleLogger(), 0 /* waitForBackupInterval */, false /* deleteBeforeRestore */), "RestoreData failed")
// tablet was created as MASTER, so it's baseTabletType is MASTER
assert.Equal(t, topodatapb.TabletType_MASTER, master.Tablet.Type)
assert.False(t, master.FakeMysqlDaemon.Replicating)
assert.True(t, master.FakeMysqlDaemon.Running)

// restore master when database already exists
// checkNoDb should return false
// so fake the necessary queries
master.FakeMysqlDaemon.FetchSuperQueryMap = map[string]*sqltypes.Result{
"SHOW DATABASES": {Rows: [][]sqltypes.Value{{sqltypes.NewVarBinary("vt_test_keyspace")}}},
"SHOW TABLES FROM `vt_test_keyspace`": {Rows: [][]sqltypes.Value{{sqltypes.NewVarBinary("a")}}},
}

require.NoError(t, master.TM.RestoreData(ctx, logutil.NewConsoleLogger(), 0 /* waitForBackupInterval */, false /* deleteBeforeRestore */), "RestoreData failed")
// Tablet type should not change
assert.Equal(t, topodatapb.TabletType_MASTER, master.Tablet.Type)
assert.False(t, master.FakeMysqlDaemon.Replicating)
assert.True(t, master.FakeMysqlDaemon.Running)
}

func TestRestoreUnreachableMaster(t *testing.T) {
Expand Down Expand Up @@ -252,9 +276,7 @@ func TestRestoreUnreachableMaster(t *testing.T) {

// Initialize our temp dirs
root, err := ioutil.TempDir("", "backuptest")
if err != nil {
t.Fatalf("os.TempDir failed: %v", err)
}
require.NoError(t, err)
defer os.RemoveAll(root)

// Initialize BackupStorage
Expand All @@ -268,19 +290,11 @@ func TestRestoreUnreachableMaster(t *testing.T) {
sourceDataDir := path.Join(root, "source_data")
sourceDataDbDir := path.Join(sourceDataDir, "vt_db")
for _, s := range []string{sourceInnodbDataDir, sourceInnodbLogDir, sourceDataDbDir} {
if err := os.MkdirAll(s, os.ModePerm); err != nil {
t.Fatalf("failed to create directory %v: %v", s, err)
}
}
if err := ioutil.WriteFile(path.Join(sourceInnodbDataDir, "innodb_data_1"), []byte("innodb data 1 contents"), os.ModePerm); err != nil {
t.Fatalf("failed to write file innodb_data_1: %v", err)
}
if err := ioutil.WriteFile(path.Join(sourceInnodbLogDir, "innodb_log_1"), []byte("innodb log 1 contents"), os.ModePerm); err != nil {
t.Fatalf("failed to write file innodb_log_1: %v", err)
}
if err := ioutil.WriteFile(path.Join(sourceDataDbDir, "db.opt"), []byte("db opt file"), os.ModePerm); err != nil {
t.Fatalf("failed to write file db.opt: %v", err)
require.NoError(t, os.MkdirAll(s, os.ModePerm))
}
require.NoError(t, ioutil.WriteFile(path.Join(sourceInnodbDataDir, "innodb_data_1"), []byte("innodb data 1 contents"), os.ModePerm))
require.NoError(t, ioutil.WriteFile(path.Join(sourceInnodbLogDir, "innodb_log_1"), []byte("innodb log 1 contents"), os.ModePerm))
require.NoError(t, ioutil.WriteFile(path.Join(sourceDataDbDir, "db.opt"), []byte("db opt file"), os.ModePerm))

// create a master tablet, set its master position
master := NewFakeTablet(t, wr, "cell1", 0, topodatapb.TabletType_MASTER, db)
Expand Down Expand Up @@ -327,9 +341,7 @@ func TestRestoreUnreachableMaster(t *testing.T) {
}

// run the backup
if err := vp.Run([]string{"Backup", topoproto.TabletAliasString(sourceTablet.Tablet.Alias)}); err != nil {
t.Fatalf("Backup failed: %v", err)
}
require.NoError(t, vp.Run([]string{"Backup", topoproto.TabletAliasString(sourceTablet.Tablet.Alias)}))

// create a destination tablet, set it up so we can do restores
destTablet := NewFakeTablet(t, wr, "cell1", 2, topodatapb.TabletType_REPLICA, db)
Expand Down Expand Up @@ -376,19 +388,9 @@ func TestRestoreUnreachableMaster(t *testing.T) {
// set a short timeout so that we don't have to wait 30 seconds
*topo.RemoteOperationTimeout = 2 * time.Second
// Restore should still succeed
if err := destTablet.TM.RestoreData(ctx, logutil.NewConsoleLogger(), 0 /* waitForBackupInterval */, false /* deleteBeforeRestore */); err != nil {
t.Fatalf("RestoreData failed: %v", err)
}

require.NoError(t, destTablet.TM.RestoreData(ctx, logutil.NewConsoleLogger(), 0 /* waitForBackupInterval */, false /* deleteBeforeRestore */))
// verify the full status
if err := destTablet.FakeMysqlDaemon.CheckSuperQueryList(); err != nil {
t.Errorf("destTablet.FakeMysqlDaemon.CheckSuperQueryList failed: %v", err)
}
if !destTablet.FakeMysqlDaemon.Replicating {
t.Errorf("destTablet.FakeMysqlDaemon.Replicating not set")
}
if !destTablet.FakeMysqlDaemon.Running {
t.Errorf("destTablet.FakeMysqlDaemon.Running not set")
}

require.NoError(t, destTablet.FakeMysqlDaemon.CheckSuperQueryList(), "destTablet.FakeMysqlDaemon.CheckSuperQueryList failed")
assert.True(t, destTablet.FakeMysqlDaemon.Replicating)
assert.True(t, destTablet.FakeMysqlDaemon.Running)
}