diff --git a/go/vt/schema/tablegc_test.go b/go/vt/schema/tablegc_test.go index dfa03012c8c..90b31ff90fa 100644 --- a/go/vt/schema/tablegc_test.go +++ b/go/vt/schema/tablegc_test.go @@ -21,6 +21,7 @@ import ( "time" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" ) func TestIsGCTableName(t *testing.T) { @@ -152,3 +153,16 @@ func TestParseGCLifecycle(t *testing.T) { }) } } + +func TestGenerateRenameStatementWithUUID(t *testing.T) { + uuid := "997342e3_e91d_11eb_aaae_0a43f95f28a3" + tableName := "mytbl" + countIterations := 5 + toTableNames := map[string]bool{} + for i := 0; i < countIterations; i++ { + _, toTableName, err := GenerateRenameStatementWithUUID(tableName, PurgeTableGCState, OnlineDDLToGCUUID(uuid), time.Now().Add(time.Duration(i)*time.Second).UTC()) + require.NoError(t, err) + toTableNames[toTableName] = true + } + assert.Equal(t, countIterations, len(toTableNames)) +} diff --git a/go/vt/vttablet/onlineddl/executor.go b/go/vt/vttablet/onlineddl/executor.go index e8034270ee0..8326de06272 100644 --- a/go/vt/vttablet/onlineddl/executor.go +++ b/go/vt/vttablet/onlineddl/executor.go @@ -420,15 +420,9 @@ func (e *Executor) dropOnlineDDLUser(ctx context.Context) error { // tableExists checks if a given table exists. func (e *Executor) tableExists(ctx context.Context, tableName string) (bool, error) { - conn, err := e.pool.Get(ctx) - if err != nil { - return false, err - } - defer conn.Recycle() - tableName = strings.ReplaceAll(tableName, `_`, `\_`) parsed := sqlparser.BuildParsedQuery(sqlShowTablesLike, tableName) - rs, err := conn.Exec(ctx, parsed.Query, 1, true) + rs, err := e.execQuery(ctx, parsed.Query) if err != nil { return false, err } @@ -2362,6 +2356,10 @@ func (e *Executor) reviewStaleMigrations(ctx context.Context) error { if err := e.updateMigrationStatus(ctx, onlineDDL.UUID, schema.OnlineDDLStatusFailed); err != nil { return err } + _ = e.updateMigrationStartedTimestamp(ctx, uuid) + if err := e.updateMigrationTimestamp(ctx, "completed_timestamp", uuid); err != nil { + return err + } _ = e.updateMigrationMessage(ctx, onlineDDL.UUID, "stale migration") } @@ -2376,7 +2374,7 @@ func (e *Executor) retryTabletFailureMigrations(ctx context.Context) error { } // gcArtifactTable garbage-collects a single table -func (e *Executor) gcArtifactTable(ctx context.Context, artifactTable, uuid string) error { +func (e *Executor) gcArtifactTable(ctx context.Context, artifactTable, uuid string, t time.Time) error { tableExists, err := e.tableExists(ctx, artifactTable) if err != nil { return err @@ -2386,17 +2384,11 @@ func (e *Executor) gcArtifactTable(ctx context.Context, artifactTable, uuid stri } // We've already concluded in gcArtifacts() that this table was held for long enough. // We therefore move it into PURGE state. - renameStatement, _, err := schema.GenerateRenameStatementWithUUID(artifactTable, schema.PurgeTableGCState, schema.OnlineDDLToGCUUID(uuid), time.Now().UTC()) - if err != nil { - return err - } - conn, err := e.pool.Get(ctx) + renameStatement, _, err := schema.GenerateRenameStatementWithUUID(artifactTable, schema.PurgeTableGCState, schema.OnlineDDLToGCUUID(uuid), t) if err != nil { return err } - defer conn.Recycle() - - _, err = conn.Exec(ctx, renameStatement, 1, true) + _, err = e.execQuery(ctx, renameStatement) return err } @@ -2405,6 +2397,13 @@ func (e *Executor) gcArtifacts(ctx context.Context) error { e.migrationMutex.Lock() defer e.migrationMutex.Unlock() + if _, err := e.execQuery(ctx, sqlFixCompletedTimestamp); err != nil { + // This query fixes a bug where stale migrations were marked as 'failed' without updating 'completed_timestamp' + // see https://github.com/vitessio/vitess/issues/8499 + // Running this query retroactively sets completed_timestamp + // This 'if' clause can be removed in version v13 + return err + } query, err := sqlparser.ParseAndBind(sqlSelectUncollectedArtifacts, sqltypes.Int64BindVariable(int64((*retainOnlineDDLTables).Seconds())), ) @@ -2420,8 +2419,14 @@ func (e *Executor) gcArtifacts(ctx context.Context) error { artifacts := row["artifacts"].ToString() artifactTables := textutil.SplitDelimitedList(artifacts) - for _, artifactTable := range artifactTables { - if err := e.gcArtifactTable(ctx, artifactTable, uuid); err != nil { + + timeNow := time.Now() + for i, artifactTable := range artifactTables { + // We wish to generate distinct timestamp values for each table in this UUID, + // because all tables will be renamed as _something_UUID_timestamp. Since UUID + // is shared for all artifacts in this loop, we differentiate via timestamp + t := timeNow.Add(time.Duration(i) * time.Second).UTC() + if err := e.gcArtifactTable(ctx, artifactTable, uuid, t); err != nil { return err } log.Infof("Executor.gcArtifacts: renamed away artifact %s", artifactTable) diff --git a/go/vt/vttablet/onlineddl/schema.go b/go/vt/vttablet/onlineddl/schema.go index 2222d3f510d..0628e28d960 100644 --- a/go/vt/vttablet/onlineddl/schema.go +++ b/go/vt/vttablet/onlineddl/schema.go @@ -114,8 +114,9 @@ const ( WHERE migration_uuid=%a ` - sqlUpdateMigrationStartedTimestamp = `UPDATE _vt.schema_migrations - SET started_timestamp=IFNULL(started_timestamp, NOW()) + sqlUpdateMigrationStartedTimestamp = `UPDATE _vt.schema_migrations SET + started_timestamp =IFNULL(started_timestamp, NOW()), + liveness_timestamp=IFNULL(liveness_timestamp, NOW()) WHERE migration_uuid=%a ` @@ -130,7 +131,7 @@ const ( migration_uuid=%a ` sqlUpdateArtifacts = `UPDATE _vt.schema_migrations - SET artifacts=concat(%a, ',', artifacts) + SET artifacts=concat(%a, ',', artifacts), cleanup_timestamp=NULL WHERE migration_uuid=%a ` @@ -270,6 +271,14 @@ const ( AND cleanup_timestamp IS NULL AND completed_timestamp <= NOW() - INTERVAL %a SECOND ` + sqlFixCompletedTimestamp = `UPDATE _vt.schema_migrations + SET + completed_timestamp=NOW() + WHERE + migration_status='failed' + AND cleanup_timestamp IS NULL + AND completed_timestamp IS NULL + ` sqlSelectMigration = `SELECT id, migration_uuid,