-
Notifications
You must be signed in to change notification settings - Fork 2.1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Online DDL: ready_to_complete race fix
#12612
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -24,7 +24,6 @@ import ( | |
"regexp" | ||
"strconv" | ||
"strings" | ||
"time" | ||
|
||
vtrpcpb "vitess.io/vitess/go/vt/proto/vtrpc" | ||
"vitess.io/vitess/go/vt/sqlparser" | ||
|
@@ -84,19 +83,20 @@ const ( | |
|
||
// OnlineDDL encapsulates the relevant information in an online schema change request | ||
type OnlineDDL struct { | ||
Keyspace string `json:"keyspace,omitempty"` | ||
Table string `json:"table,omitempty"` | ||
Schema string `json:"schema,omitempty"` | ||
SQL string `json:"sql,omitempty"` | ||
UUID string `json:"uuid,omitempty"` | ||
Strategy DDLStrategy `json:"strategy,omitempty"` | ||
Options string `json:"options,omitempty"` | ||
RequestTime int64 `json:"time_created,omitempty"` | ||
MigrationContext string `json:"context,omitempty"` | ||
Status OnlineDDLStatus `json:"status,omitempty"` | ||
TabletAlias string `json:"tablet,omitempty"` | ||
Retries int64 `json:"retries,omitempty"` | ||
ReadyToComplete int64 `json:"ready_to_complete,omitempty"` | ||
Keyspace string `json:"keyspace,omitempty"` | ||
Table string `json:"table,omitempty"` | ||
Schema string `json:"schema,omitempty"` | ||
SQL string `json:"sql,omitempty"` | ||
UUID string `json:"uuid,omitempty"` | ||
Strategy DDLStrategy `json:"strategy,omitempty"` | ||
Options string `json:"options,omitempty"` | ||
// Stateful fields: | ||
MigrationContext string `json:"context,omitempty"` | ||
Status OnlineDDLStatus `json:"status,omitempty"` | ||
TabletAlias string `json:"tablet,omitempty"` | ||
Retries int64 `json:"retries,omitempty"` | ||
ReadyToComplete int64 `json:"ready_to_complete,omitempty"` | ||
WasReadyToComplete int64 `json:"was_ready_to_complete,omitempty"` | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Was going to ask if we can't use |
||
} | ||
|
||
// FromJSON creates an OnlineDDL from json | ||
|
@@ -249,7 +249,6 @@ func NewOnlineDDL(keyspace string, table string, sql string, ddlStrategySetting | |
UUID: onlineDDLUUID, | ||
Strategy: ddlStrategySetting.Strategy, | ||
Options: ddlStrategySetting.Options, | ||
RequestTime: time.Now().UnixNano(), | ||
MigrationContext: migrationContext, | ||
Status: OnlineDDLStatusRequested, | ||
}, nil | ||
|
@@ -328,11 +327,6 @@ func (onlineDDL *OnlineDDL) StrategySetting() *DDLStrategySetting { | |
return NewDDLStrategySetting(onlineDDL.Strategy, onlineDDL.Options) | ||
} | ||
|
||
// RequestTimeSeconds converts request time to seconds (losing nano precision) | ||
func (onlineDDL *OnlineDDL) RequestTimeSeconds() int64 { | ||
return onlineDDL.RequestTime / int64(time.Second) | ||
} | ||
|
||
// ToJSON exports this onlineDDL to JSON | ||
func (onlineDDL *OnlineDDL) ToJSON() ([]byte, error) { | ||
return json.Marshal(onlineDDL) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -395,7 +395,9 @@ func (e *Executor) proposedMigrationConflictsWithRunningMigration(runningMigrati | |
// Specifically, if the running migration is an ALTER, and is still busy with copying rows (copy_state), then | ||
// we consider the two to be conflicting. But, if the running migration is done copying rows, and is now only | ||
// applying binary logs, and is up-to-date, then we consider a new ALTER migration to be non-conflicting. | ||
return atomic.LoadInt64(&runningMigration.ReadyToComplete) == 0 | ||
if atomic.LoadInt64(&runningMigration.WasReadyToComplete) == 0 { | ||
return true | ||
} | ||
} | ||
return false | ||
} | ||
|
@@ -1321,10 +1323,6 @@ func (e *Executor) ExecuteWithVReplication(ctx context.Context, onlineDDL *schem | |
// make sure there's no vreplication workflow running under same name | ||
_ = e.terminateVReplMigration(ctx, onlineDDL.UUID) | ||
|
||
if conflictFound, conflictingMigration := e.isAnyConflictingMigrationRunning(onlineDDL); conflictFound { | ||
return vterrors.Wrapf(ErrExecutorMigrationAlreadyRunning, "conflicting migration: %v over table: %v", conflictingMigration.UUID, conflictingMigration.Table) | ||
} | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. No need for this double-verification, since we now run synchronously and under the same mutex protection. |
||
if e.tabletTypeFunc() != topodatapb.TabletType_PRIMARY { | ||
return ErrExecutorNotWritableTablet | ||
} | ||
|
@@ -1428,10 +1426,6 @@ func (e *Executor) ExecuteWithVReplication(ctx context.Context, onlineDDL *schem | |
// Validation included testing the backend MySQL server and the gh-ost binary itself | ||
// Execution runs first a dry run, then an actual migration | ||
func (e *Executor) ExecuteWithGhost(ctx context.Context, onlineDDL *schema.OnlineDDL) error { | ||
if conflictFound, conflictingMigration := e.isAnyConflictingMigrationRunning(onlineDDL); conflictFound { | ||
return vterrors.Wrapf(ErrExecutorMigrationAlreadyRunning, "conflicting migration: %v over table: %v", conflictingMigration.UUID, conflictingMigration.Table) | ||
} | ||
|
||
if e.tabletTypeFunc() != topodatapb.TabletType_PRIMARY { | ||
return ErrExecutorNotWritableTablet | ||
} | ||
|
@@ -1646,10 +1640,6 @@ exit $exit_code | |
// Validation included testing the backend MySQL server and the pt-online-schema-change binary itself | ||
// Execution runs first a dry run, then an actual migration | ||
func (e *Executor) ExecuteWithPTOSC(ctx context.Context, onlineDDL *schema.OnlineDDL) error { | ||
if conflictFound, conflictingMigration := e.isAnyConflictingMigrationRunning(onlineDDL); conflictFound { | ||
return vterrors.Wrapf(ErrExecutorMigrationAlreadyRunning, "conflicting migration: %v over table: %v", conflictingMigration.UUID, conflictingMigration.Table) | ||
} | ||
|
||
if e.tabletTypeFunc() != topodatapb.TabletType_PRIMARY { | ||
return ErrExecutorNotWritableTablet | ||
} | ||
|
@@ -1867,15 +1857,13 @@ export MYSQL_PWD | |
|
||
func (e *Executor) readMigration(ctx context.Context, uuid string) (onlineDDL *schema.OnlineDDL, row sqltypes.RowNamedValues, err error) { | ||
|
||
parsed := sqlparser.BuildParsedQuery(sqlSelectMigration, ":migration_uuid") | ||
bindVars := map[string]*querypb.BindVariable{ | ||
"migration_uuid": sqltypes.StringBindVariable(uuid), | ||
} | ||
bound, err := parsed.GenerateQuery(bindVars, nil) | ||
query, err := sqlparser.ParseAndBind(sqlSelectMigration, | ||
sqltypes.StringBindVariable(uuid), | ||
) | ||
if err != nil { | ||
return onlineDDL, nil, err | ||
} | ||
r, err := e.execQuery(ctx, bound) | ||
r, err := e.execQuery(ctx, query) | ||
if err != nil { | ||
return onlineDDL, nil, err | ||
} | ||
|
@@ -1885,18 +1873,19 @@ func (e *Executor) readMigration(ctx context.Context, uuid string) (onlineDDL *s | |
return nil, nil, ErrMigrationNotFound | ||
} | ||
onlineDDL = &schema.OnlineDDL{ | ||
Keyspace: row["keyspace"].ToString(), | ||
Table: row["mysql_table"].ToString(), | ||
Schema: row["mysql_schema"].ToString(), | ||
SQL: row["migration_statement"].ToString(), | ||
UUID: row["migration_uuid"].ToString(), | ||
Strategy: schema.DDLStrategy(row["strategy"].ToString()), | ||
Options: row["options"].ToString(), | ||
Status: schema.OnlineDDLStatus(row["migration_status"].ToString()), | ||
Retries: row.AsInt64("retries", 0), | ||
ReadyToComplete: row.AsInt64("ready_to_complete", 0), | ||
TabletAlias: row["tablet"].ToString(), | ||
MigrationContext: row["migration_context"].ToString(), | ||
Keyspace: row["keyspace"].ToString(), | ||
Table: row["mysql_table"].ToString(), | ||
Schema: row["mysql_schema"].ToString(), | ||
SQL: row["migration_statement"].ToString(), | ||
UUID: row["migration_uuid"].ToString(), | ||
Strategy: schema.DDLStrategy(row["strategy"].ToString()), | ||
Options: row["options"].ToString(), | ||
Status: schema.OnlineDDLStatus(row["migration_status"].ToString()), | ||
Retries: row.AsInt64("retries", 0), | ||
ReadyToComplete: row.AsInt64("ready_to_complete", 0), | ||
WasReadyToComplete: row.AsInt64("was_ready_to_complete", 0), | ||
TabletAlias: row["tablet"].ToString(), | ||
MigrationContext: row["migration_context"].ToString(), | ||
} | ||
return onlineDDL, row, nil | ||
} | ||
|
@@ -2981,41 +2970,21 @@ func (e *Executor) executeAlterDDLActionMigration(ctx context.Context, onlineDDL | |
// OK, nothing special about this ALTER. Let's go ahead and execute it. | ||
switch onlineDDL.Strategy { | ||
case schema.DDLStrategyOnline, schema.DDLStrategyVitess: | ||
go func() { | ||
e.migrationMutex.Lock() | ||
defer e.migrationMutex.Unlock() | ||
|
||
if err := e.ExecuteWithVReplication(ctx, onlineDDL, nil); err != nil { | ||
failMigration(err) | ||
} | ||
}() | ||
if err := e.ExecuteWithVReplication(ctx, onlineDDL, nil); err != nil { | ||
return failMigration(err) | ||
} | ||
case schema.DDLStrategyGhost: | ||
go func() { | ||
e.migrationMutex.Lock() | ||
defer e.migrationMutex.Unlock() | ||
|
||
if err := e.ExecuteWithGhost(ctx, onlineDDL); err != nil { | ||
failMigration(err) | ||
} | ||
}() | ||
if err := e.ExecuteWithGhost(ctx, onlineDDL); err != nil { | ||
return failMigration(err) | ||
} | ||
case schema.DDLStrategyPTOSC: | ||
go func() { | ||
e.migrationMutex.Lock() | ||
defer e.migrationMutex.Unlock() | ||
|
||
if err := e.ExecuteWithPTOSC(ctx, onlineDDL); err != nil { | ||
failMigration(err) | ||
} | ||
}() | ||
if err := e.ExecuteWithPTOSC(ctx, onlineDDL); err != nil { | ||
return failMigration(err) | ||
} | ||
case schema.DDLStrategyMySQL: | ||
go func() { | ||
e.migrationMutex.Lock() | ||
defer e.migrationMutex.Unlock() | ||
|
||
if _, err := e.executeDirectly(ctx, onlineDDL); err != nil { | ||
failMigration(err) | ||
} | ||
}() | ||
if _, err := e.executeDirectly(ctx, onlineDDL); err != nil { | ||
return failMigration(err) | ||
} | ||
default: | ||
{ | ||
return failMigration(fmt.Errorf("Unsupported strategy: %+v", onlineDDL.Strategy)) | ||
|
@@ -3160,14 +3129,9 @@ func (e *Executor) executeMigration(ctx context.Context, onlineDDL *schema.Onlin | |
case sqlparser.AlterDDLAction: | ||
return e.executeAlterDDLActionMigration(ctx, onlineDDL) | ||
case sqlparser.RevertDDLAction: | ||
go func() { | ||
e.migrationMutex.Lock() | ||
defer e.migrationMutex.Unlock() | ||
|
||
if err := e.executeRevert(ctx, onlineDDL); err != nil { | ||
failMigration(err) | ||
} | ||
}() | ||
if err := e.executeRevert(ctx, onlineDDL); err != nil { | ||
failMigration(err) | ||
} | ||
} | ||
return nil | ||
} | ||
|
@@ -4279,8 +4243,13 @@ func (e *Executor) updateMigrationSetImmediateOperation(ctx context.Context, uui | |
} | ||
|
||
func (e *Executor) updateMigrationReadyToComplete(ctx context.Context, uuid string, isReady bool) error { | ||
query, err := sqlparser.ParseAndBind(sqlUpdateMigrationReadyToComplete, | ||
sqltypes.BoolBindVariable(isReady), | ||
var queryTemplate string | ||
if isReady { | ||
queryTemplate = sqlSetMigrationReadyToComplete | ||
} else { | ||
queryTemplate = sqlClearMigrationReadyToComplete | ||
} | ||
query, err := sqlparser.ParseAndBind(queryTemplate, | ||
sqltypes.StringBindVariable(uuid), | ||
) | ||
if err != nil { | ||
|
@@ -4294,6 +4263,7 @@ func (e *Executor) updateMigrationReadyToComplete(ctx context.Context, uuid stri | |
var storeValue int64 | ||
if isReady { | ||
storeValue = 1 | ||
atomic.StoreInt64(&runningMigration.WasReadyToComplete, 1) // WasReadyToComplete is set once and never cleared | ||
} | ||
atomic.StoreInt64(&runningMigration.ReadyToComplete, storeValue) | ||
} | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Found that this field (`RequestTime`) was unused.