-
Notifications
You must be signed in to change notification settings - Fork 13
WIP: ApplySchema to run online schema changes via gh-ost
#67
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 69 commits
f287663
5b99487
2a97a0e
4b08c81
3af6df5
6d65f9f
4776c74
7098531
cddd0de
52d6b06
fff8340
e1a1170
addfd0e
020b2e7
9dee129
5f968fc
b4f2207
efcf8b3
290ea36
2f22ad3
ea790b5
109aab6
6dd319d
ff9dd14
30f98a8
e8dba28
42fa9ad
c3bb83e
8e23766
453497f
991dde7
124b245
017f2a9
6af2b16
9f2ea0a
0babfd4
b1adbdc
1ba7b63
e88b4f5
635bb2f
9632050
7189cb8
a2ec700
a50662a
8ae485d
09228c6
d386dc4
4029435
1c3de85
d3f4fef
b716b97
95deb90
77955f7
1e89dd8
a429c7b
a622661
f7c1323
a4584e6
a83e4c6
483fc51
bebfb01
6e7fc2f
1cd2d47
4496d54
78aad71
73ad39b
4d3fbf3
1d27705
39f642f
6df82bd
1346284
3d623eb
015d974
7299301
5a89653
58b9a3a
e329bda
fa0e551
c551a03
2741151
225bc3b
d13b41b
cafb9c1
079a987
3393fa5
2336eac
115de10
759e76d
718e890
fbd2457
c78021c
142f06f
e87faa5
5029bd7
bb4b328
8481c3c
2b075df
de9b561
a8ae617
6a37b43
23b60b5
0822dd0
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -24,6 +24,7 @@ import ( | |
| "golang.org/x/net/context" | ||
|
|
||
| "vitess.io/vitess/go/sync2" | ||
| "vitess.io/vitess/go/vt/mysqlctl/tmutils" | ||
| "vitess.io/vitess/go/vt/sqlparser" | ||
| "vitess.io/vitess/go/vt/wrangler" | ||
|
|
||
|
|
@@ -36,6 +37,7 @@ type TabletExecutor struct { | |
| tablets []*topodatapb.Tablet | ||
| isClosed bool | ||
| allowBigSchemaChange bool | ||
| onlineSchemaChange bool | ||
| keyspace string | ||
| waitReplicasTimeout time.Duration | ||
| } | ||
|
|
@@ -46,6 +48,7 @@ func NewTabletExecutor(wr *wrangler.Wrangler, waitReplicasTimeout time.Duration) | |
| wr: wr, | ||
| isClosed: true, | ||
| allowBigSchemaChange: false, | ||
| onlineSchemaChange: false, | ||
| waitReplicasTimeout: waitReplicasTimeout, | ||
| } | ||
| } | ||
|
|
@@ -62,6 +65,11 @@ func (exec *TabletExecutor) DisallowBigSchemaChange() { | |
| exec.allowBigSchemaChange = false | ||
| } | ||
|
|
||
| // SetOnlineSchemaChange sets TabletExecutor such that it initiates online schema change migrations | ||
| func (exec *TabletExecutor) SetOnlineSchemaChange() { | ||
| exec.onlineSchemaChange = true | ||
| } | ||
|
|
||
| // Open opens a connection to the master for every shard. | ||
| func (exec *TabletExecutor) Open(ctx context.Context, keyspace string) error { | ||
| if !exec.isClosed { | ||
|
|
@@ -160,6 +168,11 @@ func (exec *TabletExecutor) detectBigSchemaChanges(ctx context.Context, parsedDD | |
| switch ddl.Action { | ||
| case sqlparser.DropStr, sqlparser.CreateStr, sqlparser.TruncateStr, sqlparser.RenameStr: | ||
| continue | ||
| case sqlparser.AlterStr: | ||
| if exec.onlineSchemaChange { | ||
| // Seeing that we intend to run an online-schema-change, we can skip the "big change" check. | ||
| continue | ||
| } | ||
| } | ||
| tableName := ddl.Table.Name.String() | ||
| if rowCount, ok := tableWithCount[tableName]; ok { | ||
|
|
@@ -217,15 +230,32 @@ func (exec *TabletExecutor) Execute(ctx context.Context, sqls []string) *Execute | |
|
|
||
| for index, sql := range sqls { | ||
| execResult.CurSQLIndex = index | ||
| exec.executeOnAllTablets(ctx, &execResult, sql) | ||
|
|
||
| stat, err := sqlparser.Parse(sql) | ||
| if err != nil { | ||
| execResult.ExecutorErr = err.Error() | ||
| return &execResult | ||
| } | ||
| executeOnlineSchemaChange := false | ||
| switch ddl := stat.(type) { | ||
| case *sqlparser.DDL: | ||
| if ddl.Action == sqlparser.AlterStr && exec.onlineSchemaChange { | ||
| executeOnlineSchemaChange = true | ||
| } | ||
| } | ||
| exec.wr.Logger().Infof("Received DDL request. online schema change = %t", executeOnlineSchemaChange) | ||
| exec.executeOnAllTablets(ctx, &execResult, sql, executeOnlineSchemaChange) | ||
| if len(execResult.FailedShards) > 0 { | ||
| break | ||
| } | ||
| } | ||
| return &execResult | ||
| } | ||
|
|
||
| func (exec *TabletExecutor) executeOnAllTablets(ctx context.Context, execResult *ExecuteResult, sql string) { | ||
| func (exec *TabletExecutor) executeOnAllTablets( | ||
| ctx context.Context, execResult *ExecuteResult, sql string, | ||
| executeOnlineSchemaChange bool, | ||
| ) { | ||
| var wg sync.WaitGroup | ||
| numOfMasterTablets := len(exec.tablets) | ||
| wg.Add(numOfMasterTablets) | ||
|
|
@@ -234,7 +264,11 @@ func (exec *TabletExecutor) executeOnAllTablets(ctx context.Context, execResult | |
| for _, tablet := range exec.tablets { | ||
| go func(tablet *topodatapb.Tablet) { | ||
| defer wg.Done() | ||
| exec.executeOneTablet(ctx, tablet, sql, errChan, successChan) | ||
| if executeOnlineSchemaChange { | ||
| exec.executeOnlineSchemaChangeOneTablet(ctx, tablet, sql, errChan, successChan) | ||
| } else { | ||
| exec.executeOneTablet(ctx, tablet, sql, errChan, successChan) | ||
| } | ||
| }(tablet) | ||
| } | ||
| wg.Wait() | ||
|
|
@@ -297,6 +331,29 @@ func (exec *TabletExecutor) executeOneTablet( | |
| } | ||
| } | ||
|
|
||
| // executeOnlineSchemaChangeOneTablet will request the tablet to run an online schema change | ||
| func (exec *TabletExecutor) executeOnlineSchemaChangeOneTablet( | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This function can be changed to invoke
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 👍 I need to learn more about that |
||
| ctx context.Context, | ||
| tablet *topodatapb.Tablet, | ||
| sql string, | ||
| errChan chan ShardWithError, | ||
| successChan chan ShardResult, | ||
| ) { | ||
| change := &tmutils.SchemaChange{ | ||
| SQL: sql, | ||
| Online: true, | ||
| Hint: "", // TODO(shlomi) generate and populate hint | ||
| } | ||
| _, err := exec.wr.TabletManagerClient().ApplySchema(ctx, tablet, change) | ||
| if err != nil { | ||
| errChan <- ShardWithError{Shard: tablet.Shard, Err: err.Error()} | ||
| return | ||
| } | ||
| successChan <- ShardResult{ | ||
| Shard: tablet.Shard, | ||
| } | ||
| } | ||
|
|
||
| // Close clears tablet executor states | ||
| func (exec *TabletExecutor) Close() { | ||
| if !exec.isClosed { | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Just out of curiosity. I can see from security perspective, installing the binary on each tablet is preferred, what are the other tradeoffs to install the binary on each tablet vs remotely on a dedicated server/pod?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Right; the above is just the
localdocker setup, and so does not represent a production environment. The above is also likely temporary. At any case, an important observation is thatgh-ostreads data from MySQL and then mostly writes it back. It reads the data both by connecting into the replication stream as well as normal queries. What we've observed at GitHub is that latency between he machine wheregh-ostruns, to the MySQL servers, is important. e.g. we would only rungh-ostin same DC as affected servers. But, I think taking it to the next level and runninggh-oston the very sametablet+MySQL master servers can be beneficial.Another thing about running
gh-ostfrom dedicated servers is the amount of migrations you'd be able to run concurrently. I don't have the numbers, but I guess if you'd want to run 100 concurrent migrations (say you have 100 shards), then I suspect running 100gh-ostinstances on same dedicated server is unlikely to perform well. Actual testing needed but that's my suspicion.When you run
gh-ostright on the master server, that problem implicitly doesn't exist.