-
Notifications
You must be signed in to change notification settings - Fork 2.3k
VTGR: Vitess + MySQL group replication #8387
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from all commits
Commits
Show all changes
15 commits
Select commit
Hold shift + click to select a range
9bab27d
Add mysql gr flavor
5antelope 73a7eaf
Add more gr status to orc
5antelope 1655b9d
vtgr core
5antelope 6f18a72
Set up end to end test
5antelope 6e08329
Fix race condition in test
5antelope 7054d0b
Fix race in tests
5antelope eea9346
Remove over-optimization code in rebootstrap
5antelope 4f374bc
Add licence header and use vterrors
5antelope 4c0e507
Add licence for VTGR
5antelope 51fbb6a
Clean up repair test format
5antelope 573f045
More licence headers
5antelope 39263f2
Clean up comments and rename functions
5antelope dee20cf
move ErrNoGroupStatus into flavor
5antelope 5177214
Fix format and imports
5antelope c93b2d4
Merge remote-tracking branch 'upstream/main' into ywu/vtgr
5antelope File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,37 @@ | ||
| /* | ||
| Copyright 2021 The Vitess Authors. | ||
| Licensed under the Apache License, Version 2.0 (the "License"); | ||
| you may not use this file except in compliance with the License. | ||
| You may obtain a copy of the License at | ||
| http://www.apache.org/licenses/LICENSE-2.0 | ||
| Unless required by applicable law or agreed to in writing, software | ||
| distributed under the License is distributed on an "AS IS" BASIS, | ||
| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| See the License for the specific language governing permissions and | ||
| limitations under the License. | ||
| */ | ||
|
|
||
| package main | ||
|
|
||
| import ( | ||
| "flag" | ||
| "strings" | ||
|
|
||
| "golang.org/x/net/context" | ||
|
|
||
| "vitess.io/vitess/go/vt/vtgr" | ||
| ) | ||
|
|
||
| func main() { | ||
| clustersToWatch := flag.String("clusters_to_watch", "", "Comma-separated list of keyspaces or keyspace/shards that this instance will monitor and repair. Defaults to all clusters in the topology. Example: \"ks1,ks2/-80\"") | ||
| flag.Parse() | ||
|
|
||
| // openTabletDiscovery will open up a connection to topo server | ||
| // and populate the tablets in memory | ||
| vtgr := vtgr.OpenTabletDiscovery(context.Background(), nil, strings.Split(*clustersToWatch, ",")) | ||
| vtgr.RefreshCluster() | ||
| vtgr.ScanAndRepair() | ||
|
|
||
| // block here so that we don't exit directly | ||
| select {} | ||
5antelope marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| } | ||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,228 @@ | ||
| /* | ||
| Copyright 2021 The Vitess Authors. | ||
|
|
||
| Licensed under the Apache License, Version 2.0 (the "License"); | ||
| you may not use this file except in compliance with the License. | ||
| You may obtain a copy of the License at | ||
|
|
||
| http://www.apache.org/licenses/LICENSE-2.0 | ||
|
|
||
| Unless required by applicable law or agreed to in writing, software | ||
| distributed under the License is distributed on an "AS IS" BASIS, | ||
| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| See the License for the specific language governing permissions and | ||
| limitations under the License. | ||
| */ | ||
|
|
||
| package mysql | ||
|
|
||
| import ( | ||
| "errors" | ||
| "fmt" | ||
| "math" | ||
|
|
||
| "vitess.io/vitess/go/vt/proto/vtrpc" | ||
|
|
||
| "vitess.io/vitess/go/vt/vterrors" | ||
|
|
||
| "vitess.io/vitess/go/sqltypes" | ||
| ) | ||
|
|
||
const (
	// GRFlavorID is the key under which the MySQL group replication
	// flavor is registered in the flavors map.
	GRFlavorID = "MysqlGR"
)

// ErrNoGroupStatus is returned when a group replication status query
// produces no rows, i.e. there is no group status to report.
var ErrNoGroupStatus = errors.New("no group status")
|
|
||
| // mysqlGRFlavor implements the Flavor interface for Mysql. | ||
| type mysqlGRFlavor struct { | ||
| mysqlFlavor | ||
| } | ||
|
|
||
| // newMysqlGRFlavor creates a new mysqlGR flavor. | ||
| func newMysqlGRFlavor() flavor { | ||
| return &mysqlGRFlavor{} | ||
| } | ||
|
|
||
| // startReplicationCommand returns the command to start the replication. | ||
| // we return empty here since `START GROUP_REPLICATION` should be called by | ||
| // the external orchestrator | ||
| func (mysqlGRFlavor) startReplicationCommand() string { | ||
| return "" | ||
| } | ||
|
|
||
| // restartReplicationCommands is disabled in mysqlGRFlavor | ||
| func (mysqlGRFlavor) restartReplicationCommands() []string { | ||
| return []string{} | ||
| } | ||
|
|
||
| // startReplicationUntilAfter is disabled in mysqlGRFlavor | ||
| func (mysqlGRFlavor) startReplicationUntilAfter(pos Position) string { | ||
| return "" | ||
| } | ||
|
|
||
| // stopReplicationCommand returns the command to stop the replication. | ||
| // we return empty here since `STOP GROUP_REPLICATION` should be called by | ||
| // the external orchestrator | ||
| func (mysqlGRFlavor) stopReplicationCommand() string { | ||
| return "" | ||
| } | ||
|
|
||
| // stopIOThreadCommand is disabled in mysqlGRFlavor | ||
| func (mysqlGRFlavor) stopIOThreadCommand() string { | ||
| return "" | ||
| } | ||
|
|
||
| // resetReplicationCommands is disabled in mysqlGRFlavor | ||
| func (mysqlGRFlavor) resetReplicationCommands(c *Conn) []string { | ||
| return []string{} | ||
| } | ||
|
|
||
| // setReplicationPositionCommands is disabled in mysqlGRFlavor | ||
| func (mysqlGRFlavor) setReplicationPositionCommands(pos Position) []string { | ||
| return []string{} | ||
| } | ||
|
|
||
| // status returns the result of the appropriate status command, | ||
| // with parsed replication position. | ||
| // | ||
| // Note: primary will skip this function, only replica will call it. | ||
| // TODO: Right now the GR's lag is defined as the lag between a node processing a txn | ||
| // and the time the txn was committed. We should consider reporting lag between current queueing txn timestamp | ||
| // from replication_connection_status and the current processing txn's commit timestamp | ||
| func (mysqlGRFlavor) status(c *Conn) (ReplicationStatus, error) { | ||
| res := ReplicationStatus{} | ||
| // Get master node information | ||
| query := `SELECT | ||
| MEMBER_HOST, | ||
| MEMBER_PORT | ||
| FROM | ||
| performance_schema.replication_group_members | ||
| WHERE | ||
| MEMBER_ROLE='PRIMARY' AND MEMBER_STATE='ONLINE'` | ||
| err := fetchStatusForGroupReplication(c, query, func(values []sqltypes.Value) error { | ||
| parsePrimaryGroupMember(&res, values) | ||
| return nil | ||
| }) | ||
| if err != nil { | ||
| return ReplicationStatus{}, err | ||
| } | ||
|
|
||
| query = `SELECT | ||
| MEMBER_STATE | ||
| FROM | ||
| performance_schema.replication_group_members | ||
| WHERE | ||
| MEMBER_HOST=convert(@@hostname using ascii) AND MEMBER_PORT=@@port` | ||
| var chanel string | ||
| err = fetchStatusForGroupReplication(c, query, func(values []sqltypes.Value) error { | ||
| state := values[0].ToString() | ||
| if state == "ONLINE" { | ||
| chanel = "group_replication_applier" | ||
| } else if state == "RECOVERING" { | ||
| chanel = "group_replication_recovery" | ||
| } else { // OFFLINE, ERROR, UNREACHABLE | ||
| // If the member is not in healthy state, use max int as lag | ||
| res.SecondsBehindMaster = math.MaxUint32 | ||
| } | ||
| return nil | ||
| }) | ||
| if err != nil { | ||
| return ReplicationStatus{}, err | ||
| } | ||
| // if chanel is not set, it means the state is not ONLINE or RECOVERING | ||
| // return partial result early | ||
| if chanel == "" { | ||
| return res, nil | ||
| } | ||
|
|
||
| // Populate IOThreadRunning from replication_connection_status | ||
| query = fmt.Sprintf(`SELECT SERVICE_STATE | ||
| FROM performance_schema.replication_connection_status | ||
| WHERE CHANNEL_NAME='%s'`, chanel) | ||
| var ioThreadRunning bool | ||
| err = fetchStatusForGroupReplication(c, query, func(values []sqltypes.Value) error { | ||
| ioThreadRunning = values[0].ToString() == "ON" | ||
| return nil | ||
| }) | ||
| if err != nil { | ||
| return ReplicationStatus{}, err | ||
| } | ||
| res.IOThreadRunning = ioThreadRunning | ||
| // Populate SQLThreadRunning from replication_connection_status | ||
| var sqlThreadRunning bool | ||
| query = fmt.Sprintf(`SELECT SERVICE_STATE | ||
| FROM performance_schema.replication_applier_status_by_coordinator | ||
| WHERE CHANNEL_NAME='%s'`, chanel) | ||
| err = fetchStatusForGroupReplication(c, query, func(values []sqltypes.Value) error { | ||
| sqlThreadRunning = values[0].ToString() == "ON" | ||
| return nil | ||
| }) | ||
| if err != nil { | ||
| return ReplicationStatus{}, err | ||
| } | ||
| res.SQLThreadRunning = sqlThreadRunning | ||
|
|
||
| // Collect lag information | ||
| // we use the difference between the last processed transaction's commit time | ||
| // and the end buffer time as the proxy to the lag | ||
| query = fmt.Sprintf(`SELECT | ||
| TIMESTAMPDIFF(SECOND, LAST_PROCESSED_TRANSACTION_ORIGINAL_COMMIT_TIMESTAMP, LAST_PROCESSED_TRANSACTION_END_BUFFER_TIMESTAMP) | ||
| FROM | ||
| performance_schema.replication_applier_status_by_coordinator | ||
| WHERE | ||
| CHANNEL_NAME='%s'`, chanel) | ||
| err = fetchStatusForGroupReplication(c, query, func(values []sqltypes.Value) error { | ||
| parseReplicationApplierLag(&res, values) | ||
| return nil | ||
| }) | ||
| if err != nil { | ||
| return ReplicationStatus{}, err | ||
| } | ||
| return res, nil | ||
| } | ||
|
|
||
| func parsePrimaryGroupMember(res *ReplicationStatus, row []sqltypes.Value) { | ||
| res.MasterHost = row[0].ToString() /* MEMBER_HOST */ | ||
| memberPort, _ := row[1].ToInt64() /* MEMBER_PORT */ | ||
| res.MasterPort = int(memberPort) | ||
| } | ||
|
|
||
| func parseReplicationApplierLag(res *ReplicationStatus, row []sqltypes.Value) { | ||
| lagSec, err := row[0].ToInt64() | ||
| // if the error is not nil, SecondsBehindMaster will remain to be MaxUint32 | ||
| if err == nil { | ||
| // Only set where there is no error | ||
| // The value can be NULL when there is no replication applied yet | ||
| res.SecondsBehindMaster = uint(lagSec) | ||
| } | ||
| } | ||
|
|
||
| func fetchStatusForGroupReplication(c *Conn, query string, onResult func([]sqltypes.Value) error) error { | ||
| qr, err := c.ExecuteFetch(query, 100, true /* wantfields */) | ||
| if err != nil { | ||
| return err | ||
| } | ||
| // if group replication related query returns 0 rows, it means the group replication is not set up | ||
| if len(qr.Rows) == 0 { | ||
| return ErrNoGroupStatus | ||
| } | ||
| if len(qr.Rows) > 1 { | ||
| return vterrors.Errorf(vtrpc.Code_INTERNAL, "unexpected results for %v: %v", query, qr.Rows) | ||
| } | ||
| return onResult(qr.Rows[0]) | ||
| } | ||
|
|
||
| // primarymasterStatusStatus returns the result of 'SHOW MASTER STATUS', | ||
| // with parsed executed position. | ||
| func (mysqlGRFlavor) primaryStatus(c *Conn) (PrimaryStatus, error) { | ||
| return mysqlFlavor{}.primaryStatus(c) | ||
| } | ||
|
|
||
| func (mysqlGRFlavor) baseShowTablesWithSizes() string { | ||
| return TablesWithSize80 | ||
| } | ||
|
|
||
| func init() { | ||
| flavors[GRFlavorID] = newMysqlGRFlavor | ||
| } |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,53 @@ | ||
| /* | ||
| Copyright 2021 The Vitess Authors. | ||
|
|
||
| Licensed under the Apache License, Version 2.0 (the "License"); | ||
| you may not use this file except in compliance with the License. | ||
| You may obtain a copy of the License at | ||
|
|
||
| http://www.apache.org/licenses/LICENSE-2.0 | ||
|
|
||
| Unless required by applicable law or agreed to in writing, software | ||
| distributed under the License is distributed on an "AS IS" BASIS, | ||
| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| See the License for the specific language governing permissions and | ||
| limitations under the License. | ||
| */ | ||
| package mysql | ||
5antelope marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
|
||
| import ( | ||
| "testing" | ||
|
|
||
| "gotest.tools/assert" | ||
|
|
||
| "vitess.io/vitess/go/sqltypes" | ||
| querypb "vitess.io/vitess/go/vt/proto/query" | ||
| ) | ||
|
|
||
| func TestMysqlGRParsePrimaryGroupMember(t *testing.T) { | ||
| res := ReplicationStatus{} | ||
| rows := []sqltypes.Value{ | ||
| sqltypes.MakeTrusted(querypb.Type_VARCHAR, []byte("host1")), | ||
| sqltypes.MakeTrusted(querypb.Type_INT32, []byte("10")), | ||
| } | ||
| parsePrimaryGroupMember(&res, rows) | ||
| assert.Equal(t, "host1", res.MasterHost) | ||
| assert.Equal(t, 10, res.MasterPort) | ||
| assert.Equal(t, false, res.IOThreadRunning) | ||
| assert.Equal(t, false, res.SQLThreadRunning) | ||
| } | ||
|
|
||
| func TestMysqlGRReplicationApplierLagParse(t *testing.T) { | ||
| res := ReplicationStatus{} | ||
| row := []sqltypes.Value{ | ||
| sqltypes.MakeTrusted(querypb.Type_INT32, []byte("NULL")), | ||
| } | ||
| parseReplicationApplierLag(&res, row) | ||
| // strconv.NumError will leave SecondsBehindMaster unset | ||
| assert.Equal(t, uint(0), res.SecondsBehindMaster) | ||
| row = []sqltypes.Value{ | ||
| sqltypes.MakeTrusted(querypb.Type_INT32, []byte("100")), | ||
| } | ||
| parseReplicationApplierLag(&res, row) | ||
| assert.Equal(t, uint(100), res.SecondsBehindMaster) | ||
| } | ||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.