Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 2 additions & 3 deletions go/test/endtoend/vtorc/primaryfailure/main_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,8 @@ import (
"os"
"testing"

"vitess.io/vitess/go/test/endtoend/vtorc/utils"

"vitess.io/vitess/go/test/endtoend/cluster"
"vitess.io/vitess/go/test/endtoend/vtorc/utils"
)

var clusterInfo *utils.VTOrcClusterInfo
Expand All @@ -34,7 +33,7 @@ func TestMain(m *testing.M) {
cellInfos = append(cellInfos, &utils.CellInfo{
CellName: utils.Cell1,
NumReplicas: 12,
NumRdonly: 2,
NumRdonly: 3,
UIDBase: 100,
})
cellInfos = append(cellInfos, &utils.CellInfo{
Expand Down
21 changes: 14 additions & 7 deletions go/test/endtoend/vtorc/primaryfailure/primary_failure_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,22 +20,22 @@ import (
"testing"
"time"

"vitess.io/vitess/go/test/endtoend/vtorc/utils"
"vitess.io/vitess/go/vt/vtorc/logic"

"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"

"vitess.io/vitess/go/test/endtoend/cluster"
"vitess.io/vitess/go/test/endtoend/vtorc/utils"
"vitess.io/vitess/go/vt/vtorc/logic"
)

// bring down primary, let orc promote replica
// covers the test case master-failover from orchestrator
// Also tests that VTOrc can handle multiple failures, if the durability policies allow it
func TestDownPrimary(t *testing.T) {
defer cluster.PanicHandler(t)
utils.SetupVttabletsAndVTOrcs(t, clusterInfo, 2, 1, nil, cluster.VTOrcConfiguration{
PreventCrossDataCenterPrimaryFailover: true,
}, 1, "")
}, 1, "semi_sync")
keyspace := &clusterInfo.ClusterInstance.Keyspaces[0]
shard0 := &keyspace.Shards[0]
// find primary from topo
Expand All @@ -58,21 +58,28 @@ func TestDownPrimary(t *testing.T) {
assert.NotNil(t, replica, "could not find replica tablet")
assert.NotNil(t, rdonly, "could not find rdonly tablet")

// Start a cross-cell replica
crossCellReplica := utils.StartVttablet(t, clusterInfo, utils.Cell2, false)

// check that the replication is setup correctly before we failover
utils.CheckReplication(t, clusterInfo, curPrimary, []*cluster.Vttablet{rdonly, replica}, 10*time.Second)
utils.CheckReplication(t, clusterInfo, curPrimary, []*cluster.Vttablet{rdonly, replica, crossCellReplica}, 10*time.Second)

// Make the rdonly tablet unavailable
err := rdonly.MysqlctlProcess.Stop()
require.NoError(t, err)
// Make the current primary database unavailable.
err := curPrimary.MysqlctlProcess.Stop()
err = curPrimary.MysqlctlProcess.Stop()
require.NoError(t, err)
defer func() {
// we remove the tablet from our global list since its mysqlctl process has stopped and cannot be reused for other tests
utils.PermanentlyRemoveVttablet(clusterInfo, curPrimary)
utils.PermanentlyRemoveVttablet(clusterInfo, rdonly)
}()

// check that the replica gets promoted
utils.CheckPrimaryTablet(t, clusterInfo, replica, true)
// also check that the replication is working correctly after failover
utils.VerifyWritesSucceed(t, clusterInfo, replica, []*cluster.Vttablet{rdonly}, 10*time.Second)
utils.VerifyWritesSucceed(t, clusterInfo, replica, []*cluster.Vttablet{crossCellReplica}, 10*time.Second)
utils.WaitForSuccessfulRecoveryCount(t, vtOrcProcess, logic.RecoverDeadPrimaryRecoveryName, 1)
}

Expand Down
15 changes: 6 additions & 9 deletions go/test/endtoend/vtorc/utils/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,21 +29,18 @@ import (
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"

// This imports toposervers to register their implementations of TopoServer.
_ "vitess.io/vitess/go/vt/topo/consultopo"
_ "vitess.io/vitess/go/vt/topo/etcd2topo"
_ "vitess.io/vitess/go/vt/topo/k8stopo"
_ "vitess.io/vitess/go/vt/topo/zk2topo"

"vitess.io/vitess/go/json2"
"vitess.io/vitess/go/mysql"
"vitess.io/vitess/go/sqltypes"
"vitess.io/vitess/go/test/endtoend/cluster"
"vitess.io/vitess/go/vt/log"
topodatapb "vitess.io/vitess/go/vt/proto/topodata"
"vitess.io/vitess/go/vt/topo"
_ "vitess.io/vitess/go/vt/topo/consultopo"
_ "vitess.io/vitess/go/vt/topo/etcd2topo"
_ "vitess.io/vitess/go/vt/topo/k8stopo"
"vitess.io/vitess/go/vt/topo/topoproto"

topodatapb "vitess.io/vitess/go/vt/proto/topodata"
_ "vitess.io/vitess/go/vt/topo/zk2topo"
)

const (
Expand Down Expand Up @@ -647,7 +644,7 @@ func PermanentlyRemoveVttablet(clusterInfo *VTOrcClusterInfo, tablet *cluster.Vt
for i, vttablet := range cellInfo.RdonlyTablets {
if vttablet == tablet {
// remove this tablet since its mysql has stopped
cellInfo.ReplicaTablets = append(cellInfo.ReplicaTablets[:i], cellInfo.ReplicaTablets[i+1:]...)
cellInfo.RdonlyTablets = append(cellInfo.RdonlyTablets[:i], cellInfo.RdonlyTablets[i+1:]...)
KillTablets([]*cluster.Vttablet{tablet})
return
}
Expand Down
10 changes: 5 additions & 5 deletions go/vt/vtctl/reparentutil/replication.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,16 +28,15 @@ import (
"vitess.io/vitess/go/vt/concurrency"
"vitess.io/vitess/go/vt/log"
"vitess.io/vitess/go/vt/logutil"
replicationdatapb "vitess.io/vitess/go/vt/proto/replicationdata"
topodatapb "vitess.io/vitess/go/vt/proto/topodata"
"vitess.io/vitess/go/vt/proto/vtrpc"
"vitess.io/vitess/go/vt/topo"
"vitess.io/vitess/go/vt/topo/topoproto"
"vitess.io/vitess/go/vt/topotools"
"vitess.io/vitess/go/vt/topotools/events"
"vitess.io/vitess/go/vt/vterrors"
"vitess.io/vitess/go/vt/vttablet/tmclient"

replicationdatapb "vitess.io/vitess/go/vt/proto/replicationdata"
topodatapb "vitess.io/vitess/go/vt/proto/topodata"
)

// FindValidEmergencyReparentCandidates will find candidates for an emergency
Expand Down Expand Up @@ -312,8 +311,9 @@ func stopReplicationAndBuildStatusMaps(
errgroup := concurrency.ErrorGroup{
NumGoroutines: len(tabletMap) - ignoredTablets.Len(),
NumRequiredSuccesses: len(tabletMap) - ignoredTablets.Len() - 1,
NumAllowedErrors: 1,
NumErrorsToWaitFor: numErrorsToWaitFor,
NumAllowedErrors: len(tabletMap), // We set the number of allowed errors to a very high value, because we don't want to exit early
// even in case of multiple failures. We rely on the revoke function below to determine if we have more failures than we can tolerate
NumErrorsToWaitFor: numErrorsToWaitFor,
}

errRecorder := errgroup.Wait(groupCancel, errChan)
Expand Down
5 changes: 2 additions & 3 deletions go/vt/vtorc/inst/instance_dao.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,12 +27,11 @@ import (
"sync"
"time"

"github.com/openark/golib/sqlutils"
"github.com/patrickmn/go-cache"
"github.com/rcrowley/go-metrics"
"github.com/sjmudd/stopwatch"

"github.com/openark/golib/sqlutils"

vitessmysql "vitess.io/vitess/go/mysql"
"vitess.io/vitess/go/tb"
"vitess.io/vitess/go/vt/log"
Expand Down Expand Up @@ -454,7 +453,7 @@ Cleanup:
// tried to check the instance. last_attempted_check is also
// updated on success by writeInstance.
latency.Start("backend")
_ = UpdateInstanceLastChecked(&instance.Key, partialSuccess)
_ = UpdateInstanceLastChecked(instanceKey, partialSuccess)
latency.Stop("backend")
return nil, err
}
Expand Down