Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
71 changes: 71 additions & 0 deletions go/test/endtoend/vtorc/general/vtorc_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ import (
"vitess.io/vitess/go/test/endtoend/vtorc/utils"
"vitess.io/vitess/go/vt/log"
topodatapb "vitess.io/vitess/go/vt/proto/topodata"
"vitess.io/vitess/go/vt/vtctl/reparentutil/policy"
"vitess.io/vitess/go/vt/vtorc/inst"
"vitess.io/vitess/go/vt/vtorc/logic"
)
Expand Down Expand Up @@ -401,6 +402,76 @@ func TestRepairAfterTER(t *testing.T) {
utils.CheckReplication(t, clusterInfo, newPrimary, []*cluster.Vttablet{curPrimary}, 15*time.Second)
}

// TestStalePrimary tests that an old primary that remains writable and of tablet type PRIMARY in the topo
// is properly demoted to a read-only replica by VTOrc.
func TestStalePrimary(t *testing.T) {
ctx := t.Context()

defer utils.PrintVTOrcLogsOnFailure(t, clusterInfo.ClusterInstance)
utils.SetupVttabletsAndVTOrcs(t, clusterInfo, 4, 0, []string{"--topo-information-refresh-duration", "1s"}, cluster.VTOrcConfiguration{
PreventCrossCellFailover: true,
}, 1, policy.DurabilitySemiSync)
keyspace := &clusterInfo.ClusterInstance.Keyspaces[0]
shard0 := &keyspace.Shards[0]

curPrimary := utils.ShardPrimaryTablet(t, clusterInfo, keyspace, shard0)
assert.NotNil(t, curPrimary, "should have elected a primary")
utils.CheckPrimaryTablet(t, clusterInfo, curPrimary, true)

var badPrimary, healthyReplica *cluster.Vttablet
for _, tablet := range shard0.Vttablets {
if tablet.Alias == curPrimary.Alias {
continue
}

if badPrimary == nil {
badPrimary = tablet
continue
}

healthyReplica = tablet
}

utils.CheckReplication(t, clusterInfo, curPrimary, []*cluster.Vttablet{badPrimary, healthyReplica}, 15*time.Second)

curPrimaryTopo, err := clusterInfo.Ts.GetTablet(ctx, curPrimary.GetAlias())
require.NoError(t, err, "expected to read current primary topo record")

curPrimaryTermStart := protoutil.TimeFromProto(curPrimaryTopo.PrimaryTermStartTime)
require.False(t, curPrimaryTermStart.IsZero(), "expected current primary term start time to be set")

err = utils.RunSQLs(t, []string{"SET GLOBAL read_only = OFF"}, badPrimary, "")
require.NoError(t, err)
require.True(t, utils.WaitForReadOnlyValue(t, badPrimary, 0))

// We set the tablet's type in the topology to PRIMARY. This mimics the situation where during a demotion
// in a hypothetical ERS, the old primary starts running as a replica, but fails before updating the topology
// accordingly.
_, err = clusterInfo.Ts.UpdateTabletFields(ctx, badPrimary.GetAlias(), func(tablet *topodatapb.Tablet) error {
tablet.Type = topodatapb.TabletType_PRIMARY
tablet.PrimaryTermStartTime = protoutil.TimeToProto(curPrimaryTermStart.Add(-1 * time.Minute))
return nil
})
require.NoError(t, err)

// Expect VTOrc to demote the stale primary to a read-only replica.
require.Eventuallyf(t, func() bool {
topoTablet, topoErr := clusterInfo.Ts.GetTablet(ctx, badPrimary.GetAlias())
if topoErr != nil {
t.Logf("stale primary probe: topo error=%v", topoErr)
return false
}

readOnly, readErr := badPrimary.VttabletProcess.GetDBVar("read_only", "")
if readErr != nil {
t.Logf("stale primary probe: alias=%s topo=%v read_only error=%v", badPrimary.Alias, topoTablet.Type, readErr)
return false
}

return topoTablet.Type == topodatapb.TabletType_REPLICA && readOnly == "ON"
}, 30*time.Second, time.Second, "expected demotion to REPLICA with read_only=ON")
}

// TestSemiSync tests that semi-sync is setup correctly by vtorc if it is incorrectly set
func TestSemiSync(t *testing.T) {
// stop any vtorc instance running due to a previous test.
Expand Down
5 changes: 5 additions & 0 deletions go/vt/external/golib/sqlutils/sqlutils.go
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,11 @@ func (rm *RowMap) GetTime(key string) time.Time {
if t, err := time.Parse(DateTimeFormat, rm.GetString(key)); err == nil {
return t
}

if t, err := time.Parse(time.RFC3339Nano, rm.GetString(key)); err == nil {
return t
}

return time.Time{}
}

Expand Down
22 changes: 16 additions & 6 deletions go/vt/vtorc/inst/analysis.go
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,10 @@ const (
PrimarySemiSyncBlocked AnalysisCode = "PrimarySemiSyncBlocked"
ErrantGTIDDetected AnalysisCode = "ErrantGTIDDetected"
PrimaryDiskStalled AnalysisCode = "PrimaryDiskStalled"

// StaleTopoPrimary describes when a tablet still has the type PRIMARY in the topology when a newer primary
// has been elected. VTOrc should demote this primary to a replica.
StaleTopoPrimary AnalysisCode = "StaleTopoPrimary"
)

type StructureAnalysisCode string
Expand Down Expand Up @@ -88,12 +92,18 @@ type ReplicationAnalysisHints struct {
type ReplicationAnalysis struct {
AnalyzedInstanceAlias string
AnalyzedInstancePrimaryAlias string
TabletType topodatapb.TabletType
CurrentTabletType topodatapb.TabletType
PrimaryTimeStamp time.Time
ClusterDetails ClusterInfo
AnalyzedKeyspace string
AnalyzedShard string

// TabletType is the tablet's type as seen in the topology.
TabletType topodatapb.TabletType

// CurrentTabletType is the type this tablet is currently running as.
CurrentTabletType topodatapb.TabletType

PrimaryTimeStamp time.Time
ClusterDetails ClusterInfo
AnalyzedKeyspace string
AnalyzedShard string

// ShardPrimaryTermTimestamp is the primary term start time stored in the shard record.
ShardPrimaryTermTimestamp string
AnalyzedInstanceBinlogCoordinates BinlogCoordinates
Expand Down
21 changes: 20 additions & 1 deletion go/vt/vtorc/inst/analysis_dao.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,12 @@ type clusterAnalysis struct {
hasClusterwideAction bool
totalTablets int
primaryAlias string
durability policy.Durabler

// primaryTimestamp is the most recent primary term start time observed for the shard.
primaryTimestamp time.Time

// durability is the shard's current durability policy.
durability policy.Durabler
}

// GetReplicationAnalysis will check for replication problems (dead primary; unreachable primary; etc)
Expand Down Expand Up @@ -379,6 +384,7 @@ func GetReplicationAnalysis(keyspace string, shard string, hints *ReplicationAna
if a.TabletType == topodatapb.TabletType_PRIMARY {
a.IsClusterPrimary = true
clusters[keyspaceShard].primaryAlias = a.AnalyzedInstanceAlias
clusters[keyspaceShard].primaryTimestamp = a.PrimaryTimeStamp
}
durabilityPolicy := m.GetString("durability_policy")
if durabilityPolicy == "" {
Expand Down Expand Up @@ -458,6 +464,9 @@ func GetReplicationAnalysis(keyspace string, shard string, hints *ReplicationAna
} else if topo.IsReplicaType(a.TabletType) && a.ErrantGTID != "" {
a.Analysis = ErrantGTIDDetected
a.Description = "Tablet has errant GTIDs"
} else if isStaleTopoPrimary(a, ca) {
a.Analysis = StaleTopoPrimary
a.Description = "Primary tablet is stale, older than current primary"
} else if topo.IsReplicaType(a.TabletType) && ca.primaryAlias == "" && a.ShardPrimaryTermTimestamp == "" {
// ClusterHasNoPrimary should only be detected when the shard record doesn't have any primary term start time specified either.
a.Analysis = ClusterHasNoPrimary
Expand Down Expand Up @@ -603,6 +612,16 @@ func GetReplicationAnalysis(keyspace string, shard string, hints *ReplicationAna
return result, err
}

// isStaleTopoPrimary returns true when a tablet has type PRIMARY in the topology and has an older primary term
// start time than the shard's current primary.
func isStaleTopoPrimary(tablet *ReplicationAnalysis, cluster *clusterAnalysis) bool {
if tablet.TabletType != topodatapb.TabletType_PRIMARY {
return false
}

return tablet.PrimaryTimeStamp.Before(cluster.primaryTimestamp)
}

// postProcessAnalyses is used to update different analyses based on the information gleaned from looking at all the analyses together instead of individual data.
func postProcessAnalyses(result []*ReplicationAnalysis, clusters map[string]*clusterAnalysis) []*ReplicationAnalysis {
for {
Expand Down
Loading
Loading