Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions go/flags/endtoend/vtorc.txt
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ vtorc \

Flags:
--allow-emergency-reparent Whether VTOrc should be allowed to run emergency reparent operation when it detects a dead primary (default true)
--allow-recovery Whether VTOrc should be allowed to run recovery actions (default true)
--alsologtostderr log to standard error as well as files
--audit-file-location string File location where the audit logs are to be stored
--audit-purge-duration duration Duration for which audit logs are held before being purged. Should be in multiples of days (default 168h0m0s)
Expand Down
1 change: 1 addition & 0 deletions go/test/endtoend/vtorc/readtopologyinstance/main_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ func TestReadTopologyInstanceBufferable(t *testing.T) {
"--topo_global_root", clusterInfo.ClusterInstance.VtctlProcess.TopoGlobalRoot,
}
servenv.ParseFlags("vtorc")
config.Config.AllowRecovery = true
config.Config.RecoveryPeriodBlockSeconds = 1
config.Config.InstancePollSeconds = 1
config.MarkConfigurationLoaded()
Expand Down
10 changes: 10 additions & 0 deletions go/vt/vtorc/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ var (
tolerableReplicationLag = 0 * time.Second
topoInformationRefreshDuration = 15 * time.Second
recoveryPollDuration = 1 * time.Second
allowRecovery = true
ersEnabled = true
convertTabletsWithErrantGTIDs = false
)
Expand All @@ -83,6 +84,7 @@ func RegisterFlags(fs *pflag.FlagSet) {
fs.DurationVar(&tolerableReplicationLag, "tolerable-replication-lag", tolerableReplicationLag, "Amount of replication lag that is considered acceptable for a tablet to be eligible for promotion when Vitess makes the choice of a new primary in PRS")
fs.DurationVar(&topoInformationRefreshDuration, "topo-information-refresh-duration", topoInformationRefreshDuration, "Timer duration on which VTOrc refreshes the keyspace and vttablet records from the topology server")
fs.DurationVar(&recoveryPollDuration, "recovery-poll-duration", recoveryPollDuration, "Timer duration on which VTOrc polls its database to run a recovery")
fs.BoolVar(&allowRecovery, "allow-recovery", allowRecovery, "Whether VTOrc should be allowed to run recovery actions")
fs.BoolVar(&ersEnabled, "allow-emergency-reparent", ersEnabled, "Whether VTOrc should be allowed to run emergency reparent operation when it detects a dead primary")
fs.BoolVar(&convertTabletsWithErrantGTIDs, "change-tablets-with-errant-gtid-to-drained", convertTabletsWithErrantGTIDs, "Whether VTOrc should be changing the type of tablets with errant GTIDs to DRAINED")
}
Expand All @@ -106,6 +108,7 @@ type Configuration struct {
WaitReplicasTimeoutSeconds int // Timeout on amount of time to wait for the replicas in case of ERS. Should be a small value because we should fail-fast. Should not be larger than LockTimeout since that is the total time we use for an ERS.
TolerableReplicationLagSeconds int // Amount of replication lag that is considered acceptable for a tablet to be eligible for promotion when Vitess makes the choice of a new primary in PRS.
TopoInformationRefreshSeconds int // Timer duration on which VTOrc refreshes the keyspace and vttablet records from the topo-server.
AllowRecovery bool // Allow recoveries.
RecoveryPollSeconds int // Timer duration on which VTOrc recovery analysis runs
}

Expand Down Expand Up @@ -137,6 +140,7 @@ func UpdateConfigValuesFromFlags() {
Config.WaitReplicasTimeoutSeconds = int(waitReplicasTimeout / time.Second)
Config.TolerableReplicationLagSeconds = int(tolerableReplicationLag / time.Second)
Config.TopoInformationRefreshSeconds = int(topoInformationRefreshDuration / time.Second)
Config.AllowRecovery = allowRecovery
Config.RecoveryPollSeconds = int(recoveryPollDuration / time.Second)
}

Expand All @@ -150,6 +154,11 @@ func SetERSEnabled(val bool) {
ersEnabled = val
}

// GetAllowRecovery reports whether VTOrc is allowed to run recovery actions.
// It returns the package-level allowRecovery value, which RegisterFlags binds
// to the --allow-recovery flag (default true).
// Note that this reads the flag variable directly rather than
// Config.AllowRecovery, so a change made only to the Configuration struct is
// not reflected in the value returned here.
func GetAllowRecovery() bool {
return allowRecovery
}

// ConvertTabletWithErrantGTIDs reports whether VTOrc is allowed to change the tablet type of tablets with errant GTIDs to DRAINED.
func ConvertTabletWithErrantGTIDs() bool {
return convertTabletsWithErrantGTIDs
Expand Down Expand Up @@ -181,6 +190,7 @@ func newConfiguration() *Configuration {
PreventCrossDataCenterPrimaryFailover: false,
WaitReplicasTimeoutSeconds: 30,
TopoInformationRefreshSeconds: 15,
AllowRecovery: true,
RecoveryPollSeconds: 1,
}
}
Expand Down
8 changes: 8 additions & 0 deletions go/vt/vtorc/logic/vtorc.go
Original file line number Diff line number Diff line change
Expand Up @@ -355,6 +355,14 @@ func ContinuousDiscovery() {
checkAndRecoverWaitPeriod := 3 * instancePollSecondsDuration()
recentDiscoveryOperationKeys = cache.New(instancePollSecondsDuration(), time.Second)

if !config.GetAllowRecovery() {
log.Info("--allow-recovery is set to 'false', disabling recovery actions")
if err := DisableRecovery(); err != nil {
log.Errorf("failed to disable recoveries: %+v", err)
return
}
}

go handleDiscoveryRequests()

healthTick := time.Tick(config.HealthPollSeconds * time.Second)
Expand Down
Loading