Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions docs/pages/includes/config-reference/instance-wide.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,13 @@ teleport:
# # The cache is enabled by default, it can be disabled with this flag
# enabled: true

# The duration (in string form) of the delay between receiving a termination
# signal and the beginning of the shutdown procedures. It can be used to
# give time to load balancers to stop routing connections to the Teleport
# instance while the instance is still capable of handling them. If unset or
# negative, no delay is applied.
#shutdown_delay: "0s"

# Teleport can limit the number of connections coming from each client
# IP address to avoid abuse. Note that these limits are enforced separately
# for each service (SSH, Kubernetes, etc.)
Expand Down
2 changes: 2 additions & 0 deletions lib/config/configuration.go
Original file line number Diff line number Diff line change
Expand Up @@ -618,6 +618,8 @@ func ApplyFileConfig(fc *FileConfig, cfg *servicecfg.Config) error {
}
cfg.CachePolicy = *cachePolicy

cfg.ShutdownDelay = time.Duration(fc.ShutdownDelay)

// Apply (TLS) cipher suites and (SSH) ciphers, KEX algorithms, and MAC
// algorithms.
if len(fc.CipherSuites) > 0 {
Expand Down
4 changes: 4 additions & 0 deletions lib/config/configuration_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -795,6 +795,8 @@ func TestApplyConfig(t *testing.T) {

require.Equal(t, "tcp://127.0.0.1:3000", cfg.DiagnosticAddr.FullAddress())

require.Equal(t, 7*time.Minute+35*time.Second, cfg.ShutdownDelay)

u2fCAFromFile, err := os.ReadFile("testdata/u2f_attestation_ca.pem")
require.NoError(t, err)
require.Empty(t, cmp.Diff(cfg.Auth.Preference, &types.AuthPreferenceV2{
Expand Down Expand Up @@ -1419,6 +1421,8 @@ func checkStaticConfig(t *testing.T, conf *FileConfig) {
require.Equal(t, "10.10.10.1:3022", conf.AdvertiseIP)
require.Equal(t, "/var/run/teleport.pid", conf.PIDFile)

require.Zero(t, conf.ShutdownDelay)

require.Empty(t, cmp.Diff(conf.Limits, ConnectionLimits{
MaxConnections: 90,
MaxUsers: 91,
Expand Down
4 changes: 4 additions & 0 deletions lib/config/fileconf.go
Original file line number Diff line number Diff line change
Expand Up @@ -612,6 +612,10 @@ type Global struct {
AdvertiseIP string `yaml:"advertise_ip,omitempty"`
CachePolicy CachePolicy `yaml:"cache,omitempty"`

// ShutdownDelay is a fixed delay between receiving a termination signal and
// the beginning of the shutdown procedures.
ShutdownDelay types.Duration `yaml:"shutdown_delay,omitempty"`

// CipherSuites is a list of TLS ciphersuites that Teleport supports. If
// omitted, a Teleport selected list of defaults will be used.
CipherSuites []string `yaml:"ciphersuites,omitempty"`
Expand Down
1 change: 1 addition & 0 deletions lib/config/testdata_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ teleport:
log:
output: stderr
severity: INFO
shutdown_delay: "7m35s"
connection_limits:
max_connections: 90
max_users: 91
Expand Down
52 changes: 44 additions & 8 deletions lib/service/service.go
Original file line number Diff line number Diff line change
Expand Up @@ -280,6 +280,18 @@ const (
// all listening sockets and exiting.
TeleportExitEvent = "TeleportExit"

// TeleportTerminatingEvent is generated when the Teleport process receives
// a signal to shut down. It's always generated as part of the process
// lifecycle and it's always generated before TeleportExitEvent, but there
// might be some configured delay between this event and the
// TeleportExitEvent signaling the actual beginning of the shut down
// procedures. It should be used to advertise the fact that the Teleport
// instance is going to shut down at some near time in the future, not to
// reduce the functionality of services - i.e., it's perfectly fine for
// services to ignore this event altogether, and nothing should get closed
// as a result of this event.
TeleportTerminatingEvent = "TeleportTerminating"

// TeleportPhaseChangeEvent is generated to indicate that the CA rotation
// phase has been updated, used in tests.
TeleportPhaseChangeEvent = "TeleportPhaseChange"
Expand Down Expand Up @@ -6497,24 +6509,45 @@ func (process *TeleportProcess) WaitWithContext(ctx context.Context) {
// StartShutdown launches non-blocking graceful shutdown process that signals
// completion, returns context that will be closed once the shutdown is done
func (process *TeleportProcess) StartShutdown(ctx context.Context) context.Context {
// by the time we get here we've already extracted the parent pipe, which is
// the only potential imported file descriptor that's not a listening
// socket, so closing every imported FD with a prefix of "" will close all
// imported listeners that haven't been used so far
warnOnErr(process.ExitContext(), process.closeImportedDescriptors(""), process.logger)
warnOnErr(process.ExitContext(), process.stopListeners(), process.logger)
shutdownDelayTimer := process.Clock.NewTimer(process.Config.ShutdownDelay)
defer shutdownDelayTimer.Stop()

hasChildren := process.forkedTeleportCount.Load() > 0
if hasChildren {
ctx = services.ProcessForkedContext(ctx)
}

process.BroadcastEvent(Event{Name: TeleportTerminatingEvent})

if process.inventoryHandle != nil {
deleteResources := !hasChildren
if err := process.inventoryHandle.SetAndSendGoodbye(ctx, deleteResources, hasChildren); err != nil {
process.logger.WarnContext(process.ExitContext(), "Failed sending inventory goodbye during shutdown", "error", err)
}
}
if hasChildren {
ctx = services.ProcessForkedContext(ctx)

if d := process.Config.ShutdownDelay; d > 0 {
if hasChildren {
process.logger.InfoContext(ctx, "Ignoring shutdown delay due to the presence of forked processes")
} else {
process.logger.InfoContext(ctx, "Waiting for shutdown delay", "shutdown_delay", d.String())
select {
case <-shutdownDelayTimer.Chan():
case <-process.ExitContext().Done():
process.logger.WarnContext(ctx, "Skipping shutdown delay early due to process exit")
case <-ctx.Done():
process.logger.WarnContext(ctx, "Skipping shutdown delay early due to context cancellation")
}
}
}

// by the time we get here we've already extracted the parent pipe, which is
// the only potential imported file descriptor that's not a listening
// socket, so closing every imported FD with a prefix of "" will close all
// imported listeners that haven't been used so far
warnOnErr(process.ExitContext(), process.closeImportedDescriptors(""), process.logger)
warnOnErr(process.ExitContext(), process.stopListeners(), process.logger)

process.BroadcastEvent(Event{Name: TeleportExitEvent, Payload: ctx})
localCtx, cancel := context.WithCancel(ctx)
go func() {
Expand Down Expand Up @@ -6555,6 +6588,9 @@ func (process *TeleportProcess) Shutdown(ctx context.Context) {

// Close broadcasts close signals and exits immediately
func (process *TeleportProcess) Close() error {
// generate a TeleportTerminatingEvent to unblock any service waiting on
// that event before TeleportExitEvent
process.BroadcastEvent(Event{Name: TeleportTerminatingEvent})
process.BroadcastEvent(Event{Name: TeleportExitEvent})

var errors []error
Expand Down
4 changes: 4 additions & 0 deletions lib/service/servicecfg/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,10 @@ type Config struct {
// in case if they lose connection to auth servers
CachePolicy CachePolicy

// ShutdownDelay is a fixed delay between receiving a termination signal and
// the beginning of the shutdown procedures.
ShutdownDelay time.Duration

// Auth service configuration. Manages cluster state and configuration.
Auth AuthConfig

Expand Down
Loading