Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
61 changes: 25 additions & 36 deletions lib/service/signals.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,16 @@ import (
"github.com/gravitational/teleport/lib/utils"
)

// Time limits used when handling a fast shutdown.
const (
// fastShutdownTimeout is how long we're going to wait before connections
// are forcibly terminated during a fast shutdown.
fastShutdownTimeout = time.Second * 3

// fastShutdownGrace is how long we're going to wait for the shutdown
// procedure to complete after the fastShutdownTimeout is hit.
fastShutdownGrace = time.Second * 2
)

// printShutdownStatus prints running services until shut down
func (process *TeleportProcess) printShutdownStatus(ctx context.Context) {
statusInterval := defaults.HighResPollingPeriod
Expand Down Expand Up @@ -91,19 +101,25 @@ func (process *TeleportProcess) WaitForSignals(ctx context.Context) error {
process.log.Infof("All services stopped, exiting.")
return nil
case syscall.SIGTERM, syscall.SIGINT:
timeout := getShutdownTimeout(process.log)
timeoutCtx, cancel := context.WithTimeout(ctx, timeout)
process.log.Infof("Got signal %q, exiting within %vs.", signal, timeout.Seconds())
// we run the shutdown in a goroutine and return when the
// context is done even if Shutdown hasn't returned because we
// want to ensure that we exit shortly after SIGTERM even in
// case of bugs
process.log.Infof("Got signal %q, exiting within %s.", signal, fastShutdownTimeout)
// we run the shutdown in a goroutine so we can return and exit
// the process even if Shutdown takes longer to return than we
// expected (due to bugs, for example)
shutdownDone := make(chan struct{})
go func() {
defer close(shutdownDone)
timeoutCtx, cancel := context.WithTimeout(ctx, fastShutdownTimeout)
defer cancel()
process.Shutdown(timeoutCtx)
}()
<-timeoutCtx.Done()
process.log.Infof("All services stopped or timeout passed, exiting immediately.")
graceTimer := time.NewTimer(fastShutdownTimeout + fastShutdownGrace)
defer graceTimer.Stop()
select {
case <-graceTimer.C:
process.log.Warn("Shutdown still hasn't completed, exiting anyway.")
case <-shutdownDone:
process.log.Info("All services stopped, exiting.")
}
return nil
case syscall.SIGUSR1:
// All programs placed diagnostics on the standard output.
Expand Down Expand Up @@ -180,33 +196,6 @@ func (process *TeleportProcess) WaitForSignals(ctx context.Context) error {
}
}

// Limits for the shutdown timeout computed by getShutdownTimeout.
const (
// defaultShutdownTimeout is the timeout getShutdownTimeout returns when no
// usable override is present in the environment.
defaultShutdownTimeout = time.Second * 3
// maxShutdownTimeout is the largest timeout getShutdownTimeout will return;
// larger overrides are reduced to this value.
maxShutdownTimeout = time.Minute * 10
)

// getShutdownTimeout returns the shutdown timeout to use. The result is
// defaultShutdownTimeout unless the TELEPORT_UNSTABLE_SHUTDOWN_TIMEOUT
// environment variable holds a duration that time.ParseDuration accepts, in
// which case that duration is returned, capped at maxShutdownTimeout.
// An unparseable override or one above the cap is reported as a warning
// on log.
func getShutdownTimeout(log logrus.FieldLogger) time.Duration {
	// TELEPORT_UNSTABLE_SHUTDOWN_TIMEOUT is an undocumented env var.
	// TODO(Tener): DELETE IN 15.0. after ironing out all possible shutdown bugs.
	raw := os.Getenv("TELEPORT_UNSTABLE_SHUTDOWN_TIMEOUT")
	if raw == "" {
		return defaultShutdownTimeout
	}

	parsed, parseErr := time.ParseDuration(raw)
	switch {
	case parseErr != nil:
		// Unusable override: keep the default.
		log.Warnf("Cannot parse timeout override %q, using default instead.", raw)
		return defaultShutdownTimeout
	case parsed > maxShutdownTimeout:
		// Override is valid but too large: clamp it.
		log.Warnf("Timeout override %q exceeds maximum value, reducing.", raw)
		return maxShutdownTimeout
	}
	return parsed
}

// ErrTeleportReloading is returned when signal waiter exits
// because the teleport process has initiated shutdown
var ErrTeleportReloading = &trace.CompareFailedError{Message: "teleport process is reloading"}
Expand Down
67 changes: 0 additions & 67 deletions lib/service/signals_test.go

This file was deleted.