Skip to content
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
142 changes: 124 additions & 18 deletions pkg/supervisor/supervisor.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,36 +55,142 @@ const k0sManaged = "_K0S_MANAGED=yes"

// processWaitQuit waits for a process to exit or a shut down signal
// returns true if shutdown is requested
func (s *Supervisor) processWaitQuit(ctx context.Context) bool {
waitresult := make(chan error)
func (s *Supervisor) processWaitQuit(ctx context.Context, cmd *exec.Cmd) bool {
waitresult := make(chan error, 1)
go func() {
waitresult <- s.cmd.Wait()
waitresult <- cmd.Wait()
}()

defer os.Remove(s.PidFile)

select {
case <-ctx.Done():
for {
s.log.Info("Requesting graceful shutdown")
if err := requestGracefulShutdown(s.cmd.Process); err != nil {
s.log.WithError(err).Warn("Failed to request graceful shutdown")
}
select {
case <-time.After(s.TimeoutStop):
continue
case <-waitresult:
return true
var exitErr *exec.ExitError
err := s.endProcess(cmd, waitresult)
// Ignore errors for processes that don't handle signals
if errors.Is(err, errProcessShutdownFailed) && errors.As(err, &exitErr) {
if status, ok := exitErr.Sys().(syscall.WaitStatus); ok && status.Signal() == syscall.SIGTERM {
s.log.Debug("Process didn't handle SIGTERM")
err = nil
}
}
case err := <-waitresult:
if err != nil {
s.log.WithError(err).Warn("Failed to wait for process")
s.log.WithError(err).Error("Error while ending process")
} else {
s.log.Warnf("Process exited: %s", s.cmd.ProcessState)
s.log.Info("Process finished")
}
return true

case err := <-waitresult:
var exitErr *exec.ExitError
switch {
case err == nil:
s.log.Error("Process finished prematurely")
case errors.As(err, &exitErr):
s.log.WithError(exitErr).Error("Process failed")
default:
s.log.WithError(err).Error("Failed to wait for process")
}
return false
}
}

// errProcessShutdownFailed is wrapped by endProcess, together with the
// process's *exec.ExitError, when a graceful shutdown was requested and the
// process then exited with an exit error. processWaitQuit checks for it via
// errors.Is in order to inspect that exit error.
var errProcessShutdownFailed = errors.New("process shutdown failed")

// endProcess ends the process of cmd. It first requests a graceful shutdown
// via requestGracefulShutdown and waits up to s.TimeoutStop for a result on
// waitresult. If the request fails for a reason other than the process being
// done already, or if the wait times out, the process is killed and given
// another 30 seconds to terminate.
//
// waitresult is expected to deliver the result of cmd.Wait.
//
// It returns nil if the process exited without error after the graceful
// shutdown was requested. If the process exited with an *exec.ExitError after
// the graceful shutdown was requested, the returned error wraps both
// errProcessShutdownFailed and that exit error. All other outcomes, including
// a successful kill, are reported as a non-nil error.
func (s *Supervisor) endProcess(cmd *exec.Cmd, waitresult <-chan error) error {
	err := requestGracefulShutdown(cmd.Process)
	var expectGracefulShutdown bool
	switch {
	// Shutdown request sent, wait for process to finish.
	case err == nil:
		expectGracefulShutdown = true
		s.log.Debug("Awaiting graceful process shutdown for ", s.TimeoutStop)
		select {
		case err := <-waitresult:
			var exitErr *exec.ExitError
			switch {
			case err == nil:
				return nil
			case errors.As(err, &exitErr):
				return fmt.Errorf("%w: %w", errProcessShutdownFailed, exitErr)
			default:
				// exitErr is nil in this branch, so wrap the actual error.
				return fmt.Errorf("failed to wait for process: %w", err)
			}

		case <-time.After(s.TimeoutStop):
			err = fmt.Errorf("timed out after %s while waiting for process to finish", s.TimeoutStop)
		}

	// The process has finished even before the shutdown could be requested.
	case errors.Is(err, os.ErrProcessDone):
		// Non-blocking receive: report the process state only if it is
		// available right away.
		select {
		case err = <-waitresult:
			var exitErr *exec.ExitError
			switch {
			case err == nil:
				err = errors.New("process finished prematurely")
			case errors.As(err, &exitErr):
				err = fmt.Errorf("process failed: %w", exitErr)
			default:
				err = fmt.Errorf("failed to wait for process: %w", err)
			}

		default:
			err = errors.New("process state unavailable")
		}

		return fmt.Errorf("process exited before shutdown could be requested: %w", err)

	// Something else went wrong
	default:
		// Continue with killing the process
	}

	// Either the shutdown request failed or the process didn't finish in
	// time. Remember why, so that it's included in every error returned below.
	shutdownErr := err
	err = cmd.Process.Kill()
	switch {
	// Process killed, wait for process to exit.
	case err == nil:
		timeoutKill := 30 * time.Second
		s.log.Debug("Awaiting termination of killed process for ", timeoutKill)
		select {
		case <-waitresult:
			err = errors.New("process has been killed")
		case <-time.After(timeoutKill):
			err = fmt.Errorf("timed out after %s while waiting for killed process to terminate, giving up", timeoutKill)
		}
		return fmt.Errorf("%w; %w", shutdownErr, err)

	// Process exited before it could be killed.
	case errors.Is(err, os.ErrProcessDone):
		// Non-blocking receive, as above.
		select {
		case err := <-waitresult:
			var exitErr *exec.ExitError
			switch {
			case err == nil:
				if expectGracefulShutdown {
					return nil
				}
				return fmt.Errorf("%w; process finished prematurely", shutdownErr)

			case errors.As(err, &exitErr):
				if expectGracefulShutdown {
					return fmt.Errorf("%w: %w", errProcessShutdownFailed, exitErr)
				}
				return fmt.Errorf("%w; process failed: %w", shutdownErr, exitErr)

			default:
				// exitErr is nil in this branch, so wrap the actual error.
				return fmt.Errorf("%w; failed to wait for process: %w", shutdownErr, err)
			}

		default:
			return fmt.Errorf("%w; process exited but no process state is available", shutdownErr)
		}

	// Something else went wrong
	default:
		return fmt.Errorf("%w; failed to kill process, giving up: %w", shutdownErr, err)
	}
}

// Supervise Starts supervising the given process
Expand Down Expand Up @@ -178,7 +284,7 @@ func (s *Supervisor) Supervise() error {
s.log.Infof("Restarted (%d)", restarts)
}
restarts++
if s.processWaitQuit(ctx) {
if s.processWaitQuit(ctx, s.cmd) {
return
}
}
Expand Down