Skip to content

Commit

Permalink
root: handle SIGHUP and SIGUSR2, healthcheck gunicorn
Browse files Browse the repository at this point in the history
This is the first step to handle configuration reloading. With those
changes, it is already possible to do so, by sending a SIGUSR2 signal to
the Go server process. The next step would be to watch for changes to
configuration files and call the Restart function of the GoUnicorn
instance.

SIGHUP is caught by the Go server and forwarded as-is to gunicorn,
which causes it to restart its workers. However, that does not trigger
a reload of the Django settings, probably because they are already
loaded in the master, before creating any of the worker instances.

SIGUSR2, however, can be used to spawn a new gunicorn master process,
but handling it is a bit trickier. Please refer to Gunicorn's
documentation[0] for details, especially the "Upgrading to a new binary
on the fly" section.

As we are now effectively killing the gunicorn process launched by the
server, we need to handle some sort of check to make sure it is still
running. That's done by using the already existing healthchecks, making
them useful not only for the application start, but also for its
lifetime. If a check fails too many times in a given time period,
the gunicorn process is killed (if necessary) and then restarted.

[0] https://docs.gunicorn.org/en/20.1.0/signals.html

Other relevant links and documentation:

Python library handling the process swapping upon a SIGUSR2:
https://github.com/flupke/rainbow-saddle/

Golang cannot easily check if a process exists on Unix systems:
golang/go#34396

Signed-off-by: Marc 'risson' Schmitt <[email protected]>
  • Loading branch information
rissson committed Aug 25, 2023
1 parent b08ca98 commit e934be4
Show file tree
Hide file tree
Showing 3 changed files with 163 additions and 7 deletions.
37 changes: 36 additions & 1 deletion cmd/server/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,15 @@ import (
"fmt"
"net/http"
"net/url"
"os"
"os/signal"
"syscall"
"time"

"github.com/getsentry/sentry-go"
log "github.com/sirupsen/logrus"
"github.com/spf13/cobra"

"goauthentik.io/internal/common"
"goauthentik.io/internal/config"
"goauthentik.io/internal/constants"
Expand Down Expand Up @@ -70,6 +74,21 @@ var rootCmd = &cobra.Command{
l.Info("shutting down gunicorn")
g.Kill()
}()

// Relay SIGHUP and SIGUSR2 to gunicorn for as long as the server runs.
// The channel has a buffer of one so that a signal arriving while a
// reload/restart is still being processed can be queued instead of lost.
c := make(chan os.Signal, 1)
signal.Notify(c, syscall.SIGHUP, syscall.SIGUSR2)
go func() {
	// Range over the channel: receiving a single value would handle only
	// the first signal and leave every later SIGHUP/SIGUSR2 unanswered.
	for sig := range c {
		switch sig {
		case syscall.SIGHUP:
			log.Info("SIGHUP received, forwarding to gunicorn")
			g.Reload()
		case syscall.SIGUSR2:
			log.Info("SIGUSR2 received, restarting gunicorn")
			g.Restart()
		}
	}
}()

ws := web.NewWebServer(g)
g.HealthyCallback = func() {
if !config.Get().Outposts.DisableEmbeddedOutpost {
Expand All @@ -92,8 +111,24 @@ func attemptStartBackend(g *gounicorn.GoUnicorn) {
if !running {
return
}
g.Kill()
log.WithField("logger", "authentik.router").Info("starting gunicorn")
err := g.Start()
log.WithField("logger", "authentik.router").WithError(err).Warning("gunicorn process died, restarting")
if err != nil {
log.WithField("logger", "authentik.router").WithError(err).Error("gunicorn failed to start, restarting")
continue
}
failedChecks := 0
for range time.Tick(30 * time.Second) {
if !g.IsRunning() {
log.WithField("logger", "authentik.router").Warningf("gunicorn process failed healthcheck %d times", failedChecks)
failedChecks += 1
}
if failedChecks >= 3 {
log.WithField("logger", "authentik.router").WithError(err).Error("gunicorn process failed healthcheck three times, restarting")
break
}
}
}
}

Expand Down
94 changes: 88 additions & 6 deletions internal/gounicorn/gounicorn.go
Original file line number Diff line number Diff line change
@@ -1,15 +1,20 @@
package gounicorn

import (
"fmt"
"net/http"
"os"
"os/exec"
"runtime"
"strconv"
"strings"
"syscall"
"time"

log "github.com/sirupsen/logrus"

"goauthentik.io/internal/config"
"goauthentik.io/internal/utils"
"goauthentik.io/internal/utils/web"
)

Expand All @@ -18,6 +23,7 @@ type GoUnicorn struct {

log *log.Entry
p *exec.Cmd
pidFile *string
started bool
killed bool
alive bool
Expand All @@ -27,6 +33,7 @@ func New() *GoUnicorn {
logger := log.WithField("logger", "authentik.router.unicorn")
g := &GoUnicorn{
log: logger,
pidFile: nil,
started: false,
killed: false,
alive: false,
Expand All @@ -37,8 +44,13 @@ func New() *GoUnicorn {
}

func (g *GoUnicorn) initCmd() {
pidFile, _ := os.CreateTemp("", "authentik-gunicorn.*.pid")
g.pidFile = func() *string { s := pidFile.Name(); return &s }()
command := "gunicorn"
args := []string{"-c", "./lifecycle/gunicorn.conf.py", "authentik.root.asgi:application"}
if g.pidFile != nil {
args = append(args, "--pid", *g.pidFile)
}
if config.Get().Debug {
command = "./manage.py"
args = []string{"runserver"}
Expand All @@ -55,16 +67,13 @@ func (g *GoUnicorn) IsRunning() bool {
}

func (g *GoUnicorn) Start() error {
if g.killed {
g.log.Debug("Not restarting gunicorn since we're shutdown")
return nil
}
if g.started {
g.initCmd()
}
g.killed = false
g.started = true
go g.healthcheck()
return g.p.Run()
return g.p.Start()
}

func (g *GoUnicorn) healthcheck() {
Expand Down Expand Up @@ -96,8 +105,77 @@ func (g *GoUnicorn) healthcheck() {
}
}

// Reload forwards SIGHUP to the gunicorn process tracked by g.
// Errors are logged as warnings; nothing is returned to the caller.
func (g *GoUnicorn) Reload() {
	g.log.WithField("method", "reload").Info("reloading gunicorn")
	// exec.Cmd.Process is only set once the command has been started, so a
	// signal that arrives before then must not dereference a nil pointer.
	if g.p == nil || g.p.Process == nil {
		g.log.Warning("failed to reload gunicorn: process not started")
		return
	}
	if err := g.p.Process.Signal(syscall.SIGHUP); err != nil {
		g.log.WithError(err).Warning("failed to reload gunicorn")
	}
}

func (g *GoUnicorn) Restart() {
g.log.WithField("method", "restart").Info("restart gunicorn")
if g.pidFile == nil {
g.log.Warning("pidfile is non existent, cannot restart")
return
}

err := g.p.Process.Signal(syscall.SIGUSR2)
if err != nil {
g.log.WithError(err).Warning("failed to restart gunicorn")
return
}

newPidFile := fmt.Sprintf("%s.2", *g.pidFile)

// Wait for the new PID file to be created
for range time.Tick(1 * time.Second) {

Check failure on line 132 in internal/gounicorn/gounicorn.go

View workflow job for this annotation

GitHub Actions / lint-golint

SA1015: using time.Tick leaks the underlying ticker, consider using it only in endless functions, tests and the main package, and use time.NewTicker here (staticcheck)
_, err = os.Stat(newPidFile)
if err == nil || !os.IsNotExist(err) {
break
}
g.log.Debugf("waiting for new gunicorn pidfile to appear at %s", newPidFile)
}
if err != nil {
g.log.WithError(err).Warning("failed to find the new gunicorn process, aborting")
return
}

newPidB, err := os.ReadFile(newPidFile)
if err != nil {
g.log.WithError(err).Warning("failed to find the new gunicorn process, aborting")
return
}
newPidS := strings.TrimSpace(string(newPidB[:]))
newPid, err := strconv.Atoi(newPidS)
if err != nil {
g.log.WithError(err).Warning("failed to find the new gunicorn process, aborting")
return
}
g.log.Warningf("new gunicorn PID is %d", newPid)

newProcess, err := utils.FindProcess(newPid)
if newProcess == nil || err != nil {
g.log.WithError(err).Warning("failed to find the new gunicorn process, aborting")
return
}

// The new process has started, let's gracefully kill the old one
g.log.Warningf("killing old gunicorn")
err = g.p.Process.Signal(syscall.SIGTERM)
if err != nil {
g.log.Warning("failed to kill old instance of gunicorn")
}

g.p.Process = newProcess

// No need to close any files and the .2 pid file is deleted by Gunicorn
}

func (g *GoUnicorn) Kill() {
g.killed = true
if !g.started {
return
}
var err error
if runtime.GOOS == "darwin" {
g.log.WithField("method", "kill").Warning("stopping gunicorn")
Expand All @@ -109,4 +187,8 @@ func (g *GoUnicorn) Kill() {
if err != nil {
g.log.WithError(err).Warning("failed to stop gunicorn")
}
if g.pidFile != nil {
os.Remove(*g.pidFile)
}
g.killed = true
}
39 changes: 39 additions & 0 deletions internal/utils/process.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
package utils

import (
"errors"
"fmt"
"os"
"syscall"
)

// FindProcess looks up the process with the given pid and probes it with
// signal 0 to determine whether it is actually there.
//
// It returns:
//   - (proc, nil) when the probe succeeds, or fails with EPERM (the signal
//     was refused for permission reasons rather than because the pid is gone);
//   - (nil, nil) when the probe reports os.ErrProcessDone or ESRCH;
//   - (nil, err) for a non-positive pid or any other failure.
func FindProcess(pid int) (*os.Process, error) {
	if pid <= 0 {
		return nil, fmt.Errorf("invalid pid %v", pid)
	}
	// The error doesn't mean anything on Unix systems, let's just check manually
	// that the new gunicorn master has properly started
	// https://github.com/golang/go/issues/34396
	proc, err := os.FindProcess(pid)
	if err != nil {
		return nil, err
	}
	err = proc.Signal(syscall.Signal(0))
	// errors.Is is used instead of a type assertion so that wrapped errno
	// values are still recognised.
	switch {
	case err == nil:
		return proc, nil
	case errors.Is(err, os.ErrProcessDone), errors.Is(err, syscall.ESRCH):
		return nil, nil
	case errors.Is(err, syscall.EPERM):
		return proc, nil
	default:
		return nil, err
	}
}

0 comments on commit e934be4

Please sign in to comment.