From 9e8da3bdd041b1a42fd8d2ef1d732907bb0630c7 Mon Sep 17 00:00:00 2001 From: mustard Date: Fri, 25 Oct 2024 21:04:55 +0800 Subject: [PATCH] [supervisor] set pod failure reason when supervisor is reaped (#20318) * [supervisor] set pod failed reason when supervisor been reap * Debug commit * upgrade pkg * Revert "Debug commit" This reverts commit 1b6bde80c410034b62e235438b8f7262fa7f63b6. --- components/supervisor/cmd/init.go | 48 +++++++++++++++++++++++++------ components/supervisor/go.mod | 2 +- components/supervisor/go.sum | 6 ++-- 3 files changed, 44 insertions(+), 12 deletions(-) diff --git a/components/supervisor/cmd/init.go b/components/supervisor/cmd/init.go index b0045292c79b1a..0a120bf995c47b 100644 --- a/components/supervisor/cmd/init.go +++ b/components/supervisor/cmd/init.go @@ -22,8 +22,8 @@ import ( "github.com/gitpod-io/gitpod/common-go/process" "github.com/gitpod-io/gitpod/supervisor/pkg/shared" "github.com/gitpod-io/gitpod/supervisor/pkg/supervisor" + reaper "github.com/gitpod-io/go-reaper" "github.com/prometheus/procfs" - reaper "github.com/ramr/go-reaper" "github.com/spf13/cobra" ) @@ -77,25 +77,55 @@ var initCmd = &cobra.Command{ } supervisorDone := make(chan struct{}) + handledByReaper := make(chan int) + handleSupervisorExit := func(exitCode int) { + if exitCode == 0 { + return + } + logs := extractFailureFromRun() + if shared.IsExpectedShutdown(exitCode) { + log.Fatal(logs) + } else { + log.WithError(fmt.Errorf(logs)).Fatal("supervisor run error with unexpected exit code") + } + } go func() { defer close(supervisorDone) err := runCommand.Wait() - if err != nil && !(strings.Contains(err.Error(), "signal: ") || strings.Contains(err.Error(), "no child processes")) { + if err == nil { + return + } + // exited by reaper + if strings.Contains(err.Error(), "no child processes") { + ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) + defer cancel() + select { + case <-ctx.Done(): // timeout + case exitCode := <-handledByReaper: + handleSupervisorExit(exitCode) + } + } else if !(strings.Contains(err.Error(), "signal: ")) { if eerr, ok := err.(*exec.ExitError); ok && eerr.ExitCode() != 0 { - logs := extractFailureFromRun() - if shared.IsExpectedShutdown(eerr.ExitCode()) { - log.Fatal(logs) - } else { - log.WithError(fmt.Errorf(logs)).Fatal("supervisor run error with unexpected exit code") - } + handleSupervisorExit(eerr.ExitCode()) } log.WithError(err).Error("supervisor run error") return } }() // start the reaper to clean up zombie processes - reaper.Reap() + reaper.Start(reaper.Config{ + Pid: -1, + Options: 0, + DisablePid1Check: false, + OnReap: func(pid int, wstatus syscall.WaitStatus) { + if pid != runCommand.Process.Pid { + return + } + exitCode := wstatus.ExitStatus() + handledByReaper <- exitCode + }, + }) select { case <-supervisorDone: diff --git a/components/supervisor/go.mod b/components/supervisor/go.mod index 899e75fb6e5439..87d7fa7c40fc38 100644 --- a/components/supervisor/go.mod +++ b/components/supervisor/go.mod @@ -16,6 +16,7 @@ require ( github.com/gitpod-io/gitpod/ide-metrics-api v0.0.0-00010101000000-000000000000 github.com/gitpod-io/gitpod/supervisor/api v0.0.0-00010101000000-000000000000 github.com/gitpod-io/gitpod/ws-daemon/api v0.0.0-00010101000000-000000000000 + github.com/gitpod-io/go-reaper v0.0.0-20241024192051-78d04cc2e25f github.com/golang/mock v1.6.0 github.com/google/go-cmp v0.6.0 github.com/google/uuid v1.6.0 @@ -29,7 +30,6 @@ require ( github.com/prometheus/common v0.42.0 github.com/prometheus/procfs v0.10.1 github.com/prometheus/pushgateway v1.5.1 - github.com/ramr/go-reaper v0.2.1 github.com/sirupsen/logrus v1.9.3 github.com/soheilhy/cmux v0.1.5 github.com/spf13/cobra v1.4.0 diff --git a/components/supervisor/go.sum b/components/supervisor/go.sum index 2c24d8496bbba4..3f5fee0ea5bdbb 100644 --- a/components/supervisor/go.sum +++ b/components/supervisor/go.sum @@ -119,6 +119,10 @@ github.com/gin-contrib/sse v0.1.0/go.mod h1:RHrZQHXnP2xjPF+u1gW/2HnVO7nvIa9PG3Gm github.com/gin-gonic/gin v1.6.3/go.mod h1:75u5sXoLsGZoRN5Sgbi1eraJ4GU3++wFwWzhwvtwp4M= github.com/gin-gonic/gin v1.7.4 h1:QmUZXrvJ9qZ3GfWvQ+2wnW/1ePrTEJqPKMYEU3lD/DM= github.com/gin-gonic/gin v1.7.4/go.mod h1:jD2toBW3GZUr5UMcdrwQA10I7RuaFOl/SGeDjXkfUtY= +github.com/gitpod-io/go-reaper v0.0.0-20241023132555-bf7fe3193e95 h1:8CExGQuXMl8ZgFzkBfU1+K8+yFLxqttXz9JGPBaoY0g= +github.com/gitpod-io/go-reaper v0.0.0-20241023132555-bf7fe3193e95/go.mod h1:WJlnZLfag2J4+z28ZjM0CxgVqjYVYF8pnspnleDwrcA= +github.com/gitpod-io/go-reaper v0.0.0-20241024192051-78d04cc2e25f h1:jC8c/ONG+vsaxY6y17rM+Du7JN/faYfSBiMCFIi2NoA= +github.com/gitpod-io/go-reaper v0.0.0-20241024192051-78d04cc2e25f/go.mod h1:WJlnZLfag2J4+z28ZjM0CxgVqjYVYF8pnspnleDwrcA= github.com/go-kit/kit v0.9.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as= github.com/go-kit/log v0.2.1 h1:MRVx0/zhvdseW+Gza6N9rVzU/IVzaeE1SFI4raAhmBU= github.com/go-kit/log v0.2.1/go.mod h1:NwTd00d/i8cPZ3xOwwiv2PO5MOcx78fFErGNcVmBjv0= @@ -319,8 +323,6 @@ github.com/prometheus/procfs v0.10.1 h1:kYK1Va/YMlutzCGazswoHKo//tZVlFpKYh+Pymzi github.com/prometheus/procfs v0.10.1/go.mod h1:nwNm2aOCAYw8uTR/9bWRREkZFxAUcWzPHWJq+XBB/FM= github.com/prometheus/pushgateway v1.5.1 h1:F+meNVGklhdkJTrpt5l3pXLc86m6IG3+ZF3VfeOwubE= github.com/prometheus/pushgateway v1.5.1/go.mod h1:BcYcT8no4mdtnZxqpkXH3EMT0/Jf2nKgSlwu6GHirU4= -github.com/ramr/go-reaper v0.2.1 h1:zww+wlQOvTjBZuk1920R/e0GFEb6O7+B0WQLV6dM924= -github.com/ramr/go-reaper v0.2.1/go.mod h1:AVypdzrcCXjSc/JYnlXl8TsB+z84WyFzxWE8Jh0MOJc= github.com/rogpeppe/go-internal v1.11.0 h1:cWPaGQEPrBb5/AsnsZesgZZ9yb1OQ+GOISoDNXVBh4M= github.com/rogpeppe/go-internal v1.11.0/go.mod h1:ddIwULY96R17DhadqLgMfk9H9tvdUzkipdSkR5nkCZA= github.com/rs/cors v1.8.2 h1:KCooALfAYGs415Cwu5ABvv9n9509fSiG5SQJn/AQo4U=