From c01f11c68c33a96f14705d0a6aa5ad36b7b0ac30 Mon Sep 17 00:00:00 2001 From: Pavel Zbitskiy Date: Thu, 12 Feb 2026 14:16:27 -0500 Subject: [PATCH 1/2] node: collect goroutine stacks before SIGKILL --- nodecontrol/NodeController.go | 35 ++++++++++++++++++++++++++++++++++- nodecontrol/algodControl.go | 2 +- nodecontrol/kmdControl.go | 2 +- 3 files changed, 36 insertions(+), 3 deletions(-) diff --git a/nodecontrol/NodeController.go b/nodecontrol/NodeController.go index 957c9dc39d..6702111e67 100644 --- a/nodecontrol/NodeController.go +++ b/nodecontrol/NodeController.go @@ -17,6 +17,9 @@ package nodecontrol import ( + "context" + "fmt" + "os" "path/filepath" "syscall" "time" @@ -111,7 +114,7 @@ func (nc NodeController) stopProcesses() (kmdAlreadyStopped bool, err error) { return } -func killPID(pid int) (killed bool, err error) { +func killPID(pid int, beforeKill func()) (killed bool, err error) { process, err := util.FindProcess(pid) if process == nil || err != nil { return false, err @@ -130,8 +133,38 @@ func killPID(pid int) (killed bool, err error) { } select { case <-waitLong: + if beforeKill != nil { + beforeKill() + } return true, util.KillProcess(pid, syscall.SIGKILL) case <-time.After(time.Millisecond * 100): } } } + +// collectGoroutineStacks fetches goroutine stacks from the node's pprof endpoint +// and saves them to a file in the data directory.
+func (nc *NodeController) collectGoroutineStacks() { + algodClient, err := nc.AlgodClient() + if err != nil { + fmt.Fprintf(os.Stderr, "failed to create algod client for goroutine dump: %v\n", err) + return + } + + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + + goRoutines, err := algodClient.GetGoRoutines(ctx) + if err != nil { + fmt.Fprintf(os.Stderr, "failed to fetch goroutine stacks: %v\n", err) + return + } + + dumpFile := filepath.Join(nc.algodDataDir, fmt.Sprintf("goroutine-dump-%s.txt", time.Now().Format("20060102-150405"))) + err = os.WriteFile(dumpFile, []byte(goRoutines), 0600) + if err != nil { + fmt.Fprintf(os.Stderr, "failed to write goroutine dump to %s: %v\n", dumpFile, err) + return + } + fmt.Fprintf(os.Stderr, "goroutine dump saved to %s\n", dumpFile) +} diff --git a/nodecontrol/algodControl.go b/nodecontrol/algodControl.go index e5f3ef06bf..625a4b763e 100644 --- a/nodecontrol/algodControl.go +++ b/nodecontrol/algodControl.go @@ -175,7 +175,7 @@ func (nc *NodeController) StopAlgod() (err error) { algodPID, err := nc.GetAlgodPID() if err == nil { // Kill algod by PID - killed, killErr := killPID(int(algodPID)) + killed, killErr := killPID(int(algodPID), nc.collectGoroutineStacks) if killErr != nil { return killErr } diff --git a/nodecontrol/kmdControl.go b/nodecontrol/kmdControl.go index 2acdd78e80..17c4b0f3f6 100644 --- a/nodecontrol/kmdControl.go +++ b/nodecontrol/kmdControl.go @@ -121,7 +121,7 @@ func (kc *KMDController) StopKMD() (alreadyStopped bool, err error) { kmdPID, err := kc.GetKMDPID() if err == nil { // Kill kmd by PID - killed, killErr := killPID(int(kmdPID)) + killed, killErr := killPID(int(kmdPID), nil) if killErr != nil { return false, killErr } From dc210e2bb64852ab9c5f50778654f5a944f35046 Mon Sep 17 00:00:00 2001 From: Pavel Zbitskiy Date: Thu, 26 Feb 2026 13:44:24 -0500 Subject: [PATCH 2/2] update error message if pprof disabled --- nodecontrol/NodeController.go | 7 ++++++- 1 file 
changed, 6 insertions(+), 1 deletion(-) diff --git a/nodecontrol/NodeController.go b/nodecontrol/NodeController.go index 6702111e67..1c8ba7f508 100644 --- a/nodecontrol/NodeController.go +++ b/nodecontrol/NodeController.go @@ -21,6 +21,7 @@ import ( "fmt" "os" "path/filepath" + "strings" "syscall" "time" @@ -156,7 +157,11 @@ func (nc *NodeController) collectGoroutineStacks() { goRoutines, err := algodClient.GetGoRoutines(ctx) if err != nil { - fmt.Fprintf(os.Stderr, "failed to fetch goroutine stacks: %v\n", err) + msg := err.Error() + if strings.Contains(msg, "404 Not Found") { + msg = "pprof most likely disabled, consider setting EnableProfiler=true for further debugging" + } + fmt.Fprintf(os.Stderr, "cannot fetch goroutine stacks: %s\n", msg) return }