From 1c06e93146b04c8b08fc32d4bf95e0a594c8a921 Mon Sep 17 00:00:00 2001 From: Cristian Ciutea Date: Wed, 1 Jul 2020 10:46:28 +0200 Subject: [PATCH] Handle Agent Child processes on windows (#18) * Added missing skipTests param * Added Windows Job Object to handle the child processes when agent process exits. Co-authored-by: cciutea --- internal/agent/service/job_windows.go | 67 +++++++++++++++++++++++ internal/agent/service/service_windows.go | 36 ++++++++++-- win_build.ps1 | 5 +- 3 files changed, 103 insertions(+), 5 deletions(-) create mode 100644 internal/agent/service/job_windows.go diff --git a/internal/agent/service/job_windows.go b/internal/agent/service/job_windows.go new file mode 100644 index 000000000..15a1bd559 --- /dev/null +++ b/internal/agent/service/job_windows.go @@ -0,0 +1,67 @@ +// Copyright 2020 New Relic Corporation. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 +package service + +import ( + "fmt" + "os" + "unsafe" + + "golang.org/x/sys/windows" +) + +// JobObject on Windows allows groups of processes to be managed as a unit. +type JobObject struct { + handle windows.Handle +} + +// NewJob creates a new instance of a Windows Job Object. +func NewJob() (*JobObject, error) { + jobObjectHandle, err := windows.CreateJobObject(nil, nil) + if err != nil { + return nil, fmt.Errorf("failed to create job object: %v", err) + } + + jobInfo := windows.JOBOBJECT_EXTENDED_LIMIT_INFORMATION{ + BasicLimitInformation: windows.JOBOBJECT_BASIC_LIMIT_INFORMATION{ + LimitFlags: windows.JOB_OBJECT_LIMIT_KILL_ON_JOB_CLOSE, + }, + } + _, err = windows.SetInformationJobObject( + jobObjectHandle, + windows.JobObjectExtendedLimitInformation, + uintptr(unsafe.Pointer(&jobInfo)), + uint32(unsafe.Sizeof(jobInfo)), + ) + if err != nil { + return nil, fmt.Errorf("failed to set job object info: %v", err) + } + return &JobObject{ + handle: jobObjectHandle, + }, nil +} + +// AddProcess to the JobObject. +func (jo *JobObject) AddProcess(process *os.Process) error { + + // Using unsafe operation we are using the handle inside os.Process. + processHandle := (*struct { + pid int + handle windows.Handle + })(unsafe.Pointer(process)).handle + + err := windows.AssignProcessToJobObject(jo.handle, processHandle) + if err != nil { + return fmt.Errorf("failed to assign process to job object: %v", err) + } + return nil +} + +// Close will terminate the JobObject and also stop all the subprocesses. +func (jo *JobObject) Close() error { + err := windows.CloseHandle(jo.handle) + if err != nil { + return fmt.Errorf("failed to close the job object: %v", err) + } + return nil +} diff --git a/internal/agent/service/service_windows.go b/internal/agent/service/service_windows.go index 92bb11de5..7fdb7773d 100644 --- a/internal/agent/service/service_windows.go +++ b/internal/agent/service/service_windows.go @@ -8,7 +8,6 @@ import ( "os/exec" "github.com/kardianos/service" - "github.com/newrelic/infrastructure-agent/internal/os/api" "github.com/newrelic/infrastructure-agent/pkg/helpers/windows" "github.com/newrelic/infrastructure-agent/pkg/ipc" @@ -27,7 +26,7 @@ func (svc *Service) Start(s service.Service) (err error) { // - if we start and stop the service immediately, the agent may not stop properly // and so we have to kill it forcefully func (svc *Service) Stop(s service.Service) (err error) { - log.Info("service is stopping. notifying agent process...") + log.Info("service is stopping. notifying agent process.") svc.daemon.Lock() defer svc.daemon.Unlock() @@ -55,7 +54,7 @@ func (svc *Service) Stop(s service.Service) (err error) { // - if we start the service and shutdown the host immediately, the agent may not stop properly // and so we have to kill it forcefully func (svc *Service) Shutdown(s service.Service) (err error) { - log.Debug("Host is shutting down. notifying agent process.") + log.Debug("host is shutting down. notifying agent process.") svc.daemon.Lock() defer svc.daemon.Unlock() @@ -80,6 +79,7 @@ func (svc *Service) Shutdown(s service.Service) (err error) { func (d *daemon) run() { for { + d.Lock() d.ctx, d.cancel = context.WithCancel(context.Background()) d.cmd = exec.CommandContext(d.ctx, GetCommandPath(d.args[0]), d.args[1:]...) @@ -87,7 +87,7 @@ func (d *daemon) run() { d.cmd.Stderr = os.Stderr d.Unlock() - exitCode := api.CheckExitCode(d.cmd.Run()) + exitCode := api.CheckExitCode(runAgentCmd(d.cmd)) switch exitCode { case api.ExitCodeRestart: @@ -100,3 +100,31 @@ func (d *daemon) run() { } } } + +// runAgentCmd will run the agent process and wait it to exit. The process will be added +// to an Windows job object to handle the child processes. +func runAgentCmd(cmd *exec.Cmd) error { + jobObject, err := NewJob() + if err != nil { + log.Warnf("failed to create Job Object for Agent: %v", err) + } + defer func() { + if jobObject != nil { + if err := jobObject.Close(); err != nil { + log.Warnf("failed to close Agent Job Object: %v", err) + } + } + }() + + if err := cmd.Start(); err != nil { + return err + } + + if jobObject != nil { + if err := jobObject.AddProcess(cmd.Process); err != nil { + log.Warnf("failed to add Agent process to Job Object: %v", err) + } + } + + return cmd.Wait() +} diff --git a/win_build.ps1 b/win_build.ps1 index 68cc8f57d..b9da7da3c 100644 --- a/win_build.ps1 +++ b/win_build.ps1 @@ -5,7 +5,10 @@ param ( # Target architecture: amd64 (default) or 386 [ValidateSet("amd64", "386")] - [string]$arch="amd64" + [string]$arch="amd64", + + # Skip tests + [switch]$skipTests=$false ) echo "--- Checking dependencies"