Skip to content

Commit

Permalink
Fix logging, collect status of forked processes
Browse files Browse the repository at this point in the history
fixes #1785, fixes #1776

This commit fixes several issues with output:

First teleport start now prints output
matching quickstart guide and sets default
console logging to ERROR.

SIGCHLD handler now only collects
processes PID forked during live restart
to avoid confusing other wait calls that
have no process status to collect any more.
  • Loading branch information
klizhentas committed Mar 19, 2018
1 parent 8ea0442 commit 7d05c05
Show file tree
Hide file tree
Showing 8 changed files with 89 additions and 13 deletions.
3 changes: 3 additions & 0 deletions constants.go
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,9 @@ const (
// ComponentDiagnostic is a diagnostic service
ComponentDiagnostic = "diagnostic"

// ComponentClient is a client
ComponentClient = "client"

// ComponentTunClient is a tunnel client
ComponentTunClient = "client:tunnel"

Expand Down
2 changes: 1 addition & 1 deletion lib/service/cfg.go
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,7 @@ type Config struct {

// UploadEventsC is a channel for upload events
// used in tests
UploadEventsC chan *events.UploadEvent
UploadEventsC chan *events.UploadEvent `json:"-"`
}

// ApplyToken assigns a given token to all internal services but only if token
Expand Down
13 changes: 12 additions & 1 deletion lib/service/service.go
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,11 @@ type TeleportProcess struct {
// importedDescriptors is a list of imported file descriptors
// passed by the parent process
importedDescriptors []FileDescriptor

// forkedPIDs is a collection of a teleport processes forked
// during restart used to collect their status in case if the
// child process crashed.
forkedPIDs []int
}

// GetAuthServer returns the process' auth server
Expand Down Expand Up @@ -700,7 +705,7 @@ func (process *TeleportProcess) initAuthService() error {
// requests to the Auth API
var authTunnel *auth.AuthTunnel
process.RegisterFunc("auth.ssh", func() error {
log.Infof("Auth SSH service is starting on %v", cfg.Auth.SSHAddr.Addr)
log.Infof("Auth SSH service is starting on %v.", cfg.Auth.SSHAddr.Addr)
authTunnel, err = auth.NewTunnel(
cfg.Auth.SSHAddr,
identity.KeySigner,
Expand Down Expand Up @@ -728,6 +733,8 @@ func (process *TeleportProcess) initAuthService() error {
})

process.RegisterFunc("auth.tls", func() error {
utils.Consolef(cfg.Console, teleport.ComponentAuth, "Auth service is starting on %v.", cfg.Auth.SSHAddr.Addr)

// since tlsServer.Serve is a blocking call, we emit this even right before
// the service has started
process.BroadcastEvent(Event{Name: AuthTLSReady, Payload: nil})
Expand Down Expand Up @@ -963,6 +970,7 @@ func (process *TeleportProcess) initSSH() error {
}

log.Infof("Service is starting on %v %v.", cfg.SSH.Addr.Addr, process.Config.CachePolicy)
utils.Consolef(cfg.Console, teleport.ComponentNode, "Service is starting on %v.", cfg.SSH.Addr.Addr)
go s.Serve(listener)

// broadcast that the node has started
Expand Down Expand Up @@ -1401,6 +1409,7 @@ func (process *TeleportProcess) initProxyEndpoint(conn *Connector) error {
return trace.Wrap(err)
}
process.RegisterFunc("proxy.reveresetunnel.server", func() error {
utils.Consolef(cfg.Console, teleport.ComponentProxy, "Reverse tunnel service is starting on %v.", cfg.Proxy.ReverseTunnelListenAddr.Addr)
log.Infof("Starting on %v using %v", cfg.Proxy.ReverseTunnelListenAddr.Addr, process.Config.CachePolicy)
if err := tsrv.Start(); err != nil {
log.Error(err)
Expand Down Expand Up @@ -1443,6 +1452,7 @@ func (process *TeleportProcess) initProxyEndpoint(conn *Connector) error {
Handler: proxyLimiter,
}
process.RegisterFunc("proxy.web", func() error {
utils.Consolef(cfg.Console, teleport.ComponentProxy, "Web proxy service is starting on %v.", cfg.Proxy.WebAddr.Addr)
log.Infof("Web proxy service is starting on %v.", cfg.Proxy.WebAddr.Addr)
defer webHandler.Close()
process.BroadcastEvent(Event{Name: ProxyWebServerReady, Payload: webHandler})
Expand Down Expand Up @@ -1482,6 +1492,7 @@ func (process *TeleportProcess) initProxyEndpoint(conn *Connector) error {
}

process.RegisterFunc("proxy.ssh", func() error {
utils.Consolef(cfg.Console, teleport.ComponentProxy, "SSH proxy service is starting on %v.", cfg.Proxy.SSHAddr.Addr)
log.Infof("SSH proxy service is starting on %v", cfg.Proxy.SSHAddr.Addr)
go sshProxy.Serve(listener)
// broadcast that the proxy ssh server has started
Expand Down
64 changes: 60 additions & 4 deletions lib/service/signals.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ func (process *TeleportProcess) printShutdownStatus(ctx context.Context) {
// WaitForSignals waits for system signals and processes them.
// Should not be called twice by the process.
func (process *TeleportProcess) WaitForSignals(ctx context.Context) error {

sigC := make(chan os.Signal, 1024)
signal.Notify(sigC,
syscall.SIGQUIT, // graceful shutdown
Expand Down Expand Up @@ -120,9 +121,7 @@ func (process *TeleportProcess) WaitForSignals(ctx context.Context) error {
log.Infof("All services stopped, exiting.")
return nil
case syscall.SIGCHLD:
log.Debugf("Child exited, got %q, collecting status.", signal)
var wait syscall.WaitStatus
syscall.Wait4(-1, &wait, syscall.WNOHANG, nil)
process.collectStatuses()
default:
log.Infof("Ignoring %q.", signal)
}
Expand Down Expand Up @@ -367,11 +366,68 @@ func (process *TeleportProcess) forkChild() error {
Files: files,
Sys: &syscall.SysProcAttr{},
})

if err != nil {
return trace.ConvertSystemError(err)
}
process.pushForkedPID(p.Pid)

log.WithFields(logrus.Fields{"pid": p.Pid}).Infof("Started new child process.")
return nil
}

// collectStatuses attempts to collect exit statuses from
// forked teleport child processes.
// If forked teleport process exited with an error during graceful
// restart, parent process has to collect the child process status
// otherwise the child process will become a zombie process.
// Call Wait4(-1) is trying to collect status of any child
// leads to warnings in logs, because other parts of the program could
// have tried to collect the status of this process.
// Instead this logic tries to collect statuses of the processes
// forked during restart procedure.
func (process *TeleportProcess) collectStatuses() {
pids := process.getForkedPIDs()
if len(pids) == 0 {
return
}
for _, pid := range pids {
var wait syscall.WaitStatus
rpid, err := syscall.Wait4(pid, &wait, syscall.WNOHANG, nil)
if err != nil {
log.Errorf("Wait call failed: %v.", err)
continue
}
if rpid == pid {
process.popForkedPID(pid)
log.Warningf("Forked teleport process %v has exited with status: %v.", pid, wait.ExitStatus())
}
}
}

func (process *TeleportProcess) pushForkedPID(pid int) {
process.Lock()
defer process.Unlock()
process.forkedPIDs = append(process.forkedPIDs, pid)
}

func (process *TeleportProcess) popForkedPID(pid int) {
process.Lock()
defer process.Unlock()
for i, p := range process.forkedPIDs {
if p == pid {
process.forkedPIDs = append(process.forkedPIDs[:i], process.forkedPIDs[i+1:]...)
return
}
}
}

func (process *TeleportProcess) getForkedPIDs() []int {
process.Lock()
defer process.Unlock()
if len(process.forkedPIDs) == 0 {
return nil
}
out := make([]int, len(process.forkedPIDs))
copy(out, process.forkedPIDs)
return out
}
11 changes: 8 additions & 3 deletions lib/utils/cli.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ import (
"log/syslog"
"os"
"strconv"
"strings"

"github.com/gravitational/teleport"

Expand Down Expand Up @@ -139,12 +140,16 @@ func UserMessageFromError(err error) string {

// Consolef prints the same message to a 'ui console' (if defined) and also to
// the logger with INFO priority
func Consolef(w io.Writer, msg string, params ...interface{}) {
func Consolef(w io.Writer, component string, msg string, params ...interface{}) {
entry := log.WithFields(log.Fields{
trace.Component: component,
})
msg = fmt.Sprintf(msg, params...)
entry.Info(msg)
if w != nil {
fmt.Fprintln(w, msg)
component := strings.ToUpper(component)
fmt.Fprintf(w, "[%v]%v%v\n", strings.ToUpper(component), strings.Repeat(" ", 8-len(component)), msg)
}
log.Info(msg)
}

// InitCLIParser configures kingpin command line args parser with
Expand Down
5 changes: 3 additions & 2 deletions tool/tctl/common/tctl.go
Original file line number Diff line number Diff line change
Expand Up @@ -154,8 +154,9 @@ func connectToAuthService(cfg *service.Config) (client auth.ClientI, err error)
// check connectivity by calling something on a clinet:
conn, err := client.GetDialer()(context.TODO())
if err != nil {
utils.Consolef(os.Stderr,
"Cannot connect to the auth server: %v.\nIs the auth server running on %v?", err, cfg.AuthServers[0].Addr)
utils.Consolef(os.Stderr, teleport.ComponentClient,
"Cannot connect to the auth server: %v.\nIs the auth server running on %v?",
err, cfg.AuthServers[0].Addr)
os.Exit(1)
}
conn.Close()
Expand Down
2 changes: 1 addition & 1 deletion tool/teleport/common/teleport.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ func Run(options Options) (executedCommand string, conf *service.Config) {
}
// configure logger for a typical CLI scenario until configuration file is
// parsed
utils.InitLogger(utils.LoggingForDaemon, log.WarnLevel)
utils.InitLogger(utils.LoggingForDaemon, log.ErrorLevel)
app := utils.InitCLIParser("teleport", "Clustered SSH service. Learn more at https://gravitational.com/teleport")

// define global flags:
Expand Down
2 changes: 1 addition & 1 deletion tool/teleport/common/teleport_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ func (s *MainTestSuite) TestDefault(c *check.C) {
c.Assert(conf.SSH.Enabled, check.Equals, true)
c.Assert(conf.Proxy.Enabled, check.Equals, true)
c.Assert(conf.Console, check.Equals, os.Stdout)
c.Assert(log.GetLevel(), check.Equals, log.WarnLevel)
c.Assert(log.GetLevel(), check.Equals, log.ErrorLevel)
}

func (s *MainTestSuite) TestRolesFlag(c *check.C) {
Expand Down

0 comments on commit 7d05c05

Please sign in to comment.