Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 10 additions & 5 deletions lib/bpf/bpf.go
Original file line number Diff line number Diff line change
Expand Up @@ -189,16 +189,21 @@ func New(config *Config) (BPF, error) {
}

// Close will stop any running BPF programs. Note this is only for a graceful
// shutdown, from the man page for BPF: "Generally, eBPF programs are loaded
// by the user process and automatically unloaded when the process exits."
func (s *Service) Close() error {
// shutdown, from the man page for BPF: "Generally, eBPF programs are loaded by
// the user process and automatically unloaded when the process exits". The
// restarting parameter indicates that Teleport is shutting down because of a
// restart, and thus we should skip any deinitialization that would interfere
// with the new Teleport instance.
func (s *Service) Close(restarting bool) error {
// Unload the BPF programs.
s.exec.close()
s.open.close()
s.conn.close()

// Close cgroup service.
s.cgroup.Close()
// Close cgroup service. We should not unmount the cgroup filesystem if
// we're restarting.
skipCgroupUnmount := restarting
s.cgroup.Close(skipCgroupUnmount)

// Signal to the loop pulling events off the perf buffer to shutdown.
s.closeFunc()
Expand Down
6 changes: 4 additions & 2 deletions lib/bpf/bpf_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,8 @@ func TestRootWatch(t *testing.T) {
require.NoError(t, err)

t.Cleanup(func() {
require.NoError(t, service.Close())
const restarting = false
require.NoError(t, service.Close(restarting))
})

// Create a fake audit log that can be used to capture the events emitted.
Expand Down Expand Up @@ -465,7 +466,8 @@ func moveIntoCgroup(t *testing.T, pid int) (uint64, error) {
return 0, trace.Wrap(err)
}
t.Cleanup(func() {
require.NoError(t, cgroupSrv.Close())
const skipUnmount = false
require.NoError(t, cgroupSrv.Close(skipUnmount))
})

sessionID := uuid.New().String()
Expand Down
4 changes: 2 additions & 2 deletions lib/bpf/common.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ type BPF interface {
CloseSession(ctx *SessionContext) error

// Close will stop any running BPF programs.
Close() error
Close(restarting bool) error
}

// SessionContext contains all the information needed to track and emit
Expand Down Expand Up @@ -125,7 +125,7 @@ type NOP struct {
}

// Close closes the NOP service. Note this function does nothing.
func (s *NOP) Close() error {
func (s *NOP) Close(bool) error {
return nil
}

Expand Down
10 changes: 8 additions & 2 deletions lib/cgroup/cgroup.go
Original file line number Diff line number Diff line change
Expand Up @@ -92,13 +92,19 @@ func New(config *Config) (*Service, error) {
return s, nil
}

// Close will unmount the cgroup filesystem.
func (s *Service) Close() error {
// Close will clean up the session cgroups and unmount the cgroup2 filesystem,
// unless otherwise requested.
func (s *Service) Close(skipUnmount bool) error {
err := s.cleanupHierarchy()
if err != nil {
return trace.Wrap(err)
}

if skipUnmount {
log.Debugf("Cleaned up Teleport session hierarchy at: %v.", s.teleportRoot)
return nil
}

err = s.unmount()
if err != nil {
return trace.Wrap(err)
Expand Down
40 changes: 38 additions & 2 deletions lib/cgroup/cgroup_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,8 @@ func TestRootCreate(t *testing.T) {
require.NoDirExists(t, cgroupPath)

// Close the cgroup service, this should unmound the cgroup filesystem.
err = service.Close()
const skipUnmount = false
err = service.Close(skipUnmount)
require.NoError(t, err)

// Make sure the cgroup filesystem has been unmounted.
Expand All @@ -96,7 +97,8 @@ func TestRootCleanup(t *testing.T) {
MountPath: dir,
})
require.NoError(t, err)
defer service.Close()
const skipUnmount = false
defer service.Close(skipUnmount)

// Create fake session ID and cgroup.
sessionID := uuid.New().String()
Expand All @@ -112,6 +114,40 @@ func TestRootCleanup(t *testing.T) {
require.NoDirExists(t, cgroupPath)
}

// TestRootSkipUnmount checks that closing the service with skipUnmount set to
// true works correctly; i.e. it cleans up the cgroups we're responsible for but
// doesn't unmount the cgroup2 file system.
func TestRootSkipUnmount(t *testing.T) {
// This test must be run as root. Only root can create cgroups.
if !isRoot() {
t.Skip("Tests for package cgroup can only be run as root.")
}

t.Parallel()

// Start a cgroup service with a temporary directory as the mount path.
service, err := New(&Config{
MountPath: t.TempDir(),
})
require.NoError(t, err)

sessionID := uuid.NewString()
sessionPath := path.Join(service.teleportRoot, sessionID)
require.NoError(t, service.Create(sessionID))

require.DirExists(t, sessionPath)

const skipUnmount = true
require.NoError(t, service.Close(skipUnmount))

require.DirExists(t, service.teleportRoot)
require.NoDirExists(t, path.Join(service.teleportRoot, sessionID))

require.NoError(t, service.unmount())

require.NoDirExists(t, service.teleportRoot)
}

// isRoot returns a boolean if the test is being run as root or not. Tests
// for this package must be run as root.
func isRoot() bool {
Expand Down
12 changes: 10 additions & 2 deletions lib/service/service.go
Original file line number Diff line number Diff line change
Expand Up @@ -2137,6 +2137,12 @@ func (process *TeleportProcess) initSSH() error {
proxyGetter := reversetunnel.NewConnectedProxyGetter()

process.RegisterCriticalFunc("ssh.node", func() error {
// restartingOnGracefulShutdown will be set to true before the function
// exits if the function is exiting because Teleport is gracefully
// shutting down as a consequence of internally-triggered reloading or
// being signaled to restart.
var restartingOnGracefulShutdown bool

conn, err := process.waitForConnector(SSHIdentityEvent, log)
if conn == nil {
return trace.Wrap(err)
Expand Down Expand Up @@ -2189,7 +2195,7 @@ func (process *TeleportProcess) initSSH() error {
if err != nil {
return trace.Wrap(err)
}
defer func() { warnOnErr(ebpf.Close(), log) }()
defer func() { warnOnErr(ebpf.Close(restartingOnGracefulShutdown), log) }()

// Start access control programs. This is blocking and if the BPF programs fail to
// load, the node will not start. If access control is not enabled, this will simply
Expand Down Expand Up @@ -2396,7 +2402,9 @@ func (process *TeleportProcess) initSSH() error {
warnOnErr(s.Close(), log)
} else {
log.Infof("Shutting down gracefully.")
warnOnErr(s.Shutdown(payloadContext(event.Payload, log)), log)
ctx := payloadContext(event.Payload, log)
restartingOnGracefulShutdown = services.IsProcessReloading(ctx) || services.HasProcessForked(ctx)
warnOnErr(s.Shutdown(ctx), log)
}

s.Wait()
Expand Down