diff --git a/lib/vnet/admin_process_darwin.go b/lib/vnet/admin_process_darwin.go index 0465dd8e8aae5..9a7b789657a50 100644 --- a/lib/vnet/admin_process_darwin.go +++ b/lib/vnet/admin_process_darwin.go @@ -19,6 +19,7 @@ package vnet import ( "context" "errors" + "os" "time" "github.com/gravitational/trace" @@ -89,18 +90,21 @@ func RunDarwinAdminProcess(ctx context.Context, config daemon.Config) error { g, ctx := errgroup.WithContext(ctx) g.Go(func() error { + defer log.InfoContext(ctx, "Network stack terminated.") if err := networkStack.run(ctx); err != nil { return trace.Wrap(err, "running network stack") } return errors.New("network stack terminated") }) g.Go(func() error { + defer log.InfoContext(ctx, "OS configuration loop exited.") if err := osConfigurator.runOSConfigurationLoop(ctx); err != nil { return trace.Wrap(err, "running OS configuration loop") } return errors.New("OS configuration loop terminated") }) g.Go(func() error { + defer log.InfoContext(ctx, "Ping loop exited.") tick := time.Tick(time.Second) for { select { @@ -113,7 +117,27 @@ func RunDarwinAdminProcess(ctx context.Context, config daemon.Config) error { } } }) - return trace.Wrap(g.Wait(), "running VNet admin process") + + done := make(chan error) + go func() { + done <- g.Wait() + }() + + select { + case err := <-done: + return trace.Wrap(err, "running VNet admin process") + case <-ctx.Done(): + } + + select { + case err := <-done: + // network stack exited cleanly within timeout + return trace.Wrap(err, "running VNet admin process") + case <-time.After(10 * time.Second): + log.ErrorContext(ctx, "VNet admin process did not exit within 10 seconds, forcing shutdown.") + os.Exit(1) + return nil + } } func createTUNDevice(ctx context.Context) (tun.Device, string, error) { diff --git a/lib/vnet/network_stack.go b/lib/vnet/network_stack.go index 6c7d38ba14415..b2bf30d67d9fa 100644 --- a/lib/vnet/network_stack.go +++ b/lib/vnet/network_stack.go @@ -328,13 +328,16 @@ func (ns *networkStack) run(ctx context.Context) error { // When the context is canceled for any reason (the caller or one of the other concurrent tasks may // have canceled it) destroy everything and quit. <-ctx.Done() + ns.slog.InfoContext(ctx, "Context canceled, beginning network stack shutdown.") // In-flight connections should start terminating after closing [ns.destroyed]. close(ns.destroyed) // Close the link endpoint and the TUN, this should cause [forwardBetweenTunAndNetstack] to terminate // if it hasn't already. + ns.slog.InfoContext(ctx, "Closing link endpoint.") ns.linkEndpoint.Close() + ns.slog.InfoContext(ctx, "Closing TUN device.") err := trace.Wrap(ns.tun.Close(), "closing TUN device") allErrors <- err @@ -345,9 +348,11 @@ func (ns *networkStack) run(ctx context.Context) error { _ = g.Wait() // Wait for all connections and goroutines to clean themselves up. + ns.slog.InfoContext(ctx, "Waiting for all connections and goroutines to clean up.") ns.wg.Wait() // Now we can destroy the gVisor networking stack and wait for all its goroutines to terminate. + ns.slog.InfoContext(ctx, "Destroying networking stack.") ns.stack.Destroy() close(allErrors) @@ -620,7 +625,9 @@ func forwardBetweenTunAndNetstack(ctx context.Context, tun tunDevice, linkEndpoi g, ctx := errgroup.WithContext(ctx) g.Go(func() error { return forwardNetstackToTUN(ctx, linkEndpoint, tun) }) g.Go(func() error { return forwardTUNtoNetstack(ctx, tun, linkEndpoint) }) - return trace.Wrap(g.Wait()) + err := g.Wait() + slog.DebugContext(ctx, "Finished forwarding IP packets between OS and VNet.") + return trace.Wrap(err) } func forwardNetstackToTUN(ctx context.Context, linkEndpoint *channel.Endpoint, tun tunDevice) error {