diff --git a/lib/vnet/fqdn_resolver.go b/lib/vnet/fqdn_resolver.go
index c204ae7047f1e..38dad60b12419 100644
--- a/lib/vnet/fqdn_resolver.go
+++ b/lib/vnet/fqdn_resolver.go
@@ -226,11 +226,12 @@ func (r *fqdnResolver) resolveAppInfoForCluster(
 	}, nil
 }
 
-// VNet SSH handles SSH hostnames matching "<hostname>.<root-cluster-name>." or
-// "<hostname>.<leaf-cluster-name>.<root-cluster-name>.". tryResolveSSH checks if
-// fqdn matches that pattern for any logged-in cluster and if so returns a
-// match. We never actually query for whether or not a matching SSH node exists,
-// we just attempt to dial it when the client connects to the assigned IP.
+// VNet SSH handles SSH hostnames matching "<hostname>.<cluster-name>.", where
+// the <cluster-name> may be the name of a root or leaf cluster.
+// tryResolveSSH checks if fqdn matches that pattern for any known cluster
+// and if so returns a match. We never actually query for whether or not a
+// matching SSH node exists, we just attempt to dial it when the client
+// connects to the assigned IP.
 func (r *fqdnResolver) tryResolveSSH(ctx context.Context, profileNames []string, fqdn string) (*vnetv1.ResolveFQDNResponse, error) {
 	for _, profileName := range profileNames {
 		log := log.With("profile", profileName)
@@ -240,23 +241,21 @@ func (r *fqdnResolver) tryResolveSSH(ctx context.Context, profileNames []string,
 			continue
 		}
 		rootClusterName := rootClient.ClusterName()
-		if !isDescendantSubdomain(fqdn, rootClusterName) {
-			continue
-		}
+		log = log.With("root_cluster", rootClusterName)
 		leafClusters, err := r.cfg.leafClusterCache.getLeafClusters(ctx, rootClient)
 		if err != nil {
 			// Good chance we're here because the user is not logged in to the profile.
 			log.ErrorContext(ctx, "Failed to list leaf clusters, SSH nodes in this cluster will not be resolved", "error", err)
-			return nil, errNoMatch
+			continue
 		}
 		rootDialOpts, err := r.cfg.clientApplication.GetDialOptions(ctx, profileName)
 		if err != nil {
 			log.ErrorContext(ctx, "Failed to get cluster dial options, SSH nodes in this cluster will not be resolved", "error", err)
-			return nil, errNoMatch
+			continue
 		}
 		for _, leafClusterName := range leafClusters {
 			log := log.With("leaf_cluster", leafClusterName)
-			if !isDescendantSubdomain(fqdn, leafClusterName+"."+rootClusterName) {
+			if !isDescendantSubdomain(fqdn, leafClusterName) {
 				continue
 			}
 			leafClient, err := r.cfg.clientApplication.GetCachedClient(ctx, profileName, leafClusterName)
@@ -282,8 +281,10 @@ func (r *fqdnResolver) tryResolveSSH(ctx context.Context, profileNames []string,
 				},
 			}, nil
 		}
-		// If it didn't match any leaf cluster assume it matches the root
-		// cluster.
+		// Didn't match any leaf, check if it's in the root cluster.
+		if !isDescendantSubdomain(fqdn, rootClusterName) {
+			continue
+		}
 		clusterConfig, err := r.cfg.clusterConfigCache.GetClusterConfig(ctx, rootClient)
 		if err != nil {
 			log.ErrorContext(ctx, "Failed to get VNet config, SSH nodes in this cluster will not be resolved", "error", err)
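The fqdn_resolver.go change above reverses the matching order: each leaf cluster name is now tried as a DNS suffix in its own right, and the root cluster name is only checked after no leaf matches, so a leaf host resolves as `node.leaf1.example.com` rather than `node.leaf1.example.com.root1.example.com`. Below is a minimal, self-contained sketch of that ordering, not the `lib/vnet` implementation; it assumes `isDescendantSubdomain` is essentially a dot-separated suffix check, and `matchCluster` is an illustrative name only.

```go
package main

import (
	"fmt"
	"strings"
)

// isDescendantSubdomain is a stand-in for the resolver's helper; the
// assumption here is a simple suffix check on trailing-dot-normalized names.
func isDescendantSubdomain(fqdn, domain string) bool {
	return strings.HasSuffix(
		strings.TrimSuffix(fqdn, "."),
		"."+strings.TrimSuffix(domain, "."),
	)
}

// matchCluster mirrors the new resolution order: every leaf cluster name is
// tried directly, and only if none match is the root cluster name checked.
func matchCluster(fqdn, rootCluster string, leafClusters []string) (string, bool) {
	for _, leaf := range leafClusters {
		if isDescendantSubdomain(fqdn, leaf) {
			return leaf, true
		}
	}
	if isDescendantSubdomain(fqdn, rootCluster) {
		return rootCluster, true
	}
	return "", false
}

func main() {
	leafs := []string{"leaf1.example.com"}
	// A leaf host now matches on the leaf cluster name alone.
	fmt.Println(matchCluster("node.leaf1.example.com.", "root1.example.com", leafs))
	// Hosts under the root cluster name fall through to the root check.
	fmt.Println(matchCluster("node.root1.example.com.", "root1.example.com", leafs))
	// Unrelated names match nothing and would be forwarded upstream.
	fmt.Println(matchCluster("node.example.org.", "root1.example.com", leafs))
}
```

The updated dial addresses in `TestSSH` further down (`node.leaf1.example.com`, `node.leaf2.example.com`) exercise exactly this leaf-first path.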
diff --git a/lib/vnet/opensshconfig.go b/lib/vnet/opensshconfig.go
index bd7ce4f50d3cc..65f2587f63796 100644
--- a/lib/vnet/opensshconfig.go
+++ b/lib/vnet/opensshconfig.go
@@ -144,6 +144,7 @@ type sshConfigurator struct {
 
 type sshConfiguratorConfig struct {
 	clientApplication ClientApplication
+	leafClusterCache  *leafClusterCache
 	homePath          string
 	clock             clockwork.Clock
 }
@@ -199,6 +200,16 @@ func (c *sshConfigurator) updateSSHConfiguration(ctx context.Context) error {
 			continue
 		}
 		hostMatchers = append(hostMatchers, hostMatcher(rootClient.RootClusterName()))
+		leafClusters, err := c.cfg.leafClusterCache.getLeafClusters(ctx, rootClient)
+		if err != nil {
+			log.WarnContext(ctx,
+				"Failed to list leaf clusters, not configuring VNet SSH for leaf clusters of this cluster",
+				"root_cluster", rootClient.ClusterName(), "error", err)
+			continue
+		}
+		for _, leafCluster := range leafClusters {
+			hostMatchers = append(hostMatchers, hostMatcher(leafCluster))
+		}
 	}
 	hostMatchers = utils.Deduplicate(hostMatchers)
 	slices.Sort(hostMatchers)
diff --git a/lib/vnet/opensshconfig_test.go b/lib/vnet/opensshconfig_test.go
index 3289577f783aa..4e933fc3bc915 100644
--- a/lib/vnet/opensshconfig_test.go
+++ b/lib/vnet/opensshconfig_test.go
@@ -33,23 +33,33 @@ func TestSSHConfigurator(t *testing.T) {
 	t.Parallel()
 	ctx, cancel := context.WithCancel(context.Background())
 	defer cancel()
-	clock := clockwork.NewFakeClockAt(time.Now())
 	homePath := t.TempDir()
+	// This test gives a fake clock only to the SSH configurator and a real
+	// clock to everything else, so that fakeClock.BlockUntilContext will
+	// reliably only capture the SSH configuration loop and nothing else.
+	fakeClock := clockwork.NewFakeClockAt(time.Now())
+	realClock := clockwork.NewRealClock()
+
 	fakeClientApp := newFakeClientApp(ctx, t, &fakeClientAppConfig{
 		clusters: map[string]testClusterSpec{
-			"cluster1": {},
+			"cluster1": {
+				leafClusters: map[string]testClusterSpec{
+					"leaf1": {},
+				},
+			},
 			"cluster2": {},
 		},
-		// Give the fake client app a different clock so we can rely on
-		// clock.BlockUntilContext only capturing the SSH configuration loop.
-		clock: clockwork.NewRealClock(),
+		clock: realClock,
 	})
+	leafClusterCache, err := newLeafClusterCache(realClock)
+	require.NoError(t, err)
 	c := newSSHConfigurator(sshConfiguratorConfig{
 		clientApplication: fakeClientApp,
+		leafClusterCache:  leafClusterCache,
 		homePath:          homePath,
-		clock:             clock,
+		clock:             fakeClock,
 	})
 	errC := make(chan error)
 	go func() {
@@ -80,25 +90,29 @@ func TestSSHConfigurator(t *testing.T) {
 	// Wait until the configurator has had a chance to write the initial config
 	// file and then get blocked in the loop.
-	clock.BlockUntilContext(ctx, 1)
+	fakeClock.BlockUntilContext(ctx, 1)
 
 	// Assert the config file contains both root clusters reported by
 	// fakeClientApp.
-	assertConfigFile("*.cluster1 *.cluster2")
+	assertConfigFile("*.cluster1 *.cluster2 *.leaf1")
 
-	// Add a root cluster, wait until the configurator is blocked in the loop,
-	// advance the clock, wait until the configurator is blocked again
-	// indicating it should have updated the config and made it back into the
-	// loop, and then assert that the new cluster is in the config file.
-	fakeClientApp.cfg.clusters["cluster3"] = testClusterSpec{}
-	clock.BlockUntilContext(ctx, 1)
-	clock.Advance(sshConfigurationUpdateInterval)
-	clock.BlockUntilContext(ctx, 1)
-	assertConfigFile("*.cluster1 *.cluster2 *.cluster3")
+	// Add a new root and leaf cluster, wait until the configurator is blocked
+	// in the loop, advance the clock, wait until the configurator is blocked
+	// again indicating it should have updated the config and made it back into
+	// the loop, and then assert that the new clusters are in the config file.
+	fakeClientApp.cfg.clusters["cluster3"] = testClusterSpec{
+		leafClusters: map[string]testClusterSpec{
+			"leaf2": {},
+		},
+	}
+	fakeClock.BlockUntilContext(ctx, 1)
+	fakeClock.Advance(sshConfigurationUpdateInterval)
+	fakeClock.BlockUntilContext(ctx, 1)
+	assertConfigFile("*.cluster1 *.cluster2 *.cluster3 *.leaf1 *.leaf2")
 
 	// Kill the configurator, wait for it to return, and assert that the config
 	// file was deleted.
 	cancel()
 	require.ErrorIs(t, <-errC, context.Canceled)
-	_, err := os.Stat(keypaths.VNetSSHConfigPath(homePath))
+	_, err = os.Stat(keypaths.VNetSSHConfigPath(homePath))
 	require.ErrorIs(t, err, os.ErrNotExist)
 }
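The rewritten `TestSSHConfigurator` above leans on the clockwork fake-clock pattern: the loop under test sleeps on a fake clock, the test blocks until the loop is parked on that clock (`BlockUntilContext`), advances the clock, and blocks again to know a full iteration has run. A minimal sketch of the pattern, assuming the `github.com/jonboulle/clockwork` API used in the test; the loop body below is a stand-in, not the SSH configurator.

```go
package main

import (
	"context"
	"fmt"
	"time"

	"github.com/jonboulle/clockwork"
)

func main() {
	ctx := context.Background()
	fakeClock := clockwork.NewFakeClock()
	updates := make(chan string)

	// A loop shaped like the configurator's: do one update, then sleep on
	// the clock until the next interval.
	go func() {
		for i := 0; ; i++ {
			updates <- fmt.Sprintf("update %d", i)
			fakeClock.Sleep(time.Minute)
		}
	}()

	// The initial update happens immediately.
	fmt.Println(<-updates)
	// Wait until the loop is blocked on the fake clock, advance past the
	// interval, and the next iteration runs deterministically.
	_ = fakeClock.BlockUntilContext(ctx, 1)
	fakeClock.Advance(time.Minute)
	fmt.Println(<-updates)
}
```

Giving the fake clock only to the configurator (and a real clock to everything else) keeps `BlockUntilContext` from counting unrelated sleepers, which is what the added test comment explains.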
diff --git a/lib/vnet/ssh_handler.go b/lib/vnet/ssh_handler.go
index 02b95f548d325..1ca080d294ee9 100644
--- a/lib/vnet/ssh_handler.go
+++ b/lib/vnet/ssh_handler.go
@@ -59,14 +59,13 @@ func (h *sshHandler) handleTCPConnector(ctx context.Context, localPort uint16, c
 		return trace.Wrap(err)
 	}
 	defer targetConn.Close()
-	return trace.Wrap(h.handleTCPConnectorWithTargetConn(ctx, localPort, connector, targetConn))
+	return trace.Wrap(h.handleTCPConnectorWithTargetConn(ctx, connector, targetConn))
 }
 
 // handleTCPConnectorWithTargetTCPConn handles an incoming TCP connection from
 // VNet when a TCP connection to the target host has already been established.
 func (h *sshHandler) handleTCPConnectorWithTargetConn(
 	ctx context.Context,
-	localPort uint16,
 	connector func() (net.Conn, error),
 	targetConn net.Conn,
 ) error {
diff --git a/lib/vnet/ssh_provider.go b/lib/vnet/ssh_provider.go
index dd964b4967f04..1c54bab4048a3 100644
--- a/lib/vnet/ssh_provider.go
+++ b/lib/vnet/ssh_provider.go
@@ -17,6 +17,7 @@ package vnet
 
 import (
+	"cmp"
 	"context"
 	"crypto/tls"
 	"crypto/x509"
@@ -241,18 +242,15 @@ type dialTarget struct {
 }
 
 func computeDialTarget(matchedCluster *vnetv1.MatchedCluster, fqdn string) dialTarget {
-	targetCluster := matchedCluster.GetRootCluster()
-	targetHost := strings.TrimSuffix(fqdn, "."+matchedCluster.GetRootCluster()+".")
-	leafCluster := matchedCluster.GetLeafCluster()
-	if leafCluster != "" {
-		targetCluster = leafCluster
-		targetHost = strings.TrimSuffix(targetHost, "."+leafCluster)
-	}
+	// matchedCluster.LeafCluster will be set if the host was in a leaf
+	// cluster, else it will be unset and the target cluster is the root.
+	targetCluster := cmp.Or(matchedCluster.GetLeafCluster(), matchedCluster.GetRootCluster())
+	targetHost := strings.TrimSuffix(fqdn, "."+fullyQualify(targetCluster))
 	return dialTarget{
 		fqdn:        fqdn,
 		profile:     matchedCluster.GetProfile(),
 		rootCluster: matchedCluster.GetRootCluster(),
-		leafCluster: leafCluster,
+		leafCluster: matchedCluster.GetLeafCluster(),
 		cluster:     targetCluster,
 		hostname:    targetHost,
 		addr:        targetHost + ":0",
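`cmp.Or` returns the first of its arguments that is not the zero value, so the rewritten `computeDialTarget` above picks the leaf cluster whenever `MatchedCluster.LeafCluster` is set and falls back to the root cluster otherwise, then trims a single fully-qualified cluster suffix from the FQDN. Here is a self-contained sketch of that selection; the `matchedCluster` struct stands in for `*vnetv1.MatchedCluster`, and `fullyQualify` is an assumed helper that appends the trailing dot of a fully-qualified DNS name (the real one lives elsewhere in `lib/vnet`).

```go
package main

import (
	"cmp"
	"fmt"
	"strings"
)

// matchedCluster is an illustrative stand-in for *vnetv1.MatchedCluster.
type matchedCluster struct {
	rootCluster string
	leafCluster string // empty when the host is in the root cluster
}

// fullyQualify is an assumption about the helper used in the patch: it makes
// sure the name ends with a trailing dot.
func fullyQualify(name string) string {
	if strings.HasSuffix(name, ".") {
		return name
	}
	return name + "."
}

// hostAndCluster mirrors the new logic: the target cluster is the leaf when
// set, otherwise the root, and the hostname is the FQDN with that cluster's
// fully-qualified suffix trimmed.
func hostAndCluster(m matchedCluster, fqdn string) (host, cluster string) {
	cluster = cmp.Or(m.leafCluster, m.rootCluster)
	host = strings.TrimSuffix(fqdn, "."+fullyQualify(cluster))
	return host, cluster
}

func main() {
	// Leaf host: only the leaf cluster's own suffix is trimmed.
	fmt.Println(hostAndCluster(
		matchedCluster{rootCluster: "root1.example.com", leafCluster: "leaf1.example.com"},
		"node.leaf1.example.com.")) // node leaf1.example.com
	// Root host: LeafCluster is unset, so the root cluster is the target.
	fmt.Println(hostAndCluster(
		matchedCluster{rootCluster: "root1.example.com"},
		"node.root1.example.com.")) // node root1.example.com
}
```

Compared with the old version, a host in a leaf cluster has only the leaf cluster's suffix trimmed, matching the new `<hostname>.<leaf-cluster-name>` addressing.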
diff --git a/lib/vnet/tcp_handler_resolver.go b/lib/vnet/tcp_handler_resolver.go
index 47791a6b4e30c..7d4d0dbe16696 100644
--- a/lib/vnet/tcp_handler_resolver.go
+++ b/lib/vnet/tcp_handler_resolver.go
@@ -209,7 +209,7 @@ func (h *undecidedHandler) handleTCPConnector(ctx context.Context, localPort uin
 		h.setDecidedHandler(sshHandler)
 		// Handle the incoming connection with the TCP connection to the target
 		// SSH node that has already been established.
-		return sshHandler.handleTCPConnectorWithTargetConn(ctx, localPort, connector, targetConn)
+		return sshHandler.handleTCPConnectorWithTargetConn(ctx, connector, targetConn)
 	}
 	return trace.Errorf("rejecting connection to %s:%d", h.cfg.fqdn, localPort)
 }
diff --git a/lib/vnet/user_process.go b/lib/vnet/user_process.go
index 48eb3dbb38595..dc84f2b6c2f2c 100644
--- a/lib/vnet/user_process.go
+++ b/lib/vnet/user_process.go
@@ -116,6 +116,7 @@ func RunUserProcess(ctx context.Context, clientApplication ClientApplication) (*
 	processManager, processCtx := newProcessManager()
 	sshConfigurator := newSSHConfigurator(sshConfiguratorConfig{
 		clientApplication: clientApplication,
+		leafClusterCache:  leafClusterCache,
 	})
 	processManager.AddCriticalBackgroundTask("SSH configuration loop", func() error {
 		return trace.Wrap(sshConfigurator.runConfigurationLoop(processCtx))
diff --git a/lib/vnet/vnet_test.go b/lib/vnet/vnet_test.go
index 9a1a8f47d9801..4feb8a8b95a9f 100644
--- a/lib/vnet/vnet_test.go
+++ b/lib/vnet/vnet_test.go
@@ -750,20 +750,16 @@ func TestDialFakeApp(t *testing.T) {
 		clusters: map[string]testClusterSpec{
 			"root1.example.com": {
 				apps: []appSpec{
-					appSpec{publicAddr: "echo1.root1.example.com"},
-					appSpec{publicAddr: "echo2.root1.example.com"},
-					appSpec{publicAddr: "echo.myzone.example.com"},
-					appSpec{publicAddr: "echo.nested.myzone.example.com"},
-					appSpec{publicAddr: "not.in.a.custom.zone"},
-					appSpec{
+					{publicAddr: "echo1.root1.example.com"},
+					{publicAddr: "echo2.root1.example.com"},
+					{publicAddr: "echo.myzone.example.com"},
+					{publicAddr: "echo.nested.myzone.example.com"},
+					{publicAddr: "not.in.a.custom.zone"},
+					{
 						publicAddr: "multi-port.root1.example.com",
 						tcpPorts: []*types.PortRange{
-							&types.PortRange{
-								Port: 1337,
-							},
-							&types.PortRange{
-								Port: 4242,
-							},
+							{Port: 1337},
+							{Port: 4242},
 						},
 					},
 				},
@@ -774,36 +770,32 @@
 			leafClusters: map[string]testClusterSpec{
 				"leaf1.example.com": {
 					apps: []appSpec{
-						appSpec{publicAddr: "echo1.leaf1.example.com"},
-						appSpec{
+						{publicAddr: "echo1.leaf1.example.com"},
+						{
 							publicAddr: "multi-port.leaf1.example.com",
 							tcpPorts: []*types.PortRange{
-								&types.PortRange{
-									Port: 1337,
-								},
-								&types.PortRange{
-									Port: 4242,
-								},
+								{Port: 1337},
+								{Port: 4242},
 							},
 						},
 					},
 				},
 				"leaf2.example.com": {
 					apps: []appSpec{
-						appSpec{publicAddr: "echo1.leaf2.example.com"},
+						{publicAddr: "echo1.leaf2.example.com"},
 					},
 				},
 			},
 		},
 		"root2.example.com": {
 			apps: []appSpec{
-				appSpec{publicAddr: "echo1.root2.example.com"},
-				appSpec{publicAddr: "echo2.root2.example.com"},
+				{publicAddr: "echo1.root2.example.com"},
+				{publicAddr: "echo2.root2.example.com"},
 			},
 			leafClusters: map[string]testClusterSpec{
 				"leaf3.example.com": {
 					apps: []appSpec{
-						appSpec{publicAddr: "echo1.leaf3.example.com"},
+						{publicAddr: "echo1.leaf3.example.com"},
 					},
 				},
 			},
@@ -1045,7 +1037,7 @@ func TestOnNewConnection(t *testing.T) {
 		clusters: map[string]testClusterSpec{
 			"root1.example.com": {
 				apps: []appSpec{
-					appSpec{publicAddr: "echo1"},
+					{publicAddr: "echo1"},
 				},
 				cidrRange: "192.168.2.0/24",
 				leafClusters: map[string]testClusterSpec{},
@@ -1106,15 +1098,15 @@ func testWithAlgorithmSuite(t *testing.T, suite types.SignatureAlgorithmSuite) {
 		clusters: map[string]testClusterSpec{
 			"root.example.com": {
 				apps: []appSpec{
-					appSpec{publicAddr: "echo1"},
-					appSpec{publicAddr: "echo2"},
+					{publicAddr: "echo1"},
+					{publicAddr: "echo2"},
 				},
 				cidrRange: "192.168.2.0/24",
 				leafClusters: map[string]testClusterSpec{
 					"leaf.example.com": {
 						apps: []appSpec{
-							appSpec{publicAddr: "echo1"},
-							appSpec{publicAddr: "echo2"},
+							{publicAddr: "echo1"},
+							{publicAddr: "echo2"},
 						},
 						cidrRange: "192.168.2.0/24",
 					},
@@ -1297,7 +1289,7 @@ func TestSSH(t *testing.T) {
 		},
 		{
 			// Connection to node in leaf cluster should work.
-			dialAddr:   "node.leaf1.example.com.root1.example.com",
+			dialAddr:   "node.leaf1.example.com",
 			dialPort:   22,
 			expectCIDR: leaf1CIDR,
 			sshUser:    "testuser",
@@ -1315,7 +1307,7 @@
 		{
 			// Connection to node in leaf cluster in alternate profile should
 			// work.
-			dialAddr:   "node.leaf2.example.com.root2.example.com",
+			dialAddr:   "node.leaf2.example.com",
 			dialPort:   22,
 			expectCIDR: leaf2CIDR,
 			sshUser:    "testuser",
diff --git a/rfd/0207-vnet-ssh.md b/rfd/0207-vnet-ssh.md
index a78cf3c5fe8dc..66ebabf392673 100644
--- a/rfd/0207-vnet-ssh.md
+++ b/rfd/0207-vnet-ssh.md
@@ -7,9 +7,9 @@ state: draft
 
 ## Required Approvers
 
-* Engineering: @espadolini && @rosstimothy
-* Security: doyensec
-* Product: @klizhentas
+- Engineering: @espadolini && @rosstimothy
+- Security: doyensec
+- Product: @klizhentas
 
 ## What
 
@@ -24,7 +24,7 @@
 Advanced Teleport features like per-session MFA and hardware keys will be fully
 supported.
 
 Here's a demo with a proof-of-concept of the feature in action:
-https://goteleport.zoom.us/clips/share/3xSvI4taSD6YgM1C0l12nQ
+<https://goteleport.zoom.us/clips/share/3xSvI4taSD6YgM1C0l12nQ>
 
 ## Why
 
@@ -197,13 +197,11 @@ we already have an SSH forwarding server implemented in `lib/srv/forward/sshserv
 VNet SSH will support SSH connections to DNS names matching any of the
 following:
 - `<hostname>.<root-cluster-name>`
-- `<hostname>.<leaf-cluster-name>.<root-cluster-name>`
 - `<hostname>.<proxy-address>`
-- `<hostname>.<leaf-cluster-name>.<proxy-address>`
 
 These are the same patterns supported by our existing OpenSSH client
 integration, which will offer a seamless transition for users switching to VNet SSH
-https://goteleport.com/docs/enroll-resources/server-access/openssh/openssh-agentless/#step-23-generate-an-ssh-client-configuration
+<https://goteleport.com/docs/enroll-resources/server-access/openssh/openssh-agentless/#step-23-generate-an-ssh-client-configuration>
 
 If users prefer to use a shorter name to connect to SSH hosts, they can add a
 `CanonicalDomains` option to their `~/.ssh/config` file, e.g.
@@ -236,7 +234,7 @@ If the user runs `tsh vnet` instead of Connect, we won't add anything to
 `vnet_ssh_config` will include the following:
 
 ```
-Host *.teleport.example.com *.leaf.teleport.example.com
+Host *.root.example.com *.leaf.example.com
 IdentityFile "/Users/nic/Library/Application Support/tsh/id_vnet"
 GlobalKnownHostsFile "/Users/nic/Library/Application Support/tsh/vnet_known_hosts"
 UserKnownHostsFile /dev/null
@@ -299,9 +297,8 @@ When the VNet process receives a DNS query this is how it will be resolved:
 1. If it matches a web app the DNS request will be forwarded to upstream DNS
    servers (this is also as it implicitly works today, now we'll do it
    explicitly to skip assigning a VNet IP for web apps).
-1. If the name does not match `*.<root-cluster-name>` or
-   `*.<leaf-cluster-name>.<root-cluster-name>` for any profile, the request will
-   be forwarded to upstream DNS servers.
+1. If the name does not match `*.<root-cluster-name>` or `*.<leaf-cluster-name>` for any cluster, the
+   request will be forwarded to upstream DNS servers.
 1. VNet will assign a free IP address to the FQDN, but at this point it will not
    know if this IP will later resolve to an SSH host or an app or neither.
 1. VNet will return the IP address in an authoritative DNS answer.
@@ -310,6 +307,7 @@
 When the VNet process receives a TCP connection at an address that has been
 assigned to an FQDN but does not yet know if there is a matching app or SSH
 host:
+
 1. An app lookup will run first in case an app has been added since the DNS
    query that assigned this IP. If the queried FQDN matches a TCP app then the
    IP will be permanently