From 5ba0256b3b779a7a3c0f189de931766d56cb278f Mon Sep 17 00:00:00 2001 From: Tiago Silva Date: Fri, 28 Jun 2024 10:14:12 +0100 Subject: [PATCH] kube: resolve Kubernetes cluster domain (#43584) This PR enhances teleport's intelligence by no longer assuming the cluster domain is `cluster.local`. Since many clusters use a non-default cluster domain, this assumption can disrupt app discovery access. Fixes #39007 Signed-off-by: Tiago Silva --- .../zz_generated.teleport-kube-agent.mdx | 11 ++++ .../templates/deployment.yaml | 4 ++ .../__snapshot__/deployment_test.yaml.snap | 60 +++++++++++++++++++ .../chart/teleport-kube-agent/values.yaml | 6 ++ lib/services/app.go | 33 ++++++++-- lib/services/app_test.go | 43 +++++++++++-- lib/srv/discovery/fetchers/kube_services.go | 12 +--- 7 files changed, 147 insertions(+), 22 deletions(-) diff --git a/docs/pages/reference/helm-reference/includes/zz_generated.teleport-kube-agent.mdx b/docs/pages/reference/helm-reference/includes/zz_generated.teleport-kube-agent.mdx index d73c3cf505241..68cbd79bb65e3 100644 --- a/docs/pages/reference/helm-reference/includes/zz_generated.teleport-kube-agent.mdx +++ b/docs/pages/reference/helm-reference/includes/zz_generated.teleport-kube-agent.mdx @@ -198,6 +198,17 @@ Once `appResources` is set, you can dynamically register application with `tsh` by following [the Dynamic App Registration guide](../../../application-access/guides/dynamic-registration.mdx). +## `clusterDomain` + +| Type | Default | +|------|---------| +| `string` | `"cluster.local"` | + +`clusterDomain` sets the domain name used by the Kubernetes cluster. This value is used to build the +FQDN application URIs. For example, if the cluster domain is `anything.local`, the agent will proxy the application +`myapp` running in the `default` namespace at `http://myapp.default.svc.anything.local`. You must manually set this value +to match your cluster domain if it is different from the default value `cluster.local`. + ## `awsDatabases` | Type | Default | diff --git a/examples/chart/teleport-kube-agent/templates/deployment.yaml b/examples/chart/teleport-kube-agent/templates/deployment.yaml index 3b57789c0f33f..d9c7a92e477ec 100644 --- a/examples/chart/teleport-kube-agent/templates/deployment.yaml +++ b/examples/chart/teleport-kube-agent/templates/deployment.yaml @@ -136,6 +136,10 @@ spec: - name: TELEPORT_EXT_UPGRADER_VERSION value: {{ include "teleport-kube-agent.version" . }} {{- end }} + {{- if .Values.clusterDomain }} + - name: TELEPORT_KUBE_CLUSTER_DOMAIN + value: {{ .Values.clusterDomain | quote }} + {{- end }} {{- if (gt (len .Values.extraEnv) 0) }} {{- toYaml .Values.extraEnv | nindent 8 }} {{- end }} diff --git a/examples/chart/teleport-kube-agent/tests/__snapshot__/deployment_test.yaml.snap b/examples/chart/teleport-kube-agent/tests/__snapshot__/deployment_test.yaml.snap index 86a3974acd1ff..fa1936bc7e4c4 100644 --- a/examples/chart/teleport-kube-agent/tests/__snapshot__/deployment_test.yaml.snap +++ b/examples/chart/teleport-kube-agent/tests/__snapshot__/deployment_test.yaml.snap @@ -30,6 +30,8 @@ sets Deployment annotations when specified if action is Upgrade: env: - name: TELEPORT_INSTALL_METHOD_HELM_KUBE_AGENT value: "true" + - name: TELEPORT_KUBE_CLUSTER_DOMAIN + value: cluster.local image: public.ecr.aws/gravitational/teleport-distroless:15.4.6 imagePullPolicy: IfNotPresent livenessProbe: @@ -103,6 +105,8 @@ sets Deployment labels when specified if action is Upgrade: env: - name: TELEPORT_INSTALL_METHOD_HELM_KUBE_AGENT value: "true" + - name: TELEPORT_KUBE_CLUSTER_DOMAIN + value: cluster.local image: public.ecr.aws/gravitational/teleport-distroless:15.4.6 imagePullPolicy: IfNotPresent livenessProbe: @@ -163,6 +167,8 @@ sets Pod annotations when specified if action is Upgrade: env: - name: TELEPORT_INSTALL_METHOD_HELM_KUBE_AGENT value: "true" + - name: TELEPORT_KUBE_CLUSTER_DOMAIN + value: cluster.local image: public.ecr.aws/gravitational/teleport-distroless:15.4.6 imagePullPolicy: IfNotPresent livenessProbe: @@ -223,6 +229,8 @@ sets Pod labels when specified if action is Upgrade: env: - name: TELEPORT_INSTALL_METHOD_HELM_KUBE_AGENT value: "true" + - name: TELEPORT_KUBE_CLUSTER_DOMAIN + value: cluster.local image: public.ecr.aws/gravitational/teleport-distroless:15.4.6 imagePullPolicy: IfNotPresent livenessProbe: @@ -300,6 +308,8 @@ should add emptyDir for data when existingDataVolume is not set if action is Upg env: - name: TELEPORT_INSTALL_METHOD_HELM_KUBE_AGENT value: "true" + - name: TELEPORT_KUBE_CLUSTER_DOMAIN + value: cluster.local image: public.ecr.aws/gravitational/teleport-distroless:15.4.6 imagePullPolicy: IfNotPresent livenessProbe: @@ -361,6 +371,8 @@ should add insecureSkipProxyTLSVerify to args when set in values if action is Up env: - name: TELEPORT_INSTALL_METHOD_HELM_KUBE_AGENT value: "true" + - name: TELEPORT_KUBE_CLUSTER_DOMAIN + value: cluster.local image: public.ecr.aws/gravitational/teleport-distroless:15.4.6 imagePullPolicy: IfNotPresent livenessProbe: @@ -421,6 +433,8 @@ should correctly configure existingDataVolume when set if action is Upgrade: env: - name: TELEPORT_INSTALL_METHOD_HELM_KUBE_AGENT value: "true" + - name: TELEPORT_KUBE_CLUSTER_DOMAIN + value: cluster.local image: public.ecr.aws/gravitational/teleport-distroless:15.4.6 imagePullPolicy: IfNotPresent livenessProbe: @@ -479,6 +493,8 @@ should expose diag port if action is Upgrade: env: - name: TELEPORT_INSTALL_METHOD_HELM_KUBE_AGENT value: "true" + - name: TELEPORT_KUBE_CLUSTER_DOMAIN + value: cluster.local image: public.ecr.aws/gravitational/teleport-distroless:15.4.6 imagePullPolicy: IfNotPresent livenessProbe: @@ -551,6 +567,8 @@ should have multiple replicas when replicaCount is set (using .replicaCount, dep env: - name: TELEPORT_INSTALL_METHOD_HELM_KUBE_AGENT value: "true" + - name: TELEPORT_KUBE_CLUSTER_DOMAIN + value: cluster.local image: public.ecr.aws/gravitational/teleport-distroless:15.4.6 imagePullPolicy: IfNotPresent livenessProbe: @@ -623,6 +641,8 @@ should have multiple replicas when replicaCount is set (using highAvailability.r env: - name: TELEPORT_INSTALL_METHOD_HELM_KUBE_AGENT value: "true" + - name: TELEPORT_KUBE_CLUSTER_DOMAIN + value: cluster.local image: public.ecr.aws/gravitational/teleport-distroless:15.4.6 imagePullPolicy: IfNotPresent livenessProbe: @@ -683,6 +703,8 @@ should have one replica when replicaCount is not set if action is Upgrade: env: - name: TELEPORT_INSTALL_METHOD_HELM_KUBE_AGENT value: "true" + - name: TELEPORT_KUBE_CLUSTER_DOMAIN + value: cluster.local image: public.ecr.aws/gravitational/teleport-distroless:15.4.6 imagePullPolicy: IfNotPresent livenessProbe: @@ -743,6 +765,8 @@ should mount extraVolumes and extraVolumeMounts if action is Upgrade: env: - name: TELEPORT_INSTALL_METHOD_HELM_KUBE_AGENT value: "true" + - name: TELEPORT_KUBE_CLUSTER_DOMAIN + value: cluster.local image: public.ecr.aws/gravitational/teleport-distroless:15.4.6 imagePullPolicy: IfNotPresent livenessProbe: @@ -808,6 +832,8 @@ should mount tls.existingCASecretName and set environment when set in values if env: - name: TELEPORT_INSTALL_METHOD_HELM_KUBE_AGENT value: "true" + - name: TELEPORT_KUBE_CLUSTER_DOMAIN + value: cluster.local - name: SSL_CERT_FILE value: /etc/teleport-tls-ca/ca.pem image: public.ecr.aws/gravitational/teleport-distroless:15.4.6 @@ -876,6 +902,8 @@ should mount tls.existingCASecretName and set extra environment when set in valu env: - name: TELEPORT_INSTALL_METHOD_HELM_KUBE_AGENT value: "true" + - name: TELEPORT_KUBE_CLUSTER_DOMAIN + value: cluster.local - name: HTTPS_PROXY value: http://username:password@my.proxy.host:3128 - name: SSL_CERT_FILE @@ -946,6 +974,8 @@ should provision initContainer correctly when set in values if action is Upgrade env: - name: TELEPORT_INSTALL_METHOD_HELM_KUBE_AGENT value: "true" + - name: TELEPORT_KUBE_CLUSTER_DOMAIN + value: cluster.local image: public.ecr.aws/gravitational/teleport-distroless:15.4.6 imagePullPolicy: IfNotPresent livenessProbe: @@ -1042,6 +1072,8 @@ should set SecurityContext if action is Upgrade: env: - name: TELEPORT_INSTALL_METHOD_HELM_KUBE_AGENT value: "true" + - name: TELEPORT_KUBE_CLUSTER_DOMAIN + value: cluster.local image: public.ecr.aws/gravitational/teleport-distroless:15.4.6 imagePullPolicy: IfNotPresent livenessProbe: @@ -1122,6 +1154,8 @@ should set affinity when set in values if action is Upgrade: env: - name: TELEPORT_INSTALL_METHOD_HELM_KUBE_AGENT value: "true" + - name: TELEPORT_KUBE_CLUSTER_DOMAIN + value: cluster.local image: public.ecr.aws/gravitational/teleport-distroless:15.4.6 imagePullPolicy: IfNotPresent livenessProbe: @@ -1182,6 +1216,8 @@ should set default serviceAccountName when not set in values if action is Upgrad env: - name: TELEPORT_INSTALL_METHOD_HELM_KUBE_AGENT value: "true" + - name: TELEPORT_KUBE_CLUSTER_DOMAIN + value: cluster.local image: public.ecr.aws/gravitational/teleport-distroless:15.4.6 imagePullPolicy: IfNotPresent livenessProbe: @@ -1253,6 +1289,8 @@ should set environment when extraEnv set in values if action is Upgrade: env: - name: TELEPORT_INSTALL_METHOD_HELM_KUBE_AGENT value: "true" + - name: TELEPORT_KUBE_CLUSTER_DOMAIN + value: cluster.local - name: HTTPS_PROXY value: http://username:password@my.proxy.host:3128 image: public.ecr.aws/gravitational/teleport-distroless:15.4.6 @@ -1315,6 +1353,8 @@ should set image and tag correctly if action is Upgrade: env: - name: TELEPORT_INSTALL_METHOD_HELM_KUBE_AGENT value: "true" + - name: TELEPORT_KUBE_CLUSTER_DOMAIN + value: cluster.local image: public.ecr.aws/gravitational/teleport-distroless:12.2.1 imagePullPolicy: IfNotPresent livenessProbe: @@ -1375,6 +1415,8 @@ should set imagePullPolicy when set in values if action is Upgrade: env: - name: TELEPORT_INSTALL_METHOD_HELM_KUBE_AGENT value: "true" + - name: TELEPORT_KUBE_CLUSTER_DOMAIN + value: cluster.local image: public.ecr.aws/gravitational/teleport-distroless:15.4.6 imagePullPolicy: Always livenessProbe: @@ -1435,6 +1477,8 @@ should set nodeSelector if set in values if action is Upgrade: env: - name: TELEPORT_INSTALL_METHOD_HELM_KUBE_AGENT value: "true" + - name: TELEPORT_KUBE_CLUSTER_DOMAIN + value: cluster.local image: public.ecr.aws/gravitational/teleport-distroless:15.4.6 imagePullPolicy: IfNotPresent livenessProbe: @@ -1497,6 +1541,8 @@ should set not set priorityClassName when not set in values if action is Upgrade env: - name: TELEPORT_INSTALL_METHOD_HELM_KUBE_AGENT value: "true" + - name: TELEPORT_KUBE_CLUSTER_DOMAIN + value: cluster.local image: public.ecr.aws/gravitational/teleport-distroless:15.4.6 imagePullPolicy: IfNotPresent livenessProbe: @@ -1569,6 +1615,8 @@ should set preferred affinity when more than one replica is used if action is Up env: - name: TELEPORT_INSTALL_METHOD_HELM_KUBE_AGENT value: "true" + - name: TELEPORT_KUBE_CLUSTER_DOMAIN + value: cluster.local image: public.ecr.aws/gravitational/teleport-distroless:15.4.6 imagePullPolicy: IfNotPresent livenessProbe: @@ -1629,6 +1677,8 @@ should set priorityClassName when set in values if action is Upgrade: env: - name: TELEPORT_INSTALL_METHOD_HELM_KUBE_AGENT value: "true" + - name: TELEPORT_KUBE_CLUSTER_DOMAIN + value: cluster.local image: public.ecr.aws/gravitational/teleport-distroless:15.4.6 imagePullPolicy: IfNotPresent livenessProbe: @@ -1690,6 +1740,8 @@ should set probeTimeoutSeconds when set in values if action is Upgrade: env: - name: TELEPORT_INSTALL_METHOD_HELM_KUBE_AGENT value: "true" + - name: TELEPORT_KUBE_CLUSTER_DOMAIN + value: cluster.local image: public.ecr.aws/gravitational/teleport-distroless:15.4.6 imagePullPolicy: IfNotPresent livenessProbe: @@ -1760,6 +1812,8 @@ should set required affinity when highAvailability.requireAntiAffinity is set if env: - name: TELEPORT_INSTALL_METHOD_HELM_KUBE_AGENT value: "true" + - name: TELEPORT_KUBE_CLUSTER_DOMAIN + value: cluster.local image: public.ecr.aws/gravitational/teleport-distroless:15.4.6 imagePullPolicy: IfNotPresent livenessProbe: @@ -1820,6 +1874,8 @@ should set resources when set in values if action is Upgrade: env: - name: TELEPORT_INSTALL_METHOD_HELM_KUBE_AGENT value: "true" + - name: TELEPORT_KUBE_CLUSTER_DOMAIN + value: cluster.local image: public.ecr.aws/gravitational/teleport-distroless:15.4.6 imagePullPolicy: IfNotPresent livenessProbe: @@ -1887,6 +1943,8 @@ should set serviceAccountName when set in values if action is Upgrade: env: - name: TELEPORT_INSTALL_METHOD_HELM_KUBE_AGENT value: "true" + - name: TELEPORT_KUBE_CLUSTER_DOMAIN + value: cluster.local image: public.ecr.aws/gravitational/teleport-distroless:15.4.6 imagePullPolicy: IfNotPresent livenessProbe: @@ -1947,6 +2005,8 @@ should set tolerations when set in values if action is Upgrade: env: - name: TELEPORT_INSTALL_METHOD_HELM_KUBE_AGENT value: "true" + - name: TELEPORT_KUBE_CLUSTER_DOMAIN + value: cluster.local image: public.ecr.aws/gravitational/teleport-distroless:15.4.6 imagePullPolicy: IfNotPresent livenessProbe: diff --git a/examples/chart/teleport-kube-agent/values.yaml b/examples/chart/teleport-kube-agent/values.yaml index 2a03a062c9ce6..c753fe3da7043 100644 --- a/examples/chart/teleport-kube-agent/values.yaml +++ b/examples/chart/teleport-kube-agent/values.yaml @@ -162,6 +162,12 @@ apps: [] # appResources: [] +# clusterDomain(string) -- sets the domain name used by the Kubernetes cluster. This value is used to build the +# FQDN application URIs. For example, if the cluster domain is `anything.local`, the agent will proxy the application +# `myapp` running in the `default` namespace at `http://myapp.default.svc.anything.local`. You must manually set this value +# to match your cluster domain if it is different from the default value `cluster.local`. +clusterDomain: "cluster.local" + ################################################################ # Values that must be provided if Database access is enabled. ################################################################ diff --git a/lib/services/app.go b/lib/services/app.go index 96aa219a0af6b..85123ffbab35d 100644 --- a/lib/services/app.go +++ b/lib/services/app.go @@ -22,7 +22,9 @@ import ( "context" "fmt" "net/url" + "os" "strings" + "sync" "github.com/gravitational/trace" corev1 "k8s.io/api/core/v1" @@ -168,7 +170,7 @@ func UnmarshalAppServer(data []byte, opts ...MarshalOption) (types.AppServer, er // It transforms service fields and annotations into appropriate Teleport app fields. // Service labels are copied to app labels. func NewApplicationFromKubeService(service corev1.Service, clusterName, protocol string, port corev1.ServicePort) (types.Application, error) { - appURI := buildAppURI(protocol, getServiceFQDN(service), port.Port) + appURI := buildAppURI(protocol, GetServiceFQDN(service), port.Port) rewriteConfig, err := getAppRewriteConfig(service.GetAnnotations()) if err != nil { @@ -202,14 +204,15 @@ func NewApplicationFromKubeService(service corev1.Service, clusterName, protocol return app, nil } -func getServiceFQDN(s corev1.Service) string { +// GetServiceFQDN returns the fully qualified domain name for the service. +func GetServiceFQDN(service corev1.Service) string { // If service type is ExternalName it points to external DNS name, to keep correct // HOST for HTTP requests we return already final external DNS name. // https://kubernetes.io/docs/concepts/services-networking/service/#externalname - if s.Spec.Type == corev1.ServiceTypeExternalName { - return s.Spec.ExternalName + if service.Spec.Type == corev1.ServiceTypeExternalName { + return service.Spec.ExternalName } - return fmt.Sprintf("%s.%s.svc.cluster.local", s.GetName(), s.GetNamespace()) + return fmt.Sprintf("%s.%s.svc.%s", service.GetName(), service.GetNamespace(), clusterDomainResolver()) } func buildAppURI(protocol, serviceFQDN string, port int32) string { @@ -280,3 +283,23 @@ func getAppLabels(serviceLabels map[string]string, clusterName string) (map[stri return result, nil } + +var ( + // clusterDomainResolver is a function that resolves the cluster domain once and caches the result. + // It's used to lazily resolve the cluster domain from the env var "TELEPORT_KUBE_CLUSTER_DOMAIN" or fallback to + // a default value. + // It's only used when agent is running in the Kubernetes cluster. + clusterDomainResolver = sync.OnceValue[string](getClusterDomain) +) + +const ( + // teleportKubeClusterDomain is the environment variable that specifies the cluster domain. + teleportKubeClusterDomain = "TELEPORT_KUBE_CLUSTER_DOMAIN" +) + +func getClusterDomain() string { + if envDomain := os.Getenv(teleportKubeClusterDomain); envDomain != "" { + return envDomain + } + return "cluster.local" +} diff --git a/lib/services/app_test.go b/lib/services/app_test.go index eb21002a27959..dc79a7426a65f 100644 --- a/lib/services/app_test.go +++ b/lib/services/app_test.go @@ -137,20 +137,51 @@ func TestGetAppName(t *testing.T) { } } +func TestGetKubeClusterDomain(t *testing.T) { + t.Setenv("KUBERNETES_SERVICE_HOST", "k8s") + tests := []struct { + name string + envVar string + expected string + }{ + { + name: "service1 fallback to cluster.local", + expected: "cluster.local", + }, + { + name: "service1 dns resolution", + envVar: "k8s.com", + expected: "k8s.com", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if tt.envVar != "" { + t.Setenv("TELEPORT_KUBE_CLUSTER_DOMAIN", tt.envVar) + } + require.Equal(t, tt.expected, getClusterDomain()) + }) + } +} + func TestGetServiceFQDN(t *testing.T) { tests := []struct { name string + serviceName string namespace string externalName string expected string }{ { - name: "service1", - namespace: "ns1", - expected: "service1.ns1.svc.cluster.local", + name: "service1 fallback to cluster.local", + serviceName: "service1", + namespace: "ns1", + expected: "service1.ns1.svc.cluster.local", }, { name: "service2", + serviceName: "service2", externalName: "external-service2", namespace: "ns2", expected: "external-service2", @@ -158,10 +189,10 @@ func TestGetServiceFQDN(t *testing.T) { } for _, tt := range tests { - t.Run(tt.expected, func(t *testing.T) { + t.Run(tt.name, func(t *testing.T) { service := v1.Service{ ObjectMeta: metav1.ObjectMeta{ - Name: tt.name, + Name: tt.serviceName, Namespace: tt.namespace, }, Spec: v1.ServiceSpec{ @@ -171,7 +202,7 @@ func TestGetServiceFQDN(t *testing.T) { if tt.externalName != "" { service.Spec.Type = v1.ServiceTypeExternalName } - require.Equal(t, tt.expected, getServiceFQDN(service)) + require.Equal(t, tt.expected, GetServiceFQDN(service)) }) } } diff --git a/lib/srv/discovery/fetchers/kube_services.go b/lib/srv/discovery/fetchers/kube_services.go index 2288695176da7..8a7be3086c540 100644 --- a/lib/srv/discovery/fetchers/kube_services.go +++ b/lib/srv/discovery/fetchers/kube_services.go @@ -192,7 +192,7 @@ func (f *KubeAppFetcher) Get(ctx context.Context) (types.ResourcesWithLabels, er case protoHTTPS, protoHTTP, protoTCP: portProtocols[port] = protocolAnnotation default: - if p := autoProtocolDetection(getServiceFQDN(service), port, f.ProtocolChecker); p != protoTCP { + if p := autoProtocolDetection(services.GetServiceFQDN(service), port, f.ProtocolChecker); p != protoTCP { portProtocols[port] = p } } @@ -274,16 +274,6 @@ func autoProtocolDetection(serviceFQDN string, port v1.ServicePort, pc ProtocolC return protoTCP } -func getServiceFQDN(s v1.Service) string { - // If service type is ExternalName it points to external DNS name, to keep correct - // HOST for HTTP requests we return already final external DNS name. - // https://kubernetes.io/docs/concepts/services-networking/service/#externalname - if s.Spec.Type == v1.ServiceTypeExternalName { - return s.Spec.ExternalName - } - return fmt.Sprintf("%s.%s.svc.cluster.local", s.GetName(), s.GetNamespace()) -} - // ProtocolChecker is an interface used to check what protocol uri serves type ProtocolChecker interface { CheckProtocol(uri string) string