diff --git a/config/recipes/elastic-agent/README.asciidoc b/config/recipes/elastic-agent/README.asciidoc index c5ceb57e1f5..fae16c7ab0c 100644 --- a/config/recipes/elastic-agent/README.asciidoc +++ b/config/recipes/elastic-agent/README.asciidoc @@ -33,7 +33,9 @@ Deploys Elastic Agent as a DaemonSet in Fleet mode with System and Kubernetes in ===== System and Kubernetes integrations running as non-root - `fleet-kubernetes-integration-nonroot.yaml` -The provided example is functionally identical to the previous section but runs the Elastic Agent processes (both the Elastic Agent running as the Fleet server and the Elastic Agent connected to Fleet) as a non-root user by utilizing a DaemonSet to ensure directory and file permissions. *Note* The DaemonSet itself must run as root to set up permissions and ECK >= 2.10.0 is required. +The provided example is functionally identical to the previous section but runs the Elastic Agent processes (both the Elastic Agent running as the Fleet server and the Elastic Agent connected to Fleet) as a non-root user by using a DaemonSet with an init container to ensure directory and file permissions. The init-container approach remains useful for security-conscious users who want to keep `securityContext.allowPrivilegeEscalation: false` in the main Agent pods and avoid granting additional Linux capabilities. This approach is also required for Elastic Agent versions earlier than 8.16.0. The DaemonSet itself must run as root to set up permissions, and ECK >= 2.10.0 is required. + +Starting with Elastic Agent 8.16.0, another rootless approach is available without the init container: set `securityContext.allowPrivilegeEscalation: true` and grant `CAP_CHOWN` when the agent state is persisted on hostPath. `CAP_SETPCAP` is recommended so Elastic Agent can propagate needed capabilities to its child processes. These settings are enabled by default in most container runtimes, but if you explicitly drop all capabilities (for example, `capabilities.drop: ["ALL"]`), you must add them back. 
If you are using `emptyDir` for state and your Agent components do not need extra capabilities, you can run rootless with privilege escalation disabled and all capabilities dropped. ===== Custom logs integration with autodiscover - `fleet-custom-logs-integration.yaml` diff --git a/config/recipes/elastic-agent/fleet-kubernetes-integration-nonroot.yaml b/config/recipes/elastic-agent/fleet-kubernetes-integration-nonroot.yaml index 26b237686d6..477c5c7da6d 100644 --- a/config/recipes/elastic-agent/fleet-kubernetes-integration-nonroot.yaml +++ b/config/recipes/elastic-agent/fleet-kubernetes-integration-nonroot.yaml @@ -1,4 +1,8 @@ --- +# This example uses an init container to adjust hostPath permissions so Agent can run as non-root. +# Use this when you want `allowPrivilegeEscalation: false` in the main Agent pods or when +# running Elastic Agent versions < v8.16.0. For the 8.16+ rootless alternative without an +# init container, see fleet-kubernetes-integration.yaml (requires `allowPrivilegeEscalation: true` and CAP_CHOWN). apiVersion: apps/v1 kind: DaemonSet metadata: diff --git a/config/recipes/elastic-agent/fleet-kubernetes-integration.yaml b/config/recipes/elastic-agent/fleet-kubernetes-integration.yaml index d3683bba69d..f3d29646d2c 100644 --- a/config/recipes/elastic-agent/fleet-kubernetes-integration.yaml +++ b/config/recipes/elastic-agent/fleet-kubernetes-integration.yaml @@ -80,8 +80,18 @@ spec: spec: serviceAccountName: fleet-server automountServiceAccountToken: true - securityContext: - runAsUser: 0 + # Since Elastic Agent v8.16.0, `runAsUser: 0` is no longer needed + # because Agent changes ownership of the data directory to the container user ID. + # For persisted state on hostPath, rootless operation requires privilege escalation + # and CAP_CHOWN (CAP_SETPCAP helps Elastic Agent pass capabilities to child processes). + # These are defaults in most runtimes, but if you set drop: ["ALL"], add them back. 
+ # securityContext: + # allowPrivilegeEscalation: true + # capabilities: + # add: ["CHOWN", "SETPCAP"] + # If legacy behavior is required: + # securityContext: + # runAsUser: 0 --- apiVersion: agent.k8s.elastic.co/v1alpha1 kind: Agent @@ -114,8 +124,18 @@ spec: hostNetwork: true dnsPolicy: ClusterFirstWithHostNet automountServiceAccountToken: true - securityContext: - runAsUser: 0 + # Since Elastic Agent v8.16.0, `runAsUser: 0` is no longer needed + # because Agent changes ownership of the data directory to the container user ID. + # For persisted state on hostPath, rootless operation requires privilege escalation + # and CAP_CHOWN (CAP_SETPCAP helps Elastic Agent pass capabilities to child processes). + # These are defaults in most runtimes, but if you set drop: ["ALL"], add them back. + # securityContext: + # allowPrivilegeEscalation: true + # capabilities: + # add: ["CHOWN", "SETPCAP"] + # If legacy behavior is required: + # securityContext: + # runAsUser: 0 --- apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRole diff --git a/test/e2e/agent/recipes_test.go b/test/e2e/agent/recipes_test.go index fa6a6a7ffa7..8d2954d1748 100644 --- a/test/e2e/agent/recipes_test.go +++ b/test/e2e/agent/recipes_test.go @@ -124,68 +124,6 @@ func TestFleetKubernetesIntegrationRecipe(t *testing.T) { runAgentRecipe(t, "fleet-kubernetes-integration.yaml", customize) } -func TestFleetKubernetesNonRootIntegrationRecipe(t *testing.T) { - v := version.MustParse(test.Ctx().ElasticStackVersion) - - if (v.GE(version.MinFor(7, 17, 28)) && v.LT(version.MinFor(8, 0, 0))) || - (v.GE(version.MinFor(8, 1, 3)) && v.LT(version.MinFor(8, 2, 0))) { - t.Skipf("Skipped as version %s is affected by https://github.com/elastic/kibana/pull/236788", v) - } - - // https://github.com/elastic/cloud-on-k8s/issues/6331 - if v.LT(version.MinFor(8, 7, 0)) && v.GE(version.MinFor(8, 6, 0)) { - t.SkipNow() - } - - if (v.GE(version.MinFor(9, 0, 1)) && v.LE(version.MinFor(9, 0, 4))) || - (v.EQ(version.From(9, 1, 0))) { - 
t.Skipf("Skipped as version %s is affected by https://github.com/elastic/kibana/pull/230211", v) - } - - // Do not test between 9.1.0 and 9.1.5 due to broken ssl settings in Kibana, see https://github.com/elastic/cloud-on-k8s/issues/8820 - if v.GE(version.From(9, 1, 0)) && v.LT(version.From(9, 1, 5)) { - t.Skipf("Skipped as version %s is affected by https://github.com/elastic/kibana/issues/233780", v) - } - - // The recipe does not work fully within an openshift cluster without modifications. - if test.Ctx().OcpCluster { - t.SkipNow() - } - - customize := func(builder agent.Builder) agent.Builder { - if !builder.Agent.Spec.FleetServerEnabled { - return builder - } - - return builder. - WithFleetAgentDataStreamsValidation(). - // TODO API server should generate event in time but on kind we see repeatedly no metrics being reported in time - // see https://github.com/elastic/cloud-on-k8s/issues/4092 - // WithDefaultESValidation(agent.HasWorkingDataStream(agent.MetricsType, "kubernetes.apiserver", "k8s")). - WithDefaultESValidation(agent.HasWorkingDataStream(agent.MetricsType, "kubernetes.container", "default")). - // Might not generate an event in time for this check to succeed in all environments - // WithDefaultESValidation(agent.HasWorkingDataStream(agent.MetricsType, "kubernetes.event", "k8s")). - WithDefaultESValidation(agent.HasWorkingDataStream(agent.MetricsType, "kubernetes.node", "default")). - WithDefaultESValidation(agent.HasWorkingDataStream(agent.MetricsType, "kubernetes.pod", "default")). - WithDefaultESValidation(agent.HasWorkingDataStream(agent.MetricsType, "kubernetes.proxy", "default")). - WithDefaultESValidation(agent.HasWorkingDataStream(agent.MetricsType, "kubernetes.system", "default")). - WithDefaultESValidation(agent.HasWorkingDataStream(agent.MetricsType, "kubernetes.volume", "default")). - WithDefaultESValidation(agent.HasWorkingDataStream(agent.MetricsType, "system.cpu", "default")). 
- WithDefaultESValidation(agent.HasWorkingDataStream(agent.MetricsType, "system.diskio", "default")). - // to be reinstated once https://github.com/elastic/beats/issues/30590 is addressed - // WithDefaultESValidation(agent.HasWorkingDataStream(agent.MetricsType, "system.fsstat", "default")). - WithDefaultESValidation(agent.HasWorkingDataStream(agent.MetricsType, "system.load", "default")). - WithDefaultESValidation(agent.HasWorkingDataStream(agent.MetricsType, "system.memory", "default")). - WithDefaultESValidation(agent.HasWorkingDataStream(agent.MetricsType, "system.network", "default")). - WithDefaultESValidation(agent.HasWorkingDataStream(agent.MetricsType, "system.process", "default")). - WithDefaultESValidation(agent.HasWorkingDataStream(agent.MetricsType, "system.process.summary", "default")). - WithDefaultESValidation(agent.HasWorkingDataStream(agent.MetricsType, "system.socket_summary", "default")). - WithDefaultESValidation(agent.HasWorkingDataStream(agent.MetricsType, "system.uptime", "default")) - } - - runAgentRecipe(t, "fleet-kubernetes-integration-nonroot.yaml", customize) -} - func TestFleetCustomLogsIntegrationRecipe(t *testing.T) { v := version.MustParse(test.Ctx().ElasticStackVersion) diff --git a/test/e2e/test/helper/yaml.go b/test/e2e/test/helper/yaml.go index 84d235cebda..be46f15ce9d 100644 --- a/test/e2e/test/helper/yaml.go +++ b/test/e2e/test/helper/yaml.go @@ -383,12 +383,8 @@ func transformToE2E(namespace, fullTestName, suffix string, transformers []Build decodedObj.Namespace = namespace decodedObj.Name = decodedObj.Name + "-" + suffix case *appsv1.DaemonSet: - name := decodedObj.Name + "-" + suffix decodedObj.Namespace = namespace - decodedObj.Name = name - decodedObj.Spec.Selector.MatchLabels["app.kubernetes.io/instance"] = name - decodedObj.Spec.Template.ObjectMeta.Labels["app.kubernetes.io/instance"] = name - maybeMutateForAgentNonRootTests(decodedObj, namespace, suffix) + decodedObj.Name = decodedObj.Name + "-" + suffix } if builder 
!= nil { @@ -413,30 +409,6 @@ func transformToE2E(namespace, fullTestName, suffix string, transformers []Build return builders, otherObjects } -// maybeMutateForAgentNonRootTests will possibly mutate the given daemonset when -// running tests for Elastic Agent running as non-root. This is required as the -// directories depend on both the namespace and the random suffix of the e2e tests. -func maybeMutateForAgentNonRootTests(ds *appsv1.DaemonSet, namespace, suffix string) { - for i, init := range ds.Spec.Template.Spec.InitContainers { - if init.Name == "manage-agent-hostpath-permissions" { - for j, cmd := range ds.Spec.Template.Spec.InitContainers[i].Command { - updatedCmd := strings.Replace( - cmd, - "/var/lib/elastic-agent/default/elastic-agent/state", - fmt.Sprintf("/var/lib/elastic-agent/%s/elastic-agent-%s/state", namespace, suffix), - 1, - ) - ds.Spec.Template.Spec.InitContainers[i].Command[j] = strings.Replace( - updatedCmd, - "/var/lib/elastic-agent/default/fleet-server/state", - fmt.Sprintf("/var/lib/elastic-agent/%s/fleet-server-%s/state", namespace, suffix), - 1, - ) - } - } - } -} - // sortBuilders mutates the given builder slice to sort them by test priority: // Elasticsearch > Kibana > APMServer > Enterprise Search > Beats // The underlying goal is, for example, to ensure Elasticsearch is available before we start testing Beats. @@ -522,48 +494,6 @@ func tweakConfigLiterals(config *commonv1.Config, suffix string, namespace strin } } - fleetOutputsKey := "xpack.fleet.outputs" - - // This is only used when testing Agent+Fleet running as non-root. (config/recipes/elastic-agent/fleet-kubernetes-integration-nonroot.yaml) - // - // Adjust the Kibana's spec.config.xpack.fleet.outputs section to both - // 1. Point to the valid Elasticsearch instance with suffix + namespace being random - // 2. Point to the valid mounted Elasticsearch CA with a random suffix + namespace in the mount path. 
- if untypedOutputs, ok := data[fleetOutputsKey]; ok { //nolint:nestif - if untypedXpackOutputsSlice, ok := untypedOutputs.([]any); ok { - for _, untypedOutputMap := range untypedXpackOutputsSlice { - if outputMap, ok := untypedOutputMap.(map[string]any); ok { - if outputMap["id"] == "eck-fleet-agent-output-elasticsearch" { - if outputSlice, ok := outputMap["hosts"].([]any); ok { - for j, untypedHost := range outputSlice { - if host, ok := untypedHost.(string); ok { - outputSlice[j] = strings.ReplaceAll( - host, - "elasticsearch-es-http.default", - fmt.Sprintf("elasticsearch-%s-es-http.%s", suffix, namespace), - ) - } - } - } - if untypedSSL, ok := outputMap["ssl"].(map[string]any); ok { - if untypedCAs, ok := untypedSSL["certificate_authorities"].([]any); ok { - for k, untypedCA := range untypedCAs { - if ca, ok := untypedCA.(string); ok { - untypedCAs[k] = strings.ReplaceAll( - ca, - "elasticsearch-association/default/elasticsearch/", - fmt.Sprintf("elasticsearch-association/%s/elasticsearch-%s/", namespace, suffix), - ) - } - } - } - } - } - } - } - } - } - return data }