From 5ba4dca36aa82455a8ecf34bed4029ed810e5cd0 Mon Sep 17 00:00:00 2001 From: Jaime Soriano Pastor Date: Mon, 26 Apr 2021 11:47:31 +0200 Subject: [PATCH] Kubernetes autodiscover suite (#1064) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds a new suite to test Kubernetes autodiscover features. Reproduces some issues with short-living containers affecting Beats before 7.13 and for which we didn't have test coverage. It allows defining the scenarios using the gherkin language for feature files. Scenarios use templates that are mostly kubernetes manifests. A simple system of options is included to allow selecting configuration blocks in these templates. It uses kubectl to interact with a kubernetes cluster. If no cluster is available, it starts one with kind. Each scenario is executed in a different namespace; the namespace is destroyed after the scenario is run to clean all created resources. It currently works only with workloads in a single namespace, in a single node; this is enough to test many autodiscover cases, but it could be extended in the future to cover multi-node or multi-namespace scenarios. 
Co-authored-by: Manuel de la Peña Co-authored-by: Adam Stokes <51892+adam-stokes@users.noreply.github.com> (cherry picked from commit c52395dc53adf8ed95ff0187de82c86498d0abdc) --- .ci/.e2e-tests.yaml | 10 + .ci/Jenkinsfile | 8 +- .ci/scripts/install-helm-test-dependencies.sh | 12 +- ...bernetes-autodiscover-test-dependencies.sh | 32 ++ .pre-commit-config.yaml | 2 +- cli/config/config.go | 2 +- e2e/Makefile | 2 +- e2e/README.md | 9 +- e2e/_suites/helm/README.md | 2 +- e2e/_suites/helm/helm_charts_test.go | 4 +- e2e/_suites/kubernetes-autodiscover/README.md | 174 +++++++ .../autodiscover_test.go | 437 ++++++++++++++++++ .../features/filebeat.feature | 37 ++ .../features/heartbeat.feature | 31 ++ .../features/metricbeat.feature | 17 + .../kubernetes-autodiscover/kubernetes.go | 175 +++++++ .../kubernetes-autodiscover/testdata/kind.yml | 4 + .../testdata/templates/a-failing-pod.yml.tmpl | 13 + .../testdata/templates/a-pod.yml.tmpl | 23 + .../templates/a-short-living-cronjob.yml.tmpl | 19 + .../testdata/templates/filebeat.yml.tmpl | 143 ++++++ .../testdata/templates/heartbeat.yml.tmpl | 145 ++++++ .../testdata/templates/metricbeat.yml.tmpl | 185 ++++++++ .../testdata/templates/redis-service.yml.tmpl | 16 + .../testdata/templates/redis.yml.tmpl | 30 ++ internal/shell/shell.go | 15 +- 26 files changed, 1528 insertions(+), 19 deletions(-) create mode 100755 .ci/scripts/install-kubernetes-autodiscover-test-dependencies.sh create mode 100644 e2e/_suites/kubernetes-autodiscover/README.md create mode 100644 e2e/_suites/kubernetes-autodiscover/autodiscover_test.go create mode 100644 e2e/_suites/kubernetes-autodiscover/features/filebeat.feature create mode 100644 e2e/_suites/kubernetes-autodiscover/features/heartbeat.feature create mode 100644 e2e/_suites/kubernetes-autodiscover/features/metricbeat.feature create mode 100644 e2e/_suites/kubernetes-autodiscover/kubernetes.go create mode 100644 e2e/_suites/kubernetes-autodiscover/testdata/kind.yml create mode 100644 
e2e/_suites/kubernetes-autodiscover/testdata/templates/a-failing-pod.yml.tmpl create mode 100644 e2e/_suites/kubernetes-autodiscover/testdata/templates/a-pod.yml.tmpl create mode 100644 e2e/_suites/kubernetes-autodiscover/testdata/templates/a-short-living-cronjob.yml.tmpl create mode 100644 e2e/_suites/kubernetes-autodiscover/testdata/templates/filebeat.yml.tmpl create mode 100644 e2e/_suites/kubernetes-autodiscover/testdata/templates/heartbeat.yml.tmpl create mode 100644 e2e/_suites/kubernetes-autodiscover/testdata/templates/metricbeat.yml.tmpl create mode 100644 e2e/_suites/kubernetes-autodiscover/testdata/templates/redis-service.yml.tmpl create mode 100644 e2e/_suites/kubernetes-autodiscover/testdata/templates/redis.yml.tmpl diff --git a/.ci/.e2e-tests.yaml b/.ci/.e2e-tests.yaml index 7a29411b23..c9613c115b 100644 --- a/.ci/.e2e-tests.yaml +++ b/.ci/.e2e-tests.yaml @@ -62,3 +62,13 @@ SUITES: tags: "integrations && redisenterprise" - name: "vSphere" tags: "integrations && vsphere" + - suite: "kubernetes-autodiscover" + platforms: + - "ubuntu-18.04" + scenarios: + - name: "kubernetes autodiscover with filebeat" + tags: "kubernetes-autodiscover && filebeat" + - name: "kubernetes autodiscover with heartbeat" + tags: "kubernetes-autodiscover && heartbeat" + - name: "kubernetes autodiscover with metricbeat" + tags: "kubernetes-autodiscover && metricbeat" diff --git a/.ci/Jenkinsfile b/.ci/Jenkinsfile index 2233322266..f1ef652ca0 100644 --- a/.ci/Jenkinsfile +++ b/.ci/Jenkinsfile @@ -50,8 +50,8 @@ pipeline { string(name: 'STACK_VERSION', defaultValue: '7.x-SNAPSHOT', description: 'SemVer version of the stack to be used for the tests.') string(name: 'HELM_CHART_VERSION', defaultValue: '7.11.2', description: 'SemVer version of Helm chart to be used.') string(name: 'HELM_VERSION', defaultValue: '3.5.2', description: 'SemVer version of Helm to be used.') - string(name: 'HELM_KIND_VERSION', defaultValue: '0.10.0', description: 'SemVer version of Kind to be used.') - 
string(name: 'HELM_KUBERNETES_VERSION', defaultValue: '1.18.2', description: 'SemVer version of Kubernetes to be used.') + string(name: 'KIND_VERSION', defaultValue: '0.10.0', description: 'SemVer version of Kind to be used.') + string(name: 'KUBERNETES_VERSION', defaultValue: '1.18.2', description: 'SemVer version of Kubernetes to be used.') string(name: 'GITHUB_CHECK_NAME', defaultValue: '', description: 'Name of the GitHub check to be updated. Only if this build is triggered from another parent stream.') string(name: 'GITHUB_CHECK_REPO', defaultValue: '', description: 'Name of the GitHub repo to be updated. Only if this build is triggered from another parent stream.') string(name: 'GITHUB_CHECK_SHA1', defaultValue: '', description: 'Git SHA for the Beats upstream project (branch or PR)') @@ -76,8 +76,8 @@ pipeline { FORCE_SKIP_PRESUBMIT = "${params.forceSkipPresubmit}" HELM_CHART_VERSION = "${params.HELM_CHART_VERSION.trim()}" HELM_VERSION = "${params.HELM_VERSION.trim()}" - HELM_KIND_VERSION = "${params.HELM_KIND_VERSION.trim()}" - HELM_KUBERNETES_VERSION = "${params.HELM_KUBERNETES_VERSION.trim()}" + KIND_VERSION = "${params.KIND_VERSION.trim()}" + KUBERNETES_VERSION = "${params.KUBERNETES_VERSION.trim()}" LOG_LEVEL = "${params.LOG_LEVEL.trim()}" TIMEOUT_FACTOR = "${params.TIMEOUT_FACTOR.trim()}" } diff --git a/.ci/scripts/install-helm-test-dependencies.sh b/.ci/scripts/install-helm-test-dependencies.sh index 8037fc3453..f5c054d6ce 100755 --- a/.ci/scripts/install-helm-test-dependencies.sh +++ b/.ci/scripts/install-helm-test-dependencies.sh @@ -10,8 +10,8 @@ set -euxo pipefail # # Parameters: # - HELM_VERSION - that's the Helm version which will be installed and enabled. -# - HELM_KIND_VERSION - that's the Kind version which will be installed and enabled. -# - HELM_KUBERNETES_VERSION - that's the Kubernetes version which will be installed and enabled. +# - KIND_VERSION - that's the Kind version which will be installed and enabled. 
+# - KUBERNETES_VERSION - that's the Kubernetes version which will be installed and enabled. # MSG="parameter missing." @@ -19,20 +19,20 @@ HOME=${HOME:?$MSG} HELM_VERSION="${HELM_VERSION:-"3.5.2"}" HELM_TAR_GZ_FILE="helm-v${HELM_VERSION}-linux-amd64.tar.gz" -HELM_KIND_VERSION="v${HELM_KIND_VERSION:-"0.10.0"}" -HELM_KUBERNETES_VERSION="${HELM_KUBERNETES_VERSION:-"1.18.2"}" +KIND_VERSION="v${KIND_VERSION:-"0.10.0"}" +KUBERNETES_VERSION="${KUBERNETES_VERSION:-"1.18.2"}" HELM_CMD="${HOME}/bin/helm" KBC_CMD="${HOME}/bin/kubectl" # Install kind as a Go binary -GO111MODULE="on" go get sigs.k8s.io/kind@${HELM_KIND_VERSION} +GO111MODULE="on" go get sigs.k8s.io/kind@${KIND_VERSION} mkdir -p "${HOME}/bin" "${HOME}/.kube" touch "${HOME}/.kube/config" # Install kubectl -curl -sSLo "${KBC_CMD}" "https://storage.googleapis.com/kubernetes-release/release/v${HELM_KUBERNETES_VERSION}/bin/linux/amd64/kubectl" +curl -sSLo "${KBC_CMD}" "https://storage.googleapis.com/kubernetes-release/release/v${KUBERNETES_VERSION}/bin/linux/amd64/kubectl" chmod +x "${KBC_CMD}" ${KBC_CMD} version --client diff --git a/.ci/scripts/install-kubernetes-autodiscover-test-dependencies.sh b/.ci/scripts/install-kubernetes-autodiscover-test-dependencies.sh new file mode 100755 index 0000000000..95ebd43169 --- /dev/null +++ b/.ci/scripts/install-kubernetes-autodiscover-test-dependencies.sh @@ -0,0 +1,32 @@ +#!/usr/bin/env bash + +## Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +## or more contributor license agreements. Licensed under the Elastic License; +## you may not use this file except in compliance with the Elastic License. + +set -euxo pipefail +# +# Install the dependencies using the install and test make goals. +# +# Parameters: +# - KIND_VERSION - that's the Kind version which will be installed and enabled. +# - KUBERNETES_VERSION - that's the Kubernetes version which will be installed and enabled. +# + +MSG="parameter missing." 
+HOME=${HOME:?$MSG} + +KIND_VERSION="v${KIND_VERSION:-"0.10.0"}" +KUBERNETES_VERSION="${KUBERNETES_VERSION:-"1.18.2"}" + +KUBECTL_CMD="${HOME}/bin/kubectl" + +# Install kind as a Go binary +GO111MODULE="on" go get sigs.k8s.io/kind@${KIND_VERSION} + +mkdir -p "${HOME}/bin" + +# Install kubectl +curl -sSLo "${KUBECTL_CMD}" "https://storage.googleapis.com/kubernetes-release/release/v${KUBERNETES_VERSION}/bin/linux/amd64/kubectl" +chmod +x "${KUBECTL_CMD}" +${KUBECTL_CMD} version --client diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index c6b06ba224..623a0c91f7 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -45,6 +45,6 @@ repos: - id: check-jjbb - id: check-gherkin-lint args: [ - "--disable", "AvoidOutlineForSingleExample,TooClumsy,TooLongStep,TooManyDifferentTags,TooManySteps", + "--disable", "AvoidOutlineForSingleExample,TooClumsy,TooLongStep,TooManyDifferentTags,TooManySteps,UseBackground", "e2e/_suites/**/*.feature", ] diff --git a/cli/config/config.go b/cli/config/config.go index 4eae0ccc93..caafaa7575 100644 --- a/cli/config/config.go +++ b/cli/config/config.go @@ -128,7 +128,7 @@ func Init() { "docker", "docker-compose", } - shell.CheckInstalledSoftware(binaries) + shell.CheckInstalledSoftware(binaries...) 
InitConfig() } diff --git a/e2e/Makefile b/e2e/Makefile index 1b466647bc..24a96d0908 100644 --- a/e2e/Makefile +++ b/e2e/Makefile @@ -85,7 +85,7 @@ functional-test: install-godog .PHONY: lint lint: - @docker run -t --rm -v $(PWD):/src -w /src gherkin/lint **/*.feature --disable AvoidOutlineForSingleExample,TooClumsy,TooManySteps,TooManyDifferentTags,TooLongStep + @docker run -t --rm -v $(PWD):/src -w /src gherkin/lint **/*.feature --disable AvoidOutlineForSingleExample,TooClumsy,TooManySteps,TooManyDifferentTags,TooLongStep,UseBackground ## Test examples diff --git a/e2e/README.md b/e2e/README.md index 5b83f02b43..c79bfddc5c 100644 --- a/e2e/README.md +++ b/e2e/README.md @@ -128,8 +128,13 @@ We are going to enumerate the variables that will affect the product versions us #### Helm charts - `HELM_CHART_VERSION`. Set this environment variable to the proper version of the Helm charts to be used in the current execution. Default: See https://github.com/elastic/e2e-testing/blob/0446248bae1ff604219735998841a21a7576bfdd/.ci/Jenkinsfile#L43 - `HELM_VERSION`. Set this environment variable to the proper version of Helm to be used in the current execution. Default: See https://github.com/elastic/e2e-testing/blob/0446248bae1ff604219735998841a21a7576bfdd/.ci/Jenkinsfile#L44 -- `HELM_KIND_VERSION`. Set this environment variable to the proper version of Kind (Kubernetes in Docker) to be used in the current execution. Default: See https://github.com/elastic/e2e-testing/blob/0446248bae1ff604219735998841a21a7576bfdd/.ci/Jenkinsfile#L45 -- `HELM_KUBERNETES_VERSION`. Set this environment variable to the proper version of Kubernetes to be used in the current execution. Default: See https://github.com/elastic/e2e-testing/blob/0446248bae1ff604219735998841a21a7576bfdd/.ci/Jenkinsfile#L46 +- `KIND_VERSION`. Set this environment variable to the proper version of Kind (Kubernetes in Docker) to be used in the current execution. 
Default: See https://github.com/elastic/e2e-testing/blob/0446248bae1ff604219735998841a21a7576bfdd/.ci/Jenkinsfile#L45 +- `KUBERNETES_VERSION`. Set this environment variable to the proper version of Kubernetes to be used in the current execution. Default: See https://github.com/elastic/e2e-testing/blob/0446248bae1ff604219735998841a21a7576bfdd/.ci/Jenkinsfile#L46 + +#### Kubernetes autodiscover +- `BEAT_VERSION`. Set this environment variable to the proper version of Beats to be used in the current execution. Default: See https://github.com/elastic/e2e-testing/blob/70b1d3ddaf39567aeb4c322054b93ad7ce53e825/.ci/Jenkinsfile#L44 +- `KIND_VERSION`. Set this environment variable to the proper version of Kind (Kubernetes in Docker) to be used in the current execution. Default: See https://github.com/elastic/e2e-testing/blob/0446248bae1ff604219735998841a21a7576bfdd/.ci/Jenkinsfile#L45 +- `KUBERNETES_VERSION`. Set this environment variable to the proper version of Kubernetes to be used in the current execution. Default: See https://github.com/elastic/e2e-testing/blob/0446248bae1ff604219735998841a21a7576bfdd/.ci/Jenkinsfile#L46 #### Metricbeat - `BEAT_VERSION`. Set this environment variable to the proper version of the Metricbeat to be used in the current execution. 
Default: See https://github.com/elastic/e2e-testing/blob/70b1d3ddaf39567aeb4c322054b93ad7ce53e825/.ci/Jenkinsfile#L44 diff --git a/e2e/_suites/helm/README.md b/e2e/_suites/helm/README.md index bb7484904d..565aed3913 100644 --- a/e2e/_suites/helm/README.md +++ b/e2e/_suites/helm/README.md @@ -35,7 +35,7 @@ This is an example of the optional configuration: # Depending on the versions used, export HELM_VERSION="3.5.2" # Helm version: for Helm v2.x.x we have to initialise Tiller right after the k8s cluster export HELM_CHART_VERSION="7.11.2" # version of the Elastic's Observability Helm charts - export HELM_KUBERNETES_VERSION="1.18.2" # version of the cluster to be passed to kind + export KUBERNETES_VERSION="1.18.2" # version of the cluster to be passed to kind ``` 3. Install dependencies. diff --git a/e2e/_suites/helm/helm_charts_test.go b/e2e/_suites/helm/helm_charts_test.go index e1f5dcdea7..b0d81b35c6 100644 --- a/e2e/_suites/helm/helm_charts_test.go +++ b/e2e/_suites/helm/helm_charts_test.go @@ -79,7 +79,7 @@ func setupSuite() { helmVersion = shell.GetEnv("HELM_VERSION", helmVersion) helmChartVersion = shell.GetEnv("HELM_CHART_VERSION", helmChartVersion) - kubernetesVersion = shell.GetEnv("HELM_KUBERNETES_VERSION", kubernetesVersion) + kubernetesVersion = shell.GetEnv("KUBERNETES_VERSION", kubernetesVersion) timeoutFactor = shell.GetEnvInteger("TIMEOUT_FACTOR", timeoutFactor) stackVersion = shell.GetEnv("STACK_VERSION", stackVersion) @@ -751,5 +751,5 @@ func toolsAreInstalled() { "helm", } - shell.CheckInstalledSoftware(binaries) + shell.CheckInstalledSoftware(binaries...) 
} diff --git a/e2e/_suites/kubernetes-autodiscover/README.md b/e2e/_suites/kubernetes-autodiscover/README.md new file mode 100644 index 0000000000..2f35723856 --- /dev/null +++ b/e2e/_suites/kubernetes-autodiscover/README.md @@ -0,0 +1,174 @@ +# Kubernetes autodiscover End-To-End tests + +## Motivation + +Kubernetes autodiscover is a key feature of any observability solution for this +orchestrator, resources change dynamically and observer configurations have to +adapt to these changes. +Discovery of resources in Kubernetes poses some challenges, there are several +corner cases that need to be handled, that involve keeping track of changes of +state that are not always deterministic. This complicates the implementation and +its testing. With a lack of good test coverage and many cases to cover, it is easy to +introduce regressions even in basic use cases. +This suite covers a set of use cases that are better tested with real Kubernetes +implementations. + +## How do the tests work? + +At the topmost level, the test framework uses a BDD framework written in Go, where +we set the expected behavior of use cases in a feature file using Gherkin, and +implementing the steps in Go code. + +`kubectl` is used to configure resources in a Kubernetes cluster. `kind` can be +used to provide a local cluster. + +The tests will follow this general high-level approach: + +1. It uses `kubectl` to interact with a Kubernetes cluster. +1. If there is no configured Kubernetes cluster in kubectl, a new one + is deployed using `kind`. If a cluster is created, it is also removed after + the suite is executed. +1. Execute BDD steps representing each scenario. Each scenario is executed in a + different Kubernetes namespace, so it is easier to clean up and avoid one + scenario affecting the others. These namespaces are created and destroyed + before and after each scenario. +1. New scenarios can be configured providing the Gherkin definition and + templatized Kubernetes manifests. 
+ +### Adding new scenarios + +Scenarios defined in this suite are based on a sequence of actions and +expectations defined in the feature files, and on some templates of resources to +deploy in Kubernetes. Templates are stored in `testdata/templates`, and must +have the `.yml.tmpl` extension. + +Several of the available steps can be parameterized with template names, these +names can be written as the name of the template without the extension. Spaces +in these names are replaced with hyphens. + +There are steps intended to define a desired state for the resources in the +template, such as the following ones: +* `"filebeat" is running` deploys the template `filebeat.yml.tmpl` and waits for + filebeat pods to be running. This step expects some pod to be labeled with + `k8s-app:filebeat`. +* `"a service" is deployed` deploys the resources in the template + `a-service.yml.tmpl`, and continues without expecting any state of the + deployed resources. +* `"a pod" is deleted` deletes the resources defined in the template + `a-pod.yml.tmpl`. + +Any of these steps can be parameterized with an option that can be used to +select different configuration blocks in the template. For example the following +step would select the configuration block marked as `monitor annotations` in +the template: +```shell + `"a service" is deployed with "monitor annotations"` +``` + +These option blocks can be defined in the template like this: +```yaml +meta: + annotations: +{{ if option "monitor annotations" }} + co.elastic.monitor/type: tcp + co.elastic.monitor/hosts: "${data.host}:6379" +{{ end }} +``` + +Steps defining expectations are mostly based on checking the events generated by +the deployed observers. Steps available for that are like the following ones: +* `"filebeat" collects events with "kubernetes.pod.name:a-pod"` checks that the + filebeat pod has collected at least one event with the field + `kubernetes.pod.name` set to `a-pod`. 
+* `"metricbeat" does not collect events with "kubernetes.pod.name:a-pod" during "30s"` + expects to have a period of time of 30 seconds without collecting events with + the given field and value. + +These steps expect to find the events in the `/tmp/beats-events` file in pods marked +with the label `k8s-app`. + +There are other more specific steps. Examples for them can be found in the +feature files. + + +### Diagnosing test failures + +The first step in determining the exact failure is to try and reproduce the test run +locally, ideally using the DEBUG log level to enhance the log output. Once you've +done that, look at the output from the test run. + +### Running the tests + +1. Clone this repository, say into a folder named `e2e-testing`. + + ``` shell + git clone git@github.com:elastic/e2e-testing.git + ``` + +2. Configure the version of the tools you want to test (Optional). + +This is an example of the optional configuration: + + ```shell + # Depending on the versions used, + export BEAT_VERSION=7.12.0 # version of beats to use + export BEATS_USE_CI_SNAPSHOTS=true # to select snapshots built by beats-ci + export KUBERNETES_VERSION="1.18.2" # version of the cluster to be passed to kind + ``` + +3. Install dependencies. + + - Install Kubectl 1.18 or newer + - Install Kind 0.10.0 or newer + - Install Go: `https://golang.org/doc/install` _(The CI uses [GVM](https://github.com/andrewkroh/gvm))_ + - Install godog (from project's root directory): `make -C e2e install-godog` + +4. Run the tests. + ```shell + cd e2e/_suites/kubernetes-autodiscover + OP_LOG_LEVEL=DEBUG godog + ``` + + Optionally, you can run only one of the feature files + ```shell + cd e2e/_suites/kubernetes-autodiscover + OP_LOG_LEVEL=DEBUG godog features/filebeat.feature + ``` + + The tests will take a few minutes to run, spinning up the Kubernetes cluster + if needed. 
+ +### Problems with the environment + +If a Kubernetes cluster is pre-configured in kubectl, you can directly use this +command to investigate the resources deployed in the cluster by the suite. If +the cluster was deployed by the suite, it will have a randomized name, and will +use a temporary configuration file for kubectl. + +The name of the cluster can be obtained with `kubectl get clusters`, clusters +created by this suite will follow the pattern `kind-`. + +The temporary configuration file is logged by the suite at the info level. If a +cluster is created by the suite, you will see something like this: +```shell +INFO[0000] Kubernetes cluster not available, will start one using kind +INFO[0000] Using kind v0.10.0 go1.15.7 linux/amd64 +INFO[0046] Kubeconfig in /tmp/test-252418601/kubeconfig +``` + +Then you could use the following command to control the resources with +`kubectl`: +```shell +kubectl --kubeconfig /tmp/test-252418601/kubeconfig ... +``` + +Each scenario creates its own namespace, you can find them with `kubectl get +ns`, they will follow the pattern `test-`. + +Interrupting the tests with Ctrl-C will leave all resources as they were, you +can use the previous instructions to investigate problems or access to logs of +the deployed pods. 
+ +### I cannot move on + +Please open an issue here: https://github.com/elastic/e2e-testing/issues/new diff --git a/e2e/_suites/kubernetes-autodiscover/autodiscover_test.go b/e2e/_suites/kubernetes-autodiscover/autodiscover_test.go new file mode 100644 index 0000000000..561cfb01ae --- /dev/null +++ b/e2e/_suites/kubernetes-autodiscover/autodiscover_test.go @@ -0,0 +1,437 @@ +package main + +import ( + "bytes" + "context" + "encoding/json" + "fmt" + "io" + "io/ioutil" + "os" + "path/filepath" + "strings" + "text/template" + "time" + + "github.com/cenkalti/backoff/v4" + "github.com/cucumber/godog" + messages "github.com/cucumber/messages-go/v10" + log "github.com/sirupsen/logrus" + + "github.com/elastic/e2e-testing/internal/shell" + "github.com/elastic/e2e-testing/internal/utils" +) + +const defaultBeatVersion = "8.0.0-SNAPSHOT" +const defaultEventsWaitTimeout = 120 * time.Second +const defaultDeployWaitTimeout = 120 * time.Second + +type podsManager struct { + kubectl kubernetesControl + ctx context.Context +} + +func (m *podsManager) executeTemplateFor(podName string, writer io.Writer, options []string) error { + path := filepath.Join("testdata/templates", sanitizeName(podName)+".yml.tmpl") + + usedOptions := make(map[string]bool) + funcs := template.FuncMap{ + "option": func(o string) bool { + usedOptions[o] = true + for _, option := range options { + if o == option { + return true + } + } + return false + }, + "beats_namespace": func() string { + return utils.GetDockerNamespaceEnvVar("beats") + }, + "beats_version": func() string { + return shell.GetEnv("GITHUB_CHECK_SHA1", shell.GetEnv("BEAT_VERSION", defaultBeatVersion)) + }, + "namespace": func() string { + return m.kubectl.Namespace + }, + // Can be used to add owner references so cluster-level resources + // are removed when removing the namespace. 
+ "namespace_uid": func() string { + return m.kubectl.NamespaceUID + }, + } + + t, err := template.New(filepath.Base(path)).Funcs(funcs).ParseFiles(path) + if os.IsNotExist(err) { + log.Debugf("template %s does not exist", path) + return godog.ErrPending + } + if err != nil { + return fmt.Errorf("parsing template %s: %w", path, err) + } + + err = t.ExecuteTemplate(writer, filepath.Base(path), nil) + if err != nil { + return fmt.Errorf("executing template %s: %w", path, err) + } + + for _, option := range options { + if _, used := usedOptions[option]; !used { + log.Debugf("option '%s' is not used in template for '%s'", option, podName) + return godog.ErrPending + } + } + + return nil +} + +func (m *podsManager) isDeleted(podName string, options []string) error { + var buf bytes.Buffer + err := m.executeTemplateFor(podName, &buf, options) + if err != nil { + return err + } + + _, err = m.kubectl.RunWithStdin(m.ctx, &buf, "delete", "-f", "-") + if err != nil { + return fmt.Errorf("failed to delete '%s': %w", podName, err) + } + return nil +} + +func (m *podsManager) isDeployed(podName string, options []string) error { + var buf bytes.Buffer + err := m.executeTemplateFor(podName, &buf, options) + if err != nil { + return err + } + + _, err = m.kubectl.RunWithStdin(m.ctx, &buf, "apply", "-f", "-") + if err != nil { + return fmt.Errorf("failed to deploy '%s': %w", podName, err) + } + return nil +} + +func (m *podsManager) isRunning(podName string, options []string) error { + err := m.isDeployed(podName, options) + if err != nil { + return err + } + + ctx, cancel := context.WithTimeout(m.ctx, defaultDeployWaitTimeout) + defer cancel() + + _, err = m.getPodInstances(ctx, podName) + if err != nil { + return fmt.Errorf("waiting for instance of '%s': %w", podName, err) + } + return nil +} + +func (m *podsManager) resourceIs(podName string, state string, options ...string) error { + switch state { + case "running": + return m.isRunning(podName, options) + case "deployed": + 
return m.isDeployed(podName, options) + case "deleted": + return m.isDeleted(podName, options) + default: + return godog.ErrPending + } +} + +// This only works as JSON, not as YAML. +// From https://kubernetes.io/docs/concepts/workloads/pods/ephemeral-containers/#ephemeral-containers-api +const ephemeralContainerTemplate = ` +{ + "apiVersion": "v1", + "kind": "EphemeralContainers", + "metadata": { + "name": "{{ .podName }}" + }, + "ephemeralContainers": [{ + "name": "ephemeral-container", + "command": [ + "/bin/sh", "-c", + "while true; do echo Hi from an ephemeral container; sleep 1; done" + ], + "image": "busybox", + "imagePullPolicy": "IfNotPresent", + "stdin": true, + "tty": true, + "terminationMessagePolicy": "File" + }] +} +` + +func (m *podsManager) startEphemeralContainerIn(podName string) error { + podName = sanitizeName(podName) + t := template.Must(template.New("ephemeral-container").Parse(ephemeralContainerTemplate)) + var buf bytes.Buffer + err := t.Execute(&buf, map[string]string{"podName": podName}) + if err != nil { + return fmt.Errorf("executing ephemeral-container template: %w", err) + } + + path := fmt.Sprintf("/api/v1/namespaces/%s/pods/%s/ephemeralcontainers", m.kubectl.Namespace, podName) + _, err = m.kubectl.RunWithStdin(m.ctx, &buf, "replace", "--raw", path, "-f", "-") + if err != nil { + return fmt.Errorf("failed to create ephemeral container: %w. 
Is EphemeralContainers feature flag enabled in the cluster?", err) + } + return nil +} + +func (m *podsManager) collectsEventsWith(podName string, condition string) error { + _, _, ok := splitCondition(condition) + if !ok { + return fmt.Errorf("invalid condition '%s'", condition) + } + + return m.waitForEventsCondition(podName, func(ctx context.Context, localPath string) (bool, error) { + ok, err := containsEventsWith(localPath, condition) + if ok { + return true, nil + } + if err != nil { + log.Debugf("Error checking if %v contains %v: %v", localPath, condition, err) + } + return false, nil + }) +} + +func (m *podsManager) doesNotCollectEvents(podName, condition, duration string) error { + _, _, ok := splitCondition(condition) + if !ok { + return fmt.Errorf("invalid condition '%s'", condition) + } + + d, err := time.ParseDuration(duration) + if err != nil { + return fmt.Errorf("invalid duration %s: %w", d, err) + } + + return m.waitForEventsCondition(podName, func(ctx context.Context, localPath string) (bool, error) { + events, err := readEventsWith(localPath, condition) + if err != nil { + return false, err + } + // No events ever received, so condition satisfied. + if len(events) == 0 { + return true, nil + } + + lastEvent := events[len(events)-1] + lastTimestamp, ok := lastEvent["@timestamp"].(string) + if !ok { + return false, fmt.Errorf("event %v doesn't contain a @timestamp", lastEvent) + } + t, err := time.Parse(time.RFC3339, lastTimestamp) + if err != nil { + return false, fmt.Errorf("failed to parse @timestamp %s: %w", lastTimestamp, err) + } + if sinceLast := time.Now().Sub(t); sinceLast <= d { + // Condition cannot be satisfied until the duration has passed after the last + // event. So wait till then. 
+ select { + case <-ctx.Done(): + case <-time.After(d - sinceLast): + } + return false, nil + } + + return true, nil + }) +} + +func (m *podsManager) waitForEventsCondition(podName string, conditionFn func(ctx context.Context, localPath string) (bool, error)) error { + ctx, cancel := context.WithTimeout(m.ctx, defaultEventsWaitTimeout) + defer cancel() + + instances, err := m.getPodInstances(ctx, podName) + if err != nil { + return fmt.Errorf("failed to get pod name: %w", err) + } + + tmpDir, err := ioutil.TempDir(os.TempDir(), "test-") + if err != nil { + return fmt.Errorf("failed to create temporary directory: %w", err) + } + defer os.RemoveAll(tmpDir) + + containerPath := fmt.Sprintf("%s/%s:/tmp/beats-events", m.kubectl.Namespace, instances[0]) + localPath := filepath.Join(tmpDir, "events") + exp := backoff.WithContext(backoff.NewConstantBackOff(1*time.Second), ctx) + return backoff.Retry(func() error { + _, err := m.kubectl.Run(ctx, "cp", "--no-preserve", containerPath, localPath) + if err != nil { + log.Debugf("Failed to copy events from %s to %s: %s", containerPath, localPath, err) + return err + } + ok, err := conditionFn(ctx, localPath) + if !ok { + return fmt.Errorf("events do not satisfy condition") + } + return nil + }, exp) +} + +func (m *podsManager) getPodInstances(ctx context.Context, podName string) (instances []string, err error) { + app := sanitizeName(podName) + ticker := backoff.WithContext(backoff.NewConstantBackOff(1*time.Second), ctx) + err = backoff.Retry(func() error { + output, err := m.kubectl.Run(ctx, "get", "pods", + "-l", "k8s-app="+app, + "--template", `{{range .items}}{{ if eq .status.phase "Running" }}{{.metadata.name}}{{"\n"}}{{ end }}{{end}}`) + if err != nil { + return err + } + if output == "" { + return fmt.Errorf("no running pods with label k8s-app=%s found", app) + } + instances = strings.Split(strings.TrimSpace(output), "\n") + return nil + }, ticker) + return +} + +func splitCondition(c string) (key string, value string, ok 
bool) { + fields := strings.SplitN(c, ":", 2) + if len(fields) != 2 || len(fields[0]) == 0 { + return + } + + return fields[0], fields[1], true +} + +func flattenMap(m map[string]interface{}) map[string]interface{} { + flattened := make(map[string]interface{}) + for k, v := range m { + switch child := v.(type) { + case map[string]interface{}: + childMap := flattenMap(child) + for ck, cv := range childMap { + flattened[k+"."+ck] = cv + } + default: + flattened[k] = v + } + } + return flattened +} + +func containsEventsWith(path string, condition string) (bool, error) { + events, err := readEventsWith(path, condition) + if err != nil { + return false, err + } + return len(events) > 0, nil +} + +func readEventsWith(path string, condition string) ([]map[string]interface{}, error) { + key, value, ok := splitCondition(condition) + if !ok { + return nil, fmt.Errorf("invalid condition '%s'", condition) + } + + f, err := os.Open(path) + if err != nil { + return nil, fmt.Errorf("opening %s: %w", path, err) + } + defer f.Close() + + var events []map[string]interface{} + decoder := json.NewDecoder(f) + for decoder.More() { + var event map[string]interface{} + err := decoder.Decode(&event) + if err == io.EOF { + break + } + if err != nil { + return nil, fmt.Errorf("decoding event: %w", err) + } + + event = flattenMap(event) + if v, ok := event[key]; ok && fmt.Sprint(v) == value { + events = append(events, event) + } + } + + return events, nil +} + +func sanitizeName(name string) string { + return strings.ReplaceAll(strings.ToLower(name), " ", "-") +} + +func waitDuration(ctx context.Context, duration string) error { + d, err := time.ParseDuration(duration) + if err != nil { + return fmt.Errorf("invalid duration %s: %w", d, err) + } + + select { + case <-time.After(d): + case <-ctx.Done(): + } + + return nil +} + +var cluster kubernetesCluster + +func InitializeTestSuite(ctx *godog.TestSuiteContext) { + suiteContext, cancel := context.WithCancel(context.Background()) + 
log.DeferExitHandler(cancel) + + ctx.BeforeSuite(func() { + err := cluster.initialize(suiteContext) + if err != nil { + log.WithError(err).Fatal("Failed to initialize cluster") + } + log.DeferExitHandler(func() { + cluster.cleanup(suiteContext) + }) + }) + + ctx.AfterSuite(func() { + cluster.cleanup(suiteContext) + cancel() + }) +} + +func InitializeScenario(ctx *godog.ScenarioContext) { + scenarioCtx, cancel := context.WithCancel(context.Background()) + log.DeferExitHandler(cancel) + + var kubectl kubernetesControl + var pods podsManager + ctx.BeforeScenario(func(*messages.Pickle) { + kubectl = cluster.Kubectl().WithNamespace(scenarioCtx, "") + if kubectl.Namespace != "" { + log.Debugf("Running scenario in namespace: %s", kubectl.Namespace) + } + pods.kubectl = kubectl + pods.ctx = scenarioCtx + log.DeferExitHandler(func() { kubectl.Cleanup(scenarioCtx) }) + }) + ctx.AfterScenario(func(*messages.Pickle, error) { + kubectl.Cleanup(scenarioCtx) + cancel() + }) + + ctx.Step(`^"([^"]*)" have passed$`, func(d string) error { return waitDuration(scenarioCtx, d) }) + + ctx.Step(`^"([^"]*)" is ([a-z]*)$`, func(name, state string) error { + return pods.resourceIs(name, state) + }) + ctx.Step(`^"([^"]*)" is ([a-z]*) with "([^"]*)"$`, func(name, state, option string) error { + return pods.resourceIs(name, state, option) + }) + + ctx.Step(`^"([^"]*)" collects events with "([^"]*:[^"]*)"$`, pods.collectsEventsWith) + ctx.Step(`^"([^"]*)" does not collect events with "([^"]*)" during "([^"]*)"$`, pods.doesNotCollectEvents) + ctx.Step(`^an ephemeral container is started in "([^"]*)"$`, pods.startEphemeralContainerIn) +} diff --git a/e2e/_suites/kubernetes-autodiscover/features/filebeat.feature b/e2e/_suites/kubernetes-autodiscover/features/filebeat.feature new file mode 100644 index 0000000000..13d881401f --- /dev/null +++ b/e2e/_suites/kubernetes-autodiscover/features/filebeat.feature @@ -0,0 +1,37 @@ +@kubernetes-autodiscover +@filebeat +Feature: Filebeat + Use Kubernetes 
autodiscover features in Filebeat to collect logs. + +Scenario: Logs collection from running pod + Given "filebeat" is running with "hints enabled" + When "a pod" is deployed + Then "filebeat" collects events with "kubernetes.pod.name:a-pod" + +Scenario: Logs collection from failing pod + Given "filebeat" is running with "hints enabled" + When "a failing pod" is deployed + Then "filebeat" collects events with "kubernetes.pod.name:a-failing-pod" + +# This scenario explicitly waits for 60 seconds before doing checks +# to be sure that at least one job has been executed. +Scenario: Logs collection from short-living cronjobs + Given "filebeat" is running with "hints enabled" + And "a short-living cronjob" is deployed + When "60s" have passed + Then "filebeat" collects events with "kubernetes.container.name:cronjob-container" + +Scenario: Logs collection from a pod with an init container + Given "filebeat" is running with "hints enabled" + When "a pod" is deployed with "init container" + Then "filebeat" collects events with "kubernetes.container.name:init-container" + And "filebeat" collects events with "kubernetes.container.name:container-in-pod" + +# Ephemeral containers need to be explicitly enabled in the API server with the +# `EphemeralContainers` feature flag. 
+Scenario: Logs collection from a pod with an ephemeral container + Given "filebeat" is running with "hints enabled" + And "a pod" is deployed + And "filebeat" collects events with "kubernetes.container.name:container-in-pod" + When an ephemeral container is started in "a pod" + Then "filebeat" collects events with "kubernetes.container.name:ephemeral-container" diff --git a/e2e/_suites/kubernetes-autodiscover/features/heartbeat.feature b/e2e/_suites/kubernetes-autodiscover/features/heartbeat.feature new file mode 100644 index 0000000000..a485aca889 --- /dev/null +++ b/e2e/_suites/kubernetes-autodiscover/features/heartbeat.feature @@ -0,0 +1,31 @@ +@kubernetes-autodiscover +@heartbeat +Feature: Heartbeat + Use Kubernetes autodiscover features in Heartbeat to monitor pods, services and nodes. + +Scenario: Monitor pod availability using hints with named ports + Given "heartbeat" is running with "hints enabled for pods" + When "redis" is deployed with "monitor annotations with named port" + Then "heartbeat" collects events with "kubernetes.pod.name:redis" + And "heartbeat" collects events with "monitor.status:up" + +Scenario: Monitor service availability using hints + Given "heartbeat" is running with "hints enabled for services" + And "redis service" is deployed with "monitor annotations" + When "redis" is running + Then "heartbeat" collects events with "kubernetes.service.name:redis" + And "heartbeat" collects events with "monitor.status:up" + And "heartbeat" does not collect events with "monitor.status:down" during "20s" + +# A service without backend pods should be reported as down. 
+Scenario: Monitor service unavailability using hints + Given "heartbeat" is running with "hints enabled for services" + When "redis service" is deployed with "monitor annotations" + Then "heartbeat" collects events with "kubernetes.service.name:redis" + And "heartbeat" collects events with "monitor.status:down" + And "heartbeat" does not collect events with "monitor.status:up" during "20s" + +Scenario: Monitor nodes using hints + When "heartbeat" is running with "hints enabled for nodes" + Then "heartbeat" collects events with "url.port:10250" + And "heartbeat" collects events with "monitor.status:up" diff --git a/e2e/_suites/kubernetes-autodiscover/features/metricbeat.feature b/e2e/_suites/kubernetes-autodiscover/features/metricbeat.feature new file mode 100644 index 0000000000..af7656f754 --- /dev/null +++ b/e2e/_suites/kubernetes-autodiscover/features/metricbeat.feature @@ -0,0 +1,17 @@ +@kubernetes-autodiscover +@metricbeat +Feature: Metricbeat + Use Kubernetes autodiscover features in Metricbeat to discover services and + collect metrics from them. 
+ +Scenario: Metrics collection stops when the pod is stopped + Given "metricbeat" is running with "hints enabled" + And "redis" is running with "metrics annotations" + And "metricbeat" collects events with "kubernetes.pod.name:redis" + When "redis" is deleted + Then "metricbeat" does not collect events with "kubernetes.pod.name:redis" during "30s" + +Scenario: Metrics collection configured with hints with named ports + Given "metricbeat" is running with "hints enabled" + When "redis" is running with "metrics annotations with named port" + Then "metricbeat" collects events with "kubernetes.pod.name:redis" diff --git a/e2e/_suites/kubernetes-autodiscover/kubernetes.go b/e2e/_suites/kubernetes-autodiscover/kubernetes.go new file mode 100644 index 0000000000..6143392c86 --- /dev/null +++ b/e2e/_suites/kubernetes-autodiscover/kubernetes.go @@ -0,0 +1,175 @@ +package main + +import ( + "context" + "fmt" + "io" + "io/ioutil" + "os" + "path/filepath" + "time" + + "github.com/cenkalti/backoff/v4" + "github.com/google/uuid" + log "github.com/sirupsen/logrus" + + "github.com/elastic/e2e-testing/internal/common" + "github.com/elastic/e2e-testing/internal/shell" +) + +type kubernetesControl struct { + config string + Namespace string + NamespaceUID string + createdNamespace bool +} + +func (c kubernetesControl) WithConfig(config string) kubernetesControl { + c.config = config + return c +} + +func (c kubernetesControl) WithNamespace(ctx context.Context, namespace string) kubernetesControl { + if namespace == "" { + namespace = "test-" + uuid.New().String() + err := c.createNamespace(ctx, namespace) + if err != nil { + log.WithError(err).Fatalf("Failed to create namespace %s", namespace) + } + c.createdNamespace = true + } + uid, err := c.Run(ctx, "get", "namespace", namespace, "-o", "jsonpath={.metadata.uid}") + if err != nil { + log.WithError(err).Fatalf("Failed to get namespace %s uid", namespace) + } + c.NamespaceUID = uid + c.Namespace = namespace + return c +} + +func (c 
kubernetesControl) createNamespace(ctx context.Context, namespace string) error { + if namespace == "" { + return nil + } + + _, err := c.Run(ctx, "create", "namespace", namespace) + if err != nil { + return fmt.Errorf("namespace creation failed: %w", err) + } + + // Wait for default account to be available, if not it is not possible to + // deploy pods in this namespace. + timeout := 60 * time.Second + exp := backoff.WithContext(common.GetExponentialBackOff(timeout), ctx) + return backoff.Retry(func() error { + _, err := c.Run(ctx, "get", "serviceaccount", "default") + if err != nil { + return fmt.Errorf("namespace was created but still not ready: %w", err) + } + return nil + }, exp) +} + +func (c kubernetesControl) Cleanup(ctx context.Context) error { + if c.createdNamespace && c.Namespace != "" { + output, err := c.Run(ctx, "delete", "namespace", c.Namespace) + if err != nil { + return fmt.Errorf("failed to delete namespace %s: %v: %s", c.Namespace, err, output) + } + } + return nil +} + +func (c kubernetesControl) Run(ctx context.Context, runArgs ...string) (output string, err error) { + return c.RunWithStdin(ctx, nil, runArgs...) +} + +func (c kubernetesControl) RunWithStdin(ctx context.Context, stdin io.Reader, runArgs ...string) (output string, err error) { + shell.CheckInstalledSoftware("kubectl") + var args []string + if c.config != "" { + args = append(args, "--kubeconfig", c.config) + } + if c.Namespace != "" { + args = append(args, "--namespace", c.Namespace) + } + args = append(args, runArgs...) + return shell.ExecuteWithStdin(ctx, ".", stdin, "kubectl", args...) 
+} + +type kubernetesCluster struct { + kindName string + kubeconfig string + + tmpDir string +} + +func (c kubernetesCluster) Kubectl() kubernetesControl { + return kubernetesControl{}.WithConfig(c.kubeconfig) +} + +func (c kubernetesCluster) isAvailable(ctx context.Context) error { + _, err := c.Kubectl().Run(ctx, "api-versions") + return err +} + +func (c *kubernetesCluster) initialize(ctx context.Context) error { + err := c.isAvailable(ctx) + if err == nil { + return nil + } + + log.Info("Kubernetes cluster not available, will start one using kind") + shell.CheckInstalledSoftware("kind") + kindVersion, err := shell.Execute(ctx, ".", "kind", "version") + if err != nil { + log.WithError(err).Fatal("Failed to get kind version") + } + log.Infof("Using %s", kindVersion) + + c.tmpDir, err = ioutil.TempDir(os.TempDir(), "test-") + if err != nil { + log.WithError(err).Fatal("Failed to create temporary directory") + } + + name := "kind-" + uuid.New().String() + c.kubeconfig = filepath.Join(c.tmpDir, "kubeconfig") + + args := []string{ + "create", "cluster", + "--name", name, + "--config", "testdata/kind.yml", + "--kubeconfig", c.kubeconfig, + } + if version, ok := os.LookupEnv("KUBERNETES_VERSION"); ok && version != "" { + log.Infof("Installing Kubernetes v%s", version) + args = append(args, "--image", "kindest/node:v"+version) + } + output, err := shell.Execute(ctx, ".", "kind", args...) 
+ if err != nil { + log.WithError(err).WithField("output", output).Fatal("Failed to create kind cluster") + return err + } + c.kindName = name + + log.Infof("Kubeconfig in %s", c.kubeconfig) + + return nil +} + +func (c *kubernetesCluster) cleanup(ctx context.Context) { + if c.kindName != "" { + _, err := shell.Execute(ctx, ".", "kind", "delete", "cluster", "--name", c.kindName) + if err != nil { + log.Warnf("Failed to delete kind cluster %s", c.kindName) + } + c.kindName = "" + log.Infof("kind cluster %s was deleted", c.kindName) + } + if c.tmpDir != "" { + err := os.RemoveAll(c.tmpDir) + if err != nil { + log.Warnf("Failed to remove temporary directory %s", c.tmpDir) + } + } +} diff --git a/e2e/_suites/kubernetes-autodiscover/testdata/kind.yml b/e2e/_suites/kubernetes-autodiscover/testdata/kind.yml new file mode 100644 index 0000000000..95303539bd --- /dev/null +++ b/e2e/_suites/kubernetes-autodiscover/testdata/kind.yml @@ -0,0 +1,4 @@ +kind: Cluster +apiVersion: kind.x-k8s.io/v1alpha4 +featureGates: + EphemeralContainers: true diff --git a/e2e/_suites/kubernetes-autodiscover/testdata/templates/a-failing-pod.yml.tmpl b/e2e/_suites/kubernetes-autodiscover/testdata/templates/a-failing-pod.yml.tmpl new file mode 100644 index 0000000000..bb16f810f8 --- /dev/null +++ b/e2e/_suites/kubernetes-autodiscover/testdata/templates/a-failing-pod.yml.tmpl @@ -0,0 +1,13 @@ +apiVersion: v1 +kind: Pod +metadata: + name: a-failing-pod +spec: + containers: + - name: failing-container-in-pod + image: busybox + imagePullPolicy: IfNotPresent + command: + - /bin/sh + - -c + - echo I will fail now; false diff --git a/e2e/_suites/kubernetes-autodiscover/testdata/templates/a-pod.yml.tmpl b/e2e/_suites/kubernetes-autodiscover/testdata/templates/a-pod.yml.tmpl new file mode 100644 index 0000000000..966e4e7c0e --- /dev/null +++ b/e2e/_suites/kubernetes-autodiscover/testdata/templates/a-pod.yml.tmpl @@ -0,0 +1,23 @@ +apiVersion: v1 +kind: Pod +metadata: + name: a-pod +spec: +{{ if option "init 
container" }} + initContainers: + - name: init-container + image: busybox + imagePullPolicy: IfNotPresent + command: + - /bin/sh + - -c + - echo Hi from an init container +{{ end }} + containers: + - name: container-in-pod + image: busybox + imagePullPolicy: IfNotPresent + command: + - /bin/sh + - -c + - while true; do echo Hi from a container in a pod; sleep 1; done diff --git a/e2e/_suites/kubernetes-autodiscover/testdata/templates/a-short-living-cronjob.yml.tmpl b/e2e/_suites/kubernetes-autodiscover/testdata/templates/a-short-living-cronjob.yml.tmpl new file mode 100644 index 0000000000..394595080c --- /dev/null +++ b/e2e/_suites/kubernetes-autodiscover/testdata/templates/a-short-living-cronjob.yml.tmpl @@ -0,0 +1,19 @@ +apiVersion: batch/v1beta1 +kind: CronJob +metadata: + name: a-short-living-cronjob +spec: + schedule: "* * * * *" + jobTemplate: + spec: + template: + spec: + containers: + - name: cronjob-container + image: busybox + imagePullPolicy: IfNotPresent + command: + - /bin/sh + - -c + - date; echo Hello from the Kubernetes cluster + restartPolicy: OnFailure diff --git a/e2e/_suites/kubernetes-autodiscover/testdata/templates/filebeat.yml.tmpl b/e2e/_suites/kubernetes-autodiscover/testdata/templates/filebeat.yml.tmpl new file mode 100644 index 0000000000..70cfce3cb6 --- /dev/null +++ b/e2e/_suites/kubernetes-autodiscover/testdata/templates/filebeat.yml.tmpl @@ -0,0 +1,143 @@ +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: filebeat-config + labels: + k8s-app: filebeat +data: + filebeat.yml: |- + filebeat.autodiscover: + providers: + - type: kubernetes + node: ${NODE_NAME} +{{ if option "hints enabled" }} + hints.enabled: true + hints.default_config: + type: container + paths: + - /var/log/containers/*${data.kubernetes.container.id}.log +{{ end }} + + output.file: + path: /tmp + filename: beats-events +--- +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: filebeat + labels: + k8s-app: filebeat +spec: + selector: + matchLabels: + k8s-app: 
filebeat + template: + metadata: + labels: + k8s-app: filebeat + spec: + serviceAccountName: filebeat + terminationGracePeriodSeconds: 30 + hostNetwork: true + dnsPolicy: ClusterFirstWithHostNet + containers: + - name: filebeat + image: docker.elastic.co/{{ beats_namespace }}/filebeat:{{ beats_version }} + args: [ + "-c", "/etc/filebeat.yml", + "-e", + ] + env: + - name: NODE_NAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName + securityContext: + runAsUser: 0 + # If using Red Hat OpenShift uncomment this: + #privileged: true + resources: + limits: + memory: 200Mi + requests: + cpu: 100m + memory: 100Mi + volumeMounts: + - name: config + mountPath: /etc/filebeat.yml + readOnly: true + subPath: filebeat.yml + #- name: data + # mountPath: /usr/share/filebeat/data + - name: varlibdockercontainers + mountPath: /var/lib/docker/containers + readOnly: true + - name: varlog + mountPath: /var/log + readOnly: true + volumes: + - name: config + configMap: + defaultMode: 0640 + name: filebeat-config + - name: varlibdockercontainers + hostPath: + path: /var/lib/docker/containers + - name: varlog + hostPath: + path: /var/log + # data folder stores a registry of read status for all files, so we don't send everything again on a Filebeat pod restart + #- name: data + # hostPath: + # # When filebeat runs as non-root user, this directory needs to be writable by group (g+w). 
+ # path: /var/lib/filebeat-data + # type: DirectoryOrCreate +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: filebeat-{{ namespace }} + ownerReferences: + - apiVersion: v1 + kind: Namespace + name: {{ namespace }} + uid: {{ namespace_uid }} +subjects: +- kind: ServiceAccount + name: filebeat + namespace: {{ namespace }} +roleRef: + kind: ClusterRole + name: filebeat + apiGroup: rbac.authorization.k8s.io +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: filebeat + labels: + k8s-app: filebeat +rules: +- apiGroups: [""] # "" indicates the core API group + resources: + - namespaces + - pods + - nodes + verbs: + - get + - watch + - list +- apiGroups: ["apps"] + resources: + - replicasets + verbs: ["get", "list", "watch"] +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: filebeat + labels: + k8s-app: filebeat +--- diff --git a/e2e/_suites/kubernetes-autodiscover/testdata/templates/heartbeat.yml.tmpl b/e2e/_suites/kubernetes-autodiscover/testdata/templates/heartbeat.yml.tmpl new file mode 100644 index 0000000000..aa02eca6ad --- /dev/null +++ b/e2e/_suites/kubernetes-autodiscover/testdata/templates/heartbeat.yml.tmpl @@ -0,0 +1,145 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: heartbeat-deployment-config + labels: + k8s-app: heartbeat +data: + heartbeat.yml: |- + heartbeat.autodiscover: + providers: +{{ if option "hints enabled for pods" }} + - type: kubernetes + resource: pod + scope: cluster + node: ${NODE_NAME} + hints.enabled: true +{{ end }} +{{ if option "hints enabled for services" }} + - type: kubernetes + resource: service + scope: cluster + node: ${NODE_NAME} + hints.enabled: true +{{ end }} +{{ if option "hints enabled for nodes" }} + - type: kubernetes + resource: node + node: ${NODE_NAME} + scope: cluster + templates: + # Example, check kubeletes port of all cluster nodes: + - config: + - hosts: + - ${data.host}:10250 + name: ${data.kubernetes.node.name} + 
schedule: '@every 10s' + timeout: 5s + type: tcp +{{ end }} + + output.file: + path: /tmp + filename: beats-events +--- +# Deploy singleton instance in the whole cluster for some unique data sources, like kube-state-metrics +apiVersion: apps/v1 +kind: Deployment +metadata: + name: heartbeat + labels: + k8s-app: heartbeat +spec: + selector: + matchLabels: + k8s-app: heartbeat + template: + metadata: + labels: + k8s-app: heartbeat + spec: + serviceAccountName: heartbeat + hostNetwork: true + dnsPolicy: ClusterFirstWithHostNet + containers: + - name: heartbeat + image: docker.elastic.co/{{ beats_namespace }}/heartbeat:{{ beats_version }} + args: [ + "-c", "/etc/heartbeat.yml", + "-e", + ] + env: + - name: NODE_NAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName + securityContext: + runAsUser: 0 + resources: + limits: + memory: 200Mi + requests: + cpu: 100m + memory: 100Mi + volumeMounts: + - name: config + mountPath: /etc/heartbeat.yml + readOnly: true + subPath: heartbeat.yml + #- name: data + # mountPath: /usr/share/heartbeat/data + volumes: + - name: config + configMap: + defaultMode: 0600 + name: heartbeat-deployment-config + #- name: data + # hostPath: + # path: /var/lib/heartbeat-data + # type: DirectoryOrCreate + +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: heartbeat-{{ namespace }} + ownerReferences: + - apiVersion: v1 + kind: Namespace + name: {{ namespace }} + uid: {{ namespace_uid }} +subjects: +- kind: ServiceAccount + name: heartbeat + namespace: {{ namespace }} +roleRef: + kind: ClusterRole + name: heartbeat + apiGroup: rbac.authorization.k8s.io +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: heartbeat + labels: + k8s-app: heartbeat +rules: +- apiGroups: [""] + resources: + - nodes + - namespaces + - pods + - services + verbs: ["get", "list", "watch"] +- apiGroups: ["apps"] + resources: + - replicasets + verbs: ["get", "list", "watch"] +--- +apiVersion: v1 +kind: 
ServiceAccount +metadata: + name: heartbeat + labels: + k8s-app: heartbeat +--- diff --git a/e2e/_suites/kubernetes-autodiscover/testdata/templates/metricbeat.yml.tmpl b/e2e/_suites/kubernetes-autodiscover/testdata/templates/metricbeat.yml.tmpl new file mode 100644 index 0000000000..042bb56bd7 --- /dev/null +++ b/e2e/_suites/kubernetes-autodiscover/testdata/templates/metricbeat.yml.tmpl @@ -0,0 +1,185 @@ +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: metricbeat-daemonset-config + labels: + k8s-app: metricbeat +data: + metricbeat.yml: |- + metricbeat.autodiscover: + providers: + - type: kubernetes + node: ${NODE_NAME} +{{ if option "hints enabled" }} + hints.enabled: true +{{ end }} + + output.file: + path: /tmp + filename: beats-events +--- +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: metricbeat + labels: + k8s-app: metricbeat +spec: + selector: + matchLabels: + k8s-app: metricbeat + template: + metadata: + labels: + k8s-app: metricbeat + spec: + serviceAccountName: metricbeat + terminationGracePeriodSeconds: 30 + hostNetwork: true + dnsPolicy: ClusterFirstWithHostNet + containers: + - name: metricbeat + image: docker.elastic.co/{{ beats_namespace }}/metricbeat:{{ beats_version }} + args: [ + "-c", "/etc/metricbeat.yml", + "-e", + "-system.hostfs=/hostfs", + ] + env: + - name: NODE_NAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName + securityContext: + runAsUser: 0 + # If using Red Hat OpenShift uncomment this: + #privileged: true + resources: + limits: + memory: 200Mi + requests: + cpu: 100m + memory: 100Mi + volumeMounts: + - name: config + mountPath: /etc/metricbeat.yml + readOnly: true + subPath: metricbeat.yml + #- name: data + # mountPath: /usr/share/metricbeat/data + - name: proc + mountPath: /hostfs/proc + readOnly: true + - name: cgroup + mountPath: /hostfs/sys/fs/cgroup + readOnly: true + volumes: + - name: proc + hostPath: + path: /proc + - name: cgroup + hostPath: + path: /sys/fs/cgroup + - name: config + configMap: + 
defaultMode: 0640 + name: metricbeat-daemonset-config + #- name: data + # hostPath: + # # When metricbeat runs as non-root user, this directory needs to be writable by group (g+w) + # path: /var/lib/metricbeat-data + # type: DirectoryOrCreate +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: metricbeat-{{ namespace }} + ownerReferences: + - apiVersion: v1 + kind: Namespace + name: {{ namespace }} + uid: {{ namespace_uid }} +subjects: +- kind: ServiceAccount + name: metricbeat + namespace: {{ namespace }} +roleRef: + kind: ClusterRole + name: metricbeat + apiGroup: rbac.authorization.k8s.io +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: metricbeat +subjects: + - kind: ServiceAccount + name: metricbeat + namespace: {{ namespace }} +roleRef: + kind: Role + name: metricbeat + apiGroup: rbac.authorization.k8s.io +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: metricbeat + labels: + k8s-app: metricbeat +rules: +- apiGroups: [""] + resources: + - nodes + - namespaces + - events + - pods + - services + verbs: ["get", "list", "watch"] +# Enable this rule only if planing to use Kubernetes keystore +#- apiGroups: [""] +# resources: +# - secrets +# verbs: ["get"] +- apiGroups: ["extensions"] + resources: + - replicasets + verbs: ["get", "list", "watch"] +- apiGroups: ["apps"] + resources: + - statefulsets + - deployments + - replicasets + verbs: ["get", "list", "watch"] +- apiGroups: + - "" + resources: + - nodes/stats + verbs: + - get +- nonResourceURLs: + - "/metrics" + verbs: + - get +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: metricbeat + labels: + k8s-app: metricbeat +rules: + - apiGroups: + - coordination.k8s.io + resources: + - leases + verbs: ["get", "create", "update"] +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: metricbeat + labels: + k8s-app: metricbeat +--- diff --git 
a/e2e/_suites/kubernetes-autodiscover/testdata/templates/redis-service.yml.tmpl b/e2e/_suites/kubernetes-autodiscover/testdata/templates/redis-service.yml.tmpl new file mode 100644 index 0000000000..b262dabe66 --- /dev/null +++ b/e2e/_suites/kubernetes-autodiscover/testdata/templates/redis-service.yml.tmpl @@ -0,0 +1,16 @@ +apiVersion: v1 +kind: Service +metadata: + name: redis + annotations: +{{ if option "monitor annotations" }} + co.elastic.monitor/type: tcp + co.elastic.monitor/hosts: "${data.host}:6379" +{{ end }} +spec: + selector: + k8s-app: redis + ports: + - protocol: TCP + port: 6379 + targetPort: 6379 diff --git a/e2e/_suites/kubernetes-autodiscover/testdata/templates/redis.yml.tmpl b/e2e/_suites/kubernetes-autodiscover/testdata/templates/redis.yml.tmpl new file mode 100644 index 0000000000..7f6d5ae66e --- /dev/null +++ b/e2e/_suites/kubernetes-autodiscover/testdata/templates/redis.yml.tmpl @@ -0,0 +1,30 @@ +apiVersion: v1 +kind: Pod +metadata: + name: redis + annotations: +{{ if option "metrics annotations" }} + co.elastic.metrics/module: redis + co.elastic.metrics/hosts: "${data.host}:6379" + co.elastic.metrics/period: "5s" +{{ end }} +{{ if option "metrics annotations with named port" }} + co.elastic.metrics/module: redis + co.elastic.metrics/hosts: "${data.host}:${data.ports.redis}" + co.elastic.metrics/period: "5s" +{{ end }} +{{ if option "monitor annotations with named port" }} + co.elastic.monitor/type: tcp + co.elastic.monitor/hosts: "${data.host}:${data.ports.redis}" +{{ end }} + labels: + k8s-app: redis +spec: + containers: + - image: redis + imagePullPolicy: IfNotPresent + name: redis + ports: + - name: redis + containerPort: 6379 + protocol: TCP diff --git a/internal/shell/shell.go b/internal/shell/shell.go index c1e3114092..9904f090e3 100644 --- a/internal/shell/shell.go +++ b/internal/shell/shell.go @@ -7,6 +7,7 @@ package shell import ( "bytes" "context" + "io" "os" "os/exec" "strconv" @@ -17,7 +18,7 @@ import ( ) // 
CheckInstalledSoftware checks that the required software is present -func CheckInstalledSoftware(binaries []string) { +func CheckInstalledSoftware(binaries ...string) { log.Tracef("Validating required tools: %v", binaries) for _, binary := range binaries { @@ -33,6 +34,15 @@ func CheckInstalledSoftware(binaries []string) { // - command: represents the name of the binary to execute // - args: represents the arguments to be passed to the command func Execute(ctx context.Context, workspace string, command string, args ...string) (string, error) { + return ExecuteWithStdin(ctx, workspace, nil, command, args...) +} + +// ExecuteWithStdin executes a command in the machine the program is running +// - workspace: represents the location where to execute the command +// - stdin: reader to use as standard input for the command +// - command: represents the name of the binary to execute +// - args: represents the arguments to be passed to the command +func ExecuteWithStdin(ctx context.Context, workspace string, stdin io.Reader, command string, args ...string) (string, error) { span, _ := apm.StartSpanOptions(ctx, "Executing shell command", "shell.command.execute", apm.SpanOptions{ Parent: apm.SpanFromContext(ctx).TraceContext(), }) @@ -51,6 +61,9 @@ func Execute(ctx context.Context, workspace string, command string, args ...stri var stderr bytes.Buffer cmd.Stdout = &out cmd.Stderr = &stderr + if stdin != nil { + cmd.Stdin = stdin + } err := cmd.Run() if err != nil {