From 1b06f000f29ff827eaea76e759aa78ad194f5f9b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Manuel=20de=20la=20Pe=C3=B1a?= Date: Wed, 28 Jul 2021 17:36:45 +0200 Subject: [PATCH] chore: add APM integration to CI (#1148) * chore: add APM integration to CI * fix: update scenario step to latest version * chore: remove blank lines * fix: remove fleet server from the stand-alone agent * feat: add a scenario for adding integrations * chore: simplify scenarios avoiding testing twice The installation of the integration to the policy is already tested in another scenario * chore: rename scenario * chore: add integrations feature file to the CI * chore: remove references to FleetServerPolicy, as it's not used anymore * fix: bring fleet-server bootstrap test back * fix: expose cloud agent on an unused port * chore: extract a method to get Fleet Server URL * fix: get stand-alone agent by hostname from agents list We were getting the 1st agent, and because we have an agent bootstrapped as fleet-server, it was retrieved as the first one, causing the "is agent 'online'" step to always return true, instead of returning the status of the newly deployed agent. 
* fix: automatically enroll the stand-alone agent in Fleet * fix: reduce the number of occurrences * fix: there are 2 filebeat instances * chore: move cloud configs to a better place * chore: run APM tests with ubi8 base image * fix: keep original structure * fix: right volume path * chore: remove cloud scenario for APM integration (cherry picked from commit bc65335dc8357d7da8db886dc5d7a841a12e7e55) --- .ci/.e2e-tests.yaml | 6 + .../apm-legacy/config/apm-server.yml | 21 ---- .../apm-legacy/config/capabilities.yml | 5 - .../apm-legacy/config/credentials.yml | 10 -- .../apm-legacy/config/fleet-setup.yml | 9 -- .../elastic-agent/cloud/docker-compose.yml | 2 +- .../services/elastic-agent/docker-compose.yml | 4 + .../agent_endpoint_integration.feature | 3 +- .../fleet/features/apm_integration.feature | 27 ++--- .../fleet/features/integrations.feature | 13 +++ .../fleet/features/linux_integration.feature | 3 +- .../fleet/features/stand_alone_agent.feature | 2 +- e2e/_suites/fleet/fleet.go | 104 +++++++++++------- e2e/_suites/fleet/stand-alone.go | 46 ++++---- internal/kibana/fleet.go | 7 +- 15 files changed, 129 insertions(+), 133 deletions(-) delete mode 100644 cli/config/compose/services/elastic-agent/apm-legacy/config/apm-server.yml delete mode 100644 cli/config/compose/services/elastic-agent/apm-legacy/config/capabilities.yml delete mode 100644 cli/config/compose/services/elastic-agent/apm-legacy/config/credentials.yml delete mode 100644 cli/config/compose/services/elastic-agent/apm-legacy/config/fleet-setup.yml create mode 100644 e2e/_suites/fleet/features/integrations.feature diff --git a/.ci/.e2e-tests.yaml b/.ci/.e2e-tests.yaml index b10297ee76..761b961ba8 100644 --- a/.ci/.e2e-tests.yaml +++ b/.ci/.e2e-tests.yaml @@ -18,6 +18,12 @@ SUITES: - name: "Fleet" pullRequestFilter: " && ~debian" tags: "fleet_mode_agent" + - name: "Integrations" + pullRequestFilter: " && ~debian" + tags: "integrations" + - name: "APM Integration" + pullRequestFilter: " && ~debian" + 
tags: "apm_server" - name: "Endpoint Integration" platforms: - "ubuntu-18.04" diff --git a/cli/config/compose/services/elastic-agent/apm-legacy/config/apm-server.yml b/cli/config/compose/services/elastic-agent/apm-legacy/config/apm-server.yml deleted file mode 100644 index ac1a563a91..0000000000 --- a/cli/config/compose/services/elastic-agent/apm-legacy/config/apm-server.yml +++ /dev/null @@ -1,21 +0,0 @@ -monitoring.enabled: true -http.enabled: true -http.port: 5067 -http.host: "0.0.0.0" -apm-server: - host: "0.0.0:8200" - secret_token: "1234" - # Enable APM Server Golang expvar support (https://golang.org/pkg/expvar/). - expvar: - enabled: true - url: "/debug/vars" - kibana: - # For APM Agent configuration in Kibana, enabled must be true. - enabled: true - host: "kibana" - username: "elastic" - password: "changeme" -output.elasticsearch: - hosts: ["http://elasticsearch:9200"] - username: "elastic" - password: "changeme" diff --git a/cli/config/compose/services/elastic-agent/apm-legacy/config/capabilities.yml b/cli/config/compose/services/elastic-agent/apm-legacy/config/capabilities.yml deleted file mode 100644 index e2ad548a4c..0000000000 --- a/cli/config/compose/services/elastic-agent/apm-legacy/config/capabilities.yml +++ /dev/null @@ -1,5 +0,0 @@ -capabilities: -- rule: allow - input: fleet-server -- rule: deny - input: "*" diff --git a/cli/config/compose/services/elastic-agent/apm-legacy/config/credentials.yml b/cli/config/compose/services/elastic-agent/apm-legacy/config/credentials.yml deleted file mode 100644 index 90e67b0a2a..0000000000 --- a/cli/config/compose/services/elastic-agent/apm-legacy/config/credentials.yml +++ /dev/null @@ -1,10 +0,0 @@ -fleet_server: - elasticsearch: - host: "elasticsearch" - username: "elastic" - password: "changeme" -kibana: - fleet: - host: "kibana" - username: "elastic" - password: "changeme" diff --git a/cli/config/compose/services/elastic-agent/apm-legacy/config/fleet-setup.yml 
b/cli/config/compose/services/elastic-agent/apm-legacy/config/fleet-setup.yml deleted file mode 100644 index 30feaf7e85..0000000000 --- a/cli/config/compose/services/elastic-agent/apm-legacy/config/fleet-setup.yml +++ /dev/null @@ -1,9 +0,0 @@ -fleet: - enroll: true - force: false - insecure: true -fleet_server: - enable: true -kibana: - fleet: - setup: true diff --git a/cli/config/compose/services/elastic-agent/cloud/docker-compose.yml b/cli/config/compose/services/elastic-agent/cloud/docker-compose.yml index 9a0fc79ee4..857ddbe1ac 100644 --- a/cli/config/compose/services/elastic-agent/cloud/docker-compose.yml +++ b/cli/config/compose/services/elastic-agent/cloud/docker-compose.yml @@ -21,6 +21,6 @@ services: volumes: - "${apmVolume}:/apm-legacy" ports: - - "127.0.0.1:8220:8220" + - "127.0.0.1:8221:8220" - "127.0.0.1:8200:8200" - "127.0.0.1:5066:5066" diff --git a/cli/config/compose/services/elastic-agent/docker-compose.yml b/cli/config/compose/services/elastic-agent/docker-compose.yml index 9378c34a81..a7fc9a07ee 100644 --- a/cli/config/compose/services/elastic-agent/docker-compose.yml +++ b/cli/config/compose/services/elastic-agent/docker-compose.yml @@ -10,6 +10,10 @@ services: environment: - "FLEET_SERVER_ENABLE=${fleetServerMode:-0}" - "FLEET_SERVER_INSECURE_HTTP=${fleetServerMode:-0}" + - "FLEET_ENROLL=${fleetEnroll:-1}" + - "FLEET_ENROLLMENT_TOKEN=${fleetEnrollmentToken:-}" + - "FLEET_INSECURE=${fleetInsecure:-0}" + - "FLEET_URL=${fleetUrl:-}" platform: ${stackPlatform:-linux/amd64} ports: - "127.0.0.1:${fleetServerPort:-8220}:8220" diff --git a/e2e/_suites/fleet/features/agent_endpoint_integration.feature b/e2e/_suites/fleet/features/agent_endpoint_integration.feature index e13c06a073..57f686e4f5 100644 --- a/e2e/_suites/fleet/features/agent_endpoint_integration.feature +++ b/e2e/_suites/fleet/features/agent_endpoint_integration.feature @@ -6,8 +6,7 @@ Scenario Outline: Adding the Endpoint Integration to an Agent makes the host to Given a "" agent is 
deployed to Fleet with "tar" installer And the agent is listed in Fleet as "online" When the "Endpoint Security" integration is "added" in the policy - Then the "Endpoint Security" datasource is shown in the policy as added - And the host name is shown in the Administration view in the Security App as "online" + Then the host name is shown in the Administration view in the Security App as "online" @centos Examples: Centos diff --git a/e2e/_suites/fleet/features/apm_integration.feature b/e2e/_suites/fleet/features/apm_integration.feature index cff83f53d7..309a80b85c 100644 --- a/e2e/_suites/fleet/features/apm_integration.feature +++ b/e2e/_suites/fleet/features/apm_integration.feature @@ -3,28 +3,19 @@ Feature: APM Integration Scenarios for APM @install -Scenario Outline: Deploying a stand-alone agent with fleet server mode - Given a "" stand-alone agent is deployed with fleet server mode +Scenario Outline: Deploying a stand-alone agent with the Elastic APM integration + Given a "" stand-alone agent is deployed And the stand-alone agent is listed in Fleet as "online" - When the "Elastic APM" integration is added in the policy - Then the "Elastic APM" datasource is shown in the policy as added - And the "apm-server" process is in the "started" state on the host - - -@default -Examples: default - | image | - | default | - - - -@cloud -Scenario Outline: Deploying a stand-alone agent with fleet server mode on cloud - When a "" stand-alone agent is deployed with fleet server mode on cloud + When the "Elastic APM" integration is "added" in the policy Then the "apm-server" process is in the "started" state on the host - @default Examples: default | image | | default | + +@ubi8 +@skip:arm64 +Examples: Ubi8 +| image | +| ubi8 | diff --git a/e2e/_suites/fleet/features/integrations.feature b/e2e/_suites/fleet/features/integrations.feature new file mode 100644 index 0000000000..fe01327aee --- /dev/null +++ b/e2e/_suites/fleet/features/integrations.feature @@ -0,0 +1,13 @@ 
+@integrations +Feature: Integrations + Scenarios for operating integrations + +@install +Scenario Outline: Adding an Integration to a Policy + When the "" integration is "added" in the policy + Then the "" datasource is shown in the policy as added +Examples: + | integration | + | Elastic APM | + | Endpoint | + | Linux | diff --git a/e2e/_suites/fleet/features/linux_integration.feature b/e2e/_suites/fleet/features/linux_integration.feature index 2b97588ad8..e37f193e60 100644 --- a/e2e/_suites/fleet/features/linux_integration.feature +++ b/e2e/_suites/fleet/features/linux_integration.feature @@ -7,8 +7,7 @@ Scenario Outline: Adding the Linux Integration to an Agent ... Given a "" agent is deployed to Fleet with "tar" installer And the agent is listed in Fleet as "online" When the "Linux" integration is "added" in the policy - Then the "Linux" datasource is shown in the policy as added - And a Linux data stream exists with some data + Then a Linux data stream exists with some data @centos Examples: Centos diff --git a/e2e/_suites/fleet/features/stand_alone_agent.feature b/e2e/_suites/fleet/features/stand_alone_agent.feature index aa98579610..42ed928e57 100644 --- a/e2e/_suites/fleet/features/stand_alone_agent.feature +++ b/e2e/_suites/fleet/features/stand_alone_agent.feature @@ -7,7 +7,7 @@ Feature: Stand-alone Agent @start-agent Scenario Outline: Starting the agent starts backend processes When a "" stand-alone agent is deployed - Then there are "1" instances of the "filebeat" process in the "started" state + Then there are "2" instances of the "filebeat" process in the "started" state And there are "2" instances of the "metricbeat" process in the "started" state @default diff --git a/e2e/_suites/fleet/fleet.go b/e2e/_suites/fleet/fleet.go index 70775c4753..ea727e710c 100644 --- a/e2e/_suites/fleet/fleet.go +++ b/e2e/_suites/fleet/fleet.go @@ -49,7 +49,6 @@ type FleetTestSuite struct { Integration kibana.IntegrationPackage // the installed integration Policy 
kibana.Policy PolicyUpdatedAt string // the moment the policy was updated - FleetServerPolicy kibana.Policy Version string // current elastic-agent version kibanaClient *kibana.Client deployer deploy.Deployment @@ -69,50 +68,52 @@ func (fts *FleetTestSuite) afterScenario() { fts.currentContext = apm.ContextWithSpan(context.Background(), span) defer span.End() - serviceName := common.ElasticAgentServiceName - agentService := deploy.NewServiceRequest(serviceName) + if fts.InstallerType != "" { + serviceName := common.ElasticAgentServiceName + agentService := deploy.NewServiceRequest(serviceName) - if !fts.StandAlone { - agentInstaller, _ := installer.Attach(fts.currentContext, fts.deployer, agentService, fts.InstallerType) + if !fts.StandAlone { + agentInstaller, _ := installer.Attach(fts.currentContext, fts.deployer, agentService, fts.InstallerType) - if log.IsLevelEnabled(log.DebugLevel) { - err := agentInstaller.Logs() - if err != nil { - log.WithField("error", err).Warn("Could not get agent logs in the container") + if log.IsLevelEnabled(log.DebugLevel) { + err := agentInstaller.Logs() + if err != nil { + log.WithField("error", err).Warn("Could not get agent logs in the container") + } } - } - // only call it when the elastic-agent is present - if !fts.ElasticAgentStopped { - err := agentInstaller.Uninstall(fts.currentContext) - if err != nil { - log.Warnf("Could not uninstall the agent after the scenario: %v", err) + // only call it when the elastic-agent is present + if !fts.ElasticAgentStopped { + err := agentInstaller.Uninstall(fts.currentContext) + if err != nil { + log.Warnf("Could not uninstall the agent after the scenario: %v", err) + } } + } else if log.IsLevelEnabled(log.DebugLevel) { + _ = fts.deployer.Logs(agentService) } - } else if log.IsLevelEnabled(log.DebugLevel) { - _ = fts.deployer.Logs(agentService) - } - err := fts.unenrollHostname() - if err != nil { - manifest, _ := fts.deployer.Inspect(fts.currentContext, agentService) - 
log.WithFields(log.Fields{ - "err": err, - "hostname": manifest.Hostname, - }).Warn("The agentIDs for the hostname could not be unenrolled") - } + err := fts.unenrollHostname() + if err != nil { + manifest, _ := fts.deployer.Inspect(fts.currentContext, agentService) + log.WithFields(log.Fields{ + "err": err, + "hostname": manifest.Hostname, + }).Warn("The agentIDs for the hostname could not be unenrolled") + } - if !common.DeveloperMode { - _ = fts.deployer.Remove( - common.FleetProfileServiceRequest, - []deploy.ServiceRequest{ - deploy.NewServiceRequest(serviceName), - }, - common.ProfileEnv) - } else { - log.WithField("service", serviceName).Info("Because we are running in development mode, the service won't be stopped") + if !common.DeveloperMode { + _ = fts.deployer.Remove( + common.FleetProfileServiceRequest, + []deploy.ServiceRequest{ + deploy.NewServiceRequest(serviceName), + }, + common.ProfileEnv) + } else { + log.WithField("service", serviceName).Info("Because we are running in development mode, the service won't be stopped") + } } - err = fts.kibanaClient.DeleteEnrollmentAPIKey(fts.currentContext, fts.CurrentTokenID) + err := fts.kibanaClient.DeleteEnrollmentAPIKey(fts.currentContext, fts.CurrentTokenID) if err != nil { log.WithFields(log.Fields{ "err": err, @@ -125,6 +126,7 @@ func (fts *FleetTestSuite) afterScenario() { // clean up fields fts.CurrentTokenID = "" fts.CurrentToken = "" + fts.InstallerType = "" fts.Image = "" fts.StandAlone = false fts.BeatsProcess = "" @@ -183,7 +185,6 @@ func (fts *FleetTestSuite) contributeSteps(s *godog.ScenarioContext) { // stand-alone only steps s.Step(`^a "([^"]*)" stand-alone agent is deployed$`, fts.aStandaloneAgentIsDeployed) s.Step(`^a "([^"]*)" stand-alone agent is deployed with fleet server mode$`, fts.bootstrapFleetServerFromAStandaloneAgent) - s.Step(`^a "([^"]*)" stand-alone agent is deployed with fleet server mode on cloud$`, fts.aStandaloneAgentIsDeployedWithFleetServerModeOnCloud) s.Step(`^there is new 
data in the index from agent$`, fts.thereIsNewDataInTheIndexFromAgent) s.Step(`^the "([^"]*)" docker container is stopped$`, fts.theDockerContainerIsStopped) s.Step(`^there is no new data in the index after agent shuts down$`, fts.thereIsNoNewDataInTheIndexAfterAgentShutsDown) @@ -191,7 +192,16 @@ func (fts *FleetTestSuite) contributeSteps(s *godog.ScenarioContext) { } func (fts *FleetTestSuite) theStandaloneAgentIsListedInFleetWithStatus(desiredStatus string) error { + maxTimeout := time.Duration(utils.TimeoutFactor) * time.Minute + exp := utils.GetExponentialBackOff(maxTimeout) + retryCount := 0 + + agentService := deploy.NewServiceRequest(common.ElasticAgentServiceName) + manifest, _ := fts.deployer.Inspect(fts.currentContext, agentService) + waitForAgents := func() error { + retryCount++ + agents, err := fts.kibanaClient.ListAgents(fts.currentContext) if err != nil { return err @@ -201,13 +211,23 @@ func (fts *FleetTestSuite) theStandaloneAgentIsListedInFleetWithStatus(desiredSt return errors.New("No agents found") } - agentZero := agents[0] - hostname := agentZero.LocalMetadata.Host.HostName + for _, agent := range agents { + hostname := agent.LocalMetadata.Host.HostName + + if hostname == manifest.Hostname { + return theAgentIsListedInFleetWithStatus(fts.currentContext, desiredStatus, hostname) + } + } + + err = errors.New("Agent not found in Fleet") + log.WithFields(log.Fields{ + "elapsedTime": exp.GetElapsedTime(), + "hostname": manifest.Hostname, + "retries": retryCount, + }).Warn(err) - return theAgentIsListedInFleetWithStatus(fts.currentContext, desiredStatus, hostname) + return err } - maxTimeout := time.Duration(utils.TimeoutFactor) * time.Minute * 2 - exp := utils.GetExponentialBackOff(maxTimeout) err := backoff.Retry(waitForAgents, exp) if err != nil { diff --git a/e2e/_suites/fleet/stand-alone.go b/e2e/_suites/fleet/stand-alone.go index a29c5c84f9..7bd10ff819 100644 --- a/e2e/_suites/fleet/stand-alone.go +++ b/e2e/_suites/fleet/stand-alone.go @@ 
-7,15 +7,14 @@ package main import ( "context" "fmt" - "path" "strings" "time" "github.com/cenkalti/backoff/v4" - "github.com/elastic/e2e-testing/cli/config" "github.com/elastic/e2e-testing/internal/common" "github.com/elastic/e2e-testing/internal/deploy" "github.com/elastic/e2e-testing/internal/installer" + "github.com/elastic/e2e-testing/internal/kibana" "github.com/elastic/e2e-testing/internal/shell" "github.com/elastic/e2e-testing/internal/utils" @@ -28,28 +27,12 @@ func (fts *FleetTestSuite) aStandaloneAgentIsDeployed(image string) error { } func (fts *FleetTestSuite) bootstrapFleetServerFromAStandaloneAgent(image string) error { - fleetPolicy, err := fts.kibanaClient.GetDefaultPolicy(fts.currentContext, true) - if err != nil { - return err - } - - fts.FleetServerPolicy = fleetPolicy return fts.startStandAloneAgent(image, "", map[string]string{"fleetServerMode": "1"}) } -func (fts *FleetTestSuite) aStandaloneAgentIsDeployedWithFleetServerModeOnCloud(image string) error { - fleetPolicy, err := fts.kibanaClient.GetDefaultPolicy(fts.currentContext, true) - if err != nil { - return err - } - fts.FleetServerPolicy = fleetPolicy - volume := path.Join(config.OpDir(), "compose", "services", "elastic-agent", "apm-legacy") - return fts.startStandAloneAgent(image, "cloud", map[string]string{"apmVolume": volume}) -} - func (fts *FleetTestSuite) thereIsNewDataInTheIndexFromAgent() error { maxTimeout := time.Duration(utils.TimeoutFactor) * time.Minute * 2 - minimumHitsCount := 25 + minimumHitsCount := 20 agentService := deploy.NewServiceRequest(common.ElasticAgentServiceName).WithFlavour(fts.Image) @@ -117,7 +100,28 @@ func (fts *FleetTestSuite) startStandAloneAgent(image string, flavour string, en dockerImageTag += "-" + arch } - common.ProfileEnv["fleetServerPort"] = "8221" + // Grab a new enrollment key for new agent + enrollmentKey, err := fts.kibanaClient.CreateEnrollmentAPIKey(fts.currentContext, fts.Policy) + if err != nil { + return err + } + fts.CurrentToken = 
enrollmentKey.APIKey + fts.CurrentTokenID = enrollmentKey.ID + + cfg, err := kibana.NewFleetConfig(fts.CurrentToken) + if err != nil { + return err + } + + // See https://github.com/elastic/beats/blob/4accfa8/x-pack/elastic-agent/pkg/agent/cmd/container.go#L73-L85 + // to understand the environment variables used by the elastic-agent to automatically + // enroll the new agent container in Fleet + common.ProfileEnv["fleetInsecure"] = "1" + common.ProfileEnv["fleetUrl"] = cfg.FleetServerURL() + common.ProfileEnv["fleetEnroll"] = "1" + common.ProfileEnv["fleetEnrollmentToken"] = cfg.EnrollmentToken + + common.ProfileEnv["fleetServerPort"] = "8221" // fixed port to avoid collisions with the stack's fleet-server common.ProfileEnv["elasticAgentDockerImageSuffix"] = "" if image != "default" { common.ProfileEnv["elasticAgentDockerImageSuffix"] = "-" + image @@ -136,7 +140,7 @@ func (fts *FleetTestSuite) startStandAloneAgent(image string, flavour string, en services := []deploy.ServiceRequest{ deploy.NewServiceRequest(common.ElasticAgentServiceName).WithFlavour(flavour), } - err := fts.deployer.Add(fts.currentContext, common.FleetProfileServiceRequest, services, common.ProfileEnv) + err = fts.deployer.Add(fts.currentContext, common.FleetProfileServiceRequest, services, common.ProfileEnv) if err != nil { log.Error("Could not deploy the elastic-agent") return err diff --git a/internal/kibana/fleet.go b/internal/kibana/fleet.go index 36a33dc8da..09e2a1873c 100644 --- a/internal/kibana/fleet.go +++ b/internal/kibana/fleet.go @@ -64,8 +64,13 @@ func NewFleetConfig(token string) (*FleetConfig, error) { func (cfg FleetConfig) Flags() []string { flags := []string{ "-e", "-v", "--force", "--insecure", "--enrollment-token=" + cfg.EnrollmentToken, - "--url", fmt.Sprintf("http://%s:%d", cfg.FleetServerURI, cfg.FleetServerPort), + "--url", cfg.FleetServerURL(), } return flags } + +// FleetServerURL returns the fleet-server URL in the config +func (cfg FleetConfig) FleetServerURL() 
string { + return fmt.Sprintf("http://%s:%d", cfg.FleetServerURI, cfg.FleetServerPort) +}