Skip to content
This repository was archived by the owner on Sep 17, 2024. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 1 addition & 3 deletions .ci/Jenkinsfile
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,7 @@ pipeline {
parameters {
choice(name: 'runTestsSuite', choices: ['all', 'helm', 'ingest-manager', 'metricbeat'], description: 'Choose which test suite to run (default: all)')
choice(name: 'LOG_LEVEL', choices: ['INFO', 'DEBUG'], description: 'Log level to be used')
choice(name: 'QUERY_MAX_ATTEMPTS', choices: ['5', '10', '20'], description: 'Number of attempts to create the connection to Elasticsearch')
Copy link
Contributor Author

@mdelapenya mdelapenya Jun 24, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Removed this argument, as the code is not using a fixed retry option any more

choice(name: 'RETRY_TIMEOUT', choices: ['3', '5', '7', '11'], description: 'Number of seconds between retry')
choice(name: 'RETRY_TIMEOUT', choices: ['3', '5', '7', '11'], description: 'Max number of minutes for timeout backoff strategies')
string(name: 'STACK_VERSION', defaultValue: '7.7.0', description: 'SemVer version of the stack to be used.')
string(name: 'METRICBEAT_VERSION', defaultValue: '7.7.0', description: 'SemVer version of the metricbeat to be used.')
string(name: 'HELM_CHART_VERSION', defaultValue: '7.6.1', description: 'SemVer version of Helm chart to be used.')
Expand All @@ -54,7 +53,6 @@ pipeline {
KIND_VERSION = "${params.KIND_VERSION.trim()}"
KUBERNETES_VERSION = "${params.KUBERNETES_VERSION.trim()}"
LOG_LEVEL = "${params.LOG_LEVEL.trim()}"
QUERY_MAX_ATTEMPTS = "${params.QUERY_MAX_ATTEMPTS.trim()}"
RETRY_TIMEOUT = "${params.RETRY_TIMEOUT.trim()}"
}
stages {
Expand Down
2 changes: 0 additions & 2 deletions e2e/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@ FEATURE?=
FORMAT?=pretty
LOG_INCLUDE_TIMESTAMP?=TRUE
LOG_LEVEL?=INFO
QUERY_MAX_ATTEMPTS?=5
RETRY_TIMEOUT?=3
STACK_VERSION?=
METRICBEAT_VERSION?=
Expand Down Expand Up @@ -37,7 +36,6 @@ functional-test: install-godog
cd _suites/${SUITE} && \
OP_LOG_LEVEL=${LOG_LEVEL} \
OP_LOG_INCLUDE_TIMESTAMP=${LOG_INCLUDE_TIMESTAMP} \
OP_QUERY_MAX_ATTEMPTS=${QUERY_MAX_ATTEMPTS} \
OP_RETRY_TIMEOUT=${RETRY_TIMEOUT} \
OP_METRICBEAT_VERSION=${METRICBEAT_VERSION} \
OP_STACK_VERSION=${STACK_VERSION} \
Expand Down
4 changes: 1 addition & 3 deletions e2e/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -130,13 +130,11 @@ where:
There are some environment variables you can use to improve the experience running the tests with `Make`.

- **METRICBEAT_FETCH_TIMEOUT** (default: 20). This is the time in seconds we leave metricbeat grabbing metrics from the monitored integration module.
- **QUERY_MAX_ATTEMPTS** (default: 5). It's possible that the Elasticsearch is not ready for writes, so we can define a retry strategy to wait for our index to be ready. This variable defines the number of attempts the retry process will happen.
- **RETRY_TIMEOUT** (default: 3). For same reason as above, this variable defines the delay between attempts, before a successful connection to Elasticsearch is made.
- **RETRY_TIMEOUT** (default: 3 minutes). It's possible that the Elasticsearch is not ready for writes, so we can define a retry strategy to wait for our index to be ready. This variable defines the number of minutes the retry process will use as max timeout, where the implementation code will try using a backoff strategy until a condition is met.

>Interested in running the tests directly using Godog? Please check out [the Makefile](./Makefile#L19).

```shell
export OP_QUERY_MAX_ATTEMPTS=${OP_QUERY_MAX_ATTEMPTS:-5}
export OP_RETRY_TIMEOUT=${OP_RETRY_TIMEOUT:-3}
export FORMAT=${FORMAT:-pretty} # valid formats are: pretty, junit
# If you do not pass a '-t moduleName' argument, then all tests will be run
Expand Down
2 changes: 1 addition & 1 deletion e2e/_suites/ingest-manager/features/ingest_manager.feature
Original file line number Diff line number Diff line change
Expand Up @@ -18,5 +18,5 @@ Scenario: Deploying a stand-alone agent
@stop-agent
Scenario: Stopping the agent container stops data going into ES
Given a stand-alone agent is deployed
When the "agent" docker container is stopped
When the "elastic-agent" docker container is stopped
Then there is no new data in the index after agent shuts down
7 changes: 7 additions & 0 deletions e2e/_suites/ingest-manager/ingest-manager_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,17 @@ var stackVersion = "8.0.0-SNAPSHOT"
// affecting the runtime dependencies (or profile)
var profileEnv map[string]string

// queryRetryTimeout is the number of seconds between elasticsearch retry queries.
// It can be overriden by OP_RETRY_TIMEOUT env var
var queryRetryTimeout = 3

// All URLs running on localhost as Kibana is expected to be exposed there
const kibanaBaseURL = "http://localhost:5601"

func init() {
config.Init()

queryRetryTimeout = e2e.GetIntegerFromEnv("OP_RETRY_TIMEOUT", queryRetryTimeout)
stackVersion = e2e.GetEnv("OP_STACK_VERSION", stackVersion)
}

Expand Down Expand Up @@ -75,6 +80,8 @@ func IngestManagerFeatureContext(s *godog.Suite) {
"minutes": minutesToBeHealthy,
}).Error("The Kibana instance could not get the healthy status")
}

imts.StandAlone.RuntimeDependenciesStartDate = time.Now()
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Marking when the runtime deps are started. We want to have here a lower boundary for queries

})
s.BeforeScenario(func(*messages.Pickle) {
log.Debug("Before Ingest Manager scenario")
Expand Down
166 changes: 162 additions & 4 deletions e2e/_suites/ingest-manager/stand-alone.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
package main

import (
"context"
"time"

"github.com/cucumber/godog"
"github.com/elastic/e2e-testing/cli/docker"
"github.com/elastic/e2e-testing/cli/services"
"github.com/elastic/e2e-testing/e2e"
log "github.com/sirupsen/logrus"
Expand All @@ -11,6 +15,10 @@ import (
type StandAloneTestSuite struct {
AgentConfigFilePath string
Cleanup bool
Hostname string
// date controls for queries
AgentStoppedDate time.Time
RuntimeDependenciesStartDate time.Time
}

func (sats *StandAloneTestSuite) contributeSteps(s *godog.Suite) {
Expand Down Expand Up @@ -44,6 +52,13 @@ func (sats *StandAloneTestSuite) aStandaloneAgentIsDeployed() error {
return err
}

// get container hostname once
hostname, err := getContainerHostname(serviceName)
if err != nil {
return err
}

sats.Hostname = hostname
sats.Cleanup = true

if log.IsLevelEnabled(log.DebugLevel) {
Expand All @@ -66,13 +81,156 @@ func (sats *StandAloneTestSuite) aStandaloneAgentIsDeployed() error {
}

func (sats *StandAloneTestSuite) thereIsNewDataInTheIndexFromAgent() error {
return godog.ErrPending
maxTimeout := time.Duration(queryRetryTimeout) * time.Minute
minimumHitsCount := 100

result, err := searchAgentData(sats.Hostname, sats.RuntimeDependenciesStartDate, minimumHitsCount, maxTimeout)
Copy link
Contributor Author

@mdelapenya mdelapenya Jun 24, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We use the moment the runtime deps (kibana + ES) started to check for documents

if err != nil {
return err
}

log.Debugf("Search result: %v", result)

return e2e.AssertHitsArePresent(result)
}

func (sats *StandAloneTestSuite) theDockerContainerIsStopped(arg1 string) error {
return godog.ErrPending
func (sats *StandAloneTestSuite) theDockerContainerIsStopped(serviceName string) error {
serviceManager := services.NewServiceManager()

err := serviceManager.RemoveServicesFromCompose("ingest-manager", []string{serviceName}, profileEnv)
if err != nil {
log.WithFields(log.Fields{
"error": err,
"service": serviceName,
}).Error("Could not stop the service.")

return err
}
sats.AgentStoppedDate = time.Now()

return nil
}

func (sats *StandAloneTestSuite) thereIsNoNewDataInTheIndexAfterAgentShutsDown() error {
return godog.ErrPending
maxTimeout := time.Duration(30) * time.Second
minimumHitsCount := 1
Comment on lines +115 to +116
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

With just one hit found in 30 secs, we should fail the assertion


result, err := searchAgentData(sats.Hostname, sats.AgentStoppedDate, minimumHitsCount, maxTimeout)
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We use the moment the agent stopped to check for documents

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I should have caught this earlier, but indeed - we should put in some minimal (1 second or 2??) padding on the chance that a doc was sent right as agent is stopping and is in ES microseconds after the agent stop time is recorded @mdelapenya what do you think?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Flakiness at sight!!

TBH not sure, we can add a few seconds to that time

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's merge as is, so we can observe how it behaves

if err != nil {
log.WithFields(log.Fields{
"error": err,
}).Info("No documents were found for the Agent in the index after it stopped")
return nil
}

return e2e.AssertHitsAreNotPresent(result)
}

// we need the container name because we use the Docker Client instead of Docker Compose
func getContainerHostname(serviceName string) (string, error) {
containerName := "ingest-manager_" + serviceName + "_1"

log.WithFields(log.Fields{
"service": serviceName,
"containerName": containerName,
}).Debug("Retrieving container name from the Docker client")

hostname, err := docker.ExecCommandIntoContainer(context.Background(), containerName, "root", []string{"hostname"})
if err != nil {
log.WithFields(log.Fields{
"containerName": containerName,
"error": err,
"service": serviceName,
}).Error("Could not retrieve container name from the Docker client")
return "", err
}

log.WithFields(log.Fields{
"containerName": containerName,
"hostname": hostname,
"service": serviceName,
}).Info("Hostname retrieved from the Docker client")

return hostname, nil
}

func searchAgentData(hostname string, startDate time.Time, minimumHitsCount int, maxTimeout time.Duration) (e2e.SearchResult, error) {
timezone := "America/New_York"
now := time.Now()

esQuery := map[string]interface{}{
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Translated query from: #126 (comment)

"version": true,
"size": 500,
"docvalue_fields": []map[string]interface{}{
{
"field": "@timestamp",
"format": "date_time",
},
{
"field": "system.process.cpu.start_time",
"format": "date_time",
},
{
"field": "system.service.state_since",
"format": "date_time",
},
},
"_source": map[string]interface{}{
"excludes": []map[string]interface{}{},
},
"query": map[string]interface{}{
"bool": map[string]interface{}{
"must": []map[string]interface{}{},
"filter": []map[string]interface{}{
{
"bool": map[string]interface{}{
"filter": []map[string]interface{}{
{
"bool": map[string]interface{}{
"should": []map[string]interface{}{
{
"match_phrase": map[string]interface{}{
"host.name": hostname,
},
},
},
"minimum_should_match": 1,
},
},
{
"bool": map[string]interface{}{
"should": []map[string]interface{}{
{
"range": map[string]interface{}{
"@timestamp": map[string]interface{}{
"gte": now,
"time_zone": timezone,
},
},
},
},
"minimum_should_match": 1,
},
},
},
},
},
{
"range": map[string]interface{}{
"@timestamp": map[string]interface{}{
"gte": startDate,
"format": "strict_date_optional_time",
},
},
},
},
"should": []map[string]interface{}{},
"must_not": []map[string]interface{}{},
},
},
}

indexName := "logs-agent-default"

return e2e.WaitForNumberOfHits(indexName, esQuery, minimumHitsCount, maxTimeout)
}
29 changes: 20 additions & 9 deletions e2e/_suites/metricbeat/metricbeat_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,6 @@ import (
// It can be overriden by OP_METRICBEAT_VERSION env var
var metricbeatVersion = "7.7.0"

// queryMaxAttempts is the number of attempts to query elasticsearch before aborting
// It can be overriden by OP_QUERY_MAX_ATTEMPTS env var
var queryMaxAttempts = 5

// queryRetryTimeout is the number of seconds between elasticsearch retry queries.
// It can be overriden by OP_RETRY_TIMEOUT env var
var queryRetryTimeout = 3
Expand All @@ -38,7 +34,6 @@ func init() {
config.Init()

metricbeatVersion = e2e.GetEnv("OP_METRICBEAT_VERSION", metricbeatVersion)
queryMaxAttempts = e2e.GetIntegerFromEnv("OP_QUERY_MAX_ATTEMPTS", queryMaxAttempts)
queryRetryTimeout = e2e.GetIntegerFromEnv("OP_RETRY_TIMEOUT", queryRetryTimeout)
stackVersion = e2e.GetEnv("OP_STACK_VERSION", stackVersion)

Expand Down Expand Up @@ -216,7 +211,7 @@ func (mts *MetricbeatTestSuite) installedAndConfiguredForModule(serviceType stri
// look up configurations under workspace's configurations directory
dir, _ := os.Getwd()
mts.configurationFile = path.Join(dir, "configurations", mts.ServiceName+".yml")
os.Chmod(mts.configurationFile, 0666)
_ = os.Chmod(mts.configurationFile, 0666)

mts.setEventModule(mts.ServiceType)
mts.setServiceVersion(mts.Version)
Expand Down Expand Up @@ -403,12 +398,25 @@ func (mts *MetricbeatTestSuite) thereAreEventsInTheIndex() error {
},
}

result, err := e2e.RetrySearch(mts.getIndexName(), esQuery, queryMaxAttempts, queryRetryTimeout)
minimumHitsCount := 5
maxTimeout := time.Duration(queryRetryTimeout) * time.Minute

result, err := e2e.WaitForNumberOfHits(mts.getIndexName(), esQuery, minimumHitsCount, maxTimeout)
if err != nil {
return err
}

return e2e.AssertHitsArePresent(result, mts.Query)
err = e2e.AssertHitsArePresent(result)
if err != nil {
log.WithFields(log.Fields{
"eventModule": mts.Query.EventModule,
"index": mts.Query.IndexName,
"serviceVersion": mts.Query.ServiceVersion,
}).Error(err.Error())
return err
}

return nil
}

func (mts *MetricbeatTestSuite) thereAreNoErrorsInTheIndex() error {
Expand All @@ -426,7 +434,10 @@ func (mts *MetricbeatTestSuite) thereAreNoErrorsInTheIndex() error {
},
}

result, err := e2e.RetrySearch(mts.getIndexName(), esQuery, queryMaxAttempts, queryRetryTimeout)
minimumHitsCount := 5
maxTimeout := time.Duration(queryRetryTimeout) * time.Minute

result, err := e2e.WaitForNumberOfHits(mts.getIndexName(), esQuery, minimumHitsCount, maxTimeout)
if err != nil {
return err
}
Expand Down
23 changes: 17 additions & 6 deletions e2e/assertions.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,19 @@ import (
)

// AssertHitsArePresent returns an error if no hits are present
func AssertHitsArePresent(hits map[string]interface{}, q ElasticsearchQuery) error {
hitsCount := len(hits["hits"].(map[string]interface{})["hits"].([]interface{}))
if hitsCount == 0 {
return fmt.Errorf(
"There aren't documents for %s-%s on Metricbeat index %s",
q.EventModule, q.ServiceVersion, q.IndexName)
func AssertHitsArePresent(hits map[string]interface{}) error {
if getHitsCount(hits) == 0 {
return fmt.Errorf("There aren't documents in the index")
}

return nil
}

// AssertHitsAreNotPresent returns an error if hits are present
func AssertHitsAreNotPresent(hits map[string]interface{}) error {
count := getHitsCount(hits)
if count != 0 {
return fmt.Errorf("There are %d documents in the index", count)
}

return nil
Expand All @@ -39,3 +46,7 @@ func AssertHitsDoNotContainErrors(hits map[string]interface{}, q ElasticsearchQu

return nil
}

func getHitsCount(hits map[string]interface{}) int {
return len(hits["hits"].(map[string]interface{})["hits"].([]interface{}))
}
Loading