diff --git a/.ci/Jenkinsfile b/.ci/Jenkinsfile index ff28e607f8..c0f6dd7ab6 100644 --- a/.ci/Jenkinsfile +++ b/.ci/Jenkinsfile @@ -38,6 +38,7 @@ pipeline { } parameters { booleanParam(name: 'Run_As_Main_Branch', defaultValue: false, description: 'Allow to run any steps on a PR, some steps normally only run on main branch.') + booleanParam(name: "DEVELOPER_MODE", defaultValue: false, description: "If checked, cloud resources won't be destroyed at the end of the pipeline. Default false") booleanParam(name: "SKIP_SCENARIOS", defaultValue: true, description: "If it's needed to skip those scenarios marked as @skip. Default true") booleanParam(name: "NIGHTLY_SCENARIOS", defaultValue: false, description: "If it's needed to include the scenarios marked as @nightly in the test execution. Default false") string(name: 'runTestsSuites', defaultValue: '', description: 'A comma-separated list of test suites to run (default: empty to run all test suites)') @@ -93,6 +94,7 @@ pipeline { githubCheckNotify('PENDING') // we want to notify the upstream about the e2e the soonest stash allowEmpty: true, name: 'source', useDefaultExcludes: false setEnvVar("GO_VERSION", readFile("${env.WORKSPACE}/${env.BASE_DIR}/.go-version").trim()) + setEnvVar("LABELS_STRING", "buildURL=${env.BUILD_URL} gitSha=${env.GIT_BASE_COMMIT}") checkSkipTests() } } @@ -204,7 +206,7 @@ pipeline { ansible(stackWorkspace, env.RUN_ID.split('-')[0], - "-t provision-stack --extra-vars=\"nodeLabel=stack nodeImage=${stackMachine.image} nodeInstanceType=${stackMachine.instance_type}\"") + "-t provision-stack --extra-vars=\"${LABELS_STRING} nodeLabel=stack nodeImage=${stackMachine.image} nodeInstanceType=${stackMachine.instance_type}\"") // Must be gathered after deployment as the public IP is known at that time def stackRunner = getNodeIp(stackWorkspace, 'stack') @@ -213,7 +215,7 @@ pipeline { ansible( stackWorkspace, env.RUN_ID.split('-')[0], - "-i \"${stackRunner.ip},\" -t setup-stack --extra-vars=\"nodeLabel=stack 
nodeImage=${stackMachine.image} nodeInstanceType=${stackMachine.instance_type}\"" + "-i \"${stackRunner.ip},\" -t setup-stack --extra-vars=\"${LABELS_STRING} nodeLabel=stack nodeImage=${stackMachine.image} nodeInstanceType=${stackMachine.instance_type}\"" ) } @@ -268,9 +270,14 @@ pipeline { script { def stackWorkspace = "${env.WORKSPACE}/${env.BASE_DIR}" def stackMachine = getMachineInfo(stackWorkspace, 'stack') - ansible(stackWorkspace, + if (params.DEVELOPER_MODE) { + def stackRunner = getNodeIp(stackWorkspace, 'stack') + log(level: 'INFO', text: "Stack instance won't be destroyed after the build. Please SSH into the stack machine on ${stackRunner.ip}") // INFO keeps the SSH hint visible when PIPELINE_LOG_LEVEL is INFO + } else { + ansible(stackWorkspace, env.RUN_ID.split('-')[0], "-t destroy --extra-vars=\"nodeLabel=stack nodeImage=${stackMachine.image} nodeInstanceType=${stackMachine.instance_type}\"") + } } } } @@ -505,14 +512,14 @@ def generateFunctionalTestStep(Map args = [:]){ // Start node, capture ip address ansible("${env.WORKSPACE}", runId, - "-t start-node --extra-vars=\"stackRunner=${stackRunner.ip} nodeLabel=${platform} nodeImage=${machine.image} nodeInstanceType=${machine.instance_type}\"") + "-t start-node --extra-vars=\"${LABELS_STRING} stackRunner=${stackRunner.ip} nodeLabel=${platform} nodeImage=${machine.image} nodeInstanceType=${machine.instance_type}\"") def testRunner = getNodeIp("${env.WORKSPACE}", platform) // Configure node for testing ansible("${env.WORKSPACE}", runId, - "-i \"${testRunner.ip},\" -t setup-node --extra-vars=\"stackRunner=${stackRunner.ip} nodeLabel=${platform} nodeImage=${machine.image} nodeInstanceType=${machine.instance_type}\"") + "-i \"${testRunner.ip},\" -t setup-node --extra-vars=\"${LABELS_STRING} stackRunner=${stackRunner.ip} nodeLabel=${platform} nodeImage=${machine.image} nodeInstanceType=${machine.instance_type}\"") sshexec("${env.WORKSPACE}", testRunner, @@ -530,9 +537,13 @@ def generateFunctionalTestStep(Map args = [:]){ "e2e-testing/outputs/TEST-*${runId}*.xml", 
"outputs/${testRunner.ip}/.") sh "ls -l outputs/${testRunner.ip}" - ansible("${env.WORKSPACE}", + if (params.DEVELOPER_MODE) { + log(level: 'INFO', text: "Cloud instance won't be destroyed after the build. Please SSH into the test runner machine on ${testRunner.ip}. ") // INFO keeps the SSH hint visible when PIPELINE_LOG_LEVEL is INFO + } else { + ansible("${env.WORKSPACE}", runId, "-t destroy --extra-vars=\"nodeLabel=${platform} nodeImage=${machine.image} nodeInstanceType=${machine.instance_type}\"") + } junit allowEmptyResults: true, keepLongStdio: true, testResults: "outputs/${testRunner.ip}/TEST-*${runId}*.xml" diff --git a/.ci/ansible/github-ssh-keys b/.ci/ansible/github-ssh-keys new file mode 100644 index 0000000000..230416513f --- /dev/null +++ b/.ci/ansible/github-ssh-keys @@ -0,0 +1,2 @@ +adam-stokes +mdelapenya diff --git a/.ci/ansible/playbook.yml b/.ci/ansible/playbook.yml index 4e1412e634..236b659863 100644 --- a/.ci/ansible/playbook.yml +++ b/.ci/ansible/playbook.yml @@ -63,6 +63,11 @@ tags: - setup-stack + - name: Add SSH keys to stack + include_tasks: tasks/install_ssh_keys.yml + tags: + - setup-stack + - name: Start stack shell: | sed -i '' -e 's,http://elasticsearch,http://{{inventory_hostname}},g' /home/{{ansible_user}}/e2e-testing/cli/config/compose/profiles/fleet/default/kibana.config.yml @@ -109,6 +114,11 @@ - setup-node - copy-source + - name: Add SSH keys to runner instances + include_tasks: tasks/install_ssh_keys.yml + tags: + - setup-node + - name: Configure test script include_tasks: tasks/setup_test_script.yml tags: diff --git a/.ci/ansible/tasks/install_deps.yml b/.ci/ansible/tasks/install_deps.yml index ce07254fc5..d2a0800d82 100644 --- a/.ci/ansible/tasks/install_deps.yml +++ b/.ci/ansible/tasks/install_deps.yml @@ -9,8 +9,49 @@ - rsync - wget - build-essential + - python3-pip state: latest use: apt register: package_install_res retries: 5 until: package_install_res is success + when: ansible_pkg_mgr == 'apt' or ansible_distribution in ["Debian", "Ubuntu"] + +- name: Install dependencies (SUSE) + 
zypper: + name: + - autoconf + - bison + - flex + - gcc + - gcc-c++ + - kernel-default-devel + - make + - m4 + - python3-pip + - rsync + - wget + state: present + when: ansible_pkg_mgr == 'zypper' or ansible_os_family == "Suse" + +- name: Install dependencies (CentOS) + ansible.builtin.package: + name: + - autoconf + - bison + - flex + - gcc + - gcc-c++ + - kernel-devel + - make + - m4 + - patch + - python3-pip + - rsync + - wget + state: latest + when: ansible_distribution in ["Fedora", "RedHat", "CentOS"] + +- name: Install ssh-import-id python package to copy public SSH keys from Github accounts + pip: + name: ssh-import-id diff --git a/.ci/ansible/tasks/install_ssh_keys.yml b/.ci/ansible/tasks/install_ssh_keys.yml new file mode 100644 index 0000000000..788ff45ade --- /dev/null +++ b/.ci/ansible/tasks/install_ssh_keys.yml @@ -0,0 +1,5 @@ +--- +- name: Install SSH keys + become: false + shell: | + /home/{{ansible_user}}/e2e-testing/.ci/scripts/import-ssh-keys.sh diff --git a/.ci/ansible/tasks/runners.yml b/.ci/ansible/tasks/runners.yml index 366bdcd65a..66d5f796e1 100644 --- a/.ci/ansible/tasks/runners.yml +++ b/.ci/ansible/tasks/runners.yml @@ -18,7 +18,11 @@ image: '{{nodeImage}}' instance_type: '{{nodeInstanceType}}' instance_tags: + BuildURL: "{{buildURL | default('Not running on CI') }}" + GitSHA: "{{ gitSha | default('unknown') }}" # fallback mirrors BuildURL: gitSha is only passed via LABELS_STRING + Kind: "{{nodeLabel}}" Name: "e2e-{{nodeLabel}}-{{runId}}" + ReaperMark: "e2e-testing-vm" count_tag: Name: "e2e-{{nodeLabel}}-{{runId}}" volumes: diff --git a/.ci/aws-instances-reaper.groovy b/.ci/aws-instances-reaper.groovy new file mode 100644 index 0000000000..1857946db9 --- /dev/null +++ b/.ci/aws-instances-reaper.groovy @@ -0,0 +1,43 @@ +#!/usr/bin/env groovy + +@Library('apm@current') _ + +pipeline { + agent { label 'ubuntu-20' } + environment { + HOME = "${env.WORKSPACE}" + NOTIFY_TO = credentials('notify-to') + PIPELINE_LOG_LEVEL = 'INFO' + JOB_GIT_CREDENTIALS = "f6c7695a-671e-4f4f-a331-acdce44ff9ba" + AWS_PROVISIONER_SECRET = 
'secret/observability-team/ci/elastic-observability-aws-account-auth' + AWS_EC2_INSTANCES_TAG_NAME= 'ReaperMark' + AWS_EC2_INSTANCES_TAG_VALUE= 'e2e-testing-vm' + } + options { + timeout(time: 1, unit: 'HOURS') + buildDiscarder(logRotator(numToKeepStr: '20', artifactNumToKeepStr: '20')) + timestamps() + ansiColor('xterm') + disableResume() + durabilityHint('PERFORMANCE_OPTIMIZED') + rateLimitBuilds(throttle: [count: 60, durationName: 'hour', userBoost: true]) + quietPeriod(10) + } + triggers { + cron '0 0 * * 0' + } + stages { + stage('Reap AWS instances'){ + steps { + withAWSEnv(secret: "${env.AWS_PROVISIONER_SECRET}", forceInstallation: true) { + sh("ids=\$(aws ec2 describe-instances --filters Name=tag:${env.AWS_EC2_INSTANCES_TAG_NAME},Values=${env.AWS_EC2_INSTANCES_TAG_VALUE} --query 'Reservations[].Instances[].InstanceId' --output text) && if [ -n \"\$ids\" ]; then aws ec2 terminate-instances --instance-ids \$ids; fi") // terminate only when at least one tagged instance was found; \$ids is left unquoted so each id becomes its own argument + } + } + } + } + post { + cleanup { + notifyBuildResult() + } + } +} diff --git a/.ci/jobs/aws-instances-reaper.yml b/.ci/jobs/aws-instances-reaper.yml new file mode 100644 index 0000000000..810e784323 --- /dev/null +++ b/.ci/jobs/aws-instances-reaper.yml @@ -0,0 +1,22 @@ +--- +- job: + name: e2e-tests/aws-instances-reaper + display-name: AWS Instances Reaper + description: Job to remove cloud resources on Sundays + view: Beats + project-type: pipeline + pipeline-scm: + script-path: .ci/aws-instances-reaper.groovy + scm: + - git: + url: git@github.com:elastic/e2e-testing.git + refspec: +refs/heads/*:refs/remotes/origin/* + wipe-workspace: true + name: origin + shallow-clone: true + credentials-id: f6c7695a-671e-4f4f-a331-acdce44ff9ba + reference-repo: /var/lib/jenkins/.git-references/e2e-testing.git + branches: + - main + triggers: + - timed: '0 0 * * 0' diff --git a/.ci/scripts/import-ssh-keys.sh b/.ci/scripts/import-ssh-keys.sh new file mode 100755 index 0000000000..e3fb09b2d8 --- /dev/null +++ b/.ci/scripts/import-ssh-keys.sh @@ -0,0 +1,19 @@ +#!/usr/bin/env bash + +## Copyright 
Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +## or more contributor license agreements. Licensed under the Elastic License; +## you may not use this file except in compliance with the Elastic License. + +set -euxo pipefail + +# +# Imports public SSH keys from Github profiles +# + +BASEDIR=$(dirname "$0") + +input="${BASEDIR}/../ansible/github-ssh-keys" +while IFS= read -r line || [ -n "$line" ] # also process a last username that lacks a trailing newline +do + [ -z "$line" ] || ssh-import-id "gh:$line" # skip blank lines instead of importing an empty "gh:" id +done < "$input" diff --git a/e2e/TROUBLESHOOTING.md b/e2e/TROUBLESHOOTING.md index 5fca6d7201..ec2b2d5b96 100644 --- a/e2e/TROUBLESHOOTING.md +++ b/e2e/TROUBLESHOOTING.md @@ -5,6 +5,11 @@ The first step in determining the exact failure is to try and reproduce the test Each test suite's documentation should contain the specifics to run the tests, but it's summarises to executing `go test` or `godog` in the right directory. +### SSH into the Cloud machines +On CI, we are running the Elastic Stack and all test suites in AWS instances, so whenever a build fails we need to access those machines and inspect their state: logs, files, containers... For that, we are enabling SSH access to those ephemeral machines, which will be kept for debugging purposes if and only if the DEVELOPER_MODE build parameter, defined in the Jenkinsfile, is enabled. In the UI of Jenkins, you can enable it using the DEVELOPER_MODE input argument, checking it to true (default is false). After the build finishes, the cloud instances won't be destroyed. + +To access the machines, you must be allowed to do so first, and for that, please submit a PR adding your Github username in alphabetical order to [this file](../.ci/ansible/github-ssh-keys), making sure the file still ends with a newline. + ### Tests fail because the product could not be configured or run correctly This type of failure usually indicates that code for these tests itself needs to be changed. See the sections on how to run the tests locally in the specific test suite.