diff --git a/.ci/.e2e-platforms.yaml b/.ci/.e2e-platforms.yaml index 654f7ed6c3..9754643458 100644 --- a/.ci/.e2e-platforms.yaml +++ b/.ci/.e2e-platforms.yaml @@ -66,3 +66,62 @@ PLATFORMS: instance_type: "t3.xlarge" shell_type: sh username: ubuntu + + # Machines with preinstalled dependencies + # Argument "image" doesn't fully specify the AMI name. + # The result image name is concatenated with branch name and corresponds to pattern: + # [image]-[branch] + # for example: + # - ubuntu-2204-e2e-runner-main + # - ubuntu-2204-e2e-runner-8.3 + # - ubuntu-2204-e2e-runner-PR-3698 (a temporary AMI is being built and tested when we change ansible packer dirs) + ubuntu_22_04_amd64_preinstalled: + description: "Ubuntu 22.04 AMD64 Preinstalled" + image: "ubuntu-2204-e2e-runner" + instance_type: "t3.xlarge" + shell_type: sh + username: ubuntu + dependencies_installed: true + debian_10_arm64_preinstalled: + description: "Debian 10 ARM64 Preinstalled" + image: "debian-10-arm64-runner" + instance_type: "a1.large" + shell_type: sh + username: admin + dependencies_installed: true + debian_10_amd64_preinstalled: + description: "Debian 10 AMD64 Preinstalled" + image: "debian-10-amd64-runner" + instance_type: "t3.xlarge" + shell_type: sh + username: admin + dependencies_installed: true + debian_11_amd64_preinstalled: + description: "Debian 11 AMD64 Preinstalled" + image: "debian-11-amd64-runner" + instance_type: "t3.xlarge" + shell_type: sh + username: admin + dependencies_installed: true + centos8_arm64_preinstalled: + description: "CentOS Stream 8 aarch64 preinstalled" + image: "centos-8-arm64-runner" + instance_type: "a1.large" + shell_type: sh + username: centos + dependencies_installed: true + centos8_amd64_preinstalled: + description: "CentOS Stream 8 x86_64" + image: "centos-8-amd64-runner" + instance_type: "t3.xlarge" + shell_type: sh + username: centos + dependencies_installed: true + windows2019_preinstalled: + description: "Windows 2019 x86_64 (HVM) preinstalled" + image: 
"windows-2019-runner" + instance_type: "c5.2xlarge" + shell_type: cmd + username: ogc + dependencies_installed: true + diff --git a/.ci/.e2e-tests-tmp.yaml b/.ci/.e2e-tests-tmp.yaml new file mode 100644 index 0000000000..5f71503292 --- /dev/null +++ b/.ci/.e2e-tests-tmp.yaml @@ -0,0 +1,19 @@ +--- +SUITES: +# - suite: "helm" +# provider: "docker" +# scenarios: +# - name: "APM Server" +# tags: "apm-server" +# platforms: [ "debian_10_amd64_preinstalled" ] +# - name: "Filebeat" +# tags: "filebeat" +# platforms: [ "debian_10_amd64_preinstalled" ] +# - name: "Metricbeat" +# tags: "metricbeat" +# platforms: [ "debian_10_amd64_preinstalled" ] + - suite: "fleet" + scenarios: + - name: "Fleet" + tags: "fleet_mode" + platforms: ["centos8_amd64_preinstalled"] diff --git a/.ci/Jenkinsfile b/.ci/Jenkinsfile index 120b109ec4..d8e3db8f63 100644 --- a/.ci/Jenkinsfile +++ b/.ci/Jenkinsfile @@ -1,13 +1,9 @@ #!/usr/bin/env groovy +import groovy.transform.Field @Library('apm@current') _ -import groovy.transform.Field - -/** -Store the worker status so if the CI worker behaves wrongy then let's rerun the stage again -*/ -@Field def workersStatus = [:] +@Field def e2eUtils pipeline { agent { label 'ubuntu-20.04 && immutable && docker' } @@ -27,9 +23,12 @@ pipeline { ELASTIC_CLOUD_SECRET = 'secret/observability-team/ci/elastic-cloud/observability-team-user' GCP_PROVISIONER_SECRET = 'secret/observability-team/ci/service-account/jenkins-gce-provisioner' AWS_PROVISIONER_SECRET = 'secret/observability-team/ci/elastic-observability-aws-account-auth' - TEST_MATRIX_FILE = "${params.testMatrixFile}" E2E_SSH_KEY = "${env.REAL_BASE_DIR}/e2essh" E2E_SSH_KEY_PUB = "${env.E2E_SSH_KEY}.pub" + TEST_MATRIX_FILE = "${params.testMatrixFile}" + RUN_AS_MAIN_BRANCH = "${params.Run_As_Main_Branch}" + DESTROY_CLOUD_RESOURCES = "${params.DESTROY_CLOUD_RESOURCES}" + RUN_TEST_SUITES = "${params.runTestsSuites}" } options { timeout(time: 120, unit: 'MINUTES') @@ -50,7 +49,7 @@ pipeline { booleanParam(name: 
"SKIP_SCENARIOS", defaultValue: true, description: "If it's needed to skip those scenarios marked as @skip. Default true") booleanParam(name: "NIGHTLY_SCENARIOS", defaultValue: false, description: "Deprecated. Not used in this pipeline any more. Please refer to the 'testMatrixFile' param, which defines what scenarios to run") string(name: 'runTestsSuites', defaultValue: '', description: 'A comma-separated list of test suites to run (default: empty to run all test suites)') - string(name: 'testMatrixFile', defaultValue: '.ci/.e2e-tests.yaml', description: 'The file with the test suite and scenarios to be tested.') + string(name: 'testMatrixFile', defaultValue: '.ci/.e2e-tests-tmp.yaml', description: 'The file with the test suite and scenarios to be tested.') booleanParam(name: "forceSkipGitChecks", defaultValue: false, description: "If it's needed to check for Git changes to filter by modified sources") booleanParam(name: "notifyOnGreenBuilds", defaultValue: false, description: "If it's needed to notify to Slack with green builds.") string(name: 'SLACK_CHANNEL', defaultValue: 'observablt-bots', description: 'The Slack channel(s) where errors will be posted. 
For multiple channels, use a comma-separated list of channels') @@ -70,7 +69,7 @@ string(name: 'GITHUB_CHECK_SHA1', defaultValue: '', description: 'Git SHA for the upstream project (branch or PR)') } stages { - stage('Initializing'){ + stage('Initializing') { options { skipDefaultCheckout() } environment { HOME = "${env.WORKSPACE}" @@ -90,6 +89,7 @@ LOG_LEVEL = "${params.LOG_LEVEL.trim()}" TIMEOUT_FACTOR = "${params.TIMEOUT_FACTOR.trim()}" GITHUB_CHECK_REPO = "${params.GITHUB_CHECK_REPO.trim()}" + GITHUB_CHECK_NAME = "${params.GITHUB_CHECK_NAME.trim()}" GITHUB_CHECK_SHA1 = "${params.GITHUB_CHECK_SHA1.trim()}" } stages { @@ -97,12 +97,21 @@ steps { pipelineManager([ cancelPreviousRunningBuilds: [ when: 'PR' ] ]) deleteDir() - gitCheckout(basedir: BASE_DIR, githubNotifyFirstTimeContributor: true) - githubCheckNotify('PENDING') // we want to notify the upstream about the e2e the soonest + gitCheckout(basedir: BASE_DIR, githubNotifyFirstTimeContributor: true) stash allowEmpty: true, name: 'source', useDefaultExcludes: false setEnvVar("GO_VERSION", readFile("${env.REAL_BASE_DIR}/.go-version").trim()) setEnvVar("LABELS_STRING", "buildURL=${env.BUILD_URL} gitSha=${env.GIT_BASE_COMMIT} build=${env.BUILD_ID} repo=${env.REPO} branch=${env.BRANCH_NAME.toLowerCase().replaceAll('[^a-z0-9-]', '-')} type=ci") - checkSkipTests() + script { + e2eUtils = load "${env.REAL_BASE_DIR}/.ci/e2eUtils.groovy" + e2eUtils.checkSkipTests() + e2eUtils.checkRebuildAmis() + e2eUtils.githubCheckNotify( + status: 'PENDING', + githubCheckRepo: env.GITHUB_CHECK_REPO, + githubCheckSha1: env.GITHUB_CHECK_SHA1, + githubCheckName: env.GITHUB_CHECK_NAME + ) + } } } stage('Build Docs') { @@ -128,12 +137,36 @@ } } } + + stage('Build runner AMIs') { + options { skipDefaultCheckout() } + when { + beforeAgent true + anyOf { + // expression { return env.REBUILD_AMIS == "true" } + expression { return false } + } + } + steps { + withGithubNotify(context: 'Build 
runner AMIs', tab: 'tests') { + deleteDir() + unstash 'source' + script { + e2eUtils.buildPackerAMIs(amiSuffix: env.CHANGE_ID) + } + } + } + } + stage('Deploy Test Infra') { failFast true options { skipDefaultCheckout() } environment { GO111MODULE = 'on' PATH = "${env.HOME}/bin:${env.REAL_BASE_DIR}/bin:${HOME}/go/bin:${env.PATH}" + SUITE = 'fleet' + STACK_INSTANCE_ID = "${env.BUILD_URL}_stack" + TAGS = "non-existing-tag" } when { beforeAgent true @@ -146,8 +179,7 @@ pipeline { withGithubNotify(context: 'Deploy Stack', tab: 'tests') { deleteDir() unstash 'source' - script { - // Deploy the test infrastructure + script { sh "ssh-keygen -b 4096 -t rsa -f ${E2E_SSH_KEY} -q -N \"\" " dir("${env.REAL_BASE_DIR}") { @@ -156,7 +188,7 @@ pipeline { "STACK_INSTANCE_ID=${env.BUILD_URL}_stack", "TAGS=non-existing-tag" ]) { - ciBuild() { + e2eUtils.ciBuild() { retryWithSleep(retries: 3, seconds: 5, backoff: true){ sh(label: 'Setup Stack node', script: "make -C .ci create-stack") } @@ -186,25 +218,20 @@ pipeline { } } steps { + deleteDir() + unstash 'source' withGithubNotify(context: 'E2E Tests', tab: 'tests') { - script { - def suitesParam = params.runTestsSuites - def existingSuites = readYaml(file: "${env.REAL_BASE_DIR}/${TEST_MATRIX_FILE}") - def parallelTasks = [:] - if (suitesParam == "") { - log(level: 'DEBUG', text: "Iterate through existing test suites") - existingSuites['SUITES'].each { item -> - checkTestSuite(parallelTasks, item) - } - } else { - log(level: 'DEBUG', text: "Iterate through the comma-separated test suites (${suitesParam}), comparing with the existing test suites") - suitesParam.split(',').each { suiteParam -> - existingSuites['SUITES'].findAll { suiteParam.trim() == it.suite }.each { item -> - checkTestSuite(parallelTasks, item) - } - } - } - parallel(parallelTasks) + script { + def testMatrix = readYaml(file: "${env.REAL_BASE_DIR}/${TEST_MATRIX_FILE}") + e2eUtils.runE2ETests( + amiSuffix: env.CHANGE_ID, + testMatrix: testMatrix, + selectedSuites: 
env.RUN_TEST_SUITES, + runAsMainBranch: env.RUN_AS_MAIN_BRANCH, + githubCheckSha1: env.GITHUB_CHECK_SHA1, + githubCheckRepo: env.GITHUB_CHECK_REPO, + destroyTestRunner: env.DESTROY_CLOUD_RESOURCES + ) } } } @@ -212,31 +239,7 @@ cleanup { // Once all tests are complete we need to teardown the single instance with the deployed stack script { - dir("${env.REAL_BASE_DIR}") { - ciBuild() { - def stackIP = getNodeIp('stack') - sh(label: 'Grab logs', script:"make -C .ci fetch-test-reports NODE_IP_ADDRESS=${stackIP} NODE_LABEL=debian_10_amd64") - archiveArtifacts(allowEmptyArchive: true, artifacts: "outputs/**/TEST-*,outputs/**/*.zip,outputs/**/*.tgz") - junit2otel(traceName: 'junit-e2e-tests', allowEmptyResults: true, keepLongStdio: true, testResults: "outputs/**/TEST-*.xml") - } - } - def stackMachine = getMachineInfo('stack') - if (!params.DESTROY_CLOUD_RESOURCES) { - def stackRunnerIP = getNodeIp('stack') - log(level: 'DEBUG', text: "Stack instance won't be destroyed after the build. 
Please SSH into the stack machine on ${stackRunnerIP}") - } else { - dir("${env.REAL_BASE_DIR}") { - withEnv([ - "STACK_INSTANCE_ID=${env.BUILD_URL}_stack", - ]) { - ciBuild() { - retryWithSleep(retries: 3, seconds: 5, backoff: true){ - sh(label: 'Destroy stack node', script: "make -C .ci destroy-stack") - } - } - } - } - } + e2eUtils.destroyStack(destroyCloudResources: env.DESTROY_CLOUD_RESOURCES) } } } @@ -261,345 +264,28 @@ pipeline { } } } - } - } - post { - cleanup { - doNotifyBuildResult(params.notifyOnGreenBuilds) - } - } -} - -// this function evaluates if the test stage of the build must be executed -def checkSkipTests() { - dir("${BASE_DIR}"){ - - // only docs means no tests are run - if (isGitRegionMatch(patterns: [ '.*\\.md' ], shouldMatchAll: true)) { - setEnvVar("SKIP_TESTS", true) - return - } - - // patterns for all places that should trigger a full build - def regexps = [ "^e2e/_suites/fleet/.*", "^e2e/_suites/kubernetes-autodiscover/.*", "^.ci/.*", "^cli/.*", "^e2e/.*\\.go", "^internal/.*\\.go" ] - setEnvVar("SKIP_TESTS", !isGitRegionMatch(patterns: regexps, shouldMatchAll: false)) - } -} - -/* - * Runs the Make build at the CI, executing the closure in the context of Ansible + AWS - */ -def ciBuild(Closure body){ - withEnv([ - "SSH_KEY=${E2E_SSH_KEY}" - ]) { - def awsProps = getVaultSecret(secret: "${AWS_PROVISIONER_SECRET}") - def awsAuthObj = awsProps?.data - withEnv([ - "ANSIBLE_CONFIG=${env.REAL_BASE_DIR}/.ci/ansible/ansible.cfg", - "ANSIBLE_HOST_KEY_CHECKING=False", - ]){ - withVaultToken(){ - withEnvMask(vars: [ - [var: "AWS_ACCESS_KEY_ID", password: awsAuthObj.access_key], - [var: "AWS_SECRET_ACCESS_KEY", password: awsAuthObj.secret_key] - ]) { - withOtelEnv() { - body() - } - } - } - } - } -} - -def getNodeIp(nodeType){ - return sh(label: "Get IP address of the ${nodeType}", script: "cat ${REAL_BASE_DIR}/.ci/.${nodeType}-host-ip", returnStdout: true) -} - -def getRemoteE2EPath(testRunner, platform) { - if (platform.contains("windows")) { 
- return "C:\\Users\\${testRunner.user}\\e2e-testing\\" - } - - return "/home/${testRunner.user}/e2e-testing/" -} - -def getMachineInfo(platform){ - def machineYaml = readYaml(file: "${env.REAL_BASE_DIR}/.ci/.e2e-platforms.yaml") - def machines = machineYaml['PLATFORMS'] - log(level: 'INFO', text: "getMachineInfo: machines.get(platform)=${machines.get(platform)}") - return machines.get(platform) -} - -def checkTestSuite(Map parallelTasks = [:], Map item = [:]) { - def suite = item.suite - def platforms = item.platforms - - // Predefine the remote provider to use the already provisioned stack VM. - // Each suite or scenario in the CI test suite would be able to define its own provider - // (i.e. docker). If empty, remote will be used as fallback - def suiteProvider = item.provider - if (!suiteProvider || suiteProvider?.trim() == '') { - suiteProvider = 'remote' - } - - item.scenarios.each { scenario -> - def name = scenario.name - def platformsValue = platforms - - def scenarioProvider = scenario.provider - // if the scenario does not set its own provider, use suite's provider - if (!scenarioProvider || scenarioProvider?.trim() == '') { - scenarioProvider = suiteProvider - } - - def scenarioPlatforms = scenario.platforms - if (scenarioPlatforms?.size() > 0) { - // scenario platforms take precedence over suite platforms, overriding them - platformsValue = scenarioPlatforms - } - def pullRequestFilter = scenario.containsKey('pullRequestFilter') ? scenario.pullRequestFilter : '' - def tags = scenario.tags - platformsValue.each { rawPlatform -> - // platform is not space based, so let's ensure no extra spaces can cause misbehaviours. 
- def platform = rawPlatform.trim() - log(level: 'INFO', text: "Adding ${suite}:${platform}:${tags} test suite to the build execution") - def machineInfo = getMachineInfo(platform) - def stageName = "${suite}_${platform}_${tags}" - parallelTasks["${stageName}"] = generateFunctionalTestStep(name: "${name}", - platform: platform, - provider: scenarioProvider, - suite: "${suite}", - tags: "${tags}", - pullRequestFilter: "${pullRequestFilter}", - machine: machineInfo, - stageName: stageName) - } - } -} - -/* - * Sends out notification of the build result to Slack - */ -def doNotifyBuildResult(boolean slackNotify) { - def doSlackNotify = true // always try to notify on failures - def githubCheckStatus = 'FAILURE' - if (currentBuild.currentResult == 'SUCCESS') { - githubCheckStatus = 'SUCCESS' - doSlackNotify = slackNotify // if the build status is success, read the parameter - } - - githubCheckNotify(githubCheckStatus) - - - def testsSuites = "${params.runTestsSuites}" - if (testsSuites?.trim() == "") { - testsSuites = "All suites" - } - - def channels = "${env.SLACK_CHANNEL}" - if (channels?.trim() == "") { - channels = "observablt-bots" - } - - def header = "*Test Suite*: " + testsSuites - notifyBuildResult(analyzeFlakey: true, - jobName: getFlakyJobName(withBranch: "${env.JOB_BASE_NAME}"), - prComment: true, - slackHeader: header, - slackChannel: "${channels}", - slackComment: true, - slackNotify: doSlackNotify) -} - -/** - Notify the GitHub check of the parent stream - **/ -def githubCheckNotify(String status) { - if (params.GITHUB_CHECK_NAME?.trim() && params.GITHUB_CHECK_REPO?.trim() && params.GITHUB_CHECK_SHA1?.trim()) { - githubNotify context: "${params.GITHUB_CHECK_NAME}", - description: "${params.GITHUB_CHECK_NAME} ${status.toLowerCase()}", - status: "${status}", - targetUrl: "${env.RUN_DISPLAY_URL}", - sha: params.GITHUB_CHECK_SHA1, account: 'elastic', repo: params.GITHUB_CHECK_REPO, credentialsId: env.JOB_GIT_CREDENTIALS - } -} - -def 
generateFunctionalTestStep(Map args = [:]){ - def name = args.get('name') - def name_normalize = name.replace(' ', '_') - def platform = args.get('platform') - def provider = args.get('provider') - def suite = args.get('suite') - def tags = args.get('tags') - def pullRequestFilter = args.get('pullRequestFilter')?.trim() ? args.get('pullRequestFilter') : '' - def machine = args.get('machine') - def stageName = args.get('stageName') - - // TODO: Is this still relevant? - if (isPR() || isUpstreamTrigger(filter: 'PR-')) { - // when the "Run_As_Main_Branch" param is disabled, we will honour the PR filters, which - // basically exclude some less frequent platforms or operative systems. If the user enabled - // this param, the pipeline will remove the filters from the test runner. - if (!params.Run_As_Main_Branch) { - tags += pullRequestFilter - } - } - - def goArch = "amd64" - if (platform.contains("arm64")) { - goArch = "arm64" - } - - // sanitize tags to create the file - def sanitisedTags = tags.replaceAll("\\s","_") - sanitisedTags = sanitisedTags.replaceAll("~","") - sanitisedTags = sanitisedTags.replaceAll("@","") - - def githubCheckSha1 = params.GITHUB_CHECK_SHA1?.trim() ? params.GITHUB_CHECK_SHA1 : '' - def githubCheckRepo = params.GITHUB_CHECK_REPO?.trim() ? 
params.GITHUB_CHECK_REPO : '' - - // Setup environment for platform - def envContext = [] - envContext.add("PROVIDER=${provider}") - envContext.add("GITHUB_CHECK_SHA1=${githubCheckSha1}") - envContext.add("GITHUB_CHECK_REPO=${githubCheckRepo}") - envContext.add("SUITE=${suite}") - envContext.add("TAGS=${tags}") - envContext.add("REPORT_PREFIX=${suite}_${platform}_${sanitisedTags}") - envContext.add("ELASTIC_APM_GLOBAL_LABELS=branch_name=${BRANCH_NAME},build_pr=${isPR()},build_id=${env.BUILD_ID},go_arch=${goArch},beat_version=${env.BEAT_VERSION},elastic_agent_version=${env.ELASTIC_AGENT_VERSION},stack_version=${env.STACK_VERSION}") - // VM characteristics - envContext.add("NODE_LABEL=${platform}") - envContext.add("NODE_IMAGE=${machine.image}") - envContext.add("NODE_INSTANCE_ID=${env.BUILD_URL}_${platform}_${suite}_${tags}") - envContext.add("NODE_INSTANCE_TYPE=${machine.instance_type}") - envContext.add("NODE_SHELL_TYPE=${machine.shell_type}") - envContext.add("NODE_USER=${machine.username}") - - return { - // Set the worker as flaky for the time being, this will be changed in the finally closure. - setFlakyWorker(stageName) - retryWithNode(labels: 'ubuntu-20.04 && gobld/machineType:e2-small', forceWorkspace: true, forceWorker: true, stageName: stageName){ - try { - deleteDir() - dir("${env.REAL_BASE_DIR}") { - unstash 'sourceEnvModified' - withEnv(envContext) { - // This step will help to send the APM traces to the - // withOtelEnv is the one that uses the APM service defined by the Otel Jenkins plugin. - // withAPMEnv uses Vault to prepare the context. - // IMPORTANT: withAPMEnv is now the one in used since withOtelEnv uses a specific Opentelemetry Collector at the moment. 
- // TODO: This will need to be integrated into the provisioned VMs - withAPMEnv() { - // we are separating the different test phases to avoid recreating - ciBuild() { - sh(label: 'Start node', script: "make -C .ci provision-node") - } - - // make goal to run the tests, which is platform-dependant - def runCommand = "run-tests" - - if (platform.contains("windows")) { - runCommand = "run-tests-win" - // Ansible wait_for module is not enough to mitigate the timeout - log(level: 'DEBUG', text: "Sleeping 300 seconds on Windows so that SSH is accessible in the remote instance.") - sleep(300) - } - - ciBuild() { - retryWithSleep(retries: 3, seconds: 5, backoff: true){ - sh(label: 'Configure node for testing', script: "make -C .ci setup-node") - } - } - ciBuild() { - sh(label: 'Run tests in the node', script: "make -C .ci ${runCommand}") - } - } - } - } - } finally { - withEnv(envContext) { - dir("${env.REAL_BASE_DIR}") { - // If it reaches this point then the CI worker is most likely behaving correctly - // there is still a chance things might fail afterwards, but this is just the finally - // section so we could say we are good to go. - // It runs after dir so if the worker is gone the an error will be thrown regarding - // the dir cannot be accessed in the existing none worker. - unsetFlakyWorker(stageName) - def testRunnerIP = getNodeIp("node") - sh "mkdir -p outputs/${testRunnerIP} || true" - ciBuild() { - sh(label: 'Fetch tests reports from node', script: "make -C .ci fetch-test-reports") - } - sh "ls -l outputs/${testRunnerIP}" - if (!params.DESTROY_CLOUD_RESOURCES) { - log(level: 'INFO', text: "Cloud instance won't be destroyed after the build. 
Please SSH into the test runner machine on ${testRunnerIP}.") - } else { - log(level: 'INFO', text: "Destroying Cloud instance") - ciBuild() { - retryWithSleep(retries: 3, seconds: 5, backoff: true){ - sh(label: 'Destroy node', script: "make -C .ci destroy-node") - } - } - } - archiveArtifacts(allowEmptyArchive: true, artifacts: "outputs/**/TEST-*,outputs/**/*.zip,outputs/**/*.tgz") - junit2otel(traceName: 'junit-e2e-tests', allowEmptyResults: true, keepLongStdio: true, testResults: "outputs/**/TEST-*.xml") + post { + always { + script { + e2eUtils.deregisterAMIs(amisRebuilt: env.REBUILD_AMIS) } } } } } } -} - -def retryWithNode(Map args = [:], Closure body) { - try { - incrementRetries(args.stageName) - withNode(args){ - body() - } - } catch (err) { - log(level: 'WARN', text: "Stage '${args.stageName}' failed, let's analyse if it's a flaky CI worker.") - if (isFlakyWorker(args.stageName) && isRetryAvailable(args.stageName)) { - log(level: 'INFO', text: "Rerun '${args.stageName}' in a new worker.") - retryWithNode(args) { - body() + post { + cleanup { + script { + e2eUtils.doNotifyBuildResult( + slackNotify: params.notifyOnGreenBuilds, + slackChannel: env.SLACK_CHANNEL, + runTestsSuites: env.RUN_TEST_SUITES, + githubCheckRepo: env.GITHUB_CHECK_REPO, + githubCheckSha1: env.GITHUB_CHECK_SHA1, + githubCheckName: env.GITHUB_CHECK_NAME + ) } - } else { - error("Error '${err.toString()}'") } } } - -def isFlakyWorker(stageName) { - if (workersStatus.containsKey(stageName)) { - return !workersStatus.get(stageName).get('status', true) - } - return false -} - -def isRetryAvailable(stageName) { - return workersStatus.get(stageName).get('retries', 2) < 2 -} - -def incrementRetries(stageName) { - if (workersStatus.containsKey(stageName)) { - def current = workersStatus[stageName].get('retries', 0) - workersStatus[stageName].retries = current + 1 - } else { - setFlakyWorker(stageName) - workersStatus[stageName].retries = 1 - } -} - -def setFlakyWorker(stageName) { - if 
(workersStatus.containsKey(stageName)) { - workersStatus[stageName].status = false - } else { - workersStatus[stageName] = [ status: false ] - } -} - -def unsetFlakyWorker(stageName) { - workersStatus[stageName].status = true -} diff --git a/.ci/Makefile b/.ci/Makefile index 4f2d618eff..eb26d96c74 100644 --- a/.ci/Makefile +++ b/.ci/Makefile @@ -14,7 +14,7 @@ SSH_OPTS=-o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null SSH_OPTS_EXTENDED=-o TCPKeepAlive=yes -o ServerAliveInterval=30 -o ServerAliveCountMax=200 $(SSH_OPTS) # Debian 10 AMD (see .e2e-platforms.yaml) -STACK_IMAGE=ami-0d90bed76900e679a +STACK_IMAGE=debian-10-amd64-20211011-792 STACK_INSTANCE_TYPE=t3.xlarge STACK_LABEL=debian_amd64 STACK_USER=admin @@ -41,6 +41,12 @@ SUITE ?= fleet # Tags to run. Please check out the feature files TAGS ?= fleet_mode +# The suffix to differentiate temporary and production AMIs that we use for EC2 instances to run tests on +# used in .ci/packer/aws-runners.pkr.hcl +AMI_SUFFIX ?= main + +# AWS organisation arn. 
Specifies organisation that packer will grant access to the AMIs +AWS_ORG_ARN ?= SHELL = /bin/bash MAKEFLAGS += --silent --no-print-directory .SHELLFLAGS = -ec @@ -122,7 +128,7 @@ setup-stack: setup-env show-env source $(VENV_DIR)/bin/activate; \ $(ANSIBLE_PLAYBOOK) $(PROJECT_DIR)/.ci/ansible/playbook.yml \ --private-key="$(SSH_KEY)" \ - --extra-vars "$(LABELS_STRING) nodeLabel=stack nodeImage=$(STACK_IMAGE) nodeInstanceType=$(STACK_INSTANCE_TYPE) nodeUser=$(STACK_USER)" \ + --extra-vars "$(LABELS_STRING) nodeLabel=stack nodeImage=$(STACK_IMAGE) nodeInstanceType=$(STACK_INSTANCE_TYPE)" \ --extra-vars "runId=$(RUN_ID) nodeShellType=$(STACK_SHELL_TYPE) workspace=$(PROJECT_DIR)/ sshPublicKey=$(SSH_KEY_PUBLIC)" \ --ssh-common-args='$(SSH_OPTS)' \ -t setup-stack \ @@ -137,7 +143,7 @@ destroy-stack: setup-env show-env source $(VENV_DIR)/bin/activate; \ $(ANSIBLE_PLAYBOOK) $(PROJECT_DIR)/.ci/ansible/teardown.yml \ --private-key="$(SSH_KEY)" \ - --extra-vars="$(LABELS_STRING) nodeLabel=stack nodeImage=$(STACK_IMAGE) nodeUser=$(STACK_IMAGE)" \ + --extra-vars="$(LABELS_STRING) nodeLabel=stack nodeImage=$(STACK_IMAGE)" \ --extra-vars="runId=$(RUN_ID) instanceID=$(STACK_INSTANCE_ID) nodeShellType=$(STACK_SHELL_TYPE) workspace=$(PROJECT_DIR)/ sshPublicKey=$(SSH_KEY_PUBLIC)" \ --ssh-common-args='$(SSH_OPTS)' # rm -fr $(PROJECT_DIR)/.ci/.stack-host-ip @@ -161,7 +167,7 @@ provision-node: setup-env set-env-$(NODE_LABEL) source $(VENV_DIR)/bin/activate; \ . 
$(PROJECT_DIR)/.ci/.env-$(NODE_LABEL) && $(ANSIBLE_PLAYBOOK) $(PROJECT_DIR)/.ci/ansible/playbook.yml \ --private-key="$(SSH_KEY)" \ - --extra-vars "$(LABELS_STRING) stackRunner=$(STACK_IP_ADDRESS) nodeLabel=$${NODE_LABEL} nodeImage=$${NODE_IMAGE} nodeInstanceType=$${NODE_INSTANCE_TYPE} nodeUser=$${NODE_USER}" \ + --extra-vars "$(LABELS_STRING) stackRunner=$(STACK_IP_ADDRESS) nodeLabel=$${NODE_LABEL} nodeImage=$${NODE_IMAGE} nodeInstanceType=$${NODE_INSTANCE_TYPE}" \ --extra-vars "runId=$(RUN_ID) instanceID=$(NODE_INSTANCE_ID) nodeShellType=$${NODE_SHELL_TYPE} workspace=$(PROJECT_DIR)/ suite=$(SUITE) sshPublicKey=$(SSH_KEY_PUBLIC)" \ --ssh-common-args='$(SSH_OPTS)' \ -t provision-node @@ -174,7 +180,7 @@ setup-node: setup-env set-env-$(NODE_LABEL) source $(VENV_DIR)/bin/activate; \ . $(PROJECT_DIR)/.ci/.env-$(NODE_LABEL) && $(ANSIBLE_PLAYBOOK) $(PROJECT_DIR)/.ci/ansible/playbook.yml \ --private-key="$(SSH_KEY)" \ - --extra-vars "$(LABELS_STRING) stackRunner=$(STACK_IP_ADDRESS) nodeLabel=$${NODE_LABEL} nodeImage=$${NODE_IMAGE} nodeInstanceType=$${NODE_INSTANCE_TYPE} nodeUser=$${NODE_USER}" \ + --extra-vars "$(LABELS_STRING) stackRunner=$(STACK_IP_ADDRESS) nodeLabel=$${NODE_LABEL} nodeImage=$${NODE_IMAGE} nodeInstanceType=$${NODE_INSTANCE_TYPE}" \ --extra-vars "runId=$(RUN_ID) nodeShellType=$${NODE_SHELL_TYPE} workspace=$(PROJECT_DIR)/ suite=$(SUITE) sshPublicKey=$(SSH_KEY_PUBLIC)" \ --ssh-common-args='-o ConnectTimeout=180s $(SSH_OPTS)' \ -t setup-node \ @@ -189,7 +195,7 @@ destroy-node: setup-env set-env-$(NODE_LABEL) source $(VENV_DIR)/bin/activate; \ . 
$(PROJECT_DIR)/.ci/.env-$(NODE_LABEL) && $(ANSIBLE_PLAYBOOK) $(PROJECT_DIR)/.ci/ansible/teardown.yml \ --private-key="$(SSH_KEY)" \ - --extra-vars="$(LABELS_STRING) nodeLabel=$${NODE_LABEL} nodeImage=$${NODE_IMAGE} nodeUser=$${NODE_USER}" \ + --extra-vars="$(LABELS_STRING) nodeLabel=$${NODE_LABEL} nodeImage=$${NODE_IMAGE}" \ --extra-vars="runId=$(RUN_ID) instanceID=$(NODE_INSTANCE_ID) sshPublicKey=$(SSH_KEY_PUBLIC)" \ --ssh-common-args='$(SSH_OPTS)' rm -fr $(PROJECT_DIR)/.ci/.node-host-ip @@ -201,7 +207,7 @@ fetch-test-reports: setup-env set-env-$(NODE_LABEL) source $(VENV_DIR)/bin/activate; \ . $(PROJECT_DIR)/.ci/.env-$(NODE_LABEL) && $(ANSIBLE_PLAYBOOK) $(PROJECT_DIR)/.ci/ansible/fetch-test-reports.yml \ --private-key="$(SSH_KEY)" \ - --extra-vars "$(LABELS_STRING) nodeLabel=$${NODE_LABEL} nodeImage=$${NODE_IMAGE} nodeInstanceType=$${NODE_INSTANCE_TYPE} nodeUser=$${NODE_USER}" \ + --extra-vars "$(LABELS_STRING) nodeLabel=$${NODE_LABEL} nodeImage=$${NODE_IMAGE} nodeInstanceType=$${NODE_INSTANCE_TYPE}" \ --extra-vars "runId=$(RUN_ID) nodeShellType=$${NODE_SHELL_TYPE} workspace=$(PROJECT_DIR)/ sshPublicKey=$(SSH_KEY_PUBLIC)" \ --ssh-common-args='$(SSH_OPTS)' \ -t fetch-reports \ @@ -229,9 +235,9 @@ start-elastic-stack: setup-env show-env @:$(call check_defined, RUN_ID, You need to have an unique RUN_ID. 
To create it please run 'make .runID' goal) source $(VENV_DIR)/bin/activate; \ PROVIDER="remote" SUITE="$(SUITE)" TAGS="non-existent-tag" \ - $(ANSIBLE_PLAYBOOK) $(PROJECT_DIR)/.ci/ansible/elastic-stack.yml \ + $(ANSIBLE_PLAYBOOK) $(PROJECT_DIR)/.ci/ansible/elastic-stack.yml --user $(STACK_USER) \ --private-key="$(SSH_KEY)" \ - --extra-vars "$(LABELS_STRING) nodeLabel=stack nodeImage=$(STACK_IMAGE) nodeInstanceType=$(STACK_INSTANCE_TYPE) nodeUser=$(STACK_USER)" \ + --extra-vars "$(LABELS_STRING) nodeLabel=stack nodeImage=$(STACK_IMAGE) nodeInstanceType=$(STACK_INSTANCE_TYPE)" \ --extra-vars "runId=$(RUN_ID) nodeShellType=$(STACK_SHELL_TYPE) workspace=$(PROJECT_DIR)/ sshPublicKey=$(SSH_KEY_PUBLIC)" \ --ssh-common-args='$(SSH_OPTS)' \ -t elastic-stack \ @@ -248,9 +254,9 @@ run-tests: setup-env set-env-$(NODE_LABEL) show-env @:$(call check_defined, RUN_ID, You need to have an unique RUN_ID. To create it please run 'make .runID' goal) source $(VENV_DIR)/bin/activate; \ . $(PROJECT_DIR)/.ci/.env-$(NODE_LABEL) && PROVIDER="$(PROVIDER)" SUITE="$(SUITE)" TAGS="$(TAGS)" REPORT_PREFIX="$(SUITE)_$${NODE_LABEL}_$(TAGS)" \ - $(ANSIBLE_PLAYBOOK) $(PROJECT_DIR)/.ci/ansible/run-tests.yml \ + $(ANSIBLE_PLAYBOOK) $(PROJECT_DIR)/.ci/ansible/run-tests.yml --user $(NODE_USER) \ --private-key="$(SSH_KEY)" \ - --extra-vars "$(LABELS_STRING) stackRunner=$(STACK_IP_ADDRESS) nodeLabel=$${NODE_LABEL} nodeImage=$${NODE_IMAGE} nodeInstanceType=$${NODE_INSTANCE_TYPE} nodeUser=$${NODE_USER}" \ + --extra-vars "$(LABELS_STRING) stackRunner=$(STACK_IP_ADDRESS) nodeLabel=$${NODE_LABEL} nodeImage=$${NODE_IMAGE} nodeInstanceType=$${NODE_INSTANCE_TYPE}" \ --extra-vars "runId=$(RUN_ID) nodeShellType=$${NODE_SHELL_TYPE} workspace=$(PROJECT_DIR)/ suite=$(SUITE) sshPublicKey=$(SSH_KEY_PUBLIC)" \ --ssh-common-args='$(SSH_OPTS)' \ -t run-tests \ @@ -262,15 +268,49 @@ run-tests-win: setup-env set-env-$(NODE_LABEL) show-env @:$(call check_defined, RUN_ID, You need to have an unique RUN_ID. 
To create it please run 'make .runID' goal) source $(VENV_DIR)/bin/activate; \ . $(PROJECT_DIR)/.ci/.env-$(NODE_LABEL) && PROVIDER="$(PROVIDER)" SUITE="$(SUITE)" TAGS="$(TAGS)" REPORT_PREFIX="$(SUITE)_$${NODE_LABEL}_$(TAGS)" \ - $(ANSIBLE_PLAYBOOK) $(PROJECT_DIR)/.ci/ansible/run-tests.yml \ + $(ANSIBLE_PLAYBOOK) $(PROJECT_DIR)/.ci/ansible/run-tests.yml --user $(NODE_USER) \ --private-key="$(SSH_KEY)" \ - --extra-vars "$(LABELS_STRING) stackRunner=$(STACK_IP_ADDRESS) nodeLabel=$${NODE_LABEL} nodeImage=$${NODE_IMAGE} nodeInstanceType=$${NODE_INSTANCE_TYPE} nodeUser=$${NODE_USER}" \ + --extra-vars "$(LABELS_STRING) stackRunner=$(STACK_IP_ADDRESS) nodeLabel=$${NODE_LABEL} nodeImage=$${NODE_IMAGE} nodeInstanceType=$${NODE_INSTANCE_TYPE}" \ --extra-vars "runId=$(RUN_ID) nodeShellType=$${NODE_SHELL_TYPE} workspace=$(PROJECT_DIR)/ suite=$(SUITE) sshPublicKey=$(SSH_KEY_PUBLIC)" \ --ssh-common-args='$(SSH_OPTS)' \ -t run-tests \ -i $(NODE_IP_ADDRESS), . $(PROJECT_DIR)/.ci/.env-$(NODE_LABEL) && ssh $(SSH_OPTS_EXTENDED) -i $(SSH_KEY) $${NODE_USER}@$(NODE_IP_ADDRESS) "powershell \"C:/Users/$${NODE_USER}/e2e-testing/.ci/scripts/functional-test.ps1\"" +.PHONY: packer-init +packer-init: + packer init $(PROJECT_DIR)/.ci/packer/aws-runners.pkr.hcl + +.PHONY: validate-packer-template +validate-packer-template: + echo "galaxy path: $(ANSIBLE_GALAXY)" + echo "playbook path: $(ANSIBLE_PLAYBOOK)" + packer validate \ + -var "ami_suffix=$(AMI_SUFFIX)" \ + -var "galaxy_command=$(ANSIBLE_GALAXY)" \ + -var "playbook_command=$(ANSIBLE_PLAYBOOK)" \ + $(PROJECT_DIR)/.ci/packer/aws-runners.pkr.hcl + +.PHONY: build-amis-$(AMI_SUFFIX) +build-amis-$(AMI_SUFFIX): setup-env packer-init validate-packer-template + @:$(call check_defined, AMI_SUFFIX, You need to define a unique AMI_SUFFIX for testing. 
Or specify AMI_SUFFIX=main to override existing production AMIs) + packer build \ + -var "ami_suffix=$(AMI_SUFFIX)" \ + -var "galaxy_command=$(ANSIBLE_GALAXY)" \ + -var "playbook_command=$(ANSIBLE_PLAYBOOK)" \ + $(PROJECT_DIR)/.ci/packer/aws-runners.pkr.hcl + +.PHONY: sanitize-ami-suffix +sanitize-ami-suffix: +ifeq ($(AMI_SUFFIX),main) + echo "AMI_SUFFIX is main. We do not delete main AMIs"; exit 1 +endif + +.PHONY: deregister-amis-$(AMI_SUFFIX) +deregister-amis-$(AMI_SUFFIX): sanitize-ami-suffix + @:$(call check_defined, AMI_SUFFIX, AMI_SUFFIX must be defined for deregister) + $(ANSIBLE_PLAYBOOK) $(PROJECT_DIR)/.ci/ansible/delete-amis.yml --extra-vars "amiSuffix=$(AMI_SUFFIX)" + .PHONY: clean clean: rm -fr "$(PROJECT_DIR)/.ci/".env-* "$(PROJECT_DIR)/.ci/.node-host-ip" "$(PROJECT_DIR)/.ci/.runID" "$(PROJECT_DIR)/.ci/.stack-host-ip" "$(PROJECT_DIR)/outputs" "$(PROJECT_DIR)/None-sshhosts" "$(PROJECT_DIR)/stack-sshhosts" diff --git a/.ci/ansible/delete-amis.yml b/.ci/ansible/delete-amis.yml new file mode 100644 index 0000000000..888d5c6480 --- /dev/null +++ b/.ci/ansible/delete-amis.yml @@ -0,0 +1,19 @@ +- name: Delete temporary AMIs + hosts: localhost + gather_facts: True + tasks: + - name: find AMIs by tag + amazon.aws.ec2_ami_info: + region: "us-east-2" + filters: + "tag:Branch": '{{amiSuffix}}' + "tag:Project": 'e2e-testing' + register: amis_to_delete + + - name: Deregister AMIs and delete associated snapshots + amazon.aws.ec2_ami: + region: "us-east-2" + image_id: "{{ item.image_id }}" + delete_snapshot: True + state: absent + loop: "{{amis_to_delete.images}}" diff --git 
a/.ci/ansible/fetch-test-reports.yml b/.ci/ansible/fetch-test-reports.yml index 36d4e2e60e..43e4138a2f 100644 --- a/.ci/ansible/fetch-test-reports.yml +++ b/.ci/ansible/fetch-test-reports.yml @@ -21,7 +21,7 @@ vars: ansible_python_interpreter: "python3" ansible_shell_type: "{{ nodeShellType | default('sh') }}" - ansible_user: "{{ nodeUser }}" +# ansible_user: "{{ nodeUser }}" pip_package: "python3-pip" tasks: - name: Fetch test reports diff --git a/.ci/ansible/files/sshd_config b/.ci/ansible/files/sshd_config deleted file mode 100644 index ed201c75ac..0000000000 --- a/.ci/ansible/files/sshd_config +++ /dev/null @@ -1,4 +0,0 @@ -ClientAliveInterval 30 -ClientAliveCountMax 100 -ServerAliveInterval 30 -ServerAliveCountMax 100 diff --git a/.ci/ansible/github-ssh-keys b/.ci/ansible/github-ssh-keys index 382753fe77..5b0c626f85 100644 --- a/.ci/ansible/github-ssh-keys +++ b/.ci/ansible/github-ssh-keys @@ -2,6 +2,6 @@ adam-stokes AndersonQ ChrsMark juliaElastic -mdelapenya narph +pazone ph diff --git a/.ci/ansible/playbook.yml b/.ci/ansible/playbook.yml index 5aceb19dcc..0180b358b4 100644 --- a/.ci/ansible/playbook.yml +++ b/.ci/ansible/playbook.yml @@ -2,7 +2,6 @@ hosts: localhost gather_facts: yes vars: - ansible_user: "{{ nodeUser }}" tasks: - name: Setup nodes include_tasks: tasks/runners.yml @@ -29,8 +28,7 @@ become: True docker_daemon_options: default-ulimit: ["nofile=1024000:1024000"] - docker_users: - - "{{ ansible_user }}" + docker_users: ["{{ ansible_user | default(ansible_env.USER) }}"] - role: geerlingguy.pip become: True - role: andrewrothstein.kubectl @@ -40,7 +38,6 @@ vars: go_version: 1.17 vars: - ansible_user: "{{ nodeUser }}" ansible_python_interpreter: "auto" tasks: - name: Install deps @@ -77,8 +74,11 @@ tags: - setup-stack - - name: Add SSH keys to stack - include_tasks: tasks/install_ssh_keys.yml + - name: Add SSH keys to runner instances + authorized_key: + user: "{{ ansible_user }}" + key: "https://github.com/{{ item }}.keys" + with_lines: cat 
./github-ssh-keys tags: - setup-stack @@ -90,6 +90,7 @@ - vars/main.yml tags: - setup-node + - setup-ami pre_tasks: - name: Load a variable file based on the OS type include_vars: "{{ lookup('first_found', params) }}" @@ -111,6 +112,7 @@ include_tasks: tasks/install_deps.yml tags: - setup-node + - setup-ami - name: Upgrade pip (CentOS) become: True ansible.builtin.shell: python3 -m pip install --upgrade pip @@ -124,21 +126,10 @@ docker_users: - "{{ ansible_user }}" when: - - '"arm64" not in nodeLabel' - 'ansible_os_family not in ["Suse", "Windows"]' - - role: geerlingguy.docker_arm - become: True - docker_daemon_options: - default-ulimit: ["nofile=1024000:1024000"] - docker_users: - - "{{ ansible_user }}" - when: - - '"arm64" in nodeLabel' - role: geerlingguy.helm when: - - ansible_facts['os_family'] != "Windows" - - suite is defined - - suite in ["kubernetes-autodiscover"] + - ansible_facts['os_family'] != "Windows" - role: mdelapenya.go become: True when: ansible_facts['os_family'] != "Windows" @@ -154,7 +145,6 @@ vars: ansible_python_interpreter: "auto" ansible_shell_type: "{{ nodeShellType | default('sh') }}" - ansible_user: "{{ nodeUser }}" pip_package: "python3-pip" tasks: @@ -164,21 +154,21 @@ with_items: - andrewrothstein.kubectl - andrewrothstein.kind + tags: + - setup-ami when: - ansible_facts['os_family'] != "Windows" - - suite is defined - - suite in ["kubernetes-autodiscover"] - - - name: Setup source code - include_tasks: tasks/copy_test_files.yml - tags: - - setup-node - - copy-source - name: Add SSH keys to runner instances - include_tasks: tasks/install_ssh_keys.yml + authorized_key: + user: "{{ ansible_user }}" + key: "https://github.com/{{ item }}.keys" + with_lines: cat ./github-ssh-keys + when: + - 'ansible_os_family not in ["Windows"]' tags: - setup-node + - setup-ami - name: Create home dir ansible.builtin.file: @@ -190,6 +180,7 @@ - setup-node - copy-source - scripts + - setup-ami when: ansible_os_family not in ["Windows"] - name: Create 
home dir for root @@ -203,6 +194,7 @@ - setup-node - copy-source - scripts + - setup-ami when: ansible_os_family not in ["Windows"] - name: Create home dir on Windows @@ -213,22 +205,5 @@ - setup-node - copy-source - scripts - when: ansible_os_family in ["Windows"] - - - name: Configure test script - include_tasks: tasks/setup_test_script.yml - tags: - - setup-node - - copy-source - - scripts - when: - - ansible_facts['os_family'] != "Windows" - - - name: Configure test script (Windows) - include_tasks: tasks/setup_test_script_windows.yml - tags: - - setup-node - - copy-source - - scripts - when: - - ansible_facts['os_family'] == "Windows" + - setup-ami + when: ansible_os_family in ["Windows"] \ No newline at end of file diff --git a/.ci/ansible/run-tests.yml b/.ci/ansible/run-tests.yml index ecb706e46b..fe6b3a43c9 100644 --- a/.ci/ansible/run-tests.yml +++ b/.ci/ansible/run-tests.yml @@ -21,7 +21,7 @@ vars: ansible_python_interpreter: "auto" ansible_shell_type: "{{ nodeShellType | default('sh') }}" - ansible_user: "{{ nodeUser }}" +# ansible_user: "{{ nodeUser }}" pip_package: "python3-pip" tasks: - name: Setup source code diff --git a/.ci/ansible/tasks/deregister-amis.yml b/.ci/ansible/tasks/deregister-amis.yml new file mode 100644 index 0000000000..121dab3b91 --- /dev/null +++ b/.ci/ansible/tasks/deregister-amis.yml @@ -0,0 +1,17 @@ +--- + +- name: find AMIs by tag + amazon.aws.ec2_ami_info: + region: "us-east-2" + filters: + "tag:Branch": '{{amiSuffix}}' + "tag:Project": 'e2e-testing' + register: amis_to_delete + +- name: Deregister AMIs and delete associated snapshots + amazon.aws.ec2_ami: + region: "us-east-2" + image_id: "{{ item.image_id }}" + delete_snapshot: True + state: absent + loop: "{{amis_to_delete.images}}" diff --git a/.ci/ansible/tasks/install_deps.yml b/.ci/ansible/tasks/install_deps.yml index f12f1c58b7..ade5b5af13 100644 --- a/.ci/ansible/tasks/install_deps.yml +++ b/.ci/ansible/tasks/install_deps.yml @@ -92,28 +92,29 @@ until: 
package_install_res is success when: ansible_distribution in ["OracleLinux"] -- name: Install ssh-import-id python package to copy public SSH keys from Github accounts - become: True - pip: - name: ssh-import-id - when: ansible_distribution not in ["Windows"] - -- name: Set sshd configuration for client alive settings - become: True - ansible.builtin.copy: - src: sshd_config - dest: /etc/ssh/sshd_config - owner: "{{ ansible_user }}" - group: "{{ ansible_user }}" - mode: '0600' - when: ansible_distribution in ["CentOS", "Debian", "Fedora", "RedHat", "Ubuntu"] +# Not required since authorized_key is used +#- name: Install ssh-import-id python package to copy public SSH keys from Github accounts +# become: True +# pip: +# name: ssh-import-id +# when: ansible_distribution not in ["Windows"] -- name: Install Docker for ARM (Debian, Ubuntu) - become: True - ansible.builtin.shell: curl -fsSL test.docker.com -o get-docker.sh && sh get-docker.sh - when: - - ansible_distribution in ["Debian", "Ubuntu"] - - '"arm64" in nodeLabel' +#- name: Set sshd configuration for client alive settings +# become: True +# ansible.builtin.copy: +# src: sshd_config +# dest: /etc/ssh/sshd_config +# owner: "{{ ansible_user }}" +# group: "{{ ansible_user }}" +# mode: '0600' +# when: ansible_distribution in ["CentOS", "Debian", "Fedora", "RedHat", "Ubuntu"] +# # ARM Docker is installed by geerlingguy.docker +# - name: Install Docker for ARM (Debian, Ubuntu) +# become: True +# ansible.builtin.shell: curl -fsSL test.docker.com -o get-docker.sh && sh get-docker.sh +# when: +# - ansible_distribution in ["Debian", "Ubuntu"] +# - '"arm64" in nodeLabel' - name: Install dependencies on Windows chocolatey.chocolatey.win_chocolatey: diff --git a/.ci/ansible/tasks/runners.yml b/.ci/ansible/tasks/runners.yml index 9ae89ba517..241b9a1af2 100644 --- a/.ci/ansible/tasks/runners.yml +++ b/.ci/ansible/tasks/runners.yml @@ -22,15 +22,35 @@ - provision-stack - provision-node -- name: "Create {{nodeLabel}} AWS 
instances" +- name: transform AMI name to id + amazon.aws.ec2_ami_info: + region: "us-east-2" + filters: + name: '{{nodeImage}}' + register: preinstalled_images + tags: + - provision-node + - provision-stack + +- name: Print found images + ansible.builtin.debug: + var: preinstalled_images + tags: + - provision-node + - provision-stack + +- name: "Create preinstalled {{nodeLabel}} AWS instances" + vars: + ami_image: > + {{ preinstalled_images.images | selectattr('name', 'defined') | sort(attribute='creation_date') | last }} ec2_instance: - state: started wait: true + state: started name: "e2e-{{ instanceID}}" key_name: "e2essh-{{runId}}" region: us-east-2 security_group: e2e - image_id: '{{nodeImage}}' + image_id: '{{ ami_image.image_id }}' instance_type: '{{nodeInstanceType}}' network: assign_public_ip: true @@ -53,11 +73,10 @@ team: eng-productivity project: e2e volumes: - - device_name: /dev/sda1 - ebs: - volume_type: gp3 - volume_size: "{{ (nodeLabel != 'windows2019') | ternary(15, 60) }}" - delete_on_termination: true +# - device_name: /dev/sda1 +# volume_type: gp3 +# volume_size: "{{ (nodeLabel != 'windows2019') | ternary(15, 60) }}" +# delete_on_termination: yes - device_name: /dev/xvda ebs: volume_type: gp3 @@ -70,7 +89,7 @@ - provision-node - name: Add AWS host to ssh address list - no_log: true + # no_log: true lineinfile: state: present line: "- {{ nodeUser }}@{{addr.public_ip_address}}" diff --git a/.ci/ansible/teardown.yml b/.ci/ansible/teardown.yml index 5fe6a36026..89043794dc 100644 --- a/.ci/ansible/teardown.yml +++ b/.ci/ansible/teardown.yml @@ -1,8 +1,8 @@ --- - name: Teardown environment hosts: localhost - vars: - ansible_user: "{{ nodeUser }}" +# vars: +# ansible_user: "{{ nodeUser }}" tasks: - name: Gather information about any instance with a tag key 'name' and value "e2e-{{ instanceID }}" amazon.aws.ec2_instance_info: diff --git a/.ci/ansible/test.yml b/.ci/ansible/test.yml new file mode 100644 index 0000000000..d6d8f882d1 --- /dev/null +++ 
b/.ci/ansible/test.yml @@ -0,0 +1,19 @@ +- name: test + hosts: localhost + gather_facts: True + tasks: + - name: transform AMI name to id + amazon.aws.ec2_ami_info: + region: "us-east-2" + filters: + name: '{{nodeImage}}' + register: preinstalled_images + tags: + - provision-node + - provision-stack + - name: Print found images + ansible.builtin.debug: + var: preinstalled_images + tags: + - provision-node + - provision-stack diff --git a/.ci/ansible/vars/main.yml b/.ci/ansible/vars/main.yml index 9f120b3e94..d70c442d81 100644 --- a/.ci/ansible/vars/main.yml +++ b/.ci/ansible/vars/main.yml @@ -1,5 +1,5 @@ --- e2e_project_name: "e2e-testing" -e2e_base_dir: "/home/{{ ansible_user }}/{{ e2e_project_name }}/" -e2e_home_dir: "/home/{{ ansible_user }}/.op/" +e2e_base_dir: "/home/{{ ansible_user | default(ansible_env.USER) }}/{{ e2e_project_name }}/" +e2e_home_dir: "/home/{{ ansible_user | default(ansible_env.USER) }}/.op/" diff --git a/.ci/e2eUtils.groovy b/.ci/e2eUtils.groovy new file mode 100644 index 0000000000..4f9d6dc6a9 --- /dev/null +++ b/.ci/e2eUtils.groovy @@ -0,0 +1,449 @@ +import groovy.transform.Field + +/** +Store the worker status so if the CI worker behaves wrongy then let's rerun the stage again +*/ +@Field def workersStatus = [:] + +def runE2ETests(Map args = [:]) { + def parallelTasks = [:] + if (!args.selectedSuites?.trim()) { + log(level: 'DEBUG', text: "Iterate through existing test suites") + args.testMatrix['SUITES'].each { item -> + parallelTasks += convertSuiteToTasks( + item: item, + githubCheckSha1: args.githubCheckSha1 ?: '', + githubCheckRepo: args.githubCheckRepo ?: '', + runAsMainBranch: args.runAsMainBranch, + amiSuffix: args.amiSuffix ?: 'main', + destroyTestRunner: args.destroyTestRunner ?: false + ) + } + } else { + log(level: 'DEBUG', text: "Iterate through the comma-separated test suites (${args.selectedSuites}), comparing with the existing test suites") + args.selectedSuites?.split(',')?.each { selectedSuite -> + 
args.testMatrix['SUITES'].findAll { selectedSuite.trim() == it.suite }.each { item -> + parallelTasks += convertSuiteToTasks( + item: item, + githubCheckSha1: args.githubCheckSha1 ?: '', + githubCheckRepo: args.githubCheckRepo ?: '', + runAsMainBranch: args.runAsMainBranch, + amiSuffix: args.amiSuffix ?: 'main', + destroyTestRunner: args.destroyTestRunner ?: false + ) + } + } + } + parallel(parallelTasks) +} + +def convertSuiteToTasks(Map args = [:]) { + def parallelTasks = [:] + def suite = args.item.suite + def platforms = args.item.platforms + + // Predefine the remote provider to use the already provisioned stack VM. + // Each suite or scenario in the CI test suite would be able to define its own provider + // (i.e. docker). If empty, remote will be used as fallback + def suiteProvider = args.item.provider + if (!suiteProvider || suiteProvider?.trim() == '') { + suiteProvider = 'remote' + } + + args.item.scenarios.each { scenario -> + def name = scenario.name + def platformsValue = platforms + + def scenarioProvider = scenario.provider + // if the scenario does not set its own provider, use suite's provider + if (scenarioProvider?.trim() == '') { + scenarioProvider = suiteProvider + } + + if (scenario.platforms?.size() > 0) { + // scenario platforms take precedence over suite platforms, overriding them + platformsValue = scenario.platforms + } + def pullRequestFilter = scenario.containsKey('pullRequestFilter') ? scenario.pullRequestFilter : '' + def tags = scenario.tags + platformsValue.each { rawPlatform -> + // platform is not space based, so let's ensure no extra spaces can cause misbehaviours. 
+ def platform = rawPlatform.trim() + log(level: 'INFO', text: "Adding ${suite}:${platform}:${tags} test suite to the build execution") + def machineInfo = getMachineInfo(platform) + def stageName = "${suite}_${platform}_${tags}" + parallelTasks["${stageName}"] = generateFunctionalTestStep( + name: "${name}", + platform: platform, + provider: scenarioProvider, + suite: "${suite}", + tags: "${tags}", + pullRequestFilter: "${pullRequestFilter}", + machine: machineInfo, + stageName: stageName, + runAsMainBranch: args.runAsMainBranch, + amiSuffix: args.amiSuffix, + githubCheckSha1: args.githubCheckSha1, + githubCheckRepo: args.githubCheckRepo, + destroyTestRunner: args.destroyTestRunner + ) + } + } + return parallelTasks +} + +def checkRebuildAmis() { + dir("${BASE_DIR}") { + setEnvVar("REBUILD_AMIS", isGitRegionMatch(patterns: [ "^.ci/ansible/.*", "^.ci/packer/.*"], shouldMatchAll: false)) + } +} + +// this function evaluates whether the test and AMIs stages must be executed +def checkSkipTests() { + dir("${BASE_DIR}") { + + // if only docs changed means no tests are run + if (isGitRegionMatch(patterns: [ '.*\\.md' ], shouldMatchAll: true)) { + setEnvVar("SKIP_TESTS", true) + return + } + + // patterns for all places that should trigger a full build + def tests_regexps = [ + "^e2e/_suites/fleet/.*", + "^e2e/_suites/kubernetes-autodiscover/.*", + "^.ci/.*", + "^cli/.*", + "^e2e/.*\\.go", + "^internal/.*\\.go" + ] + // def ami_regexps = [ "^.ci/ansible/.*", "^.ci/packer/.*"] + setEnvVar("SKIP_TESTS", !isGitRegionMatch(patterns: tests_regexps, shouldMatchAll: false)) + } +} + +/* + * Runs the Make build at the CI, executing the closure in the context of Ansible + AWS + */ +def ciBuild(Closure body) { + withEnv([ + "SSH_KEY=${E2E_SSH_KEY}" + ]) { + def awsProps = getVaultSecret(secret: "${AWS_PROVISIONER_SECRET}") + def awsAuthObj = awsProps?.data + withEnv([ + "ANSIBLE_CONFIG=${env.REAL_BASE_DIR}/.ci/ansible/ansible.cfg", + "ANSIBLE_HOST_KEY_CHECKING=False", + ]){ + 
withVaultToken(){ + withEnvMask(vars: [ + [var: "AWS_ACCESS_KEY_ID", password: awsAuthObj.access_key], + [var: "AWS_SECRET_ACCESS_KEY", password: awsAuthObj.secret_key] + ]) { + withOtelEnv() { + body() + } + } + } + } + } +} + +def getNodeIp(nodeType){ + return sh(label: "Get IP address of the ${nodeType}", script: "cat ${REAL_BASE_DIR}/.ci/.${nodeType}-host-ip", returnStdout: true) +} + +def getMachineInfo(platform){ + def machineYaml = readYaml(file: "${env.REAL_BASE_DIR}/.ci/.e2e-platforms.yaml") + def machines = machineYaml['PLATFORMS'] + log(level: 'INFO', text: "getMachineInfo: machines.get(platform)=${machines.get(platform)}") + return machines.get(platform) +} + +/* + * Sends out notification of the build result to Slack + */ +def doNotifyBuildResult(Map args = [:]) { + def doSlackNotify = true // always try to notify on failures + def githubCheckStatus = 'FAILURE' + if (currentBuild.currentResult == 'SUCCESS') { + githubCheckStatus = 'SUCCESS' + doSlackNotify = args.slackNotify // if the build status is success, read the parameter + } + + this.githubCheckNotify( + status: githubCheckStatus, + githubCheckRepo: args.githubCheckRepo, + githubCheckSha1: args.githubCheckSha1, + githubCheckName: args.githubCheckName + ) + + def testsSuites = args.runTestsSuites?.trim() ?: "All suites" + def channels = args.slackChannel?.trim() ?: "observablt-bots" + + def header = "*Test Suite*: ${testsSuites}" + notifyBuildResult(analyzeFlakey: true, + jobName: getFlakyJobName(withBranch: "${env.JOB_BASE_NAME}"), + prComment: true, + slackHeader: header, + slackChannel: "${channels}", + slackComment: true, + slackNotify: doSlackNotify) +} + +/** + Notify the GitHub check of the parent stream + **/ +def githubCheckNotify(Map args = [:]) { + if (args.githubCheckName?.trim() && args.githubCheckRepo?.trim() && args.githubCheckSha1?.trim()) { + githubNotify context: "${args.githubCheckName}", + description: "${args.githubCheckName} ${args.status?.toLowerCase()}", + status: 
"${args.status}", + targetUrl: "${env.RUN_DISPLAY_URL}", + sha: "${args.githubCheckSha1}", + account: 'elastic', + repo: args.githubCheckRepo, + credentialsId: env.JOB_GIT_CREDENTIALS + } +} +/** +* +* +*/ +def generateFunctionalTestStep(Map args = [:]) { + def name = args.get('name') + def name_normalize = name.replace(' ', '_') + def platform = args.get('platform') + def provider = args.get('provider') + def suite = args.get('suite') + def tags = args.get('tags') + def pullRequestFilter = args.get('pullRequestFilter')?.trim() ?: '' + def machine = args.get('machine') + def stageName = args.get('stageName') + def amiSuffix = args.amiSuffix.trim() ?: 'main' + def runAsMainBranch = args.runAsMainBranch ?: false + def destroyTestRunner = args.destroyTestRunner ?: false + + + if (isPR() || isUpstreamTrigger(filter: 'PR-')) { + // when the "Run_As_Main_Branch" param is disabled, we will honour the PR filters, which + // basically exclude some less frequent platforms or operative systems. If the user enabled + // this param, the pipeline will remove the filters from the test runner. + if (!runAsMainBranch) { + tags += pullRequestFilter + } + } + + def goArch = platform.contains("arm64") ? 
"arm64" : "amd64" + + // sanitize tags to create the file + def sanitisedTags = tags.replaceAll("\\s","_") + sanitisedTags = sanitisedTags.replaceAll("~","") + sanitisedTags = sanitisedTags.replaceAll("@","") + + def githubCheckSha1 = args.githubCheckSha1?.trim() ?: '' + def githubCheckRepo = args.githubCheckRepo?.trim() ?: '' + + // Setup environment for platform + def envContext = [] + envContext.add("PROVIDER=${provider}") + envContext.add("GITHUB_CHECK_SHA1=${githubCheckSha1}") + envContext.add("GITHUB_CHECK_REPO=${githubCheckRepo}") + envContext.add("SUITE=${suite}") + envContext.add("TAGS=${tags}") + envContext.add("REPORT_PREFIX=${suite}_${platform}_${sanitisedTags}") + envContext.add("ELASTIC_APM_GLOBAL_LABELS=branch_name=${BRANCH_NAME},build_pr=${isPR()},build_id=${env.BUILD_ID},go_arch=${goArch},beat_version=${env.BEAT_VERSION},elastic_agent_version=${env.ELASTIC_AGENT_VERSION},stack_version=${env.STACK_VERSION}") + // VM characteristics + envContext.add("NODE_LABEL=${platform}") + envContext.add("NODE_IMAGE=${machine.image}-${amiSuffix}") + envContext.add("NODE_INSTANCE_ID=${env.BUILD_URL}_${platform}_${suite}_${tags}") + envContext.add("NODE_INSTANCE_TYPE=${machine.instance_type}") + envContext.add("NODE_SHELL_TYPE=${machine.shell_type}") + envContext.add("NODE_USER=${machine.username}") + + return { + // Set the worker as flaky for the time being, this will be changed in the finally closure. + setFlakyWorker(stageName) + retryWithNode(labels: 'ubuntu-20.04 && gobld/machineType:e2-small', forceWorkspace: true, forceWorker: true, stageName: stageName){ + try { + deleteDir() + dir("${env.REAL_BASE_DIR}") { + unstash 'sourceEnvModified' + withEnv(envContext) { + // This step will help to send the APM traces to the + // withOtelEnv is the one that uses the APM service defined by the Otel Jenkins plugin. + // withAPMEnv uses Vault to prepare the context. 
+ // IMPORTANT: withAPMEnv is now the one in used since withOtelEnv uses a specific Opentelemetry Collector at the moment. + // TODO: This will need to be integrated into the provisioned VMs + withAPMEnv() { + echo "nodeImage: ${env.NODE_IMAGE}" + // we are separating the different test phases to avoid recreating + ciBuild() { + sh(label: 'Start node', script: "make -C .ci provision-node") + } + + // make goal to run the tests, which is platform-dependant + def runCommand = "run-tests" + + if (platform.contains("windows")) { + runCommand = "run-tests-win" + // Ansible wait_for module is not enough to mitigate the timeout + log(level: 'DEBUG', text: "Sleeping 300 seconds on Windows so that SSH is accessible in the remote instance.") + sleep(300) + } + if (!machine.dependencies_installed) { + ciBuild() { + retryWithSleep(retries: 3, seconds: 5, backoff: true){ + sh(label: 'Configure node for testing', script: "make -C .ci setup-node") + } + } + } + ciBuild() { + sh(label: 'Run tests in the node', script: "make -C .ci ${runCommand}") + } + } + } + } + } finally { + withEnv(envContext) { + dir("${env.REAL_BASE_DIR}") { + // If it reaches this point then the CI worker is most likely behaving correctly + // there is still a chance things might fail afterwards, but this is just the finally + // section so we could say we are good to go. + // It runs after dir so if the worker is gone the an error will be thrown regarding + // the dir cannot be accessed in the existing none worker. + unsetFlakyWorker(stageName) + def testRunnerIP = getNodeIp("node") + sh "mkdir -p outputs/${testRunnerIP} || true" + ciBuild() { + sh(label: 'Fetch tests reports from node', script: "make -C .ci fetch-test-reports") + } + sh "ls -l outputs/${testRunnerIP}" + if (!destroyTestRunner) { + log(level: 'INFO', text: "Cloud instance won't be destroyed after the build. 
Please SSH into the test runner machine on ${testRunnerIP}.") + } else { + log(level: 'INFO', text: "Destroying Cloud instance") + ciBuild() { + retryWithSleep(retries: 3, seconds: 5, backoff: true){ + sh(label: 'Destroy node', script: "make -C .ci destroy-node") + } + } + } + archiveArtifacts(allowEmptyArchive: true, artifacts: "outputs/**/TEST-*,outputs/**/*.zip,outputs/**/*.tgz") + junit2otel(traceName: 'junit-e2e-tests', allowEmptyResults: true, keepLongStdio: true, testResults: "outputs/**/TEST-*.xml") + } + } + } + } + } +} + +def buildPackerAMIs(Map args = [:]) { + dir("${BASE_DIR}") { + if (!args.amiSuffix?.trim()) { + error("amiSuffix parameter must be specified in buildPackerAMIs()") + } + setEnvVar("AMI_SUFFIX", args.amiSuffix) + ciBuild() { + withPackerEnv(version: '1.8.1') { + sh(label: "Build AMIS with suffix:${args.amiSuffix}", script: 'make -C .ci build-amis-$AMI_SUFFIX') + } + } + } +} + +def deregisterAMIs(Map args = [:]) { + if (args.amisRebuilt != "true") return + dir("${BASE_DIR}") { + if (!args.amiSuffix?.trim()) { + error("amiSuffix parameter must be specified in deregisterAMIs()") + } + setEnvVar("AMI_SUFFIX", args.amiSuffix) + ciBuild() { + sh(label: "Deregister AMIs with tagged by Branch :${args.amiSuffix}", script: 'make -C .ci deregister-amis-$AMI_SUFFIX') + } + } +} + +def retryWithNode(Map args = [:], Closure body) { + try { + incrementRetries(args.stageName) + withNode(args){ + body() + } + } catch (err) { + log(level: 'WARN', text: "Stage '${args.stageName}' failed, let's analyse if it's a flaky CI worker.") + if (isFlakyWorker(args.stageName) && isRetryAvailable(args.stageName)) { + log(level: 'INFO', text: "Rerun '${args.stageName}' in a new worker.") + retryWithNode(args) { + body() + } + } else { + error("Error '${err.toString()}'") + } + } +} + +def isFlakyWorker(stageName) { + if (workersStatus.containsKey(stageName)) { + return !workersStatus.get(stageName).get('status', true) + } + return false +} + +def 
isRetryAvailable(stageName) { + return workersStatus.get(stageName).get('retries', 2) < 2 +} + +def incrementRetries(stageName) { + if (workersStatus.containsKey(stageName)) { + def current = workersStatus[stageName].get('retries', 0) + workersStatus[stageName].retries = current + 1 + } else { + setFlakyWorker(stageName) + workersStatus[stageName].retries = 1 + } +} + +def setFlakyWorker(stageName) { + if (workersStatus.containsKey(stageName)) { + workersStatus[stageName].status = false + } else { + workersStatus[stageName] = [ status: false ] + } +} + +def unsetFlakyWorker(stageName) { + workersStatus[stageName].status = true +} + +def destroyStack(Map args = [:]) { + // TODO: extract "pulishJunitReports" + dir("${env.REAL_BASE_DIR}") { + ciBuild() { + def stackIP = getNodeIp('stack') + sh(label: 'Grab logs', script:"make -C .ci fetch-test-reports NODE_IP_ADDRESS=${stackIP} NODE_LABEL=debian_10_amd64") + archiveArtifacts(allowEmptyArchive: true, artifacts: "outputs/**/TEST-*,outputs/**/*.zip,outputs/**/*.tgz") + junit2otel(traceName: 'junit-e2e-tests', allowEmptyResults: true, keepLongStdio: true, testResults: "outputs/**/TEST-*.xml") + } + } + def stackMachine = getMachineInfo('stack') + if (!params.DESTROY_CLOUD_RESOURCES) { + def stackRunnerIP = getNodeIp('stack') + log(level: 'DEBUG', text: "Stack instance won't be destroyed after the build. 
Please SSH into the stack machine on ${stackRunnerIP}") + } else { + dir("${env.REAL_BASE_DIR}") { + withEnv([ + "STACK_INSTANCE_ID=${env.BUILD_URL}_stack", + ]) { + ciBuild() { + retryWithSleep(retries: 3, seconds: 5, backoff: true) { + sh(label: 'Destroy stack node', script: "make -C .ci destroy-stack") + } + } + } + } + } +} + +return this \ No newline at end of file diff --git a/.ci/packer/aws-runners.pkr.hcl b/.ci/packer/aws-runners.pkr.hcl new file mode 100644 index 0000000000..76b22dcdcc --- /dev/null +++ b/.ci/packer/aws-runners.pkr.hcl @@ -0,0 +1,336 @@ +packer { + required_plugins { + amazon = { + version = ">= 1.1.5" + source = "github.com/hashicorp/amazon" + } + } +} + +variable "skip_create_ami" { + type = bool + default = false +} + +variable "source_set" { + type = string + default = "linux" +} + +variable "ami_suffix" { + type = string + default = "test_suffix" +} + +variable "galaxy_command" { + type = string + default = "ansible-galaxy" +} + +variable "playbook_command" { + type = string + default = "ansible-playbook" +} + +locals { + aws_region = "us-east-2" + force_deregister = true + source_sets = { + "linux" = [ + "source.amazon-ebs.ubuntu", + "source.amazon-ebs.debian-10-amd64", + "source.amazon-ebs.debian-10-arm64", + "source.amazon-ebs.debian-11-amd64", + "source.amazon-ebs.centos-8-amd64", + "source.amazon-ebs.centos-8-arm64", + "source.amazon-ebs.oracle-linux-8", + # "source.amazon-ebs.sles15" + ], + "test" = ["source.amazon-ebs.ubuntu"], + "windows" = ["source.amazon-ebs.windows2019"], + "all" = [ + "source.amazon-ebs.ubuntu", + "source.amazon-ebs.debian-10-amd64", + "source.amazon-ebs.debian-10-arm64", + "source.amazon-ebs.debian-11-amd64", + "source.amazon-ebs.centos-8-amd64", + "source.amazon-ebs.centos-8-arm64", + "source.amazon-ebs.oracle-linux-8", + # "source.amazon-ebs.sles15", + # "source.amazon-ebs.windows2019" + ] + } + common_tags = { + Division = "engineering" + Org = "obs" + Team = "observability-robots" + Project = 
"e2e-testing", + Branch = var.ami_suffix + } +} + +source "amazon-ebs" "ubuntu" { + ami_name = "ubuntu-2204-e2e-runner-${var.ami_suffix}" + instance_type = "t3.xlarge" + region = local.aws_region + source_ami = "ami-0aeb7c931a5a61206" + ssh_username = "ubuntu" + communicator = "ssh" + launch_block_device_mappings { + device_name = "/dev/sda1" + volume_size = 15 + volume_type = "gp3" + delete_on_termination = true + } + tags = "${merge( + local.common_tags, + { + OS_Version = "Ubuntu" + Release = "22.04" + Arch = "AMD64" + } + )}" + skip_create_ami = var.skip_create_ami + force_deregister = local.force_deregister +} + +source "amazon-ebs" "debian-10-amd64" { + ami_name = "debian-10-amd64-runner-${var.ami_suffix}" + instance_type = "t3.xlarge" + region = local.aws_region + source_ami = "ami-0d90bed76900e679a" + ssh_username = "admin" + communicator = "ssh" + launch_block_device_mappings { + device_name = "/dev/sda1" + volume_size = 15 + volume_type = "gp3" + delete_on_termination = true + } + tags = "${merge( + local.common_tags, + { + OS_Version = "Debian" + Release = "10" + Arch = "AMD64" + } + )}" + skip_create_ami = var.skip_create_ami + force_deregister = local.force_deregister +} + +source "amazon-ebs" "debian-10-arm64" { + ami_name = "debian-10-arm64-runner-${var.ami_suffix}" + instance_type = "a1.large" + region = local.aws_region + source_ami = "ami-06dac44ad759182bd" + ssh_username = "admin" + communicator = "ssh" + launch_block_device_mappings { + device_name = "/dev/sda1" + volume_size = 15 + volume_type = "gp3" + delete_on_termination = true + } + tags = "${merge( + local.common_tags, + { + OS_Version = "Debian" + Release = "10" + Arch = "ARM64" + } + )}" + skip_create_ami = var.skip_create_ami + force_deregister = local.force_deregister +} + +source "amazon-ebs" "debian-11-amd64" { + ami_name = "debian-11-amd64-runner-${var.ami_suffix}" + instance_type = "t3.xlarge" + region = local.aws_region + source_ami = "ami-0c7c4e3c6b4941f0f" + ssh_username = 
"admin" + communicator = "ssh" + launch_block_device_mappings { + device_name = "/dev/sda1" + volume_size = 15 + volume_type = "gp3" + delete_on_termination = true + } + tags = "${merge( + local.common_tags, + { + OS_Version = "Debian" + Release = "11" + Arch = "AMD64" + } + )}" + skip_create_ami = var.skip_create_ami + force_deregister = local.force_deregister +} + +source "amazon-ebs" "centos-8-amd64" { + ami_name = "centos-8-amd64-runner-${var.ami_suffix}" + instance_type = "t3.xlarge" + region = local.aws_region + source_ami = "ami-045b0a05944af45c1" + ssh_username = "centos" + communicator = "ssh" + launch_block_device_mappings { + device_name = "/dev/sda1" + volume_size = 15 + volume_type = "gp3" + delete_on_termination = true + } + tags = "${merge( + local.common_tags, + { + OS_Version = "Centos" + Release = "8" + Arch = "AMD64" + } + )}" + skip_create_ami = var.skip_create_ami + force_deregister = local.force_deregister +} + +source "amazon-ebs" "centos-8-arm64" { + ami_name = "centos-8-arm64-runner-${var.ami_suffix}" + instance_type = "a1.large" + region = local.aws_region + source_ami = "ami-01cdc9e8306344fe0" + ssh_username = "centos" + communicator = "ssh" + launch_block_device_mappings { + device_name = "/dev/sda1" + volume_size = 15 + volume_type = "gp3" + delete_on_termination = true + } + tags = "${merge( + local.common_tags, + { + OS_Version = "Centos" + Release = "8" + Arch = "ARM64" + } + )}" + skip_create_ami = var.skip_create_ami + force_deregister = local.force_deregister +} + +source "amazon-ebs" "oracle-linux-8" { + ami_name = "oracle-linux-8-x86-64-runner-${var.ami_suffix}" + instance_type = "t3.xlarge" + region = local.aws_region + source_ami = "ami-00371eeb8fd8e0e16" + ssh_username = "ec2-user" + communicator = "ssh" + launch_block_device_mappings { + device_name = "/dev/sda1" + volume_size = 15 + volume_type = "gp3" + delete_on_termination = true + } + tags = "${merge( + local.common_tags, + { + OS_Version = "Oracle Linux" + Release = 
"8" + Arch = "x86-64" + } + )}" + skip_create_ami = var.skip_create_ami + force_deregister = local.force_deregister + +} + +source "amazon-ebs" "sles15" { + ami_name = "sles15-runner-${var.ami_suffix}" + instance_type = "t3.xlarge" + region = local.aws_region + source_ami = "ami-0f7cb53c916a75006" + ssh_username = "ec2-user" + communicator = "ssh" + launch_block_device_mappings { + device_name = "/dev/sda1" + volume_size = 15 + volume_type = "gp3" + delete_on_termination = true + } + tags = "${merge( + local.common_tags, + { + OS_Version = "SUSE Linux Enterprise Server 15 SP3" + Release = "8" + Arch = "ARM64" + } + )}" + skip_create_ami = var.skip_create_ami + force_deregister = local.force_deregister +} + +build { + name = "linux" + + sources = local.source_sets[var.source_set] + + provisioner "shell" { + inline = ["echo ${var.playbook_command}"] + } + + provisioner "ansible" { + user = build.User + ansible_env_vars = ["PACKER_BUILD_NAME={{ build_name }}"] + playbook_file = "ansible/playbook.yml" + extra_arguments = ["--tags", "setup-ami"] + galaxy_file = "ansible/requirements.yml" + galaxy_command = var.galaxy_command + command = var.playbook_command + } +} + +# Windows +source "amazon-ebs" "windows2019" { + ami_name = "windows-2019-runner-${var.ami_suffix}" + instance_type = "c5.2xlarge" + region = local.aws_region + source_ami = "ami-0587bd602f1da2f1d" + ssh_username = "ogc" + communicator = "ssh" + launch_block_device_mappings { + device_name = "/dev/sda1" + volume_size = 60 + volume_type = "gp3" + delete_on_termination = true + } + tags = "${merge( + local.common_tags, + { + OS_Version = "Windows" + Release = "2019" + Arch = "x86_64" + Branch = var.ami_suffix + Project = "e2e" + } + )}" + skip_create_ami = var.skip_create_ami + force_deregister = local.force_deregister +} + +build { + name = "windows" + sources = [ + "source.amazon-ebs.windows2019" + ] + + provisioner "ansible" { + user = build.User + ansible_env_vars = ["PACKER_BUILD_NAME={{ build_name }}"] 
+ playbook_file = "ansible/playbook.yml" + extra_arguments = ["--tags", "setup-ami", "--extra-vars", "nodeShellType=cmd"] + galaxy_file = "ansible/requirements.yml" + galaxy_command = var.galaxy_command + command = var.playbook_command + } +} \ No newline at end of file diff --git a/.gitignore b/.gitignore index eb45374653..a999f0f8db 100644 --- a/.gitignore +++ b/.gitignore @@ -11,3 +11,4 @@ hosts *-sshhosts .idea .obs +*.iml