From 618c6b1285ac618b7b38fb52e94a88247dcd2ca1 Mon Sep 17 00:00:00 2001 From: Fred Heinecke Date: Tue, 14 Nov 2023 16:30:41 -0600 Subject: [PATCH 1/2] Added release server publishing retry --- .drone.yml | 18 +++++++++--------- dronegen/relcli.go | 44 ++++++++++++++++++++++++++++++++++---------- 2 files changed, 43 insertions(+), 19 deletions(-) diff --git a/.drone.yml b/.drone.yml index ed03afe4af368..19866bfba7186 100644 --- a/.drone.yml +++ b/.drone.yml @@ -408,10 +408,8 @@ steps: - echo "$RELEASES_CERT" | base64 -d > "$RELCLI_CERT" - echo "$RELEASES_KEY" | base64 -d > "$RELCLI_KEY" - trap "rm -rf /tmpfs/creds" EXIT - - |- - docker run -i -v /tmpfs/creds:/tmpfs/creds \ - -e DRONE_REPO -e DRONE_TAG -e RELCLI_BASE_URL -e RELCLI_CERT -e RELCLI_KEY \ - $RELCLI_IMAGE auto_destroy -f -v 6 + - docker run -i -v /tmpfs/creds:/tmpfs/creds -e DRONE_REPO -e DRONE_TAG -e RELCLI_BASE_URL + -e RELCLI_CERT -e RELCLI_KEY $RELCLI_IMAGE auto_destroy -f -v 6 environment: RELCLI_BASE_URL: https://releases-prod.platform.teleport.sh RELCLI_CERT: /tmpfs/creds/releases.crt @@ -16356,10 +16354,12 @@ steps: - echo "$RELEASES_CERT" | base64 -d > "$RELCLI_CERT" - echo "$RELEASES_KEY" | base64 -d > "$RELCLI_KEY" - trap "rm -rf /tmpfs/creds" EXIT - - |- - docker run -i -v /tmpfs/creds:/tmpfs/creds \ - -e DRONE_REPO -e DRONE_TAG -e RELCLI_BASE_URL -e RELCLI_CERT -e RELCLI_KEY \ - $RELCLI_IMAGE auto_publish -f -v 6 + - docker run -i -v /tmpfs/creds:/tmpfs/creds -e DRONE_REPO -e DRONE_TAG -e RELCLI_BASE_URL + -e RELCLI_CERT -e RELCLI_KEY $RELCLI_IMAGE auto_publish -f -v 6 || true + - docker run -i -v /tmpfs/creds:/tmpfs/creds -e DRONE_REPO -e DRONE_TAG -e RELCLI_BASE_URL + -e RELCLI_CERT -e RELCLI_KEY $RELCLI_IMAGE auto_publish -f -v 6 || true + - docker run -i -v /tmpfs/creds:/tmpfs/creds -e DRONE_REPO -e DRONE_TAG -e RELCLI_BASE_URL + -e RELCLI_CERT -e RELCLI_KEY $RELCLI_IMAGE auto_publish -f -v 6 environment: RELCLI_BASE_URL: https://releases-prod.platform.teleport.sh RELCLI_CERT: 
/tmpfs/creds/releases.crt @@ -16398,6 +16398,6 @@ image_pull_secrets: - DOCKERHUB_CREDENTIALS --- kind: signature -hmac: 18993516593f5eb36eb9a9352006689624540d14f39886639d92b1cf50faf258 +hmac: d37d662502699f9c04405b6fb695ed249849eafd6441e6ed9cae302ae202221b ... diff --git a/dronegen/relcli.go b/dronegen/relcli.go index 8b580c06a6b5d..ad02e1601074b 100644 --- a/dronegen/relcli.go +++ b/dronegen/relcli.go @@ -14,6 +14,10 @@ package main +import ( + "strings" +) + const relcliImage = "146628656107.dkr.ecr.us-west-2.amazonaws.com/gravitational/relcli:master-57a5d42-20230412T1204687" func relcliPipeline(trigger trigger, name string, stepName string, command string) pipeline { @@ -66,6 +70,34 @@ func pullRelcliStep(awsConfigVolumeRef volumeRef) step { } func executeRelcliStep(name string, command string) step { + commands := []string{ + `mkdir -p /tmpfs/creds`, + `echo "$RELEASES_CERT" | base64 -d > "$RELCLI_CERT"`, + `echo "$RELEASES_KEY" | base64 -d > "$RELCLI_KEY"`, + `trap "rm -rf /tmpfs/creds" EXIT`, + } + + runReleaseServerCLICommand := "docker run -i -v /tmpfs/creds:/tmpfs/creds " + + "-e DRONE_REPO -e DRONE_TAG -e RELCLI_BASE_URL -e RELCLI_CERT -e RELCLI_KEY " + + "$RELCLI_IMAGE " + command + + // This is a workaround for a release server issue, and should be removed after the issue is fixed. + // The release server publish step does not fail on or after the third step, consistently. 
+ if strings.HasPrefix(command, "auto_publish") { + retryCount := 3 + for i := 1; i <= retryCount; i++ { + // Ignore errors on all but the last run of the command + commandSuffix := "" + if i != retryCount { + commandSuffix = " || true" + } + + commands = append(commands, runReleaseServerCLICommand+commandSuffix) + } + } else { + commands = append(commands, runReleaseServerCLICommand) + } + return step{ Name: name, Image: "docker:git", @@ -76,15 +108,7 @@ func executeRelcliStep(name string, command string) step { "RELCLI_CERT": {raw: "/tmpfs/creds/releases.crt"}, "RELCLI_KEY": {raw: "/tmpfs/creds/releases.key"}, }, - Volumes: []volumeRef{volumeRefDocker, volumeRefTmpfs, volumeRefAwsConfig}, - Commands: []string{ - `mkdir -p /tmpfs/creds`, - `echo "$RELEASES_CERT" | base64 -d > "$RELCLI_CERT"`, - `echo "$RELEASES_KEY" | base64 -d > "$RELCLI_KEY"`, - `trap "rm -rf /tmpfs/creds" EXIT`, - `docker run -i -v /tmpfs/creds:/tmpfs/creds \ - -e DRONE_REPO -e DRONE_TAG -e RELCLI_BASE_URL -e RELCLI_CERT -e RELCLI_KEY \ - $RELCLI_IMAGE ` + command, - }, + Volumes: []volumeRef{volumeRefDocker, volumeRefTmpfs, volumeRefAwsConfig}, + Commands: commands, } } From bab79d81d1c94b8ad95530481883692d8a4590b6 Mon Sep 17 00:00:00 2001 From: Cam Hutchison Date: Wed, 15 Nov 2023 12:43:10 +1100 Subject: [PATCH 2/2] dronegen: Run auto_publish 10 times (from 3) in a loop Change the drone generation to use a loop to run the `auto_publish` relcli command instead of listing them one-by-one and loop 10 times instead of 3. The loop will terminate the first time `relcli` succeeds. The loop has an `|| false` at the end to ensure the loop command fails if all invocations of `relcli` fail. With `set -e`, even though the exit status of the loop is non-zero, the shell seems to continue. With the `|| false` at the end, it makes it exit on failure. I'm not sure exactly how drone runs the commands so this may not be necessary but it seems safer. e.g. 
set -e for i in $(seq 10); do false && break; done echo hello This will echo "hello" even though all invocations inside the loop failed: `set -e` ignores the failure of any command in an `&&` list other than the last one, and it does not apply to a compound command (here the `for` loop) whose non-zero status came from such an ignored failure. set -e for i in $(seq 10); do false && break; done || false echo hello This will not echo "hello" - `set -e` causes an exit before that command due to the `|| false`. --- .drone.yml | 11 ++++------- dronegen/relcli.go | 15 +++------------ 2 files changed, 7 insertions(+), 19 deletions(-) diff --git a/.drone.yml b/.drone.yml index 19866bfba7186..99f434d8ff52d 100644 --- a/.drone.yml +++ b/.drone.yml @@ -16354,12 +16354,9 @@ steps: - echo "$RELEASES_CERT" | base64 -d > "$RELCLI_CERT" - echo "$RELEASES_KEY" | base64 -d > "$RELCLI_KEY" - trap "rm -rf /tmpfs/creds" EXIT - - docker run -i -v /tmpfs/creds:/tmpfs/creds -e DRONE_REPO -e DRONE_TAG -e RELCLI_BASE_URL - -e RELCLI_CERT -e RELCLI_KEY $RELCLI_IMAGE auto_publish -f -v 6 || true - - docker run -i -v /tmpfs/creds:/tmpfs/creds -e DRONE_REPO -e DRONE_TAG -e RELCLI_BASE_URL - -e RELCLI_CERT -e RELCLI_KEY $RELCLI_IMAGE auto_publish -f -v 6 || true - - docker run -i -v /tmpfs/creds:/tmpfs/creds -e DRONE_REPO -e DRONE_TAG -e RELCLI_BASE_URL - -e RELCLI_CERT -e RELCLI_KEY $RELCLI_IMAGE auto_publish -f -v 6 + - for i in $(seq 10); do docker run -i -v /tmpfs/creds:/tmpfs/creds -e DRONE_REPO + -e DRONE_TAG -e RELCLI_BASE_URL -e RELCLI_CERT -e RELCLI_KEY $RELCLI_IMAGE auto_publish + -f -v 6 && break; done || false environment: RELCLI_BASE_URL: https://releases-prod.platform.teleport.sh RELCLI_CERT: /tmpfs/creds/releases.crt @@ -16398,6 +16395,6 @@ image_pull_secrets: - DOCKERHUB_CREDENTIALS --- kind: signature -hmac: d37d662502699f9c04405b6fb695ed249849eafd6441e6ed9cae302ae202221b +hmac: 39e471943631b2c319941811f899842ffeab05ba1b8787ff960ebc0fe72368c1 ... 
diff --git a/dronegen/relcli.go b/dronegen/relcli.go index ad02e1601074b..d073dabda2ee3 100644 --- a/dronegen/relcli.go +++ b/dronegen/relcli.go @@ -84,19 +84,10 @@ func executeRelcliStep(name string, command string) step { // This is a workaround for a release server issue, and should be removed after the issue is fixed. // The release server publish step does not fail on or after the third step, consistently. if strings.HasPrefix(command, "auto_publish") { - retryCount := 3 - for i := 1; i <= retryCount; i++ { - // Ignore errors on all but the last run of the command - commandSuffix := "" - if i != retryCount { - commandSuffix = " || true" - } - - commands = append(commands, runReleaseServerCLICommand+commandSuffix) - } - } else { - commands = append(commands, runReleaseServerCLICommand) + // Retry the command up to 10 times until success, and fail if none succeed. + runReleaseServerCLICommand = `for i in $(seq 10); do ` + runReleaseServerCLICommand + ` && break; done || false` } + commands = append(commands, runReleaseServerCLICommand) return step{ Name: name,