Patch summary (free text ahead of the first "diff --git" header):
  * .github/spot-runner-action: drop the `ec2_instance_iam_role` input and the
    matching `ec2InstanceIamRole` config field (action.yaml, src/config.ts, dist/index.js).
  * .github/spot-runner-action: pass `KeyName: this.config.ec2KeyName` in every
    launch-template override built for the fleet request (src/ec2.ts, dist/index.js).
  * .github/spot-runner-action: replace the refcount/flock shutdown hook scripts with a
    single /run/delay_shutdown.sh used for both job-started and job-completed hooks
    (src/userdata.ts, dist/index.js).
  * scripts/earthly-ci: retry earthly on two more failure signatures found in the output.

Review fix carried in this patch: the second hook variable is now appended (`>>`) to
actions-runner/.env. With `>` on both lines the second write truncated the file, leaving
only ACTIONS_RUNNER_HOOK_JOB_COMPLETED in .env.

NOTE(review): the original whitespace of this patch was lost; indentation of context
lines below is reconstructed and should be confirmed against the target files.

diff --git a/.github/spot-runner-action/action.yaml b/.github/spot-runner-action/action.yaml
index a0290b63b8ad..4b33c7d344f5 100644
--- a/.github/spot-runner-action/action.yaml
+++ b/.github/spot-runner-action/action.yaml
@@ -40,10 +40,6 @@ inputs:
   ec2_ami_id:
     description: 'Ec2 ami ID'
     required: true
-  ec2_instance_iam_role:
-    description: 'IAM role for to associate with ec2 instance'
-    required: false
-    default: ''
   ec2_instance_tags:
     description: 'List of extra aws resource tags for ec2 instance'
     required: false
diff --git a/.github/spot-runner-action/dist/index.js b/.github/spot-runner-action/dist/index.js
index 54dc696e1c35..37aa0ce13a3b 100644
--- a/.github/spot-runner-action/dist/index.js
+++ b/.github/spot-runner-action/dist/index.js
@@ -53,7 +53,6 @@ class ActionConfig {
         // Ec2 params
         this.ec2InstanceType = core.getInput("ec2_instance_type").split(" ");
         this.ec2AmiId = core.getInput("ec2_ami_id");
-        this.ec2InstanceIamRole = core.getInput("ec2_instance_iam_role");
         this.ec2InstanceTags = core.getInput("ec2_instance_tags");
         this.ec2InstanceTtl = core.getInput("ec2_instance_ttl");
         this.ec2SubnetId = core.getInput("ec2_subnet_id");
@@ -308,6 +307,7 @@ class Ec2Instance {
                     InstanceType: instanceType,
                     AvailabilityZone: availabilityZone,
                     SubnetId: this.config.ec2SubnetId,
+                    KeyName: this.config.ec2KeyName,
                 })),
             };
             const createFleetRequest = {
@@ -871,23 +871,14 @@ class UserData {
             `mkdir -p shutdown-refcount`,
             // Shutdown rules:
             // - github actions job starts and ends always bump +ec2InstanceTtl minutes
-            // - when the amount of started jobs (start_run_* files) equal the amount of finished jobs (end_run_* files), we shutdown in 5 minutes
-            `echo "${bumpShutdown}; touch /run/shutdown-refcount/start_run_\\$(date +%s)_\\$RANDOM" > /run/delay_shutdown.sh`,
-            // `echo "[ \\$(find /run/shutdown-refcount/ -name 'start_run_*' | wc -l) -eq \\$(find /run/shutdown-refcount/ -name 'end_run_*' | wc -l) ] && shutdown -P 5 ; true" > /run/if_refcount0_shutdown.sh`,
-            `echo "echo refcounting disabled for now" > /run/if_refcount0_shutdown.sh`,
-            `echo "${bumpShutdown}; touch /run/shutdown-refcount/end_run_\\$(date +%s)_\\$RANDOM ; /run/if_refcount0_shutdown.sh " > /run/refcount_and_delay_shutdown.sh`,
-            `echo "flock /run/refcount-lock /run/delay_shutdown.sh" > /run/safe_delay_shutdown.sh`,
-            `echo "flock /run/refcount-lock /run/refcount_and_delay_shutdown.sh" > /run/safe_refcount_and_delay_shutdown.sh`,
+            // - when the amount of started jobs (start_run_* files) equal the amount of finished jobs (end_run_* files), we shutdown in 5 minutes (with a reaper script installed later)
+            `echo "${bumpShutdown}" > /run/delay_shutdown.sh`,
             "chmod +x /run/delay_shutdown.sh",
-            "chmod +x /run/refcount_and_delay_shutdown.sh",
-            "chmod +x /run/if_refcount0_shutdown.sh",
-            "chmod +x /run/safe_refcount_and_delay_shutdown.sh",
-            "chmod +x /run/safe_if_refcount0_shutdown.sh",
-            "export ACTIONS_RUNNER_HOOK_JOB_STARTED=/run/safe_delay_shutdown.sh",
-            "export ACTIONS_RUNNER_HOOK_JOB_COMPLETED=/run/safe_refcount_and_delay_shutdown.sh",
+            "export ACTIONS_RUNNER_HOOK_JOB_STARTED=/run/delay_shutdown.sh",
+            "export ACTIONS_RUNNER_HOOK_JOB_COMPLETED=/run/delay_shutdown.sh",
             "mkdir -p actions-runner && cd actions-runner",
-            'echo "ACTIONS_RUNNER_HOOK_JOB_STARTED=/run/safe_delay_shutdown.sh" > .env',
-            'echo "ACTIONS_RUNNER_HOOK_JOB_COMPLETED=/run/safe_refcount_and_delay_shutdown.sh" > .env',
+            'echo "ACTIONS_RUNNER_HOOK_JOB_STARTED=/run/delay_shutdown.sh" > .env',
+            'echo "ACTIONS_RUNNER_HOOK_JOB_COMPLETED=/run/delay_shutdown.sh" >> .env',
             `GH_RUNNER_VERSION=${githubActionRunnerVersion}`,
             'case $(uname -m) in aarch64) ARCH="arm64" ;; amd64|x86_64) ARCH="x64" ;; esac && export RUNNER_ARCH=${ARCH}',
             "curl -O -L https://github.com/actions/runner/releases/download/v${GH_RUNNER_VERSION}/actions-runner-linux-${RUNNER_ARCH}-${GH_RUNNER_VERSION}.tar.gz",
diff --git a/.github/spot-runner-action/src/config.ts b/.github/spot-runner-action/src/config.ts
index da7ec1c9d8b8..1c15bd00e651 100644
--- a/.github/spot-runner-action/src/config.ts
+++ b/.github/spot-runner-action/src/config.ts
@@ -19,7 +19,6 @@ export interface ConfigInterface {
 
   ec2InstanceType: string[];
   ec2AmiId: string;
-  ec2InstanceIamRole: string;
   ec2InstanceTags: string;
   ec2InstanceTtl: string;
   ec2SecurityGroupId: string;
@@ -46,7 +45,6 @@ export class ActionConfig implements ConfigInterface {
 
   ec2InstanceType: string[];
   ec2AmiId: string;
-  ec2InstanceIamRole: string;
   ec2InstanceTags: string;
   ec2InstanceTtl: string;
   ec2SecurityGroupId: string;
@@ -77,7 +75,6 @@ export class ActionConfig implements ConfigInterface {
     // Ec2 params
     this.ec2InstanceType = core.getInput("ec2_instance_type").split(" ");
     this.ec2AmiId = core.getInput("ec2_ami_id");
-    this.ec2InstanceIamRole = core.getInput("ec2_instance_iam_role");
     this.ec2InstanceTags = core.getInput("ec2_instance_tags");
     this.ec2InstanceTtl = core.getInput("ec2_instance_ttl");
     this.ec2SubnetId = core.getInput("ec2_subnet_id");
diff --git a/.github/spot-runner-action/src/ec2.ts b/.github/spot-runner-action/src/ec2.ts
index c9a736cd1e39..195aa3fa919a 100644
--- a/.github/spot-runner-action/src/ec2.ts
+++ b/.github/spot-runner-action/src/ec2.ts
@@ -4,9 +4,7 @@ import {
   CreateFleetInstance,
   CreateFleetRequest,
   CreateLaunchTemplateRequest,
-  FleetLaunchTemplateConfig,
   FleetLaunchTemplateConfigRequest,
-  RunInstancesRequest,
 } from "aws-sdk/clients/ec2";
 import * as crypto from "crypto";
 import * as core from "@actions/core";
@@ -229,6 +227,7 @@ export class Ec2Instance {
           InstanceType: instanceType,
           AvailabilityZone: availabilityZone,
           SubnetId: this.config.ec2SubnetId,
+          KeyName: this.config.ec2KeyName,
         })),
     };
     const createFleetRequest: CreateFleetRequest = {
diff --git a/.github/spot-runner-action/src/userdata.ts b/.github/spot-runner-action/src/userdata.ts
index 9e9963932736..4455735b8d2d 100644
--- a/.github/spot-runner-action/src/userdata.ts
+++ b/.github/spot-runner-action/src/userdata.ts
@@ -35,23 +35,14 @@ export class UserData {
       `mkdir -p shutdown-refcount`,
       // Shutdown rules:
       // - github actions job starts and ends always bump +ec2InstanceTtl minutes
-      // - when the amount of started jobs (start_run_* files) equal the amount of finished jobs (end_run_* files), we shutdown in 5 minutes
-      `echo "${bumpShutdown}; touch /run/shutdown-refcount/start_run_\\$(date +%s)_\\$RANDOM" > /run/delay_shutdown.sh`,
-      // `echo "[ \\$(find /run/shutdown-refcount/ -name 'start_run_*' | wc -l) -eq \\$(find /run/shutdown-refcount/ -name 'end_run_*' | wc -l) ] && shutdown -P 5 ; true" > /run/if_refcount0_shutdown.sh`,
-      `echo "echo refcounting disabled for now" > /run/if_refcount0_shutdown.sh`,
-      `echo "${bumpShutdown}; touch /run/shutdown-refcount/end_run_\\$(date +%s)_\\$RANDOM ; /run/if_refcount0_shutdown.sh " > /run/refcount_and_delay_shutdown.sh`,
-      `echo "flock /run/refcount-lock /run/delay_shutdown.sh" > /run/safe_delay_shutdown.sh`,
-      `echo "flock /run/refcount-lock /run/refcount_and_delay_shutdown.sh" > /run/safe_refcount_and_delay_shutdown.sh`,
+      // - when the amount of started jobs (start_run_* files) equal the amount of finished jobs (end_run_* files), we shutdown in 5 minutes (with a reaper script installed later)
+      `echo "${bumpShutdown}" > /run/delay_shutdown.sh`,
       "chmod +x /run/delay_shutdown.sh",
-      "chmod +x /run/refcount_and_delay_shutdown.sh",
-      "chmod +x /run/if_refcount0_shutdown.sh",
-      "chmod +x /run/safe_refcount_and_delay_shutdown.sh",
-      "chmod +x /run/safe_if_refcount0_shutdown.sh",
-      "export ACTIONS_RUNNER_HOOK_JOB_STARTED=/run/safe_delay_shutdown.sh",
-      "export ACTIONS_RUNNER_HOOK_JOB_COMPLETED=/run/safe_refcount_and_delay_shutdown.sh",
+      "export ACTIONS_RUNNER_HOOK_JOB_STARTED=/run/delay_shutdown.sh",
+      "export ACTIONS_RUNNER_HOOK_JOB_COMPLETED=/run/delay_shutdown.sh",
       "mkdir -p actions-runner && cd actions-runner",
-      'echo "ACTIONS_RUNNER_HOOK_JOB_STARTED=/run/safe_delay_shutdown.sh" > .env',
-      'echo "ACTIONS_RUNNER_HOOK_JOB_COMPLETED=/run/safe_refcount_and_delay_shutdown.sh" > .env',
+      'echo "ACTIONS_RUNNER_HOOK_JOB_STARTED=/run/delay_shutdown.sh" > .env',
+      'echo "ACTIONS_RUNNER_HOOK_JOB_COMPLETED=/run/delay_shutdown.sh" >> .env',
       `GH_RUNNER_VERSION=${githubActionRunnerVersion}`,
       'case $(uname -m) in aarch64) ARCH="arm64" ;; amd64|x86_64) ARCH="x64" ;; esac && export RUNNER_ARCH=${ARCH}',
       "curl -O -L https://github.com/actions/runner/releases/download/v${GH_RUNNER_VERSION}/actions-runner-linux-${RUNNER_ARCH}-${GH_RUNNER_VERSION}.tar.gz",
diff --git a/scripts/earthly-ci b/scripts/earthly-ci
index 43eeb9b17aa8..5d7f3fdc50e4 100755
--- a/scripts/earthly-ci
+++ b/scripts/earthly-ci
@@ -29,6 +29,12 @@ elif ! earthly $@ 2>&1 | tee $OUTPUT_FILE >&2 ; then
     earthly $@
     # TODO handle
     # could not configure satellite: failed getting org: unable to authenticate: failed to execute login request: Post
+  elif grep 'Error: pull ping error: pull ping response' $OUTPUT_FILE >/dev/null ; then
+    echo "Got 'Error: pull ping error: pull ping response', intermittent failure when writing out images to docker"
+    earthly $@
+  elif grep '================================= System Info ==================================' $OUTPUT_FILE >/dev/null ; then
+    echo "Detected an Earthly daemon restart, possibly due to it (mis)detecting a cache setting change, trying again..."
+    earthly $@
   else
     # otherwise, propagate error
     exit 1