diff --git a/ci3/aws_request_instance b/ci3/aws_request_instance index 380a9de935c4..47f568b69a1e 100755 --- a/ci3/aws_request_instance +++ b/ci3/aws_request_instance @@ -2,12 +2,24 @@ [ -n "${BUILD_SYSTEM_DEBUG:-}" ] && set -x # conditionally trace set -eu -NAME=$1 -CPUS=$2 -ARCH=$3 +name=$1 +cpus=$2 +arch=$3 cd $(dirname $0) +bid_per_cpu_hour=0.03125 + +instance_types_amd64=( + m6a + m7a + m7i +) + +instance_types_arm64=( + m7g +) + # Declare an associative array to map CPU counts to instance type suffixes. declare -A cpu_map cpu_map=( @@ -23,121 +35,26 @@ cpu_map=( [192]="48xlarge" ) -INSTANCE_TYPE_SUFFIX=${cpu_map[$CPUS]} - -# Check if INSTANCE_TYPE_SUFFIX is set, if not, the CPU count is not recognized. -if [ -z "$INSTANCE_TYPE_SUFFIX" ]; then - >&2 echo "Unrecognized CPU count: $CPUS" - exit 1 -fi - -# Construct the full instance type. We use m6a/m7g currently. -# Ami's are our custum built build-instance amis. See ami_update.sh. -if [ "$ARCH" == "x86_64" ] || [ "$ARCH" == "amd64" ]; then - MACHINE_TYPE="m6a" - AMI=${AMI:-$(cat ./aws/ami_id_amd64)} -elif [ "$ARCH" == "arm64" ]; then - MACHINE_TYPE="m7g" - AMI=${AMI:-$(cat ./aws/ami_id_arm64)} +if [ "$arch" == "x86_64" ] || [ "$arch" == "amd64" ]; then + instances=("${instance_types_amd64[@]}") + ami=${ami:-$(cat ./aws/ami_id_amd64)} +elif [ "$arch" == "arm64" ]; then + instances=("${instance_types_arm64[@]}") + ami=${ami:-$(cat ./aws/ami_id_arm64)} else - echo "Unknown arch: $ARCH" + echo "Unsupported arch: $arch" >&2 exit 1 fi -INSTANCE_TYPE="$MACHINE_TYPE.$INSTANCE_TYPE_SUFFIX" - -export AWS_DEFAULT_REGION=us-east-2 - -BID_PER_CPU_HOUR=0.03125 -PRICE=$(jq -n "$BID_PER_CPU_HOUR*$CPUS*100000 | round / 100000") - -launch_spec=$(cat < "$temp_file" - -info="(name: $NAME) (ami: $AMI) (cpus: $CPUS) (bid: $PRICE)" - -if [ "${NO_SPOT:-0}" -ne 1 ]; then - >&2 echo "Requesting $INSTANCE_TYPE spot instance $info..." - SIR=$(aws ec2 request-spot-instances \ - --spot-price "$PRICE" \ - --instance-count 1 \ - --type "one-time" \ - --launch-specification file://$temp_file \ - --query "SpotInstanceRequests[*].[SpotInstanceRequestId]" \ - --output text) - - >&2 echo "Waiting for instance id for spot request: $SIR..." - sleep 5 - for I in {1..6}; do - IID=$(aws ec2 describe-spot-instance-requests \ - --spot-instance-request-ids $SIR \ - --query "SpotInstanceRequests[*].[InstanceId]" \ - --output text) - - [ -z "$IID" -o "$IID" == "None" ] || break - - if [ $I -eq 6 ]; then - >&2 echo "Timeout waiting for spot request." - # Cancel spot request. We may still get allocated an instance if it's *just* happened. - aws ec2 cancel-spot-instance-requests --spot-instance-request-ids $SIR > /dev/null +IFS=',' read -ra cpu_list <<< "$cpus" +for cpu in "${cpu_list[@]}"; do + price=$(jq -n "$bid_per_cpu_hour*$cpu*100000 | round / 100000") + suffix=${cpu_map[$cpu]} + for inst in "${instances[@]}"; do + instance_type="$inst.$suffix" + if aws_request_instance_type $name $instance_type $price $ami; then + exit 0 fi - - sleep 5 done -fi - -if [ -z "${IID:-}" -o "${IID:-}" == "None" ]; then - # Request on-demand instance. - >&2 echo "Requesting $INSTANCE_TYPE on-demand instance $info..." - IID=$(aws ec2 run-instances \ - --cli-input-json file://$temp_file \ - --query "Instances[*].[InstanceId]" \ - --output text) -fi - -aws ec2 create-tags --resources $IID --tags "Key=Name,Value=$NAME" -aws ec2 create-tags --resources $IID --tags "Key=Group,Value=build-instance" - -while [ -z "${IP:-}" ]; do - sleep 1 - IP=$(aws ec2 describe-instances \ - --filter "Name=instance-id,Values=$IID" \ - --query "Reservations[*].Instances[*].PublicIpAddress" \ - --output=text) -done - -# Wait till ssh port is open. ->&2 echo "Waiting for SSH at $IP..." -SECONDS=0 -SSH_CONFIG_PATH=${SSH_CONFIG_PATH:-aws/build_instance_ssh_config} -[ "${NO_TERMINATE:-0}" -eq 1 ] && LIVE_CMD=true || LIVE_CMD="sudo shutdown -h +${AWS_SHUTDOWN_TIME:-60}" -while ! ssh -F $SSH_CONFIG_PATH -o ConnectTimeout=1 $IP $LIVE_CMD > /dev/null 2>&1; do - if (( SECONDS >= 60 )); then - >&2 echo "Timeout: SSH could not login to $IP within 60 seconds." - exit 1 - fi - sleep 1 done -echo $IP:${SIR:-}:$IID +exit 1 diff --git a/ci3/aws_request_instance_type b/ci3/aws_request_instance_type new file mode 100755 index 000000000000..6aae7df76657 --- /dev/null +++ b/ci3/aws_request_instance_type @@ -0,0 +1,104 @@ +#!/usr/bin/env bash +[ -n "${BUILD_SYSTEM_DEBUG:-}" ] && set -x # conditionally trace +set -eu + +NAME=$1 +INSTANCE_TYPE=$2 +PRICE=$3 +AMI=$4 + +cd $(dirname $0) + +export AWS_DEFAULT_REGION=us-east-2 + +launch_spec=$(cat < "$temp_file" + +info="(name: $NAME) (type: $INSTANCE_TYPE) (ami: $AMI) (bid: $PRICE)" + +if [ "${NO_SPOT:-0}" -ne 1 ]; then + >&2 echo "Requesting $INSTANCE_TYPE spot instance $info..." + SIR=$(aws ec2 request-spot-instances \ + --spot-price "$PRICE" \ + --instance-count 1 \ + --type "one-time" \ + --launch-specification file://$temp_file \ + --query "SpotInstanceRequests[*].[SpotInstanceRequestId]" \ + --output text) + + >&2 echo "Waiting for instance id for spot request: $SIR..." + sleep 5 + for I in {1..6}; do + IID=$(aws ec2 describe-spot-instance-requests \ + --spot-instance-request-ids $SIR \ + --query "SpotInstanceRequests[*].[InstanceId]" \ + --output text) + + [ -z "$IID" -o "$IID" == "None" ] || break + + if [ $I -eq 6 ]; then + >&2 echo "Timeout waiting for spot request." + # Cancel spot request. We may still get allocated an instance if it's *just* happened. + aws ec2 cancel-spot-instance-requests --spot-instance-request-ids $SIR > /dev/null + fi + + sleep 5 + done +fi + +if [ -z "${IID:-}" -o "${IID:-}" == "None" ]; then + # Request on-demand instance. + >&2 echo "Requesting $INSTANCE_TYPE on-demand instance $info..." + IID=$(aws ec2 run-instances \ + --cli-input-json file://$temp_file \ + --query "Instances[*].[InstanceId]" \ + --output text) +fi + +aws ec2 create-tags --resources $IID --tags "Key=Name,Value=$NAME" +aws ec2 create-tags --resources $IID --tags "Key=Group,Value=build-instance" + +while [ -z "${IP:-}" ]; do + sleep 1 + IP=$(aws ec2 describe-instances \ + --filter "Name=instance-id,Values=$IID" \ + --query "Reservations[*].Instances[*].PublicIpAddress" \ + --output=text) +done + +# Wait till ssh port is open. +>&2 echo "Waiting for SSH at $IP..." +SECONDS=0 +SSH_CONFIG_PATH=${SSH_CONFIG_PATH:-aws/build_instance_ssh_config} +[ "${NO_TERMINATE:-0}" -eq 1 ] && LIVE_CMD=true || LIVE_CMD="sudo shutdown -h +${AWS_SHUTDOWN_TIME:-60}" +while ! ssh -F $SSH_CONFIG_PATH -o ConnectTimeout=1 $IP $LIVE_CMD > /dev/null 2>&1; do + if (( SECONDS >= 60 )); then + >&2 echo "Timeout: SSH could not login to $IP within 60 seconds." + exit 1 + fi + sleep 1 +done +echo $IP:${SIR:-}:$IID diff --git a/ci3/bootstrap_ec2 b/ci3/bootstrap_ec2 index 3806e6c7d29b..14310375f969 100755 --- a/ci3/bootstrap_ec2 +++ b/ci3/bootstrap_ec2 @@ -12,9 +12,9 @@ if [ "$arch" == "arm64" ]; then export AWS_SHUTDOWN_TIME=90 else if [ "$CI_FULL" -eq 1 ]; then - cores=192 + cores=192,128,64 else - cores=128 + cores=128,64 fi fi