Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
145 changes: 31 additions & 114 deletions ci3/aws_request_instance
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,24 @@
[ -n "${BUILD_SYSTEM_DEBUG:-}" ] && set -x # conditionally trace
set -eu

NAME=$1
CPUS=$2
ARCH=$3
name=$1
cpus=$2
arch=$3

cd $(dirname $0)

bid_per_cpu_hour=0.03125

instance_types_amd64=(
m6a
m7a
m7i
)

instance_types_arm64=(
m7g
)

# Declare an associative array to map CPU counts to instance type suffixes.
declare -A cpu_map
cpu_map=(
Expand All @@ -23,121 +35,26 @@ cpu_map=(
[192]="48xlarge"
)

INSTANCE_TYPE_SUFFIX=${cpu_map[$CPUS]}

# Check if INSTANCE_TYPE_SUFFIX is set, if not, the CPU count is not recognized.
if [ -z "$INSTANCE_TYPE_SUFFIX" ]; then
>&2 echo "Unrecognized CPU count: $CPUS"
exit 1
fi

# Construct the full instance type. We use m6a/m7g currently.
# AMIs are our custom-built build-instance AMIs. See ami_update.sh.
if [ "$ARCH" == "x86_64" ] || [ "$ARCH" == "amd64" ]; then
MACHINE_TYPE="m6a"
AMI=${AMI:-$(cat ./aws/ami_id_amd64)}
elif [ "$ARCH" == "arm64" ]; then
MACHINE_TYPE="m7g"
AMI=${AMI:-$(cat ./aws/ami_id_arm64)}
if [ "$arch" == "x86_64" ] || [ "$arch" == "amd64" ]; then
instances=("${instance_types_amd64[@]}")
ami=${ami:-$(cat ./aws/ami_id_amd64)}
elif [ "$arch" == "arm64" ]; then
instances=("${instance_types_arm64[@]}")
ami=${ami:-$(cat ./aws/ami_id_arm64)}
else
echo "Unknown arch: $ARCH"
echo "Unsupported arch: $arch" >&2
exit 1
fi
INSTANCE_TYPE="$MACHINE_TYPE.$INSTANCE_TYPE_SUFFIX"

export AWS_DEFAULT_REGION=us-east-2

BID_PER_CPU_HOUR=0.03125
PRICE=$(jq -n "$BID_PER_CPU_HOUR*$CPUS*100000 | round / 100000")

launch_spec=$(cat <<EOF
{
"ImageId": "$AMI",
"KeyName": "${KEY_NAME:-build-instance}",
"SecurityGroupIds": ["sg-0ccd4e5df0dcca0c9"],
"InstanceType": "$INSTANCE_TYPE",
"BlockDeviceMappings": [
{
"DeviceName": "/dev/sda1",
"Ebs": {
"VolumeSize": 64,
"VolumeType": "gp3",
"Throughput": 1000,
"Iops": 4000
}
}
]
}
EOF
)

# Save the launch specification to a temporary file.
temp_file=$(mktemp)
trap "rm -f $temp_file" EXIT
echo "$launch_spec" > "$temp_file"

info="(name: $NAME) (ami: $AMI) (cpus: $CPUS) (bid: $PRICE)"

if [ "${NO_SPOT:-0}" -ne 1 ]; then
>&2 echo "Requesting $INSTANCE_TYPE spot instance $info..."
SIR=$(aws ec2 request-spot-instances \
--spot-price "$PRICE" \
--instance-count 1 \
--type "one-time" \
--launch-specification file://$temp_file \
--query "SpotInstanceRequests[*].[SpotInstanceRequestId]" \
--output text)

>&2 echo "Waiting for instance id for spot request: $SIR..."
sleep 5
for I in {1..6}; do
IID=$(aws ec2 describe-spot-instance-requests \
--spot-instance-request-ids $SIR \
--query "SpotInstanceRequests[*].[InstanceId]" \
--output text)

[ -z "$IID" -o "$IID" == "None" ] || break

if [ $I -eq 6 ]; then
>&2 echo "Timeout waiting for spot request."
# Cancel spot request. We may still get allocated an instance if it's *just* happened.
aws ec2 cancel-spot-instance-requests --spot-instance-request-ids $SIR > /dev/null
IFS=',' read -ra cpu_list <<< "$cpus"
for cpu in "${cpu_list[@]}"; do
price=$(jq -n "$bid_per_cpu_hour*$cpu*100000 | round / 100000")
suffix=${cpu_map[$cpu]}
for inst in "${instances[@]}"; do
instance_type="$inst.$suffix"
if aws_request_instance_type $name $instance_type $price $ami; then
exit 0
fi

sleep 5
done
fi

if [ -z "${IID:-}" -o "${IID:-}" == "None" ]; then
# Request on-demand instance.
>&2 echo "Requesting $INSTANCE_TYPE on-demand instance $info..."
IID=$(aws ec2 run-instances \
--cli-input-json file://$temp_file \
--query "Instances[*].[InstanceId]" \
--output text)
fi

aws ec2 create-tags --resources $IID --tags "Key=Name,Value=$NAME"
aws ec2 create-tags --resources $IID --tags "Key=Group,Value=build-instance"

while [ -z "${IP:-}" ]; do
sleep 1
IP=$(aws ec2 describe-instances \
--filter "Name=instance-id,Values=$IID" \
--query "Reservations[*].Instances[*].PublicIpAddress" \
--output=text)
done

# Wait till ssh port is open.
>&2 echo "Waiting for SSH at $IP..."
SECONDS=0
SSH_CONFIG_PATH=${SSH_CONFIG_PATH:-aws/build_instance_ssh_config}
[ "${NO_TERMINATE:-0}" -eq 1 ] && LIVE_CMD=true || LIVE_CMD="sudo shutdown -h +${AWS_SHUTDOWN_TIME:-60}"
while ! ssh -F $SSH_CONFIG_PATH -o ConnectTimeout=1 $IP $LIVE_CMD > /dev/null 2>&1; do
if (( SECONDS >= 60 )); then
>&2 echo "Timeout: SSH could not login to $IP within 60 seconds."
exit 1
fi
sleep 1
done
echo $IP:${SIR:-}:$IID
exit 1
104 changes: 104 additions & 0 deletions ci3/aws_request_instance_type
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
#!/usr/bin/env bash
# Launches one EC2 build instance of a specific instance type and waits until
# it accepts an SSH login.
#
# Usage: aws_request_instance_type <name> <instance_type> <price> <ami>
#   name           Value written to the instance's "Name" tag.
#   instance_type  Full EC2 instance type placed in the launch specification.
#   price          Bid passed to --spot-price for the spot request.
#   ami            AMI id placed in the launch specification.
#
# Optional environment:
#   BUILD_SYSTEM_DEBUG  Non-empty enables `set -x` tracing.
#   KEY_NAME            Key pair name (default: build-instance).
#   NO_SPOT             1 skips the spot request and goes straight to on-demand.
#   NO_TERMINATE        1 skips scheduling a shutdown on the instance.
#   AWS_SHUTDOWN_TIME   Minutes until the scheduled shutdown (default: 60).
#   SSH_CONFIG_PATH     ssh config file (default: aws/build_instance_ssh_config).
#
# Output: "<ip>:<spot request id>:<instance id>" on stdout; the spot request id
#         is empty when no spot request was made. Progress goes to stderr.
# Exit:   non-zero if an aws call fails or SSH is not reachable within 60s.
[ -n "${BUILD_SYSTEM_DEBUG:-}" ] && set -x # conditionally trace
set -eu

NAME=$1
INSTANCE_TYPE=$2
PRICE=$3
AMI=$4

# Relative paths below (the default ssh config) are resolved from this script's directory.
cd "$(dirname "$0")"

export AWS_DEFAULT_REGION=us-east-2

# Succeeds when $1 is not a usable instance id: empty, or the literal "None"
# that the instance-id queries below are tested against.
no_instance_id() {
  [[ -z "$1" || "$1" == "None" ]]
}

# Prints the instance id currently attached to spot request $SIR.
query_spot_instance_id() {
  aws ec2 describe-spot-instance-requests \
    --spot-instance-request-ids "$SIR" \
    --query "SpotInstanceRequests[*].[InstanceId]" \
    --output text
}

# Save the launch specification to a temporary file, removed on any exit path.
temp_file=$(mktemp)
trap 'rm -f -- "$temp_file"' EXIT
cat > "$temp_file" <<EOF
{
  "ImageId": "$AMI",
  "KeyName": "${KEY_NAME:-build-instance}",
  "SecurityGroupIds": ["sg-0ccd4e5df0dcca0c9"],
  "InstanceType": "$INSTANCE_TYPE",
  "BlockDeviceMappings": [
    {
      "DeviceName": "/dev/sda1",
      "Ebs": {
        "VolumeSize": 64,
        "VolumeType": "gp3",
        "Throughput": 1000,
        "Iops": 4000
      }
    }
  ]
}
EOF

info="(name: $NAME) (type: $INSTANCE_TYPE) (ami: $AMI) (bid: $PRICE)"

if [ "${NO_SPOT:-0}" -ne 1 ]; then
  >&2 echo "Requesting $INSTANCE_TYPE spot instance $info..."
  SIR=$(aws ec2 request-spot-instances \
    --spot-price "$PRICE" \
    --instance-count 1 \
    --type "one-time" \
    --launch-specification "file://$temp_file" \
    --query "SpotInstanceRequests[*].[SpotInstanceRequestId]" \
    --output text)

  >&2 echo "Waiting for instance id for spot request: $SIR..."
  sleep 5
  # Poll up to 6 times, 5 seconds apart.
  for attempt in {1..6}; do
    IID=$(query_spot_instance_id)
    no_instance_id "$IID" || break
    if [ "$attempt" -lt 6 ]; then
      sleep 5
    fi
  done

  if no_instance_id "$IID"; then
    >&2 echo "Timeout waiting for spot request."
    # Cancel spot request. We may still get allocated an instance if it's *just* happened.
    aws ec2 cancel-spot-instance-requests --spot-instance-request-ids "$SIR" > /dev/null
    # Cancelling does not stop an instance that was fulfilled in the meantime,
    # so look once more; otherwise that instance would be left running untracked
    # while an on-demand one is launched below.
    sleep 5
    IID=$(query_spot_instance_id)
  fi
fi

if no_instance_id "${IID:-}"; then
  # Request on-demand instance.
  >&2 echo "Requesting $INSTANCE_TYPE on-demand instance $info..."
  IID=$(aws ec2 run-instances \
    --cli-input-json "file://$temp_file" \
    --query "Instances[*].[InstanceId]" \
    --output text)
fi

aws ec2 create-tags --resources "$IID" --tags "Key=Name,Value=$NAME"
aws ec2 create-tags --resources "$IID" --tags "Key=Group,Value=build-instance"

# Poll once a second until the instance reports a public IP address.
while [ -z "${IP:-}" ]; do
  sleep 1
  IP=$(aws ec2 describe-instances \
    --filter "Name=instance-id,Values=$IID" \
    --query "Reservations[*].Instances[*].PublicIpAddress" \
    --output=text)
done

# Wait till ssh port is open.
>&2 echo "Waiting for SSH at $IP..."
SECONDS=0 # bash counts elapsed seconds in this variable from the moment it is assigned
SSH_CONFIG_PATH=${SSH_CONFIG_PATH:-aws/build_instance_ssh_config}
# The command run over the first successful SSH login doubles as the liveness
# probe; unless NO_TERMINATE=1 it also schedules the instance's shutdown.
if [ "${NO_TERMINATE:-0}" -eq 1 ]; then
  LIVE_CMD=true
else
  LIVE_CMD="sudo shutdown -h +${AWS_SHUTDOWN_TIME:-60}"
fi
while ! ssh -F "$SSH_CONFIG_PATH" -o ConnectTimeout=1 "$IP" "$LIVE_CMD" > /dev/null 2>&1; do
  if (( SECONDS >= 60 )); then
    >&2 echo "Timeout: SSH could not login to $IP within 60 seconds."
    exit 1
  fi
  sleep 1
done
echo "$IP:${SIR:-}:$IID"
4 changes: 2 additions & 2 deletions ci3/bootstrap_ec2
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,9 @@ if [ "$arch" == "arm64" ]; then
export AWS_SHUTDOWN_TIME=90
else
if [ "$CI_FULL" -eq 1 ]; then
cores=192
cores=192,128,64
else
cores=128
cores=128,64
fi
fi

Expand Down