-
Notifications
You must be signed in to change notification settings - Fork 77
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[CI] Add scripts to create a buildkite AMI
- Loading branch information
1 parent
a47d1c8
commit 2766aac
Showing
5 changed files
with
112 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
Builds a Buildkite AMI that has the NVIDIA GPU drivers and the NVIDIA Container Toolkit installed. | ||
|
||
Make sure you have Docker installed. You also need the `requests` and `cfn_flip` Python packages installed (e.g. `pip install requests cfn_flip`). | ||
|
||
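Instructions to run (run this from an EC2 instance: the script queries the EC2 instance metadata service to find the machine's public IP) | ||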
|
||
``` | ||
AWS_ACCESS_KEY_ID=... AWS_SECRET_ACCESS_KEY=... AWS_REGION=us-east-2 BUILDKITE_STACK_VERSION=5.3.0 PACKER_LOG=1 ./create_ami.sh | ||
``` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
#!/bin/bash
# Builds the GPU-enabled Buildkite AMI by running Packer inside the
# hashicorp/packer docker image.
#
# Environment read by this script:
#   AWS_REGION               region passed to get_base_ami.py and to Packer
#   BUILDKITE_STACK_VERSION  stack version passed to get_base_ami.py
# Environment forwarded to the Packer container (docker run -e):
#   AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, PACKER_LOG
#
# The script queries the EC2 instance metadata endpoint (169.254.169.254),
# so it has to be run from an EC2 instance.
set -eux

# Get the base AMI
BASE_AMI=$(python3 ./get_base_ami.py --aws-region "${AWS_REGION}" --buildkite-stack-version "${BUILDKITE_STACK_VERSION}")

# Get the instance's public and local IPs from the instance metadata service.
# Tracing is switched off so the metadata session token is not echoed.
# curl flags: -f makes an HTTP error abort the script (via `set -e`) instead of
# capturing the error body as if it were a token/IP; -sS hides the progress
# meter but still prints real errors.
set +x
TOKEN=$(curl -fsS -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 21600")
PUBLIC_IP=$(curl -fsS -H "X-aws-ec2-metadata-token: ${TOKEN}" http://169.254.169.254/latest/meta-data/public-ipv4)
LOCAL_IP=$(curl -fsS -H "X-aws-ec2-metadata-token: ${TOKEN}" http://169.254.169.254/latest/meta-data/local-ipv4)
set -x

echo "Public IP: ${PUBLIC_IP}. Local IP: ${LOCAL_IP}"

# Run Packer in a container with the current directory mounted at /src.
# The public IP is handed to the template as the `public_ip` user variable.
docker run \
    -e AWS_ACCESS_KEY_ID \
    -e AWS_SECRET_ACCESS_KEY \
    -e PACKER_LOG \
    -v "${PWD}:/src" \
    --rm \
    -w /src \
    hashicorp/packer build -timestamp-ui -var "region=${AWS_REGION}" -var "source_ami=${BASE_AMI}" -var "public_ip=${PUBLIC_IP}" gpu_ami.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
import argparse | ||
import requests | ||
|
||
from cfn_tools import load_yaml | ||
|
||
|
||
# Command line interface. Both options are required; the script prints only
# the resolved AMI ID on stdout so callers can capture it with $(...).
parser = argparse.ArgumentParser(description='Get the base AMI to use for a specific region and version of the buildkite elastic stack')
parser.add_argument('--aws-region', help='The AWS region (e.g. us-east-1)', required=True)
parser.add_argument('--buildkite-stack-version', help='The buildkite stack version (e.g. 5.3.0)', required=True)
args = parser.parse_args()

# Fetch the stack template for the requested version.
# A timeout is set so a stalled connection fails the build instead of
# hanging it forever (requests applies no timeout by default).
stack_url = f"https://s3.amazonaws.com/buildkite-aws-stack/v{args.buildkite_stack_version}/aws-stack.yml"
r = requests.get(stack_url, timeout=30)

if r.status_code != 200:
    raise ValueError(f"Failed to fetch buildkite stack config from {stack_url} (HTTP {r.status_code})")

# Parse it and get the base AMI
config = load_yaml(r.text)
try:
    base_ami = config["Mappings"]["AWSRegion2AMI"][args.aws_region]["linuxamd64"]
except KeyError as err:
    # Most likely an unknown region, or a template version with a different layout.
    raise ValueError(
        f"No linuxamd64 base AMI found for region {args.aws_region!r} "
        f"in buildkite stack version {args.buildkite_stack_version!r}"
    ) from err

print(base_ami)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
{ | ||
"variables": { | ||
"region": "us-east-1", | ||
"instance_type": "p2.xlarge" | ||
}, | ||
|
||
"builders": [ | ||
{ | ||
"type": "amazon-ebs", | ||
"region": "{{user `region`}}", | ||
"source_ami": "{{user `source_ami`}}", | ||
"instance_type": "{{user `instance_type`}}", | ||
"ssh_username": "ec2-user", | ||
"ami_name": "neuropod-buildkite-stack-gpu-{{isotime | clean_resource_name}}", | ||
"ami_description": "Buildkite Elastic Stack AMI w/ GPU support", | ||
"ami_groups": ["all"], | ||
"temporary_security_group_source_cidrs": ["{{user `public_ip`}}/32"] | ||
} | ||
], | ||
"provisioners": [ | ||
{ | ||
"type": "shell", | ||
"script": "install-nvidia-container-toolkit.sh" | ||
}, | ||
{ | ||
"type": "shell", | ||
"inline": [ | ||
"rm /home/ec2-user/.ssh/authorized_keys" | ||
] | ||
} | ||
] | ||
} |
26 changes: 26 additions & 0 deletions
26
build/ci/buildkite_image/install-nvidia-container-toolkit.sh
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
#!/bin/bash
# Provisioning script: installs the NVIDIA driver and nvidia-docker2 on the
# build instance, then checks that a container can see the GPU.
#
# pipefail is enabled so a failed download in a `curl | tee` pipeline aborts
# the script instead of being masked by tee's exit status.
set -euxo pipefail

# Install nvidia drivers
# https://docs.nvidia.com/datacenter/tesla/tesla-installation-notes/index.html
sudo yum groupinstall -y "Development Tools"
# The driver installer builds a kernel module, so the devel/headers packages
# are pinned to the currently running kernel.
KERNEL_RELEASE=$(uname -r)
sudo yum install -y "kernel-devel-${KERNEL_RELEASE}" "kernel-headers-${KERNEL_RELEASE}"
BASE_URL=https://us.download.nvidia.com/tesla
DRIVER_VERSION=470.82.01
# -L (follow redirects) rather than -l (--list-only, which is not an HTTP option)
curl -fsSL -O "${BASE_URL}/${DRIVER_VERSION}/NVIDIA-Linux-x86_64-${DRIVER_VERSION}.run"
sudo sh "NVIDIA-Linux-x86_64-${DRIVER_VERSION}.run" --silent

# Install the container toolkit
# Instructions from https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html
# $distribution is the ID and VERSION_ID from /etc/os-release joined together.
distribution=$(. /etc/os-release; echo "${ID}${VERSION_ID}")
# -f so an HTTP error does not get written into the repo file as if it were valid
curl -fsSL "https://nvidia.github.io/nvidia-docker/${distribution}/nvidia-docker.repo" | sudo tee /etc/yum.repos.d/nvidia-docker.repo

sudo yum clean expire-cache
sudo yum install -y nvidia-docker2
sudo systemctl restart docker

# Automatically start it on boot
sudo systemctl --now enable docker

# Sanity check
docker run --rm --gpus all nvidia/cuda:11.0-base nvidia-smi