Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 4 additions & 6 deletions .github/workflows/ci3.yml
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ jobs:
if: github.event.pull_request.head.repo.fork != true
environment: ${{ startsWith(github.ref, 'refs/tags/v') && 'master' || '' }}
strategy:
fail-fast: false
fail-fast: true
matrix:
# Only run arm64 build with arm64-ci label or on master or on tagged releases.
# The way to do conditions here is to parse full strings as JSON.
Expand Down Expand Up @@ -104,8 +104,7 @@ jobs:
EXTERNAL_ETHEREUM_CONSENSUS_HOST: "https://beacon.${{ secrets.GCP_SEPOLIA_URL }}"
EXTERNAL_ETHEREUM_CONSENSUS_HOST_API_KEY: ${{ secrets.GCP_SEPOLIA_API_KEY }}
EXTERNAL_ETHEREUM_CONSENSUS_HOST_API_KEY_HEADER: "X-goog-api-key"
run: |
./ci.sh ec2
run: exec ./ci.sh ec2

- name: Download benchmarks
if: matrix.settings.arch == 'amd64' && github.event_name == 'push' && github.ref_name == 'master'
Expand Down Expand Up @@ -184,7 +183,7 @@ jobs:
strategy:
matrix:
number: [1, 2]
fail-fast: false
fail-fast: true
steps:
#############
# Prepare Env
Expand Down Expand Up @@ -217,8 +216,7 @@ jobs:
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
CI: 1
CI_FULL: 1
run: |
./ci.sh ec2-test
run: exec ./ci.sh ec2-test

# Necessary as github actions won't allow checks on matrix builds.
merge-check:
Expand Down
12 changes: 12 additions & 0 deletions .test_patterns.yml
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,10 @@ tests:
skip: true
owners:
- *tom
- regex: "tests::execution_success::test_ram_blowup_regression"
error_regex: "assertion `left == right` failed"
owners:
- *tom

# noir
# Something to do with how I run the tests now. Think these are fine in nextest.
Expand Down Expand Up @@ -124,6 +128,10 @@ tests:
error_regex: "BlockOutOfRangeError"
owners:
- *palla
- regex: "simple e2e_sequencer_config"
error_regex: "Anvil failed to stop in time"
owners:
- *palla

# yarn-project tests
- regex: "p2p/src/services/discv5/discv5_service.test.ts"
Expand All @@ -134,6 +142,10 @@ tests:
error_regex: "Exceeded timeout of 120000"
owners:
- *sean
- regex: "p2p/src/services/reqresp/reqresp.test.ts"
error_regex: "✕ should stop after max retry attempts"
owners:
- *sean
- regex: "p2p/src/testbench/port_change.test.ts"
error_regex: "Timeout waiting for worker"
owners:
Expand Down
8 changes: 4 additions & 4 deletions ci.sh
Original file line number Diff line number Diff line change
Expand Up @@ -72,23 +72,23 @@ case "$cmd" in
"ec2")
# Spin up ec2 instance and ci bootstrap with shell on failure.
export USE_TEST_CACHE=1
bootstrap_ec2
exec bootstrap_ec2
;;
"ec2-no-cache")
# Disable the build and test cache.
export NO_CACHE=1
export USE_TEST_CACHE=0
bootstrap_ec2
exec bootstrap_ec2
;;
"ec2-test")
# Can use the build cache, but don't use the test cache.
export USE_TEST_CACHE=0
bootstrap_ec2
exec bootstrap_ec2
;;
"ec2-shell")
# Spin up ec2 instance, clone, and drop into shell.
# False triggers the shell on fail.
bootstrap_ec2 "false"
exec bootstrap_ec2 "false"
;;
"ec2-grind")
# Same as ec2-test but repeat it over arg1 instances.
Expand Down
155 changes: 86 additions & 69 deletions ci3/bootstrap_ec2
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@ if [ "$arch" == "arm64" ]; then
export AWS_SHUTDOWN_TIME=90
else
if [ "$CI_FULL" -eq 1 ]; then
cores=192,128,64
# cores=192,128,64
cores=128,64
else
cores=128,64
fi
Expand All @@ -27,12 +28,16 @@ cores=${CPUS:-$cores}
# Trap function to terminate our running instance when the script exits.
function on_exit {
set +e
if [ "$NO_TERMINATE" -eq 0 ]; then
aws_terminate_instance $iid $sir
else
echo "Remote machine not terminated, connect with: ARCH=$arch ./ci.sh shell"
if [ -n "$iid" ]; then
if [ "$NO_TERMINATE" -eq 0 ]; then
aws_terminate_instance $iid $sir
else
echo "Remote machine not terminated, connect with: ARCH=$arch ./ci.sh shell"
fi
iid=""
fi
}
trap on_exit SIGINT SIGTERM EXIT

# Verify that the commit exists on the remote. It will be the remote tip of itself if so.
current_commit=$(git rev-parse HEAD)
Expand Down Expand Up @@ -76,7 +81,6 @@ IFS=':' read -r -a parts <<< "$ip_sir"
ip="${parts[0]}"
sir="${parts[1]}"
iid="${parts[2]}"
trap on_exit EXIT

# If AWS credentials are not set, try to load them from ~/.aws/build_instance_credentials.
if [ -z "${AWS_ACCESS_KEY_ID:-}" ] || [ -z "${AWS_SECRET_ACCESS_KEY:-}" ]; then
Expand Down Expand Up @@ -181,72 +185,85 @@ EOF
# We set SSH_CONNECTION to something to ensure the hostname is shown in the lean prompt.
# We provide the host user and group ids to the entrypoint script to ensure alignment.
# We raise the default pid limit to 32k.
set +e
ssh ${ssh_args:-} -F $ci3/aws/build_instance_ssh_config ubuntu@$ip "
# TODO: This should *not* be needed in a CI run. Remove "watching" code, e.g. in boxes.
sudo sysctl fs.inotify.max_user_watches=1048576 &>/dev/null
sudo sysctl fs.inotify.max_user_instances=1048576 &>/dev/null
function run {
ssh ${ssh_args:-} -F $ci3/aws/build_instance_ssh_config ubuntu@$ip "
# TODO: This should *not* be needed in a CI run. Remove "watching" code, e.g. in boxes.
sudo sysctl fs.inotify.max_user_watches=1048576 &>/dev/null
sudo sysctl fs.inotify.max_user_instances=1048576 &>/dev/null

echo Loading CRS into tmpfs...
sudo mkdir /mnt/bb-crs
sudo mount -t tmpfs -o size=3G tmpfs /mnt/bb-crs
sudo cp -r \$HOME/.bb-crs/* /mnt/bb-crs
echo Done in \$SECONDS seconds.

echo Loading CRS into tmpfs...
sudo mkdir /mnt/bb-crs
sudo mount -t tmpfs -o size=3G tmpfs /mnt/bb-crs
sudo cp -r \$HOME/.bb-crs/* /mnt/bb-crs
echo Done in \$SECONDS seconds.
echo Installing and starting sysdig...
sudo apt-get update &>/dev/null
sudo apt-get install -y sysdig &>/dev/null
sudo sysdig -p '%evt.time (cid: %container.id) (event: %evt.dir %evt.type) (args: %evt.args): %proc.cmdline' 'evt.type=bind or evt.type=listen' > /tmp/netfile &
netfile_pid=\$!
# Capture cpu load.
mpstat 2 &> /tmp/cpufile &
cpufile_pid=\$!
# Capture mem load.
vmstat -w -S M 2 &> /tmp/memfile &
memfile_pid=\$!
trap 'sudo kill \$netfile_pid \$cpufile_pid \$memfile_pid' EXIT

echo Installing and starting sysdig...
sudo apt-get update &>/dev/null
sudo apt-get install -y sysdig &>/dev/null
sudo sysdig -p '%evt.time (cid: %container.id) (event: %evt.dir %evt.type) (args: %evt.args): %proc.cmdline' 'evt.type=bind or evt.type=listen' > /tmp/netfile &
netfile_pid=\$!
# Capture cpu load.
mpstat 2 &> /tmp/cpufile &
cpufile_pid=\$!
# Capture mem load.
vmstat -w -S M 2 &> /tmp/memfile &
memfile_pid=\$!
trap 'sudo kill \$netfile_pid \$cpufile_pid \$memfile_pid' EXIT
echo Starting devbox...
docker run --privileged ${docker_args:-} \
--name aztec_build \
--hostname $instance_name \
-v bootstrap_ci_local_docker:/var/lib/docker \
-v bootstrap_ci_repo:/home/aztec-dev/aztec-packages \
-v \$HOME/.aws:/home/aztec-dev/.aws:ro \
-v /mnt/bb-crs:/home/aztec-dev/.bb-crs:ro \
-v /tmp:/tmp \
-v /dev/kmsg:/dev/kmsg \
-e RUN_ID=${RUN_ID:-} \
-e JOB_ID=${JOB_ID:-} \
-e NO_CACHE=${NO_CACHE:-} \
-e USE_TEST_CACHE=${USE_TEST_CACHE:-1} \
-e CI_REDIS='ci-redis-tiered.lzka0i.ng.0001.use2.cache.amazonaws.com' \
-e SSH_CONNECTION=' ' \
-e LOCAL_USER_ID=\$(id -u) \
-e LOCAL_GROUP_ID=\$(id -g) \
-e CI=$CI \
-e CI_FULL=$CI_FULL \
-e CI_NIGHTLY=${CI_NIGHTLY:-0} \
-e EXTERNAL_ETHEREUM_HOSTS=${EXTERNAL_ETHEREUM_HOSTS:-} \
-e EXTERNAL_ETHEREUM_CONSENSUS_HOST=${EXTERNAL_ETHEREUM_CONSENSUS_HOST:-} \
-e EXTERNAL_ETHEREUM_CONSENSUS_HOST_API_KEY=${EXTERNAL_ETHEREUM_CONSENSUS_HOST_API_KEY:-} \
-e EXTERNAL_ETHEREUM_CONSENSUS_HOST_API_KEY_HEADER=${EXTERNAL_ETHEREUM_CONSENSUS_HOST_API_KEY_HEADER:-} \
-e L1_DEPLOYMENT_PRIVATE_KEY=${L1_DEPLOYMENT_PRIVATE_KEY:-} \
-e DRY_RUN=${DRY_RUN:-0} \
-e DOCKERHUB_PASSWORD=${DOCKERHUB_PASSWORD:-} \
-e AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID:-} \
-e AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY:-} \
-e BUILD_SYSTEM_DEBUG=${BUILD_SYSTEM_DEBUG:-} \
-e GITHUB_TOKEN=${GITHUB_TOKEN:-} \
-e NETLIFY_SITE_ID=${NETLIFY_SITE_ID:-} \
-e NETLIFY_AUTH_TOKEN=${NETLIFY_AUTH_TOKEN:-} \
-e NPM_TOKEN=${NPM_TOKEN:-} \
-e SLACK_BOT_TOKEN=${SLACK_BOT_TOKEN:-} \
-e REF_NAME=${REF_NAME:-} \
-e AWS_TOKEN=\$(curl -sX PUT http://169.254.169.254/latest/api/token -H 'X-aws-ec2-metadata-token-ttl-seconds: 21600') \
--pids-limit=32768 \
aztecprotocol/devbox:3.0 bash -c $(printf '%q' "$container_script")
"
}

set +e
# If in terminal run in foreground.
# If not, run in background so we can handle the signals in a timely fashion, and wait for it to finish.
if [ -t 0 ]; then
run
else
echo "No tty, running in background..."
run &
wait $!
fi

echo Starting devbox...
docker run --privileged ${docker_args:-} \
--name aztec_build \
--hostname $instance_name \
-v bootstrap_ci_local_docker:/var/lib/docker \
-v bootstrap_ci_repo:/home/aztec-dev/aztec-packages \
-v \$HOME/.aws:/home/aztec-dev/.aws:ro \
-v /mnt/bb-crs:/home/aztec-dev/.bb-crs:ro \
-v /tmp:/tmp \
-v /dev/kmsg:/dev/kmsg \
-e RUN_ID=${RUN_ID:-} \
-e JOB_ID=${JOB_ID:-} \
-e NO_CACHE=${NO_CACHE:-} \
-e USE_TEST_CACHE=${USE_TEST_CACHE:-1} \
-e CI_REDIS='ci-redis-tiered.lzka0i.ng.0001.use2.cache.amazonaws.com' \
-e SSH_CONNECTION=' ' \
-e LOCAL_USER_ID=\$(id -u) \
-e LOCAL_GROUP_ID=\$(id -g) \
-e CI=$CI \
-e CI_FULL=$CI_FULL \
-e CI_NIGHTLY=${CI_NIGHTLY:-0} \
-e EXTERNAL_ETHEREUM_HOSTS=${EXTERNAL_ETHEREUM_HOSTS:-} \
-e EXTERNAL_ETHEREUM_CONSENSUS_HOST=${EXTERNAL_ETHEREUM_CONSENSUS_HOST:-} \
-e EXTERNAL_ETHEREUM_CONSENSUS_HOST_API_KEY=${EXTERNAL_ETHEREUM_CONSENSUS_HOST_API_KEY:-} \
-e EXTERNAL_ETHEREUM_CONSENSUS_HOST_API_KEY_HEADER=${EXTERNAL_ETHEREUM_CONSENSUS_HOST_API_KEY_HEADER:-} \
-e L1_DEPLOYMENT_PRIVATE_KEY=${L1_DEPLOYMENT_PRIVATE_KEY:-} \
-e DRY_RUN=${DRY_RUN:-0} \
-e DOCKERHUB_PASSWORD=${DOCKERHUB_PASSWORD:-} \
-e AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID:-} \
-e AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY:-} \
-e BUILD_SYSTEM_DEBUG=${BUILD_SYSTEM_DEBUG:-} \
-e GITHUB_TOKEN=${GITHUB_TOKEN:-} \
-e NETLIFY_SITE_ID=${NETLIFY_SITE_ID:-} \
-e NETLIFY_AUTH_TOKEN=${NETLIFY_AUTH_TOKEN:-} \
-e NPM_TOKEN=${NPM_TOKEN:-} \
-e SLACK_BOT_TOKEN=${SLACK_BOT_TOKEN:-} \
-e REF_NAME=${REF_NAME:-} \
-e AWS_TOKEN=\$(curl -sX PUT http://169.254.169.254/latest/api/token -H 'X-aws-ec2-metadata-token-ttl-seconds: 21600') \
--pids-limit=32768 \
aztecprotocol/devbox:3.0 bash -c $(printf '%q' "$container_script")
"
code=$?
set -e
echo "SSH exited with code: $code"
Expand Down
3 changes: 1 addition & 2 deletions ci3/parallelise
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,7 @@ cd $root
jobs=$(get_num_cpus_max ${1:-})
parallel_args="-j$jobs --memsuspend $(memsuspend_limit) --line-buffer --joblog joblog.txt"

# If not in CI, fail fast.
if [ "$CI" -eq 0 ]; then
if [ "${NO_FAIL_FAST:-0}" -eq 0 ]; then
parallel_args+=" --halt now,fail=1"
fi

Expand Down
5 changes: 5 additions & 0 deletions ci3/run_test_cmd
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,11 @@ trap "kill -- -$timeout_pid &>/dev/null; exit" SIGTERM SIGINT
wait $timeout_pid
code=$?

# If the test received a SIGTERM or SIGINT, we don't want to track or print anything.
if [ "$code" -eq 143 ] || [ "$code" -eq 130 ]; then
exit $code
fi

if [ "$CI_REDIS_AVAILABLE" -eq 1 ]; then
# If the test succeeded and we're using the test cache, set success flag for test. This key is unique to the test.
# If the test succeeded and we're in CI, save the test log.
Expand Down
Loading