diff --git a/.test_patterns.yml b/.test_patterns.yml index 318e8bc7aba1..67a5e54ef86d 100644 --- a/.test_patterns.yml +++ b/.test_patterns.yml @@ -7,6 +7,7 @@ # If there is no owner for failed test the build will fail, regardless of if it matches in this file. # When a failing test matches one or more of the "regex" properties below, # a message is sent to slack channel #aztec3-ci tagging the owners. +# If an optional error_regex is provided, the output file must match for the test to be considered a flake. names: - adam: &adam "U04BM8H25NJ" @@ -33,6 +34,8 @@ tests: # Actual: false # Expected: true - regex: "join_split_example_tests" + skip: true + error_regex: "field_t::range_constraint" owners: - *luke @@ -52,6 +55,7 @@ tests: skip: true owners: - *charlie + # Sometimes see this on ARM. But not when run on it's own... # FAILED 6a60c4e796ac0aef: noir/scripts/run_test.sh debug-21ff1948430ded06 tests::debug_ram_blowup_regression (code: 101) # running 1 test @@ -152,6 +156,7 @@ tests: owners: - *palla - regex: "yarn-project/kv-store" + error_regex: "Could not import your test module" owners: - *alex @@ -189,3 +194,9 @@ tests: - regex: "nonsense to match" owners: - *charlie + + - regex: "nonsense to match" + owners: + - *charlie + - *adam + error_regex: "something else" diff --git a/bootstrap.sh b/bootstrap.sh index 52737b485e5a..487ee665fa41 100755 --- a/bootstrap.sh +++ b/bootstrap.sh @@ -126,13 +126,28 @@ function test_cmds { parallel -k --line-buffer './{}/bootstrap.sh test_cmds 2>/dev/null' ::: $@ | filter_test_cmds } +function start_txe { + cd $root/yarn-project/txe + LOG_LEVEL=info TXE_PORT=$1 node --no-warnings ./dest/bin/index.js & + local pid=$! + trap "kill -SIGTERM $pid &>/dev/null || true" SIGTERM; + wait $pid + wait $pid + local code=$? + if [ "$code" -ne 0 ]; then + sudo lsof -i + fi + return $code +} +export -f start_txe + function start_txes { # Starting txe servers with incrementing port numbers. trap 'kill -SIGTERM $(jobs -p) &>/dev/null || true' EXIT for i in $(seq 0 $((NUM_TXES-1))); do existing_pid=$(lsof -ti :$((45730 + i)) || true) - [ -n "$existing_pid" ] && kill -9 $existing_pid - dump_fail "cd $root/yarn-project/txe && LOG_LEVEL=info TXE_PORT=$((45730 + i)) node --no-warnings ./dest/bin/index.js" & + [ -n "$existing_pid" ] && kill -9 $existing_pid && wait $existing_pid || true + dump_fail "start_txe $((45730 + i))" & done echo "Waiting for TXE's to start..." for i in $(seq 0 $((NUM_TXES-1))); do @@ -149,11 +164,11 @@ export -f start_txes function test { echo_header "test all" + start_txes + # Make sure KIND starts so it is running by the time we do spartan tests. spartan/bootstrap.sh kind &>/dev/null & - start_txes - # We will start half as many jobs as we have cpu's. # This is based on the slightly magic assumption that many tests can benefit from 2 cpus, # and also that half the cpus are logical, not physical. diff --git a/ci3/run_test_cmd b/ci3/run_test_cmd index c28b91a327df..a52a13000134 100755 --- a/ci3/run_test_cmd +++ b/ci3/run_test_cmd @@ -116,6 +116,10 @@ function fail { function flake { echo -e "${purple}FLAKED${reset}${log_info:-}: $test_cmd (${SECONDS}s) (code: $code)" + if [ -z "${SLACK_BOT_TOKEN:-}" ]; then + return + fi + # Send slack message to owners. slack_uids="" for uid in $owners; do @@ -127,23 +131,39 @@ function flake { "text": "${slack_uids% }: Test flaked on *$REF_NAME*: \`$test_cmd\` http://ci.aztec-labs.com/$log_key" } EOF - if [ -n "${SLACK_BOT_TOKEN:-}" ]; then - curl -X POST https://slack.com/api/chat.postMessage \ - -H "Authorization: Bearer $SLACK_BOT_TOKEN" \ - -H "Content-type: application/json" \ - --data "$data" &>/dev/null - fi + curl -X POST https://slack.com/api/chat.postMessage \ + -H "Authorization: Bearer $SLACK_BOT_TOKEN" \ + -H "Content-type: application/json" \ + --data "$data" &>/dev/null exit } +# Prints a list of test owners based on matching test name pattern, and optional error regex. +function get_owners { + entries=$(yq e -o=json -r 'explode(.) | .tests[] | .regex as $pattern | select(strenv(test_cmd) | test($pattern))' .test_patterns.yml | jq -c .) + + # Iterate over each matching entry. + while IFS= read -r entry; do + error_regex=$(echo "$entry" | jq -r '.error_regex // empty') + + # If error_regex is present, check the log content. + if [ -n "$error_regex" ]; then + if grep -qE "$error_regex" $tmp_file; then + echo "$entry" | jq -r '.owners[]' + fi + else + echo "$entry" | jq -r '.owners[]' + fi + done <<< "$entries" | sort -u +} + # Test passed. [ $code -eq 0 ] && pass # We're not in CI, fail. [ "$CI" -eq 0 ] && fail -# Get list of owners of this failed test. -owners=$(yq e -r 'explode(.) | .tests[] | .regex as $pattern | select(strenv(test_cmd) | test($pattern)) | .owners[]' .test_patterns.yml | sort -u) +owners=$(get_owners) # To not fail a test, we at least need an owner to notify. if [ -z "$owners" ]; then