AztecProtocol · charlielye · Mar 18, 2025 · Mar 17, 2025 · Mar 17, 2025 · Mar 17, 2025
diff --git a/.test_patterns.yml b/.test_patterns.yml
@@ -7,6 +7,7 @@
 # If there is no owner for failed test the build will fail, regardless of if it matches in this file.
 # When a failing test matches one or more of the "regex" properties below,
 # a message is sent to slack channel #aztec3-ci tagging the owners.
+# If an optional error_regex is provided, the output file must match for the test to be considered a flake.
 
 names:
   - adam: &adam "U04BM8H25NJ"
@@ -33,6 +34,8 @@ tests:
   #   Actual: false
   # Expected: true
   - regex: "join_split_example_tests"
+    skip: true
+    error_regex: "field_t::range_constraint"
     owners:
       - *luke
 
@@ -52,6 +55,7 @@ tests:
     skip: true
     owners:
       - *charlie
+
   # Sometimes see this on ARM. But not when run on its own...
   # FAILED 6a60c4e796ac0aef: noir/scripts/run_test.sh debug-21ff1948430ded06 tests::debug_ram_blowup_regression (code: 101)
   #                                                                                                                                                                                running 1 test
@@ -152,6 +156,7 @@ tests:
     owners:
       - *palla
   - regex: "yarn-project/kv-store"
+    error_regex: "Could not import your test module"
     owners:
       - *alex
 
@@ -189,3 +194,9 @@ tests:
   - regex: "nonsense to match"
     owners:
       - *charlie
+
+  - regex: "nonsense to match"
+    owners:
+      - *charlie
+      - *adam
+    error_regex: "something else"
diff --git a/bootstrap.sh b/bootstrap.sh
@@ -126,13 +126,28 @@ function test_cmds {
   parallel -k --line-buffer './{}/bootstrap.sh test_cmds 2>/dev/null' ::: $@ | filter_test_cmds
 }
 
+function start_txe {
+  cd $root/yarn-project/txe
+  LOG_LEVEL=info TXE_PORT=$1 node --no-warnings ./dest/bin/index.js &
+  local pid=$!
+  trap "kill -SIGTERM $pid &>/dev/null || true" SIGTERM;
+  wait $pid
+  wait $pid
+  local code=$?
+  if [ "$code" -ne 0 ]; then
+    sudo lsof -i
+  fi
+  return $code
+}
+export -f start_txe
+
 function start_txes {
   # Starting txe servers with incrementing port numbers.
   trap 'kill -SIGTERM $(jobs -p) &>/dev/null || true' EXIT
   for i in $(seq 0 $((NUM_TXES-1))); do
     existing_pid=$(lsof -ti :$((45730 + i)) || true)
-    [ -n "$existing_pid" ] && kill -9 $existing_pid
-    dump_fail "cd $root/yarn-project/txe && LOG_LEVEL=info TXE_PORT=$((45730 + i)) node --no-warnings ./dest/bin/index.js" &
+    [ -n "$existing_pid" ] && kill -9 $existing_pid && wait $existing_pid || true
+    dump_fail "start_txe $((45730 + i))" &
   done
   echo "Waiting for TXE's to start..."
   for i in $(seq 0 $((NUM_TXES-1))); do
@@ -149,11 +164,11 @@ export -f start_txes
 function test {
   echo_header "test all"
 
+  start_txes
+
   # Make sure KIND starts so it is running by the time we do spartan tests.
   spartan/bootstrap.sh kind &>/dev/null &
 
-  start_txes
-
   # We will start half as many jobs as we have cpu's.
   # This is based on the slightly magic assumption that many tests can benefit from 2 cpus,
   # and also that half the cpus are logical, not physical.

diff --git a/ci3/run_test_cmd b/ci3/run_test_cmd
@@ -116,6 +116,10 @@ function fail {
 function flake {
   echo -e "${purple}FLAKED${reset}${log_info:-}: $test_cmd (${SECONDS}s) (code: $code)"
 
+  if [ -z "${SLACK_BOT_TOKEN:-}" ]; then
+    return
+  fi
+
   # Send slack message to owners.
   slack_uids=""
   for uid in $owners; do
@@ -127,23 +131,39 @@ function flake {
     "text": "${slack_uids% }: Test flaked on *$REF_NAME*: \`$test_cmd\` http://ci.aztec-labs.com/$log_key"
   }
 EOF
-  if [ -n "${SLACK_BOT_TOKEN:-}" ]; then
-    curl -X POST https://slack.com/api/chat.postMessage \
-      -H "Authorization: Bearer $SLACK_BOT_TOKEN" \
-      -H "Content-type: application/json" \
-      --data "$data" &>/dev/null
-  fi
+  curl -X POST https://slack.com/api/chat.postMessage \
+    -H "Authorization: Bearer $SLACK_BOT_TOKEN" \
+    -H "Content-type: application/json" \
+    --data "$data" &>/dev/null
   exit
 }
 
+# Prints a list of test owners based on matching test name pattern, and optional error regex.
+function get_owners {
+  entries=$(yq e -o=json -r 'explode(.) | .tests[] | .regex as $pattern | select(strenv(test_cmd) | test($pattern))' .test_patterns.yml | jq -c .)
+
+  # Iterate over each matching entry.
+  while IFS= read -r entry; do
+    error_regex=$(echo "$entry" | jq -r '.error_regex // empty')
+
+    # If error_regex is present, check the log content.
+    if [ -n "$error_regex" ]; then
+      if grep -qE "$error_regex" $tmp_file; then
+        echo "$entry" | jq -r '.owners[]'
+      fi
+    else
+      echo "$entry" | jq -r '.owners[]'
+    fi
+  done <<< "$entries" | sort -u
+}
+
 # Test passed.
 [ $code -eq 0 ] && pass
 
 # We're not in CI, fail.
 [ "$CI" -eq 0 ] && fail
 
-# Get list of owners of this failed test.
-owners=$(yq e -r 'explode(.) | .tests[] | .regex as $pattern | select(strenv(test_cmd) | test($pattern)) | .owners[]' .test_patterns.yml | sort -u)
+owners=$(get_owners)
 
 # To not fail a test, we at least need an owner to notify.
 if [ -z "$owners" ]; then