From 0357c2d9eec3359ad2d6f71fe6755ff3bc76095a Mon Sep 17 00:00:00 2001
From: Daniel Han
Date: Mon, 11 May 2026 09:45:05 +0000
Subject: [PATCH 1/2] fix(tests/sh): accept pinned tokenizers line after #5359

#5359 pinned the tokenizers line in
studio/backend/requirements/no-torch-runtime.txt from bare `tokenizers`
to `tokenizers<=0.23.0` to stop pip from resolving to 0.23.1+ (which
transformers rejects at import time). The shell test in
tests/sh/test_torch_constraint.sh was still asserting the literal
`^tokenizers$` regex, which fails on the pinned form.

Surfaced as a hard fail on PR #5312's Backend CI Repo tests (CPU) step:

    === Structural: tokenizers in no-torch-runtime.txt ===
      FAIL: tokenizers present as standalone line (expected '1', got '0')
      FAIL: tokenizers before transformers (expected 'yes', got 'no')

Relax the regex to `^tokenizers([<>=!,~ ]|$)` so it matches both bare
and version-constrained forms, which preserves the original intent
(verify tokenizers is present in the file, before transformers).

Verified locally: 24 PASS, 0 FAIL.
---
 tests/sh/test_torch_constraint.sh | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/tests/sh/test_torch_constraint.sh b/tests/sh/test_torch_constraint.sh
index 8766635209..e89095efca 100644
--- a/tests/sh/test_torch_constraint.sh
+++ b/tests/sh/test_torch_constraint.sh
@@ -109,11 +109,14 @@ assert_eq "hardcoded torch>=2.4 appears exactly once" "1" "$_hardcoded"
 
 echo ""
 echo "=== Structural: tokenizers in no-torch-runtime.txt ==="
-_has_tokenizers=$(grep -c '^tokenizers$' "$NO_TORCH_RT" || true)
-assert_eq "tokenizers present as standalone line" "1" "$_has_tokenizers"
+# Accept bare `tokenizers` or version-pinned forms (e.g. `tokenizers<=0.23.0`).
+# Per #5359 the line is pinned because unpinned resolves to 0.23.1+ which
+# transformers rejects at import time.
+_has_tokenizers=$(grep -cE '^tokenizers([<>=!,~ ]|$)' "$NO_TORCH_RT" || true)
+assert_eq "tokenizers present as standalone or pinned line" "1" "$_has_tokenizers"
 
 # tokenizers before transformers
-_tok_line=$(grep -n '^tokenizers$' "$NO_TORCH_RT" | head -1 | cut -d: -f1)
+_tok_line=$(grep -nE '^tokenizers([<>=!,~ ]|$)' "$NO_TORCH_RT" | head -1 | cut -d: -f1)
 _tf_line=$(grep -n '^transformers' "$NO_TORCH_RT" | head -1 | cut -d: -f1)
 _tok_first=$([ "$_tok_line" -lt "$_tf_line" ] && echo "yes" || echo "no")
 assert_eq "tokenizers before transformers" "yes" "$_tok_first"

From 42342521df5d47b816dd9e88deacb95037b2484e Mon Sep 17 00:00:00 2001
From: Daniel Han
Date: Mon, 11 May 2026 09:50:15 +0000
Subject: [PATCH 2/2] fixup(tests/sh): tighten tokenizers check to guard the safe bound

Address bot review feedback on #5361:

* Codex (P2): the previous relaxed regex `^tokenizers([<>=!,~ ]|$)`
  accepted any version operator, so `tokenizers>=0.23.1` would still
  pass the test even though that line would re-introduce the import
  failure #5359 fixed.
* Gemini (medium): the boundary char class did not cover all PEP 508
  separators (`[`, `;`, `@`).

Replace the single check with two:

1. Loose: `^tokenizers([^a-zA-Z0-9._-]|$)` confirms the package is
   listed (covers extras, env markers, URLs, bare line).
2. Tight regression guard: pipe those lines through a second grep that
   requires `<=0.23.0` or the functionally equivalent `<0.23.1`.
   Rejects bare `tokenizers`, `>=0.22.0` (no upper bound), `>=0.23.1`,
   `!=0.23.0`, `<=0.24.0`, etc.

Verified locally:
- Current main (tokenizers<=0.23.0): 25 PASS, 0 FAIL.
- Spot-check with the bug reverted (bare `tokenizers`): the new
  "tokenizers pinned with upper bound excluding 0.23.1+" check FAILS as
  intended; the original "listed" check still passes.
---
 tests/sh/test_torch_constraint.sh | 26 ++++++++++++++++++++------
 1 file changed, 20 insertions(+), 6 deletions(-)

diff --git a/tests/sh/test_torch_constraint.sh b/tests/sh/test_torch_constraint.sh
index e89095efca..4dc8fe661d 100644
--- a/tests/sh/test_torch_constraint.sh
+++ b/tests/sh/test_torch_constraint.sh
@@ -109,14 +109,28 @@ assert_eq "hardcoded torch>=2.4 appears exactly once" "1" "$_hardcoded"
 
 echo ""
 echo "=== Structural: tokenizers in no-torch-runtime.txt ==="
-# Accept bare `tokenizers` or version-pinned forms (e.g. `tokenizers<=0.23.0`).
-# Per #5359 the line is pinned because unpinned resolves to 0.23.1+ which
-# transformers rejects at import time.
-_has_tokenizers=$(grep -cE '^tokenizers([<>=!,~ ]|$)' "$NO_TORCH_RT" || true)
-assert_eq "tokenizers present as standalone or pinned line" "1" "$_has_tokenizers"
+# Package-name boundary is anything not valid in a PEP 508 name, or EOL.
+# Covers `tokenizers`, `tokenizers<=0.23.0`, `tokenizers[extra]`,
+# `tokenizers; python_version<"3.13"`, etc., but NOT `tokenizers-foo`.
+_TOK_RE='^tokenizers([^a-zA-Z0-9._-]|$)'
+
+_has_tokenizers=$(grep -cE "$_TOK_RE" "$NO_TORCH_RT" || true)
+assert_eq "tokenizers package listed" "1" "$_has_tokenizers"
+
+# Regression guard for #5359: the tokenizers line must carry an upper
+# bound that excludes 0.23.1+. transformers in the allowed 4.56..5.3
+# window rejects 0.23.1 at import time with
+# `tokenizers<=0.23.0,>=0.22.0 is required, but found 0.23.1`.
+# Accept both `<=0.23.0` and the functionally equivalent `<0.23.1`.
+# Two-stage grep: pick lines that start with the tokenizers package
+# name (PEP 508 name boundary), then require a safe upper bound.
+_has_safe_pin=$(grep -E "$_TOK_RE" "$NO_TORCH_RT" \
+    | grep -cE '(<=[[:space:]]*0\.23\.0|<[[:space:]]*0\.23\.1)([^0-9A-Za-z.]|$)' \
+    || true)  # trailing boundary: `<0.23.10` / `<=0.23.01` must NOT count as a safe pin
+assert_eq "tokenizers pinned with upper bound excluding 0.23.1+" "1" "$_has_safe_pin"
 
 # tokenizers before transformers
-_tok_line=$(grep -nE '^tokenizers([<>=!,~ ]|$)' "$NO_TORCH_RT" | head -1 | cut -d: -f1)
+_tok_line=$(grep -nE "$_TOK_RE" "$NO_TORCH_RT" | head -1 | cut -d: -f1)
 _tf_line=$(grep -n '^transformers' "$NO_TORCH_RT" | head -1 | cut -d: -f1)
 _tok_first=$([ "$_tok_line" -lt "$_tf_line" ] && echo "yes" || echo "no")
 assert_eq "tokenizers before transformers" "yes" "$_tok_first"