Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .github/workflows/cicd-main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -256,6 +256,7 @@ jobs:
- pre-flight
- lint-check
- sphinx-build
- build-container
- cicd-doc-tests
- cicd-unit-tests
- cicd-functional-tests
Expand All @@ -272,6 +273,7 @@ jobs:
(
needs.pre-flight.outputs.test_level != 'none' &&
needs.sphinx-build.result == 'success' &&
needs.build-container.result == 'success' &&
(
(
(needs.cicd-doc-tests.result == 'skipped' || needs.cicd-doc-tests.result == 'success') &&
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,7 @@ url = "https://download.pytorch.org/whl/cu128"
explicit = true

[tool.uv]
no-build-isolation-package = ["transformer-engine-torch", "transformer-engine", "flash-attn", "mamba-ssm", "causal-conv1d"]
no-build-isolation-package = ["transformer-engine-torch", "transformer-engine", "flash-attn", "mamba-ssm", "causal-conv1d", "deep_gemm"]
# Always apply the build group since dependencies like TE/mcore/nemo-run require build dependencies
# and this lets us assume they are implicitly installed with a simple `uv sync`. Ideally, we'd
# avoid including these in the default dependency set, but for now it's required.
Expand Down
6 changes: 3 additions & 3 deletions tests/unit/test_recipes_and_test_suites.py
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,7 @@ def test_all_recipe_yamls_accounted_for_in_test_suites(
)


def test_nightly_compute_stays_below_1024_hours(nightly_test_suite, tracker):
def test_nightly_compute_stays_below_1030_hours(nightly_test_suite, tracker):
command = f"DRYRUN=1 HF_HOME=... HF_DATASETS_CACHE=... CONTAINER= ACCOUNT= PARTITION= ./tools/launch {' '.join(nightly_test_suite)}"

print(f"Running command: {command}")
Expand Down Expand Up @@ -215,8 +215,8 @@ def test_nightly_compute_stays_below_1024_hours(nightly_test_suite, tracker):
f"Last line of output was not as expected: '{last_line}'"
)
total_gpu_hours = float(last_line.split(":")[-1].strip())
assert total_gpu_hours <= 1024, (
f"Total GPU hours exceeded 1024: {last_line}. We should revisit the test suites to reduce the total GPU hours."
assert total_gpu_hours <= 1030, (
f"Total GPU hours exceeded 1030: {last_line}. We should revisit the test suites to reduce the total GPU hours."
)
tracker.track("total_nightly_gpu_hours", total_gpu_hours)

Expand Down
Loading