diff --git a/.github/workflows/cicd-main.yml b/.github/workflows/cicd-main.yml index 2a052970fe..da4dad7d24 100644 --- a/.github/workflows/cicd-main.yml +++ b/.github/workflows/cicd-main.yml @@ -256,6 +256,7 @@ jobs: - pre-flight - lint-check - sphinx-build + - build-container - cicd-doc-tests - cicd-unit-tests - cicd-functional-tests @@ -272,6 +273,7 @@ jobs: ( needs.pre-flight.outputs.test_level != 'none' && needs.sphinx-build.result == 'success' && + needs.build-container.result == 'success' && ( ( (needs.cicd-doc-tests.result == 'skipped' || needs.cicd-doc-tests.result == 'success') && diff --git a/pyproject.toml b/pyproject.toml index 8780797e49..684e3b0d38 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -160,7 +160,7 @@ url = "https://download.pytorch.org/whl/cu128" explicit = true [tool.uv] -no-build-isolation-package = ["transformer-engine-torch", "transformer-engine", "flash-attn", "mamba-ssm", "causal-conv1d"] +no-build-isolation-package = ["transformer-engine-torch", "transformer-engine", "flash-attn", "mamba-ssm", "causal-conv1d", "deep_gemm"] # Always apply the build group since dependencies like TE/mcore/nemo-run require build dependencies # and this lets us assume they are implicitly installed with a simply `uv sync`. Ideally, we'd # avoid including these in the default dependency set, but for now it's required. diff --git a/tests/unit/test_recipes_and_test_suites.py b/tests/unit/test_recipes_and_test_suites.py index 19a13aebdb..032909295e 100644 --- a/tests/unit/test_recipes_and_test_suites.py +++ b/tests/unit/test_recipes_and_test_suites.py @@ -183,7 +183,7 @@ def test_all_recipe_yamls_accounted_for_in_test_suites( ) -def test_nightly_compute_stays_below_1024_hours(nightly_test_suite, tracker): +def test_nightly_compute_stays_below_1030_hours(nightly_test_suite, tracker): command = f"DRYRUN=1 HF_HOME=... HF_DATASETS_CACHE=... CONTAINER= ACCOUNT= PARTITION= ./tools/launch {' '.join(nightly_test_suite)}" print(f"Running command: {command}") @@ -215,8 +215,8 @@ def test_nightly_compute_stays_below_1024_hours(nightly_test_suite, tracker): f"Last line of output was not as expected: '{last_line}'" ) total_gpu_hours = float(last_line.split(":")[-1].strip()) - assert total_gpu_hours <= 1024, ( - f"Total GPU hours exceeded 1024: {last_line}. We should revisit the test suites to reduce the total GPU hours." + assert total_gpu_hours <= 1030, ( + f"Total GPU hours exceeded 1030: {last_line}. We should revisit the test suites to reduce the total GPU hours." ) tracker.track("total_nightly_gpu_hours", total_gpu_hours)