diff --git a/.github/workflows/_e2e_test.yaml b/.github/workflows/_e2e_test.yaml index da0849b57f5..f0a9e1d9d56 100644 --- a/.github/workflows/_e2e_test.yaml +++ b/.github/workflows/_e2e_test.yaml @@ -15,6 +15,10 @@ on: contains_310: required: true type: boolean + continue_on_error: + required: false + type: boolean + default: false jobs: e2e-light: @@ -80,7 +84,29 @@ jobs: PYTORCH_NPU_ALLOC_CONF: max_split_size_mb:256 VLLM_WORKER_MULTIPROC_METHOD: spawn run: | - python3 .github/workflows/scripts/run_suite.py --suite e2e-singlecard-light --auto-partition-id ${{ matrix.part }} --auto-partition-size 1 + if [ "${{ inputs.continue_on_error }}" = "true" ]; then + python3 .github/workflows/scripts/run_suite.py \ + --suite e2e-singlecard-light \ + --auto-partition-id "${{ matrix.part }}" \ + --auto-partition-size 1 \ + --auto-upgrade-estimated-times \ + --continue-on-error + else + python3 .github/workflows/scripts/run_suite.py \ + --suite e2e-singlecard-light \ + --auto-partition-id "${{ matrix.part }}" \ + --auto-partition-size 1 + fi + + + - name: Upload timing data + uses: actions/upload-artifact@v4 + if: ${{ inputs.continue_on_error == true }} + with: + name: timing-data-singlecard-light-part${{ matrix.part }} + path: test_timing_data.json + if-no-files-found: warn + retention-days: 5 e2e-full: name: singlecard-full @@ -146,7 +172,28 @@ jobs: VLLM_WORKER_MULTIPROC_METHOD: spawn PYTORCH_NPU_ALLOC_CONF: max_split_size_mb:256 run: | - python3 .github/workflows/scripts/run_suite.py --suite e2e-singlecard --auto-partition-id ${{ matrix.part }} --auto-partition-size 2 + if [ "${{ inputs.continue_on_error }}" = "true" ]; then + python3 .github/workflows/scripts/run_suite.py \ + --suite e2e-singlecard \ + --auto-partition-id "${{ matrix.part }}" \ + --auto-partition-size 2 \ + --auto-upgrade-estimated-times \ + --continue-on-error + else + python3 .github/workflows/scripts/run_suite.py \ + --suite e2e-singlecard \ + --auto-partition-id "${{ matrix.part }}" \ + --auto-partition-size 2 + fi + + - name: Upload timing data + uses: actions/upload-artifact@v4 + if: ${{ inputs.continue_on_error == true }} + with: + name: timing-data-singlecard-full-part${{ matrix.part }} + path: test_timing_data.json + if-no-files-found: warn + retention-days: 5 e2e-2-cards-light: name: multicard-2-light @@ -210,7 +257,29 @@ jobs: env: VLLM_WORKER_MULTIPROC_METHOD: spawn run: | - python3 .github/workflows/scripts/run_suite.py --suite e2e-2card-light --auto-partition-id ${{ matrix.part }} --auto-partition-size 1 + if [ "${{ inputs.continue_on_error }}" = "true" ]; then + python3 .github/workflows/scripts/run_suite.py \ + --suite e2e-2card-light \ + --auto-partition-id "${{ matrix.part }}" \ + --auto-partition-size 1 \ + --auto-upgrade-estimated-times \ + --continue-on-error + else + python3 .github/workflows/scripts/run_suite.py \ + --suite e2e-2card-light \ + --auto-partition-id "${{ matrix.part }}" \ + --auto-partition-size 1 + fi + + + - name: Upload timing data + uses: actions/upload-artifact@v4 + if: ${{ inputs.continue_on_error == true }} + with: + name: timing-data-2card-light-part${{ matrix.part }} + path: test_timing_data.json + if-no-files-found: warn + retention-days: 5 e2e-2-cards-full: name: multicard-2-full @@ -274,7 +343,29 @@ jobs: env: VLLM_WORKER_MULTIPROC_METHOD: spawn run: | - python3 .github/workflows/scripts/run_suite.py --suite e2e-multicard-2-cards --auto-partition-id ${{ matrix.part }} --auto-partition-size 1 + if [ "${{ inputs.continue_on_error }}" = "true" ]; then + python3 .github/workflows/scripts/run_suite.py \ + --suite e2e-multicard-2-cards \ + --auto-partition-id "${{ matrix.part }}" \ + --auto-partition-size 1 \ + --auto-upgrade-estimated-times \ + --continue-on-error + else + python3 .github/workflows/scripts/run_suite.py \ + --suite e2e-multicard-2-cards \ + --auto-partition-id "${{ matrix.part }}" \ + --auto-partition-size 1 + fi + + + - name: Upload timing data + uses: actions/upload-artifact@v4 + if: ${{ inputs.continue_on_error == true }} + with: + name: timing-data-2card-full-part${{ matrix.part }} + path: test_timing_data.json + if-no-files-found: warn + retention-days: 5 - name: Run vllm-project/vllm-ascend test (non triton) if: ${{ inputs.type == 'full' && matrix.part == 0 }} @@ -346,7 +437,29 @@ jobs: env: VLLM_WORKER_MULTIPROC_METHOD: spawn run: | - python3 .github/workflows/scripts/run_suite.py --suite e2e-multicard-4-cards --auto-partition-id ${{ matrix.part }} --auto-partition-size 1 + if [ "${{ inputs.continue_on_error }}" = "true" ]; then + python3 .github/workflows/scripts/run_suite.py \ + --suite e2e-multicard-4-cards \ + --auto-partition-id "${{ matrix.part }}" \ + --auto-partition-size 1 \ + --auto-upgrade-estimated-times \ + --continue-on-error + else + python3 .github/workflows/scripts/run_suite.py \ + --suite e2e-multicard-4-cards \ + --auto-partition-id "${{ matrix.part }}" \ + --auto-partition-size 1 + fi + + + - name: Upload timing data + uses: actions/upload-artifact@v4 + if: ${{ inputs.continue_on_error == true }} + with: + name: timing-data-4card-full-part${{ matrix.part }} + path: test_timing_data.json + if-no-files-found: warn + retention-days: 5 e2e_310p: name: 310p singlecard diff --git a/.github/workflows/schedule_update_estimated_time.yaml b/.github/workflows/schedule_update_estimated_time.yaml new file mode 100644 index 00000000000..27de412a7af --- /dev/null +++ b/.github/workflows/schedule_update_estimated_time.yaml @@ -0,0 +1,111 @@ +name: Update estimated test times + +on: + schedule: + - cron: '0 2 * * 1' # Every Monday at 02:00 UTC + workflow_dispatch: + pull_request: + branches: + - 'main' + paths: + - '.github/workflows/schedule_update_estimated_time.yaml' + +permissions: + contents: write + pull-requests: write + +concurrency: + group: update-estimated-times-${{ github.ref }} + cancel-in-progress: true + +jobs: + e2e-test: + name: e2e-test + strategy: + matrix: + vllm_version: [15d76f74e2fdb12a95ea00f0ca283acf6219a2b7] + type: [full, light] + uses: ./.github/workflows/_e2e_test.yaml + with: + vllm: ${{ matrix.vllm_version }} + image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/vllm-ascend:main + contains_310: false + type: ${{ matrix.type }} + continue_on_error: true # Continue even if some tests fail, we want to collect as much timing data as possible + + update-estimated-times: + name: Update estimated_time in config.yaml + needs: [e2e-test] + runs-on: ubuntu-latest + steps: + - name: Checkout repo + uses: actions/checkout@v4 + + - name: Download all timing artifacts + uses: actions/download-artifact@v4 + with: + pattern: timing-data-* + path: timing-artifacts/ + merge-multiple: false + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.11' + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install pyyaml + + - name: Update config.yaml from timing data + run: | + python3 .github/workflows/scripts/update_estimated_time.py \ + --timing-dir timing-artifacts/ \ + --config .github/workflows/scripts/config.yaml + + - name: Check for changes + id: check_changes + run: | + if git diff --quiet .github/workflows/scripts/config.yaml; then + echo "changed=false" >> "$GITHUB_OUTPUT" + echo "No changes to config.yaml." + else + echo "changed=true" >> "$GITHUB_OUTPUT" + echo "config.yaml has been updated:" + git diff .github/workflows/scripts/config.yaml + fi + + - name: Create pull request + if: steps.check_changes.outputs.changed == 'true' && github.event_name != 'pull_request' + env: + GH_TOKEN: ${{ github.token }} + run: | + BRANCH="auto/update-estimated-times-${{ github.run_id }}" + git config user.name "github-actions[bot]" + git config user.email "github-actions[bot]@users.noreply.github.com" + git checkout -b "$BRANCH" + git add .github/workflows/scripts/config.yaml + git commit -m "[CI] Auto-update estimated test times in config.yaml + + Computed from timing-data artifacts of workflow run ${{ github.run_id }}. + Buffer ratio: 1.1x median, rounded to the nearest 10 s." + git push origin "$BRANCH" + gh pr create \ + --repo "${{ github.repository }}" \ + --base main \ + --head "$BRANCH" \ + --title "chore: Auto-update estimated test times in config.yaml" \ + --body "## Summary + + This PR was auto-generated by the **Update estimated test times** workflow. + + It updates the \`estimated_time\` values in \`.github/workflows/scripts/config.yaml\` + based on actual elapsed times collected from workflow run \`${{ github.run_id }}\`. + + ### Methodology + - Timing data is uploaded as \`timing-data-*\` artifacts by each e2e test job. + - For each test file, the **median** of all collected elapsed times is taken. + - A **10 % safety buffer** is applied and the result is rounded to the nearest 10 s. + + Please review the diff and merge if the new values look reasonable." diff --git a/.github/workflows/scripts/ci_utils.py b/.github/workflows/scripts/ci_utils.py index c1bc4cbf5f9..7827f40df9b 100644 --- a/.github/workflows/scripts/ci_utils.py +++ b/.github/workflows/scripts/ci_utils.py @@ -1,24 +1,13 @@ -import logging -import os import subprocess import time from dataclasses import dataclass -# Configure logger to output to stdout -logging.basicConfig(level=logging.INFO, format="%(message)s") -logger = logging.getLogger(__name__) - -class Colors: +class _Color: HEADER = "\033[95m" - OKBLUE = "\033[94m" - OKCYAN = "\033[96m" - OKGREEN = "\033[92m" - WARNING = "\033[93m" - FAIL = "\033[91m" - ENDC = "\033[0m" - BOLD = "\033[1m" - UNDERLINE = "\033[4m" + GREEN = "\033[92m" + RED = "\033[91m" + RESET = "\033[0m" @dataclass @@ -28,74 +17,77 @@ class TestFile: is_skipped: bool = False -def run_e2e_files( +@dataclass +class TestRecord: + name: str + passed: bool + elapsed: float + estimated: float + + def to_dict(self) -> dict: + return { + "name": self.name, + "passed": self.passed, + "elapsed": self.elapsed, + "estimated": self.estimated, + } + + +def run_tests( files: list[TestFile], continue_on_error: bool = False, -): +) -> tuple[int, list[TestRecord]]: """ - Run a list of test files. + Run each TestFile with pytest and collect timing results. Args: - files: List of TestFile objects to run - continue_on_error: If True, continue running remaining tests even if one fails. - If False, stop at first failure (default behavior for PR tests). + files: Tests to run (skipped entries should already be filtered out). + continue_on_error: If True, keep running after a failure. + report_path: If provided, write a Markdown timing report here. + + Returns: + (exit_code, records) — exit_code is 0 on full success, -1 otherwise. """ - tic = time.perf_counter() - success = True - passed_tests = [] - failed_tests = [] - - for i, file in enumerate(files): - filename, estimated_time = file.name, file.estimated_time - - full_path = os.path.join(os.getcwd(), filename) - logger.info(f".\n.\n{Colors.HEADER}Begin ({i}/{len(files)}):{Colors.ENDC}\npytest -sv {full_path}\n.\n.\n") - file_tic = time.perf_counter() - - process = subprocess.Popen( - ["pytest", "-sv", "--durations=0", "--color=yes", full_path], - stdout=None, - stderr=None, - env=os.environ, + records: list[TestRecord] = [] + all_passed = True + total_start = time.perf_counter() + + for i, test in enumerate(files): + print(f"\n{'.' * 60}", flush=True) + print( + f"{_Color.HEADER}[{i + 1}/{len(files)}] START {test.name}{_Color.RESET}", + flush=True, ) - process.wait() - elapsed = time.perf_counter() - file_tic - ret_code = process.returncode + start = time.perf_counter() + result = subprocess.run(["pytest", "-sv", "--durations=0", "--color=yes", test.name]) + elapsed = time.perf_counter() - start + passed = result.returncode == 0 + + records.append(TestRecord(name=test.name, passed=passed, elapsed=elapsed, estimated=test.estimated_time)) - logger.info( - f".\n.\n{Colors.HEADER}End ({i}/{len(files)}):{Colors.ENDC}\n{filename=}, \ - {elapsed=:.0f}, {estimated_time=}\n.\n.\n" + color = _Color.GREEN if passed else _Color.RED + status = "PASSED" if passed else f"FAILED (exit code {result.returncode})" + print( + f"{color}[{i + 1}/{len(files)}] {status} {test.name} ({elapsed:.0f}s){_Color.RESET}", + flush=True, ) - if ret_code == 0: - passed_tests.append(filename) - else: - logger.info(f"\n{Colors.FAIL}✗ FAILED: {filename} returned exit code {ret_code}{Colors.ENDC}\n") - failed_tests.append((filename, f"exit code {ret_code}")) - success = False + if not passed: + all_passed = False if not continue_on_error: break - elapsed_total = time.perf_counter() - tic - - if success: - logger.info(f"{Colors.OKGREEN}Success. Time elapsed: {elapsed_total:.2f}s{Colors.ENDC}") - else: - logger.info(f"{Colors.FAIL}Fail. Time elapsed: {elapsed_total:.2f}s{Colors.ENDC}") - - # Print summary - logger.info(f"\n{'=' * 60}") - logger.info(f"Test Summary: {Colors.OKGREEN}{len(passed_tests)}/{len(files)} passed{Colors.ENDC}") - logger.info(f"{'=' * 60}") - if passed_tests: - logger.info(f"{Colors.OKGREEN}✓ PASSED:{Colors.ENDC}") - for test in passed_tests: - logger.info(f" {test}") - if failed_tests: - logger.info(f"\n{Colors.FAIL}✗ FAILED:{Colors.ENDC}") - for test, reason in failed_tests: - logger.info(f" {test} ({reason})") - logger.info(f"{'=' * 60}\n") - - return 0 if success else -1 + total_elapsed = time.perf_counter() - total_start + passed_count = sum(1 for r in records if r.passed) + + print(f"\n{'=' * 60}") + color = _Color.GREEN if all_passed else _Color.RED + print(f"{color}Summary: {passed_count}/{len(files)} passed ({total_elapsed:.2f}s total){_Color.RESET}") + print("=" * 60) + for r in records: + icon = f"{_Color.GREEN}✓{_Color.RESET}" if r.passed else f"{_Color.RED}✗{_Color.RESET}" + print(f" {icon} {r.name} ({r.elapsed:.0f}s)") + print(flush=True) + + return (0 if all_passed else -1), records diff --git a/.github/workflows/scripts/run_suite.py b/.github/workflows/scripts/run_suite.py index bbd4d6dce6c..b6dc42b8d4f 100644 --- a/.github/workflows/scripts/run_suite.py +++ b/.github/workflows/scripts/run_suite.py @@ -1,244 +1,239 @@ import argparse +import json import os +import sys +from datetime import datetime, timezone from pathlib import Path import tabulate import yaml -from ci_utils import TestFile, run_e2e_files +from ci_utils import TestFile, TestRecord, run_tests +_CONFIG_PATH = Path(__file__).parent / "config.yaml" -def load_suites_from_config(config_path: str = "config.yaml") -> dict[str, list[TestFile]]: - # Get absolute path relative to this script - script_dir = Path(__file__).parent - abs_config_path = script_dir / config_path - with open(abs_config_path) as f: - suites_data = yaml.safe_load(f) +def load_suites(config_path: Path = _CONFIG_PATH) -> dict[str, list[TestFile]]: + """Load all test suites from config.yaml.""" + data = yaml.safe_load(config_path.read_text()) + return { + suite_name: [ + TestFile( + name=entry["name"], + estimated_time=entry.get("estimated_time", 60), + is_skipped=entry.get("is_skipped", False), + ) + for entry in entries + ] + for suite_name, entries in data.items() + } - suites = {} - for suite_name, test_files in suites_data.items(): - suites[suite_name] = [] - for file_data in test_files: - name = file_data.get("name") - estimated_time = file_data.get("estimated_time", 60) - is_skipped = file_data.get("is_skipped", False) - suites[suite_name].append(TestFile(name, estimated_time, is_skipped)) - - return suites - - -suites = load_suites_from_config() - - -def auto_partition(files, rank, size): +def partition(files: list[TestFile], rank: int, size: int) -> list[TestFile]: """ - Partition files into size sublists with approximately equal sums of estimated times - using stable sorting, and return the partition for the specified rank. - - Args: - files (list): List of file objects with estimated_time attribute - rank (int): Index of the partition to return (0 to size-1) - size (int): Number of partitions - - Returns: - list: List of file objects in the specified rank's partition + Split non-skipped files into `size` groups of approximately equal estimated + time using a greedy algorithm, and return the group at index `rank`. + Files within the returned group are sorted ascending by estimated_time. """ - # Filter out skipped files - files = [f for f in files if not f.is_skipped] - weights = [f.estimated_time for f in files] - - if not weights or size <= 0 or size > len(weights): + active = [f for f in files if not f.is_skipped] + if not active or size <= 0 or size > len(active): return [] - # Create list of (weight, original_index) tuples - # Using negative index as secondary key to maintain original order for equal weights - indexed_weights = [(w, -i) for i, w in enumerate(weights)] - # Stable sort in descending order by weight - # If weights are equal, larger (negative) index comes first (i.e., earlier original position) - indexed_weights = sorted(indexed_weights, reverse=True) + # Sort descending by weight; use original index as tiebreaker to be stable + indexed = sorted(enumerate(active), key=lambda x: (-x[1].estimated_time, x[0])) - # Extract original indices (negate back to positive) - indexed_weights = [(w, -i) for w, i in indexed_weights] - - # Initialize partitions and their sums - partitions = [[] for _ in range(size)] + buckets: list[list[int]] = [[] for _ in range(size)] sums = [0.0] * size - # Greedy approach: assign each weight to partition with smallest current sum - for weight, idx in indexed_weights: - # Find partition with minimum sum - min_sum_idx = sums.index(min(sums)) - partitions[min_sum_idx].append(idx) - sums[min_sum_idx] += weight - - # Return the files corresponding to the indices in the specified rank's partition - indices = partitions[rank] - indices.sort(key=lambda i: files[i].estimated_time) - return [files[i] for i in indices] - - -def _get_disk_covered_dirs(all_suite_files: set[str], project_root: Path | str) -> list[str]: - covered_dirs = set() - for file_path in all_suite_files: - # e.g. tests/e2e/singlecard/test_foo.py -> tests/e2e/singlecard - parent_dir = (project_root / file_path).parent if os.path.isfile(file_path) else (project_root / file_path) - if parent_dir.exists(): - # Store relative path to project root - try: - rel_dir = parent_dir.relative_to(project_root) - - # Check if this directory is already covered by a parent directory - is_covered = False - for existing_dir in list(covered_dirs): - # If existing_dir is a parent of rel_dir, rel_dir is already covered - if existing_dir in rel_dir.parents or existing_dir == rel_dir: - is_covered = True - break - # If rel_dir is a parent of existing_dir, replace existing_dir with rel_dir - elif rel_dir in existing_dir.parents: - covered_dirs.remove(existing_dir) - # We continue checking other existing_dirs, but we know rel_dir should be added - # unless another parent covers it (which is handled by the first if block logic effectively - # but we need to be careful with modification during iteration, so we use list copy) - - if not is_covered: - covered_dirs.add(rel_dir) - - except ValueError: - pass - return covered_dirs - - -def _sanity_check_suites(suites: dict[str, list[TestFile]]): + for idx, test in indexed: + lightest = sums.index(min(sums)) + buckets[lightest].append(idx) + sums[lightest] += test.estimated_time + + return sorted([active[i] for i in buckets[rank]], key=lambda f: f.estimated_time) + + +def _find_project_root() -> Path: + root = Path.cwd() + if (root / "tests").exists(): + return root + # Fall back: assume script lives at .github/workflows/scripts/ + return Path(__file__).parents[3] + + +def _minimal_covered_dirs(file_paths: set[str], root: Path) -> set[Path]: + """Return the minimal set of directories that covers all file_paths.""" + dirs: set[Path] = set() + for fp in file_paths: + candidate = (root / fp).parent + if not candidate.exists(): + continue + try: + rel = candidate.relative_to(root) + except ValueError: + continue + # Drop any existing entries that are subdirectories of rel + dirs = {d for d in dirs if rel not in d.parents} + # Only add rel if no ancestor already covers it + if not any(d == rel or d in rel.parents for d in dirs): + dirs.add(rel) + return dirs + + +def sanity_check(suites: dict[str, list[TestFile]]) -> None: """ - Check if all test files defined in the suites exist on disk. + Verify that: + 1. Every test file in any suite exists on disk. + 2. No test_*.py files exist on disk (in covered dirs) that are absent from all suites. + Raises SystemExit with a descriptive message on failure. """ - # 1. Collect all test files defined in all suites - all_suite_files = set() - for suite in suites.values(): - for test_file in suite: - # Handle ::test_case syntax - file_path = test_file.name.split("::")[0] - all_suite_files.add(file_path) - - # 2. Identify all directories covered by the suites - project_root = Path.cwd() - if not (project_root / "tests").exists(): - script_dir = Path(__file__).parent - # .github/workflows/scripts -> ../../../ -> root - project_root = script_dir.parents[2] - # For now, we only check dirs under [tests/e2e/singlecard, tests/e2e/multicard] - covered_dirs = _get_disk_covered_dirs(all_suite_files, project_root) - - # 3. Scan disk for all test_*.py files in these directories - all_disk_files = set() - for dir_path in covered_dirs: - full_dir_path = project_root / dir_path - # rglob is equivalent to glob('**/' + pattern) - for py_file in full_dir_path.rglob("test_*.py"): - try: - rel_path = py_file.relative_to(project_root) - all_disk_files.add(str(rel_path)) - except ValueError: - pass - - # 4. Find files on disk but missing from ANY suite - # We check if a disk file is present in 'all_suite_files' (union of all suites) - missing_files = sorted(list(all_disk_files - all_suite_files)) - - missing_text = "\n".join(f'TestFile("{x}"),' for x in missing_files) - - if missing_files: - assert len(missing_files) == 0, ( - f"Some test files found on disk in covered directories are not in ANY test suite.\n" - f"Scanned directories: {sorted([str(d) for d in covered_dirs])}\n" - f"Missing files:\n" - f"{missing_text}\n" - f"If this is intentional, please label them as 'is_skipped=True' and add them to the test suite." - ) - - # 5. check if all files in suites exist on disk - non_existent_files = sorted(list(all_suite_files - all_disk_files)) - non_existent_text = "\n".join(f'TestFile("{x}"),' for x in non_existent_files) - assert len(non_existent_files) == 0, ( - f"Some test files in test suite do not exist on disk:\n" - f"{non_existent_text}\n" - f"Please check if the test files are correctly specified in the local repository." + suite_files = {f.name.split("::")[0] for tests in suites.values() for f in tests} + root = _find_project_root() + covered = _minimal_covered_dirs(suite_files, root) + + disk_files = {str(p.relative_to(root)) for d in covered for p in (root / d).rglob("test_*.py")} + + missing_from_suite = sorted(disk_files - suite_files) + if missing_from_suite: + entries = "\n".join(f' TestFile("{f}"),' for f in missing_from_suite) + raise SystemExit(f"Test files on disk are not in any suite (add them or mark is_skipped=True):\n{entries}") + + missing_from_disk = sorted(suite_files - disk_files) + if missing_from_disk: + entries = "\n".join(f' TestFile("{f}"),' for f in missing_from_disk) + raise SystemExit(f"Test files listed in suite do not exist on disk:\n{entries}") + + +def _print_plan( + suite: str, + files: list[TestFile], + skipped: list[TestFile], + partition_info: str, +) -> None: + print(tabulate.tabulate([[suite, partition_info]], headers=["Suite", "Partition"], tablefmt="psql")) + total_est = sum(f.estimated_time for f in files) + print(f"✅ Enabled {len(files)} test(s) (est. total {total_est:.1f}s):") + for f in files: + print(f" - {f.name} (est={f.estimated_time}s)") + if skipped: + print(f"\n❌ Skipped {len(skipped)} test(s) (consider recovering):") + for f in skipped: + print(f" - {f.name}") + print(flush=True) + + +def _print_results( + suite: str, + records: list[TestRecord], + skipped: list[TestFile], + partition_info: str, +) -> None: + print(tabulate.tabulate([[suite, partition_info]], headers=["Suite", "Partition"], tablefmt="psql")) + total_elapsed = sum(r.elapsed for r in records) + passed_count = sum(1 for r in records if r.passed) + print(f"Results: {passed_count}/{len(records)} passed (actual total {total_elapsed:.1f}s):") + for r in records: + status = "✅ PASSED" if r.passed else "❌ FAILED" + print(f" {status} {r.name} (actual={r.elapsed:.0f}s est={r.estimated:.0f}s)") + if skipped: + print(f"\n❌ Skipped {len(skipped)} test(s) (consider recovering):") + for f in skipped: + print(f" - {f.name}") + print(flush=True) + + +def _save_timing_json( + records: list[TestRecord], + suite: str, + partition_id: int | None, + partition_size: int | None, + output_path: Path, +) -> None: + passed_suites = [r.to_dict() for r in records if r.passed] + payload = { + "suite": suite, + "partition_id": partition_id, + "partition_size": partition_size, + "commit_sha": os.environ.get("GITHUB_SHA", ""), + "github_run_id": os.environ.get("GITHUB_RUN_ID", ""), + "timestamp": datetime.now(timezone.utc).isoformat(), + "tests": passed_suites, + } + output_path.write_text(json.dumps(payload, indent=2)) + print( + f"Timing data written to {output_path} ({len(passed_suites)}/{len(records)} passed)", + flush=True, ) -def main(): - arg_parser = argparse.ArgumentParser() - arg_parser.add_argument( +def main() -> None: + suites = load_suites() + + parser = argparse.ArgumentParser(description="Run a named e2e test suite") + parser.add_argument( "--suite", - type=str, - default=list(suites.keys())[0], - choices=list(suites.keys()) + ["all"], - help="The suite to run", + required=True, + choices=list(suites.keys()), + help="Name of the test suite to run", ) - arg_parser.add_argument( + parser.add_argument( "--auto-partition-id", type=int, - help="Use auto load balancing. The part id.", + default=None, + metavar="ID", + help="Zero-based partition index (requires --auto-partition-size)", ) - arg_parser.add_argument( + parser.add_argument( "--auto-partition-size", type=int, - help="Use auto load balancing. The number of parts.", + default=None, + metavar="N", + help="Total number of partitions", + ) + parser.add_argument( + "--auto-upgrade-estimated-times", + action="store_true", + help="Automatically update estimated times in config.yaml based on actual timings (default: False) \ +If enabled, the script always exit with 0, even if some tests fail, since the primary purpose is to gather \ +timing data to improve estimates.", ) - arg_parser.add_argument( + parser.add_argument( "--continue-on-error", action="store_true", - default=True, - help="Continue running remaining tests even if one fails (useful for nightly tests)", + help="Continue running after a test failure (default: True)", ) - args = arg_parser.parse_args() - print(f"{args=}") - - _sanity_check_suites(suites) - files = suites[args.suite] + parser.add_argument( + "--timing-report-json", + type=Path, + default=Path("test_timing_data.json"), + help="Path to write the JSON timing data for CI aggregation", + ) + args = parser.parse_args() - files_disabled = [f for f in files if f.is_skipped] + sanity_check(suites) - if args.auto_partition_size: - files = auto_partition(files, args.auto_partition_id, args.auto_partition_size) + all_files = suites[args.suite] + skipped = [f for f in all_files if f.is_skipped] - # Print test info at beginning (similar to test/run_suite.py pretty_print_tests) - if args.auto_partition_size: - partition_info = ( - f"{args.auto_partition_id + 1}/{args.auto_partition_size} (0-based id={args.auto_partition_id})" - ) + if args.auto_partition_size is not None: + files = partition(all_files, args.auto_partition_id, args.auto_partition_size) + partition_info = f"{args.auto_partition_id + 1}/{args.auto_partition_size}" else: + files = [f for f in all_files if not f.is_skipped] partition_info = "full" - headers = ["Suite", "Partition"] - rows = [[args.suite, partition_info]] - msg = tabulate.tabulate(rows, headers=headers, tablefmt="psql") + "\n" + _print_plan(args.suite, files, skipped, partition_info) - total_est_time = sum(f.estimated_time for f in files) - msg += f"✅ Enabled {len(files)} test(s) (est total {total_est_time:.1f}s):\n" - for f in files: - msg += f" - {f.name} (est_time={f.estimated_time})\n" - msg += f"\n❌ Disabled {len(files_disabled)} test(s)(Please consider to recover them):\n" - for f in files_disabled: - msg += f" - {f.name} (est_time={f.estimated_time})\n" - - print(msg, flush=True) - - exit_code = run_e2e_files( + exit_code, records = run_tests( files, continue_on_error=args.continue_on_error, ) - # Print tests again at the end for visibility - msg = "\n" + tabulate.tabulate(rows, headers=headers, tablefmt="psql") + "\n" - msg += f"✅ Executed {len(files)} test(s) (est total {total_est_time:.1f}s):\n" - for f in files: - msg += f" - {f.name} (est_time={f.estimated_time})\n" - print(msg, flush=True) + _save_timing_json(records, args.suite, args.auto_partition_id, args.auto_partition_size, args.timing_report_json) - exit(exit_code) + _print_results(args.suite, records, skipped, partition_info) + if args.auto_upgrade_estimated_times: + sys.exit(0) + sys.exit(exit_code) if __name__ == "__main__": diff --git a/.github/workflows/scripts/update_estimated_time.py b/.github/workflows/scripts/update_estimated_time.py new file mode 100644 index 00000000000..a47d14cbb5a --- /dev/null +++ b/.github/workflows/scripts/update_estimated_time.py @@ -0,0 +1,102 @@ +#!/usr/bin/env python3 +""" +Update estimated_time in config.yaml from CI timing data. + +Usage: + python3 update_estimated_time.py \ + --timing-dir ./timing-artifacts \ + --config .github/workflows/scripts/config.yaml +""" + +import argparse +import json +from pathlib import Path + +import yaml + + +def collect_timings(timing_dir: Path) -> dict[str, int]: + """ + Recursively scan timing_dir for JSON files produced by run_suite.py. + Returns {test_name: elapsed_seconds} for all passed tests. + Warns if the same test name appears in multiple files. + """ + json_files = list(timing_dir.rglob("*.json")) + print(f"Found {len(json_files)} timing file(s) in {timing_dir}") + + timings: dict[str, int] = {} + for path in json_files: + try: + data = json.loads(path.read_text()) + except (json.JSONDecodeError, OSError) as e: + print(f" Warning: skipping {path}: {e}") + continue + + for test in data.get("tests", []): + if not test.get("passed", False): + continue + name: str = test.get("name", "") + elapsed: float = test.get("elapsed", 0.0) + if not name or elapsed <= 0: + continue + if name in timings: + print(f" Warning: duplicate entry for '{name}', overwriting {timings[name]}s with {int(elapsed)}s") + timings[name] = int(elapsed) + + return timings + + +def update_config(config_path: Path, timings: dict[str, int]) -> int: + """ + Load config.yaml, update estimated_time for each test found in timings, + and write the result back. Returns the number of changed entries. + """ + configs: dict = yaml.safe_load(config_path.read_text()) + + changed = 0 + for suite_tests in configs.values(): + for test in suite_tests: + name: str = test.get("name", "") + if name not in timings: + continue + old_time: int = test.get("estimated_time", 0) + new_time: int = timings[name] + if old_time == new_time: + continue + test["estimated_time"] = new_time + print(f" {name}: {old_time}s -> {new_time}s") + changed += 1 + + config_path.write_text(yaml.dump(configs, default_flow_style=False, allow_unicode=True, sort_keys=False)) + return changed + + +def main() -> None: + parser = argparse.ArgumentParser(description="Update estimated_time in config.yaml from CI timing data") + parser.add_argument( + "--timing-dir", + required=True, + type=Path, + help="Directory containing timing JSON files (searched recursively)", + ) + parser.add_argument( + "--config", + default=".github/workflows/scripts/config.yaml", + type=Path, + help="Path to config.yaml (default: .github/workflows/scripts/config.yaml)", + ) + args = parser.parse_args() + + timings = collect_timings(args.timing_dir) + if not timings: + print("No timing data collected. Exiting without changes.") + return + + print(f"\nCollected timing data for {len(timings)} test(s).") + print(f"Updating {args.config}...") + changed = update_config(args.config, timings) + print(f"\nDone. {changed} estimated_time value(s) changed.") + + +if __name__ == "__main__": + main()