refine ci workflow #12
The PR adds two new Python files. The first (diff hunk `@@ -0,0 +1,121 @@`) is the suite runner: it defines the test suites and a load-balancing partitioner.

```python
import argparse
import glob
from dataclasses import dataclass

from test_utils import run_unittest_files


@dataclass
class TestFile:
    name: str
    estimated_time: float = 60


# Add Intel XPU Kernel tests
suites = {
    "per-commit": [
        TestFile("test_awq_dequant.py"),
        TestFile("test_topk_softmax.py"),
    ],
}


def auto_partition(files, rank, size):
    """
    Partition files into size sublists with approximately equal sums of estimated times
    using stable sorting, and return the partition for the specified rank.

    Args:
        files (list): List of file objects with estimated_time attribute
        rank (int): Index of the partition to return (0 to size-1)
        size (int): Number of partitions

    Returns:
        list: List of file objects in the specified rank's partition
    """
    weights = [f.estimated_time for f in files]

    if not weights or size <= 0 or size > len(weights):
        return []

    # Create list of (weight, original_index) tuples
    # Using negative index as secondary key to maintain original order for equal weights
    indexed_weights = [(w, -i) for i, w in enumerate(weights)]
    # Stable sort in descending order by weight
    # If weights are equal, larger (negative) index comes first (i.e., earlier original position)
    indexed_weights = sorted(indexed_weights, reverse=True)

    # Extract original indices (negate back to positive)
    indexed_weights = [(w, -i) for w, i in indexed_weights]

    # Initialize partitions and their sums
    partitions = [[] for _ in range(size)]
    sums = [0.0] * size

    # Greedy approach: assign each weight to partition with smallest current sum
    for weight, idx in indexed_weights:
        # Find partition with minimum sum
        min_sum_idx = sums.index(min(sums))
        partitions[min_sum_idx].append(idx)
        sums[min_sum_idx] += weight

    # Return the files corresponding to the indices in the specified rank's partition
    indices = partitions[rank]
    return [files[i] for i in indices]
```

Review thread on `auto_partition`:

Collaborator: Can you elaborate more on what this function does?

Author: This file is fully ported from sglang main, aiming to achieve load balance when the test workload is heavy. We may not use this function for now. Shall I remove it?
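As the author notes, the partitioner exists for load balancing once the suite grows. It is a greedy longest-first split: sort the files by descending `estimated_time`, then always assign the next file to the shard with the smallest running total. A small sketch of its behavior, assuming it runs in the context of the module above and using made-up file names and timings:

```python
# Illustration only: hypothetical test files with uneven estimated times,
# split across two CI shards. TestFile and auto_partition come from the
# runner module above.
demo_files = [
    TestFile("test_a.py", estimated_time=300),
    TestFile("test_b.py", estimated_time=120),
    TestFile("test_c.py", estimated_time=90),
    TestFile("test_d.py", estimated_time=60),
]

for rank in range(2):
    shard = auto_partition(demo_files, rank, size=2)
    total = sum(f.estimated_time for f in shard)
    print(rank, [f.name for f in shard], total)
# rank 0 gets only test_a.py (300s of estimated work); rank 1 gets the other
# three files (270s), so the two shards finish in roughly the same time.
```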
The remainder of the runner is the CLI entry point:

```python
if __name__ == "__main__":
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument(
        "--timeout-per-file",
        type=int,
        default=1800,
        help="The time limit for running one file in seconds.",
    )
    arg_parser.add_argument(
        "--suite",
        type=str,
        default=list(suites.keys())[0],
        choices=list(suites.keys()) + ["all"],
        help="The suite to run",
    )
    arg_parser.add_argument(
        "--range-begin",
        type=int,
        default=0,
        help="The begin index of the range of the files to run.",
    )
    arg_parser.add_argument(
        "--range-end",
        type=int,
        default=None,
        help="The end index of the range of the files to run.",
    )
    arg_parser.add_argument(
        "--auto-partition-id",
        type=int,
        help="Use auto load balancing. The part id.",
    )
    arg_parser.add_argument(
        "--auto-partition-size",
        type=int,
        help="Use auto load balancing. The number of parts.",
    )
    args = arg_parser.parse_args()
    print(f"{args=}")

    if args.suite == "all":
        files = glob.glob("**/test_*.py", recursive=True)
    else:
        files = suites[args.suite]

    if args.auto_partition_size:
        files = auto_partition(files, args.auto_partition_id, args.auto_partition_size)
    else:
        files = files[args.range_begin : args.range_end]

    print("The running tests are ", [f.name for f in files])

    exit_code = run_unittest_files(files, args.timeout_per_file)

    exit(exit_code)
```
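For reference, the flags map onto the selection logic above as follows. The invocations are illustrative only, and `run_suite.py` is a placeholder name, since the script's actual filename is not visible in this capture:

```python
# python3 run_suite.py --suite per-commit --auto-partition-id 1 --auto-partition-size 2
# selects one balanced shard, i.e. roughly:
files = auto_partition(suites["per-commit"], rank=1, size=2)

# python3 run_suite.py --suite per-commit --range-begin 0 --range-end 1
# instead takes a plain slice of the suite list:
files = suites["per-commit"][0:1]
```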
The second new file (diff hunk `@@ -0,0 +1,91 @@`) is the `test_utils` module imported by the runner:

```python
"""Common utilities for testing and benchmarking"""

import os
import subprocess
import threading
import time
from typing import Callable, List, Optional


class TestFile:
    name: str
    estimated_time: float = 60


def run_with_timeout(
    func: Callable,
    args: tuple = (),
    kwargs: Optional[dict] = None,
    timeout: float = None,
):
    """Run a function with timeout."""
    ret_value = []

    def _target_func():
        ret_value.append(func(*args, **(kwargs or {})))

    t = threading.Thread(target=_target_func)
    t.start()
    t.join(timeout=timeout)
    if t.is_alive():
        raise TimeoutError()

    if not ret_value:
        raise RuntimeError()

    return ret_value[0]
```
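A minimal usage sketch for `run_with_timeout`, assuming the module above is in scope (the task and durations here are invented). Note that the worker thread is not a daemon, so a timed-out callable keeps running in the background; `run_unittest_files` below compensates by killing the spawned subprocess tree, which lets the thread finish.

```python
import time


def slow_task(seconds):
    # Stand-in for a long-running test invocation.
    time.sleep(seconds)
    return "done"


print(run_with_timeout(slow_task, args=(1,), timeout=5))  # completes, prints "done"

try:
    run_with_timeout(slow_task, args=(3,), timeout=1)      # exceeds the budget
except TimeoutError:
    print("timed out")                                     # raised after ~1 second
```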
The rest of the module is `run_unittest_files`, which launches each test file as its own `python3` subprocess under the per-file timeout:

```python
def run_unittest_files(files: List[TestFile], timeout_per_file: float):
    tic = time.perf_counter()
    success = True

    for i, file in enumerate(files):
        filename, estimated_time = file.name, file.estimated_time
        process = None

        def run_one_file(filename):
            nonlocal process

            filename = os.path.join(os.getcwd(), filename)
            print(
                f".\n.\nBegin ({i}/{len(files) - 1}):\npython3 {filename}\n.\n.\n",
                flush=True,
            )
            tic = time.perf_counter()

            process = subprocess.Popen(
                ["python3", filename], stdout=None, stderr=None, env=os.environ
            )
            process.wait()
            elapsed = time.perf_counter() - tic

            print(
                f".\n.\nEnd ({i}/{len(files) - 1}):\n{filename=}, {elapsed=:.0f}, {estimated_time=}\n.\n.\n",
                flush=True,
            )
            return process.returncode

        try:
            ret_code = run_with_timeout(
                run_one_file, args=(filename,), timeout=timeout_per_file
            )
            assert (
                ret_code == 0
            ), f"expected return code 0, but {filename} returned {ret_code}"
        except TimeoutError:
            kill_process_tree(process.pid)
            time.sleep(5)
            print(
                f"\nTimeout after {timeout_per_file} seconds when running {filename}\n",
                flush=True,
            )
            success = False
            break

    if success:
        print(f"Success. Time elapsed: {time.perf_counter() - tic:.2f}s", flush=True)
    else:
        print(f"Fail. Time elapsed: {time.perf_counter() - tic:.2f}s", flush=True)

    return 0 if success else -1
```
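One thing to note: `kill_process_tree` is called on timeout but is not defined in this hunk, so it presumably lives elsewhere in the module or is imported. As a rough idea of what such a helper typically does, here is a sketch using `psutil`; it is an assumption for illustration, not code from the PR:

```python
import psutil  # assumed dependency; not shown in the diff


def kill_process_tree(pid: int) -> None:
    """Kill a process and all of its descendants."""
    try:
        parent = psutil.Process(pid)
    except psutil.NoSuchProcess:
        return
    for child in parent.children(recursive=True):
        child.kill()
    parent.kill()
```

The exit-code convention is simple: `run_unittest_files` returns 0 when every file passes and -1 after a timeout, while a non-zero return code from a test file trips the assertion instead, so the CI job fails in either case.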