|
25 | 25 | from tvm.rpc import RPCSession |
26 | 26 | from tvm.runtime import Device, Module |
27 | 27 |
|
| 28 | +from ..profiler import Profiler |
28 | 29 | from ..utils import ( |
29 | 30 | cpu_count, |
30 | 31 | derived_object, |
@@ -243,7 +244,7 @@ def __init__( |
243 | 244 | f_alloc_argument: Union[T_ALLOC_ARGUMENT, str, None] = None, |
244 | 245 | f_run_evaluator: Union[T_RUN_EVALUATOR, str, None] = None, |
245 | 246 | f_cleanup: Union[T_CLEANUP, str, None] = None, |
246 | | - max_workers: Optional[int] = 1, |
| 247 | + max_workers: Optional[int] = None, |
247 | 248 | initializer: Optional[Callable[[], None]] = None, |
248 | 249 | ) -> None: |
249 | 250 | """Constructor |
@@ -284,7 +285,7 @@ def __init__( |
284 | 285 | self.f_run_evaluator = f_run_evaluator |
285 | 286 | self.f_cleanup = f_cleanup |
286 | 287 | if max_workers is None: |
287 | | - max_workers = cpu_count() |
| 288 | + max_workers = cpu_count(logical=True) |
288 | 289 | logger.info("RPCRunner: max_workers = %d", max_workers) |
289 | 290 | self.pool = PopenPoolExecutor( |
290 | 291 | max_workers=max_workers, |
@@ -378,31 +379,36 @@ def resource_handler(): |
378 | 379 | yield |
379 | 380 | finally: |
380 | 381 | # Final step. Always clean up |
381 | | - f_cleanup(session, remote_path) |
| 382 | + with Profiler.timeit("RPCRunner/cleanup"): |
| 383 | + f_cleanup(session, remote_path) |
382 | 384 |
|
383 | 385 | with resource_handler(): |
384 | 386 | # Step 1. Create session |
385 | | - session = f_create_session(rpc_config) |
386 | | - device = session.device(dev_type=device_type, dev_id=0) |
| 387 | + with Profiler.timeit("RPCRunner/create_session"): |
| 388 | + session = f_create_session(rpc_config) |
| 389 | + device = session.device(dev_type=device_type, dev_id=0) |
387 | 390 | # Step 2. Upload the module |
388 | | - _, remote_path = osp.split(artifact_path) |
389 | | - local_path: str = artifact_path |
390 | | - rt_mod: Module = f_upload_module(session, local_path, remote_path) |
| 391 | + with Profiler.timeit("RPCRunner/upload_module"): |
| 392 | + _, remote_path = osp.split(artifact_path) |
| 393 | + local_path: str = artifact_path |
| 394 | + rt_mod: Module = f_upload_module(session, local_path, remote_path) |
391 | 395 | # Step 3: Allocate input arguments |
392 | | - repeated_args: List[T_ARGUMENT_LIST] = f_alloc_argument( |
393 | | - session, |
394 | | - device, |
395 | | - args_info, |
396 | | - alloc_repeat, |
397 | | - ) |
| 396 | + with Profiler.timeit("RPCRunner/alloc_argument"): |
| 397 | + repeated_args: List[T_ARGUMENT_LIST] = f_alloc_argument( |
| 398 | + session, |
| 399 | + device, |
| 400 | + args_info, |
| 401 | + alloc_repeat, |
| 402 | + ) |
398 | 403 | # Step 4: Run time_evaluator |
399 | | - costs: List[float] = f_run_evaluator( |
400 | | - session, |
401 | | - rt_mod, |
402 | | - device, |
403 | | - evaluator_config, |
404 | | - repeated_args, |
405 | | - ) |
| 404 | + with Profiler.timeit("RPCRunner/run_evaluator"): |
| 405 | + costs: List[float] = f_run_evaluator( |
| 406 | + session, |
| 407 | + rt_mod, |
| 408 | + device, |
| 409 | + evaluator_config, |
| 410 | + repeated_args, |
| 411 | + ) |
406 | 412 | return costs |
407 | 413 |
|
408 | 414 |
|
@@ -474,7 +480,7 @@ def default_alloc_argument( |
474 | 480 | """ |
475 | 481 | f_random_fill = get_global_func_on_rpc_session( |
476 | 482 | session, |
477 | | - "tvm.contrib.random.random_fill", |
| 483 | + "tvm.contrib.random.random_fill_for_measure", |
478 | 484 | "Please make sure 'USE_RANDOM' is turned ON in the config.cmake on the RPC server.", |
479 | 485 | ) |
480 | 486 |
|
|
0 commit comments