Skip to content

Commit c84fe9f

Browse files
committed
[Enhancement] Add CPU utilization and count settings for Auto-Tuning (tile-ai#630)
* [Enhancement] Add CPU utilization and count settings for Auto-Tuning - Introduced environment variables for CPU utilization, counts, and maximum CPU count for auto-tuning. - Updated the AutoTuner class to utilize these new settings, improving flexibility and performance in multi-threaded environments. - Enhanced logging to provide better insights into the auto-tuning process based on the configured CPU settings. * typo fix
1 parent 5a64d07 commit c84fe9f

File tree

2 files changed

+39
-2
lines changed

2 files changed

+39
-2
lines changed

tilelang/autotuner/__init__.py

Lines changed: 28 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,13 @@
2525
import traceback
2626
from pathlib import Path
2727

28-
from tilelang.env import TILELANG_CACHE_DIR, is_cache_enabled
28+
from tilelang.env import (
29+
TILELANG_CACHE_DIR,
30+
TILELANG_AUTO_TUNING_CPU_UTILITIES,
31+
TILELANG_AUTO_TUNING_CPU_COUNTS,
32+
TILELANG_AUTO_TUNING_MAX_CPU_COUNT,
33+
is_cache_enabled,
34+
)
2935
from tilelang.autotuner.param import CompileArgs, ProfileArgs, AutotuneResult
3036
from tilelang.jit.param import _P, _RProg
3137
from tilelang.version import __version__
@@ -419,8 +425,28 @@ def shape_equal(a, b):
419425
kernel=jit_kernel)
420426
self._memory_cache[key] = autotuner_result
421427
return autotuner_result
428+
# get the cpu count
429+
available_cpu_count = get_available_cpu_count()
430+
cpu_utilizations = float(TILELANG_AUTO_TUNING_CPU_UTILITIES)
431+
cpu_counts = int(TILELANG_AUTO_TUNING_CPU_COUNTS)
432+
max_cpu_count = int(TILELANG_AUTO_TUNING_MAX_CPU_COUNT)
433+
if cpu_counts > 0:
434+
num_workers = min(cpu_counts, available_cpu_count)
435+
logger.info(
436+
f"Auto-tuning with {cpu_counts} CPU counts, {available_cpu_count} CPUs available, {num_workers} CPUs will be used"
437+
)
438+
else:
439+
num_workers = max(1, int(available_cpu_count * cpu_utilizations))
440+
logger.info(
441+
f"Auto-tuning with {cpu_utilizations} CPU utilizations, {available_cpu_count} CPUs available, {num_workers} CPUs will be used"
442+
)
443+
444+
if max_cpu_count > 0 and num_workers > max_cpu_count:
445+
logger.warning(
446+
f"Auto-tuning with {cpu_utilizations} CPU utilizations, {available_cpu_count} CPUs available, {num_workers} CPUs will be used, but the max CPU count is {max_cpu_count}, so we will use {max_cpu_count} CPUs"
447+
)
448+
num_workers = max_cpu_count
422449

423-
num_workers = max(1, int(get_available_cpu_count() * 0.9))
424450
pool = concurrent.futures.ThreadPoolExecutor(max_workers=num_workers)
425451
futures = []
426452
future_to_index = {}

tilelang/env.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,17 @@ def _initialize_torch_cuda_arch_flags():
7777
# Auto-clear cache if environment variable is set
7878
TILELANG_CLEAR_CACHE = os.environ.get("TILELANG_CLEAR_CACHE", "0")
7979

80+
# CPU Utilizations for Auto-Tuning, default is 0.9
81+
TILELANG_AUTO_TUNING_CPU_UTILITIES: str = os.environ.get("TILELANG_AUTO_TUNING_CPU_UTILITIES",
82+
"0.9")
83+
84+
# CPU COUNTS for Auto-Tuning, default is -1,
85+
# which will use TILELANG_AUTO_TUNING_CPU_UTILITIES * get_available_cpu_count()
86+
TILELANG_AUTO_TUNING_CPU_COUNTS: str = os.environ.get("TILELANG_AUTO_TUNING_CPU_COUNTS", "-1")
87+
88+
# Max CPU Count for Auto-Tuning, default is -1 (a non-positive value means no upper limit is applied)
89+
TILELANG_AUTO_TUNING_MAX_CPU_COUNT: str = os.environ.get("TILELANG_AUTO_TUNING_MAX_CPU_COUNT", "-1")
90+
8091
# SETUP ENVIRONMENT VARIABLES
8192
CUTLASS_NOT_FOUND_MESSAGE = ("CUTLASS is not installed or found in the expected path")
8293
", which may lead to compilation bugs when utilize tilelang backend."

0 commit comments

Comments
 (0)