
Commit 6ba8106

[MetaSchedule][Runtime] Enhance Runner RandomFill
Parent: 4b15746

File tree

20 files changed: +377 −108 lines


CMakeLists.txt

Lines changed: 1 addition & 0 deletions
@@ -89,6 +89,7 @@ tvm_option(USE_CUDNN "Build with cuDNN" OFF)
 tvm_option(USE_CUBLAS "Build with cuBLAS" OFF)
 tvm_option(USE_CUTLASS "Build with CUTLASS" OFF)
 tvm_option(USE_THRUST "Build with Thrust" OFF)
+tvm_option(USE_CURAND "Build with cuRAND" OFF)
 tvm_option(USE_MIOPEN "Build with ROCM:MIOpen" OFF)
 tvm_option(USE_ROCBLAS "Build with ROCM:RoCBLAS" OFF)
 tvm_option(USE_SORT "Build with sort support" ON)

cmake/config.cmake

Lines changed: 3 additions & 0 deletions
@@ -296,6 +296,9 @@ set(USE_VTA_FPGA OFF)
 # Whether use Thrust
 set(USE_THRUST OFF)
 
+# Whether use cuRAND
+set(USE_CURAND OFF)
+
 # Whether to build the TensorFlow TVMDSOOp module
 set(USE_TF_TVMDSOOP OFF)
 
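
As with the other optional backends, the flag defaults to OFF; a local build opts in by changing this to set(USE_CURAND ON) in config.cmake, or by passing -DUSE_CURAND=ON on the cmake command line.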

cmake/modules/CUDA.cmake

Lines changed: 12 additions & 0 deletions
@@ -69,6 +69,18 @@ if(USE_CUDA)
     list(APPEND RUNTIME_SRCS ${CONTRIB_THRUST_SRC})
   endif(USE_THRUST)
 
+  if(USE_CURAND)
+    message(STATUS "Build with cuRAND support")
+    message(STATUS "${CUDA_CURAND_LIBRARY}")
+    cmake_minimum_required(VERSION 3.13) # to compile CUDA code
+    enable_language(CUDA)
+    tvm_file_glob(GLOB CONTRIB_CURAND_SRC_CC src/runtime/contrib/curand/*.cc)
+    tvm_file_glob(GLOB CONTRIB_CURAND_SRC_CU src/runtime/contrib/curand/*.cu)
+    list(APPEND TVM_RUNTIME_LINKER_LIBS ${CUDA_CURAND_LIBRARY})
+    list(APPEND RUNTIME_SRCS ${CONTRIB_CURAND_SRC_CC})
+    list(APPEND RUNTIME_SRCS ${CONTRIB_CURAND_SRC_CU})
+  endif(USE_CURAND)
+
   if(USE_GRAPH_EXECUTOR_CUDA_GRAPH)
     if(NOT USE_GRAPH_EXECUTOR)
       message(FATAL_ERROR "CUDA Graph is only supported by graph executor, please set USE_GRAPH_EXECUTOR=ON")
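
This block compiles the new cuRAND support code under src/runtime/contrib/curand into the runtime and links libcurand. The packed function that the MetaSchedule runner looks up by name (tvm.contrib.random.random_fill_for_measure; see local_runner.py below) comes from the global-function registry, so a quick way to check whether a given build registers it — a minimal sketch, assuming nothing beyond the standard registry API:

import tvm

# With allow_missing=True, get_global_func returns None instead of raising
# when the symbol is absent from this build of the runtime.
f = tvm.get_global_func("tvm.contrib.random.random_fill_for_measure", allow_missing=True)
print("random_fill_for_measure available:", f is not None)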

cmake/modules/LibInfo.cmake

Lines changed: 1 addition & 0 deletions
@@ -111,6 +111,7 @@ function(add_lib_info src_file)
     TVM_INFO_USE_TFLITE="${USE_TFLITE}"
     TVM_INFO_USE_THREADS="${USE_THREADS}"
     TVM_INFO_USE_THRUST="${USE_THRUST}"
+    TVM_INFO_USE_CURAND="${USE_CURAND}"
     TVM_INFO_USE_VITIS_AI="${USE_VITIS_AI}"
     TVM_INFO_USE_VULKAN="${USE_VULKAN}"
     TVM_INFO_USE_CLML="${USE_CLML}"
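
This mirrors how every other build flag is surfaced, so the setting can be inspected at runtime. A short check, assuming the TVM_INFO_ prefix is stripped in the reported keys as it is for the existing entries:

import tvm.support

# libinfo() returns a dict of build-time options and their configured
# values, e.g. {"USE_CURAND": "OFF", ...}
print(tvm.support.libinfo().get("USE_CURAND"))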

cmake/utils/FindCUDA.cmake

Lines changed: 5 additions & 0 deletions
@@ -85,6 +85,10 @@ macro(find_cuda use_cuda use_cudnn)
       PATHS ${CUDA_TOOLKIT_ROOT_DIR}
       PATH_SUFFIXES lib lib64 targets/x86_64-linux/lib targets/x86_64-linux/lib/stubs lib64/stubs lib/x86_64-linux-gnu
       NO_DEFAULT_PATH)
+    find_library(CUDA_CURAND_LIBRARY curand
+      ${CUDA_TOOLKIT_ROOT_DIR}/lib64
+      ${CUDA_TOOLKIT_ROOT_DIR}/lib
+      NO_DEFAULT_PATH)
     find_library(CUDA_CUBLAS_LIBRARY cublas
       ${CUDA_TOOLKIT_ROOT_DIR}/lib64
       ${CUDA_TOOLKIT_ROOT_DIR}/lib
@@ -134,6 +138,7 @@ macro(find_cuda use_cuda use_cudnn)
     message(STATUS "Found CUDA_CUDNN_INCLUDE_DIRS=" ${CUDA_CUDNN_INCLUDE_DIRS})
     message(STATUS "Found CUDA_CUDNN_LIBRARY=" ${CUDA_CUDNN_LIBRARY})
     message(STATUS "Found CUDA_CUBLAS_LIBRARY=" ${CUDA_CUBLAS_LIBRARY})
+    message(STATUS "Found CUDA_CURAND_LIBRARY=" ${CUDA_CURAND_LIBRARY})
     message(STATUS "Found CUDA_CUBLASLT_LIBRARY=" ${CUDA_CUBLASLT_LIBRARY})
   endif(CUDA_FOUND)
 endmacro(find_cuda)

docs/contribute/pull_request.rst

Lines changed: 1 addition & 0 deletions
@@ -118,6 +118,7 @@ space. You can remove stale images that aren't used in the presently checked-out
 other worktrees using the following command:
 
 .. code:: bash
+
     docker/clear-stale-images.sh
 
 Consult the ``--help`` for more options.

python/tvm/auto_scheduler/testing/tune_onnx.py

Lines changed: 3 additions & 7 deletions
@@ -26,6 +26,7 @@
 from tvm import meta_schedule as ms
 from tvm import relay
 from tvm.meta_schedule.testing.custom_builder_runner import run_module_via_rpc
+from tvm.meta_schedule.utils import cpu_count
 from tvm.relay.frontend import from_onnx
 from tvm.support import describe
 
@@ -73,11 +74,6 @@ def _parse_args():
         type=str,
         required=True,
     )
-    args.add_argument(
-        "--rpc-workers",
-        type=int,
-        required=True,
-    )
     args.add_argument(
         "--work-dir",
         type=str,
@@ -100,7 +96,7 @@ def _parse_args():
     )
     args.add_argument(
         "--cpu-flush",
-        type=bool,
+        type=int,
         required=True,
     )
     parsed = args.parse_args()
@@ -125,7 +121,7 @@ def main():
         key=ARGS.rpc_key,
         host=ARGS.rpc_host,
         port=ARGS.rpc_port,
-        n_parallel=ARGS.rpc_workers,
+        n_parallel=cpu_count(logical=True),
         number=ARGS.number,
         repeat=ARGS.repeat,
         min_repeat_ms=ARGS.min_repeat_ms,
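
Two fixes here: the user-supplied --rpc-workers count gives way to the host's logical core count, and --cpu-flush switches from type=bool to type=int. The latter matters because argparse applies the type callable to the raw string, and bool() on any non-empty string — including "0" and "False" — returns True. A standalone sketch of the corrected behavior:

import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--cpu-flush", type=int, required=True)

# With type=bool, "--cpu-flush 0" would have parsed as True, because
# bool("0") is True. With type=int, "0" becomes 0, which is falsy.
args = parser.parse_args(["--cpu-flush", "0"])
print(bool(args.cpu_flush))  # False, as intended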

python/tvm/auto_scheduler/testing/tune_relay.py

Lines changed: 3 additions & 7 deletions
@@ -26,6 +26,7 @@
 from tvm import relay
 from tvm.meta_schedule.testing.custom_builder_runner import run_module_via_rpc
 from tvm.meta_schedule.testing.relay_workload import get_network
+from tvm.meta_schedule.utils import cpu_count
 from tvm.support import describe
 
 
@@ -66,11 +67,6 @@ def _parse_args():
         type=str,
         required=True,
     )
-    args.add_argument(
-        "--rpc-workers",
-        type=int,
-        required=True,
-    )
     args.add_argument(
         "--work-dir",
         type=str,
@@ -98,7 +94,7 @@ def _parse_args():
     )
     args.add_argument(
         "--cpu-flush",
-        type=bool,
+        type=int,
         required=True,
     )
     parsed = args.parse_args()
@@ -123,7 +119,7 @@ def main():
         key=ARGS.rpc_key,
         host=ARGS.rpc_host,
         port=ARGS.rpc_port,
-        n_parallel=ARGS.rpc_workers,
+        n_parallel=cpu_count(logical=True),
         number=ARGS.number,
         repeat=ARGS.repeat,
         min_repeat_ms=ARGS.min_repeat_ms,

python/tvm/auto_scheduler/testing/tune_te.py

Lines changed: 3 additions & 7 deletions
@@ -21,6 +21,7 @@
 import tvm
 from tvm import auto_scheduler
 from tvm.meta_schedule.testing.te_workload import CONFIGS
+from tvm.meta_schedule.utils import cpu_count
 from tvm.support import describe
 
 
@@ -56,11 +57,6 @@ def _parse_args():
         type=str,
         required=True,
     )
-    args.add_argument(
-        "--rpc-workers",
-        type=int,
-        required=True,
-    )
     args.add_argument(
         "--work-dir",
         type=str,
@@ -83,7 +79,7 @@ def _parse_args():
     )
     args.add_argument(
         "--cpu-flush",
-        type=bool,
+        type=int,
         required=True,
     )
     parsed = args.parse_args()
@@ -132,7 +128,7 @@ def main():
         key=ARGS.rpc_key,
         host=ARGS.rpc_host,
         port=ARGS.rpc_port,
-        n_parallel=ARGS.rpc_workers,
+        n_parallel=cpu_count(logical=True),
         number=ARGS.number,
         repeat=ARGS.repeat,
         min_repeat_ms=ARGS.min_repeat_ms,
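
The same pair of changes lands in all three testing scripts (tune_onnx.py, tune_relay.py, tune_te.py): parallelism for the RPC runner is now derived from the host instead of a flag. The imported helper reduces to:

from tvm.meta_schedule.utils import cpu_count

# Logical core count of the tuning host, used as the runner's n_parallel
# in place of the removed --rpc-workers argument.
print(cpu_count(logical=True))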

python/tvm/meta_schedule/runner/local_runner.py

Lines changed: 24 additions & 24 deletions
@@ -23,17 +23,17 @@
 
 from ...contrib.popen_pool import PopenPoolExecutor
 from ...runtime import Device, Module
+from ..profiler import Profiler
 from ..utils import derived_object, get_global_func_with_default_on_worker
 from .config import EvaluatorConfig
-from .runner import PyRunner, RunnerFuture, RunnerInput, RunnerResult, PyRunnerFuture
+from .runner import PyRunner, PyRunnerFuture, RunnerFuture, RunnerInput, RunnerResult
 from .utils import (
-    T_ARGUMENT_LIST,
     T_ARG_INFO_JSON_OBJ_LIST,
+    T_ARGUMENT_LIST,
     alloc_argument_common,
     run_evaluator_common,
 )
 
-
 logger = logging.getLogger(__name__)  # pylint: disable=invalid-name
 
 
@@ -137,26 +137,29 @@ def resource_handler():
             yield
         finally:
             # Final step. Always clean up
-            f_cleanup()
+            with Profiler.timeit("LocalRunner/cleanup"):
+                f_cleanup()
 
     with resource_handler():
         # Step 1: create the local runtime module
-        rt_mod = tvm.runtime.load_module(artifact_path)
-        # Step 2: create the local device
-        device = tvm.runtime.device(dev_type=device_type, dev_id=0)
-        # Step 3: Allocate input arguments
-        repeated_args: List[T_ARGUMENT_LIST] = f_alloc_argument(
-            device,
-            args_info,
-            alloc_repeat,
-        )
-        # Step 4: Run time_evaluator
-        costs: List[float] = f_run_evaluator(
-            rt_mod,
-            device,
-            evaluator_config,
-            repeated_args,
-        )
+        with Profiler.timeit("LocalRunner/load_module"):
+            rt_mod = tvm.runtime.load_module(artifact_path)
+        # Step 2: Allocate input arguments
+        with Profiler.timeit("LocalRunner/alloc_argument"):
+            device = tvm.runtime.device(dev_type=device_type, dev_id=0)
+            repeated_args: List[T_ARGUMENT_LIST] = f_alloc_argument(
+                device,
+                args_info,
+                alloc_repeat,
+            )
+        # Step 3: Run time_evaluator
+        with Profiler.timeit("LocalRunner/run_evaluator"):
+            costs: List[float] = f_run_evaluator(
+                rt_mod,
+                device,
+                evaluator_config,
+                repeated_args,
+            )
     return costs
 
 
@@ -313,9 +316,6 @@ def _check(
             get_global_func_with_default_on_worker(name=f_alloc_argument, default=None)
             get_global_func_with_default_on_worker(name=f_run_evaluator, default=None)
             get_global_func_with_default_on_worker(name=f_cleanup, default=None)
-            get_global_func_with_default_on_worker(
-                name="tvm.contrib.random.random_fill", default=None
-            )
 
         value = self.pool.submit(
             _check,
@@ -348,7 +348,7 @@ def default_alloc_argument(
         The allocation args
     """
     f_random_fill = get_global_func_with_default_on_worker(
-        name="tvm.contrib.random.random_fill", default=None
+        name="tvm.contrib.random.random_fill_for_measure", default=None
    )
     return alloc_argument_common(f_random_fill, device, args_info, alloc_repeat)
 
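Two substantive changes ride along with the import reordering. First, every phase of a local run (load_module, alloc_argument, run_evaluator, cleanup) is now wrapped in Profiler.timeit, so each phase's cost appears in MetaSchedule's tuning-time breakdown. Second, default_alloc_argument now resolves tvm.contrib.random.random_fill_for_measure — the filler backed by cuRAND when USE_CURAND is on — instead of tvm.contrib.random.random_fill. A rough sketch of how the timeit scopes aggregate, assuming the context-manager behavior of tvm.meta_schedule.Profiler (the Example/* scope names are made up for illustration):

import time
from tvm import meta_schedule as ms

with ms.Profiler() as profiler:
    with ms.Profiler.timeit("Example/load_module"):
        time.sleep(0.1)  # stand-in for tvm.runtime.load_module(...)
    with ms.Profiler.timeit("Example/run_evaluator"):
        time.sleep(0.2)  # stand-in for the time_evaluator loop
print(profiler.table())  # per-scope timing breakdown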
