Skip to content

Commit fe5d09f

Browse files
Sunny-bot1, aquagull, Jiang-Jia-Jun, aquagull
authored
[FIX]Fix Machete compile via ENABLE_MACHETE (#3727)
* add ENABLE_MACHETE
* fix
* revert
* update
* pre_commit
* fix
* fix

---------

Co-authored-by: Ayakouji <[email protected]>
Co-authored-by: Jiang-Jia-Jun <[email protected]>
Co-authored-by: aquagull <[email protected]>
1 parent b9af95c commit fe5d09f

File tree

4 files changed

+17
-9
lines changed

4 files changed

+17
-9
lines changed

custom_ops/gpu_ops/cpp_extensions.cc

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -986,6 +986,7 @@ PYBIND11_MODULE(fastdeploy_ops, m) {
986986
py::arg("recv_expert_count"), py::arg("block_size"),
987987
"per token per block quant");
988988

989+
#ifdef ENABLE_MACHETE
989990
/*machete/machete_mm.cu
990991
* machete_mm
991992
*/
@@ -1004,6 +1005,7 @@ PYBIND11_MODULE(fastdeploy_ops, m) {
10041005
* machete_supported_schedules
10051006
*/
10061007
m.def("machete_supported_schedules", &MacheteSupportedSchedules, "machete supported schedules function");
1008+
#endif
10071009

10081010
/**
10091011
* moe/fused_moe/moe_topk_select.cu

custom_ops/setup_ops.py

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -373,6 +373,7 @@ def find_end_files(directory, end_str):
373373
if not os.listdir(json_dir):
374374
raise ValueError("Git clone nlohmann_json failed!")
375375

376+
cc_compile_args = []
376377
nvcc_compile_args = get_gencode_flags(archs)
377378
nvcc_compile_args += ["-DPADDLE_DEV"]
378379
nvcc_compile_args += ["-DPADDLE_ON_INFERENCE"]
@@ -519,12 +520,13 @@ def find_end_files(directory, end_str):
519520
sources += find_end_files("gpu_ops/wfp8afp8_sparse_gemm", ".cu")
520521
os.system("python gpu_ops/machete/generate.py")
521522
sources += find_end_files("gpu_ops/machete", ".cu")
523+
cc_compile_args += ["-DENABLE_MACHETE"]
522524

523525
setup(
524526
name="fastdeploy_ops",
525527
ext_modules=CUDAExtension(
526528
sources=sources,
527-
extra_compile_args={"nvcc": nvcc_compile_args},
529+
extra_compile_args={"cxx": cc_compile_args, "nvcc": nvcc_compile_args},
528530
libraries=["cublasLt"],
529531
extra_link_args=["-lcuda"],
530532
),

fastdeploy/model_executor/layers/quantization/ops/machete_mm.py

Lines changed: 7 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -26,8 +26,14 @@ def get_sm_version():
2626
return cc
2727

2828

29+
_ENABLE_MACHETE = False
2930
if current_platform.is_cuda() and get_sm_version() == 90:
30-
from fastdeploy.model_executor.ops.gpu import machete_mm, machete_prepack_B
31+
try:
32+
from fastdeploy.model_executor.ops.gpu import machete_mm, machete_prepack_B
33+
34+
_ENABLE_MACHETE = True
35+
except Exception:
36+
pass
3137

3238

3339
def get_pack_factor(num_bits):

fastdeploy/model_executor/layers/quantization/weight_only.py

Lines changed: 5 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -34,12 +34,6 @@
3434
from .quant_base import QuantConfigBase, QuantMethodBase
3535

3636

37-
def get_sm_version():
38-
prop = paddle.device.cuda.get_device_properties()
39-
cc = prop.major * 10 + prop.minor
40-
return cc
41-
42-
4337
class WeightOnlyConfig(QuantConfigBase):
4438
"""
4539
Quantization config for weight only
@@ -139,10 +133,14 @@ def get_quant_method(self, layer) -> Optional[QuantMethodBase]:
139133
else:
140134
raise ValueError(f"Unsupported MOE backend {layer.use_method}")
141135
else:
136+
from fastdeploy.model_executor.layers.quantization.ops.machete_mm import (
137+
_ENABLE_MACHETE,
138+
)
139+
142140
if (
143141
self.name() == "wint4"
142+
and _ENABLE_MACHETE
144143
and envs.FD_USE_MACHETE == "1"
145-
and get_sm_version() == 90
146144
and layer.weight_shape[1]
147145
and layer.weight_shape[1] % 128 == 0
148146
):

0 commit comments

Comments
 (0)