diff --git a/python/sglang/srt/layers/attention/trtllm_fp8_kv_kernel.py b/python/sglang/srt/layers/attention/triton_ops/trtllm_fp8_kv_kernel.py similarity index 100% rename from python/sglang/srt/layers/attention/trtllm_fp8_kv_kernel.py rename to python/sglang/srt/layers/attention/triton_ops/trtllm_fp8_kv_kernel.py diff --git a/python/sglang/srt/layers/attention/trtllm_mha_backend.py b/python/sglang/srt/layers/attention/trtllm_mha_backend.py index 5dcbfb1b205f..8f2447d210c7 100644 --- a/python/sglang/srt/layers/attention/trtllm_mha_backend.py +++ b/python/sglang/srt/layers/attention/trtllm_mha_backend.py @@ -15,7 +15,9 @@ FlashInferAttnBackend, FlashInferMultiStepDraftBackend, ) -from sglang.srt.layers.attention.trtllm_fp8_kv_kernel import fused_fp8_set_kv_buffer +from sglang.srt.layers.attention.triton_ops.trtllm_fp8_kv_kernel import ( + fused_fp8_set_kv_buffer, +) from sglang.srt.model_executor.forward_batch_info import ForwardBatch, ForwardMode from sglang.srt.utils import is_flashinfer_available diff --git a/test/manual/test_trtllm_fp8_kv_kernel.py b/test/manual/test_trtllm_fp8_kv_kernel.py index b713747a2a0c..c97deaf11ef6 100644 --- a/test/manual/test_trtllm_fp8_kv_kernel.py +++ b/test/manual/test_trtllm_fp8_kv_kernel.py @@ -6,7 +6,9 @@ import torch -from sglang.srt.layers.attention.trtllm_fp8_kv_kernel import fused_fp8_set_kv_buffer +from sglang.srt.layers.attention.triton_ops.trtllm_fp8_kv_kernel import ( + fused_fp8_set_kv_buffer, +) from sglang.test.test_utils import CustomTestCase