diff --git a/python/sglang/srt/layers/attention/trtllm_fp8_kv_kernel.py b/python/sglang/srt/layers/attention/triton_ops/trtllm_fp8_kv_kernel.py
similarity index 100%
rename from python/sglang/srt/layers/attention/trtllm_fp8_kv_kernel.py
rename to python/sglang/srt/layers/attention/triton_ops/trtllm_fp8_kv_kernel.py
diff --git a/python/sglang/srt/layers/attention/trtllm_mha_backend.py b/python/sglang/srt/layers/attention/trtllm_mha_backend.py
index 5dcbfb1b205f..8f2447d210c7 100644
--- a/python/sglang/srt/layers/attention/trtllm_mha_backend.py
+++ b/python/sglang/srt/layers/attention/trtllm_mha_backend.py
@@ -15,7 +15,9 @@
     FlashInferAttnBackend,
     FlashInferMultiStepDraftBackend,
 )
-from sglang.srt.layers.attention.trtllm_fp8_kv_kernel import fused_fp8_set_kv_buffer
+from sglang.srt.layers.attention.triton_ops.trtllm_fp8_kv_kernel import (
+    fused_fp8_set_kv_buffer,
+)
 from sglang.srt.model_executor.forward_batch_info import ForwardBatch, ForwardMode
 from sglang.srt.utils import is_flashinfer_available
 
diff --git a/test/manual/test_trtllm_fp8_kv_kernel.py b/test/manual/test_trtllm_fp8_kv_kernel.py
index b713747a2a0c..c97deaf11ef6 100644
--- a/test/manual/test_trtllm_fp8_kv_kernel.py
+++ b/test/manual/test_trtllm_fp8_kv_kernel.py
@@ -6,7 +6,9 @@
 
 import torch
 
-from sglang.srt.layers.attention.trtllm_fp8_kv_kernel import fused_fp8_set_kv_buffer
+from sglang.srt.layers.attention.triton_ops.trtllm_fp8_kv_kernel import (
+    fused_fp8_set_kv_buffer,
+)
 from sglang.test.test_utils import CustomTestCase