diff --git a/docs/programming_guides/autotuning.md b/docs/programming_guides/autotuning.md
index 66d46889f..9cc5a2d94 100644
--- a/docs/programming_guides/autotuning.md
+++ b/docs/programming_guides/autotuning.md
@@ -293,7 +293,7 @@ def matmul_configs(M, N, K):
 
 Tune compile‑time options explicitly:
 - `target='auto'|'cuda'|'hip'|'metal'` (normalized to a TVM Target)
-- `execution_backend='auto'|'tvm_ffi'|'ctypes'|'cython'|'nvrtc'|'torch'`
+- `execution_backend='auto'|'tvm_ffi'|'cython'|'nvrtc'|'torch'`
 - `pass_configs={...}` to toggle TileLang/TVM passes for experiments
 
 On CUDA with multiple GPUs, the tuner sets the current device per worker thread
diff --git a/testing/python/cpu/test_tilelang_cpu_gemm.py b/testing/python/cpu/test_tilelang_cpu_gemm.py
index 55646622e..4113c9d06 100644
--- a/testing/python/cpu/test_tilelang_cpu_gemm.py
+++ b/testing/python/cpu/test_tilelang_cpu_gemm.py
@@ -101,7 +101,7 @@ def matmul(
     block_M, block_N, block_K = M // 4, N // 4, K // 4
     cpu_func = matmul_jit_test(M, N, K, block_M, block_N, block_K)
     with tvm.target.Target("c"):
-        complied_fun = tilelang.compile(cpu_func, -1, execution_backend="ctypes")
+        complied_fun = tilelang.compile(cpu_func, -1, execution_backend="cython")
 
     in_dtype = T.float16
     A = torch.randn(M, K, dtype=torch.__getattribute__(in_dtype))
diff --git a/testing/python/jit/test_tilelang_jit_gemm_cython.py b/testing/python/jit/test_tilelang_jit_gemm_cython.py
index c5399fc51..546617583 100644
--- a/testing/python/jit/test_tilelang_jit_gemm_cython.py
+++ b/testing/python/jit/test_tilelang_jit_gemm_cython.py
@@ -256,24 +256,16 @@ def run_cython_kernel_do_bench(
     )
 
     cython_matmul_kernel = tilelang.compile(program, execution_backend="cython")
-    ctypes_matmul_kernel = tilelang.compile(program, execution_backend="ctypes")
 
     cython_profiler = cython_matmul_kernel.get_profiler()
-    ctypes_profiler = ctypes_matmul_kernel.get_profiler()
 
     cython_latency = cython_profiler.do_bench(func=cython_matmul_kernel)
     print(f"cython Latency: {cython_latency} ms")
 
-    # assert ctypes_latency is not None
-
     tvm_latency = cython_profiler.do_bench()
     print(f"TVM Latency: {tvm_latency} ms")
 
     assert tvm_latency is not None
-
-    ctypes_latency = ctypes_profiler.do_bench(func=ctypes_matmul_kernel)
-    print(f"ctypes Latency: {ctypes_latency} ms")
-
     assert cython_latency is not None
 
 
diff --git a/tilelang/autotuner/param.py b/tilelang/autotuner/param.py
index 69ad49c79..e3f540177 100644
--- a/tilelang/autotuner/param.py
+++ b/tilelang/autotuner/param.py
@@ -49,7 +49,7 @@ class CompileArgs:
     """
 
     out_idx: list[int] | int | None = None
-    execution_backend: Literal["auto", "tvm_ffi", "ctypes", "cython", "nvrtc", "torch"] = "auto"
+    execution_backend: Literal["auto", "tvm_ffi", "cython", "nvrtc", "torch"] = "auto"
     target: Literal["auto", "cuda", "hip"] = "auto"
     target_host: str | Target = None
     verbose: bool = False
@@ -265,7 +265,7 @@ def _load_kernel_from_disk(
         target: str | Target = "auto",
         target_host: str | Target = None,
         out_idx: list[int] | int | None = None,
-        execution_backend: Literal["tvm_ffi", "ctypes", "cython", "nvrtc", "torch"] = "tvm_ffi",
+        execution_backend: Literal["tvm_ffi", "cython", "nvrtc", "torch"] = "tvm_ffi",
         pass_configs: dict = None,
         compile_flags: list[str] | str | None = None,
         func: Callable = None,
diff --git a/tilelang/autotuner/tuner.py b/tilelang/autotuner/tuner.py
index 8d9503739..95595d5dc 100644
--- a/tilelang/autotuner/tuner.py
+++ b/tilelang/autotuner/tuner.py
@@ -144,7 +144,7 @@ def set_compile_args(
         self,
         out_idx: list[int] | int | None = None,
         target: Literal["auto", "cuda", "hip", "metal"] = "auto",
-        execution_backend: Literal["auto", "tvm_ffi", "ctypes", "cython", "nvrtc", "torch"] = "auto",
+        execution_backend: Literal["auto", "tvm_ffi", "cython", "nvrtc", "torch"] = "auto",
         target_host: str | Target = None,
         verbose: bool = False,
         pass_configs: dict[str, Any] | None = None,
@@ -725,7 +725,7 @@ def autotune(  # This is the new public interface
         Compilation target for TVM (e.g., "cuda", "llvm"). Defaults to "auto".
     target_host : Union[str, Target], optional
         Target host for cross-compilation. Defaults to None.
-    execution_backend : Literal["auto", "tvm_ffi", "ctypes", "cython", "nvrtc", "torch"], optional
+    execution_backend : Literal["auto", "tvm_ffi", "cython", "nvrtc", "torch"], optional
         Backend for kernel execution and argument passing. Use "auto" to pick a sensible
         default per target (cuda->tvm_ffi, metal->torch, others->cython).
     verbose : bool, optional
diff --git a/tilelang/cache/__init__.py b/tilelang/cache/__init__.py
index 18ac847bf..9774e8a9c 100644
--- a/tilelang/cache/__init__.py
+++ b/tilelang/cache/__init__.py
@@ -19,7 +19,7 @@ def cached(
     *args,
     target: str | Target = "auto",
     target_host: str | Target = None,
-    execution_backend: Literal["auto", "tvm_ffi", "ctypes", "cython", "nvrtc", "torch"] | None = "auto",
+    execution_backend: Literal["auto", "tvm_ffi", "cython", "nvrtc", "torch"] | None = "auto",
     verbose: bool | None = False,
     pass_configs: dict | None = None,
     compile_flags: list[str] | str | None = None,
diff --git a/tilelang/cache/kernel_cache.py b/tilelang/cache/kernel_cache.py
index cf6a5591b..ded861a3d 100644
--- a/tilelang/cache/kernel_cache.py
+++ b/tilelang/cache/kernel_cache.py
@@ -48,7 +48,7 @@ class KernelCache:
     _instance = None  # For implementing singleton pattern
     _lock = threading.Lock()  # For thread safety
     _memory_cache = {}  # In-memory cache dictionary
-    execution_backend: Literal["tvm_ffi", "ctypes", "cython", "nvrtc", "torch", "cutedsl"] = "tvm_ffi"
+    execution_backend: Literal["tvm_ffi", "cython", "nvrtc", "torch", "cutedsl"] = "tvm_ffi"
 
     def __new__(cls):
         """
@@ -77,7 +77,7 @@ def _generate_key(
         self,
         func: Callable,
         out_idx: list[int],
-        execution_backend: Literal["tvm_ffi", "ctypes", "cython", "nvrtc", "torch", "cutedsl"] = "tvm_ffi",
+        execution_backend: Literal["tvm_ffi", "cython", "nvrtc", "torch", "cutedsl"] = "tvm_ffi",
         args=None,
         target: str | Target = "auto",
         target_host: str | Target = None,
@@ -123,7 +123,7 @@ def cached(
         *args,
         target: str | Target = "auto",
         target_host: str | Target = None,
-        execution_backend: Literal["auto", "tvm_ffi", "ctypes", "cython", "nvrtc", "torch", "cutedsl"] = "auto",
+        execution_backend: Literal["auto", "tvm_ffi", "cython", "nvrtc", "torch", "cutedsl"] = "auto",
         verbose: bool = False,
         pass_configs: dict = None,
         compile_flags: list[str] | str | None = None,
@@ -389,7 +389,7 @@ def _load_kernel_from_disk(
         target: str | Target = "auto",
         target_host: str | Target | None = None,
         out_idx: list[int] | None = None,
-        execution_backend: Literal["tvm_ffi", "ctypes", "cython", "nvrtc", "torch", "cutedsl"] = "tvm_ffi",
+        execution_backend: Literal["tvm_ffi", "cython", "nvrtc", "torch", "cutedsl"] = "tvm_ffi",
         pass_configs: dict | None = None,
         compile_flags: list[str] | str | None = None,
         func: Callable | None = None,
diff --git a/tilelang/jit/__init__.py b/tilelang/jit/__init__.py
index eac206f72..23163b1fe 100644
--- a/tilelang/jit/__init__.py
+++ b/tilelang/jit/__init__.py
@@ -49,7 +49,7 @@
 def compile(
     func: PrimFunc[_KP, _T] = None,
     out_idx: list[int] | int | None = None,
-    execution_backend: Literal["auto", "dlpack", "tvm_ffi", "ctypes", "cython", "nvrtc", "torch", "cutedsl"] = "auto",
+    execution_backend: Literal["auto", "dlpack", "tvm_ffi", "cython", "nvrtc", "torch", "cutedsl"] = "auto",
     target: str | Target = "auto",
     target_host: str | Target | None = None,
     verbose: bool = False,
@@ -64,7 +64,7 @@ def compile(
         The TileLang TIR function to compile and wrap.
     out_idx : Union[List[int], int], optional
         Index(es) of the output tensors to return (default: None).
-    execution_backend : Literal["auto", "dlpack", "tvm_ffi", "ctypes", "cython", "nvrtc", "torch", "cutedsl"], optional
+    execution_backend : Literal["auto", "dlpack", "tvm_ffi", "cython", "nvrtc", "torch", "cutedsl"], optional
         Execution backend to use for kernel execution. Use "auto" to pick a sensible
         default per target (cuda->tvm_ffi, metal->torch, others->cython).
     target : Union[str, Target], optional
@@ -118,7 +118,7 @@ def compile(
 def par_compile(
     funcs: Iterable[PrimFunc[_KP, _T]],
     out_idx: list[int] | int | None = None,
-    execution_backend: Literal["auto", "dlpack", "tvm_ffi", "ctypes", "cython", "nvrtc", "torch", "cutedsl"] = "auto",
+    execution_backend: Literal["auto", "dlpack", "tvm_ffi", "cython", "nvrtc", "torch", "cutedsl"] = "auto",
     target: str | Target = "auto",
     target_host: str | Target | None = None,
     verbose: bool = False,
@@ -135,7 +135,7 @@ def par_compile(
         The TileLang TIR functions to compile and wrap.
     out_idx : Union[List[int], int], optional
         Index(es) of the output tensors to return (default: None).
-    execution_backend : Literal["auto", "dlpack", "tvm_ffi", "ctypes", "cython", "nvrtc", "torch", "cutedsl"], optional
+    execution_backend : Literal["auto", "dlpack", "tvm_ffi", "cython", "nvrtc", "torch", "cutedsl"], optional
         Execution backend to use for kernel execution. Use "auto" to pick a sensible
         default per target (cuda->tvm_ffi, metal->torch, others->cython).
     target : Union[str, Target], optional
@@ -202,7 +202,7 @@ class JITImpl(Generic[_P, _KP, _T, _Ret]):
     out_idx : list[int] | int | None
         Which output tensor(s) of the compiled kernel should be returned to the
         caller. Accepts a single index, a list of indices, or None to return all.
-    execution_backend : Literal["dlpack", "ctypes", "cython"]
+    execution_backend : Literal["auto", "dlpack", "tvm_ffi", "cython", "nvrtc", "torch", "cutedsl"]
         Backend used for exchanging arguments and executing the generated kernel.
     target : str | tvm.target.Target
         TVM compilation target (e.g. "cuda", "llvm", or "auto").
@@ -256,7 +256,7 @@ class JITImpl(Generic[_P, _KP, _T, _Ret]):
     """
 
     out_idx: list[int] | int | None
-    execution_backend: Literal["auto", "dlpack", "tvm_ffi", "ctypes", "cython", "nvrtc", "torch", "cutedsl"]
+    execution_backend: Literal["auto", "dlpack", "tvm_ffi", "cython", "nvrtc", "torch", "cutedsl"]
     target: str | Target
     target_host: str | Target
     verbose: bool
@@ -424,7 +424,7 @@ def __call__(self, *args: _P.args, **kwargs: _P.kwargs) -> _Ret:
             return kernel
 
 
-ExecutionBackend = Literal["auto", "dlpack", "tvm_ffi", "ctypes", "cython", "nvrtc", "torch", "cutedsl"]
+ExecutionBackend = Literal["auto", "dlpack", "tvm_ffi", "cython", "nvrtc", "torch", "cutedsl"]
 
 
 @overload
@@ -473,7 +473,7 @@ def jit(  # This is the new public interface
         Compilation target for TVM (e.g., "cuda", "llvm"). Defaults to "auto".
     target_host : Union[str, Target], optional
         Target host for cross-compilation. Defaults to None.
-    execution_backend : Literal["auto", "dlpack", "tvm_ffi", "ctypes", "cython", "nvrtc", "torch", "cutedsl"], optional
+    execution_backend : Literal["auto", "dlpack", "tvm_ffi", "cython", "nvrtc", "torch", "cutedsl"], optional
         Backend for kernel execution and argument passing. Use "auto" to pick a sensible
         default per target (cuda->tvm_ffi, metal->torch, others->cython).
     verbose : bool, optional
diff --git a/tilelang/jit/adapter/__init__.py b/tilelang/jit/adapter/__init__.py
index f511608fc..0d9945285 100644
--- a/tilelang/jit/adapter/__init__.py
+++ b/tilelang/jit/adapter/__init__.py
@@ -1,6 +1,5 @@
 from .base import BaseKernelAdapter  # noqa: F401
 from .tvm_ffi import TVMFFIKernelAdapter  # noqa: F401
-from .ctypes import CtypesKernelAdapter  # noqa: F401
 from .cython import CythonKernelAdapter  # noqa: F401
 from .nvrtc import NVRTCKernelAdapter  # noqa: F401
 from .torch import MetalKernelAdapter  # noqa: F401
diff --git a/tilelang/jit/adapter/ctypes/__init__.py b/tilelang/jit/adapter/ctypes/__init__.py
deleted file mode 100644
index 5e6fdc84d..000000000
--- a/tilelang/jit/adapter/ctypes/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-from .adapter import CtypesKernelAdapter  # noqa: F401
diff --git a/tilelang/jit/adapter/ctypes/adapter.py b/tilelang/jit/adapter/ctypes/adapter.py
deleted file mode 100644
index b7cac9d6f..000000000
--- a/tilelang/jit/adapter/ctypes/adapter.py
+++ /dev/null
@@ -1,302 +0,0 @@
-"""The profiler and convert to torch utils"""
-
-from __future__ import annotations
-import torch
-from ..base import BaseKernelAdapter
-import ctypes
-from typing import Callable, Any
-from tilelang import tvm as tvm
-from tvm.target import Target
-from tvm.relax import TensorType
-from tvm import tir
-from tilelang.jit.adapter.wrapper import TLWrapper
-from tilelang.jit.adapter.libgen import LibraryGenerator
-from tilelang.utils.target import determine_target
-from tilelang.utils.language import retrieve_func_from_module
-
-
-# TODO(lei): remove ctypes adapter.
-class CtypesKernelAdapter(BaseKernelAdapter):
-    """Adapter class that converts TVM/TIR functions to callable CUDA kernels using ctypes.
-
-    This adapter handles:
-    1. Converting TIR functions to compiled CUDA libraries
-    2. Managing dynamic shapes in tensor operations
-    3. Wrapping C++ kernels for Python/PyTorch usage
-    """
-
-    # Class attributes to store compiled kernel information
-    target = "cuda"
-    ir_module: tvm.IRModule | None = None
-    # The global source code of the kernel -> global means the source code of the kernel
-    # that is not wrapped by the wrapper code
-    host_kernel_source: str | None = None
-    device_kernel_source: str | None = None
-    lib: ctypes.CDLL | None = None  # Compiled library handle
-    # Maps symbolic variables to their corresponding buffer and shape indices
-    dynamic_symbolic_map: dict[tir.Var, tuple[int, int]] | None = None
-    # Pass configs for the compiler
-    pass_configs: dict[str, Any] | None = None
-
-    # Add new cache attributes
-    param_dtypes: list[torch.dtype] | None = None  # Cache for parameter dtypes
-    param_shapes: list[list] | None = None  # Cache for parameter shapes
-
-    def __init__(
-        self,
-        params: list[TensorType],
-        result_idx: list[int],
-        target: str,
-        func_or_mod: tir.PrimFunc | tvm.IRModule,
-        host_mod: tvm.IRModule | None = None,
-        device_mod: tvm.IRModule | None = None,
-        host_kernel_source: str | None = None,
-        device_kernel_source: str | None = None,
-        verbose: bool = False,
-        pass_configs: dict[str, Any] | None = None,
-        compile_flags: list[str] | None = None,
-    ):
-        """Initialize the adapter with the given TIR function or module.
-
-        Args:
-            params: List of tensor types for inputs/outputs
-            result_idx: Indices of output tensors
-            target: Target platform (e.g., 'cuda')
-            func_or_mod: TIR function or module to be compiled
-            verbose: Enable verbose logging
-        """
-        self.params = params
-        self.result_idx = self._legalize_result_idx(result_idx)
-        self.host_kernel_source = host_kernel_source
-        self.device_kernel_source = device_kernel_source
-
-        if isinstance(func_or_mod, tir.PrimFunc):
-            self.ir_module = tvm.IRModule({func_or_mod.attrs["global_symbol"]: func_or_mod})
-        else:
-            self.ir_module = func_or_mod
-
-        # Cache parameter information during initialization
-        # Convert tvm.DataType to torch.dtype for tensor creation
-        self.param_dtypes = [param.torch_dtype() for param in params]
-        self.param_shapes = []
-        for param in params:
-            native_shape = []
-            for dim in param.shape:
-                if isinstance(dim, tir.IntImm):
-                    native_shape.append(int(dim))
-                elif isinstance(dim, tir.Var):
-                    native_shape.append(dim)  # Keep tir.Var for dynamic dimensions
-                else:
-                    native_shape.append(dim)
-            self.param_shapes.append(native_shape)
-
-        self.dynamic_symbolic_map = self._process_dynamic_symbolic()
-
-        self.target = Target.canon_target(determine_target(target))
-        self.verbose = verbose
-        self.wrapper = TLWrapper(self.target)
-        self.lib_generator = LibraryGenerator(self.target, verbose=verbose)
-        self.lib_generator.assign_pass_configs(pass_configs)
-        self.lib_generator.assign_compile_flags(compile_flags)
-
-        self.wrapper.assign_optimized_module(self.ir_module)
-        self.wrapper.assign_pass_configs(pass_configs)
-        self.wrapper.assign_host_module(host_mod)
-        self.wrapper.assign_device_module(device_mod)
-        self.wrapped_source = self.wrapper.wrap(self.get_kernel_source(kernel_only=True))
-
-        self.lib_generator.update_lib_code(self.wrapped_source)
-        self.lib_generator.compile_lib()
-        self.lib = self.lib_generator.load_lib()
-        self.lib.init()
-
-        self._post_init()
-
-    @classmethod
-    def from_database(
-        cls,
-        params: list[TensorType],
-        result_idx: list[int],
-        target: str,
-        func_or_mod: tir.PrimFunc | tvm.IRModule,
-        host_kernel_source: str,
-        device_kernel_source: str,
-        kernel_lib_path: str,
-        verbose: bool = False,
-        pass_configs: dict[str, Any] | None = None,
-        compile_flags: list[str] | None = None,
-    ):
-        adapter = cls.__new__(cls)
-        adapter.params = params
-        adapter.result_idx = adapter._legalize_result_idx(result_idx)
-        adapter.host_kernel_source = host_kernel_source
-        adapter.device_kernel_source = device_kernel_source
-        adapter.wrapped_source = device_kernel_source + "\n\n" + host_kernel_source
-        adapter.pass_configs = pass_configs
-
-        if isinstance(func_or_mod, tir.PrimFunc):
-            adapter.ir_module = tvm.IRModule({func_or_mod.attrs["global_symbol"]: func_or_mod})
-        else:
-            adapter.ir_module = func_or_mod
-
-        # Cache parameter information during initialization
-        # Convert tvm.DataType to torch.dtype for tensor creation
-        adapter.param_dtypes = [param.torch_dtype() for param in params]
-        adapter.param_shapes = []
-        for param in params:
-            native_shape = []
-            for dim in param.shape:
-                if isinstance(dim, tir.IntImm):
-                    native_shape.append(int(dim))
-                elif isinstance(dim, tir.Var):
-                    native_shape.append(dim)  # Keep tir.Var for dynamic dimensions
-                else:
-                    native_shape.append(dim)
-            adapter.param_shapes.append(native_shape)
-
-        adapter.dynamic_symbolic_map = adapter._process_dynamic_symbolic()
-
-        adapter.target = Target.canon_target(determine_target(target))
-        adapter.verbose = verbose
-        adapter.lib_generator = LibraryGenerator(adapter.target, verbose=verbose)
-        adapter.lib_generator.assign_pass_configs(pass_configs)
-        adapter.lib_generator.assign_compile_flags(compile_flags)
-        adapter.lib = adapter.lib_generator.load_lib(lib_path=kernel_lib_path)
-        adapter.lib.init()
-
-        adapter._post_init()
-        return adapter
-
-    def _process_dynamic_symbolic(self) -> dict[tir.Var, tuple[int, int, int]]:
-        """Extract information about dynamic shapes from the TIR function.
-
-        Maps symbolic variables to their corresponding (id, buffer_index, dimension)
-        for runtime shape resolution.
-        id represents shape or stride, 0 represents shape, 1 represents stride
-        """
-        func = self.prim_func
-        params = func.params
-        buffer_map = func.buffer_map
-        dynamic_symbolic_map = {}
-        for i, param in enumerate(params):
-            if param in buffer_map:
-                buffer = buffer_map[param]
-                for j, shape in enumerate(buffer.shape):
-                    if isinstance(shape, tir.Var) and (shape not in dynamic_symbolic_map) and (shape not in params):
-                        dynamic_symbolic_map[shape] = (0, i, j)
-        for i, param in enumerate(params):
-            if param in buffer_map:
-                buffer = buffer_map[param]
-                for j, stride in enumerate(buffer.strides):
-                    if isinstance(stride, tir.Var) and (stride not in dynamic_symbolic_map) and (stride not in params):
-                        dynamic_symbolic_map[stride] = (1, i, j)
-        return dynamic_symbolic_map
-
-    def _forward_from_prebuild_lib(self, *args, stream: int | None = None):
-        """Low-level function to call the compiled CUDA kernel.
-
-        Converts PyTorch tensor pointers to C void pointers for ctypes interface.
-        """
-        ctypes_args = [ctypes.c_void_p(arr.data_ptr()) if not isinstance(arr, int) else arr for arr in args]
-        ctypes_args.append(ctypes.c_void_p(stream))
-        self.lib.call(*ctypes_args)
-
-    def _wrap_forward_from_prebuild_lib(self, *ins: list[torch.Tensor], stream: int | None = None):
-        """High-level wrapper for kernel execution.
-
-        Handles:
-        1. Input validation
-        2. Output tensor allocation
-        3. Dynamic shape resolution
-        4. CUDA stream management
-
-        Args:
-            ins: Input PyTorch tensors
-            stream: Optional CUDA stream for asynchronous execution
-
-        Returns:
-            Single tensor or list of tensors containing the kernel results
-        """
-        if len(ins) + len(self.result_idx) != len(self.params):
-            raise ValueError(
-                f"Expected {len(self.params)} inputs, got {len(ins) + len(self.result_idx)} with {len(ins)} inputs and {len(self.result_idx)} outputs"
-            )
-        ins_idx = 0
-        args = []
-
-        # tensor pointers
-        for i in range(len(self.params)):
-            if i in self.result_idx:
-                dtype = self.param_dtypes[i]
-                shape = []
-                # Now working with native Python list, no FFI calls needed
-                for s in self.param_shapes[i]:
-                    if isinstance(s, tir.Var):
-                        ref_tensor_idx, ref_shape_idx = self.dynamic_symbolic_map[s]
-                        shape.append(ins[ref_tensor_idx].shape[ref_shape_idx])
-                    else:  # Already converted to Python int during initialization
-                        shape.append(s)
-                device = ins[0].device if len(ins) > 0 else torch.cuda.current_device()
-                tensor = torch.empty(*shape, dtype=dtype, device=device)
-            else:
-                tensor = ins[ins_idx]
-                ins_idx += 1
-            args.append(tensor)
-
-        # dynamic symbolics
-        for _, (ref_id, buffer_idx, shape_idx) in self.dynamic_symbolic_map.items():
-            if ref_id == 0:
-                args.append(ins[buffer_idx].shape[shape_idx])
-            else:
-                args.append(ins[buffer_idx].stride(shape_idx))
-
-        # if stream is not None, we need to pass the stream to the library
-        if stream is None:
-            if str(self.target).startswith("cuda") and torch.cuda.is_available():
-                stream = torch.cuda.current_stream().cuda_stream
-            else:
-                stream = 0
-
-        self._forward_from_prebuild_lib(*args, stream=stream)
-
-        if len(self.result_idx) == 1:
-            return args[self.result_idx[0]]
-        else:
-            return [args[i] for i in self.result_idx]
-
-    def _convert_torch_func(self) -> Callable:
-        """Returns a PyTorch-compatible function wrapper for the kernel."""
-        return self._wrap_forward_from_prebuild_lib
-
-    @property
-    def prim_func(self) -> tir.PrimFunc:
-        """Returns the primary TIR function from the IR module."""
-        return retrieve_func_from_module(self.ir_module)
-
-    @property
-    def srcpath(self):
-        """Returns the source path of the compiled library."""
-        return self.lib_generator.srcpath
-
-    @property
-    def libpath(self):
-        """Returns the path to the compiled library."""
-        return self.lib_generator.libpath
-
-    @property
-    def lib_code(self):
-        """Returns the code of the compiled library."""
-        return self.lib_generator.lib_code
-
-    @property
-    def is_dynamic(self):
-        """Indicates whether the kernel handles dynamic shapes."""
-        return self.dynamic_symbolic_map is not None and len(self.dynamic_symbolic_map) > 0
-
-    def get_kernel_source(self, kernel_only: bool = False):
-        """Returns the source code of the compiled kernel."""
-        if kernel_only:
-            return self.device_kernel_source
-        else:
-            # Wrapper only has host kernel source
-            return self.host_kernel_source
diff --git a/tilelang/jit/execution_backend.py b/tilelang/jit/execution_backend.py
index db5e4a8b4..e2604f24b 100644
--- a/tilelang/jit/execution_backend.py
+++ b/tilelang/jit/execution_backend.py
@@ -35,16 +35,16 @@ def allowed_backends_for_target(target: Target, *, include_unavailable: bool = T
     if is_cutedsl_target(target):
         return ["cutedsl"]
     elif kind == "cuda":
-        allowed = ["tvm_ffi", "nvrtc", "cython", "ctypes"]
+        allowed = ["tvm_ffi", "nvrtc", "cython"]
     elif kind == "hip":
-        allowed = ["tvm_ffi", "cython", "ctypes"]
+        allowed = ["tvm_ffi", "cython"]
     elif kind == "metal":
         allowed = ["torch"]
     elif kind == "c":  # CPU C backend
-        allowed = ["cython", "ctypes", "tvm_ffi"]
+        allowed = ["cython", "tvm_ffi"]
     else:
         # Fallback: prefer portable hosts
-        allowed = ["cython", "ctypes", "tvm_ffi"]
+        allowed = ["cython", "tvm_ffi"]
 
     if not include_unavailable:
         # Drop NVRTC if not importable
diff --git a/tilelang/jit/kernel.py b/tilelang/jit/kernel.py
index a788e76ba..de935d996 100644
--- a/tilelang/jit/kernel.py
+++ b/tilelang/jit/kernel.py
@@ -17,7 +17,6 @@
 from tilelang.engine.param import CompiledArtifact, KernelParam
 from tilelang.jit.adapter import (
     BaseKernelAdapter,
-    CtypesKernelAdapter,
     CythonKernelAdapter,
     CuTeDSLKernelAdapter,
     TVMFFIKernelAdapter,
@@ -64,7 +63,7 @@ def __init__(
         self,
         func: PrimFunc = None,
         out_idx: list[int] | int = None,
-        execution_backend: Literal["tvm_ffi", "ctypes", "cython", "nvrtc", "torch", "cutedsl"] = "tvm_ffi",
+        execution_backend: Literal["tvm_ffi", "cython", "nvrtc", "torch", "cutedsl"] = "tvm_ffi",
         target: str | Target = "auto",
         target_host: str | Target = None,
         verbose: bool = False,
@@ -81,7 +80,7 @@ def __init__(
             The TileLang TIR function to compile and wrap.
         out_idx : Union[List[int], int], optional
             Index(es) of the output tensors to return (default: None).
-        execution_backend : Literal["tvm_ffi", "ctypes", "cython", "nvrtc", "torch", "cutedsl"], optional
+        execution_backend : Literal["tvm_ffi", "cython", "nvrtc", "torch", "cutedsl"], optional
             Execution backend to use for kernel execution.
         target : Union[str, Target], optional
             Compilation target, either as a string or a TVM Target object (default: "auto").
@@ -112,7 +111,6 @@ def __init__(
         # Validate the execution backend.
         assert execution_backend in [
             "tvm_ffi",
-            "ctypes",
             "cython",
             "nvrtc",
             "torch",
@@ -158,7 +156,7 @@ def from_database(
         target: str | Target,
         target_host: str | Target,
         out_idx: list[int] | int,
-        execution_backend: Literal["tvm_ffi", "ctypes", "cython", "nvrtc", "torch"],
+        execution_backend: Literal["tvm_ffi", "cython", "nvrtc", "torch"],
         pass_configs: dict[str, Any] | None = None,
         compile_flags: list[str] | None = None,
     ):
@@ -269,19 +267,6 @@ def _compile_and_create_adapter(self, tilelang_func: PrimFunc, out_idx: list[int
                 pass_configs=pass_configs,
                 compile_flags=compile_flags,
             )
-        elif execution_backend == "ctypes":
-            adapter = CtypesKernelAdapter(
-                params=artifact.params,
-                result_idx=out_idx,
-                target=target,
-                func_or_mod=tilelang_func,
-                host_mod=artifact.host_mod,
-                device_mod=artifact.device_mod,
-                device_kernel_source=artifact.kernel_source,
-                verbose=verbose,
-                pass_configs=pass_configs,
-                compile_flags=compile_flags,
-            )
         elif execution_backend == "cython":
             adapter = CythonKernelAdapter(
                 params=artifact.params,
@@ -372,18 +357,6 @@ def _create_adapter_from_database(
                 pass_configs=pass_configs,
                 compile_flags=compile_flags,
             )
-        elif execution_backend == "ctypes":
-            adapter = CtypesKernelAdapter.from_database(
-                params=params,
-                result_idx=result_idx,
-                target=target,
-                func_or_mod=func_or_mod,
-                host_kernel_source=host_kernel_source,
-                device_kernel_source=device_kernel_source,
-                kernel_lib_path=kernel_lib_path,
-                pass_configs=pass_configs,
-                compile_flags=compile_flags,
-            )
         elif execution_backend == "cython":
             adapter = CythonKernelAdapter.from_database(
                 params=params,
@@ -471,7 +444,7 @@ def get_kernel_source(self, kernel_only: bool = True) -> str:
         str
             The source code of the compiled kernel function.
         """
-        if self.execution_backend in {"ctypes", "cython", "nvrtc", "tvm_ffi", "cutedsl"}:
+        if self.execution_backend in {"cython", "nvrtc", "tvm_ffi", "cutedsl"}:
             return self.adapter.get_kernel_source(kernel_only=kernel_only)
         return self.artifact.kernel_source
 
@@ -479,7 +452,7 @@ def get_host_source(self) -> str:
         """
         Returns the source code of the host function.
         """
-        if self.execution_backend in {"ctypes", "cython", "nvrtc", "tvm_ffi", "cutedsl"}:
+        if self.execution_backend in {"cython", "nvrtc", "tvm_ffi", "cutedsl"}:
             return self.adapter.get_host_source()
         assert self.artifact.host_mod is not None, "host_mod is not available"
         return str(self.artifact.host_mod)