diff --git a/.buildkite/scripts/hardware_ci/run-amd-test.sh b/.buildkite/scripts/hardware_ci/run-amd-test.sh
index 4cacc2710f10..1e84c8f01277 100755
--- a/.buildkite/scripts/hardware_ci/run-amd-test.sh
+++ b/.buildkite/scripts/hardware_ci/run-amd-test.sh
@@ -327,8 +327,7 @@ apply_rocm_test_overrides() {
     cmds="${cmds} \
     --ignore=kernels/moe/test_moe.py \
-    --ignore=kernels/moe/test_cutlass_moe.py \
-    --ignore=kernels/moe/test_triton_moe_ptpc_fp8.py"
+    --ignore=kernels/moe/test_cutlass_moe.py"
     fi
 
     # --- Entrypoint ignores ---
     if [[ $cmds == *" entrypoints/openai "* ]]; then
diff --git a/tests/quantization/test_ptpc_fp8.py b/tests/quantization/test_ptpc_fp8.py
deleted file mode 100644
index 6858062b9183..000000000000
--- a/tests/quantization/test_ptpc_fp8.py
+++ /dev/null
@@ -1,57 +0,0 @@
-# SPDX-License-Identifier: Apache-2.0
-# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-"""Tests whether PTPC w8a8 FP8 computation is enabled correctly.
-
-Run `pytest tests/quantization/test_ptpc_fp8.py --forked`.
-"""
-
-import pytest
-
-from tests.quantization.utils import is_quant_method_supported
-from vllm.model_executor.layers.quantization.fp8 import Fp8KVCacheMethod
-from vllm.model_executor.layers.quantization.ptpc_fp8 import PTPCFp8LinearMethod
-from vllm.platforms import current_platform
-
-
-@pytest.fixture(scope="function", autouse=True)
-def enable_pickle(monkeypatch):
-    """`LLM.apply_model` requires pickling a function."""
-    monkeypatch.setenv("VLLM_ALLOW_INSECURE_SERIALIZATION", "1")
-
-
-@pytest.mark.skipif(
-    not is_quant_method_supported("ptpc_fp8"),
-    reason="PTPC FP8 is not supported on this GPU type.",
-)
-@pytest.mark.skipif(not current_platform.is_rocm(), reason="This test is for ROCm GPU.")
-@pytest.mark.parametrize("dtype", ["bfloat16"])
-@pytest.mark.parametrize("kv_cache_dtype", ["auto", "fp8"])
-def test_ptpc_fp8_rocm(vllm_runner, dtype: str, kv_cache_dtype: str) -> None:
-    llm = vllm_runner(
-        "facebook/opt-125m",
-        dtype=dtype,
-        quantization="ptpc_fp8",
-        enforce_eager=True,
-        kv_cache_dtype=kv_cache_dtype,
-        allow_deprecated_quantization=True,
-    )
-
-    with llm:
-
-        def check_model(model):
-            fc1 = model.model.decoder.layers[0].fc1
-            assert isinstance(fc1.quant_method, PTPCFp8LinearMethod)
-            if kv_cache_dtype == "ptpc_fp8":
-                attn = model.model.decoder.layers[0].self_attn.attn
-                assert isinstance(attn.quant_method, Fp8KVCacheMethod)
-                assert attn._k_scale == 1.0
-                assert attn._v_scale == 1.0
-
-            # For GPUs with hardware support, we keep weights in fp8
-            if current_platform.has_device_capability(94):
-                assert fc1.weight.dtype == current_platform.fp8_dtype()
-
-        llm.apply_model(check_model)
-
-        output = llm.generate_greedy("Hello my name is", max_tokens=4)
-        assert output
diff --git a/vllm/model_executor/layers/quantization/__init__.py b/vllm/model_executor/layers/quantization/__init__.py
index e08a6456aba7..9aceb3be054d 100644
--- a/vllm/model_executor/layers/quantization/__init__.py
+++ b/vllm/model_executor/layers/quantization/__init__.py
@@ -12,7 +12,6 @@
 QuantizationMethods = Literal[
     "awq",
     "fp8",
-    "ptpc_fp8",
     "fbgemm_fp8",
     "fp_quant",
     "modelopt",
@@ -39,7 +38,6 @@
 DEPRECATED_QUANTIZATION_METHODS = [
     "tpu_int8",
-    "ptpc_fp8",
     "fbgemm_fp8",
     "fp_quant",
     "experts_int8",
@@ -132,7 +130,6 @@ def get_quantization_config(quantization: str) -> type[QuantizationConfig]:
     from .mxfp4 import Mxfp4Config
     from .mxfp8 import Mxfp8Config
     from .petit import PetitNvFp4Config
-    from .ptpc_fp8 import PTPCFp8Config
     from .torchao import TorchAOConfig
 
     method_to_config: dict[str, type[QuantizationConfig]] = {
@@ -150,7 +147,6 @@ def get_quantization_config(quantization: str) -> type[QuantizationConfig]:
         "gptq": GPTQConfig,
         "compressed-tensors": CompressedTensorsConfig,
         "bitsandbytes": BitsAndBytesConfig,
-        "ptpc_fp8": PTPCFp8Config,
         "experts_int8": ExpertsInt8Config,
         "quark": QuarkConfig,
         "moe_wna16": MoeWNA16Config,
diff --git a/vllm/model_executor/layers/quantization/ptpc_fp8.py b/vllm/model_executor/layers/quantization/ptpc_fp8.py
deleted file mode 100644
index 5d7b7b54adc8..000000000000
--- a/vllm/model_executor/layers/quantization/ptpc_fp8.py
+++ /dev/null
@@ -1,132 +0,0 @@
-# SPDX-License-Identifier: Apache-2.0
-# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-
-from typing import Any
-
-import torch
-from torch.nn.parameter import Parameter
-
-from vllm import _custom_ops as ops
-from vllm.model_executor.kernels.linear import (
-    init_fp8_linear_kernel,
-)
-from vllm.model_executor.layers.attention import Attention
-from vllm.model_executor.layers.linear import LinearBase, UnquantizedLinearMethod
-from vllm.model_executor.layers.quantization import QuantizationMethods
-from vllm.model_executor.layers.quantization.base_config import QuantizeMethodBase
-from vllm.model_executor.layers.quantization.fp8 import (
-    Fp8Config,
-    Fp8KVCacheMethod,
-    Fp8LinearMethod,
-)
-from vllm.model_executor.layers.quantization.utils.quant_utils import (
-    is_layer_skipped,
-    kFp8DynamicTokenSym,
-)
-from vllm.platforms import current_platform
-
-
-class PTPCFp8Config(Fp8Config):
-    """Config class for Per-Token-Per-Channel Dynamic Quantization Fp8."""
-
-    def __init__(
-        self,
-        activation_scheme: str = "dynamic",
-        ignored_layers: list[str] | None = None,
-    ) -> None:
-        if not current_platform.is_rocm():
-            raise ValueError("ptpc_fp8 quantization is supported only on ROCm.")
-
-        if not current_platform.has_device_capability(94):
-            raise ValueError(
-                "ptpc_fp8 quantization is supported only on AMD Instinct MI300 GPUs and newer."  # noqa: E501
-            )
-        if activation_scheme == "static":
-            raise ValueError("ptpc_fp8 as of now only support dynamic quantization.")
-
-        super().__init__(
-            is_checkpoint_fp8_serialized=False,
-            activation_scheme=activation_scheme,
-            ignored_layers=ignored_layers,
-        )
-
-    @classmethod
-    def get_name(cls) -> QuantizationMethods:
-        return "ptpc_fp8"
-
-    @classmethod
-    def from_config(cls, config: dict[str, Any]) -> "PTPCFp8Config":
-        activation_scheme = cls.get_from_keys(config, ["activation_scheme"])
-        ignored_layers = cls.get_from_keys_or(config, ["ignored_layers"], None)
-        return cls(activation_scheme=activation_scheme, ignored_layers=ignored_layers)
-
-    def get_quant_method(
-        self, layer: torch.nn.Module, prefix: str
-    ) -> "QuantizeMethodBase | None":
-        if isinstance(layer, LinearBase):
-            if is_layer_skipped(prefix, self.ignored_layers):
-                return UnquantizedLinearMethod()
-            return PTPCFp8LinearMethod(self)
-        elif isinstance(layer, Attention):
-            return Fp8KVCacheMethod(self)
-        return None
-
-
-class PTPCFp8LinearMethod(Fp8LinearMethod):
-    """Linear method for Per-Token and Per-Channel FP8 Quantization.
-    Only supports loading quantized BF16 model checkpoints with dynamic
-    activation scaling. To load FP16 model checkpoints, user must specify
-    to convert the FP16 model weight loading into BF16.
-    The weight scaling factor will be initialized after
-    the model weights are loaded.
-
-    Limitations:
-    1. Only support float8_e4m3fnuz data type due to the limitation of
-       torch._scaled_mm (https://github.com/ROCm/pytorch/blob/8c0504d7f3fb0ee4c278c096a5c3caedb01129fa/aten/src/ATen/native/cuda/Blas.cpp#L1041)
-
-    Args:
-        quant_config: The quantization config.
-    """
-
-    def __init__(self, quant_config: PTPCFp8Config):
-        assert current_platform.is_rocm(), (
-            "PTPCFp8LinearMethod is only supported on ROCm."
-        )
-        super().__init__(quant_config=quant_config)
-        # Force weight quantization
-        self.fp8_linear = init_fp8_linear_kernel(
-            activation_quant_key=kFp8DynamicTokenSym,
-            weight_quant_key=kFp8DynamicTokenSym,
-            out_dtype=torch.get_default_dtype(),
-            module_name=self.__class__.__name__,
-        )
-
-    def process_weights_after_loading(self, layer: torch.nn.Module) -> None:
-        assert layer.weight.data.dtype not in (torch.float16, torch.float32), (
-            "Currently torch._scaled_mm (hipBLASLt) rowwise gemm only support "
-            f"output dtype of bfloat16. {layer.weight.data.dtype} is specified."
-        )
-
-        if layer.weight.data.dtype == torch.bfloat16:
-            # Quantize the weights.
-            qweight, weight_scale = ops.scaled_fp8_quant(
-                layer.weight, scale=None, use_per_token_if_dynamic=True
-            )
-
-            # Update the layer with the new values.
-            layer.weight = Parameter(
-                qweight.t(), requires_grad=False
-            )  # Pretranspose the weight
-            layer.weight_scale = Parameter(weight_scale, requires_grad=False)
-        else:
-            assert layer.weight.data.dtype == current_platform.fp8_dtype()
-            assert getattr(layer, "weight_scale", None) is not None
-        layer.input_scale = None
-
-    def apply(
-        self,
-        layer: torch.nn.Module,
-        x: torch.Tensor,
-        bias: torch.Tensor | None = None,
-    ) -> torch.Tensor:
-        return self.fp8_linear.apply_weights(layer, x, bias)
diff --git a/vllm/platforms/rocm.py b/vllm/platforms/rocm.py
index 46d83564d476..29d7d5ce8592 100644
--- a/vllm/platforms/rocm.py
+++ b/vllm/platforms/rocm.py
@@ -378,7 +378,6 @@ class RocmPlatform(Platform):
         "fbgemm_fp8",
         "gguf",
         "quark",
-        "ptpc_fp8",
         "mxfp4",
         "petit_nvfp4",
         "torchao",