Merged
34 commits
3fa3733
add page size 16 to test and op
ltqin Sep 8, 2025
113217e
add num_total_pages to kernel parameter
ltqin Sep 9, 2025
6e2d9e4
add is_sglang parameter
ltqin Sep 9, 2025
7a463b7
change is_sglang to is_sglang_layout
ltqin Sep 9, 2025
ee72e04
kv last page size=16 pass
ltqin Sep 12, 2025
ae459b0
pass kv_last_page_lens to kernel
ltqin Sep 13, 2025
b25cee7
add parameters check before calling kernel
ltqin Sep 15, 2025
93754f4
change kv layout to [page_num, page_size, nhead, hdim]
ltqin Sep 17, 2025
8c52122
adopt the changes of struct fmha_fwd_batch_prefill_traits
Jeff-Huang Dec 13, 2025
9d7cd3f
change kv cache memory layout to [num_blocks, num_kv_heads, head_size…
Jeff-Huang Dec 19, 2025
e0cb1ea
[FMHA] Integrate vLLM block table support and enforce vectorized KV l…
Jeff-Huang Dec 24, 2025
ac28e9d
update CK
Jeff-Huang Dec 30, 2025
9d69a01
Merge branch 'main' into batch_prefill_page_size_16_rebase
Jeff-Huang Dec 30, 2025
688b141
update ck
Jeff-Huang Dec 30, 2025
0c9c886
adopt api changes from fmha_batch_prefill_traits
Jeff-Huang Dec 30, 2025
c75fee4
add support for linear kv cache layout
Jeff-Huang Dec 31, 2025
d144a76
update api
Jeff-Huang Dec 31, 2025
d727a92
Refactor the test code by gathering the different test functions into…
Jeff-Huang Dec 31, 2025
7642e79
Merge branch 'main' into batch_prefill_page_size_16_rebase
Jeff-Huang Dec 31, 2025
2917917
Merge branch 'main' into batch_prefill_page_size_16_rebase
Jeff-Huang Jan 5, 2026
b1f452c
update ck
Jeff-Huang Jan 5, 2026
ed5f66a
update ck
Jeff-Huang Jan 5, 2026
f5cc627
Add profile measurements for batch prefill function
Jeff-Huang Jan 6, 2026
c7dd47f
Merge branch 'main' into batch_prefill_page_size_16_rebase
Jeff-Huang Jan 7, 2026
9e10ffc
update ck
Jeff-Huang Jan 7, 2026
6a06de9
fix style
Jeff-Huang Jan 7, 2026
ae12e04
Merge branch 'main' into batch_prefill_page_size_16_rebase
Jeff-Huang Jan 7, 2026
db5f333
fix style
Jeff-Huang Jan 7, 2026
44a5cc7
Merge branch 'main' into batch_prefill_page_size_16_rebase
Jeff-Huang Jan 8, 2026
4de0de3
Merge branch 'main' into batch_prefill_page_size_16_rebase
Jeff-Huang Jan 9, 2026
1ed076f
[FMHA] Support 3D linear layout (page_size=1) and non-contiguous KV t…
Jeff-Huang Jan 10, 2026
ec79599
Merge branch 'main' into batch_prefill_page_size_16_rebase
Jeff-Huang Jan 12, 2026
ba88187
Merge branch 'main' into batch_prefill_page_size_16_rebase
Jeff-Huang Jan 13, 2026
e7af363
update ck
Jeff-Huang Jan 13, 2026
2 changes: 1 addition & 1 deletion 3rdparty/composable_kernel
Submodule composable_kernel updated 251 files
2 changes: 1 addition & 1 deletion aiter/aot/test/matmul_fp16.py
@@ -1,5 +1,5 @@
# SPDX-License-Identifier: MIT
# Copyright (C) 2024-2025, Advanced Micro Devices, Inc. All rights reserved.
# Copyright (C) 2024-2026, Advanced Micro Devices, Inc. All rights reserved.

import triton
import triton.language as tl
5 changes: 4 additions & 1 deletion aiter/dist/device_communicators/communicator_cuda.py
@@ -155,7 +155,10 @@ def all_reduce(
qr_comm is not None
and not qr_comm.disabled
and qr_comm.should_quick_allreduce(input_)
and (input_.nelement() * input_.element_size()) >= 4*1024*1024 # input shape should be such that quick reduce will show benefits.
and (input_.nelement() * input_.element_size())
>= 4
* 1024
* 1024 # input shape should be such that quick reduce will show benefits.
# input shape estimated at 2 * max concurrency for now. if performance issues, subject to change
):
out = qr_comm.quick_all_reduce(input_)
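For reference, a minimal sketch of the size gate this hunk reflows: quick all-reduce is only attempted once the tensor's payload reaches 4 MiB (4 * 1024 * 1024 bytes). The standalone helper below is illustrative, not part of the patch.

import torch

# Illustrative restatement (assumption: not an aiter API) of the reflowed
# condition in all_reduce(): payload bytes = nelement * element_size must be
# at least 4 MiB before quick all-reduce is expected to show benefits.
def meets_quick_allreduce_threshold(t: torch.Tensor) -> bool:
    return t.nelement() * t.element_size() >= 4 * 1024 * 1024

print(meets_quick_allreduce_threshold(torch.empty(1 << 21, dtype=torch.float16)))  # True: exactly 4 MiB
print(meets_quick_allreduce_threshold(torch.empty(1024, dtype=torch.float16)))     # False: 2 KiB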
2 changes: 1 addition & 1 deletion aiter/jit/core.py
@@ -818,7 +818,7 @@ def wrapper(*args, custom_build_args={}, **kwargs):
if module is None:
try:
module = get_module(md_name)
except Exception as e:
except Exception:
md = custom_build_args.get("md_name", md_name)
module = get_module(md)
except ModuleNotFoundError:
69 changes: 60 additions & 9 deletions aiter/ops/mha.py
@@ -1,5 +1,5 @@
# SPDX-License-Identifier: MIT
# Copyright (C) 2024-2025, Advanced Micro Devices, Inc. All rights reserved.
# Copyright (C) 2024-2026, Advanced Micro Devices, Inc. All rights reserved.

from typing import Any, Optional, Tuple

@@ -973,6 +973,9 @@ def cmdGenFunc_mha_batch_prefill(
k_descale: Optional[Tensor] = None,
v_descale: Optional[Tensor] = None,
gen: Optional[Generator] = None,
kv_last_page_lens: Optional[Tensor] = None,
block_table: Optional[Tensor] = None,
seqlen_k: Optional[Tensor] = None,
):
# causal=true is the same as causal=false in this case
causal = is_causal
@@ -2598,15 +2601,26 @@ def mha_batch_prefill_fake_tensors(
return_softmax_lse: bool,
return_dropout_randval: bool,
out: Optional[torch.Tensor] = None,
bias: Optional[torch.Tensor] = None,
alibi_slopes: Optional[torch.Tensor] = None,
q_descale: Optional[torch.Tensor] = None,
k_descale: Optional[torch.Tensor] = None,
v_descale: Optional[torch.Tensor] = None,
gen: Optional[Generator] = None,
kv_last_page_lens: Optional[torch.Tensor] = None,
block_table: Optional[torch.Tensor] = None,
seqlen_k: Optional[torch.Tensor] = None,
) -> Tuple[Tensor, Tensor, Tensor, Tensor]:
# ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
is_vectorized = k.dim() == 5 and v.dim() == 5
is_linear = (k.dim() == 4 and v.dim() == 4) or (k.dim() == 3 and v.dim() == 3)
if not (is_vectorized or is_linear):
raise ValueError(
"Batch prefill requires 5D vectorized, 4D linear, or 3D linear (page_size=1) K/V"
" tensors"
)
num_heads = q.size(1) # num_heads = q.sizes()[1]
head_size_v = v.size(2) # head_size_v = v.size(2)
head_size_v = v.size(-2) if is_vectorized else v.size(-1)
total_q = q.size(0) # total_q = q.size(0)

if out is None:
@@ -2671,6 +2685,9 @@ def mha_batch_prefill(
q_descale: Optional[torch.Tensor] = None,
k_descale: Optional[torch.Tensor] = None,
v_descale: Optional[torch.Tensor] = None,
kv_last_page_lens: Optional[Tensor] = None,
block_table: Optional[Tensor] = None,
seqlen_k: Optional[Tensor] = None,
gen: Optional[Generator] = None,
) -> Tuple[Tensor, Tensor, Tensor, Tensor]: ...

@@ -2696,6 +2713,9 @@ def _mha_batch_prefill(
return_softmax: bool = False,
zero_tensors: bool = False,
out: torch.Tensor = None,
kv_last_page_lens: torch.Tensor = None,
block_table: torch.Tensor = None,
seqlen_k: torch.Tensor = None,
q_descale: Optional[torch.Tensor] = None,
k_descale: Optional[torch.Tensor] = None,
v_descale: Optional[torch.Tensor] = None,
@@ -2726,6 +2746,9 @@ def _mha_batch_prefill(
q_descale,
k_descale,
v_descale,
kv_last_page_lens,
block_table,
seqlen_k,
# custom_build_args={"md_name": md_name, "blob_gen_cmd": blob_gen_cmd},
)
return out, softmax_lse, S_dmask, rng_state
@@ -2750,19 +2773,44 @@ def mha_batch_prefill_func(
return_lse=False,
return_attn_probs=False,
out=None,
kv_last_page_lens=None,
block_table=None,
seqlen_k=None,
q_descale=None,
k_descale=None,
v_descale=None,
):
if softmax_scale is None:
softmax_scale = q.shape[-1] ** (-0.5)
head_size_q_og = q.size(2)
head_size_v_og = v.size(2)
if head_size_q_og % 8 != 0:
q = torch.nn.functional.pad(q, [0, 8 - head_size_q_og % 8])
k = torch.nn.functional.pad(k, [0, 8 - head_size_q_og % 8])
if head_size_v_og % 8 != 0:
v = torch.nn.functional.pad(v, [0, 8 - head_size_v_og % 8])
head_size_q_og = q.size(-1)
# 16 bytes = 128-bit (dwordx4) vector width assumed by CK kernels.
k_vector_size = 16 // k.element_size()
is_vectorized = k.dim() == 5 and v.dim() == 5
is_linear = (k.dim() == 4 and v.dim() == 4) or (k.dim() == 3 and v.dim() == 3)
if not (is_vectorized or is_linear):
raise ValueError(
"Batch prefill requires 5D vectorized, 4D linear, or 3D linear (page_size=1) K/V"
" tensors"
)
head_size_v_og = v.size(-2) if is_vectorized else v.size(-1)
if head_size_q_og % k_vector_size != 0 or head_size_v_og % k_vector_size != 0:
raise ValueError("Batch prefill requires head size divisible by vector size")
if is_vectorized:
if k.size(-3) * k_vector_size != head_size_q_og:
raise ValueError("K vectorized layout does not match Q head size")
if k.size(-2) % k_vector_size != 0:
raise ValueError(
"Vectorized KV requires page size divisible by vector size"
)
if v.size(-1) != k_vector_size:
raise ValueError("Vectorized KV requires last dim equal to vector size")
else:
if k.size(-1) != head_size_q_og:
raise ValueError("K linear layout does not match Q head size")
if k.size(1) != v.size(1) or k.size(2) != v.size(2):
raise ValueError("K/V linear layout must match page size and head count")
if k.stride(-1) != 1 or v.stride(-1) != 1:
raise ValueError("Batch prefill requires K/V with contiguous last dimension")
out_padded, softmax_lse, S_dmask, rng_state = _mha_batch_prefill(
q,
k,
Expand All @@ -2782,6 +2830,9 @@ def mha_batch_prefill_func(
return_lse=return_lse,
return_softmax=return_attn_probs and dropout_p > 0,
out=out,
kv_last_page_lens=kv_last_page_lens,
block_table=block_table,
seqlen_k=seqlen_k,
q_descale=q_descale,
k_descale=k_descale,
v_descale=v_descale,
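The new validation in mha_batch_prefill_func encodes two accepted KV-cache forms: a 5D "vectorized" layout whose innermost dimension holds one 128-bit (16-byte) vector, and a 3D/4D "linear" layout (3D corresponding to page_size=1), both requiring a contiguous last dimension. Below is a minimal sketch of the same shape arithmetic restated outside the wrapper; the helper and the example shapes are illustrative assumptions, since the hunk pins down only the constraints, not a canonical layout.

import torch

# Illustrative restatement (assumption: not an aiter API) of the wrapper's
# K/V layout checks. vec is the element count of one 128-bit (dwordx4)
# vector, the width the CK kernels assume: 16 bytes // element size.
def check_kv_layout(q, k, v):
    head_size_q = q.size(-1)
    vec = 16 // k.element_size()                 # 8 for fp16/bf16, 16 for fp8
    is_vectorized = k.dim() == 5 and v.dim() == 5
    is_linear = k.dim() in (3, 4) and v.dim() == k.dim()
    assert is_vectorized or is_linear, "K/V must be 5D vectorized or 3D/4D linear"
    head_size_v = v.size(-2) if is_vectorized else v.size(-1)
    assert head_size_q % vec == 0 and head_size_v % vec == 0
    if is_vectorized:
        assert k.size(-3) * vec == head_size_q   # head dim is split into vectors
        assert k.size(-2) % vec == 0             # page size divisible by vec
        assert v.size(-1) == vec                 # innermost dim holds one vector
    else:
        assert k.size(-1) == head_size_q
        assert k.shape[1:3] == v.shape[1:3]      # same page size and head count
    assert k.stride(-1) == 1 and v.stride(-1) == 1  # contiguous last dim

# fp16, so vec = 8; a 5D vectorized cache and a 4D linear cache both pass.
q = torch.randn(32, 4, 128, dtype=torch.float16)
check_kv_layout(q, torch.randn(8, 1, 16, 16, 8, dtype=torch.float16),
                   torch.randn(8, 1, 16, 128, 8, dtype=torch.float16))
check_kv_layout(q, torch.randn(8, 16, 1, 128, dtype=torch.float16),
                   torch.randn(8, 16, 1, 128, dtype=torch.float16))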
2 changes: 1 addition & 1 deletion aiter/ops/moe_op.py
@@ -1,5 +1,5 @@
# SPDX-License-Identifier: MIT
# Copyright (C) 2024-2025, Advanced Micro Devices, Inc. All rights reserved.
# Copyright (C) 2024-2026, Advanced Micro Devices, Inc. All rights reserved.

import torch
from torch import Tensor
4 changes: 2 additions & 2 deletions aiter/ops/triton/__init__.py
@@ -1,5 +1,5 @@
# SPDX-License-Identifier: MIT
# Copyright (C) 2024-2025, Advanced Micro Devices, Inc. All rights reserved.
# Copyright (C) 2024-2026, Advanced Micro Devices, Inc. All rights reserved.

import importlib.util
import sys
@@ -42,7 +42,7 @@
)

"""
These following help implement backward-compatibility
These following help implement backward-compatibility
for modules that were reorganized so that external repos (like sglang for example),
which depend on the old module names, can still import it the old "way" of importing.
"""
@@ -1,5 +1,5 @@
# SPDX-License-Identifier: MIT
# Copyright (C) 2024-2025, Advanced Micro Devices, Inc. All rights reserved.
# Copyright (C) 2024-2026, Advanced Micro Devices, Inc. All rights reserved.

# The kernel in this file is adapted from the VLLM project:
# https://github.com/ROCm/vllm/blob/aiter_integration_final/vllm/attention/ops/chunked_prefill_paged_decode.py
@@ -1,4 +1,4 @@
# Copyright (C) 2023-2025 SGLang Team
# Copyright (C) 2023-2026 SGLang Team
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
@@ -1,5 +1,5 @@
# Copyright (C) Advanced Micro Devices, Inc. All rights reserved.
# Copyright (C) 2024-2025, The vLLM team.
# Copyright (C) 2024-2026, The vLLM team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
2 changes: 1 addition & 1 deletion aiter/ops/triton/_triton_kernels/attention/lean_atten.py
@@ -1,5 +1,5 @@
# SPDX-License-Identifier: MIT
# Copyright (C) 2024-2025, Advanced Micro Devices, Inc. All rights reserved.
# Copyright (C) 2024-2026, Advanced Micro Devices, Inc. All rights reserved.

"""
Lean Attention
2 changes: 1 addition & 1 deletion aiter/ops/triton/_triton_kernels/attention/mha.py
@@ -1,5 +1,5 @@
# SPDX-License-Identifier: MIT
# Copyright (C) 2024-2025, Advanced Micro Devices, Inc. All rights reserved.
# Copyright (C) 2024-2026, Advanced Micro Devices, Inc. All rights reserved.

import functools
import json
@@ -1,5 +1,5 @@
# SPDX-License-Identifier: MIT
# Copyright (C) 2024-2025, Advanced Micro Devices, Inc. All rights reserved.
# Copyright (C) 2024-2026, Advanced Micro Devices, Inc. All rights reserved.

import functools
import json
@@ -1,5 +1,5 @@
# SPDX-License-Identifier: MIT
# Copyright (C) 2024-2025, Advanced Micro Devices, Inc. All rights reserved.
# Copyright (C) 2024-2026, Advanced Micro Devices, Inc. All rights reserved.

import functools
import json
4 changes: 2 additions & 2 deletions aiter/ops/triton/_triton_kernels/attention/mla_decode_rope.py
@@ -1,7 +1,7 @@
# SPDX-License-Identifier: MIT
# Copyright (C) 2024-2025, Advanced Micro Devices, Inc. All rights reserved.
# Copyright (C) 2024-2026, Advanced Micro Devices, Inc. All rights reserved.

# Copyright (C) 2023-2025 SGLang Team
# Copyright (C) 2023-2026 SGLang Team
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
2 changes: 1 addition & 1 deletion aiter/ops/triton/_triton_kernels/attention/pa_decode.py
@@ -1,5 +1,5 @@
# SPDX-License-Identifier: MIT
# Copyright (C) 2024-2025, Advanced Micro Devices, Inc. All rights reserved.
# Copyright (C) 2024-2026, Advanced Micro Devices, Inc. All rights reserved.

import triton
import triton.language as tl
@@ -1,5 +1,5 @@
# SPDX-License-Identifier: MIT
# Copyright (C) 2024-2025, Advanced Micro Devices, Inc. All rights reserved.
# Copyright (C) 2024-2026, Advanced Micro Devices, Inc. All rights reserved.

import triton
import triton.language as tl
4 changes: 2 additions & 2 deletions aiter/ops/triton/_triton_kernels/attention/pa_prefill.py
@@ -1,8 +1,8 @@
# SPDX-License-Identifier: MIT
# Copyright (C) 2024-2025, Advanced Micro Devices, Inc. All rights reserved.
# Copyright (C) 2024-2026, Advanced Micro Devices, Inc. All rights reserved.

# SPDX-License-Identifier: MIT
# Copyright (C) 2024-2025, Advanced Micro Devices, Inc. All rights reserved.
# Copyright (C) 2024-2026, Advanced Micro Devices, Inc. All rights reserved.

# The kernels in this file are adapted from LightLLM's context_attention_fwd:
# https://github.com/ModelTC/lightllm/blob/main/lightllm/models/llama/triton_kernel/context_flashattention_nopad.py
@@ -1,7 +1,7 @@
# SPDX-License-Identifier: MIT
# Copyright (C) 2024-2025, Advanced Micro Devices, Inc. All rights reserved.
# Copyright (C) 2024-2026, Advanced Micro Devices, Inc. All rights reserved.

# Copyright (C) 2023-2025 SGLang Team
# Copyright (C) 2023-2026 SGLang Team
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
@@ -1,5 +1,5 @@
# SPDX-License-Identifier: MIT
# Copyright (C) 2024-2025, Advanced Micro Devices, Inc. All rights reserved.
# Copyright (C) 2024-2026, Advanced Micro Devices, Inc. All rights reserved.

import triton.language as tl
from aiter.ops.triton.utils._triton.kernel_repr import make_kernel_repr
@@ -1,5 +1,5 @@
# SPDX-License-Identifier: MIT
# Copyright (C) 2024-2025, Advanced Micro Devices, Inc. All rights reserved.
# Copyright (C) 2024-2026, Advanced Micro Devices, Inc. All rights reserved.

import triton.language as tl
from aiter.ops.triton.utils._triton.kernel_repr import make_kernel_repr
@@ -1,5 +1,5 @@
# SPDX-License-Identifier: MIT
# Copyright (C) 2024-2025, Advanced Micro Devices, Inc. All rights reserved.
# Copyright (C) 2024-2026, Advanced Micro Devices, Inc. All rights reserved.

import triton.language as tl
from aiter.ops.triton._triton_kernels.quant.fused_fp8_quant import _fp8_quant_op
@@ -1,5 +1,5 @@
# SPDX-License-Identifier: MIT
# Copyright (C) 2024-2025, Advanced Micro Devices, Inc. All rights reserved.
# Copyright (C) 2024-2026, Advanced Micro Devices, Inc. All rights reserved.

import triton.language as tl
from aiter.ops.triton._triton_kernels.quant.quant import _mxfp4_quant_op
2 changes: 1 addition & 1 deletion aiter/ops/triton/_triton_kernels/gemm/basic/gemm_a8w8.py
@@ -1,5 +1,5 @@
# SPDX-License-Identifier: MIT
# Copyright (C) 2024-2025, Advanced Micro Devices, Inc. All rights reserved.
# Copyright (C) 2024-2026, Advanced Micro Devices, Inc. All rights reserved.

import triton.language as tl
from aiter.ops.triton.utils._triton.kernel_repr import make_kernel_repr
@@ -1,5 +1,5 @@
# SPDX-License-Identifier: MIT
# Copyright (C) 2024-2025, Advanced Micro Devices, Inc. All rights reserved.
# Copyright (C) 2024-2026, Advanced Micro Devices, Inc. All rights reserved.

import triton
import triton.language as tl
@@ -1,5 +1,5 @@
# SPDX-License-Identifier: MIT
# Copyright (C) 2024-2025, Advanced Micro Devices, Inc. All rights reserved.
# Copyright (C) 2024-2026, Advanced Micro Devices, Inc. All rights reserved.

import triton.language as tl
from aiter.ops.triton.utils._triton.kernel_repr import make_kernel_repr
2 changes: 1 addition & 1 deletion aiter/ops/triton/_triton_kernels/gemm/basic/gemm_a8wfp4.py
@@ -1,5 +1,5 @@
# SPDX-License-Identifier: MIT
# Copyright (C) 2024-2025, Advanced Micro Devices, Inc. All rights reserved.
# Copyright (C) 2024-2026, Advanced Micro Devices, Inc. All rights reserved.

import triton.language as tl
from aiter.ops.triton.utils._triton.kernel_repr import make_kernel_repr
@@ -1,5 +1,5 @@
# SPDX-License-Identifier: MIT
# Copyright (C) 2024-2025, Advanced Micro Devices, Inc. All rights reserved.
# Copyright (C) 2024-2026, Advanced Micro Devices, Inc. All rights reserved.

import triton.language as tl
from aiter.ops.triton.utils._triton.kernel_repr import make_kernel_repr
@@ -1,5 +1,5 @@
# SPDX-License-Identifier: MIT
# Copyright (C) 2024-2025, Advanced Micro Devices, Inc. All rights reserved.
# Copyright (C) 2024-2026, Advanced Micro Devices, Inc. All rights reserved.

import triton.language as tl
from aiter.ops.triton._triton_kernels.quant.quant import _mxfp4_quant_op
@@ -1,5 +1,5 @@
# SPDX-License-Identifier: MIT
# Copyright (C) 2024-2025, Advanced Micro Devices, Inc. All rights reserved.
# Copyright (C) 2024-2026, Advanced Micro Devices, Inc. All rights reserved.

import triton.language as tl
from aiter.ops.triton.utils._triton.kernel_repr import make_kernel_repr
@@ -1,5 +1,5 @@
# SPDX-License-Identifier: MIT
# Copyright (C) 2024-2025, Advanced Micro Devices, Inc. All rights reserved.
# Copyright (C) 2024-2026, Advanced Micro Devices, Inc. All rights reserved.

import triton.language as tl
from aiter.ops.triton.utils._triton.kernel_repr import make_kernel_repr
@@ -1,5 +1,5 @@
# SPDX-License-Identifier: MIT
# Copyright (C) 2024-2025, Advanced Micro Devices, Inc. All rights reserved.
# Copyright (C) 2024-2026, Advanced Micro Devices, Inc. All rights reserved.

import triton.language as tl
from aiter.ops.triton.utils._triton.kernel_repr import make_kernel_repr
@@ -1,5 +1,5 @@
# SPDX-License-Identifier: MIT
# Copyright (C) 2024-2025, Advanced Micro Devices, Inc. All rights reserved.
# Copyright (C) 2024-2026, Advanced Micro Devices, Inc. All rights reserved.

import triton.language as tl
from aiter.ops.triton.utils._triton.kernel_repr import make_kernel_repr
@@ -1,5 +1,5 @@
# SPDX-License-Identifier: MIT
# Copyright (C) 2024-2025, Advanced Micro Devices, Inc. All rights reserved.
# Copyright (C) 2024-2026, Advanced Micro Devices, Inc. All rights reserved.

import triton.language as tl
from aiter.ops.triton.utils._triton.pid_preprocessing import pid_grid, remap_xcd
@@ -1,5 +1,5 @@
# SPDX-License-Identifier: MIT
# Copyright (C) 2024-2025, Advanced Micro Devices, Inc. All rights reserved.
# Copyright (C) 2024-2026, Advanced Micro Devices, Inc. All rights reserved.

import triton.language as tl
from aiter.ops.triton.utils._triton.pid_preprocessing import pid_grid, remap_xcd