From 50e206d091daf98685b27e401c9a6f6766eae83e Mon Sep 17 00:00:00 2001 From: Justin Erenkrantz Date: Mon, 27 Oct 2025 20:07:05 +0000 Subject: [PATCH] Tensile: add gfx1036 to cmake and add missing gfx1034-1036 to AsmCaps & Common. --- shared/tensile/Tensile/AsmCaps.py | 132 ++++++++++++++++++ shared/tensile/Tensile/Common.py | 2 +- .../cmake/TensileSupportedArchitectures.cmake | 1 + .../cmake/TensileSupportedArchitectures.cmake | 1 + 4 files changed, 135 insertions(+), 1 deletion(-) diff --git a/shared/tensile/Tensile/AsmCaps.py b/shared/tensile/Tensile/AsmCaps.py index cacc1848b7e..e3b62543d3e 100644 --- a/shared/tensile/Tensile/AsmCaps.py +++ b/shared/tensile/Tensile/AsmCaps.py @@ -653,6 +653,138 @@ def getCapabilitiesCache(rocmVersion: NamedTuple) -> dict: 'v_mov_b64': False, 'v_pk_fma_f16': True, 'v_pk_fmac_f16': False}, + (10, 3, 4): {'HasAddLshl': True, + 'HasAtomicAdd': False, + 'HasDirectToLdsDest': False, + 'HasDirectToLdsNoDest': True, + 'HasExplicitCO': True, + 'HasExplicitNC': True, + 'HasGLCModifier': True, + 'HasNTModifier': False, + 'HasLshlOr': True, + 'HasMFMA': False, + 'HasMFMA_b8': False, + 'HasMFMA_bf16_1k': False, + 'HasMFMA_bf16_original': False, + 'HasMFMA_constSrc': False, + 'HasMFMA_f64': False, + 'HasMFMA_f8': False, + 'HasMFMA_i8_908': False, + 'HasMFMA_i8_940': False, + 'HasMFMA_vgpr': False, + 'HasMFMA_xf32': False, + 'HasSMulHi': True, + 'HasWMMA': False, + 'KernargPreloading': False, + 'MaxLgkmcnt': 15, + 'MaxVmcnt': 63, + 'SupportedISA': True, + 'SupportedSource': True, + 'VOP3v_dot4_i32_i8': True, + 'v_dot2_f32_f16': True, + 'v_dot2c_f32_f16': True, + 'v_dot4_i32_i8': False, + 'v_dot4c_i32_i8': True, + 'v_fma_f16': True, + 'v_fma_f32': True, + 'v_fma_f64': True, + 'v_fma_mix_f32': True, + 'v_fmac_f16': False, + 'v_fmac_f32': True, + 'v_mac_f16': False, + 'v_mac_f32': False, + 'v_mad_mix_f32': False, + 'v_mov_b64': False, + 'v_pk_fma_f16': True, + 'v_pk_fmac_f16': False}, + (10, 3, 5): {'HasAddLshl': True, + 'HasAtomicAdd': False, + 'HasDirectToLdsDest': False, + 'HasDirectToLdsNoDest': True, + 'HasExplicitCO': True, + 'HasExplicitNC': True, + 'HasGLCModifier': True, + 'HasNTModifier': False, + 'HasLshlOr': True, + 'HasMFMA': False, + 'HasMFMA_b8': False, + 'HasMFMA_bf16_1k': False, + 'HasMFMA_bf16_original': False, + 'HasMFMA_constSrc': False, + 'HasMFMA_f64': False, + 'HasMFMA_f8': False, + 'HasMFMA_i8_908': False, + 'HasMFMA_i8_940': False, + 'HasMFMA_vgpr': False, + 'HasMFMA_xf32': False, + 'HasSMulHi': True, + 'HasWMMA': False, + 'KernargPreloading': False, + 'MaxLgkmcnt': 15, + 'MaxVmcnt': 63, + 'SupportedISA': True, + 'SupportedSource': True, + 'VOP3v_dot4_i32_i8': True, + 'v_dot2_f32_f16': True, + 'v_dot2c_f32_f16': True, + 'v_dot4_i32_i8': False, + 'v_dot4c_i32_i8': True, + 'v_fma_f16': True, + 'v_fma_f32': True, + 'v_fma_f64': True, + 'v_fma_mix_f32': True, + 'v_fmac_f16': False, + 'v_fmac_f32': True, + 'v_mac_f16': False, + 'v_mac_f32': False, + 'v_mad_mix_f32': False, + 'v_mov_b64': False, + 'v_pk_fma_f16': True, + 'v_pk_fmac_f16': False}, + (10, 3, 6): {'HasAddLshl': True, + 'HasAtomicAdd': False, + 'HasDirectToLdsDest': False, + 'HasDirectToLdsNoDest': True, + 'HasExplicitCO': True, + 'HasExplicitNC': True, + 'HasGLCModifier': True, + 'HasNTModifier': False, + 'HasLshlOr': True, + 'HasMFMA': False, + 'HasMFMA_b8': False, + 'HasMFMA_bf16_1k': False, + 'HasMFMA_bf16_original': False, + 'HasMFMA_constSrc': False, + 'HasMFMA_f64': False, + 'HasMFMA_f8': False, + 'HasMFMA_i8_908': False, + 'HasMFMA_i8_940': False, + 'HasMFMA_vgpr': False, + 'HasMFMA_xf32': False, + 'HasSMulHi': True, + 'HasWMMA': False, + 'KernargPreloading': False, + 'MaxLgkmcnt': 15, + 'MaxVmcnt': 63, + 'SupportedISA': True, + 'SupportedSource': True, + 'VOP3v_dot4_i32_i8': True, + 'v_dot2_f32_f16': True, + 'v_dot2c_f32_f16': True, + 'v_dot4_i32_i8': False, + 'v_dot4c_i32_i8': True, + 'v_fma_f16': True, + 'v_fma_f32': True, + 'v_fma_f64': True, + 'v_fma_mix_f32': True, + 'v_fmac_f16': False, + 'v_fmac_f32': True, + 'v_mac_f16': False, + 'v_mac_f32': False, + 'v_mad_mix_f32': False, + 'v_mov_b64': False, + 'v_pk_fma_f16': True, + 'v_pk_fmac_f16': False}, (11, 0, 0): {'HasAddLshl': True, 'HasAtomicAdd': True, 'HasDirectToLdsDest': False, diff --git a/shared/tensile/Tensile/Common.py b/shared/tensile/Tensile/Common.py index 86c6c577829..354d91f31ad 100644 --- a/shared/tensile/Tensile/Common.py +++ b/shared/tensile/Tensile/Common.py @@ -248,7 +248,7 @@ class DeveloperWarning(Warning): globalParameters["SupportedISA"] = [(8,0,3), (9,0,0), (9,0,6), (9,0,8), (9,0,10), (9,4,2), (9,5,0), - (10,1,0), (10,1,1), (10,1,2), (10,3,0), (10,3,1), (10,3,2), + (10,1,0), (10,1,1), (10,1,2), (10,3,0), (10,3,1), (10,3,2), (10,3,4), (10,3,5), (10,3,6), (11,0,0), (11,0,1), (11,0,2), (11,0,3), (11,5,0), (11,5,1), (12,0,0), (12,0,1)] # assembly kernels writer supports these architectures diff --git a/shared/tensile/Tensile/Source/cmake/TensileSupportedArchitectures.cmake b/shared/tensile/Tensile/Source/cmake/TensileSupportedArchitectures.cmake index e8a28d3bfed..3c587078fe0 100644 --- a/shared/tensile/Tensile/Source/cmake/TensileSupportedArchitectures.cmake +++ b/shared/tensile/Tensile/Source/cmake/TensileSupportedArchitectures.cmake @@ -45,6 +45,7 @@ if(NOT BUILD_ADDRESS_SANITIZER) "gfx1032" "gfx1034" "gfx1035" + "gfx1036" "gfx1100" "gfx1101" "gfx1102" diff --git a/shared/tensile/next-cmake/cmake/TensileSupportedArchitectures.cmake b/shared/tensile/next-cmake/cmake/TensileSupportedArchitectures.cmake index e8a28d3bfed..3c587078fe0 100644 --- a/shared/tensile/next-cmake/cmake/TensileSupportedArchitectures.cmake +++ b/shared/tensile/next-cmake/cmake/TensileSupportedArchitectures.cmake @@ -45,6 +45,7 @@ if(NOT BUILD_ADDRESS_SANITIZER) "gfx1032" "gfx1034" "gfx1035" + "gfx1036" "gfx1100" "gfx1101" "gfx1102"