From 45d7b1f278892ac504ddc1d61e13c6f95b371040 Mon Sep 17 00:00:00 2001 From: Milica Trifunovic Date: Tue, 2 Sep 2025 06:47:40 -0400 Subject: [PATCH 1/2] Disable HW monitor and update AsmCaps for gfx115X --- shared/tensile/Tensile/AsmCaps.py | 1 + shared/tensile/Tensile/Common.py | 2 +- .../docs/src/cli-reference/tensile-create-library-cli.rst | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/shared/tensile/Tensile/AsmCaps.py b/shared/tensile/Tensile/AsmCaps.py index d8516672e23..423d30cfc72 100644 --- a/shared/tensile/Tensile/AsmCaps.py +++ b/shared/tensile/Tensile/AsmCaps.py @@ -972,6 +972,7 @@ def nested_update_(left, right): (11, 0, 1): {'VOP3v_dot4_i32_i8': True}, (11, 0, 2): {'VOP3v_dot4_i32_i8': True}, (11, 0, 3): {'VOP3v_dot4_i32_i8': True}, + (11, 5, 0): {'VOP3v_dot4_i32_i8': True}, (11, 5, 1): {'VOP3v_dot4_i32_i8': True}, (12, 0, 0): {'VOP3v_dot4_i32_i8': True}, (12, 0, 1): {'VOP3v_dot4_i32_i8': True}} diff --git a/shared/tensile/Tensile/Common.py b/shared/tensile/Tensile/Common.py index 5336a3ac8a0..4237205dd1f 100644 --- a/shared/tensile/Tensile/Common.py +++ b/shared/tensile/Tensile/Common.py @@ -2464,7 +2464,7 @@ def assignGlobalParameters( config, capabilitiesCache: Optional[dict] = None ): if os.name == "nt": globalParameters["CurrentISA"] = (9,0,6) printWarning("Failed to detect ISA so forcing (gfx906) on windows") - isasWithDisabledHWMonitor = ((9,4,2), (9,5,0), (11,0,0), (11,0,1), (11,0,2), (12,0,0), (12,0,1)) + isasWithDisabledHWMonitor = ((9,4,2), (9,5,0), (11,0,0), (11,0,1), (11,0,2), (11,5,0), (11,5,1), (12,0,0), (12,0,1)) if globalParameters["CurrentISA"] in isasWithDisabledHWMonitor: isaString = ', '.join(map(gfxName, isasWithDisabledHWMonitor)) printWarning(f"HardwareMonitor currently disabled for {isaString}") diff --git a/shared/tensile/docs/src/cli-reference/tensile-create-library-cli.rst b/shared/tensile/docs/src/cli-reference/tensile-create-library-cli.rst index 45556ede66e..4826f94457e 100644 --- a/shared/tensile/docs/src/cli-reference/tensile-create-library-cli.rst +++ b/shared/tensile/docs/src/cli-reference/tensile-create-library-cli.rst @@ -56,7 +56,7 @@ Here is the list of optional arguments for invoking the ``TensileCreateLibrary`` Supported architectures include: all; gfx000; gfx803; gfx900; gfx900:xnack-; gfx906; gfx906:xnack+; gfx906:xnack-; gfx908; gfx908:xnack+; gfx908:xnack-; gfx90a; gfx90a:xnack+; gfx90a:xnack-; gfx940; gfx940:xnack+; gfx940:xnack-; gfx941; gfx941:xnack+; gfx941:xnack-; gfx942; gfx942:xnack+; gfx942:xnack-; gfx1010; gfx1011; gfx1012; gfx1030; gfx1031; gfx1032; gfx1034; gfx1035; - gfx1100; gfx1101; gfx1102; gfx1103. + gfx1100; gfx1101; gfx1102; gfx1103; gfx1150; gfx1151; gfx1200; gfx1201. * - \-\-build-client - Builds Tensile client executable that is used for stand alone benchmarking. This option is set by default. From 26dada339fd87f2737b913d97dc3e7fdd35c0ecd Mon Sep 17 00:00:00 2001 From: Milica Trifunovic Date: Tue, 2 Sep 2025 09:21:49 -0400 Subject: [PATCH 2/2] Complete gfx120X support --- .../Source/lib/include/Tensile/AMDGPU.hpp | 16 +++++++++++++++- .../lib/include/Tensile/PlaceholderLibrary.hpp | 6 ++++++ .../include/Tensile/Serialization/Predicates.hpp | 2 ++ 3 files changed, 23 insertions(+), 1 deletion(-) diff --git a/shared/tensile/Tensile/Source/lib/include/Tensile/AMDGPU.hpp b/shared/tensile/Tensile/Source/lib/include/Tensile/AMDGPU.hpp index 4c21b9f4db3..317250db16c 100644 --- a/shared/tensile/Tensile/Source/lib/include/Tensile/AMDGPU.hpp +++ b/shared/tensile/Tensile/Source/lib/include/Tensile/AMDGPU.hpp @@ -75,7 +75,9 @@ namespace Tensile gfx1102 = 1102, gfx1103 = 1103, gfx1150 = 1150, - gfx1151 = 1151 + gfx1151 = 1151, + gfx1200 = 1200, + gfx1201 = 1201 }; static std::string toString(Processor p) @@ -124,6 +126,10 @@ namespace Tensile return "gfx1150"; case AMDGPU::Processor::gfx1151: return "gfx1151"; + case AMDGPU::Processor::gfx1200: + return "gfx1200"; + case AMDGPU::Processor::gfx1201: + return "gfx1201"; } return ""; } @@ -198,6 +204,14 @@ namespace Tensile { return AMDGPU::Processor::gfx1151; } + else if(deviceString.find("gfx1200") != std::string::npos) + { + return AMDGPU::Processor::gfx1200; + } + else if(deviceString.find("gfx1201") != std::string::npos) + { + return AMDGPU::Processor::gfx1201; + } else { return static_cast(0); diff --git a/shared/tensile/Tensile/Source/lib/include/Tensile/PlaceholderLibrary.hpp b/shared/tensile/Tensile/Source/lib/include/Tensile/PlaceholderLibrary.hpp index 424c3899b5a..a21e584d291 100644 --- a/shared/tensile/Tensile/Source/lib/include/Tensile/PlaceholderLibrary.hpp +++ b/shared/tensile/Tensile/Source/lib/include/Tensile/PlaceholderLibrary.hpp @@ -60,6 +60,8 @@ namespace Tensile gfx1103, gfx1150, gfx1151, + gfx1200, + gfx1201, All }; @@ -112,6 +114,10 @@ namespace Tensile return "TensileLibrary_*_gfx1150"; case LazyLoadingInit::gfx1151: return "TensileLibrary_*_gfx1151"; + case LazyLoadingInit::gfx1200: + return "TensileLibrary_*_gfx1200"; + case LazyLoadingInit::gfx1201: + return "TensileLibrary_*_gfx1201"; case LazyLoadingInit::None: return ""; } diff --git a/shared/tensile/Tensile/Source/lib/include/Tensile/Serialization/Predicates.hpp b/shared/tensile/Tensile/Source/lib/include/Tensile/Serialization/Predicates.hpp index 3d85f13aeb0..361a0e14183 100644 --- a/shared/tensile/Tensile/Source/lib/include/Tensile/Serialization/Predicates.hpp +++ b/shared/tensile/Tensile/Source/lib/include/Tensile/Serialization/Predicates.hpp @@ -236,6 +236,8 @@ namespace Tensile iot::enumCase(io, value, "gfx1103", AMDGPU::Processor::gfx1103); iot::enumCase(io, value, "gfx1150", AMDGPU::Processor::gfx1150); iot::enumCase(io, value, "gfx1151", AMDGPU::Processor::gfx1151); + iot::enumCase(io, value, "gfx1200", AMDGPU::Processor::gfx1200); + iot::enumCase(io, value, "gfx1201", AMDGPU::Processor::gfx1201); } };