[SYCL][Joint Matrix] Add DMR to device_architecture and matrix extensions (#16543)

This patch adds support for the Diamond Rapids CPU to the `sycl_ext_oneapi_device_architecture` and `sycl_ext_oneapi_matrix` extensions.
intel · Jan 8, 2025 · 107998a · 107998a
1 parent 5515791
commit 107998a
Show file tree

Hide file tree

Showing 5 changed files with 46 additions and 7 deletions.
diff --git a/sycl/doc/extensions/experimental/sycl_ext_matrix/sycl_ext_oneapi_matrix.asciidoc b/sycl/doc/extensions/experimental/sycl_ext_matrix/sycl_ext_oneapi_matrix.asciidoc
@@ -1067,7 +1067,8 @@ XMX hardware. Note that these can be returned using
 
 ==== Intel AMX Supported Combinations
 This is currently available in devices with the architecture
-`architecture::intel_cpu_spr`, and `architecture::intel_cpu_gnr`.
+`architecture::intel_cpu_spr`, `architecture::intel_cpu_gnr` and
+`architecture::intel_cpu_dmr`.
 In this architecture's implementation, the type of the C matrix must
 be the same as the type of the D matrix. Therefore, that common type
 is shown in a single column in the table below.
@@ -1078,21 +1079,29 @@ is shown in a single column in the table below.
 | `matrix_type::uint8`  | `matrix_type::uint8` |
 `matrix_type::sint32`  |  +<=+ 16 |  +<=+ 16 |  +<=+ 64
 |`architecture::intel_cpu_spr`, `architecture::intel_cpu_gnr`
+, `architecture::intel_cpu_dmr`
 | `matrix_type::uint8`  | `matrix_type::sint8` |
 `matrix_type::sint32`  |  +<=+ 16 |  +<=+ 16 |  +<=+ 64
 |`architecture::intel_cpu_spr`, `architecture::intel_cpu_gnr`
+, `architecture::intel_cpu_dmr`
 | `matrix_type::sint8`  | `matrix_type::uint8` |
 `matrix_type::sint32`  |  +<=+ 16 |  +<=+ 16 |  +<=+ 64
 |`architecture::intel_cpu_spr`, `architecture::intel_cpu_gnr`
+, `architecture::intel_cpu_dmr`
 | `matrix_type::sint8`  | `matrix_type::sint8` |
 `matrix_type::sint32`  |  +<=+ 16 |  +<=+ 16 |  +<=+ 64
 |`architecture::intel_cpu_spr`, `architecture::intel_cpu_gnr`
+, `architecture::intel_cpu_dmr`
 |  `matrix_type::bf16`       |  `matrix_type::bf16`   |
 `matrix_type::fp32`   |  +<=+ 16 |  +<=+ 16   |  +<=+ 32
 |`architecture::intel_cpu_spr`, `architecture::intel_cpu_gnr`
+, `architecture::intel_cpu_dmr`
 |  `matrix_type::fp16`       |  `matrix_type::fp16`   |
 `matrix_type::fp32`   |  +<=+ 16 |  +<=+ 16   |  +<=+ 32
-|`architecture::intel_cpu_gnr`
+|`architecture::intel_cpu_gnr`, `architecture::intel_cpu_dmr`
+|  `matrix_type::tf32`       |  `matrix_type::tf32`   |
+`matrix_type::fp32`   |  +<=+ 16 |  +<=+ 16   |  +<=+ 16
+|`architecture::intel_cpu_dmr`
 |======================
 
 ==== Intel XMX Supported Combinations

diff --git a/sycl/doc/extensions/experimental/sycl_ext_oneapi_device_architecture.asciidoc b/sycl/doc/extensions/experimental/sycl_ext_oneapi_device_architecture.asciidoc
@@ -163,6 +163,17 @@ Intel Xeon processor codenamed Granite Rapids.
 The utility of this enumeration is currently limited.
 See the section "Limitations with the experimental version" for details.
 
+a|
+[source]
+----
+intel_cpu_dmr
+----
+|-
+|
+Intel Xeon processor codenamed Diamond Rapids.
+The utility of this enumeration is currently limited.
+See the section "Limitations with the experimental version" for details.
+
 3+^|*Intel GPU family*
 
 a|
@@ -1130,7 +1141,7 @@ option.
 These are the target names of the form "intel_gpu_*", "nvidia_gpu_*", or
 "amd_gpu_*".
 
-The architecture enumerations `intel_cpu_spr` and `intel_cpu_gnr` do
+The architecture enumerations `intel_cpu_spr`, `intel_cpu_gnr` and `intel_cpu_dmr` do
 not currently work with any of the APIs described in this extension.
 They cannot be used with the `if_architecture_is` function, the
 `device::ext_oneapi_architecture_is` function, or the

diff --git a/sycl/include/sycl/ext/oneapi/experimental/device_architecture.def b/sycl/include/sycl/ext/oneapi/experimental/device_architecture.def
@@ -30,6 +30,7 @@ __SYCL_ARCHITECTURE(unknown, 0x9900000000000000)
 __SYCL_ARCHITECTURE(x86_64, 0x0300000000000000)
 __SYCL_ARCHITECTURE(intel_cpu_spr, 0x0300000000000800)
 __SYCL_ARCHITECTURE(intel_cpu_gnr, 0x0300000000000900)
+__SYCL_ARCHITECTURE(intel_cpu_dmr, 0x0300000000001000)
 //
 // Intel GPU architectures
 //

diff --git a/sycl/source/detail/device_impl.cpp b/sycl/source/detail/device_impl.cpp
@@ -674,10 +674,10 @@ bool device_impl::has(aspect Aspect) const {
     using arch = sycl::ext::oneapi::experimental::architecture;
     const arch supported_archs[] = {
         arch::intel_cpu_spr,     arch::intel_cpu_gnr,
-        arch::intel_gpu_pvc,     arch::intel_gpu_dg2_g10,
-        arch::intel_gpu_dg2_g11, arch::intel_gpu_dg2_g12,
-        arch::intel_gpu_bmg_g21, arch::intel_gpu_lnl_m,
-        arch::intel_gpu_arl_h,
+        arch::intel_cpu_dmr,     arch::intel_gpu_pvc,
+        arch::intel_gpu_dg2_g10, arch::intel_gpu_dg2_g11,
+        arch::intel_gpu_dg2_g12, arch::intel_gpu_bmg_g21,
+        arch::intel_gpu_lnl_m,   arch::intel_gpu_arl_h,
     };
     try {
       return std::any_of(

diff --git a/sycl/source/detail/device_info.hpp b/sycl/source/detail/device_info.hpp
@@ -722,6 +722,7 @@ constexpr std::pair<const int, oneapi_exp_arch> IntelGPUArchitectures[] = {
 constexpr std::pair<const int, oneapi_exp_arch> IntelCPUArchitectures[] = {
     {8, oneapi_exp_arch::intel_cpu_spr},
     {9, oneapi_exp_arch::intel_cpu_gnr},
+    {10, oneapi_exp_arch::intel_cpu_dmr},
 };
 
 template <>
@@ -843,6 +844,23 @@ struct get_device_info_impl<
           {16, 16, 32, 0, 0, 0, matrix_type::fp16, matrix_type::fp16,
            matrix_type::fp32, matrix_type::fp32},
       };
+    else if (architecture::intel_cpu_dmr == DeviceArch)
+      return {
+          {16, 16, 64, 0, 0, 0, matrix_type::uint8, matrix_type::uint8,
+           matrix_type::sint32, matrix_type::sint32},
+          {16, 16, 64, 0, 0, 0, matrix_type::uint8, matrix_type::sint8,
+           matrix_type::sint32, matrix_type::sint32},
+          {16, 16, 64, 0, 0, 0, matrix_type::sint8, matrix_type::uint8,
+           matrix_type::sint32, matrix_type::sint32},
+          {16, 16, 64, 0, 0, 0, matrix_type::sint8, matrix_type::sint8,
+           matrix_type::sint32, matrix_type::sint32},
+          {16, 16, 32, 0, 0, 0, matrix_type::bf16, matrix_type::bf16,
+           matrix_type::fp32, matrix_type::fp32},
+          {16, 16, 32, 0, 0, 0, matrix_type::fp16, matrix_type::fp16,
+           matrix_type::fp32, matrix_type::fp32},
+          {16, 16, 16, 0, 0, 0, matrix_type::tf32, matrix_type::tf32,
+           matrix_type::fp32, matrix_type::fp32},
+      };
     else if ((architecture::intel_gpu_pvc == DeviceArch) ||
              (architecture::intel_gpu_bmg_g21 == DeviceArch) ||
              (architecture::intel_gpu_lnl_m == DeviceArch)) {