diff --git a/sycl/doc/extensions/experimental/sycl_ext_matrix/sycl_ext_oneapi_matrix.asciidoc b/sycl/doc/extensions/experimental/sycl_ext_matrix/sycl_ext_oneapi_matrix.asciidoc index cc91348636e0d..6ef599f0d65c8 100644 --- a/sycl/doc/extensions/experimental/sycl_ext_matrix/sycl_ext_oneapi_matrix.asciidoc +++ b/sycl/doc/extensions/experimental/sycl_ext_matrix/sycl_ext_oneapi_matrix.asciidoc @@ -1067,7 +1067,8 @@ XMX hardware. Note that these can be returned using ==== Intel AMX Supported Combinations This is currently available in devices with the architecture -`architecture::intel_cpu_spr`, and `architecture::intel_cpu_gnr`. +`architecture::intel_cpu_spr`, `architecture::intel_cpu_gnr` and +`architecture::intel_cpu_dmr`. In this architecture's implementation, the type of the C matrix must be the same as the type of the D matrix. Therefore, that common type is shown in a single column in the table below. @@ -1078,21 +1079,29 @@ is shown in a single column in the table below. | `matrix_type::uint8` | `matrix_type::uint8` | `matrix_type::sint32` | +<=+ 16 | +<=+ 16 | +<=+ 64 |`architecture::intel_cpu_spr`, `architecture::intel_cpu_gnr` +, `architecture::intel_cpu_dmr` | `matrix_type::uint8` | `matrix_type::sint8` | `matrix_type::sint32` | +<=+ 16 | +<=+ 16 | +<=+ 64 |`architecture::intel_cpu_spr`, `architecture::intel_cpu_gnr` +, `architecture::intel_cpu_dmr` | `matrix_type::sint8` | `matrix_type::uint8` | `matrix_type::sint32` | +<=+ 16 | +<=+ 16 | +<=+ 64 |`architecture::intel_cpu_spr`, `architecture::intel_cpu_gnr` +, `architecture::intel_cpu_dmr` | `matrix_type::sint8` | `matrix_type::sint8` | `matrix_type::sint32` | +<=+ 16 | +<=+ 16 | +<=+ 64 |`architecture::intel_cpu_spr`, `architecture::intel_cpu_gnr` +, `architecture::intel_cpu_dmr` | `matrix_type::bf16` | `matrix_type::bf16` | `matrix_type::fp32` | +<=+ 16 | +<=+ 16 | +<=+ 32 |`architecture::intel_cpu_spr`, `architecture::intel_cpu_gnr` +, `architecture::intel_cpu_dmr` | `matrix_type::fp16` | `matrix_type::fp16` | `matrix_type::fp32` | +<=+ 16 | +<=+ 16 | +<=+ 32 -|`architecture::intel_cpu_gnr` +|`architecture::intel_cpu_gnr`, `architecture::intel_cpu_dmr` +| `matrix_type::tf32` | `matrix_type::tf32` | +`matrix_type::fp32` | +<=+ 16 | +<=+ 16 | +<=+ 16 +|`architecture::intel_cpu_dmr` |====================== ==== Intel XMX Supported Combinations diff --git a/sycl/doc/extensions/experimental/sycl_ext_oneapi_device_architecture.asciidoc b/sycl/doc/extensions/experimental/sycl_ext_oneapi_device_architecture.asciidoc index de0bca6520147..418d8fd1ee4ef 100644 --- a/sycl/doc/extensions/experimental/sycl_ext_oneapi_device_architecture.asciidoc +++ b/sycl/doc/extensions/experimental/sycl_ext_oneapi_device_architecture.asciidoc @@ -163,6 +163,17 @@ Intel Xeon processor codenamed Granite Rapids. The utility of this enumeration is currently limited. See the section "Limitations with the experimental version" for details. +a| +[source] +---- +intel_cpu_dmr +---- +|- +| +Intel Xeon processor codenamed Diamond Rapids. +The utility of this enumeration is currently limited. +See the section "Limitations with the experimental version" for details. + 3+^|*Intel GPU family* a| @@ -1130,7 +1141,7 @@ option. These are the target names of the form "intel_gpu_*", "nvidia_gpu_*", or "amd_gpu_*". -The architecture enumerations `intel_cpu_spr` and `intel_cpu_gnr` do +The architecture enumerations `intel_cpu_spr`, `intel_cpu_gnr` and `intel_cpu_dmr` do not currently work with any of the APIs described in this extension. They cannot be used with the `if_architecture_is` function, the `device::ext_oneapi_architecture_is` function, or the diff --git a/sycl/include/sycl/ext/oneapi/experimental/device_architecture.def b/sycl/include/sycl/ext/oneapi/experimental/device_architecture.def index 8717a32041417..3f7d465634352 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/device_architecture.def +++ b/sycl/include/sycl/ext/oneapi/experimental/device_architecture.def @@ -30,6 +30,7 @@ __SYCL_ARCHITECTURE(unknown, 0x9900000000000000) __SYCL_ARCHITECTURE(x86_64, 0x0300000000000000) __SYCL_ARCHITECTURE(intel_cpu_spr, 0x0300000000000800) __SYCL_ARCHITECTURE(intel_cpu_gnr, 0x0300000000000900) +__SYCL_ARCHITECTURE(intel_cpu_dmr, 0x0300000000001000) // // Intel GPU architectures // diff --git a/sycl/source/detail/device_impl.cpp b/sycl/source/detail/device_impl.cpp index 205f5d14eada2..818ed193300f2 100644 --- a/sycl/source/detail/device_impl.cpp +++ b/sycl/source/detail/device_impl.cpp @@ -674,10 +674,10 @@ bool device_impl::has(aspect Aspect) const { using arch = sycl::ext::oneapi::experimental::architecture; const arch supported_archs[] = { arch::intel_cpu_spr, arch::intel_cpu_gnr, - arch::intel_gpu_pvc, arch::intel_gpu_dg2_g10, - arch::intel_gpu_dg2_g11, arch::intel_gpu_dg2_g12, - arch::intel_gpu_bmg_g21, arch::intel_gpu_lnl_m, - arch::intel_gpu_arl_h, + arch::intel_cpu_dmr, arch::intel_gpu_pvc, + arch::intel_gpu_dg2_g10, arch::intel_gpu_dg2_g11, + arch::intel_gpu_dg2_g12, arch::intel_gpu_bmg_g21, + arch::intel_gpu_lnl_m, arch::intel_gpu_arl_h, }; try { return std::any_of( diff --git a/sycl/source/detail/device_info.hpp b/sycl/source/detail/device_info.hpp index 3dbb0d89625b7..81ec78624477e 100644 --- a/sycl/source/detail/device_info.hpp +++ b/sycl/source/detail/device_info.hpp @@ -722,6 +722,7 @@ constexpr std::pair IntelGPUArchitectures[] = { constexpr std::pair IntelCPUArchitectures[] = { {8, oneapi_exp_arch::intel_cpu_spr}, {9, oneapi_exp_arch::intel_cpu_gnr}, + {10, oneapi_exp_arch::intel_cpu_dmr}, }; template <> @@ -843,6 +844,23 @@ struct get_device_info_impl< {16, 16, 32, 0, 0, 0, matrix_type::fp16, matrix_type::fp16, matrix_type::fp32, matrix_type::fp32}, }; + else if (architecture::intel_cpu_dmr == DeviceArch) + return { + {16, 16, 64, 0, 0, 0, matrix_type::uint8, matrix_type::uint8, + matrix_type::sint32, matrix_type::sint32}, + {16, 16, 64, 0, 0, 0, matrix_type::uint8, matrix_type::sint8, + matrix_type::sint32, matrix_type::sint32}, + {16, 16, 64, 0, 0, 0, matrix_type::sint8, matrix_type::uint8, + matrix_type::sint32, matrix_type::sint32}, + {16, 16, 64, 0, 0, 0, matrix_type::sint8, matrix_type::sint8, + matrix_type::sint32, matrix_type::sint32}, + {16, 16, 32, 0, 0, 0, matrix_type::bf16, matrix_type::bf16, + matrix_type::fp32, matrix_type::fp32}, + {16, 16, 32, 0, 0, 0, matrix_type::fp16, matrix_type::fp16, + matrix_type::fp32, matrix_type::fp32}, + {16, 16, 16, 0, 0, 0, matrix_type::tf32, matrix_type::tf32, + matrix_type::fp32, matrix_type::fp32}, + }; else if ((architecture::intel_gpu_pvc == DeviceArch) || (architecture::intel_gpu_bmg_g21 == DeviceArch) || (architecture::intel_gpu_lnl_m == DeviceArch)) {