From 07b80a7b2c506ff6b4461f9ef806834ba71d0a20 Mon Sep 17 00:00:00 2001 From: Alexey Sotkin Date: Thu, 19 Aug 2021 15:38:52 +0300 Subject: [PATCH 1/3] Create SPV_INTEL_joint_matrix.asciidoc --- .../SPIRV/SPV_INTEL_joint_matrix.asciidoc | 338 ++++++++++++++++++ 1 file changed, 338 insertions(+) create mode 100644 sycl/doc/extensions/SPIRV/SPV_INTEL_joint_matrix.asciidoc diff --git a/sycl/doc/extensions/SPIRV/SPV_INTEL_joint_matrix.asciidoc b/sycl/doc/extensions/SPIRV/SPV_INTEL_joint_matrix.asciidoc new file mode 100644 index 0000000000000..19b1afb5198ae --- /dev/null +++ b/sycl/doc/extensions/SPIRV/SPV_INTEL_joint_matrix.asciidoc @@ -0,0 +1,338 @@ +:extension_name: SPV_INTEL_joint_matrix +:capability_name: JointMatrixINTEL +:capability_token: 6118 +:OpTypeJointMatrixINTEL_token: 6119 +:OpJointMatrixLoadINTEL_token: 6120 +:OpJointMatrixStoreINTEL_token: 6121 +:OpJointMatrixMadINTEL_token: 6122 + +{extension_name} +================ + + +== Name Strings + +{extension_name} + +== Contact + +To report problems with this extension, please open a new issue at: + +https://github.com/intel/llvm + +== Contributors + +- Alexey Sotkin, Intel + +- Dounia Khaldi, Intel + +- Mateusz, Belicki Intel + + +== Notice + +Copyright (c) 2021 Intel Corporation. All rights reserved. + +== Status + +Working Draft + +This is a preview extension specification, intended to provide early access to a +feature for review and community feedback. When the feature matures, this +specification may be released as a formal extension. + + +Because the interfaces defined by this specification are not final and are +subject to change they are not intended to be used by shipping software +products. If you are interested in using this feature in your software product, +please let us know! + +== Version + +[width="40%",cols="25,25"] +|======================================== +| Last Modified Date | {docdate} +| Revision | A +|======================================== + +== Dependencies + +This extension is written against the SPIR-V Specification, +Version 1.5 Revision 5. + +This extension requires SPIR-V 1.0. + +== Overview + +This extension adds a type and instructions for joint matrices. Such matrices +are shared among a group of work items and is not private to each work item. +The type introduced with this extension allows to specify memory scope and +layout of the matrix, including layouts optimized for particular hardware(AMX) . +New instructions also allow to specify synchronization scope. + +== Extension Name + + +To use this extension within a SPIR-V module, the appropriate *OpExtension* must +be present in the module: + +[subs="attributes"] +---- +OpExtension "{extension_name}" +---- + +== New Capabilities + +This extension introduces new capabilities: + +[subs="attributes"] +---- +{capability_name} +---- + +== New Instructions + +Instructions added under the *{capability_name}* capability: + +---- + +OpTypeJointMatrixINTEL +OpJointMatrixLoadINTEL +OpJointMatrixStoreINTEL +OpJointMatrixMADINTEL + +---- + +== Token Number Assignments + +[width="40%"] +[cols="70%,30%"] +[grid="rows"] +|==== +|*{capability_name}* | {capability_token} +|*OpTypeJointMatrixINTEL* | {OpTypeJointMatrixINTEL_token} +|*OpJointMatrixLoadINTEL* | {OpJointMatrixLoadINTEL_token} +|*OpJointMatrixStoreINTEL* | {OpJointMatrixStoreINTEL_token} +|*OpJointMatrixMadINTEL* | {OpJointMatrixMadINTEL_token} +|==== + +== Modifications to the SPIR-V Specification, Version 1.5 + +=== Matrix layout + +Add section 3.XX, Matrix layout. + +[options="header"] +|==== +2+^| Layout ^| Enabling capability +| 0 | *ColumnMajor* | *{capability_name}* +| 1 | *RowMajor* | *{capability_name}* +| 2 | *PackedA* + +Suitable for VNNI instructions | *{capability_name}* +| 3 | *PackedB* + +Suitable for VNNI instructions | *{capability_name}* +|==== + +=== Capabilities + +Modify Section 3.31, Capability, adding rows to the Capability table: + +-- +[options="header"] +|==== +2+^| Capability ^| Implicitly Declares +| {capability_token} | *{capability_name}* +| Reserved. + + + +See also extension: *{extension_name}* +|==== +-- + +=== Instructions + +==== 3.37.6 Type-Declaration Instructions + +[cols="1,1,6*3",width="100%"] +|===== +7+|[[OpTypeJointMatrixINTEL]]*OpTypeJointMatrixINTEL* + + + +Declare a matrix type. + + + +'Component Type' is the type of each component in the resulting type. It must be +a scalar 'numerical type'. + + + +'Row Count' is the number of rows in the matrix type. It must be a constant +unsigned 32-bit integer. Behavior is undefined when 'Row Count' is 0 or +<>. + + + +'Column Count' is the number of columns in the matrix type. It must be a +constant unsigned 32-bit integer. Behavior is undefined when 'Column Count' is +0 or <>. + + + +'Layout' indicates how the values are arranged internally in the matrix type. +It must be the result of a constant instruction. + + + +'Scope' is memory scope for operations on the matrix. It must be the +result of a constant instruction with scalar 'integer type'. + + + +1+|Capability: + +*{capability_name}* +1+| 7 | {OpTypeJointMatrixINTEL_token} +| 'Result ' +| '' + +'Component Type' +| '' + +'Row Count' +| '' + +'Column Count' +| '' + +'Layout' +| '' + +'Scope' +|===== + + +==== 3.37.8. Memory Instructions + +[cols="1,1,7*3",width="100%"] +|===== +8+|[[OpJointMatrixLoadINTEL]]*OpJointMatrixLoadINTEL* + + + +Load a matrix through a pointer. + + + +'Result Type' is the type of the loaded matrix. It must be +<>. + + + +'Pointer' is the pointer to load through. It specifies start of memory region +where elements of the matrix are stored and arranged according to 'Layout'. + + + +'Stride' is the number of elements in memory between beginnings of successive +rows, columns (or words) in the result. It must be a scalar integer type. + + + +'Layout' indicates how the values loaded from memory are arranged. +It must be the result of a constant instruction. + + + +'Scope' is syncronization scope for operation on the matrix. It must be the +result of a constant instruction with scalar 'integer type'. + + + +If present, any 'Memory Operands' must begin with a +<> literal. If not present, it is the same as +specifying the <> *None*. + + +1+|Capability: + +*{capability_name}* +1+| 7 + variable | {OpJointMatrixLoadINTEL_token} +| '' + +'Result Type' +|'Result ' +| '' + +'Pointer' +| '' + +'Stride' +| '' + +'<>' +| '' + +'Scope' +| Optional + +'Memory Access' +|===== + + +[cols="1,1,6*3",width="100%"] +|===== +7+|[[OpJointMatrixStoreINTEL]]*OpJointMatrixStoreINTEL* + + + +Store a matrix through a pointer. + + + +'Pointer' is the pointer to store through. It specifies start of memory region +where elements of the matrix must be stored and arranged according to 'Layout'. + + + +'Object' is the matrix to store. It must be +<>. + + + +'Stride' is the number of elements in memory between beginnings of successive +rows, columns (or words) of the 'Object'. It must be a scalar integer type. + + + +'Layout' indicates how the values stored to memory are arranged. It must be the +result of a constant instruction. + + + +'Scope' is syncronization scope for operation on the matrix. It must be the +result of a constant instruction with scalar 'integer type'. + + + +If present, any 'Memory Operands' must begin with a +<> literal. If not present, it is the same as +specifying the <> *None*. + + +1+|Capability: + +*{capability_name}* +1+| 6 + variable | {OpJointMatrixStoreINTEL_token} +| '' + +'Pointer' +| '' + +'Object' +| '' + +'Stride' +| '' + +'<>' +| '' + +'Scope' +| Optional + +'Memory Access' +|===== + + +==== 3.37.13. Arithmetic Instructions + +[cols="1,1,6*3",width="100%"] +|===== +7+|[[OpJointMatrixMadINTEL]]*OpJointMatrixMadINTEL* + + + +Multiply matrix 'A' by matrix 'B' and add matrix 'C' to the result of the +multiplication: `A*B+C`. Here 'A' is a `M x K` matrix, 'B' is a `K x N` +matrix and 'C' is a `M x N` matrix. + + + +Behavior is undefined if sizes of operands do not meet the conditions above. +All operands and the 'Ruslt Type' must be +<>. + + + +'A' must be a <> with +'Rown Count' equals to 'M' and 'Column Count' equals to 'K' + + + +'B' must be a <> with +'Rown Count' equals to 'K' and 'Column Count' equals to 'N' + + + +'C' and 'Result Type' must be a +<> with 'Rown Count' equals to +'M' and 'Column Count' equals to 'N' + + + +'Scope' is syncronization scope for operation on the matrix. It must be the +result of a constant instruction with scalar 'integer type'. + + +1+|Capability: + +*{capability_name}* +1+| 7 | {OpJointMatrixMadINTEL_token} +| '' + +'Result Type' +|'Result ' +| '' + +'A' +| '' + +'B' +| '' + +'C' +| '' + +'Scope' +|===== + +=== Issues + +None + +Revision History +---------------- + +[cols="5,15,15,70"] +[grid="rows"] +[options="header"] +|======================================== +|Rev|Date|Author|Changes +|1|2021-02-16|Alexey Sotkin|Initial revision +|======================================== From e3a3b8710ad8f4636ca128f9152eb59b369e4073 Mon Sep 17 00:00:00 2001 From: Dmitry Sidorov Date: Mon, 6 Sep 2021 10:22:53 +0300 Subject: [PATCH 2/3] [SYCL][DOC] split OpJointMatrixMadINTEL instruction into 4 To express signees of its operands Signed-off-by: Dmitry Sidorov --- .../SPIRV/SPV_INTEL_joint_matrix.asciidoc | 158 +++++++++++++++++- 1 file changed, 153 insertions(+), 5 deletions(-) diff --git a/sycl/doc/extensions/SPIRV/SPV_INTEL_joint_matrix.asciidoc b/sycl/doc/extensions/SPIRV/SPV_INTEL_joint_matrix.asciidoc index 19b1afb5198ae..f0ffc9610fbe7 100644 --- a/sycl/doc/extensions/SPIRV/SPV_INTEL_joint_matrix.asciidoc +++ b/sycl/doc/extensions/SPIRV/SPV_INTEL_joint_matrix.asciidoc @@ -5,6 +5,9 @@ :OpJointMatrixLoadINTEL_token: 6120 :OpJointMatrixStoreINTEL_token: 6121 :OpJointMatrixMadINTEL_token: 6122 +:OpJointMatrixSUMadINTEL_token: 6128 +:OpJointMatrixUSMadINTEL_token: 6129 +:OpJointMatrixUUMadINTEL_token: 6130 {extension_name} ================ @@ -25,6 +28,7 @@ https://github.com/intel/llvm - Alexey Sotkin, Intel + - Dounia Khaldi, Intel + - Mateusz, Belicki Intel + +- Dmitry Sidorov, Intel + == Notice @@ -97,6 +101,9 @@ OpTypeJointMatrixINTEL OpJointMatrixLoadINTEL OpJointMatrixStoreINTEL OpJointMatrixMADINTEL +OpJointMatrixSUMADINTEL +OpJointMatrixUSMADINTEL +OpJointMatrixUUMADINTEL ---- @@ -107,10 +114,13 @@ OpJointMatrixMADINTEL [grid="rows"] |==== |*{capability_name}* | {capability_token} -|*OpTypeJointMatrixINTEL* | {OpTypeJointMatrixINTEL_token} -|*OpJointMatrixLoadINTEL* | {OpJointMatrixLoadINTEL_token} -|*OpJointMatrixStoreINTEL* | {OpJointMatrixStoreINTEL_token} -|*OpJointMatrixMadINTEL* | {OpJointMatrixMadINTEL_token} +|*OpTypeJointMatrixINTEL* | {OpTypeJointMatrixINTEL_token} +|*OpJointMatrixLoadINTEL* | {OpJointMatrixLoadINTEL_token} +|*OpJointMatrixStoreINTEL* | {OpJointMatrixStoreINTEL_token} +|*OpJointMatrixMadINTEL* | {OpJointMatrixMadINTEL_token} +|*OpJointMatrixSUMadINTEL* | {OpJointMatrixSUMadINTEL_token} +|*OpJointMatrixUSMadINTEL* | {OpJointMatrixUSMadINTEL_token} +|*OpJointMatrixUUMadINTEL* | {OpJointMatrixUUMadINTEL_token} |==== == Modifications to the SPIR-V Specification, Version 1.5 @@ -290,13 +300,21 @@ multiplication: `A*B+C`. Here 'A' is a `M x K` matrix, 'B' is a `K x N` matrix and 'C' is a `M x N` matrix. + + Behavior is undefined if sizes of operands do not meet the conditions above. -All operands and the 'Ruslt Type' must be +All operands and the 'Result Type' must be <>. + + 'A' must be a <> with +'Component Type' be a signed 'numerical type' or 16-bit unsigned integer type +(which stands for 'bfloat16' type) in case if 'B' is of 16-bit unsigned integer +<> type as well and 'C' is of +'float' <> type. 'Rown Count' equals to 'M' and 'Column Count' equals to 'K' + + 'B' must be a <> with +'Component Type' be a signed 'numerical type' or 16-bit unsigned integer type +(which stands for 'bfloat16' type) in case if 'A' is of 16-bit unsigned integer +<> type as well and 'C' is of +'float' <> type. 'Rown Count' equals to 'K' and 'Column Count' equals to 'N' + + 'C' and 'Result Type' must be a @@ -322,6 +340,135 @@ result of a constant instruction with scalar 'integer type'. + 'Scope' |===== +[cols="1,1,6*3",width="100%"] +|===== +7+|[[OpJointMatrixSUMadINTEL]]*OpJointMatrixSUMadINTEL* + + + +Multiply matrix 'A' by matrix 'B' and add matrix 'C' to the result of the +multiplication: `A*B+C`. Here 'A' is a `M x K` matrix, 'B' is a `K x N` +matrix and 'C' is a `M x N` matrix. + + + +Behavior is undefined if sizes of operands do not meet the conditions above. +All operands and the 'Result Type' must be +<>. + + + +'A' must be a <> with +'Component Type' be a signed 'numerical type', 'Rown Count' equals to 'M' and +'Column Count' equals to 'K' + + + +'B' must be a <> with +'Component Type' be an unsigned 'numerical type', 'Rown Count' equals to 'K' and +'Column Count' equals to 'N' + + + +'C' and 'Result Type' must be a +<> with 'Rown Count' equals to +'M' and 'Column Count' equals to 'N' + + + +'Scope' is syncronization scope for operation on the matrix. It must be the +result of a constant instruction with scalar 'integer type'. + + +1+|Capability: + +*{capability_name}* +1+| 7 | {OpJointMatrixSUMadINTEL_token} +| '' + +'Result Type' +|'Result ' +| '' + +'A' +| '' + +'B' +| '' + +'C' +| '' + +'Scope' +|===== + +[cols="1,1,6*3",width="100%"] +|===== +7+|[[OpJointMatrixUSMadINTEL]]*OpJointMatrixUSMadINTEL* + + + +Multiply matrix 'A' by matrix 'B' and add matrix 'C' to the result of the +multiplication: `A*B+C`. Here 'A' is a `M x K` matrix, 'B' is a `K x N` +matrix and 'C' is a `M x N` matrix. + + + +Behavior is undefined if sizes of operands do not meet the conditions above. +All operands and the 'Result Type' must be +<>. + + + +'A' must be a <> with +'Component Type' be an unsigned 'numerical type', 'Rown Count' equals to 'M' and +'Column Count' equals to 'K' + + + +'B' must be a <> with +'Component Type' be a signed 'numerical type', 'Rown Count' equals to 'K' and +'Column Count' equals to 'N' + + + +'C' and 'Result Type' must be a +<> with 'Rown Count' equals to +'M' and 'Column Count' equals to 'N' + + + +'Scope' is syncronization scope for operation on the matrix. It must be the +result of a constant instruction with scalar 'integer type'. + + +1+|Capability: + +*{capability_name}* +1+| 7 | {OpJointMatrixUSMadINTEL_token} +| '' + +'Result Type' +|'Result ' +| '' + +'A' +| '' + +'B' +| '' + +'C' +| '' + +'Scope' +|===== + +[cols="1,1,6*3",width="100%"] +|===== +7+|[[OpJointMatrixUUMadINTEL]]*OpJointMatrixUUMadINTEL* + + + +Multiply matrix 'A' by matrix 'B' and add matrix 'C' to the result of the +multiplication: `A*B+C`. Here 'A' is a `M x K` matrix, 'B' is a `K x N` +matrix and 'C' is a `M x N` matrix. + + + +Behavior is undefined if sizes of operands do not meet the conditions above. +All operands and the 'Result Type' must be +<>. + + + +'A' must be a <> with +'Component Type' be an unsigned 'numerical type', 'Rown Count' equals to 'M' and +'Column Count' equals to 'K' + + + +'B' must be a <> with +'Component Type' be an unsigned 'numerical type', 'Rown Count' equals to 'K' and +'Column Count' equals to 'N' + + + +'C' and 'Result Type' must be a +<> with 'Rown Count' equals to +'M' and 'Column Count' equals to 'N' + + + +'Scope' is syncronization scope for operation on the matrix. It must be the +result of a constant instruction with scalar 'integer type'. + + +1+|Capability: + +*{capability_name}* +1+| 7 | {OpJointMatrixUUMadINTEL_token} +| '' + +'Result Type' +|'Result ' +| '' + +'A' +| '' + +'B' +| '' + +'C' +| '' + +'Scope' +|===== + === Issues None @@ -335,4 +482,5 @@ Revision History |======================================== |Rev|Date|Author|Changes |1|2021-02-16|Alexey Sotkin|Initial revision +|1|2021-09-06|Dmitry Sidorov|Split OpJointMatrixMadINTEL instruction into 4 |======================================== From df81a2b7ac0d3fc53a73f508622becc685051a0b Mon Sep 17 00:00:00 2001 From: Dmitry Sidorov Date: Mon, 11 Oct 2021 14:58:34 +0300 Subject: [PATCH 3/3] Little spelling fixes Signed-off-by: Dmitry Sidorov --- .../SPIRV/SPV_INTEL_joint_matrix.asciidoc | 52 ++++++++----------- 1 file changed, 23 insertions(+), 29 deletions(-) diff --git a/sycl/doc/extensions/SPIRV/SPV_INTEL_joint_matrix.asciidoc b/sycl/doc/extensions/SPIRV/SPV_INTEL_joint_matrix.asciidoc index f0ffc9610fbe7..1f819ecab5a71 100644 --- a/sycl/doc/extensions/SPIRV/SPV_INTEL_joint_matrix.asciidoc +++ b/sycl/doc/extensions/SPIRV/SPV_INTEL_joint_matrix.asciidoc @@ -303,22 +303,16 @@ Behavior is undefined if sizes of operands do not meet the conditions above. All operands and the 'Result Type' must be <>. + + -'A' must be a <> with -'Component Type' be a signed 'numerical type' or 16-bit unsigned integer type -(which stands for 'bfloat16' type) in case if 'B' is of 16-bit unsigned integer -<> type as well and 'C' is of -'float' <> type. -'Rown Count' equals to 'M' and 'Column Count' equals to 'K' + - + -'B' must be a <> with -'Component Type' be a signed 'numerical type' or 16-bit unsigned integer type -(which stands for 'bfloat16' type) in case if 'A' is of 16-bit unsigned integer -<> type as well and 'C' is of -'float' <> type. -'Rown Count' equals to 'K' and 'Column Count' equals to 'N' + +'A' must be a <> whose +'Component Type' is a signed 'numerical type', 'Row Count' equals to 'M' and +'Column Count' equals to 'K' + + + +'B' must be a <> whose +'Component Type' is a signed 'numerical type', 'Row Count' equals to 'K' and +'Column Count' equals to 'N' + + 'C' and 'Result Type' must be a -<> with 'Rown Count' equals to +<> with 'Row Count' equals to 'M' and 'Column Count' equals to 'N' + + 'Scope' is syncronization scope for operation on the matrix. It must be the @@ -352,16 +346,16 @@ Behavior is undefined if sizes of operands do not meet the conditions above. All operands and the 'Result Type' must be <>. + + -'A' must be a <> with -'Component Type' be a signed 'numerical type', 'Rown Count' equals to 'M' and +'A' must be a <> whose +'Component Type' is a signed 'numerical type', 'Row Count' equals to 'M' and 'Column Count' equals to 'K' + + -'B' must be a <> with -'Component Type' be an unsigned 'numerical type', 'Rown Count' equals to 'K' and +'B' must be a <> whose +'Component Type' is an unsigned 'numerical type', 'Row Count' equals to 'K' and 'Column Count' equals to 'N' + + 'C' and 'Result Type' must be a -<> with 'Rown Count' equals to +<> with 'Row Count' equals to 'M' and 'Column Count' equals to 'N' + + 'Scope' is syncronization scope for operation on the matrix. It must be the @@ -395,16 +389,16 @@ Behavior is undefined if sizes of operands do not meet the conditions above. All operands and the 'Result Type' must be <>. + + -'A' must be a <> with -'Component Type' be an unsigned 'numerical type', 'Rown Count' equals to 'M' and +'A' must be a <> whose +'Component Type' is an unsigned 'numerical type', 'Row Count' equals to 'M' and 'Column Count' equals to 'K' + + -'B' must be a <> with -'Component Type' be a signed 'numerical type', 'Rown Count' equals to 'K' and +'B' must be a <> whose +'Component Type' is a signed 'numerical type', 'Row Count' equals to 'K' and 'Column Count' equals to 'N' + + 'C' and 'Result Type' must be a -<> with 'Rown Count' equals to +<> with 'Row Count' equals to 'M' and 'Column Count' equals to 'N' + + 'Scope' is syncronization scope for operation on the matrix. It must be the @@ -438,16 +432,16 @@ Behavior is undefined if sizes of operands do not meet the conditions above. All operands and the 'Result Type' must be <>. + + -'A' must be a <> with -'Component Type' be an unsigned 'numerical type', 'Rown Count' equals to 'M' and +'A' must be a <> whose +'Component Type' is an unsigned 'numerical type', 'Row Count' equals to 'M' and 'Column Count' equals to 'K' + + -'B' must be a <> with -'Component Type' be an unsigned 'numerical type', 'Rown Count' equals to 'K' and +'B' must be a <> whose +'Component Type' is an unsigned 'numerical type', 'Row Count' equals to 'K' and 'Column Count' equals to 'N' + + 'C' and 'Result Type' must be a -<> with 'Rown Count' equals to +<> with 'Row Count' equals to 'M' and 'Column Count' equals to 'N' + + 'Scope' is syncronization scope for operation on the matrix. It must be the