[RISCV] Support Xsfvfwmaccqqq extensions #68296

4vtomat · 2023-10-05T10:01:14Z

Bfloat16 Matrix Multiply Accumulate Instruction
https://sifive.cdn.prismic.io/sifive/c391d53e-ffcf-4091-82f6-c37bf3e883ed_xsfvfwmaccqqq-spec.pdf

llvmbot · 2023-10-05T10:03:30Z

@llvm/pr-subscribers-clang
@llvm/pr-subscribers-llvm-ir
@llvm/pr-subscribers-mc
@llvm/pr-subscribers-backend-risc-v

@llvm/pr-subscribers-llvm-support

Changes

Bfloat16 Matrix Multiply Accumulate Instruction
https://sifive.cdn.prismic.io/sifive/c391d53e-ffcf-4091-82f6-c37bf3e883ed_xsfvfwmaccqqq-spec.pdf

Patch is 39.83 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/68296.diff

18 Files Affected:

(modified) clang/include/clang/Basic/riscv_sifive_vector.td (+12)
(modified) clang/include/clang/Support/RISCVVIntrinsicUtils.h (+9-8)
(modified) clang/lib/Sema/SemaRISCVVectorLookup.cpp (+1)
(added) clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/sf_vfwmacc_4x4x4.c (+57)
(added) clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/sf_vfwmacc_4x4x4.c (+57)
(added) clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/policy/non-overloaded/sf_vfwmacc_4x4x4.c (+57)
(added) clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/policy/overloaded/sf_vfwmacc_4x4x4.c (+57)
(modified) clang/test/Sema/rvv-required-features-invalid.c (+4)
(modified) clang/test/Sema/rvv-required-features.c (+6-1)
(modified) clang/utils/TableGen/RISCVVEmitter.cpp (+1)
(modified) llvm/include/llvm/IR/IntrinsicsRISCVXsf.td (+11)
(modified) llvm/lib/Support/RISCVISAInfo.cpp (+3)
(modified) llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp (+2)
(modified) llvm/lib/Target/RISCV/RISCVFeatures.td (+8)
(modified) llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td (+53)
(added) llvm/test/CodeGen/RISCV/rvv/sf_vfwmacc_4x4x4.ll (+195)
(added) llvm/test/MC/RISCV/rvv/xsfvfwmacc.s (+15)
(modified) llvm/unittests/Support/RISCVISAInfoTest.cpp (+1)

diff --git a/clang/include/clang/Basic/riscv_sifive_vector.td b/clang/include/clang/Basic/riscv_sifive_vector.td
index 6583a7eb7b2e59b..d9be21a2a602944 100644
--- a/clang/include/clang/Basic/riscv_sifive_vector.td
+++ b/clang/include/clang/Basic/riscv_sifive_vector.td
@@ -103,3 +103,15 @@ let SupportOverloading = false in {
     defm sf_vc_v_fvw : RVVVCIXBuiltinSet<["si"],  "UwKzUwUvFe", [-1, 0, 2, 3], UseGPR=0>;
   }
 }
+
+multiclass RVVVFWMACCBuiltinSet<list<list<string>> suffixes_prototypes> {
+  let OverloadedName = NAME,
+      Name = NAME,
+      HasMasked = false,
+      Log2LMUL = [-2, -1, 0, 1, 2] in
+    defm NAME : RVVOutOp1Op2BuiltinSet<NAME, "x", suffixes_prototypes>;
+}
+
+let UnMaskedPolicyScheme = HasPolicyOperand in
+  let RequiredFeatures = ["Xsfvfwmaccqqq"] in
+    defm sf_vfwmacc_4x4x4 : RVVVFWMACCBuiltinSet<[["", "w", "wwSvv"]]>;
diff --git a/clang/include/clang/Support/RISCVVIntrinsicUtils.h b/clang/include/clang/Support/RISCVVIntrinsicUtils.h
index 8ba57d77221dc52..c6ce4884cdbb005 100644
--- a/clang/include/clang/Support/RISCVVIntrinsicUtils.h
+++ b/clang/include/clang/Support/RISCVVIntrinsicUtils.h
@@ -485,14 +485,15 @@ enum RVVRequire : uint16_t {
   RVV_REQ_RV64 = 1 << 0,
   RVV_REQ_ZvfhminOrZvfh = 1 << 1,
   RVV_REQ_Xsfvcp = 1 << 2,
-  RVV_REQ_Zvbb = 1 << 3,
-  RVV_REQ_Zvbc = 1 << 4,
-  RVV_REQ_Zvkb = 1 << 5,
-  RVV_REQ_Zvkg = 1 << 6,
-  RVV_REQ_Zvkned = 1 << 7,
-  RVV_REQ_Zvknha = 1 << 8,
-  RVV_REQ_Zvksed = 1 << 9,
-  RVV_REQ_Zvksh = 1 << 10,
+  RVV_REQ_Xsfvfwmaccqqq = 1 << 3,
+  RVV_REQ_Zvbb = 1 << 4,
+  RVV_REQ_Zvbc = 1 << 5,
+  RVV_REQ_Zvkb = 1 << 6,
+  RVV_REQ_Zvkg = 1 << 7,
+  RVV_REQ_Zvkned = 1 << 8,
+  RVV_REQ_Zvknha = 1 << 9,
+  RVV_REQ_Zvksed = 1 << 10,
+  RVV_REQ_Zvksh = 1 << 11,
 
   LLVM_MARK_AS_BITMASK_ENUM(RVV_REQ_Zvksh)
 };
diff --git a/clang/lib/Sema/SemaRISCVVectorLookup.cpp b/clang/lib/Sema/SemaRISCVVectorLookup.cpp
index ae584dc68719901..97ebeed8ca4375d 100644
--- a/clang/lib/Sema/SemaRISCVVectorLookup.cpp
+++ b/clang/lib/Sema/SemaRISCVVectorLookup.cpp
@@ -205,6 +205,7 @@ void RISCVIntrinsicManagerImpl::ConstructRVVIntrinsics(
   static const std::pair<const char *, RVVRequire> FeatureCheckList[] = {
       {"64bit", RVV_REQ_RV64},
       {"xsfvcp", RVV_REQ_Xsfvcp},
+      {"xsfvfwmaccqqq", RVV_REQ_Xsfvfwmaccqqq},
       {"experimental-zvbb", RVV_REQ_Zvbb},
       {"experimental-zvbc", RVV_REQ_Zvbc},
       {"experimental-zvkb", RVV_REQ_Zvkb},
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/sf_vfwmacc_4x4x4.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/sf_vfwmacc_4x4x4.c
new file mode 100644
index 000000000000000..185b8f236b62a8d
--- /dev/null
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/sf_vfwmacc_4x4x4.c
@@ -0,0 +1,57 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2
+// REQUIRES: riscv-registered-target
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zvfh -target-feature +xsfvfwmaccqqq \
+// RUN:   -disable-O0-optnone -emit-llvm %s -o - | opt -S -passes=mem2reg | \
+// RUN:   FileCheck --check-prefix=CHECK-RV64 %s
+
+#include <sifive_vector.h>
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 1 x float> @test_sf_vfwmacc_4x4x4_f32mf2
+// CHECK-RV64-SAME: (<vscale x 1 x float> [[VD:%.*]], <vscale x 4 x half> [[VS1:%.*]], <vscale x 1 x half> [[VS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x float> @llvm.riscv.sf.vfwmacc.4x4x4.nxv1f32.nxv4f16.nxv1f16.i64(<vscale x 1 x float> [[VD]], <vscale x 4 x half> [[VS1]], <vscale x 1 x half> [[VS2]], i64 [[VL]], i64 3)
+// CHECK-RV64-NEXT:    ret <vscale x 1 x float> [[TMP0]]
+//
+vfloat32mf2_t test_sf_vfwmacc_4x4x4_f32mf2(vfloat32mf2_t vd, vfloat16m1_t vs1, vfloat16mf4_t vs2, size_t vl) {
+  return __riscv_sf_vfwmacc_4x4x4_f32mf2(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 2 x float> @test_sf_vfwmacc_4x4x4_f32m1
+// CHECK-RV64-SAME: (<vscale x 2 x float> [[VD:%.*]], <vscale x 4 x half> [[VS1:%.*]], <vscale x 2 x half> [[VS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x float> @llvm.riscv.sf.vfwmacc.4x4x4.nxv2f32.nxv4f16.nxv2f16.i64(<vscale x 2 x float> [[VD]], <vscale x 4 x half> [[VS1]], <vscale x 2 x half> [[VS2]], i64 [[VL]], i64 3)
+// CHECK-RV64-NEXT:    ret <vscale x 2 x float> [[TMP0]]
+//
+vfloat32m1_t test_sf_vfwmacc_4x4x4_f32m1(vfloat32m1_t vd, vfloat16m1_t vs1, vfloat16mf2_t vs2, size_t vl) {
+  return __riscv_sf_vfwmacc_4x4x4_f32m1(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 4 x float> @test_sf_vfwmacc_4x4x4_f32m2
+// CHECK-RV64-SAME: (<vscale x 4 x float> [[VD:%.*]], <vscale x 4 x half> [[VS1:%.*]], <vscale x 4 x half> [[VS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x float> @llvm.riscv.sf.vfwmacc.4x4x4.nxv4f32.nxv4f16.nxv4f16.i64(<vscale x 4 x float> [[VD]], <vscale x 4 x half> [[VS1]], <vscale x 4 x half> [[VS2]], i64 [[VL]], i64 3)
+// CHECK-RV64-NEXT:    ret <vscale x 4 x float> [[TMP0]]
+//
+vfloat32m2_t test_sf_vfwmacc_4x4x4_f32m2(vfloat32m2_t vd, vfloat16m1_t vs1, vfloat16m1_t vs2, size_t vl) {
+  return __riscv_sf_vfwmacc_4x4x4_f32m2(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 8 x float> @test_sf_vfwmacc_4x4x4_f32m4
+// CHECK-RV64-SAME: (<vscale x 8 x float> [[VD:%.*]], <vscale x 4 x half> [[VS1:%.*]], <vscale x 8 x half> [[VS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x float> @llvm.riscv.sf.vfwmacc.4x4x4.nxv8f32.nxv4f16.nxv8f16.i64(<vscale x 8 x float> [[VD]], <vscale x 4 x half> [[VS1]], <vscale x 8 x half> [[VS2]], i64 [[VL]], i64 3)
+// CHECK-RV64-NEXT:    ret <vscale x 8 x float> [[TMP0]]
+//
+vfloat32m4_t test_sf_vfwmacc_4x4x4_f32m4(vfloat32m4_t vd, vfloat16m1_t vs1, vfloat16m2_t vs2, size_t vl) {
+  return __riscv_sf_vfwmacc_4x4x4_f32m4(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 16 x float> @test_sf_vfwmacc_4x4x4_f32m8
+// CHECK-RV64-SAME: (<vscale x 16 x float> [[VD:%.*]], <vscale x 4 x half> [[VS1:%.*]], <vscale x 16 x half> [[VS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x float> @llvm.riscv.sf.vfwmacc.4x4x4.nxv16f32.nxv4f16.nxv16f16.i64(<vscale x 16 x float> [[VD]], <vscale x 4 x half> [[VS1]], <vscale x 16 x half> [[VS2]], i64 [[VL]], i64 3)
+// CHECK-RV64-NEXT:    ret <vscale x 16 x float> [[TMP0]]
+//
+vfloat32m8_t test_sf_vfwmacc_4x4x4_f32m8(vfloat32m8_t vd, vfloat16m1_t vs1, vfloat16m4_t vs2, size_t vl) {
+  return __riscv_sf_vfwmacc_4x4x4_f32m8(vd, vs1, vs2, vl);
+}
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/sf_vfwmacc_4x4x4.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/sf_vfwmacc_4x4x4.c
new file mode 100644
index 000000000000000..a07782821327cec
--- /dev/null
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/sf_vfwmacc_4x4x4.c
@@ -0,0 +1,57 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2
+// REQUIRES: riscv-registered-target
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zvfh -target-feature +xsfvfwmaccqqq \
+// RUN:   -disable-O0-optnone -emit-llvm %s -o - | opt -S -passes=mem2reg | \
+// RUN:   FileCheck --check-prefix=CHECK-RV64 %s
+
+#include <sifive_vector.h>
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 1 x float> @test_sf_vfwmacc_4x4x4_f32mf2
+// CHECK-RV64-SAME: (<vscale x 1 x float> [[VD:%.*]], <vscale x 4 x half> [[VS1:%.*]], <vscale x 1 x half> [[VS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x float> @llvm.riscv.sf.vfwmacc.4x4x4.nxv1f32.nxv4f16.nxv1f16.i64(<vscale x 1 x float> [[VD]], <vscale x 4 x half> [[VS1]], <vscale x 1 x half> [[VS2]], i64 [[VL]], i64 3)
+// CHECK-RV64-NEXT:    ret <vscale x 1 x float> [[TMP0]]
+//
+vfloat32mf2_t test_sf_vfwmacc_4x4x4_f32mf2(vfloat32mf2_t vd, vfloat16m1_t vs1, vfloat16mf4_t vs2, size_t vl) {
+  return __riscv_sf_vfwmacc_4x4x4(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 2 x float> @test_sf_vfwmacc_4x4x4_f32m1
+// CHECK-RV64-SAME: (<vscale x 2 x float> [[VD:%.*]], <vscale x 4 x half> [[VS1:%.*]], <vscale x 2 x half> [[VS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x float> @llvm.riscv.sf.vfwmacc.4x4x4.nxv2f32.nxv4f16.nxv2f16.i64(<vscale x 2 x float> [[VD]], <vscale x 4 x half> [[VS1]], <vscale x 2 x half> [[VS2]], i64 [[VL]], i64 3)
+// CHECK-RV64-NEXT:    ret <vscale x 2 x float> [[TMP0]]
+//
+vfloat32m1_t test_sf_vfwmacc_4x4x4_f32m1(vfloat32m1_t vd, vfloat16m1_t vs1, vfloat16mf2_t vs2, size_t vl) {
+  return __riscv_sf_vfwmacc_4x4x4(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 4 x float> @test_sf_vfwmacc_4x4x4_f32m2
+// CHECK-RV64-SAME: (<vscale x 4 x float> [[VD:%.*]], <vscale x 4 x half> [[VS1:%.*]], <vscale x 4 x half> [[VS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x float> @llvm.riscv.sf.vfwmacc.4x4x4.nxv4f32.nxv4f16.nxv4f16.i64(<vscale x 4 x float> [[VD]], <vscale x 4 x half> [[VS1]], <vscale x 4 x half> [[VS2]], i64 [[VL]], i64 3)
+// CHECK-RV64-NEXT:    ret <vscale x 4 x float> [[TMP0]]
+//
+vfloat32m2_t test_sf_vfwmacc_4x4x4_f32m2(vfloat32m2_t vd, vfloat16m1_t vs1, vfloat16m1_t vs2, size_t vl) {
+  return __riscv_sf_vfwmacc_4x4x4(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 8 x float> @test_sf_vfwmacc_4x4x4_f32m4
+// CHECK-RV64-SAME: (<vscale x 8 x float> [[VD:%.*]], <vscale x 4 x half> [[VS1:%.*]], <vscale x 8 x half> [[VS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x float> @llvm.riscv.sf.vfwmacc.4x4x4.nxv8f32.nxv4f16.nxv8f16.i64(<vscale x 8 x float> [[VD]], <vscale x 4 x half> [[VS1]], <vscale x 8 x half> [[VS2]], i64 [[VL]], i64 3)
+// CHECK-RV64-NEXT:    ret <vscale x 8 x float> [[TMP0]]
+//
+vfloat32m4_t test_sf_vfwmacc_4x4x4_f32m4(vfloat32m4_t vd, vfloat16m1_t vs1, vfloat16m2_t vs2, size_t vl) {
+  return __riscv_sf_vfwmacc_4x4x4(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 16 x float> @test_sf_vfwmacc_4x4x4_f32m8
+// CHECK-RV64-SAME: (<vscale x 16 x float> [[VD:%.*]], <vscale x 4 x half> [[VS1:%.*]], <vscale x 16 x half> [[VS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x float> @llvm.riscv.sf.vfwmacc.4x4x4.nxv16f32.nxv4f16.nxv16f16.i64(<vscale x 16 x float> [[VD]], <vscale x 4 x half> [[VS1]], <vscale x 16 x half> [[VS2]], i64 [[VL]], i64 3)
+// CHECK-RV64-NEXT:    ret <vscale x 16 x float> [[TMP0]]
+//
+vfloat32m8_t test_sf_vfwmacc_4x4x4_f32m8(vfloat32m8_t vd, vfloat16m1_t vs1, vfloat16m4_t vs2, size_t vl) {
+  return __riscv_sf_vfwmacc_4x4x4(vd, vs1, vs2, vl);
+}
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/policy/non-overloaded/sf_vfwmacc_4x4x4.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/policy/non-overloaded/sf_vfwmacc_4x4x4.c
new file mode 100644
index 000000000000000..e55ecb324c74011
--- /dev/null
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/policy/non-overloaded/sf_vfwmacc_4x4x4.c
@@ -0,0 +1,57 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2
+// REQUIRES: riscv-registered-target
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zvfh -target-feature +xsfvfwmaccqqq \
+// RUN:   -disable-O0-optnone -emit-llvm %s -o - | opt -S -passes=mem2reg | \
+// RUN:   FileCheck --check-prefix=CHECK-RV64 %s
+
+#include <sifive_vector.h>
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 1 x float> @test_sf_vfwmacc_4x4x4_f32mf2_tu
+// CHECK-RV64-SAME: (<vscale x 1 x float> [[VD:%.*]], <vscale x 4 x half> [[VS1:%.*]], <vscale x 1 x half> [[VS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x float> @llvm.riscv.sf.vfwmacc.4x4x4.nxv1f32.nxv4f16.nxv1f16.i64(<vscale x 1 x float> [[VD]], <vscale x 4 x half> [[VS1]], <vscale x 1 x half> [[VS2]], i64 [[VL]], i64 2)
+// CHECK-RV64-NEXT:    ret <vscale x 1 x float> [[TMP0]]
+//
+vfloat32mf2_t test_sf_vfwmacc_4x4x4_f32mf2_tu(vfloat32mf2_t vd, vfloat16m1_t vs1, vfloat16mf4_t vs2, size_t vl) {
+  return __riscv_sf_vfwmacc_4x4x4_f32mf2_tu(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 2 x float> @test_sf_vfwmacc_4x4x4_f32m1
+// CHECK-RV64-SAME: (<vscale x 2 x float> [[VD:%.*]], <vscale x 4 x half> [[VS1:%.*]], <vscale x 2 x half> [[VS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x float> @llvm.riscv.sf.vfwmacc.4x4x4.nxv2f32.nxv4f16.nxv2f16.i64(<vscale x 2 x float> [[VD]], <vscale x 4 x half> [[VS1]], <vscale x 2 x half> [[VS2]], i64 [[VL]], i64 2)
+// CHECK-RV64-NEXT:    ret <vscale x 2 x float> [[TMP0]]
+//
+vfloat32m1_t test_sf_vfwmacc_4x4x4_f32m1(vfloat32m1_t vd, vfloat16m1_t vs1, vfloat16mf2_t vs2, size_t vl) {
+  return __riscv_sf_vfwmacc_4x4x4_f32m1_tu(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 4 x float> @test_sf_vfwmacc_4x4x4_f32m2
+// CHECK-RV64-SAME: (<vscale x 4 x float> [[VD:%.*]], <vscale x 4 x half> [[VS1:%.*]], <vscale x 4 x half> [[VS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x float> @llvm.riscv.sf.vfwmacc.4x4x4.nxv4f32.nxv4f16.nxv4f16.i64(<vscale x 4 x float> [[VD]], <vscale x 4 x half> [[VS1]], <vscale x 4 x half> [[VS2]], i64 [[VL]], i64 2)
+// CHECK-RV64-NEXT:    ret <vscale x 4 x float> [[TMP0]]
+//
+vfloat32m2_t test_sf_vfwmacc_4x4x4_f32m2(vfloat32m2_t vd, vfloat16m1_t vs1, vfloat16m1_t vs2, size_t vl) {
+  return __riscv_sf_vfwmacc_4x4x4_f32m2_tu(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 8 x float> @test_sf_vfwmacc_4x4x4_f32m4
+// CHECK-RV64-SAME: (<vscale x 8 x float> [[VD:%.*]], <vscale x 4 x half> [[VS1:%.*]], <vscale x 8 x half> [[VS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x float> @llvm.riscv.sf.vfwmacc.4x4x4.nxv8f32.nxv4f16.nxv8f16.i64(<vscale x 8 x float> [[VD]], <vscale x 4 x half> [[VS1]], <vscale x 8 x half> [[VS2]], i64 [[VL]], i64 2)
+// CHECK-RV64-NEXT:    ret <vscale x 8 x float> [[TMP0]]
+//
+vfloat32m4_t test_sf_vfwmacc_4x4x4_f32m4(vfloat32m4_t vd, vfloat16m1_t vs1, vfloat16m2_t vs2, size_t vl) {
+  return __riscv_sf_vfwmacc_4x4x4_f32m4_tu(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 16 x float> @test_sf_vfwmacc_4x4x4_f32m8
+// CHECK-RV64-SAME: (<vscale x 16 x float> [[VD:%.*]], <vscale x 4 x half> [[VS1:%.*]], <vscale x 16 x half> [[VS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x float> @llvm.riscv.sf.vfwmacc.4x4x4.nxv16f32.nxv4f16.nxv16f16.i64(<vscale x 16 x float> [[VD]], <vscale x 4 x half> [[VS1]], <vscale x 16 x half> [[VS2]], i64 [[VL]], i64 2)
+// CHECK-RV64-NEXT:    ret <vscale x 16 x float> [[TMP0]]
+//
+vfloat32m8_t test_sf_vfwmacc_4x4x4_f32m8(vfloat32m8_t vd, vfloat16m1_t vs1, vfloat16m4_t vs2, size_t vl) {
+  return __riscv_sf_vfwmacc_4x4x4_f32m8_tu(vd, vs1, vs2, vl);
+}
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/policy/overloaded/sf_vfwmacc_4x4x4.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/policy/overloaded/sf_vfwmacc_4x4x4.c
new file mode 100644
index 000000000000000..a7b26b3de36f90e
--- /dev/null
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/policy/overloaded/sf_vfwmacc_4x4x4.c
@@ -0,0 +1,57 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2
+// REQUIRES: riscv-registered-target
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zvfh -target-feature +xsfvfwmaccqqq \
+// RUN:   -disable-O0-optnone -emit-llvm %s -o - | opt -S -passes=mem2reg | \
+// RUN:   FileCheck --check-prefix=CHECK-RV64 %s
+
+#include <sifive_vector.h>
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 1 x float> @test_sf_vfwmacc_4x4x4_f32mf2_tu
+// CHECK-RV64-SAME: (<vscale x 1 x float> [[VD:%.*]], <vscale x 4 x half> [[VS1:%.*]], <vscale x 1 x half> [[VS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x float> @llvm.riscv.sf.vfwmacc.4x4x4.nxv1f32.nxv4f16.nxv1f16.i64(<vscale x 1 x float> [[VD]], <vscale x 4 x half> [[VS1]], <vscale x 1 x half> [[VS2]], i64 [[VL]], i64 2)
+// CHECK-RV64-NEXT:    ret <vscale x 1 x float> [[TMP0]]
+//
+vfloat32mf2_t test_sf_vfwmacc_4x4x4_f32mf2_tu(vfloat32mf2_t vd, vfloat16m1_t vs1, vfloat16mf4_t vs2, size_t vl) {
+  return __riscv_sf_vfwmacc_4x4x4_tu(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 2 x float> @test_sf_vfwmacc_4x4x4_f32m1
+// CHECK-RV64-SAME: (<vscale x 2 x float> [[VD:%.*]], <vscale x 4 x half> [[VS1:%.*]], <vscale x 2 x half> [[VS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x float> @llvm.riscv.sf.vfwmacc.4x4x4.nxv2f32.nxv4f16.nxv2f16.i64(<vscale x 2 x float> [[VD]], <vscale x 4 x half> [[VS1]], <vscale x 2 x half> [[VS2]], i64 [[VL]], i64 2)
+// CHECK-RV64-NEXT:    ret <vscale x 2 x float> [[TMP0]]
+//
+vfloat32m1_t test_sf_vfwmacc_4x4x4_f32m1(vfloat32m1_t vd, vfloat16m1_t vs1, vfloat16mf2_t vs2, size_t vl) {
+  return __riscv_sf_vfwmacc_4x4x4_tu(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 4 x float> @test_sf_vfwmacc_4x4x4_f32m2
+// CHECK-RV64-SAME: (<vscale x 4 x float> [[VD:%.*]], <vscale x 4 x half> [[VS1:%.*]], <vscale x 4 x half> [[VS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x float> @llvm.riscv.sf.vfwmacc.4x4x4.nxv4f32.nxv4f16.nxv4f16.i64(<vscale x 4 x float> [[VD]], <vscale x 4 x half> [[VS1]], <vscale x 4 x half> [[VS2]], i64 [[VL]], i64 2)
+// CHECK-RV64-NEXT:    ret <vscale x 4 x float> [[TMP0]]
+//
+vfloat32m2_t test_sf_vfwmacc_4x4x4_f32m2(vfloat32m2_t vd, vfloat16m1_t vs1, vfloat16m1_t vs2, size_t vl) {
+  return __riscv_sf_vfwmacc_4x4x4_tu(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 8 x float> @test_sf_vfwmacc_4x4x4_f32m4
+// CHECK-RV64-SAME: (<vscale x 8 x float> [[VD:%.*]], <vscale x 4 x half> [[VS1:%.*]], <vscale x 8 x half> [[VS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x float> @llvm.riscv.sf.vfwmacc.4x4x4.nxv8f32.nxv4f16.nxv8f16.i64(<vscale x 8 x float> [[VD]], <vscale x 4 x half> [[VS1]], <vscale x 8 x half> [[VS2]], i64 [[VL]], i64 2)
+// CHECK-RV64-NEXT:    ret <vscale x 8 x float> [[TMP0]]
+//
+vfloat32m4_t test_sf_vfwmacc_4x4x4_f32m4(vfloat32m4_t vd, vfloat16m1_t vs1, vfloat16m2_t vs2, size_t vl) {
+  return __riscv_sf_vfwmacc_4x4x4_tu(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 16 x float> @test_sf_vfwmacc_4x4x4_f32m8
+// CHECK-RV64-SAME: (<vscale x 16 x float> [[VD:%.*]], <vscale x 4 x half> [[VS1:%.*]], <vscale x 16 x half> [[VS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x float> @llvm.riscv.sf.vfwmacc.4x4x4.nxv16f32.nxv4f16.nxv16f16.i64(<vscale x 16 x float> [[VD]], <vscale x 4 x half> [[VS1]], <vscale x 16 x half> [[VS2]], i64 [[VL]], i64 2)
+// CHECK-RV64-NEXT:    ret <vscale x 16 x float> [[TMP0]]
+//
+vfloat32m8_t test_sf_vfwmacc_4x4x4_f32m8(vfloat32m8_t vd, vfloat16m1_t vs1, vfloat16m4_t vs2, size_t vl) {
+  return __riscv_sf_vfwmacc_4x4x4...
[truncated]

github-actions · 2023-10-05T10:12:06Z

✅ With the latest revision this PR passed the C/C++ code formatter.

4vtomat · 2023-10-27T05:48:14Z

ping

llvm/lib/Target/RISCV/RISCVFeatures.td

llvm/lib/Support/RISCVISAInfo.cpp

topperc

LGTM

Bfloat16 Matrix Multiply Accumulate Instruction https://sifive.cdn.prismic.io/sifive/c391d53e-ffcf-4091-82f6-c37bf3e883ed_xsfvfwmaccqqq-spec.pdf

sunshaoce · 2023-10-05T15:05:15Z

clang/include/clang/Basic/riscv_sifive_vector.td

+      Name = NAME,
+      HasMasked = false,
+      Log2LMUL = [-2, -1, 0, 1, 2] in
+    defm NAME : RVVOutOp1Op2BuiltinSet<NAME, "x", suffixes_prototypes>;


The SiFive custom vector extension Xsfvfwmaccqqq introduces the Bfloat16 matrix multiply accumulate instruction.

This is wrong! It should be bfloat16 instead of float16. Need to be based on: https://reviews.llvm.org/D152498

llvm/lib/Target/RISCV/RISCVFeatures.td

@joshua-arch1

BF16 implementation based on @joshua-arch1's https://reviews.llvm.org/D152498 Fixed the incorrect f16 type introduced in llvm#68296 --------- Co-authored-by: un Sha (Joshua) <[email protected]>

@joshua-arch1

BF16 implementation based on @joshua-arch1's https://reviews.llvm.org/D152498 Fixed the incorrect f16 type introduced in #68296 --------- Co-authored-by: Jun Sha (Joshua) <[email protected]>

@joshua-arch1

BF16 implementation based on @joshua-arch1's https://reviews.llvm.org/D152498 Fixed the incorrect f16 type introduced in llvm/llvm-project#68296 --------- Co-authored-by: Jun Sha (Joshua) <[email protected]>

llvmbot added clang Clang issues not falling into any other category backend:RISC-V clang:frontend Language frontend issues, e.g. anything involving "Sema" mc Machine (object) code llvm:support llvm:ir labels Oct 5, 2023

4vtomat requested review from topperc, kito-cheng and michaelmaitland October 5, 2023 10:24

4vtomat force-pushed the support_xsfvfwmaccqqq branch from 437d100 to 548d589 Compare October 6, 2023 17:25

topperc reviewed Oct 27, 2023

View reviewed changes

llvm/lib/Target/RISCV/RISCVFeatures.td Outdated Show resolved Hide resolved

topperc reviewed Oct 27, 2023

View reviewed changes

llvm/lib/Support/RISCVISAInfo.cpp Outdated Show resolved Hide resolved

topperc approved these changes Nov 1, 2023

View reviewed changes

[RISCV][SiFive] Support Xsfvfwmaccqqq extensions

625c45d

Bfloat16 Matrix Multiply Accumulate Instruction https://sifive.cdn.prismic.io/sifive/c391d53e-ffcf-4091-82f6-c37bf3e883ed_xsfvfwmaccqqq-spec.pdf

4vtomat force-pushed the support_xsfvfwmaccqqq branch from b9758e5 to 625c45d Compare November 3, 2023 02:07

4vtomat merged commit 945d2e6 into llvm:main Nov 3, 2023

4vtomat deleted the support_xsfvfwmaccqqq branch November 3, 2023 02:08

sunshaoce reviewed Nov 3, 2023

View reviewed changes

sunshaoce mentioned this pull request Nov 3, 2023

[RISCV] Introduce and use BF16 in Xsfvfwmaccqqq intrinsics #71140

Merged

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

[RISCV] Support Xsfvfwmaccqqq extensions #68296

[RISCV] Support Xsfvfwmaccqqq extensions #68296

4vtomat commented Oct 5, 2023

llvmbot commented Oct 5, 2023 •

edited

Loading

github-actions bot commented Oct 5, 2023 •

edited

Loading

4vtomat commented Oct 27, 2023

topperc left a comment

sunshaoce Oct 5, 2023

[RISCV] Support Xsfvfwmaccqqq extensions #68296

[RISCV] Support Xsfvfwmaccqqq extensions #68296

Conversation

4vtomat commented Oct 5, 2023

llvmbot commented Oct 5, 2023 • edited Loading

github-actions bot commented Oct 5, 2023 • edited Loading

4vtomat commented Oct 27, 2023

topperc left a comment

Choose a reason for hiding this comment

sunshaoce Oct 5, 2023

Choose a reason for hiding this comment

llvmbot commented Oct 5, 2023 •

edited

Loading

github-actions bot commented Oct 5, 2023 •

edited

Loading