release/22.x: [SystemZ] Support fp16 vector ABI and basic codegen. (#171066)#178300
Merged
c-rhodes merged 2 commits into llvm:release/22.x from Jan 29, 2026
Merged
release/22.x: [SystemZ] Support fp16 vector ABI and basic codegen. (#171066) #178300 — c-rhodes merged 2 commits into llvm:release/22.x from
c-rhodes merged 2 commits into llvm:release/22.x from
Conversation
Member
Author
|
@llvm/pr-subscribers-backend-systemz Author: None (llvmbot). Changes requested by: @nikic. Patch is 340.02 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/178300.diff — 30 Files Affected:
diff --git a/clang/test/CodeGen/SystemZ/systemz-abi-vector.c b/clang/test/CodeGen/SystemZ/systemz-abi-vector.c
index fab6050a0d876..0c577723c6265 100644
--- a/clang/test/CodeGen/SystemZ/systemz-abi-vector.c
+++ b/clang/test/CodeGen/SystemZ/systemz-abi-vector.c
@@ -29,16 +29,19 @@ typedef __attribute__((vector_size(1))) char v1i8;
typedef __attribute__((vector_size(2))) char v2i8;
typedef __attribute__((vector_size(2))) short v1i16;
+typedef __attribute__((vector_size(2))) _Float16 v1f16;
typedef __attribute__((vector_size(4))) char v4i8;
typedef __attribute__((vector_size(4))) short v2i16;
typedef __attribute__((vector_size(4))) int v1i32;
+typedef __attribute__((vector_size(4))) _Float16 v2f16;
typedef __attribute__((vector_size(4))) float v1f32;
typedef __attribute__((vector_size(8))) char v8i8;
typedef __attribute__((vector_size(8))) short v4i16;
typedef __attribute__((vector_size(8))) int v2i32;
typedef __attribute__((vector_size(8))) long long v1i64;
+typedef __attribute__((vector_size(8))) _Float16 v4f16;
typedef __attribute__((vector_size(8))) float v2f32;
typedef __attribute__((vector_size(8))) double v1f64;
@@ -47,11 +50,20 @@ typedef __attribute__((vector_size(16))) short v8i16;
typedef __attribute__((vector_size(16))) int v4i32;
typedef __attribute__((vector_size(16))) long long v2i64;
typedef __attribute__((vector_size(16))) __int128 v1i128;
+typedef __attribute__((vector_size(16))) _Float16 v8f16;
typedef __attribute__((vector_size(16))) float v4f32;
typedef __attribute__((vector_size(16))) double v2f64;
typedef __attribute__((vector_size(16))) long double v1f128;
typedef __attribute__((vector_size(32))) char v32i8;
+typedef __attribute__((vector_size(32))) short v16i16;
+typedef __attribute__((vector_size(32))) int v8i32;
+typedef __attribute__((vector_size(32))) long long v4i64;
+typedef __attribute__((vector_size(32))) __int128 v2i128;
+typedef __attribute__((vector_size(32))) _Float16 v16f16;
+typedef __attribute__((vector_size(32))) float v8f32;
+typedef __attribute__((vector_size(32))) double v4f64;
+typedef __attribute__((vector_size(32))) long double v2f128;
unsigned int align = __alignof__ (v16i8);
// CHECK: @align ={{.*}} global i32 16
@@ -97,6 +109,10 @@ v8i16 pass_v8i16(v8i16 arg) { return arg; }
// CHECK-LABEL: define{{.*}} void @pass_v8i16(ptr dead_on_unwind noalias writable sret(<8 x i16>) align 16 %{{.*}}, ptr dead_on_return %0)
// CHECK-VECTOR-LABEL: define{{.*}} <8 x i16> @pass_v8i16(<8 x i16> %{{.*}})
+v16i16 pass_v16i16(v16i16 arg) { return arg; }
+// CHECK-LABEL: define{{.*}} void @pass_v16i16(ptr dead_on_unwind noalias writable sret(<16 x i16>) align 32 %{{.*}}, ptr dead_on_return %0)
+// CHECK-VECTOR-LABEL: define{{.*}} void @pass_v16i16(ptr dead_on_unwind noalias writable sret(<16 x i16>) align 8 %{{.*}}, ptr dead_on_return %0)
+
v1i32 pass_v1i32(v1i32 arg) { return arg; }
// CHECK-LABEL: define{{.*}} void @pass_v1i32(ptr dead_on_unwind noalias writable sret(<1 x i32>) align 4 %{{.*}}, ptr dead_on_return %0)
// CHECK-VECTOR-LABEL: define{{.*}} <1 x i32> @pass_v1i32(<1 x i32> %{{.*}})
@@ -109,6 +125,10 @@ v4i32 pass_v4i32(v4i32 arg) { return arg; }
// CHECK-LABEL: define{{.*}} void @pass_v4i32(ptr dead_on_unwind noalias writable sret(<4 x i32>) align 16 %{{.*}}, ptr dead_on_return %0)
// CHECK-VECTOR-LABEL: define{{.*}} <4 x i32> @pass_v4i32(<4 x i32> %{{.*}})
+v8i32 pass_v8i32(v8i32 arg) { return arg; }
+// CHECK-LABEL: define{{.*}} void @pass_v8i32(ptr dead_on_unwind noalias writable sret(<8 x i32>) align 32 %{{.*}}, ptr dead_on_return %0)
+// CHECK-VECTOR-LABEL: define{{.*}} void @pass_v8i32(ptr dead_on_unwind noalias writable sret(<8 x i32>) align 8 %{{.*}}, ptr dead_on_return %0)
+
v1i64 pass_v1i64(v1i64 arg) { return arg; }
// CHECK-LABEL: define{{.*}} void @pass_v1i64(ptr dead_on_unwind noalias writable sret(<1 x i64>) align 8 %{{.*}}, ptr dead_on_return %0)
// CHECK-VECTOR-LABEL: define{{.*}} <1 x i64> @pass_v1i64(<1 x i64> %{{.*}})
@@ -117,10 +137,38 @@ v2i64 pass_v2i64(v2i64 arg) { return arg; }
// CHECK-LABEL: define{{.*}} void @pass_v2i64(ptr dead_on_unwind noalias writable sret(<2 x i64>) align 16 %{{.*}}, ptr dead_on_return %0)
// CHECK-VECTOR-LABEL: define{{.*}} <2 x i64> @pass_v2i64(<2 x i64> %{{.*}})
+v4i64 pass_v4i64(v4i64 arg) { return arg; }
+// CHECK-LABEL: define{{.*}} void @pass_v4i64(ptr dead_on_unwind noalias writable sret(<4 x i64>) align 32 %{{.*}}, ptr dead_on_return %0)
+// CHECK-VECTOR-LABEL: define{{.*}} void @pass_v4i64(ptr dead_on_unwind noalias writable sret(<4 x i64>) align 8 %{{.*}}, ptr dead_on_return %0)
+
v1i128 pass_v1i128(v1i128 arg) { return arg; }
// CHECK-LABEL: define{{.*}} void @pass_v1i128(ptr dead_on_unwind noalias writable sret(<1 x i128>) align 16 %{{.*}}, ptr dead_on_return %0)
// CHECK-VECTOR-LABEL: define{{.*}} <1 x i128> @pass_v1i128(<1 x i128> %{{.*}})
+v2i128 pass_v2i128(v2i128 arg) { return arg; }
+// CHECK-LABEL: define{{.*}} void @pass_v2i128(ptr dead_on_unwind noalias writable sret(<2 x i128>) align 32 %{{.*}}, ptr dead_on_return %0)
+// CHECK-VECTOR-LABEL: define{{.*}} void @pass_v2i128(ptr dead_on_unwind noalias writable sret(<2 x i128>) align 8 %{{.*}}, ptr dead_on_return %0)
+
+v1f16 pass_v1f16(v1f16 arg) { return arg; }
+// CHECK-LABEL: define{{.*}} void @pass_v1f16(ptr dead_on_unwind noalias writable sret(<1 x half>) align 2 %{{.*}}, ptr dead_on_return %0)
+// CHECK-VECTOR-LABEL: define{{.*}} <1 x half> @pass_v1f16(<1 x half> %{{.*}})
+
+v2f16 pass_v2f16(v2f16 arg) { return arg; }
+// CHECK-LABEL: define{{.*}} void @pass_v2f16(ptr dead_on_unwind noalias writable sret(<2 x half>) align 4 %{{.*}}, ptr dead_on_return %0)
+// CHECK-VECTOR-LABEL: define{{.*}} <2 x half> @pass_v2f16(<2 x half> %{{.*}})
+
+v4f16 pass_v4f16(v4f16 arg) { return arg; }
+// CHECK-LABEL: define{{.*}} void @pass_v4f16(ptr dead_on_unwind noalias writable sret(<4 x half>) align 8 %{{.*}}, ptr dead_on_return %0)
+// CHECK-VECTOR-LABEL: define{{.*}} <4 x half> @pass_v4f16(<4 x half> %{{.*}})
+
+v8f16 pass_v8f16(v8f16 arg) { return arg; }
+// CHECK-LABEL: define{{.*}} void @pass_v8f16(ptr dead_on_unwind noalias writable sret(<8 x half>) align 16 %{{.*}}, ptr dead_on_return %0)
+// CHECK-VECTOR-LABEL: define{{.*}} <8 x half> @pass_v8f16(<8 x half> %{{.*}})
+
+v16f16 pass_v16f16(v16f16 arg) { return arg; }
+// CHECK-LABEL: define{{.*}} void @pass_v16f16(ptr dead_on_unwind noalias writable sret(<16 x half>) align 32 %{{.*}}, ptr dead_on_return %0)
+// CHECK-VECTOR-LABEL: define{{.*}} void @pass_v16f16(ptr dead_on_unwind noalias writable sret(<16 x half>) align 8 %{{.*}}, ptr dead_on_return %0)
+
v1f32 pass_v1f32(v1f32 arg) { return arg; }
// CHECK-LABEL: define{{.*}} void @pass_v1f32(ptr dead_on_unwind noalias writable sret(<1 x float>) align 4 %{{.*}}, ptr dead_on_return %0)
// CHECK-VECTOR-LABEL: define{{.*}} <1 x float> @pass_v1f32(<1 x float> %{{.*}})
@@ -133,6 +181,10 @@ v4f32 pass_v4f32(v4f32 arg) { return arg; }
// CHECK-LABEL: define{{.*}} void @pass_v4f32(ptr dead_on_unwind noalias writable sret(<4 x float>) align 16 %{{.*}}, ptr dead_on_return %0)
// CHECK-VECTOR-LABEL: define{{.*}} <4 x float> @pass_v4f32(<4 x float> %{{.*}})
+v8f32 pass_v8f32(v8f32 arg) { return arg; }
+// CHECK-LABEL: define{{.*}} void @pass_v8f32(ptr dead_on_unwind noalias writable sret(<8 x float>) align 32 %{{.*}}, ptr dead_on_return %0)
+// CHECK-VECTOR-LABEL: define{{.*}} void @pass_v8f32(ptr dead_on_unwind noalias writable sret(<8 x float>) align 8 %{{.*}}, ptr dead_on_return %0)
+
v1f64 pass_v1f64(v1f64 arg) { return arg; }
// CHECK-LABEL: define{{.*}} void @pass_v1f64(ptr dead_on_unwind noalias writable sret(<1 x double>) align 8 %{{.*}}, ptr dead_on_return %0)
// CHECK-VECTOR-LABEL: define{{.*}} <1 x double> @pass_v1f64(<1 x double> %{{.*}})
@@ -141,10 +193,17 @@ v2f64 pass_v2f64(v2f64 arg) { return arg; }
// CHECK-LABEL: define{{.*}} void @pass_v2f64(ptr dead_on_unwind noalias writable sret(<2 x double>) align 16 %{{.*}}, ptr dead_on_return %0)
// CHECK-VECTOR-LABEL: define{{.*}} <2 x double> @pass_v2f64(<2 x double> %{{.*}})
+v4f64 pass_v4f64(v4f64 arg) { return arg; }
+// CHECK-LABEL: define{{.*}} void @pass_v4f64(ptr dead_on_unwind noalias writable sret(<4 x double>) align 32 %{{.*}}, ptr dead_on_return %0)
+// CHECK-VECTOR-LABEL: define{{.*}} void @pass_v4f64(ptr dead_on_unwind noalias writable sret(<4 x double>) align 8 %{{.*}}, ptr dead_on_return %0)
+
v1f128 pass_v1f128(v1f128 arg) { return arg; }
// CHECK-LABEL: define{{.*}} void @pass_v1f128(ptr dead_on_unwind noalias writable sret(<1 x fp128>) align 16 %{{.*}}, ptr dead_on_return %0)
// CHECK-VECTOR-LABEL: define{{.*}} <1 x fp128> @pass_v1f128(<1 x fp128> %{{.*}})
+v2f128 pass_v2f128(v2f128 arg) { return arg; }
+// CHECK-LABEL: define{{.*}} void @pass_v2f128(ptr dead_on_unwind noalias writable sret(<2 x fp128>) align 32 %{{.*}}, ptr dead_on_return %0)
+// CHECK-VECTOR-LABEL: define{{.*}} void @pass_v2f128(ptr dead_on_unwind noalias writable sret(<2 x fp128>) align 8 %{{.*}}, ptr dead_on_return %0)
// Vector-like aggregate types
diff --git a/llvm/lib/Target/SystemZ/SystemZCallingConv.td b/llvm/lib/Target/SystemZ/SystemZCallingConv.td
index 2795de5eeeb66..69202e3fcbc57 100644
--- a/llvm/lib/Target/SystemZ/SystemZCallingConv.td
+++ b/llvm/lib/Target/SystemZ/SystemZCallingConv.td
@@ -50,7 +50,7 @@ def RetCC_SystemZ_ELF : CallingConv<[
// Sub-128 vectors are returned in the same way, but they're widened
// to one of these types during type legalization.
CCIfSubtarget<"hasVector()",
- CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+ CCIfType<[v16i8, v8i16, v4i32, v2i64, v8f16, v4f32, v2f64],
CCAssignToReg<[V24, V26, V28, V30, V25, V27, V29, V31]>>>
]>;
@@ -116,19 +116,19 @@ def CC_SystemZ_ELF : CallingConv<[
// are passed in the same way, but they're widened to one of these types
// during type legalization.
CCIfSubtarget<"hasVector()",
- CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+ CCIfType<[v16i8, v8i16, v4i32, v2i64, v8f16, v4f32, v2f64],
CCIfArgFixed<CCAssignToReg<[V24, V26, V28, V30,
V25, V27, V29, V31]>>>>,
// However, sub-128 vectors which need to go on the stack occupy just a
// single 8-byte-aligned 8-byte stack slot. Pass as i64.
CCIfSubtarget<"hasVector()",
- CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+ CCIfType<[v16i8, v8i16, v4i32, v2i64, v8f16, v4f32, v2f64],
CCIfShortVector<CCBitConvertToType<i64>>>>,
// Other vector arguments are passed in 8-byte-aligned 16-byte stack slots.
CCIfSubtarget<"hasVector()",
- CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+ CCIfType<[v16i8, v8i16, v4i32, v2i64, v8f16, v4f32, v2f64],
CCAssignToStack<16, 8>>>,
// Other arguments are passed in 8-byte-aligned 8-byte stack slots.
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
index e979c63d03dbc..21beef238f187 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -123,6 +123,7 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
addRegisterClass(MVT::v8i16, &SystemZ::VR128BitRegClass);
addRegisterClass(MVT::v4i32, &SystemZ::VR128BitRegClass);
addRegisterClass(MVT::v2i64, &SystemZ::VR128BitRegClass);
+ addRegisterClass(MVT::v8f16, &SystemZ::VR128BitRegClass);
addRegisterClass(MVT::v4f32, &SystemZ::VR128BitRegClass);
addRegisterClass(MVT::v2f64, &SystemZ::VR128BitRegClass);
}
@@ -620,13 +621,16 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
// Handle floating-point vector types.
if (Subtarget.hasVector()) {
// Scalar-to-vector conversion is just a subreg.
+ setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8f16, Legal);
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal);
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal);
// Some insertions and extractions can be done directly but others
// need to go via integers.
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8f16, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f64, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8f16, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Custom);
@@ -842,6 +846,41 @@ bool SystemZTargetLowering::useSoftFloat() const {
return Subtarget.hasSoftFloat();
}
+unsigned SystemZTargetLowering::getVectorTypeBreakdownForCallingConv(
+ LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
+ unsigned &NumIntermediates, MVT &RegisterVT) const {
+ // Pass fp16 vectors in VR(s).
+ if (Subtarget.hasVector() && VT.isVector() && VT.getScalarType() == MVT::f16) {
+ IntermediateVT = RegisterVT = MVT::v8f16;
+ return NumIntermediates =
+ divideCeil(VT.getVectorNumElements(), SystemZ::VectorBytes / 2);
+ }
+ return TargetLowering::getVectorTypeBreakdownForCallingConv(
+ Context, CC, VT, IntermediateVT, NumIntermediates, RegisterVT);
+}
+
+MVT SystemZTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
+ CallingConv::ID CC,
+ EVT VT) const {
+ // 128-bit single-element vector types are passed like other vectors,
+ // not like their element type.
+ if (VT.isVector() && VT.getSizeInBits() == 128 &&
+ VT.getVectorNumElements() == 1)
+ return MVT::v16i8;
+ // Pass fp16 vectors in VR(s).
+ if (Subtarget.hasVector() && VT.isVector() && VT.getScalarType() == MVT::f16)
+ return MVT::v8f16;
+ return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
+}
+
+unsigned SystemZTargetLowering::getNumRegistersForCallingConv(
+ LLVMContext &Context, CallingConv::ID CC, EVT VT) const {
+ // Pass fp16 vectors in VR(s).
+ if (Subtarget.hasVector() && VT.isVector() && VT.getScalarType() == MVT::f16)
+ return divideCeil(VT.getVectorNumElements(), SystemZ::VectorBytes / 2);
+ return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
+}
+
EVT SystemZTargetLowering::getSetCCResultType(const DataLayout &DL,
LLVMContext &, EVT VT) const {
if (!VT.isVector())
@@ -2051,6 +2090,7 @@ SDValue SystemZTargetLowering::LowerFormalArguments(
case MVT::v8i16:
case MVT::v4i32:
case MVT::v2i64:
+ case MVT::v8f16:
case MVT::v4f32:
case MVT::v2f64:
RC = &SystemZ::VR128BitRegClass;
@@ -6344,6 +6384,38 @@ bool SystemZTargetLowering::isVectorElementLoad(SDValue Op) const {
return false;
}
+static SDValue mergeHighParts(SelectionDAG &DAG, const SDLoc &DL,
+ unsigned MergedBits, EVT VT, SDValue Op0,
+ SDValue Op1) {
+ MVT IntVecVT = MVT::getVectorVT(MVT::getIntegerVT(MergedBits),
+ SystemZ::VectorBits / MergedBits);
+ assert(VT.getSizeInBits() == 128 && IntVecVT.getSizeInBits() == 128 &&
+ "Handling full vectors only.");
+ Op0 = DAG.getNode(ISD::BITCAST, DL, IntVecVT, Op0);
+ Op1 = DAG.getNode(ISD::BITCAST, DL, IntVecVT, Op1);
+ SDValue Op = DAG.getNode(SystemZISD::MERGE_HIGH, DL, IntVecVT, Op0, Op1);
+ return DAG.getNode(ISD::BITCAST, DL, VT, Op);
+}
+
+static SDValue buildFPVecFromScalars4(SelectionDAG &DAG, const SDLoc &DL,
+ EVT VT, SmallVectorImpl<SDValue> &Elems,
+ unsigned Pos) {
+ SDValue Op01 = buildMergeScalars(DAG, DL, VT, Elems[Pos + 0], Elems[Pos + 1]);
+ SDValue Op23 = buildMergeScalars(DAG, DL, VT, Elems[Pos + 2], Elems[Pos + 3]);
+ // Avoid unnecessary undefs by reusing the other operand.
+ if (Op01.isUndef()) {
+ if (Op23.isUndef())
+ return Op01;
+ Op01 = Op23;
+ } else if (Op23.isUndef())
+ Op23 = Op01;
+ // Merging identical replications is a no-op.
+ if (Op01.getOpcode() == SystemZISD::REPLICATE && Op01 == Op23)
+ return Op01;
+ unsigned MergedBits = VT.getSimpleVT().getScalarSizeInBits() * 2;
+ return mergeHighParts(DAG, DL, MergedBits, VT, Op01, Op23);
+}
+
// Combine GPR scalar values Elems into a vector of type VT.
SDValue
SystemZTargetLowering::buildVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
@@ -6402,22 +6474,22 @@ SystemZTargetLowering::buildVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
// <ABxx> <CDxx>
// V VMRHG
// <ABCD>
- if (VT == MVT::v4f32 && !AllLoads) {
- SDValue Op01 = buildMergeScalars(DAG, DL, VT, Elems[0], Elems[1]);
- SDValue Op23 = buildMergeScalars(DAG, DL, VT, Elems[2], Elems[3]);
+ if (VT == MVT::v4f32 && !AllLoads)
+ return buildFPVecFromScalars4(DAG, DL, VT, Elems, 0);
+
+ // Same for v8f16.
+ if (VT == MVT::v8f16 && !AllLoads) {
+ SDValue Op0123 = buildFPVecFromScalars4(DAG, DL, VT, Elems, 0);
+ SDValue Op4567 = buildFPVecFromScalars4(DAG, DL, VT, Elems, 4);
// Avoid unnecessary undefs by reusing the other operand.
- if (Op01.isUndef())
- Op01 = Op23;
- else if (Op23.isUndef())
- Op23 = Op01;
+ if (Op0123.isUndef())
+ Op0123 = Op4567;
+ else if (Op4567.isUndef())
+ Op4567 = Op0123;
// Merging identical replications is a no-op.
- if (Op01.getOpcode() == SystemZISD::REPLICATE && Op01 == Op23)
- return Op01;
- Op01 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op01);
- Op23 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op23);
- SDValue Op = DAG.getNode(SystemZISD::MERGE_HIGH,
- DL, MVT::v2i64, Op01, Op23);
- return DAG.getNode(ISD::BITCAST, DL, VT, Op);
+ if (Op0123.getOpcode() == SystemZISD::REPLICATE && Op0123 == Op4567)
+ return Op0123;
+ return mergeHighParts(DAG, DL, 64, VT, Op0123, Op4567);
}
// Collect the constant terms.
@@ -6561,6 +6633,33 @@ SDValue SystemZTargetLowering::lowerSCALAR_TO_VECTOR(SDValue Op,
Op.getOperand(0), DAG.getConstant(0, DL, MVT::i32));
}
+// Shift the lower 2 bytes of Op to the left in order to insert into the
+// upper 2 bytes of the FP register.
+static SDValue convertToF16(SDValue Op, SelectionDAG &DAG) {
+ assert(Op.getSimpleValueType() == MVT::i64 &&
+ "Expexted to convert i64 to f16.");
+ SDLoc DL(Op);
+ SDValue Shft = DAG.getNode(ISD::SHL, DL, MVT::i64, Op,
+ DAG.getConstant(48, DL, MVT::i64));
+ SDValue BCast = DAG.getNode(ISD::BITCAST, DL, MVT::f64, Shft);
+ SDValue F16Val =
+ DAG.getTargetExtractSubreg(SystemZ::subreg_h16, DL, MVT::f16, BCast);
+ return F16Val;
+}
+
+// Extract Op into GPR and shift the 2 f16 bytes to the right.
+static SDValue convertFromF16(SDValue Op, SDLoc DL, SelectionDAG &DAG) {
+ assert(Op.getSimpleValueType() == MVT::f16 &&
+ "Expected to convert f16 to i64.");
+ SDNode *U32 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::f64);
+ SDValue In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_h16, DL, MVT::f64,
+ SDValue(U32, 0), Op);
+ SDValue BCast = DAG.getNode(ISD::BITCAST, DL, MVT::i64, In64);
+ SDValue Shft = DAG.getNode(ISD::SRL, DL, MVT::i64, BCast,
+ DAG.getConstant(48, DL, MVT::i32));
+ return Shft;
+}
+
SDValue SystemZTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
SelectionDAG &DAG) const {
// Handle insertions of floating-point values.
@@ -6586,9 +6685,13 @@ SDValue SystemZTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
// Otherwise bitcast to the equivalent integer form and insert via a GPR.
MVT IntVT = MVT::getIntegerVT(VT.getScalarSizeInBits());
MVT IntVecVT = MVT::getVectorVT(IntVT, VT.getVectorNumElements());
- SDValue Res = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntVecVT,
- DAG.getNode(ISD::BITCAST, DL, IntVecVT, Op0),
- DAG.getNode(ISD::BITCAST, DL, IntVT, Op1), Op2);
+ SDValue IntOp1 =
+ VT == MVT::v8f16
+ ? DAG.getZExtOrTrunc(convertFromF16(Op1, DL, DAG), DL, MVT::i32)
+...
[truncated]
|
In preparation for llvm#171066 (FP16 vector support). (cherry picked from commit e0a1326)
- Make v8f16 a legal type so that arguments can be passed in vector registers. Handle fp16 vectors so that they have the same ABI as other fp vectors. - Set the preferred vector action for fp16 vectors to "split". This will scalarize all operations, which is not always necessary (like with memory operations), but it avoids the superfluous operations that result after first widening and then scalarizing a narrow vector (like v4f16). Fixes llvm#168992 (cherry picked from commit c999e9a)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Add this suggestion to a batch that can be applied as a single commit. This suggestion is invalid because no changes were made to the code. Suggestions cannot be applied while the pull request is closed. Suggestions cannot be applied while viewing a subset of changes. Only one suggestion per line can be applied in a batch. Add this suggestion to a batch that can be applied as a single commit. Applying suggestions on deleted lines is not supported. You must change the existing code in this line in order to create a valid suggestion. Outdated suggestions cannot be applied. This suggestion has been applied or marked resolved. Suggestions cannot be applied from pending reviews. Suggestions cannot be applied on multi-line comments. Suggestions cannot be applied while the pull request is queued to merge. Suggestion cannot be applied right now. Please check back later.
Backport e0a1326 c999e9a
Requested by: @nikic