Skip to content

Commit

Permalink
Initial experiments (with integer regs for fp16).
Browse files Browse the repository at this point in the history
  • Loading branch information
JonPsson1 committed Oct 2, 2024
1 parent a87640c commit ae1e35c
Show file tree
Hide file tree
Showing 9 changed files with 420 additions and 18 deletions.
12 changes: 12 additions & 0 deletions clang/lib/Basic/Targets/SystemZ.h
Original file line number Diff line number Diff line change
Expand Up @@ -91,11 +91,23 @@ class LLVM_LIBRARY_VISIBILITY SystemZTargetInfo : public TargetInfo {
"-v128:64-a:8:16-n32:64");
}
MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 128;

// True if the backend supports operations on the half LLVM IR type.
HasLegalHalfType = false;
// Allow half arguments and return values.
HalfArgsAndReturns = true;
// Support _Float16.
HasFloat16 = true;

HasStrictFP = true;
}

unsigned getMinGlobalAlign(uint64_t Size, bool HasNonWeakDef) const override;

/// SystemZ opts out of the FP16 conversion intrinsics hook: this override
/// unconditionally reports false.
bool useFP16ConversionIntrinsics() const override { return false; }

void getTargetDefines(const LangOptions &Opts,
MacroBuilder &Builder) const override;

Expand Down
12 changes: 8 additions & 4 deletions clang/lib/CodeGen/Targets/SystemZ.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,8 @@ bool SystemZABIInfo::isFPArgumentType(QualType Ty) const {

if (const BuiltinType *BT = Ty->getAs<BuiltinType>())
switch (BT->getKind()) {
// case BuiltinType::Half: // TODO: decide whether __fp16 should be supported here.
case BuiltinType::Float16: // _Float16
case BuiltinType::Float:
case BuiltinType::Double:
return true;
Expand Down Expand Up @@ -277,7 +279,8 @@ RValue SystemZABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
} else {
if (AI.getCoerceToType())
ArgTy = AI.getCoerceToType();
InFPRs = (!IsSoftFloatABI && (ArgTy->isFloatTy() || ArgTy->isDoubleTy()));
InFPRs = (!IsSoftFloatABI &&
(ArgTy->isHalfTy() || ArgTy->isFloatTy() || ArgTy->isDoubleTy()));
IsVector = ArgTy->isVectorTy();
UnpaddedSize = TyInfo.Width;
DirectAlign = TyInfo.Align;
Expand Down Expand Up @@ -446,10 +449,11 @@ ABIArgInfo SystemZABIInfo::classifyArgumentType(QualType Ty) const {

// The structure is passed as an unextended integer, a half, a float, or a
// double.
if (isFPArgumentType(SingleElementTy)) {
assert(Size == 32 || Size == 64);
assert(Size == 16 || Size == 32 || Size == 64);
return ABIArgInfo::getDirect(
Size == 32 ? llvm::Type::getFloatTy(getVMContext())
: llvm::Type::getDoubleTy(getVMContext()));
Size == 16 ? llvm::Type::getHalfTy(getVMContext())
: Size == 32 ? llvm::Type::getFloatTy(getVMContext())
: llvm::Type::getDoubleTy(getVMContext()));
} else {
llvm::IntegerType *PassTy = llvm::IntegerType::get(getVMContext(), Size);
return Size <= 32 ? ABIArgInfo::getNoExtend(PassTy)
Expand Down
2 changes: 1 addition & 1 deletion clang/lib/Sema/SemaExpr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16534,7 +16534,7 @@ ExprResult Sema::BuildVAArgExpr(SourceLocation BuiltinLoc,
PromoteType = QualType();
}
}
if (TInfo->getType()->isSpecificBuiltinType(BuiltinType::Float))
if (TInfo->getType()->isFloat16Type() || TInfo->getType()->isFloat32Type())
PromoteType = Context.DoubleTy;
if (!PromoteType.isNull())
DiagRuntimeBehavior(TInfo->getTypeLoc().getBeginLoc(), E,
Expand Down
85 changes: 85 additions & 0 deletions clang/test/CodeGen/SystemZ/fexcess-precision.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
// RUN: %clang_cc1 -triple s390x-linux-gnu \
// RUN: -ffloat16-excess-precision=standard -emit-llvm -o - %s \
// RUN: | FileCheck %s -check-prefix=STANDARD

// RUN: %clang_cc1 -triple s390x-linux-gnu \
// RUN: -ffloat16-excess-precision=none -emit-llvm -o - %s \
// RUN: | FileCheck %s -check-prefix=NONE

// RUN: %clang_cc1 -triple s390x-linux-gnu \
// RUN: -ffloat16-excess-precision=fast -emit-llvm -o - %s \
// RUN: | FileCheck %s -check-prefix=FAST

// Sum of two _Float16 products. The FileCheck lines that follow pin the IR
// emitted for this exact expression under each -ffloat16-excess-precision
// setting exercised by the RUN lines, so the expression must keep this shape.
_Float16 f(_Float16 a, _Float16 b, _Float16 c, _Float16 d) {
return a * b + c * d;
}

// STANDARD-LABEL: define dso_local half @f(half noundef %a, half noundef %b, half noundef %c, half noundef %d) #0 {
// STANDARD-NEXT: entry:
// STANDARD-NEXT: %a.addr = alloca half, align 2
// STANDARD-NEXT: %b.addr = alloca half, align 2
// STANDARD-NEXT: %c.addr = alloca half, align 2
// STANDARD-NEXT: %d.addr = alloca half, align 2
// STANDARD-NEXT: store half %a, ptr %a.addr, align 2
// STANDARD-NEXT: store half %b, ptr %b.addr, align 2
// STANDARD-NEXT: store half %c, ptr %c.addr, align 2
// STANDARD-NEXT: store half %d, ptr %d.addr, align 2
// STANDARD-NEXT: %0 = load half, ptr %a.addr, align 2
// STANDARD-NEXT: %ext = fpext half %0 to float
// STANDARD-NEXT: %1 = load half, ptr %b.addr, align 2
// STANDARD-NEXT: %ext1 = fpext half %1 to float
// STANDARD-NEXT: %mul = fmul float %ext, %ext1
// STANDARD-NEXT: %2 = load half, ptr %c.addr, align 2
// STANDARD-NEXT: %ext2 = fpext half %2 to float
// STANDARD-NEXT: %3 = load half, ptr %d.addr, align 2
// STANDARD-NEXT: %ext3 = fpext half %3 to float
// STANDARD-NEXT: %mul4 = fmul float %ext2, %ext3
// STANDARD-NEXT: %add = fadd float %mul, %mul4
// STANDARD-NEXT: %unpromotion = fptrunc float %add to half
// STANDARD-NEXT: ret half %unpromotion
// STANDARD-NEXT: }

// NONE-LABEL: define dso_local half @f(half noundef %a, half noundef %b, half noundef %c, half noundef %d) #0 {
// NONE-NEXT: entry:
// NONE-NEXT: %a.addr = alloca half, align 2
// NONE-NEXT: %b.addr = alloca half, align 2
// NONE-NEXT: %c.addr = alloca half, align 2
// NONE-NEXT: %d.addr = alloca half, align 2
// NONE-NEXT: store half %a, ptr %a.addr, align 2
// NONE-NEXT: store half %b, ptr %b.addr, align 2
// NONE-NEXT: store half %c, ptr %c.addr, align 2
// NONE-NEXT: store half %d, ptr %d.addr, align 2
// NONE-NEXT: %0 = load half, ptr %a.addr, align 2
// NONE-NEXT: %1 = load half, ptr %b.addr, align 2
// NONE-NEXT: %mul = fmul half %0, %1
// NONE-NEXT: %2 = load half, ptr %c.addr, align 2
// NONE-NEXT: %3 = load half, ptr %d.addr, align 2
// NONE-NEXT: %mul1 = fmul half %2, %3
// NONE-NEXT: %add = fadd half %mul, %mul1
// NONE-NEXT: ret half %add
// NONE-NEXT: }

// FAST-LABEL: define dso_local half @f(half noundef %a, half noundef %b, half noundef %c, half noundef %d) #0 {
// FAST-NEXT: entry:
// FAST-NEXT: %a.addr = alloca half, align 2
// FAST-NEXT: %b.addr = alloca half, align 2
// FAST-NEXT: %c.addr = alloca half, align 2
// FAST-NEXT: %d.addr = alloca half, align 2
// FAST-NEXT: store half %a, ptr %a.addr, align 2
// FAST-NEXT: store half %b, ptr %b.addr, align 2
// FAST-NEXT: store half %c, ptr %c.addr, align 2
// FAST-NEXT: store half %d, ptr %d.addr, align 2
// FAST-NEXT: %0 = load half, ptr %a.addr, align 2
// FAST-NEXT: %ext = fpext half %0 to float
// FAST-NEXT: %1 = load half, ptr %b.addr, align 2
// FAST-NEXT: %ext1 = fpext half %1 to float
// FAST-NEXT: %mul = fmul float %ext, %ext1
// FAST-NEXT: %2 = load half, ptr %c.addr, align 2
// FAST-NEXT: %ext2 = fpext half %2 to float
// FAST-NEXT: %3 = load half, ptr %d.addr, align 2
// FAST-NEXT: %ext3 = fpext half %3 to float
// FAST-NEXT: %mul4 = fmul float %ext2, %ext3
// FAST-NEXT: %add = fadd float %mul, %mul4
// FAST-NEXT: %unpromotion = fptrunc float %add to half
// FAST-NEXT: ret half %unpromotion
// FAST-NEXT: }
44 changes: 44 additions & 0 deletions clang/test/CodeGen/SystemZ/systemz-abi.c
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,9 @@ long long pass_longlong(long long arg) { return arg; }
__int128 pass_int128(__int128 arg) { return arg; }
// CHECK-LABEL: define{{.*}} void @pass_int128(ptr dead_on_unwind noalias writable sret(i128) align 8 %{{.*}}, ptr %0)

// A scalar _Float16 is expected to be passed and returned directly as IR half.
_Float16 pass__Float16(_Float16 arg) { return arg; }
// CHECK-LABEL: define{{.*}} half @pass__Float16(half %{{.*}})

float pass_float(float arg) { return arg; }
// CHECK-LABEL: define{{.*}} float @pass_float(float %{{.*}})

Expand Down Expand Up @@ -72,6 +75,9 @@ _Complex long pass_complex_long(_Complex long arg) { return arg; }
_Complex long long pass_complex_longlong(_Complex long long arg) { return arg; }
// CHECK-LABEL: define{{.*}} void @pass_complex_longlong(ptr dead_on_unwind noalias writable sret({ i64, i64 }) align 8 %{{.*}}, ptr %{{.*}}arg)

// _Complex _Float16 is expected to be returned through an sret pointer and
// passed by pointer, like the other complex types in this file.
_Complex _Float16 pass_complex__Float16(_Complex _Float16 arg) { return arg; }
// CHECK-LABEL: define{{.*}} void @pass_complex__Float16(ptr dead_on_unwind noalias writable sret({ half, half }) align 2 %{{.*}}, ptr %{{.*}}arg)

_Complex float pass_complex_float(_Complex float arg) { return arg; }
// CHECK-LABEL: define{{.*}} void @pass_complex_float(ptr dead_on_unwind noalias writable sret({ float, float }) align 4 %{{.*}}, ptr %{{.*}}arg)

Expand Down Expand Up @@ -123,6 +129,11 @@ struct agg_16byte pass_agg_16byte(struct agg_16byte arg) { return arg; }

// Float-like aggregate types

// A struct wrapping a single _Float16 is float-like: the argument is expected
// as half with hard float and as a non-extended i16 with soft float.
struct agg__Float16 { _Float16 a; };
struct agg__Float16 pass_agg__Float16(struct agg__Float16 arg) { return arg; }
// HARD-FLOAT-LABEL: define{{.*}} void @pass_agg__Float16(ptr dead_on_unwind noalias writable sret(%struct.agg__Float16) align 2 %{{.*}}, half %{{.*}})
// SOFT-FLOAT-LABEL: define{{.*}} void @pass_agg__Float16(ptr dead_on_unwind noalias writable sret(%struct.agg__Float16) align 2 %{{.*}}, i16 noext %{{.*}})

struct agg_float { float a; };
struct agg_float pass_agg_float(struct agg_float arg) { return arg; }
// HARD-FLOAT-LABEL: define{{.*}} void @pass_agg_float(ptr dead_on_unwind noalias writable sret(%struct.agg_float) align 4 %{{.*}}, float %{{.*}})
Expand All @@ -137,6 +148,11 @@ struct agg_longdouble { long double a; };
struct agg_longdouble pass_agg_longdouble(struct agg_longdouble arg) { return arg; }
// CHECK-LABEL: define{{.*}} void @pass_agg_longdouble(ptr dead_on_unwind noalias writable sret(%struct.agg_longdouble) align 8 %{{.*}}, ptr %{{.*}})

// With the _Float16 member aligned to 8 bytes, the argument is expected as
// double with hard float and as i64 with soft float.
struct agg__Float16_a8 { _Float16 a __attribute__((aligned (8))); };
struct agg__Float16_a8 pass_agg__Float16_a8(struct agg__Float16_a8 arg) { return arg; }
// HARD-FLOAT-LABEL: define{{.*}} void @pass_agg__Float16_a8(ptr dead_on_unwind noalias writable sret(%struct.agg__Float16_a8) align 8 %{{.*}}, double %{{.*}})
// SOFT-FLOAT-LABEL: define{{.*}} void @pass_agg__Float16_a8(ptr dead_on_unwind noalias writable sret(%struct.agg__Float16_a8) align 8 %{{.*}}, i64 %{{.*}})

struct agg_float_a8 { float a __attribute__((aligned (8))); };
struct agg_float_a8 pass_agg_float_a8(struct agg_float_a8 arg) { return arg; }
// HARD-FLOAT-LABEL: define{{.*}} void @pass_agg_float_a8(ptr dead_on_unwind noalias writable sret(%struct.agg_float_a8) align 8 %{{.*}}, double %{{.*}})
Expand Down Expand Up @@ -164,6 +180,10 @@ struct agg_nofloat3 pass_agg_nofloat3(struct agg_nofloat3 arg) { return arg; }

// Union types likewise are *not* float-like aggregate types

// A union holding a _Float16 is not float-like: the argument is expected as a
// non-extended i16 regardless of the float ABI.
union union__Float16 { _Float16 a; };
union union__Float16 pass_union__Float16(union union__Float16 arg) { return arg; }
// CHECK-LABEL: define{{.*}} void @pass_union__Float16(ptr dead_on_unwind noalias writable sret(%union.union__Float16) align 2 %{{.*}}, i16 noext %{{.*}})

union union_float { float a; };
union union_float pass_union_float(union union_float arg) { return arg; }
// CHECK-LABEL: define{{.*}} void @pass_union_float(ptr dead_on_unwind noalias writable sret(%union.union_float) align 4 %{{.*}}, i32 noext %{{.*}})
Expand Down Expand Up @@ -441,6 +461,30 @@ struct agg_8byte va_agg_8byte(__builtin_va_list l) { return __builtin_va_arg(l,
// CHECK: [[VA_ARG_ADDR:%[^ ]+]] = phi ptr [ [[RAW_REG_ADDR]], %{{.*}} ], [ [[RAW_MEM_ADDR]], %{{.*}} ]
// CHECK: ret void

// Fetches a single-_Float16 struct through va_arg; the checks that follow
// cover both the register save area path and the overflow area path.
struct agg__Float16 va_agg__Float16(__builtin_va_list l) { return __builtin_va_arg(l, struct agg__Float16); }
// CHECK-LABEL: define{{.*}} void @va_agg__Float16(ptr dead_on_unwind noalias writable sret(%struct.agg__Float16) align 2 %{{.*}}, ptr %{{.*}}
// HARD-FLOAT: [[REG_COUNT_PTR:%[^ ]+]] = getelementptr inbounds nuw %struct.__va_list_tag, ptr %{{.*}}, i32 0, i32 1
// SOFT-FLOAT: [[REG_COUNT_PTR:%[^ ]+]] = getelementptr inbounds nuw %struct.__va_list_tag, ptr %{{.*}}, i32 0, i32 0
// CHECK: [[REG_COUNT:%[^ ]+]] = load i64, ptr [[REG_COUNT_PTR]]
// HARD-FLOAT: [[FITS_IN_REGS:%[^ ]+]] = icmp ult i64 [[REG_COUNT]], 4
// SOFT-FLOAT: [[FITS_IN_REGS:%[^ ]+]] = icmp ult i64 [[REG_COUNT]], 5
// CHECK: br i1 [[FITS_IN_REGS]],
// CHECK: [[SCALED_REG_COUNT:%[^ ]+]] = mul i64 [[REG_COUNT]], 8
// HARD-FLOAT: [[REG_OFFSET:%[^ ]+]] = add i64 [[SCALED_REG_COUNT]], 128
// SOFT-FLOAT: [[REG_OFFSET:%[^ ]+]] = add i64 [[SCALED_REG_COUNT]], 22
// CHECK: [[REG_SAVE_AREA_PTR:%[^ ]+]] = getelementptr inbounds nuw %struct.__va_list_tag, ptr %{{.*}}, i32 0, i32 3
// CHECK: [[REG_SAVE_AREA:%[^ ]+]] = load ptr, ptr [[REG_SAVE_AREA_PTR:[^ ]+]]
// CHECK: [[RAW_REG_ADDR:%[^ ]+]] = getelementptr i8, ptr [[REG_SAVE_AREA]], i64 [[REG_OFFSET]]
// CHECK: [[REG_COUNT1:%[^ ]+]] = add i64 [[REG_COUNT]], 1
// CHECK: store i64 [[REG_COUNT1]], ptr [[REG_COUNT_PTR]]
// CHECK: [[OVERFLOW_ARG_AREA_PTR:%[^ ]+]] = getelementptr inbounds nuw %struct.__va_list_tag, ptr %{{.*}}, i32 0, i32 2
// CHECK: [[OVERFLOW_ARG_AREA:%[^ ]+]] = load ptr, ptr [[OVERFLOW_ARG_AREA_PTR]]
// CHECK: [[RAW_MEM_ADDR:%[^ ]+]] = getelementptr i8, ptr [[OVERFLOW_ARG_AREA]], i64 6
// CHECK: [[OVERFLOW_ARG_AREA2:%[^ ]+]] = getelementptr i8, ptr [[OVERFLOW_ARG_AREA]], i64 8
// CHECK: store ptr [[OVERFLOW_ARG_AREA2]], ptr [[OVERFLOW_ARG_AREA_PTR]]
// CHECK: [[VA_ARG_ADDR:%[^ ]+]] = phi ptr [ [[RAW_REG_ADDR]], %{{.*}} ], [ [[RAW_MEM_ADDR]], %{{.*}} ]
// CHECK: ret void

struct agg_float va_agg_float(__builtin_va_list l) { return __builtin_va_arg(l, struct agg_float); }
// CHECK-LABEL: define{{.*}} void @va_agg_float(ptr dead_on_unwind noalias writable sret(%struct.agg_float) align 4 %{{.*}}, ptr %{{.*}}
// HARD-FLOAT: [[REG_COUNT_PTR:%[^ ]+]] = getelementptr inbounds nuw %struct.__va_list_tag, ptr %{{.*}}, i32 0, i32 1
Expand Down
4 changes: 3 additions & 1 deletion llvm/lib/Target/SystemZ/SystemZCallingConv.td
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ def RetCC_SystemZ_ELF : CallingConv<[
// other floating-point argument registers available for code that
// doesn't care about the ABI. All floating-point argument registers
// are call-clobbered, so we can use all of them here.
CCIfType<[f16], CCAssignToReg<[F0S, F2S, F4S, F6S]>>,
CCIfType<[f32], CCAssignToReg<[F0S, F2S, F4S, F6S]>>,
CCIfType<[f64], CCAssignToReg<[F0D, F2D, F4D, F6D]>>,

Expand Down Expand Up @@ -115,6 +116,7 @@ def CC_SystemZ_ELF : CallingConv<[
CCIfType<[i64], CCAssignToReg<[R2D, R3D, R4D, R5D, R6D]>>,

// The first 4 half, float and double arguments are passed in even registers
// F0-F6.
CCIfType<[f16], CCAssignToReg<[F0S, F2S, F4S, F6S]>>,
CCIfType<[f32], CCAssignToReg<[F0S, F2S, F4S, F6S]>>,
CCIfType<[f64], CCAssignToReg<[F0D, F2D, F4D, F6D]>>,

Expand All @@ -138,7 +140,7 @@ def CC_SystemZ_ELF : CallingConv<[
CCAssignToStack<16, 8>>>,

// Other arguments are passed in 8-byte-aligned 8-byte stack slots.
CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 8>>
CCIfType<[i32, i64, f16, f32, f64], CCAssignToStack<8, 8>>
]>;

//===----------------------------------------------------------------------===//
Expand Down
62 changes: 58 additions & 4 deletions llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -711,6 +711,13 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::BITCAST, MVT::f32, Custom);
}

// Expand FP16 <=> FP32 conversions to libcalls and handle FP16 loads and
// stores in GPRs.
setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
setTruncStoreAction(MVT::f32, MVT::f16, Expand);

// VASTART and VACOPY need to deal with the SystemZ-specific varargs
// structure, but VAEND is a no-op.
setOperationAction(ISD::VASTART, MVT::Other, Custom);
Expand Down Expand Up @@ -784,6 +791,20 @@ bool SystemZTargetLowering::useSoftFloat() const {
return Subtarget.hasSoftFloat();
}

MVT SystemZTargetLowering::getRegisterTypeForCallingConv(
    LLVMContext &Context, CallingConv::ID CC, EVT VT) const {
  if (VT.isVector()) {
    // A 128-bit vector with exactly one element is treated like any other
    // vector rather than like its element type, so it gets v16i8.
    if (VT.getSizeInBits() == 128 && VT.getVectorNumElements() == 1)
      return MVT::v16i8;
  } else if (VT == MVT::f16) {
    // f16 keeps its own register type so that it can be recognized and
    // handled.
    return MVT::f16;
  }

  // Every other type uses the generic choice.
  return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
}

EVT SystemZTargetLowering::getSetCCResultType(const DataLayout &DL,
LLVMContext &, EVT VT) const {
if (!VT.isVector())
Expand Down Expand Up @@ -1597,6 +1618,15 @@ bool SystemZTargetLowering::splitValueIntoRegisterParts(
return true;
}

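// Place an f16 (out-arg) in an f32 part: its bits are any-extended to i32 and
// bitcast to f32. This re-wraps the bits; it is not a value conversion.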
if (PartVT == MVT::f16) {
assert(NumParts == 1 && "");
SDValue I16Val = DAG.getBitcast(MVT::i16, Val);
SDValue I32Val = DAG.getAnyExtOrTrunc(I16Val, DL, MVT::i32);
Parts[0] = DAG.getBitcast(MVT::f32, I32Val);
return true;
}

return false;
}

Expand All @@ -1612,6 +1642,18 @@ SDValue SystemZTargetLowering::joinRegisterPartsIntoValue(
return SDValue();
}

// Recover an f16 from F32Val (in-arg). FP32 registers carry f16 arguments, so
// the CopyFromReg had to be created as f32; here its bits are viewed as i32,
// any-extended-or-truncated to i16, and finally bitcast to f16.
static SDValue convertF32ToF16(SDValue F32Val, SelectionDAG &DAG,
                               const SDLoc &DL) {
  assert(F32Val->getOpcode() == ISD::CopyFromReg &&
         "Only expecting to handle f16 with CopyFromReg here.");
  return DAG.getBitcast(
      MVT::f16,
      DAG.getAnyExtOrTrunc(DAG.getBitcast(MVT::i32, F32Val), DL, MVT::i16));
}

SDValue SystemZTargetLowering::LowerFormalArguments(
SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
Expand Down Expand Up @@ -1651,6 +1693,7 @@ SDValue SystemZTargetLowering::LowerFormalArguments(
NumFixedGPRs += 1;
RC = &SystemZ::GR64BitRegClass;
break;
case MVT::f16:
case MVT::f32:
NumFixedFPRs += 1;
RC = &SystemZ::FP32BitRegClass;
Expand All @@ -1675,7 +1718,11 @@ SDValue SystemZTargetLowering::LowerFormalArguments(

Register VReg = MRI.createVirtualRegister(RC);
MRI.addLiveIn(VA.getLocReg(), VReg);
ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
// Special handling is needed for f16.
MVT ArgVT = VA.getLocVT() == MVT::f16 ? MVT::f32 : VA.getLocVT();
ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, ArgVT);
if (VA.getLocVT() == MVT::f16)
ArgValue = convertF32ToF16(ArgValue, DAG, DL);
} else {
assert(VA.isMemLoc() && "Argument not register or memory");

Expand All @@ -1695,9 +1742,12 @@ SDValue SystemZTargetLowering::LowerFormalArguments(
// from this parameter. Unpromoted ints and floats are
// passed as right-justified 8-byte values.
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32)
if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32 ||
VA.getLocVT() == MVT::f16) {
unsigned SlotOffs = VA.getLocVT() == MVT::f16 ? 6 : 4;
FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN,
DAG.getIntPtrConstant(4, DL));
DAG.getIntPtrConstant(SlotOffs, DL));
}
ArgValue = DAG.getLoad(LocVT, DL, Chain, FIN,
MachinePointerInfo::getFixedStack(MF, FI));
}
Expand Down Expand Up @@ -2120,10 +2170,14 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
// Copy all of the result registers out of their specified physreg.
for (CCValAssign &VA : RetLocs) {
// Copy the value out, gluing the copy to the end of the call sequence.
// Special handling is needed for f16.
MVT ArgVT = VA.getLocVT() == MVT::f16 ? MVT::f32 : VA.getLocVT();
SDValue RetValue = DAG.getCopyFromReg(Chain, DL, VA.getLocReg(),
VA.getLocVT(), Glue);
ArgVT, Glue);
Chain = RetValue.getValue(1);
Glue = RetValue.getValue(2);
if (VA.getLocVT() == MVT::f16)
RetValue = convertF32ToF16(RetValue, DAG, DL);

// Convert the value of the return register into the value that's
// being returned.
Expand Down
Loading

0 comments on commit ae1e35c

Please sign in to comment.