Skip to content

Commit 2024111

Browse files
tomeksowipull[bot]
authored andcommitted
[RISC-V][LoongArch64] JIT: pass structs according to floating-point calling convention properly (#104237)
* Replace StructFloatFieldInfoFlags with FpStructInRegistersInfo which carries also exact field sizes and offsets * Replace StructFloatFieldInfoFlags with FpStruct::Flags in profiler * Remove FpStructInRegistersInfo::FromOldFlags() * Fix duplicating types in HandleInlineArray * Remove signedness from FpStruct::IntKind because most probably we won't need it * Remove old StructFloatFieldInfoFlags calculating routine * Typo in TARGET_LOONGARCH64 * Remove m_returnedFpFieldOffsets from ArgIterator * Add missing ENREGISTERED_PARAMTYPE_MAXSIZE condition to C# version of FpStruct info calculation * Rename RISCV64PassStructInRegister to match settled casing for RiscV in class names * Update hardcoded flags for float and double in ArgIteratorTemplate::ComputeReturnFlags() This fixes JIT/HardwareIntrinsics/General/Vector* tests. * Fix build on other platforms * Update LoongArch to use FpStructInRegistersInfo * Remove unused old flag masks * LoongArch64 typo Co-authored-by: Qiao Pengcheng <[email protected]> * Missing FpStruct namespace Co-authored-by: Qiao Pengcheng <[email protected]> * Missing FpStruct namespace Co-authored-by: Qiao Pengcheng <[email protected]> * Missing FpStruct namespace Co-authored-by: Qiao Pengcheng <[email protected]> * Use FpStruct namespace everywhere in JIT * Add tests * Fix passing FP structs in JIT * Remove CallArgABIInformation.StructFloatFieldType and Offset because they were write-only. Replace with offset from new ABI info * Remove CallArgABIInformation.StructDesc on System V because it was write-only * Remove unused local vars from ABIPassingInformation::Classify * Pass variables after the tested struct to detect potential register/stack slot over-allocation * Add test for two-field but one-slot struct. Fix assertion bug that it uncovered * Add sanity check for empty struct passing, disable on RISC-V and System V * Format * JIT review * Update StructFloatFieldInfoFlags description * Revert to hitherto instruction set order as it's not the point of this PR * Disable Test_Empty_Sanity on arm32 due to bug in Clang * Add ActiveIssue on arm and arm64 * Exclude arm64 from Test_PackedEmptyFloatLong*_RiscV * Unify get{LoongArch,RiscV}64PassFpStructInRegistersInfo JIT interfaces * Use JIT_TO_EE_TRANSITION instead of _LEAF because MethodTable::GetFpStructInRegistersInfo may throw * Remove FpStruct::IntKind, we should have similar info in ClassLayout in JIT * Change JIT interface to return a struct similar to CORINFO_SWIFT_LOWERING to facilitate code unification in the future * Change JIT to use new Swift-like getFpStructLowering * Cache CORINFO_FPSTRUCT_LOWERING * Update LoongArch classifier to use CORINFO_FPSTRUCT_LOWERING * Update StructFloatInfoFlags doc comment on C# * Add arm32 clang issue * Move StructFloatFieldInfoFlags and FpStructInRegistersInfo out of the JIT interface * Merge LoongArch and RISC-V AOT calculation of FpStructInRegistersInfo because they were identical. Move it to Common\Internal/Runtime because it's no longer exposed in JIT interface. * Don't zero-initialize CORINFO_FPSTRUCT_LOWERING * Update LoongArch classifier * Fix assert for two-field structs smaller than 8 bytes, add test
1 parent da22ea8 commit 2024111

File tree

11 files changed

+1468
-154
lines changed

11 files changed

+1468
-154
lines changed

src/coreclr/jit/codegencommon.cpp

Lines changed: 24 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -3201,6 +3201,13 @@ var_types CodeGen::genParamStackType(LclVarDsc* dsc, const ABIPassingSegment& se
32013201
// can always use the full register size here. This allows us to
32023202
// use stp more often.
32033203
return TYP_I_IMPL;
3204+
#elif defined(TARGET_RISCV64) || defined(TARGET_LOONGARCH64)
3205+
// On RISC-V/LoongArch structs passed according to floating-point calling convention are enregistered one
3206+
// field per register regardless of the field layout in memory, so the small int load/store instructions
3207+
// must not be upsized to 4 bytes, otherwise for example:
3208+
// * struct { struct{} e1,e2,e3; byte b; float f; } -- 4-byte store for 'b' would trash 'f'
3209+
// * struct { float f; struct{} e1,e2,e3; byte b; } -- 4-byte store for 'b' would trash adjacent stack slot
3210+
return seg.GetRegisterType();
32043211
#else
32053212
return genActualType(seg.GetRegisterType());
32063213
#endif
@@ -7390,18 +7397,19 @@ void CodeGen::genStructReturn(GenTree* treeNode)
73907397
assert(varDsc->lvIsMultiRegRet);
73917398

73927399
#if defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
7393-
// On LoongArch64, for a struct like "{ int, double }", "retTypeDesc" will be "{ TYP_INT, TYP_DOUBLE }",
7394-
// i. e. not include the padding for the first field, and so the general loop below won't work.
7395-
var_types type = retTypeDesc.GetReturnRegType(0);
7396-
regNumber toReg = retTypeDesc.GetABIReturnReg(0, compiler->info.compCallConv);
7397-
GetEmitter()->emitIns_R_S(ins_Load(type), emitTypeSize(type), toReg, lclNode->GetLclNum(), 0);
7400+
var_types type = retTypeDesc.GetReturnRegType(0);
7401+
unsigned offset = retTypeDesc.GetReturnFieldOffset(0);
7402+
regNumber toReg = retTypeDesc.GetABIReturnReg(0, compiler->info.compCallConv);
7403+
7404+
GetEmitter()->emitIns_R_S(ins_Load(type), emitTypeSize(type), toReg, lclNode->GetLclNum(), offset);
73987405
if (regCount > 1)
73997406
{
74007407
assert(regCount == 2);
7401-
int offset = genTypeSize(type);
7402-
type = retTypeDesc.GetReturnRegType(1);
7403-
offset = (int)((unsigned int)offset < genTypeSize(type) ? genTypeSize(type) : offset);
7404-
toReg = retTypeDesc.GetABIReturnReg(1, compiler->info.compCallConv);
7408+
assert(offset + genTypeSize(type) <= retTypeDesc.GetReturnFieldOffset(1));
7409+
type = retTypeDesc.GetReturnRegType(1);
7410+
offset = retTypeDesc.GetReturnFieldOffset(1);
7411+
toReg = retTypeDesc.GetABIReturnReg(1, compiler->info.compCallConv);
7412+
74057413
GetEmitter()->emitIns_R_S(ins_Load(type), emitTypeSize(type), toReg, lclNode->GetLclNum(), offset);
74067414
}
74077415
#else // !TARGET_LOONGARCH64 && !TARGET_RISCV64
@@ -7779,6 +7787,11 @@ void CodeGen::genMultiRegStoreToLocal(GenTreeLclVar* lclNode)
77797787
assert(regCount == varDsc->lvFieldCnt);
77807788
}
77817789

7790+
#if defined(TARGET_RISCV64) || defined(TARGET_LOONGARCH64)
7791+
// genMultiRegStoreToLocal is only used for calls on RISC-V and LoongArch
7792+
const ReturnTypeDesc* returnTypeDesc = actualOp1->AsCall()->GetReturnTypeDesc();
7793+
#endif
7794+
77827795
#ifdef SWIFT_SUPPORT
77837796
const uint32_t* offsets = nullptr;
77847797
if (actualOp1->IsCall() && (actualOp1->AsCall()->GetUnmanagedCallConv() == CorInfoCallConvExtension::Swift))
@@ -7829,8 +7842,8 @@ void CodeGen::genMultiRegStoreToLocal(GenTreeLclVar* lclNode)
78297842
else
78307843
{
78317844
#if defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
7832-
// should consider the padding field within a struct.
7833-
offset = (offset % genTypeSize(srcType)) ? AlignUp(offset, genTypeSize(srcType)) : offset;
7845+
// Should consider the padding, empty struct fields, etc within a struct.
7846+
offset = returnTypeDesc->GetReturnFieldOffset(i);
78347847
#endif
78357848
#ifdef SWIFT_SUPPORT
78367849
if (offsets != nullptr)

src/coreclr/jit/compiler.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -743,7 +743,7 @@ var_types Compiler::getArgTypeForStruct(CORINFO_CLASS_HANDLE clsHnd,
743743

744744
// Otherwise we pass this struct by value on the stack
745745
// setup wbPassType and useType indicate that this is passed by value according to the X86/ARM32 ABI
746-
// On LOONGARCH64 struct that is 1-16 bytes is passed by value in one/two register(s)
746+
// On LOONGARCH64 and RISCV64 struct that is 1-16 bytes is passed by value in one/two register(s)
747747
howToPassStruct = SPK_ByValue;
748748
useType = TYP_STRUCT;
749749

src/coreclr/jit/gentree.cpp

Lines changed: 20 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -29489,6 +29489,15 @@ void ReturnTypeDesc::InitializeStructReturnType(Compiler* comp,
2948929489
assert(returnType != TYP_UNKNOWN);
2949029490
assert(returnType != TYP_STRUCT);
2949129491
m_regType[0] = returnType;
29492+
29493+
#if defined(TARGET_RISCV64) || defined(TARGET_LOONGARCH64)
29494+
const CORINFO_FPSTRUCT_LOWERING* lowering = comp->GetFpStructLowering(retClsHnd);
29495+
if (!lowering->byIntegerCallConv)
29496+
{
29497+
assert(lowering->numLoweredElements == 1);
29498+
m_fieldOffset[0] = lowering->offsets[0];
29499+
}
29500+
#endif // defined(TARGET_RISCV64) || defined(TARGET_LOONGARCH64)
2949229501
break;
2949329502
}
2949429503

@@ -29554,37 +29563,36 @@ void ReturnTypeDesc::InitializeStructReturnType(Compiler* comp,
2955429563
}
2955529564

2955629565
#elif defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
29557-
assert((structSize >= TARGET_POINTER_SIZE) && (structSize <= (2 * TARGET_POINTER_SIZE)));
29566+
assert(structSize > sizeof(float));
29567+
assert(structSize <= (2 * TARGET_POINTER_SIZE));
2955829568
BYTE gcPtrs[2] = {TYPE_GC_NONE, TYPE_GC_NONE};
2955929569
comp->info.compCompHnd->getClassGClayout(retClsHnd, &gcPtrs[0]);
2956029570
const CORINFO_FPSTRUCT_LOWERING* lowering = comp->GetFpStructLowering(retClsHnd);
2956129571
if (!lowering->byIntegerCallConv)
2956229572
{
2956329573
comp->compFloatingPointUsed = true;
2956429574
assert(lowering->numLoweredElements == MAX_RET_REG_COUNT);
29565-
var_types types[MAX_RET_REG_COUNT] = {JITtype2varType(lowering->loweredElements[0]),
29566-
JITtype2varType(lowering->loweredElements[1])};
29567-
assert(varTypeIsFloating(types[0]) || varTypeIsFloating(types[1]));
29568-
assert((structSize > 8) == ((genTypeSize(types[0]) == 8) || (genTypeSize(types[1]) == 8)));
29575+
static_assert(MAX_RET_REG_COUNT == MAX_FPSTRUCT_LOWERED_ELEMENTS, "");
2956929576
for (unsigned i = 0; i < MAX_RET_REG_COUNT; ++i)
2957029577
{
29571-
if (varTypeIsFloating(types[i]))
29572-
{
29573-
m_regType[i] = types[i];
29574-
}
29575-
else
29578+
m_regType[i] = JITtype2varType(lowering->loweredElements[i]);
29579+
m_fieldOffset[i] = lowering->offsets[i];
29580+
if (m_regType[i] == TYP_LONG && ((m_fieldOffset[i] % TARGET_POINTER_SIZE) == 0))
2957629581
{
29577-
assert(varTypeIsIntegralOrI(types[i]));
29578-
m_regType[i] = (genTypeSize(types[i]) == 8) ? comp->getJitGCType(gcPtrs[i]) : TYP_INT;
29582+
unsigned slot = m_fieldOffset[i] / TARGET_POINTER_SIZE;
29583+
m_regType[i] = comp->getJitGCType(gcPtrs[slot]);
2957929584
}
2958029585
}
29586+
assert(varTypeIsFloating(m_regType[0]) || varTypeIsFloating(m_regType[1]));
2958129587
}
2958229588
else
2958329589
{
2958429590
for (unsigned i = 0; i < 2; ++i)
2958529591
{
2958629592
m_regType[i] = comp->getJitGCType(gcPtrs[i]);
2958729593
}
29594+
m_fieldOffset[0] = 0;
29595+
m_fieldOffset[1] = TARGET_POINTER_SIZE;
2958829596
}
2958929597

2959029598
#elif defined(TARGET_X86)

src/coreclr/jit/gentree.h

Lines changed: 33 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -4275,6 +4275,13 @@ struct ReturnTypeDesc
42754275
private:
42764276
var_types m_regType[MAX_RET_REG_COUNT];
42774277

4278+
#if defined(TARGET_RISCV64) || defined(TARGET_LOONGARCH64)
4279+
// Structs according to hardware floating-point calling convention are passed as two logical fields, each in
4280+
// separate register, disregarding struct layout such as packing, custom alignment, padding with empty structs, etc.
4281+
// We need size (can be derived from m_regType) & offset of each field for memory load/stores
4282+
unsigned m_fieldOffset[MAX_RET_REG_COUNT];
4283+
#endif
4284+
42784285
#ifdef DEBUG
42794286
bool m_inited;
42804287
#endif
@@ -4306,6 +4313,9 @@ struct ReturnTypeDesc
43064313
for (unsigned i = 0; i < MAX_RET_REG_COUNT; ++i)
43074314
{
43084315
m_regType[i] = TYP_UNKNOWN;
4316+
#if defined(TARGET_RISCV64) || defined(TARGET_LOONGARCH64)
4317+
m_fieldOffset[i] = 0;
4318+
#endif
43094319
}
43104320
#ifdef DEBUG
43114321
m_inited = false;
@@ -4351,8 +4361,11 @@ struct ReturnTypeDesc
43514361
for (unsigned i = regCount + 1; i < MAX_RET_REG_COUNT; ++i)
43524362
{
43534363
assert(m_regType[i] == TYP_UNKNOWN);
4354-
}
4364+
#if defined(TARGET_RISCV64) || defined(TARGET_LOONGARCH64)
4365+
assert(m_fieldOffset[i] == 0);
43554366
#endif
4367+
}
4368+
#endif // DEBUG
43564369

43574370
return regCount;
43584371
}
@@ -4401,6 +4414,25 @@ struct ReturnTypeDesc
44014414
return result;
44024415
}
44034416

4417+
unsigned GetSingleReturnFieldOffset() const
4418+
{
4419+
assert(!IsMultiRegRetType());
4420+
assert(m_regType[0] != TYP_UNKNOWN);
4421+
#if defined(TARGET_RISCV64) || defined(TARGET_LOONGARCH64)
4422+
return m_fieldOffset[0];
4423+
#else
4424+
return 0;
4425+
#endif
4426+
}
4427+
4428+
#if defined(TARGET_RISCV64) || defined(TARGET_LOONGARCH64)
4429+
unsigned GetReturnFieldOffset(unsigned index) const
4430+
{
4431+
assert(m_regType[index] != TYP_UNKNOWN);
4432+
return m_fieldOffset[index];
4433+
}
4434+
#endif
4435+
44044436
// Get i'th ABI return register
44054437
regNumber GetABIReturnReg(unsigned idx, CorInfoCallConvExtension callConv) const;
44064438

@@ -4508,9 +4540,6 @@ struct CallArgABIInformation
45084540
: NumRegs(0)
45094541
, ByteOffset(0)
45104542
, ByteSize(0)
4511-
#if defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
4512-
, StructFloatFieldType()
4513-
#endif
45144543
, ArgType(TYP_UNDEF)
45154544
, PassedByRef(false)
45164545
#if FEATURE_ARG_SPLIT
@@ -4537,18 +4566,6 @@ struct CallArgABIInformation
45374566
unsigned NumRegs;
45384567
unsigned ByteOffset;
45394568
unsigned ByteSize;
4540-
#if defined(UNIX_AMD64_ABI)
4541-
// Unix amd64 will split floating point types and integer types in structs
4542-
// between floating point and general purpose registers. Keep track of that
4543-
// information so we do not need to recompute it later.
4544-
SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR StructDesc;
4545-
#endif // UNIX_AMD64_ABI
4546-
#if defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
4547-
// For LoongArch64's and RISC-V 64's ABI, the struct which has float field(s) and no more than two fields
4548-
// may be passed by float register(s).
4549-
// e.g `struct {int a; float b;}` passed by an integer register and a float register.
4550-
var_types StructFloatFieldType[2];
4551-
#endif
45524569
// The type used to pass this argument. This is generally the original
45534570
// argument type, but when a struct is passed as a scalar type, this is
45544571
// that type. Note that if a struct is passed by reference, this will still

src/coreclr/jit/lclvars.cpp

Lines changed: 19 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -914,40 +914,37 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un
914914
if ((lowering != nullptr) && !lowering->byIntegerCallConv)
915915
{
916916
assert(varTypeIsStruct(argType));
917-
int floatNum = 0;
917+
assert((lowering->numLoweredElements == 1) || (lowering->numLoweredElements == 2));
918918
if (lowering->numLoweredElements == 1)
919-
{
920-
assert(argSize <= 8);
921919
assert(varDsc->lvExactSize() <= argSize);
922920

923-
floatNum = 1;
924-
canPassArgInRegisters = varDscInfo->canEnreg(TYP_DOUBLE, 1);
921+
cSlotsToEnregister = lowering->numLoweredElements;
922+
argRegTypeInStruct1 = JITtype2varType(lowering->loweredElements[0]);
923+
if (lowering->numLoweredElements == 2)
924+
argRegTypeInStruct2 = JITtype2varType(lowering->loweredElements[1]);
925925

926-
argRegTypeInStruct1 = JITtype2varType(lowering->loweredElements[0]);
927-
assert(varTypeIsFloating(argRegTypeInStruct1));
928-
}
929-
else
926+
int floatNum = (int)varTypeIsFloating(argRegTypeInStruct1) + (int)varTypeIsFloating(argRegTypeInStruct2);
927+
assert(floatNum > 0);
928+
929+
canPassArgInRegisters = varDscInfo->canEnreg(TYP_DOUBLE, floatNum);
930+
if (canPassArgInRegisters && (floatNum < lowering->numLoweredElements))
930931
{
932+
assert(floatNum == 1);
931933
assert(lowering->numLoweredElements == 2);
932-
argRegTypeInStruct1 = genActualType(JITtype2varType(lowering->loweredElements[0]));
933-
argRegTypeInStruct2 = genActualType(JITtype2varType(lowering->loweredElements[1]));
934-
floatNum = (int)varTypeIsFloating(argRegTypeInStruct1) + (int)varTypeIsFloating(argRegTypeInStruct2);
935-
canPassArgInRegisters = varDscInfo->canEnreg(TYP_DOUBLE, floatNum);
936-
if (floatNum == 1)
937-
canPassArgInRegisters = canPassArgInRegisters && varDscInfo->canEnreg(TYP_I_IMPL, 1);
934+
assert(varTypeIsIntegralOrI(argRegTypeInStruct1) || varTypeIsIntegralOrI(argRegTypeInStruct2));
935+
canPassArgInRegisters = varDscInfo->canEnreg(TYP_I_IMPL, 1);
938936
}
939937

940-
assert((floatNum == 1) || (floatNum == 2));
941-
942938
if (!canPassArgInRegisters)
943939
{
944-
// On LoongArch64 and RISCV64, if there aren't any remaining floating-point registers to pass the
945-
// argument, integer registers (if any) are used instead.
946-
canPassArgInRegisters = varDscInfo->canEnreg(argType, cSlotsToEnregister);
947-
940+
// If a struct eligible for passing according to floating-point calling convention cannot be fully
941+
// enregistered, it is passed according to integer calling convention -- in up to two integer registers
942+
// and/or stack slots, as a lump of bits laid out like in memory.
943+
cSlotsToEnregister = cSlots;
948944
argRegTypeInStruct1 = TYP_UNKNOWN;
949945
argRegTypeInStruct2 = TYP_UNKNOWN;
950946

947+
canPassArgInRegisters = varDscInfo->canEnreg(argType, cSlotsToEnregister);
951948
if (cSlotsToEnregister == 2)
952949
{
953950
if (!canPassArgInRegisters && varDscInfo->canEnreg(TYP_I_IMPL, 1))
@@ -1082,7 +1079,7 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un
10821079
varDsc->SetOtherArgReg(
10831080
genMapRegArgNumToRegNum(secondAllocatedRegArgNum, argRegTypeInStruct2, info.compCallConv));
10841081
}
1085-
else if (cSlots > 1)
1082+
else if (cSlotsToEnregister > 1)
10861083
{
10871084
// Here a struct-arg which needs two registers but only one integer register available,
10881085
// it has to be split. But we reserved extra 8-bytes for the whole struct.

src/coreclr/jit/lower.cpp

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4840,12 +4840,14 @@ GenTree* Lowering::LowerStoreLocCommon(GenTreeLclVarCommon* lclStore)
48404840
// x86 uses it only for long return type, not for structs.
48414841
assert(slotCount == 1);
48424842
assert(lclRegType != TYP_UNDEF);
4843-
#else // !TARGET_XARCH || UNIX_AMD64_ABI
4843+
#else // !TARGET_XARCH || UNIX_AMD64_ABI
48444844
if (!comp->IsHfa(layout->GetClassHandle()))
48454845
{
48464846
if (slotCount > 1)
48474847
{
4848+
#if !defined(TARGET_RISCV64) && !defined(TARGET_LOONGARCH64)
48484849
assert(call->HasMultiRegRetVal());
4850+
#endif
48494851
}
48504852
else
48514853
{
@@ -5151,6 +5153,7 @@ void Lowering::LowerRetSingleRegStructLclVar(GenTreeUnOp* ret)
51515153
// Otherwise we don't mind that we leave the upper bits undefined.
51525154
lclVar->ChangeType(ret->TypeGet());
51535155
}
5156+
lclVar->AsLclFld()->SetLclOffs(comp->compRetTypeDesc.GetSingleReturnFieldOffset());
51545157
}
51555158
else
51565159
{
@@ -5339,7 +5342,8 @@ GenTreeLclVar* Lowering::SpillStructCallResult(GenTreeCall* call) const
53395342
comp->lvaSetVarDoNotEnregister(spillNum DEBUGARG(DoNotEnregisterReason::LocalField));
53405343
CORINFO_CLASS_HANDLE retClsHnd = call->gtRetClsHnd;
53415344
comp->lvaSetStruct(spillNum, retClsHnd, false);
5342-
GenTreeLclFld* spill = comp->gtNewStoreLclFldNode(spillNum, call->TypeGet(), 0, call);
5345+
unsigned offset = call->GetReturnTypeDesc()->GetSingleReturnFieldOffset();
5346+
GenTreeLclFld* spill = comp->gtNewStoreLclFldNode(spillNum, call->TypeGet(), offset, call);
53435347

53445348
BlockRange().InsertAfter(call, spill);
53455349
ContainCheckStoreLoc(spill);

0 commit comments

Comments
 (0)