Skip to content

Commit 0c88424

Browse files
authored
JIT: Support bitwise field insertions for call arguments (dotnet#115977)
Add support for using bitwise operations to reconstruct registers passed into calls from multiple promoted fields. Remove the IR invariant that `FIELD_LIST` args must always map cleanly to registers; instead, any `FIELD_LIST` is allowed for register-only arguments before lowering, and lowering takes care to normalize them into a handled shape. `fgTryMorphStructArg` is changed to take advantage of this by now producing `FIELD_LIST` even when a promoted arg does not match the target ABI. Support in physical promotion will be added in a follow-up. Split arguments are not handled and retain the old IR invariant of requiring registers and stack slots to map cleanly from `FIELD_LIST`. win-x64 examples: ```csharp static void Foo(int x) { Use<int?>(x); Use<int?>(5); Use<int?>(null); } ``` ```diff G_M7200_IG02: ;; offset=0x0004 - mov byte ptr [rsp+0x20], 1 - mov dword ptr [rsp+0x24], ecx - mov rcx, qword ptr [rsp+0x20] + mov ecx, ecx + shl rcx, 32 + or rcx, 1 call [Program:Bar(System.Nullable`1[int])] - mov dword ptr [rsp+0x24], 5 - mov rcx, qword ptr [rsp+0x20] + mov rcx, 0x500000001 call [Program:Bar(System.Nullable`1[int])] - mov byte ptr [rsp+0x20], 0 xor ecx, ecx - mov dword ptr [rsp+0x24], ecx - mov rcx, qword ptr [rsp+0x20] - ;; size=55 bbWeight=1 PerfScore 14.25 + ;; size=34 bbWeight=1 PerfScore 7.50 G_M7200_IG03: add rsp, 40 tail.jmp [Program:Bar(System.Nullable`1[int])] ;; size=10 bbWeight=1 PerfScore 2.25 ``` ```csharp static void Foo(int x, float y) { Use((x, y)); } ``` ```diff G_M42652_IG01: ;; offset=0x0000 - push rax - ;; size=1 bbWeight=1 PerfScore 1.00 + ;; size=0 bbWeight=1 PerfScore 0.00 G_M42652_IG02: - mov dword ptr [rsp], ecx - vmovss dword ptr [rsp+0x04], xmm1 - mov rcx, qword ptr [rsp] + vmovd eax, xmm1 + shl rax, 32 + mov ecx, ecx + or rcx, rax ;; size=13 bbWeight=1 PerfScore 3.00 G_M42652_IG03: - add rsp, 8 tail.jmp [Program:Use[System.ValueTuple`2[int,float]](System.ValueTuple`2[int,float])] ``` A win-arm64 example: ```csharp static void Foo(int[] 
arr) { Use(arr.AsMemory()); } ``` ```diff G_M33990_IG01: - stp fp, lr, [sp, #-0x20]! + stp fp, lr, [sp, #-0x10]! mov fp, sp - str xzr, [fp, #0x10] // [V03 tmp2] - ;; size=12 bbWeight=1 PerfScore 2.50 + ;; size=8 bbWeight=1 PerfScore 1.50 G_M33990_IG02: cbz x0, G_M33990_IG04 ;; size=4 bbWeight=1 PerfScore 1.00 G_M33990_IG03: - str x0, [fp, #0x10] // [V07 tmp6] - str wzr, [fp, #0x18] // [V08 tmp7] - ldr w0, [x0, #0x08] - str w0, [fp, #0x1C] // [V09 tmp8] + ldr w1, [x0, #0x08] b G_M33990_IG05 - ;; size=20 bbWeight=0.50 PerfScore 3.50 + ;; size=8 bbWeight=0.50 PerfScore 2.00 G_M33990_IG04: - str xzr, [fp, #0x10] // [V07 tmp6] - str xzr, [fp, #0x18] - ;; size=8 bbWeight=0.50 PerfScore 1.00 + mov x0, xzr + mov w1, wzr + ;; size=8 bbWeight=0.50 PerfScore 0.50 G_M33990_IG05: - ldp x0, x1, [fp, #0x10] // [V03 tmp2], [V03 tmp2+0x08] - movz x2, #0xD920 // code for Program:Use[System.Memory`1[int]](System.Memory`1[int]) - movk x2, #0x4590 LSL #16 + mov w1, w1 + lsl x1, x1, #32 + movz x2, #0xD950 // code for Program:Use[System.Memory`1[int]](System.Memory`1[int]) + movk x2, #0x4592 LSL #16 movk x2, #0x7FFE LSL #32 ldr x2, [x2] - ;; size=20 bbWeight=1 PerfScore 7.50 + ;; size=24 bbWeight=1 PerfScore 6.00 G_M33990_IG06: - ldp fp, lr, [sp], #0x20 + ldp fp, lr, [sp], #0x10 br x2 ;; size=8 bbWeight=1 PerfScore 2.00 -; Total bytes of code: 72 +; Total bytes of code: 60 ```
1 parent 7e41ae6 commit 0c88424

File tree

7 files changed

+245
-105
lines changed

7 files changed

+245
-105
lines changed

src/coreclr/jit/abi.cpp

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,29 @@ var_types ABIPassingSegment::GetRegisterType() const
154154
}
155155
}
156156

157+
//-----------------------------------------------------------------------------
158+
// GetRegisterType:
159+
// Return the smallest type larger than or equal to Size that most naturally
160+
// represents the register this segment is passed in, taking into account the
161+
// GC info of the specified layout.
162+
//
163+
// Return Value:
164+
// A type that matches ABIPassingSegment::Size and the register.
165+
//
166+
var_types ABIPassingSegment::GetRegisterType(ClassLayout* layout) const
167+
{
168+
if (genIsValidIntReg(GetRegister()))
169+
{
170+
assert(Offset < layout->GetSize());
171+
if (((Offset % TARGET_POINTER_SIZE) == 0) && (Size == TARGET_POINTER_SIZE))
172+
{
173+
return layout->GetGCPtrType(Offset / TARGET_POINTER_SIZE);
174+
}
175+
}
176+
177+
return GetRegisterType();
178+
}
179+
157180
//-----------------------------------------------------------------------------
158181
// InRegister:
159182
// Create an ABIPassingSegment representing that a segment is passed in a

src/coreclr/jit/abi.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ class ABIPassingSegment
4141
unsigned GetStackSize() const;
4242

4343
var_types GetRegisterType() const;
44+
var_types GetRegisterType(ClassLayout* layout) const;
4445

4546
static ABIPassingSegment InRegister(regNumber reg, unsigned offset, unsigned size);
4647
static ABIPassingSegment OnStack(unsigned stackOffset, unsigned offset, unsigned size);

src/coreclr/jit/compiler.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11635,6 +11635,7 @@ class Compiler
1163511635
#endif // defined(UNIX_AMD64_ABI)
1163611636

1163711637
bool fgTryMorphStructArg(CallArg* arg);
11638+
bool FieldsMatchAbi(LclVarDsc* varDsc, const ABIPassingInformation& abiInfo);
1163811639

1163911640
bool killGCRefs(GenTree* tree);
1164011641

src/coreclr/jit/gentree.h

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2735,16 +2735,10 @@ struct GenTreeFieldList : public GenTree
27352735

27362736
class UseList
27372737
{
2738-
Use* m_head;
2739-
Use* m_tail;
2738+
Use* m_head = nullptr;
2739+
Use* m_tail = nullptr;
27402740

27412741
public:
2742-
UseList()
2743-
: m_head(nullptr)
2744-
, m_tail(nullptr)
2745-
{
2746-
}
2747-
27482742
Use* GetHead() const
27492743
{
27502744
return m_head;
@@ -2802,6 +2796,12 @@ struct GenTreeFieldList : public GenTree
28022796
}
28032797
}
28042798

2799+
void Clear()
2800+
{
2801+
m_head = nullptr;
2802+
m_tail = nullptr;
2803+
}
2804+
28052805
bool IsSorted() const
28062806
{
28072807
unsigned offset = 0;

src/coreclr/jit/lower.cpp

Lines changed: 147 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -1725,20 +1725,22 @@ void Lowering::LowerArg(GenTreeCall* call, CallArg* callArg)
17251725
{
17261726
if (abiInfo.HasAnyRegisterSegment())
17271727
{
1728-
#if FEATURE_MULTIREG_ARGS
1729-
if ((abiInfo.NumSegments > 1) && arg->OperIs(GT_FIELD_LIST))
1728+
if (arg->OperIs(GT_FIELD_LIST) || (abiInfo.NumSegments > 1))
17301729
{
1731-
unsigned int regIndex = 0;
1732-
for (GenTreeFieldList::Use& use : arg->AsFieldList()->Uses())
1730+
if (!arg->OperIs(GT_FIELD_LIST))
17331731
{
1734-
const ABIPassingSegment& segment = abiInfo.Segment(regIndex);
1735-
InsertPutArgReg(&use.NodeRef(), segment);
1736-
1737-
regIndex++;
1732+
// Primitive arg, but the ABI requires it to be split into
1733+
// registers. Insert the field list here.
1734+
GenTreeFieldList* fieldList = comp->gtNewFieldList();
1735+
fieldList->AddFieldLIR(comp, arg, 0, genActualType(arg->TypeGet()));
1736+
BlockRange().InsertAfter(arg, fieldList);
1737+
arg = *ppArg = fieldList;
17381738
}
1739+
1740+
LowerArgFieldList(callArg, arg->AsFieldList());
1741+
arg = *ppArg;
17391742
}
17401743
else
1741-
#endif // FEATURE_MULTIREG_ARGS
17421744
{
17431745
assert(abiInfo.HasExactlyOneRegisterSegment());
17441746
InsertPutArgReg(ppArg, abiInfo.Segment(0));
@@ -4809,6 +4811,18 @@ void Lowering::LowerRet(GenTreeOp* ret)
48094811
ContainCheckRet(ret);
48104812
}
48114813

4814+
struct LowerFieldListRegisterInfo
4815+
{
4816+
unsigned Offset;
4817+
var_types RegType;
4818+
4819+
LowerFieldListRegisterInfo(unsigned offset, var_types regType)
4820+
: Offset(offset)
4821+
, RegType(regType)
4822+
{
4823+
}
4824+
};
4825+
48124826
//----------------------------------------------------------------------------------------------
48134827
// LowerRetFieldList:
48144828
// Lower a returned FIELD_LIST node.
@@ -4822,21 +4836,18 @@ void Lowering::LowerRetFieldList(GenTreeOp* ret, GenTreeFieldList* fieldList)
48224836
const ReturnTypeDesc& retDesc = comp->compRetTypeDesc;
48234837
unsigned numRegs = retDesc.GetReturnRegCount();
48244838

4825-
bool isCompatible = IsFieldListCompatibleWithReturn(fieldList);
4839+
auto getRegInfo = [=, &retDesc](unsigned regIndex) {
4840+
unsigned offset = retDesc.GetReturnFieldOffset(regIndex);
4841+
var_types regType = genActualType(retDesc.GetReturnRegType(regIndex));
4842+
return LowerFieldListRegisterInfo(offset, regType);
4843+
};
4844+
4845+
bool isCompatible = IsFieldListCompatibleWithRegisters(fieldList, numRegs, getRegInfo);
48264846
if (!isCompatible)
48274847
{
4828-
JITDUMP("Spilling field list [%06u] to stack\n", Compiler::dspTreeID(fieldList));
4829-
unsigned lclNum = comp->lvaGrabTemp(true DEBUGARG("Spilled local for return value"));
4848+
unsigned lclNum =
4849+
StoreFieldListToNewLocal(comp->typGetObjLayout(comp->info.compMethodInfo->args.retTypeClass), fieldList);
48304850
LclVarDsc* varDsc = comp->lvaGetDesc(lclNum);
4831-
comp->lvaSetStruct(lclNum, comp->info.compMethodInfo->args.retTypeClass, false);
4832-
comp->lvaSetVarDoNotEnregister(lclNum DEBUGARG(DoNotEnregisterReason::BlockOpRet));
4833-
4834-
for (GenTreeFieldList::Use& use : fieldList->Uses())
4835-
{
4836-
GenTree* store = comp->gtNewStoreLclFldNode(lclNum, use.GetType(), use.GetOffset(), use.GetNode());
4837-
BlockRange().InsertAfter(use.GetNode(), store);
4838-
LowerNode(store);
4839-
}
48404851

48414852
GenTree* retValue = comp->gtNewLclvNode(lclNum, varDsc->TypeGet());
48424853
ret->SetReturnValue(retValue);
@@ -4859,7 +4870,89 @@ void Lowering::LowerRetFieldList(GenTreeOp* ret, GenTreeFieldList* fieldList)
48594870
return;
48604871
}
48614872

4862-
LowerFieldListToFieldListOfRegisters(fieldList);
4873+
LowerFieldListToFieldListOfRegisters(fieldList, numRegs, getRegInfo);
4874+
}
4875+
4876+
//----------------------------------------------------------------------------------------------
4877+
// StoreFieldListToNewLocal:
4878+
// Create a new local with the specified layout and store the specified
4879+
// fields of the specified FIELD_LIST into it.
4880+
//
4881+
// Arguments:
4882+
// layout - Layout of the new local
4883+
// fieldList - Fields to store to it
4884+
//
4885+
// Returns:
4886+
// Var number of new local.
4887+
//
4888+
unsigned Lowering::StoreFieldListToNewLocal(ClassLayout* layout, GenTreeFieldList* fieldList)
4889+
{
4890+
JITDUMP("Spilling field list [%06u] to stack\n", Compiler::dspTreeID(fieldList));
4891+
unsigned lclNum = comp->lvaGrabTemp(true DEBUGARG("Spilled local for field list"));
4892+
LclVarDsc* varDsc = comp->lvaGetDesc(lclNum);
4893+
comp->lvaSetStruct(lclNum, layout, false);
4894+
comp->lvaSetVarDoNotEnregister(lclNum DEBUGARG(DoNotEnregisterReason::LocalField));
4895+
4896+
for (GenTreeFieldList::Use& use : fieldList->Uses())
4897+
{
4898+
GenTree* store = comp->gtNewStoreLclFldNode(lclNum, use.GetType(), use.GetOffset(), use.GetNode());
4899+
BlockRange().InsertAfter(use.GetNode(), store);
4900+
LowerNode(store);
4901+
}
4902+
4903+
return lclNum;
4904+
}
4905+
4906+
//----------------------------------------------------------------------------------------------
4907+
// LowerArgFieldList:
4908+
// Lower an argument FIELD_LIST node.
4909+
//
4910+
// Arguments:
4911+
// arg - The argument
4912+
// fieldList - The FIELD_LIST node
4913+
//
4914+
void Lowering::LowerArgFieldList(CallArg* arg, GenTreeFieldList* fieldList)
4915+
{
4916+
assert(!arg->AbiInfo.HasAnyStackSegment());
4917+
4918+
auto getRegInfo = [=](unsigned regIndex) {
4919+
const ABIPassingSegment& seg = arg->AbiInfo.Segment(regIndex);
4920+
return LowerFieldListRegisterInfo(seg.Offset, seg.GetRegisterType());
4921+
};
4922+
4923+
bool isCompatible = IsFieldListCompatibleWithRegisters(fieldList, arg->AbiInfo.NumSegments, getRegInfo);
4924+
if (!isCompatible)
4925+
{
4926+
ClassLayout* layout = comp->typGetObjLayout(arg->GetSignatureClassHandle());
4927+
unsigned lclNum = StoreFieldListToNewLocal(layout, fieldList);
4928+
fieldList->Uses().Clear();
4929+
for (const ABIPassingSegment& seg : arg->AbiInfo.Segments())
4930+
{
4931+
GenTreeLclFld* fld = comp->gtNewLclFldNode(lclNum, seg.GetRegisterType(layout), seg.Offset);
4932+
fieldList->AddFieldLIR(comp, fld, seg.Offset, fld->TypeGet());
4933+
BlockRange().InsertBefore(fieldList, fld);
4934+
}
4935+
}
4936+
else
4937+
{
4938+
LowerFieldListToFieldListOfRegisters(fieldList, arg->AbiInfo.NumSegments, getRegInfo);
4939+
}
4940+
4941+
GenTreeFieldList::Use* field = fieldList->Uses().GetHead();
4942+
for (const ABIPassingSegment& seg : arg->AbiInfo.Segments())
4943+
{
4944+
assert((field != nullptr) && "Ran out of fields while inserting PUTARG_REG");
4945+
InsertPutArgReg(&field->NodeRef(), seg);
4946+
field = field->GetNext();
4947+
}
4948+
4949+
assert((field == nullptr) && "Missed fields while inserting PUTARG_REG");
4950+
4951+
arg->NodeRef() = fieldList->SoleFieldOrThis();
4952+
if (arg->GetNode() != fieldList)
4953+
{
4954+
BlockRange().Remove(fieldList);
4955+
}
48634956
}
48644957

48654958
//----------------------------------------------------------------------------------------------
@@ -4874,21 +4967,29 @@ void Lowering::LowerRetFieldList(GenTreeOp* ret, GenTreeFieldList* fieldList)
48744967
// True if the fields of the FIELD_LIST are all direct insertions into the
48754968
// specified registers.
48764969
//
4877-
bool Lowering::IsFieldListCompatibleWithReturn(GenTreeFieldList* fieldList)
4970+
template <typename GetRegisterInfoFunc>
4971+
bool Lowering::IsFieldListCompatibleWithRegisters(GenTreeFieldList* fieldList,
4972+
unsigned numRegs,
4973+
GetRegisterInfoFunc getRegInfo)
48784974
{
4879-
JITDUMP("Checking if field list [%06u] is compatible with return ABI: ", Compiler::dspTreeID(fieldList));
4880-
const ReturnTypeDesc& retDesc = comp->compRetTypeDesc;
4881-
unsigned numRetRegs = retDesc.GetReturnRegCount();
4975+
JITDUMP("Checking if field list [%06u] is compatible with registers: ", Compiler::dspTreeID(fieldList));
48824976

48834977
GenTreeFieldList::Use* use = fieldList->Uses().GetHead();
4884-
for (unsigned i = 0; i < numRetRegs; i++)
4978+
for (unsigned i = 0; i < numRegs; i++)
48854979
{
4886-
unsigned regStart = retDesc.GetReturnFieldOffset(i);
4887-
var_types regType = retDesc.GetReturnRegType(i);
4888-
unsigned regEnd = regStart + genTypeSize(regType);
4980+
LowerFieldListRegisterInfo regInfo = getRegInfo(i);
4981+
unsigned regStart = regInfo.Offset;
4982+
var_types regType = regInfo.RegType;
4983+
unsigned regEnd = regStart + genTypeSize(regType);
4984+
4985+
if ((i == numRegs - 1) && !varTypeUsesFloatReg(regType))
4986+
{
4987+
// Allow tail end to pass undefined bits into the register
4988+
regEnd = regStart + REGSIZE_BYTES;
4989+
}
48894990

48904991
// TODO-CQ: Could just create a 0 for this.
4891-
if (use == nullptr)
4992+
if ((use == nullptr) || (use->GetOffset() >= regEnd))
48924993
{
48934994
JITDUMP("it is not; register %u has no corresponding field\n", i);
48944995
return false;
@@ -4949,19 +5050,26 @@ bool Lowering::IsFieldListCompatibleWithReturn(GenTreeFieldList* fieldList)
49495050
// Arguments:
49505051
// fieldList - The field list
49515052
//
4952-
void Lowering::LowerFieldListToFieldListOfRegisters(GenTreeFieldList* fieldList)
5053+
template <typename GetRegisterInfoFunc>
5054+
void Lowering::LowerFieldListToFieldListOfRegisters(GenTreeFieldList* fieldList,
5055+
unsigned numRegs,
5056+
GetRegisterInfoFunc getRegInfo)
49535057
{
4954-
const ReturnTypeDesc& retDesc = comp->compRetTypeDesc;
4955-
unsigned numRegs = retDesc.GetReturnRegCount();
4956-
49575058
GenTreeFieldList::Use* use = fieldList->Uses().GetHead();
49585059
assert(fieldList->Uses().IsSorted());
49595060

49605061
for (unsigned i = 0; i < numRegs; i++)
49615062
{
4962-
unsigned regStart = retDesc.GetReturnFieldOffset(i);
4963-
var_types regType = genActualType(retDesc.GetReturnRegType(i));
4964-
unsigned regEnd = regStart + genTypeSize(regType);
5063+
LowerFieldListRegisterInfo regInfo = getRegInfo(i);
5064+
unsigned regStart = regInfo.Offset;
5065+
var_types regType = regInfo.RegType;
5066+
unsigned regEnd = regStart + genTypeSize(regType);
5067+
5068+
if ((i == numRegs - 1) && !varTypeUsesFloatReg(regType))
5069+
{
5070+
// Allow tail end to pass undefined bits into the register
5071+
regEnd = regStart + REGSIZE_BYTES;
5072+
}
49655073

49665074
GenTreeFieldList::Use* regEntry = use;
49675075

@@ -5001,7 +5109,7 @@ void Lowering::LowerFieldListToFieldListOfRegisters(GenTreeFieldList* fieldList)
50015109
}
50025110

50035111
// If this is a float -> int insertion, then we need the bitcast now.
5004-
if (varTypeUsesFloatReg(value) && varTypeUsesIntReg(regType))
5112+
if (varTypeUsesFloatReg(value) && varTypeUsesIntReg(regInfo.RegType))
50055113
{
50065114
assert((genTypeSize(value) == 4) || (genTypeSize(value) == 8));
50075115
var_types castType = genTypeSize(value) == 4 ? TYP_INT : TYP_LONG;

src/coreclr/jit/lower.h

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -187,10 +187,14 @@ class Lowering final : public Phase
187187
GenTree* LowerAsyncContinuation(GenTree* asyncCont);
188188
void LowerReturnSuspend(GenTree* retSuspend);
189189
void LowerRetFieldList(GenTreeOp* ret, GenTreeFieldList* fieldList);
190-
bool IsFieldListCompatibleWithReturn(GenTreeFieldList* fieldList);
191-
void LowerFieldListToFieldListOfRegisters(GenTreeFieldList* fieldList);
192-
void LowerCallStruct(GenTreeCall* call);
193-
void LowerStoreSingleRegCallStruct(GenTreeBlk* store);
190+
unsigned StoreFieldListToNewLocal(ClassLayout* layout, GenTreeFieldList* fieldList);
191+
void LowerArgFieldList(CallArg* arg, GenTreeFieldList* fieldList);
192+
template <typename GetRegisterInfoFunc>
193+
bool IsFieldListCompatibleWithRegisters(GenTreeFieldList* fieldList, unsigned numRegs, GetRegisterInfoFunc func);
194+
template <typename GetRegisterInfoFunc>
195+
void LowerFieldListToFieldListOfRegisters(GenTreeFieldList* fieldList, unsigned numRegs, GetRegisterInfoFunc func);
196+
void LowerCallStruct(GenTreeCall* call);
197+
void LowerStoreSingleRegCallStruct(GenTreeBlk* store);
194198
#if !defined(WINDOWS_AMD64_ABI)
195199
GenTreeLclVar* SpillStructCallResult(GenTreeCall* call) const;
196200
#endif // WINDOWS_AMD64_ABI

0 commit comments

Comments
 (0)