Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Avoid calling GC write barrier for byrefs [#89064 + more HasGCByRef logic] #89116

Closed
wants to merge 15 commits into from
Closed
11 changes: 3 additions & 8 deletions src/coreclr/jit/codegenarm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -821,8 +821,6 @@ void CodeGen::genCodeForCpObj(GenTreeBlk* cpObjNode)
sourceIsLocal = true;
}

bool dstOnStack = dstAddr->gtSkipReloadOrCopy()->OperIs(GT_LCL_ADDR);

#ifdef DEBUG
assert(!dstAddr->isContained());

Expand Down Expand Up @@ -852,7 +850,7 @@ void CodeGen::genCodeForCpObj(GenTreeBlk* cpObjNode)
unsigned slots = layout->GetSlotCount();

// If we can prove it's on the stack we don't need to use the write barrier.
if (dstOnStack)
if (dstAddr->gtSkipReloadOrCopy()->OperIs(GT_LCL_ADDR) || layout->HasGCByRef())
{
for (unsigned i = 0; i < slots; ++i)
{
Expand All @@ -866,12 +864,10 @@ void CodeGen::genCodeForCpObj(GenTreeBlk* cpObjNode)
}
else
{
unsigned gcPtrCount = layout->GetGCPtrCount();

unsigned i = 0;
while (i < slots)
{
if (!layout->IsGCPtr(i))
if (!layout->IsGCRef(i))
{
emit->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, tmpReg, REG_WRITE_BARRIER_SRC_BYREF, TARGET_POINTER_SIZE,
INS_FLAGS_DONT_CARE, INS_OPTS_LDST_POST_INC);
Expand All @@ -881,11 +877,10 @@ void CodeGen::genCodeForCpObj(GenTreeBlk* cpObjNode)
else
{
genEmitHelperCall(CORINFO_HELP_ASSIGN_BYREF, 0, EA_PTRSIZE);
gcPtrCount--;
}

++i;
}
assert(gcPtrCount == 0);
}

if (cpObjNode->IsVolatile())
Expand Down
14 changes: 4 additions & 10 deletions src/coreclr/jit/codegenarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3582,8 +3582,6 @@ void CodeGen::genCodeForCpObj(GenTreeBlk* cpObjNode)
sourceIsLocal = true;
}

bool dstOnStack = dstAddr->gtSkipReloadOrCopy()->OperIs(GT_LCL_ADDR);

#ifdef DEBUG
assert(!dstAddr->isContained());

Expand Down Expand Up @@ -3627,7 +3625,7 @@ void CodeGen::genCodeForCpObj(GenTreeBlk* cpObjNode)
emitter* emit = GetEmitter();

// If we can prove it's on the stack we don't need to use the write barrier.
if (dstOnStack)
if (dstAddr->gtSkipReloadOrCopy()->OperIs(GT_LCL_ADDR) || layout->HasGCByRef())
{
unsigned i = 0;
// Check if two or more remaining slots and use a ldp/stp sequence
Expand Down Expand Up @@ -3656,15 +3654,13 @@ void CodeGen::genCodeForCpObj(GenTreeBlk* cpObjNode)
}
else
{
unsigned gcPtrCount = cpObjNode->GetLayout()->GetGCPtrCount();

unsigned i = 0;
while (i < slots)
{
if (!layout->IsGCPtr(i))
if (!layout->IsGCRef(i))
{
// Check if the next slot's type is also TYP_GC_NONE and use ldp/stp
if ((i + 1 < slots) && !layout->IsGCPtr(i + 1))
// Check if the next slot's type is also non-ref and use ldp/stp
if ((i + 1 < slots) && !layout->IsGCRef(i + 1))
{
emit->emitIns_R_R_R_I(INS_ldp, EA_8BYTE, tmpReg, tmpReg2, REG_WRITE_BARRIER_SRC_BYREF,
2 * TARGET_POINTER_SIZE, INS_OPTS_POST_INDEX);
Expand All @@ -3684,11 +3680,9 @@ void CodeGen::genCodeForCpObj(GenTreeBlk* cpObjNode)
{
// In the case of a GC-Pointer we'll call the ByRef write barrier helper
genEmitHelperCall(CORINFO_HELP_ASSIGN_BYREF, 0, EA_PTRSIZE);
gcPtrCount--;
}
++i;
}
assert(gcPtrCount == 0);
}

if (cpObjNode->IsVolatile())
Expand Down
14 changes: 4 additions & 10 deletions src/coreclr/jit/codegenloongarch64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2746,8 +2746,6 @@ void CodeGen::genCodeForCpObj(GenTreeBlk* cpObjNode)
sourceIsLocal = true;
}

bool dstOnStack = dstAddr->gtSkipReloadOrCopy()->OperIs(GT_LCL_ADDR);

#ifdef DEBUG
assert(!dstAddr->isContained());

Expand Down Expand Up @@ -2794,7 +2792,7 @@ void CodeGen::genCodeForCpObj(GenTreeBlk* cpObjNode)
emitAttr attrDstAddr = emitActualTypeSize(dstAddr->TypeGet());

// If we can prove it's on the stack we don't need to use the write barrier.
if (dstOnStack)
if (dstAddr->gtSkipReloadOrCopy()->OperIs(GT_LCL_ADDR) || layout->HasGCByRef())
{
unsigned i = 0;
// Check if two or more remaining slots and use two load/store sequence
Expand Down Expand Up @@ -2839,15 +2837,13 @@ void CodeGen::genCodeForCpObj(GenTreeBlk* cpObjNode)
}
else
{
unsigned gcPtrCount = cpObjNode->GetLayout()->GetGCPtrCount();

unsigned i = 0;
while (i < slots)
{
if (!layout->IsGCPtr(i))
if (!layout->IsGCRef(i))
{
// Check if the next slot's type is also TYP_GC_NONE and use two load/store
if ((i + 1 < slots) && !layout->IsGCPtr(i + 1))
// Check if the next slot's type is also non-ref and use two load/store
if ((i + 1 < slots) && !layout->IsGCRef(i + 1))
{
if ((i + 2) == slots)
{
Expand Down Expand Up @@ -2883,11 +2879,9 @@ void CodeGen::genCodeForCpObj(GenTreeBlk* cpObjNode)
{
// In the case of a GC-Pointer we'll call the ByRef write barrier helper
genEmitHelperCall(CORINFO_HELP_ASSIGN_BYREF, 0, EA_PTRSIZE);
gcPtrCount--;
}
++i;
}
assert(gcPtrCount == 0);
}

if (cpObjNode->IsVolatile())
Expand Down
14 changes: 4 additions & 10 deletions src/coreclr/jit/codegenriscv64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2382,8 +2382,6 @@ void CodeGen::genCodeForCpObj(GenTreeBlk* cpObjNode)
sourceIsLocal = true;
}

bool dstOnStack = dstAddr->gtSkipReloadOrCopy()->OperIs(GT_LCL_ADDR);

#ifdef DEBUG
assert(!dstAddr->isContained());

Expand Down Expand Up @@ -2430,7 +2428,7 @@ void CodeGen::genCodeForCpObj(GenTreeBlk* cpObjNode)
emitAttr attrDstAddr = emitActualTypeSize(dstAddr->TypeGet());

// If we can prove it's on the stack we don't need to use the write barrier.
if (dstOnStack)
if (dstAddr->gtSkipReloadOrCopy()->OperIs(GT_LCL_ADDR) || layout->HasGCByRef())
{
unsigned i = 0;
// Check if two or more remaining slots and use two ld/sd sequence
Expand Down Expand Up @@ -2475,15 +2473,13 @@ void CodeGen::genCodeForCpObj(GenTreeBlk* cpObjNode)
}
else
{
unsigned gcPtrCount = cpObjNode->GetLayout()->GetGCPtrCount();

unsigned i = 0;
while (i < slots)
{
if (!layout->IsGCPtr(i))
if (!layout->IsGCRef(i))
{
// Check if the next slot's type is also TYP_GC_NONE and use two ld/sd
if ((i + 1 < slots) && !layout->IsGCPtr(i + 1))
// Check if the next slot's type is also non-ref and use two ld/sd
if ((i + 1 < slots) && !layout->IsGCRef(i + 1))
{
if ((i + 2) == slots)
{
Expand Down Expand Up @@ -2519,11 +2515,9 @@ void CodeGen::genCodeForCpObj(GenTreeBlk* cpObjNode)
{
// In the case of a GC-Pointer we'll call the ByRef write barrier helper
genEmitHelperCall(CORINFO_HELP_ASSIGN_BYREF, 0, EA_PTRSIZE);
gcPtrCount--;
}
++i;
}
assert(gcPtrCount == 0);
}

if (cpObjNode->IsVolatile())
Expand Down
28 changes: 11 additions & 17 deletions src/coreclr/jit/codegenxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4091,7 +4091,6 @@ void CodeGen::genCodeForCpObj(GenTreeBlk* cpObjNode)
GenTree* dstAddr = cpObjNode->Addr();
GenTree* source = cpObjNode->Data();
var_types srcAddrType = TYP_BYREF;
bool dstOnStack = dstAddr->gtSkipReloadOrCopy()->OperIs(GT_LCL_ADDR);

// If the GenTree node has data about GC pointers, this means we're dealing
// with CpObj, so this requires special logic.
Expand Down Expand Up @@ -4137,10 +4136,11 @@ void CodeGen::genCodeForCpObj(GenTreeBlk* cpObjNode)
gcInfo.gcMarkRegPtrVal(REG_RSI, srcAddrType);
gcInfo.gcMarkRegPtrVal(REG_RDI, dstAddr->TypeGet());

unsigned slots = cpObjNode->GetLayout()->GetSlotCount();
ClassLayout* layout = cpObjNode->GetLayout();
unsigned slots = layout->GetSlotCount();

// If we can prove it's on the stack we don't need to use the write barrier.
if (dstOnStack)
if (dstAddr->gtSkipReloadOrCopy()->OperIs(GT_LCL_ADDR) || layout->HasGCByRef())
{
if (slots >= CPOBJ_NONGC_SLOTS_LIMIT)
{
Expand All @@ -4164,32 +4164,29 @@ void CodeGen::genCodeForCpObj(GenTreeBlk* cpObjNode)
}
else
{
ClassLayout* layout = cpObjNode->GetLayout();
unsigned gcPtrCount = layout->GetGCPtrCount();

unsigned i = 0;
while (i < slots)
{
if (!layout->IsGCPtr(i))
if (!layout->IsGCRef(i))
{
// Let's see if we can use rep movsp instead of a sequence of movsp instructions
// to save cycles and code size.
unsigned nonGcSlotCount = 0;
unsigned nonRefSlotCount = 0;

do
{
nonGcSlotCount++;
nonRefSlotCount++;
i++;
} while ((i < slots) && !layout->IsGCPtr(i));
} while ((i < slots) && !layout->IsGCRef(i));

// If we have a very small contiguous non-gc region, it's better just to
// emit a sequence of movsp instructions
if (nonGcSlotCount < CPOBJ_NONGC_SLOTS_LIMIT)
if (nonRefSlotCount < CPOBJ_NONGC_SLOTS_LIMIT)
{
while (nonGcSlotCount > 0)
while (nonRefSlotCount > 0)
{
instGen(INS_movsp);
nonGcSlotCount--;
nonRefSlotCount--;
}
}
else
Expand All @@ -4198,19 +4195,16 @@ void CodeGen::genCodeForCpObj(GenTreeBlk* cpObjNode)
// rep movsp (alias for movsd/movsq for x86/x64)
assert((cpObjNode->gtRsvdRegs & RBM_RCX) != 0);

GetEmitter()->emitIns_R_I(INS_mov, EA_4BYTE, REG_RCX, nonGcSlotCount);
GetEmitter()->emitIns_R_I(INS_mov, EA_4BYTE, REG_RCX, nonRefSlotCount);
instGen(INS_r_movsp);
}
}
else
{
genEmitHelperCall(CORINFO_HELP_ASSIGN_BYREF, 0, EA_PTRSIZE);
gcPtrCount--;
i++;
}
}

assert(gcPtrCount == 0);
}

// Clear the gcInfo for RSI and RDI.
Expand Down
20 changes: 10 additions & 10 deletions src/coreclr/jit/lowerxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -422,37 +422,37 @@ void Lowering::LowerBlockStore(GenTreeBlk* blkNode)
// we can use REP MOVSD/Q instead of a sequence of MOVSD/Q instructions. According to the
// Intel Manual, the sweet spot for small structs is between 4 to 12 slots of size where
// the entire operation takes 20 cycles and encodes in 5 bytes (loading RCX and REP MOVSD/Q).
unsigned nonGCSlots = 0;
unsigned nonRefSlots = 0;

if (dstAddr->OperIs(GT_LCL_ADDR))
if (dstAddr->OperIs(GT_LCL_ADDR) || layout->HasGCByRef())
{
// If the destination is on the stack then no write barriers are needed.
nonGCSlots = layout->GetSlotCount();
nonRefSlots = layout->GetSlotCount();
}
else
{
// Otherwise a write barrier is needed for every GC pointer in the layout
// so we need to check if there's a long enough sequence of non-GC slots.
// Otherwise a write barrier is needed for every TYP_REF pointer in the layout
// so we need to check if there's a long enough sequence of non-TYP_REF slots.
unsigned slots = layout->GetSlotCount();
for (unsigned i = 0; i < slots; i++)
{
if (layout->IsGCPtr(i))
if (layout->IsGCRef(i))
{
nonGCSlots = 0;
nonRefSlots = 0;
}
else
{
nonGCSlots++;
nonRefSlots++;

if (nonGCSlots >= CPOBJ_NONGC_SLOTS_LIMIT)
if (nonRefSlots >= CPOBJ_NONGC_SLOTS_LIMIT)
{
break;
}
}
}
}

if (nonGCSlots >= CPOBJ_NONGC_SLOTS_LIMIT)
if (nonRefSlots >= CPOBJ_NONGC_SLOTS_LIMIT)
{
blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindCpObjRepInstr;
}
Expand Down
Loading