diff --git a/src/coreclr/jit/codegenarm.cpp b/src/coreclr/jit/codegenarm.cpp index e8ebf46272fc4..211cef2bee81d 100644 --- a/src/coreclr/jit/codegenarm.cpp +++ b/src/coreclr/jit/codegenarm.cpp @@ -821,8 +821,6 @@ void CodeGen::genCodeForCpObj(GenTreeBlk* cpObjNode) sourceIsLocal = true; } - bool dstOnStack = dstAddr->gtSkipReloadOrCopy()->OperIs(GT_LCL_ADDR); - #ifdef DEBUG assert(!dstAddr->isContained()); @@ -852,7 +850,7 @@ void CodeGen::genCodeForCpObj(GenTreeBlk* cpObjNode) unsigned slots = layout->GetSlotCount(); // If we can prove it's on the stack we don't need to use the write barrier. - if (dstOnStack) + if (dstAddr->gtSkipReloadOrCopy()->OperIs(GT_LCL_ADDR) || layout->HasGCByRef()) { for (unsigned i = 0; i < slots; ++i) { @@ -866,12 +864,10 @@ void CodeGen::genCodeForCpObj(GenTreeBlk* cpObjNode) } else { - unsigned gcPtrCount = layout->GetGCPtrCount(); - unsigned i = 0; while (i < slots) { - if (!layout->IsGCPtr(i)) + if (!layout->IsGCRef(i)) { emit->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, tmpReg, REG_WRITE_BARRIER_SRC_BYREF, TARGET_POINTER_SIZE, INS_FLAGS_DONT_CARE, INS_OPTS_LDST_POST_INC); @@ -881,11 +877,10 @@ void CodeGen::genCodeForCpObj(GenTreeBlk* cpObjNode) else { genEmitHelperCall(CORINFO_HELP_ASSIGN_BYREF, 0, EA_PTRSIZE); - gcPtrCount--; } + ++i; } - assert(gcPtrCount == 0); } if (cpObjNode->IsVolatile()) diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index f99602d21d953..525ee061b3e5d 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -3582,8 +3582,6 @@ void CodeGen::genCodeForCpObj(GenTreeBlk* cpObjNode) sourceIsLocal = true; } - bool dstOnStack = dstAddr->gtSkipReloadOrCopy()->OperIs(GT_LCL_ADDR); - #ifdef DEBUG assert(!dstAddr->isContained()); @@ -3627,7 +3625,7 @@ void CodeGen::genCodeForCpObj(GenTreeBlk* cpObjNode) emitter* emit = GetEmitter(); // If we can prove it's on the stack we don't need to use the write barrier. - if (dstOnStack) + if (dstAddr->gtSkipReloadOrCopy()->OperIs(GT_LCL_ADDR) || layout->HasGCByRef()) { unsigned i = 0; // Check if two or more remaining slots and use a ldp/stp sequence @@ -3656,15 +3654,13 @@ void CodeGen::genCodeForCpObj(GenTreeBlk* cpObjNode) } else { - unsigned gcPtrCount = cpObjNode->GetLayout()->GetGCPtrCount(); - unsigned i = 0; while (i < slots) { - if (!layout->IsGCPtr(i)) + if (!layout->IsGCRef(i)) { - // Check if the next slot's type is also TYP_GC_NONE and use ldp/stp - if ((i + 1 < slots) && !layout->IsGCPtr(i + 1)) + // Check if the next slot's type is also non-ref and use ldp/stp + if ((i + 1 < slots) && !layout->IsGCRef(i + 1)) { emit->emitIns_R_R_R_I(INS_ldp, EA_8BYTE, tmpReg, tmpReg2, REG_WRITE_BARRIER_SRC_BYREF, 2 * TARGET_POINTER_SIZE, INS_OPTS_POST_INDEX); @@ -3684,11 +3680,9 @@ void CodeGen::genCodeForCpObj(GenTreeBlk* cpObjNode) { // In the case of a GC-Pointer we'll call the ByRef write barrier helper genEmitHelperCall(CORINFO_HELP_ASSIGN_BYREF, 0, EA_PTRSIZE); - gcPtrCount--; } ++i; } - assert(gcPtrCount == 0); } if (cpObjNode->IsVolatile()) diff --git a/src/coreclr/jit/codegenloongarch64.cpp b/src/coreclr/jit/codegenloongarch64.cpp index 6e649a1176072..44cee0e8d368d 100644 --- a/src/coreclr/jit/codegenloongarch64.cpp +++ b/src/coreclr/jit/codegenloongarch64.cpp @@ -2746,8 +2746,6 @@ void CodeGen::genCodeForCpObj(GenTreeBlk* cpObjNode) sourceIsLocal = true; } - bool dstOnStack = dstAddr->gtSkipReloadOrCopy()->OperIs(GT_LCL_ADDR); - #ifdef DEBUG assert(!dstAddr->isContained()); @@ -2794,7 +2792,7 @@ void CodeGen::genCodeForCpObj(GenTreeBlk* cpObjNode) emitAttr attrDstAddr = emitActualTypeSize(dstAddr->TypeGet()); // If we can prove it's on the stack we don't need to use the write barrier. - if (dstOnStack) + if (dstAddr->gtSkipReloadOrCopy()->OperIs(GT_LCL_ADDR) || layout->HasGCByRef()) { unsigned i = 0; // Check if two or more remaining slots and use two load/store sequence @@ -2839,15 +2837,13 @@ void CodeGen::genCodeForCpObj(GenTreeBlk* cpObjNode) } else { - unsigned gcPtrCount = cpObjNode->GetLayout()->GetGCPtrCount(); - unsigned i = 0; while (i < slots) { - if (!layout->IsGCPtr(i)) + if (!layout->IsGCRef(i)) { - // Check if the next slot's type is also TYP_GC_NONE and use two load/store - if ((i + 1 < slots) && !layout->IsGCPtr(i + 1)) + // Check if the next slot's type is also non-ref and use two load/store + if ((i + 1 < slots) && !layout->IsGCRef(i + 1)) { if ((i + 2) == slots) { @@ -2883,11 +2879,9 @@ void CodeGen::genCodeForCpObj(GenTreeBlk* cpObjNode) { // In the case of a GC-Pointer we'll call the ByRef write barrier helper genEmitHelperCall(CORINFO_HELP_ASSIGN_BYREF, 0, EA_PTRSIZE); - gcPtrCount--; } ++i; } - assert(gcPtrCount == 0); } if (cpObjNode->IsVolatile()) diff --git a/src/coreclr/jit/codegenriscv64.cpp b/src/coreclr/jit/codegenriscv64.cpp index 015a33f82c2c3..fa862e6cf56aa 100644 --- a/src/coreclr/jit/codegenriscv64.cpp +++ b/src/coreclr/jit/codegenriscv64.cpp @@ -2382,8 +2382,6 @@ void CodeGen::genCodeForCpObj(GenTreeBlk* cpObjNode) sourceIsLocal = true; } - bool dstOnStack = dstAddr->gtSkipReloadOrCopy()->OperIs(GT_LCL_ADDR); - #ifdef DEBUG assert(!dstAddr->isContained()); @@ -2430,7 +2428,7 @@ void CodeGen::genCodeForCpObj(GenTreeBlk* cpObjNode) emitAttr attrDstAddr = emitActualTypeSize(dstAddr->TypeGet()); // If we can prove it's on the stack we don't need to use the write barrier. - if (dstOnStack) + if (dstAddr->gtSkipReloadOrCopy()->OperIs(GT_LCL_ADDR) || layout->HasGCByRef()) { unsigned i = 0; // Check if two or more remaining slots and use two ld/sd sequence @@ -2475,15 +2473,13 @@ void CodeGen::genCodeForCpObj(GenTreeBlk* cpObjNode) } else { - unsigned gcPtrCount = cpObjNode->GetLayout()->GetGCPtrCount(); - unsigned i = 0; while (i < slots) { - if (!layout->IsGCPtr(i)) + if (!layout->IsGCRef(i)) { - // Check if the next slot's type is also TYP_GC_NONE and use two ld/sd - if ((i + 1 < slots) && !layout->IsGCPtr(i + 1)) + // Check if the next slot's type is also non-ref and use two ld/sd + if ((i + 1 < slots) && !layout->IsGCRef(i + 1)) { if ((i + 2) == slots) { @@ -2519,11 +2515,9 @@ void CodeGen::genCodeForCpObj(GenTreeBlk* cpObjNode) { // In the case of a GC-Pointer we'll call the ByRef write barrier helper genEmitHelperCall(CORINFO_HELP_ASSIGN_BYREF, 0, EA_PTRSIZE); - gcPtrCount--; } ++i; } - assert(gcPtrCount == 0); } if (cpObjNode->IsVolatile()) diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp index 23c79087d4f8c..36039d2393739 100644 --- a/src/coreclr/jit/codegenxarch.cpp +++ b/src/coreclr/jit/codegenxarch.cpp @@ -4091,7 +4091,6 @@ void CodeGen::genCodeForCpObj(GenTreeBlk* cpObjNode) GenTree* dstAddr = cpObjNode->Addr(); GenTree* source = cpObjNode->Data(); var_types srcAddrType = TYP_BYREF; - bool dstOnStack = dstAddr->gtSkipReloadOrCopy()->OperIs(GT_LCL_ADDR); // If the GenTree node has data about GC pointers, this means we're dealing // with CpObj, so this requires special logic. @@ -4137,10 +4136,11 @@ void CodeGen::genCodeForCpObj(GenTreeBlk* cpObjNode) gcInfo.gcMarkRegPtrVal(REG_RSI, srcAddrType); gcInfo.gcMarkRegPtrVal(REG_RDI, dstAddr->TypeGet()); - unsigned slots = cpObjNode->GetLayout()->GetSlotCount(); + ClassLayout* layout = cpObjNode->GetLayout(); + unsigned slots = layout->GetSlotCount(); // If we can prove it's on the stack we don't need to use the write barrier. - if (dstOnStack) + if (dstAddr->gtSkipReloadOrCopy()->OperIs(GT_LCL_ADDR) || layout->HasGCByRef()) { if (slots >= CPOBJ_NONGC_SLOTS_LIMIT) { @@ -4164,32 +4164,29 @@ void CodeGen::genCodeForCpObj(GenTreeBlk* cpObjNode) } else { - ClassLayout* layout = cpObjNode->GetLayout(); - unsigned gcPtrCount = layout->GetGCPtrCount(); - unsigned i = 0; while (i < slots) { - if (!layout->IsGCPtr(i)) + if (!layout->IsGCRef(i)) { // Let's see if we can use rep movsp instead of a sequence of movsp instructions // to save cycles and code size. - unsigned nonGcSlotCount = 0; + unsigned nonRefSlotCount = 0; do { - nonGcSlotCount++; + nonRefSlotCount++; i++; - } while ((i < slots) && !layout->IsGCPtr(i)); + } while ((i < slots) && !layout->IsGCRef(i)); // If we have a very small contiguous non-gc region, it's better just to // emit a sequence of movsp instructions - if (nonGcSlotCount < CPOBJ_NONGC_SLOTS_LIMIT) + if (nonRefSlotCount < CPOBJ_NONGC_SLOTS_LIMIT) { - while (nonGcSlotCount > 0) + while (nonRefSlotCount > 0) { instGen(INS_movsp); - nonGcSlotCount--; + nonRefSlotCount--; } } else @@ -4198,19 +4195,16 @@ void CodeGen::genCodeForCpObj(GenTreeBlk* cpObjNode) // rep movsp (alias for movsd/movsq for x86/x64) assert((cpObjNode->gtRsvdRegs & RBM_RCX) != 0); - GetEmitter()->emitIns_R_I(INS_mov, EA_4BYTE, REG_RCX, nonGcSlotCount); + GetEmitter()->emitIns_R_I(INS_mov, EA_4BYTE, REG_RCX, nonRefSlotCount); instGen(INS_r_movsp); } } else { genEmitHelperCall(CORINFO_HELP_ASSIGN_BYREF, 0, EA_PTRSIZE); - gcPtrCount--; i++; } } - - assert(gcPtrCount == 0); } // Clear the gcInfo for RSI and RDI. diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp index 8c22396542545..240280e3c5879 100644 --- a/src/coreclr/jit/lowerxarch.cpp +++ b/src/coreclr/jit/lowerxarch.cpp @@ -422,29 +422,29 @@ void Lowering::LowerBlockStore(GenTreeBlk* blkNode) // we can use REP MOVSD/Q instead of a sequence of MOVSD/Q instructions. According to the // Intel Manual, the sweet spot for small structs is between 4 to 12 slots of size where // the entire operation takes 20 cycles and encodes in 5 bytes (loading RCX and REP MOVSD/Q). - unsigned nonGCSlots = 0; + unsigned nonRefSlots = 0; - if (dstAddr->OperIs(GT_LCL_ADDR)) + if (dstAddr->OperIs(GT_LCL_ADDR) || layout->HasGCByRef()) { // If the destination is on the stack then no write barriers are needed. - nonGCSlots = layout->GetSlotCount(); + nonRefSlots = layout->GetSlotCount(); } else { - // Otherwise a write barrier is needed for every GC pointer in the layout - // so we need to check if there's a long enough sequence of non-GC slots. + // Otherwise a write barrier is needed for every TYP_REF pointer in the layout + // so we need to check if there's a long enough sequence of non-TYP_REF slots. unsigned slots = layout->GetSlotCount(); for (unsigned i = 0; i < slots; i++) { - if (layout->IsGCPtr(i)) + if (layout->IsGCRef(i)) { - nonGCSlots = 0; + nonRefSlots = 0; } else { - nonGCSlots++; + nonRefSlots++; - if (nonGCSlots >= CPOBJ_NONGC_SLOTS_LIMIT) + if (nonRefSlots >= CPOBJ_NONGC_SLOTS_LIMIT) { break; } @@ -452,7 +452,7 @@ void Lowering::LowerBlockStore(GenTreeBlk* blkNode) } } - if (nonGCSlots >= CPOBJ_NONGC_SLOTS_LIMIT) + if (nonRefSlots >= CPOBJ_NONGC_SLOTS_LIMIT) { blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindCpObjRepInstr; }