diff --git a/src/coreclr/System.Private.CoreLib/src/System/Threading/Interlocked.CoreCLR.cs b/src/coreclr/System.Private.CoreLib/src/System/Threading/Interlocked.CoreCLR.cs index 93df3bdfed9fb0..a1c1386392e0c0 100644 --- a/src/coreclr/System.Private.CoreLib/src/System/Threading/Interlocked.CoreCLR.cs +++ b/src/coreclr/System.Private.CoreLib/src/System/Threading/Interlocked.CoreCLR.cs @@ -51,7 +51,7 @@ public static long Decrement(ref long location) => [MethodImpl(MethodImplOptions.AggressiveInlining)] public static int Exchange(ref int location1, int value) { -#if TARGET_X86 || TARGET_AMD64 || TARGET_ARM64 || TARGET_RISCV64 +#if TARGET_X86 || TARGET_AMD64 || TARGET_ARM64 || TARGET_ARM || TARGET_RISCV64 return Exchange(ref location1, value); // Must expand intrinsic #else if (Unsafe.IsNullRef(ref location1)) @@ -130,7 +130,7 @@ public static T Exchange([NotNullIfNotNull(nameof(value))] ref T location1, T [MethodImpl(MethodImplOptions.AggressiveInlining)] public static int CompareExchange(ref int location1, int value, int comparand) { -#if TARGET_X86 || TARGET_AMD64 || TARGET_ARM64 || TARGET_RISCV64 +#if TARGET_X86 || TARGET_AMD64 || TARGET_ARM64 || TARGET_ARM || TARGET_RISCV64 return CompareExchange(ref location1, value, comparand); // Must expand intrinsic #else if (Unsafe.IsNullRef(ref location1)) @@ -229,7 +229,7 @@ public static long Add(ref long location1, long value) => [MethodImpl(MethodImplOptions.AggressiveInlining)] private static int ExchangeAdd(ref int location1, int value) { -#if TARGET_X86 || TARGET_AMD64 || TARGET_ARM64 || TARGET_RISCV64 +#if TARGET_X86 || TARGET_AMD64 || TARGET_ARM64 || TARGET_ARM || TARGET_RISCV64 return ExchangeAdd(ref location1, value); // Must expand intrinsic #else if (Unsafe.IsNullRef(ref location1)) diff --git a/src/coreclr/jit/codegenarm.cpp b/src/coreclr/jit/codegenarm.cpp index 82deccb99ede2d..ee284721710881 100644 --- a/src/coreclr/jit/codegenarm.cpp +++ b/src/coreclr/jit/codegenarm.cpp @@ -61,6 +61,11 @@ bool CodeGen::genInstrWithConstant( { case INS_add: case INS_sub: + if (imm < 0) + { + imm = -imm; + ins = (ins == INS_add) ? INS_sub : INS_add; + } immFitsInIns = validImmForInstr(ins, (target_ssize_t)imm, flags); break; diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index 4ecbcc80a1ce75..2316551ca60840 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -3820,327 +3820,6 @@ void CodeGen::genJumpTable(GenTree* treeNode) genProduceReg(treeNode); } -//------------------------------------------------------------------------ -// genLockedInstructions: Generate code for a GT_XADD, GT_XAND, GT_XORR or GT_XCHG node. -// -// Arguments: -// treeNode - the GT_XADD/XAND/XORR/XCHG node -// -void CodeGen::genLockedInstructions(GenTreeOp* treeNode) -{ - GenTree* data = treeNode->AsOp()->gtOp2; - GenTree* addr = treeNode->AsOp()->gtOp1; - regNumber targetReg = treeNode->GetRegNum(); - regNumber dataReg = data->GetRegNum(); - regNumber addrReg = addr->GetRegNum(); - - genConsumeAddress(addr); - genConsumeRegs(data); - - assert(treeNode->OperIs(GT_XCHG) || !varTypeIsSmall(treeNode->TypeGet())); - - emitAttr dataSize = emitActualTypeSize(data); - - if (compiler->compOpportunisticallyDependsOn(InstructionSet_Atomics)) - { - assert(!data->isContainedIntOrIImmed()); - - switch (treeNode->gtOper) - { - case GT_XORR: - GetEmitter()->emitIns_R_R_R(INS_ldsetal, dataSize, dataReg, (targetReg == REG_NA) ? 
REG_ZR : targetReg, - addrReg); - break; - case GT_XAND: - { - // Grab a temp reg to perform `MVN` for dataReg first. - regNumber tempReg = internalRegisters.GetSingle(treeNode); - GetEmitter()->emitIns_R_R(INS_mvn, dataSize, tempReg, dataReg); - GetEmitter()->emitIns_R_R_R(INS_ldclral, dataSize, tempReg, (targetReg == REG_NA) ? REG_ZR : targetReg, - addrReg); - break; - } - case GT_XCHG: - { - instruction ins = INS_swpal; - if (varTypeIsByte(treeNode->TypeGet())) - { - ins = INS_swpalb; - } - else if (varTypeIsShort(treeNode->TypeGet())) - { - ins = INS_swpalh; - } - GetEmitter()->emitIns_R_R_R(ins, dataSize, dataReg, targetReg, addrReg); - break; - } - case GT_XADD: - GetEmitter()->emitIns_R_R_R(INS_ldaddal, dataSize, dataReg, (targetReg == REG_NA) ? REG_ZR : targetReg, - addrReg); - break; - default: - assert(!"Unexpected treeNode->gtOper"); - } - } - else - { - // These are imported normally if Atomics aren't supported. - assert(!treeNode->OperIs(GT_XORR, GT_XAND)); - - regNumber exResultReg = internalRegisters.Extract(treeNode, RBM_ALLINT); - regNumber storeDataReg = - (treeNode->OperGet() == GT_XCHG) ? dataReg : internalRegisters.Extract(treeNode, RBM_ALLINT); - regNumber loadReg = (targetReg != REG_NA) ? targetReg : storeDataReg; - - // Check allocator assumptions - // - // The register allocator should have extended the lifetimes of all input and internal registers so that - // none interfere with the target. - noway_assert(addrReg != targetReg); - - noway_assert(addrReg != loadReg); - noway_assert(dataReg != loadReg); - - noway_assert(addrReg != storeDataReg); - noway_assert((treeNode->OperGet() == GT_XCHG) || (addrReg != dataReg)); - - assert(addr->isUsedFromReg()); - noway_assert(exResultReg != REG_NA); - noway_assert(exResultReg != targetReg); - noway_assert((targetReg != REG_NA) || (treeNode->OperGet() != GT_XCHG)); - - // Store exclusive unpredictable cases must be avoided - noway_assert(exResultReg != storeDataReg); - noway_assert(exResultReg != addrReg); - - // NOTE: `genConsumeAddress` marks the consumed register as not a GC pointer, as it assumes that the input - // registers - // die at the first instruction generated by the node. This is not the case for these atomics as the input - // registers are multiply-used. As such, we need to mark the addr register as containing a GC pointer until - // we are finished generating the code for this node. 
- - gcInfo.gcMarkRegPtrVal(addrReg, addr->TypeGet()); - - // Emit code like this: - // retry: - // ldxr loadReg, [addrReg] - // add storeDataReg, loadReg, dataReg # Only for GT_XADD - // # GT_XCHG storeDataReg === dataReg - // stxr exResult, storeDataReg, [addrReg] - // cbnz exResult, retry - // dmb ish - - BasicBlock* labelRetry = genCreateTempLabel(); - genDefineTempLabel(labelRetry); - - instruction insLd = INS_ldaxr; - instruction insSt = INS_stlxr; - if (varTypeIsByte(treeNode->TypeGet())) - { - insLd = INS_ldaxrb; - insSt = INS_stlxrb; - } - else if (varTypeIsShort(treeNode->TypeGet())) - { - insLd = INS_ldaxrh; - insSt = INS_stlxrh; - } - - // The following instruction includes a acquire half barrier - GetEmitter()->emitIns_R_R(insLd, dataSize, loadReg, addrReg); - - switch (treeNode->OperGet()) - { - case GT_XADD: - if (data->isContainedIntOrIImmed()) - { - // Even though INS_add is specified here, the encoder will choose either - // an INS_add or an INS_sub and encode the immediate as a positive value - genInstrWithConstant(INS_add, dataSize, storeDataReg, loadReg, data->AsIntConCommon()->IconValue(), - REG_NA); - } - else - { - GetEmitter()->emitIns_R_R_R(INS_add, dataSize, storeDataReg, loadReg, dataReg); - } - break; - case GT_XCHG: - assert(!data->isContained()); - storeDataReg = dataReg; - break; - default: - unreached(); - } - - // The following instruction includes a release half barrier - GetEmitter()->emitIns_R_R_R(insSt, dataSize, exResultReg, storeDataReg, addrReg); - - GetEmitter()->emitIns_J_R(INS_cbnz, EA_4BYTE, labelRetry, exResultReg); - - instGen_MemoryBarrier(); - - gcInfo.gcMarkRegSetNpt(addr->gtGetRegMask()); - } - - if (targetReg != REG_NA) - { - if (varTypeIsSmall(treeNode->TypeGet()) && varTypeIsSigned(treeNode->TypeGet())) - { - instruction mov = varTypeIsShort(treeNode->TypeGet()) ? INS_sxth : INS_sxtb; - GetEmitter()->emitIns_Mov(mov, EA_4BYTE, targetReg, targetReg, /* canSkip */ false); - } - - genProduceReg(treeNode); - } -} - -//------------------------------------------------------------------------ -// genCodeForCmpXchg: Produce code for a GT_CMPXCHG node. 
-// -// Arguments: -// tree - the GT_CMPXCHG node -// -void CodeGen::genCodeForCmpXchg(GenTreeCmpXchg* treeNode) -{ - assert(treeNode->OperIs(GT_CMPXCHG)); - - GenTree* addr = treeNode->Addr(); // arg1 - GenTree* data = treeNode->Data(); // arg2 - GenTree* comparand = treeNode->Comparand(); // arg3 - - regNumber targetReg = treeNode->GetRegNum(); - regNumber dataReg = data->GetRegNum(); - regNumber addrReg = addr->GetRegNum(); - regNumber comparandReg = comparand->GetRegNum(); - - genConsumeAddress(addr); - genConsumeRegs(data); - genConsumeRegs(comparand); - - emitAttr dataSize = emitActualTypeSize(data); - - if (compiler->compOpportunisticallyDependsOn(InstructionSet_Atomics)) - { - // casal use the comparand as the target reg - GetEmitter()->emitIns_Mov(INS_mov, dataSize, targetReg, comparandReg, /* canSkip */ true); - - // Catch case we destroyed data or address before use - noway_assert((addrReg != targetReg) || (targetReg == comparandReg)); - noway_assert((dataReg != targetReg) || (targetReg == comparandReg)); - - instruction ins = INS_casal; - if (varTypeIsByte(treeNode->TypeGet())) - { - ins = INS_casalb; - } - else if (varTypeIsShort(treeNode->TypeGet())) - { - ins = INS_casalh; - } - GetEmitter()->emitIns_R_R_R(ins, dataSize, targetReg, dataReg, addrReg); - } - else - { - regNumber exResultReg = internalRegisters.Extract(treeNode, RBM_ALLINT); - - // Check allocator assumptions - // - // The register allocator should have extended the lifetimes of all input and internal registers so that - // none interfere with the target. - noway_assert(addrReg != targetReg); - noway_assert(dataReg != targetReg); - noway_assert(comparandReg != targetReg); - noway_assert(addrReg != dataReg); - noway_assert(targetReg != REG_NA); - noway_assert(exResultReg != REG_NA); - noway_assert(exResultReg != targetReg); - - assert(addr->isUsedFromReg()); - assert(data->isUsedFromReg()); - assert(!comparand->isUsedFromMemory()); - - // Store exclusive unpredictable cases must be avoided - noway_assert(exResultReg != dataReg); - noway_assert(exResultReg != addrReg); - - // NOTE: `genConsumeAddress` marks the consumed register as not a GC pointer, as it assumes that the input - // registers - // die at the first instruction generated by the node. This is not the case for these atomics as the input - // registers are multiply-used. As such, we need to mark the addr register as containing a GC pointer until - // we are finished generating the code for this node. 
- - gcInfo.gcMarkRegPtrVal(addrReg, addr->TypeGet()); - - // Emit code like this: - // retry: - // ldxr targetReg, [addrReg] - // cmp targetReg, comparandReg - // bne compareFail - // stxr exResult, dataReg, [addrReg] - // cbnz exResult, retry - // compareFail: - // dmb ish - - BasicBlock* labelRetry = genCreateTempLabel(); - BasicBlock* labelCompareFail = genCreateTempLabel(); - genDefineTempLabel(labelRetry); - - instruction insLd = INS_ldaxr; - instruction insSt = INS_stlxr; - if (varTypeIsByte(treeNode->TypeGet())) - { - insLd = INS_ldaxrb; - insSt = INS_stlxrb; - } - else if (varTypeIsShort(treeNode->TypeGet())) - { - insLd = INS_ldaxrh; - insSt = INS_stlxrh; - } - - // The following instruction includes a acquire half barrier - GetEmitter()->emitIns_R_R(insLd, dataSize, targetReg, addrReg); - - if (comparand->isContainedIntOrIImmed()) - { - if (comparand->IsIntegralConst(0)) - { - GetEmitter()->emitIns_J_R(INS_cbnz, emitActualTypeSize(treeNode), labelCompareFail, targetReg); - } - else - { - GetEmitter()->emitIns_R_I(INS_cmp, emitActualTypeSize(treeNode), targetReg, - comparand->AsIntConCommon()->IconValue()); - GetEmitter()->emitIns_J(INS_bne, labelCompareFail); - } - } - else - { - GetEmitter()->emitIns_R_R(INS_cmp, emitActualTypeSize(treeNode), targetReg, comparandReg); - GetEmitter()->emitIns_J(INS_bne, labelCompareFail); - } - - // The following instruction includes a release half barrier - GetEmitter()->emitIns_R_R_R(insSt, dataSize, exResultReg, dataReg, addrReg); - - GetEmitter()->emitIns_J_R(INS_cbnz, EA_4BYTE, labelRetry, exResultReg); - - genDefineTempLabel(labelCompareFail); - - instGen_MemoryBarrier(); - - gcInfo.gcMarkRegSetNpt(addr->gtGetRegMask()); - } - - if (varTypeIsSmall(treeNode->TypeGet()) && varTypeIsSigned(treeNode->TypeGet())) - { - instruction mov = varTypeIsShort(treeNode->TypeGet()) ? INS_sxth : INS_sxtb; - GetEmitter()->emitIns_Mov(mov, EA_4BYTE, targetReg, targetReg, /* canSkip */ false); - } - - genProduceReg(treeNode); -} - instruction CodeGen::genGetInsForOper(genTreeOps oper, var_types type) { instruction ins = INS_BREAKPOINT; diff --git a/src/coreclr/jit/codegenarmarch.cpp b/src/coreclr/jit/codegenarmarch.cpp index 6684939017646e..ab6ac93f9cff93 100644 --- a/src/coreclr/jit/codegenarmarch.cpp +++ b/src/coreclr/jit/codegenarmarch.cpp @@ -435,9 +435,10 @@ void CodeGen::genCodeForTreeNode(GenTree* treeNode) } #ifdef TARGET_ARM64 - case GT_XCHG: case GT_XORR: case GT_XAND: +#endif // TARGET_ARM64 + case GT_XCHG: case GT_XADD: genLockedInstructions(treeNode->AsOp()); break; @@ -445,7 +446,6 @@ void CodeGen::genCodeForTreeNode(GenTree* treeNode) case GT_CMPXCHG: genCodeForCmpXchg(treeNode->AsCmpXchg()); break; -#endif // TARGET_ARM64 #ifdef SWIFT_SUPPORT case GT_SWIFT_ERROR: @@ -757,6 +757,424 @@ void CodeGen::genIntrinsic(GenTreeIntrinsic* treeNode) genProduceReg(treeNode); } +//------------------------------------------------------------------------ +// genLockedInstructions: Generate code for a GT_XADD, GT_XAND, GT_XORR or GT_XCHG node. 
+// +// Arguments: +// treeNode - the GT_XADD/XAND/XORR/XCHG node +// +void CodeGen::genLockedInstructions(GenTreeOp* treeNode) +{ + GenTree* data = treeNode->AsOp()->gtOp2; + GenTree* addr = treeNode->AsOp()->gtOp1; + regNumber targetReg = treeNode->GetRegNum(); + regNumber dataReg = data->GetRegNum(); + regNumber addrReg = addr->GetRegNum(); + + genConsumeAddress(addr); + genConsumeRegs(data); + + assert(treeNode->OperIs(GT_XCHG) || !varTypeIsSmall(treeNode->TypeGet())); + + emitAttr dataSize = emitActualTypeSize(data); + +#ifdef TARGET_ARM64 + if (compiler->compOpportunisticallyDependsOn(InstructionSet_Atomics)) + { + assert(!data->isContainedIntOrIImmed()); + + switch (treeNode->gtOper) + { + case GT_XORR: + GetEmitter()->emitIns_R_R_R(INS_ldsetal, dataSize, dataReg, (targetReg == REG_NA) ? REG_ZR : targetReg, + addrReg); + break; + case GT_XAND: + { + // Grab a temp reg to perform `MVN` for dataReg first. + regNumber tempReg = internalRegisters.GetSingle(treeNode); + GetEmitter()->emitIns_R_R(INS_mvn, dataSize, tempReg, dataReg); + GetEmitter()->emitIns_R_R_R(INS_ldclral, dataSize, tempReg, (targetReg == REG_NA) ? REG_ZR : targetReg, + addrReg); + break; + } + case GT_XCHG: + { + instruction ins = INS_swpal; + if (varTypeIsByte(treeNode->TypeGet())) + { + ins = INS_swpalb; + } + else if (varTypeIsShort(treeNode->TypeGet())) + { + ins = INS_swpalh; + } + GetEmitter()->emitIns_R_R_R(ins, dataSize, dataReg, targetReg, addrReg); + break; + } + case GT_XADD: + GetEmitter()->emitIns_R_R_R(INS_ldaddal, dataSize, dataReg, (targetReg == REG_NA) ? REG_ZR : targetReg, + addrReg); + break; + default: + assert(!"Unexpected treeNode->gtOper"); + } + } + else +#endif // TARGET_ARM64 + { + // These are imported normally if Atomics aren't supported. + assert(!treeNode->OperIs(GT_XORR, GT_XAND)); + + regNumber exResultReg = internalRegisters.Extract(treeNode, RBM_ALLINT); + regNumber storeDataReg = + (treeNode->OperGet() == GT_XCHG) ? dataReg : internalRegisters.Extract(treeNode, RBM_ALLINT); + regNumber loadReg = (targetReg != REG_NA) ? targetReg : storeDataReg; + + // Check allocator assumptions + // + // The register allocator should have extended the lifetimes of all input and internal registers so that + // none interfere with the target. + noway_assert(addrReg != targetReg); + + noway_assert(addrReg != loadReg); + noway_assert(dataReg != loadReg); + + noway_assert(addrReg != storeDataReg); + noway_assert((treeNode->OperGet() == GT_XCHG) || (addrReg != dataReg)); + + assert(addr->isUsedFromReg()); + noway_assert(exResultReg != REG_NA); + noway_assert(exResultReg != targetReg); + noway_assert((targetReg != REG_NA) || (treeNode->OperGet() != GT_XCHG)); + + // Store exclusive unpredictable cases must be avoided + noway_assert(exResultReg != storeDataReg); + noway_assert(exResultReg != addrReg); + + // NOTE: `genConsumeAddress` marks the consumed register as not a GC pointer, as it assumes that the input + // registers + // die at the first instruction generated by the node. This is not the case for these atomics as the input + // registers are multiply-used. As such, we need to mark the addr register as containing a GC pointer until + // we are finished generating the code for this node. 
+ + gcInfo.gcMarkRegPtrVal(addrReg, addr->TypeGet()); + + // Emit code like this on arm64: + // retry: + // ldxr loadReg, [addrReg] + // add storeDataReg, loadReg, dataReg # Only for GT_XADD + // # GT_XCHG storeDataReg === dataReg + // stxr exResult, storeDataReg, [addrReg] + // cbnz exResult, retry + // dmb ish + + // Emit code like this on arm32: + // dmb ish + // retry: + // ldrex loadReg, [addrReg] + // add storeDataReg, loadReg, dataReg # Only for GT_XADD + // # GT_XCHG storeDataReg === dataReg + // strex tempReg, storeDataReg, [addrReg] + // cmp tempReg, 0 + // bne retry + // dmb ish + +#ifndef TARGET_ARM64 + instGen_MemoryBarrier(); +#endif + + BasicBlock* labelRetry = genCreateTempLabel(); + genDefineTempLabel(labelRetry); + + instruction insLd, insSt; +#ifdef TARGET_ARM64 + if (varTypeIsByte(treeNode->TypeGet())) + { + insLd = INS_ldaxrb; + insSt = INS_stlxrb; + } + else if (varTypeIsShort(treeNode->TypeGet())) + { + insLd = INS_ldaxrh; + insSt = INS_stlxrh; + } + else + { + insLd = INS_ldaxr; + insSt = INS_stlxr; + } +#else + if (varTypeIsByte(treeNode->TypeGet())) + { + insLd = INS_ldrexb; + insSt = INS_strexb; + } + else if (varTypeIsShort(treeNode->TypeGet())) + { + insLd = INS_ldrexh; + insSt = INS_strexh; + } + else + { + insLd = INS_ldrex; + insSt = INS_strex; + } +#endif + + // The following instruction includes a acquire half barrier + GetEmitter()->emitIns_R_R(insLd, dataSize, loadReg, addrReg); + + switch (treeNode->OperGet()) + { + case GT_XADD: + if (data->isContainedIntOrIImmed()) + { + // Even though INS_add is specified here, the encoder will choose either + // an INS_add or an INS_sub and encode the immediate as a positive value + genInstrWithConstant(INS_add, dataSize, storeDataReg, loadReg, data->AsIntConCommon()->IconValue(), +#ifdef TARGET_ARM + INS_FLAGS_DONT_CARE, +#endif + exResultReg); + } + else + { + GetEmitter()->emitIns_R_R_R(INS_add, dataSize, storeDataReg, loadReg, dataReg); + } + break; + case GT_XCHG: + assert(!data->isContained()); + storeDataReg = dataReg; + break; + default: + unreached(); + } + + // The following instruction includes a release half barrier + GetEmitter()->emitIns_R_R_R(insSt, dataSize, exResultReg, storeDataReg, addrReg); + +#ifdef TARGET_ARM64 + GetEmitter()->emitIns_J_R(INS_cbnz, EA_4BYTE, labelRetry, exResultReg); +#else + GetEmitter()->emitIns_R_I(INS_cmp, EA_4BYTE, exResultReg, 0); + GetEmitter()->emitIns_J(INS_bne, labelRetry); +#endif // TARGET_ARM64 + + instGen_MemoryBarrier(); + + gcInfo.gcMarkRegSetNpt(addr->gtGetRegMask()); + } + + if (targetReg != REG_NA) + { + if (varTypeIsSmall(treeNode->TypeGet()) && varTypeIsSigned(treeNode->TypeGet())) + { + instruction mov = varTypeIsShort(treeNode->TypeGet()) ? INS_sxth : INS_sxtb; + GetEmitter()->emitIns_Mov(mov, EA_4BYTE, targetReg, targetReg, /* canSkip */ false); + } + + genProduceReg(treeNode); + } +} + +//------------------------------------------------------------------------ +// genCodeForCmpXchg: Produce code for a GT_CMPXCHG node. 
+// +// Arguments: +// tree - the GT_CMPXCHG node +// +void CodeGen::genCodeForCmpXchg(GenTreeCmpXchg* treeNode) +{ + assert(treeNode->OperIs(GT_CMPXCHG)); + + GenTree* addr = treeNode->Addr(); // arg1 + GenTree* data = treeNode->Data(); // arg2 + GenTree* comparand = treeNode->Comparand(); // arg3 + + regNumber targetReg = treeNode->GetRegNum(); + regNumber dataReg = data->GetRegNum(); + regNumber addrReg = addr->GetRegNum(); + regNumber comparandReg = comparand->GetRegNum(); + + genConsumeAddress(addr); + genConsumeRegs(data); + genConsumeRegs(comparand); + + emitAttr dataSize = emitActualTypeSize(data); + +#ifdef TARGET_ARM64 + if (compiler->compOpportunisticallyDependsOn(InstructionSet_Atomics)) + { + // casal use the comparand as the target reg + GetEmitter()->emitIns_Mov(INS_mov, dataSize, targetReg, comparandReg, /* canSkip */ true); + + // Catch case we destroyed data or address before use + noway_assert((addrReg != targetReg) || (targetReg == comparandReg)); + noway_assert((dataReg != targetReg) || (targetReg == comparandReg)); + + instruction ins = INS_casal; + if (varTypeIsByte(treeNode->TypeGet())) + { + ins = INS_casalb; + } + else if (varTypeIsShort(treeNode->TypeGet())) + { + ins = INS_casalh; + } + GetEmitter()->emitIns_R_R_R(ins, dataSize, targetReg, dataReg, addrReg); + } + else +#endif // TARGET_ARM64 + { + regNumber exResultReg = internalRegisters.Extract(treeNode, RBM_ALLINT); + + // Check allocator assumptions + // + // The register allocator should have extended the lifetimes of all input and internal registers so that + // none interfere with the target. + noway_assert(addrReg != targetReg); + noway_assert(dataReg != targetReg); + noway_assert(comparandReg != targetReg); + noway_assert(addrReg != dataReg); + noway_assert(targetReg != REG_NA); + noway_assert(exResultReg != REG_NA); + noway_assert(exResultReg != targetReg); + + assert(addr->isUsedFromReg()); + assert(data->isUsedFromReg()); + assert(!comparand->isUsedFromMemory()); + + // Store exclusive unpredictable cases must be avoided + noway_assert(exResultReg != dataReg); + noway_assert(exResultReg != addrReg); + + // NOTE: `genConsumeAddress` marks the consumed register as not a GC pointer, as it assumes that the input + // registers + // die at the first instruction generated by the node. This is not the case for these atomics as the input + // registers are multiply-used. As such, we need to mark the addr register as containing a GC pointer until + // we are finished generating the code for this node. 
+ + gcInfo.gcMarkRegPtrVal(addrReg, addr->TypeGet()); + + // Emit code like this on arm64: + // retry: + // ldxr targetReg, [addrReg] + // cmp targetReg, comparandReg + // bne compareFail + // stxr exResult, dataReg, [addrReg] + // cbnz exResult, retry + // compareFail: + // dmb ish + + // Emit code like this on arm32: + // dmb ish + // retry: + // ldrex targetReg, [addrReg] + // cmp targetReg, comparandReg + // bne compareFail + // strex exResult, dataReg, [addrReg] + // cmp exResult, 0 + // bne retry + // compareFail: + // dmb ish + +#ifndef TARGET_ARM64 + instGen_MemoryBarrier(); +#endif + + BasicBlock* labelRetry = genCreateTempLabel(); + BasicBlock* labelCompareFail = genCreateTempLabel(); + genDefineTempLabel(labelRetry); + + instruction insLd, insSt; +#ifdef TARGET_ARM64 + if (varTypeIsByte(treeNode->TypeGet())) + { + insLd = INS_ldaxrb; + insSt = INS_stlxrb; + } + else if (varTypeIsShort(treeNode->TypeGet())) + { + insLd = INS_ldaxrh; + insSt = INS_stlxrh; + } + else + { + insLd = INS_ldaxr; + insSt = INS_stlxr; + } +#else + if (varTypeIsByte(treeNode->TypeGet())) + { + insLd = INS_ldrexb; + insSt = INS_strexb; + } + else if (varTypeIsShort(treeNode->TypeGet())) + { + insLd = INS_ldrexh; + insSt = INS_strexh; + } + else + { + insLd = INS_ldrex; + insSt = INS_strex; + } +#endif + + // The following instruction includes a acquire half barrier + GetEmitter()->emitIns_R_R(insLd, dataSize, targetReg, addrReg); + + if (comparand->isContainedIntOrIImmed()) + { + if (comparand->IsIntegralConst(0) +#ifdef TARGET_ARM + && emitter::isLowRegister(targetReg) +#endif + ) + { + GetEmitter()->emitIns_J_R(INS_cbnz, emitActualTypeSize(treeNode), labelCompareFail, targetReg); + } + else + { + ssize_t value = comparand->AsIntConCommon()->IconValue(); + assert(value <= TARGET_SSIZE_MAX && value >= TARGET_SSIZE_MIN); + GetEmitter()->emitIns_R_I(INS_cmp, emitActualTypeSize(treeNode), targetReg, (target_ssize_t)value); + GetEmitter()->emitIns_J(INS_bne, labelCompareFail); + } + } + else + { + GetEmitter()->emitIns_R_R(INS_cmp, emitActualTypeSize(treeNode), targetReg, comparandReg); + GetEmitter()->emitIns_J(INS_bne, labelCompareFail); + } + + // The following instruction includes a release half barrier + GetEmitter()->emitIns_R_R_R(insSt, dataSize, exResultReg, dataReg, addrReg); + +#ifdef TARGET_ARM64 + GetEmitter()->emitIns_J_R(INS_cbnz, EA_4BYTE, labelRetry, exResultReg); +#else + GetEmitter()->emitIns_R_I(INS_cmp, EA_4BYTE, exResultReg, 0); + GetEmitter()->emitIns_J(INS_bne, labelRetry); +#endif // TARGET_ARM64 + + genDefineTempLabel(labelCompareFail); + + instGen_MemoryBarrier(); + + gcInfo.gcMarkRegSetNpt(addr->gtGetRegMask()); + } + + if (varTypeIsSmall(treeNode->TypeGet()) && varTypeIsSigned(treeNode->TypeGet())) + { + instruction mov = varTypeIsShort(treeNode->TypeGet()) ? 
INS_sxth : INS_sxtb; + GetEmitter()->emitIns_Mov(mov, EA_4BYTE, targetReg, targetReg, /* canSkip */ false); + } + + genProduceReg(treeNode); +} + //--------------------------------------------------------------------- // genPutArgStk - generate code for a GT_PUTARG_STK node // diff --git a/src/coreclr/jit/emitarm.cpp b/src/coreclr/jit/emitarm.cpp index fd0a27f61818e1..1d828c38b6a0ce 100644 --- a/src/coreclr/jit/emitarm.cpp +++ b/src/coreclr/jit/emitarm.cpp @@ -425,6 +425,26 @@ void emitter::emitInsSanityCheck(instrDesc* id) assert(emitGetInsSC(id) < 0x100); break; + case IF_T2_O1: // T2_O1 ............nnnn ttttTTTT....dddd R1 R2 R3 R4 + assert(isGeneralRegister(id->idReg1())); + assert(isGeneralRegister(id->idReg2())); + assert(isGeneralRegister(id->idReg3())); + assert(isGeneralRegister(id->idReg4())); + break; + + case IF_T2_O2: // T2_O2 ............nnnn tttt........dddd R1 R2 R3 + assert(isGeneralRegister(id->idReg1())); + assert(isGeneralRegister(id->idReg2())); + assert(isGeneralRegister(id->idReg3())); + break; + + case IF_T2_O3: // T2_O3 ............nnnn ttttddddiiiiiiii R1 R2 R3 imm8 + assert(isGeneralRegister(id->idReg1())); + assert(isGeneralRegister(id->idReg2())); + assert(isGeneralRegister(id->idReg3())); + assert(emitGetInsSC(id) < 0x100); + break; + case IF_T1_K: // T1_K ....cccciiiiiiii Branch imm8, cond4 case IF_T1_M: // T1_M .....iiiiiiiiiii Branch imm11 case IF_T2_J1: // T2_J1 .....Scccciiiiii ..j.jiiiiiiiiiii Branch imm20, cond4 @@ -554,6 +574,9 @@ bool emitter::emitInsMayWriteToGCReg(instrDesc* id) case IF_T2_H1: case IF_T2_K1: case IF_T2_K4: + case IF_T2_O1: + case IF_T2_O2: + case IF_T2_O3: // Some formats with "destination" or "target" registers are actually used for store instructions, for the // "source" value written to memory. // Similarly, PUSH has a target register, indicating the start of the set of registers to push. 
POP @@ -2441,21 +2464,19 @@ void emitter::emitIns_R_R( sf = INS_FLAGS_NOT_SET; break; - case INS_ldrexb: - case INS_strexb: - assert(size == EA_4BYTE); - assert(insDoesNotSetFlags(flags)); - fmt = IF_T2_E1; - sf = INS_FLAGS_NOT_SET; - break; + case INS_ldrex: + fmt = IF_T2_H1; + goto COMMON_THUMB2_EX; + case INS_ldrexb: case INS_ldrexh: - case INS_strexh: + fmt = IF_T2_E1; + COMMON_THUMB2_EX: assert(size == EA_4BYTE); assert(insDoesNotSetFlags(flags)); - fmt = IF_T2_E1; - sf = INS_FLAGS_NOT_SET; + sf = INS_FLAGS_NOT_SET; break; + default: #ifdef DEBUG printf("did not expect instruction %s\n", codeGen->genInsName(ins)); @@ -2464,7 +2485,8 @@ void emitter::emitIns_R_R( } assert((fmt == IF_T1_D0) || (fmt == IF_T1_E) || (fmt == IF_T2_C3) || (fmt == IF_T2_C9) || (fmt == IF_T2_C10) || - (fmt == IF_T2_VFP2) || (fmt == IF_T2_VMOVD) || (fmt == IF_T2_VMOVS) || (fmt == IF_T2_E1)); + (fmt == IF_T2_VFP2) || (fmt == IF_T2_VMOVD) || (fmt == IF_T2_VMOVS) || (fmt == IF_T2_H1) || + (fmt == IF_T2_E1)); assert(sf != INS_FLAGS_DONT_CARE); @@ -3034,14 +3056,13 @@ void emitter::emitIns_R_R_I(instruction ins, break; case INS_ldrex: - case INS_strex: assert(insOptsNone(opt)); assert(insDoesNotSetFlags(flags)); sf = INS_FLAGS_NOT_SET; if ((imm & 0x03fc) == imm) { - fmt = IF_T2_H0; + fmt = IF_T2_H1; } else { @@ -3295,17 +3316,27 @@ void emitter::emitIns_R_R_R(instruction ins, break; case INS_ldrexd: - case INS_strexd: - assert(insDoesNotSetFlags(flags)); fmt = IF_T2_G1; - sf = INS_FLAGS_NOT_SET; + goto COMMON_THUMB2_EX; + + case INS_strex: + fmt = IF_T2_O3; + goto COMMON_THUMB2_EX; + + case INS_strexb: + case INS_strexh: + fmt = IF_T2_O2; + COMMON_THUMB2_EX: + assert(size == EA_4BYTE); + assert(insDoesNotSetFlags(flags)); + sf = INS_FLAGS_NOT_SET; break; default: unreached(); } assert((fmt == IF_T1_H) || (fmt == IF_T2_C4) || (fmt == IF_T2_C5) || (fmt == IF_T2_VFP3) || (fmt == IF_T2_VMOVD) || - (fmt == IF_T2_G1)); + (fmt == IF_T2_G1) || (fmt == IF_T2_O2) || (fmt == IF_T2_O3)); assert(sf != INS_FLAGS_DONT_CARE); instrDesc* id = emitNewInstr(attr); @@ -3549,10 +3580,25 @@ void emitter::emitIns_R_R_R_I(instruction ins, } break; + case INS_strex: + assert(insOptsNone(opt)); + assert(insDoesNotSetFlags(flags)); + sf = INS_FLAGS_NOT_SET; + + if ((imm & 0x03fc) == imm) + { + fmt = IF_T2_O3; + } + else + { + assert(!"Instruction cannot be encoded"); + } + break; + default: unreached(); } - assert((fmt == IF_T2_C0) || (fmt == IF_T2_E0) || (fmt == IF_T2_G0)); + assert((fmt == IF_T2_C0) || (fmt == IF_T2_E0) || (fmt == IF_T2_G0) || (fmt == IF_T2_O3)); assert(sf != INS_FLAGS_DONT_CARE); // 3-reg ops can't use the small instrdesc @@ -3586,7 +3632,6 @@ void emitter::emitIns_R_R_R_R( /* Figure out the encoding format of the instruction */ switch (ins) { - case INS_smull: case INS_umull: case INS_smlal: @@ -3598,10 +3643,13 @@ void emitter::emitIns_R_R_R_R( case INS_mls: fmt = IF_T2_F2; break; + case INS_strexd: + fmt = IF_T2_O1; + break; default: unreached(); } - assert((fmt == IF_T2_F1) || (fmt == IF_T2_F2)); + assert((fmt == IF_T2_F1) || (fmt == IF_T2_F2) || (fmt == IF_T2_O1)); assert(reg1 != REG_PC); // VM debugging single stepper doesn't support PC register with this instruction. 
assert(reg2 != REG_PC); @@ -6276,6 +6324,37 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) emitHandlePCRelativeMov32((void*)(dst - 8), addr); break; + case IF_T2_O1: // T2_O1 ............nnnn ttttTTTT....dddd R1 R2 R3 R4 + sz = emitGetInstrDescSize(id); + code = emitInsCode(ins, fmt); + code |= insEncodeRegT2_M(id->idReg1()); + code |= insEncodeRegT2_D(id->idReg2()); + code |= insEncodeRegT2_T(id->idReg3()); + code |= insEncodeRegT2_N(id->idReg4()); + dst += emitOutput_Thumb2Instr(dst, code); + break; + + case IF_T2_O2: // T2_O2 ............nnnn tttt........dddd R1 R2 R3 + sz = emitGetInstrDescSize(id); + code = emitInsCode(ins, fmt); + code |= insEncodeRegT2_M(id->idReg1()); + code |= insEncodeRegT2_T(id->idReg2()); + code |= insEncodeRegT2_N(id->idReg3()); + dst += emitOutput_Thumb2Instr(dst, code); + break; + + case IF_T2_O3: // T2_O3 ............nnnn ttttddddiiiiiiii R1 R2 R3 imm8 + sz = emitGetInstrDescSize(id); + code = emitInsCode(ins, fmt); + imm = emitGetInsSC(id); + assert((imm & 0x00ff) == imm); + code |= insEncodeRegT2_D(id->idReg1()); + code |= insEncodeRegT2_T(id->idReg2()); + code |= insEncodeRegT2_N(id->idReg3()); + code |= imm; + dst += emitOutput_Thumb2Instr(dst, code); + break; + case IF_T2_VFP3: // these are the binary operators // d = n - m @@ -7425,6 +7504,28 @@ void emitter::emitDispInsHelp( } break; + case IF_T2_O1: + case IF_T2_O2: + case IF_T2_O3: + emitDispReg(id->idReg1(), attr, true); + emitDispReg(id->idReg2(), attr, true); + if (fmt == IF_T2_O1) + { + emitDispReg(id->idReg3(), attr, true); + emitDispAddrR(id->idReg4(), attr); + } + else if (fmt == IF_T2_O2) + { + emitDispAddrR(id->idReg3(), attr); + } + else + { + assert(fmt == IF_T2_O3); + imm = emitGetInsSC(id); + emitDispAddrRI(id->idReg3(), imm, attr); + } + break; + case IF_T1_J2: emitDispReg(id->idReg1(), attr, true); imm = emitGetInsSC(id); diff --git a/src/coreclr/jit/emitfmtsarm.h b/src/coreclr/jit/emitfmtsarm.h index a5c97364e64da0..75089af1203b81 100644 --- a/src/coreclr/jit/emitfmtsarm.h +++ b/src/coreclr/jit/emitfmtsarm.h @@ -136,6 +136,9 @@ IF_DEF(T2_N, IS_NONE, NONE) // T2_N .....i......iiii IF_DEF(T2_N1, IS_NONE, JMP) // T2_N1 .....i......iiii .iiiddddiiiiiiii R1 imm16 ; movw/movt of a code address IF_DEF(T2_N2, IS_NONE, NONE) // T2_N2 .....i......iiii .iiiddddiiiiiiii R1 imm16 ; movw/movt of a data address IF_DEF(T2_N3, IS_NONE, NONE) // T2_N3 .....i......iiii .iiiddddiiiiiiii R1 imm16 ; movw/movt (relocatable imm) +IF_DEF(T2_O1, IS_NONE, NONE) // T2_O1 ............nnnn ttttTTTT....dddd R1 R2 R3 R4 +IF_DEF(T2_O2, IS_NONE, NONE) // T2_O2 ............nnnn tttt........dddd R1 R2 R3 +IF_DEF(T2_O3, IS_NONE, NONE) // T2_O3 ............nnnn ttttddddiiiiiiii R1 R2 R3 imm8 IF_DEF(T2_VLDST, IS_NONE, NONE) // T2_VLDST 11101101UD0Lnnnn dddd101Ziiiiiiii D1 R2 imm(+-1020) IF_DEF(T2_VFP2, IS_NONE, NONE) // T2_VFP2 111011101D110--- dddd101Z--M0mmmm D1 D2 IF_DEF(T2_VFP3, IS_NONE, NONE) // T2_VFP3 11101110-D--nnnn dddd101ZN-M0mmmm D1 D2 D3 diff --git a/src/coreclr/jit/importercalls.cpp b/src/coreclr/jit/importercalls.cpp index c640105ec3a065..d96b6264d26e3b 100644 --- a/src/coreclr/jit/importercalls.cpp +++ b/src/coreclr/jit/importercalls.cpp @@ -3904,8 +3904,7 @@ GenTree* Compiler::impIntrinsic(GenTree* newobjThis, } #endif // defined(TARGET_ARM64) || defined(TARGET_RISCV64) -#if defined(TARGET_XARCH) || defined(TARGET_ARM64) || defined(TARGET_RISCV64) - // TODO-ARM-CQ: reenable treating InterlockedCmpXchg32 operation as intrinsic +#if defined(TARGET_XARCH) || 
defined(TARGET_ARMARCH) || defined(TARGET_RISCV64) case NI_System_Threading_Interlocked_CompareExchange: { var_types retType = JITtype2varType(sig->retType); @@ -3914,12 +3913,12 @@ GenTree* Compiler::impIntrinsic(GenTree* newobjThis, { break; } -#if !defined(TARGET_XARCH) && !defined(TARGET_ARM64) +#if !defined(TARGET_XARCH) && !defined(TARGET_ARMARCH) else if (genTypeSize(retType) < 4) { break; } -#endif // !defined(TARGET_XARCH) && !defined(TARGET_ARM64) +#endif // !defined(TARGET_XARCH) && !defined(TARGET_ARMARCH) if ((retType == TYP_REF) && (impStackTop(1).val->IsIntegralConst(0) || impStackTop(1).val->IsIconHandle(GTF_ICON_OBJ_HDL))) @@ -3960,12 +3959,12 @@ GenTree* Compiler::impIntrinsic(GenTree* newobjThis, { break; } -#if !defined(TARGET_XARCH) && !defined(TARGET_ARM64) +#if !defined(TARGET_XARCH) && !defined(TARGET_ARMARCH) else if (genTypeSize(retType) < 4) { break; } -#endif // !defined(TARGET_XARCH) && !defined(TARGET_ARM64) +#endif // !defined(TARGET_XARCH) && !defined(TARGET_ARMARCH) if ((retType == TYP_REF) && (impStackTop().val->IsIntegralConst(0) || impStackTop().val->IsIconHandle(GTF_ICON_OBJ_HDL))) @@ -3991,7 +3990,7 @@ GenTree* Compiler::impIntrinsic(GenTree* newobjThis, callType, op1, op2); break; } -#endif // defined(TARGET_XARCH) || defined(TARGET_ARM64) || defined(TARGET_RISCV64) +#endif // defined(TARGET_XARCH) || defined(TARGET_ARMARCH) || defined(TARGET_RISCV64) case NI_System_Threading_Interlocked_MemoryBarrier: case NI_System_Threading_Interlocked_ReadMemoryBarrier: diff --git a/src/coreclr/jit/instrsarm.h b/src/coreclr/jit/instrsarm.h index 3a1c871d316f6a..59d272ba3503c9 100644 --- a/src/coreclr/jit/instrsarm.h +++ b/src/coreclr/jit/instrsarm.h @@ -437,14 +437,14 @@ INST1(stmdb, "stmdb", 0,ST, IF_EN2D, 0xE9000000) // Rn{!}, T2_I0 1110100100W0nnnn 0r0rrrrrrrrrrrrr E900 0000 INST1(strd, "strd", 0,ST, IF_T2_G0, 0xE8400000) // Rt,RT,[Rn],+-i8{!}T2_G0 1110100PU1W0nnnn ttttTTTTiiiiiiii E840 0000 -INST1(strex, "strex", 0,ST, IF_T2_H1, 0xE8400F00) - // Rt,[Rn+i8] T2_H1 111010000100nnnn tttt1111iiiiiiii E840 0F00 imm(0-1020) -INST1(strexb, "strexb", 0,ST, IF_T2_E1, 0xE8C00F4F) - // Rt,[Rn] T2_E1 111010001100nnnn tttt111101001111 E8C0 0F4F -INST1(strexd, "strexd", 0,ST, IF_T2_G1, 0xE8C0007F) - // Rt,RT,[Rn] T2_G1 111010001100nnnn ttttTTTT01111111 E8C0 007F -INST1(strexh, "strexh", 0,ST, IF_T2_E1, 0xE8C00F5F) - // Rt,[Rn] T2_E1 111010001100nnnn tttt111101011111 E8C0 0F5F +INST1(strex, "strex", 0,ST, IF_T2_O3, 0xE8400000) + // Rd,Rt,[Rn+i8] T2_H1 111010000100nnnn ttttddddiiiiiiii E840 0F00 imm(0-255) +INST1(strexb, "strexb", 0,ST, IF_T2_O2, 0xE8C00F40) + // Rd,Rt,[Rn] T2_E1 111010001100nnnn tttt11110100dddd E8C0 0F4F +INST1(strexd, "strexd", 0,ST, IF_T2_O1, 0xE8C00070) + // Rd,Rt,RT,[Rn] T2_G1 111010001100nnnn ttttTTTT0111dddd E8C0 007F +INST1(strexh, "strexh", 0,ST, IF_T2_O2, 0xE8C00F50) + // Rd,Rt,[Rn] T2_E1 111010001100nnnn tttt11110101dddd E8C0 0F5F INST1(subw, "subw", 0, 0, IF_T2_M0, 0xF2A00000) // Rd,Rn,+i12 T2_M0 11110i101010nnnn 0iiiddddiiiiiiii F2A0 0000 imm(0-4095) INST1(tbb, "tbb", 0, 0, IF_T2_C9, 0xE8D0F000) diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index 2137874887e70b..d0887c10e0878b 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -635,13 +635,15 @@ GenTree* Lowering::LowerNode(GenTree* node) LowerStoreLocCommon(node->AsLclVarCommon()); break; -#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) +#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) || 
defined(TARGET_RISCV64) case GT_CMPXCHG: CheckImmedAndMakeContained(node, node->AsCmpXchg()->Comparand()); break; +#ifndef TARGET_ARM case GT_XORR: case GT_XAND: +#endif // TARGET_ARM case GT_XADD: CheckImmedAndMakeContained(node, node->AsOp()->gtOp2); break; diff --git a/src/coreclr/jit/lowerarmarch.cpp b/src/coreclr/jit/lowerarmarch.cpp index 436c770065d3bf..2455586f6328ce 100644 --- a/src/coreclr/jit/lowerarmarch.cpp +++ b/src/coreclr/jit/lowerarmarch.cpp @@ -72,9 +72,10 @@ bool Lowering::IsContainableImmed(GenTree* parentNode, GenTree* childNode) const // TODO-CrossBitness: we wouldn't need the cast below if GenTreeIntCon::gtIconVal had target_ssize_t type. target_ssize_t immVal = (target_ssize_t)childNode->AsIntCon()->gtIconVal; - emitAttr attr = emitActualTypeSize(childNode->TypeGet()); - emitAttr size = EA_SIZE(attr); -#ifdef TARGET_ARM +#ifdef TARGET_ARM64 + emitAttr attr = emitActualTypeSize(childNode->TypeGet()); + emitAttr size = EA_SIZE(attr); +#else insFlags flags = parentNode->gtSetFlags() ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE; #endif @@ -82,20 +83,30 @@ bool Lowering::IsContainableImmed(GenTree* parentNode, GenTree* childNode) const { case GT_ADD: case GT_SUB: -#ifdef TARGET_ARM64 +#ifdef TARGET_ARM + return emitter::emitIns_valid_imm_for_add(immVal, flags); +#else return emitter::emitIns_valid_imm_for_add(immVal, size); +#endif + +#ifdef TARGET_ARM + case GT_XADD: + return emitter::emitIns_valid_imm_for_add(immVal, flags); case GT_CMPXCHG: + return emitter::emitIns_valid_imm_for_cmp(immVal, flags); +#else + case GT_XADD: case GT_LOCKADD: case GT_XORR: case GT_XAND: - case GT_XADD: return comp->compOpportunisticallyDependsOn(InstructionSet_Atomics) ? false : emitter::emitIns_valid_imm_for_add(immVal, size); -#elif defined(TARGET_ARM) - return emitter::emitIns_valid_imm_for_add(immVal, flags); -#endif - break; + case GT_CMPXCHG: + return comp->compOpportunisticallyDependsOn(InstructionSet_Atomics) + ? false + : emitter::emitIns_valid_imm_for_cmp(immVal, size); +#endif // TARGET_ARM #ifdef TARGET_ARM64 case GT_EQ: diff --git a/src/coreclr/jit/lsra.h b/src/coreclr/jit/lsra.h index 5d49673ffc4bee..884dd20eb7d710 100644 --- a/src/coreclr/jit/lsra.h +++ b/src/coreclr/jit/lsra.h @@ -2028,6 +2028,9 @@ class LinearScan : public LinearScanInterface int BuildIndir(GenTreeIndir* indirTree); int BuildGCWriteBarrier(GenTree* tree); int BuildCast(GenTreeCast* cast); +#ifdef TARGET_ARMARCH + int BuildAtomic(GenTree* atomic, int dstCount); +#endif #if defined(TARGET_XARCH) // returns true if the tree can use the read-modify-write memory instruction form diff --git a/src/coreclr/jit/lsraarm.cpp b/src/coreclr/jit/lsraarm.cpp index a0ed0524b0e4e5..f3bc57042eb4b5 100644 --- a/src/coreclr/jit/lsraarm.cpp +++ b/src/coreclr/jit/lsraarm.cpp @@ -570,6 +570,12 @@ int LinearScan::BuildNode(GenTree* tree) BuildDef(tree); break; + case GT_CMPXCHG: + case GT_XADD: + case GT_XCHG: + srcCount = BuildAtomic(tree, dstCount); + break; + case GT_CALL: srcCount = BuildCall(tree->AsCall()); if (tree->AsCall()->HasMultiRegRetVal()) diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index 34092933aaffe6..dd1a5381d4e574 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -961,101 +961,14 @@ int LinearScan::BuildNode(GenTree* tree) buildInternalRegisterUses(); break; - case GT_CMPXCHG: - { - GenTreeCmpXchg* cmpXchgNode = tree->AsCmpXchg(); - srcCount = cmpXchgNode->Comparand()->isContained() ? 
2 : 3; - assert(dstCount == 1); - - if (!compiler->compOpportunisticallyDependsOn(InstructionSet_Atomics)) - { - // For ARMv8 exclusives requires a single internal register - buildInternalIntRegisterDefForNode(tree); - } - - // For ARMv8 exclusives the lifetime of the addr and data must be extended because - // it may be used used multiple during retries - - // For ARMv8.1 atomic cas the lifetime of the addr and data must be extended to prevent - // them being reused as the target register which must be destroyed early - - RefPosition* locationUse = BuildUse(tree->AsCmpXchg()->Addr()); - setDelayFree(locationUse); - RefPosition* valueUse = BuildUse(tree->AsCmpXchg()->Data()); - setDelayFree(valueUse); - if (!cmpXchgNode->Comparand()->isContained()) - { - RefPosition* comparandUse = BuildUse(tree->AsCmpXchg()->Comparand()); - - // For ARMv8 exclusives the lifetime of the comparand must be extended because - // it may be used used multiple during retries - if (!compiler->compOpportunisticallyDependsOn(InstructionSet_Atomics)) - { - setDelayFree(comparandUse); - } - } - - // Internals may not collide with target - setInternalRegsDelayFree = true; - buildInternalRegisterUses(); - BuildDef(tree); - } - break; - case GT_LOCKADD: case GT_XORR: case GT_XAND: case GT_XADD: case GT_XCHG: - { - assert(dstCount == (tree->TypeIs(TYP_VOID) ? 0 : 1)); - srcCount = tree->gtGetOp2()->isContained() ? 1 : 2; - - if (!compiler->compOpportunisticallyDependsOn(InstructionSet_Atomics)) - { - // GT_XCHG requires a single internal register; the others require two. - buildInternalIntRegisterDefForNode(tree); - if (tree->OperGet() != GT_XCHG) - { - buildInternalIntRegisterDefForNode(tree); - } - } - else if (tree->OperIs(GT_XAND)) - { - // for ldclral we need an internal register. - buildInternalIntRegisterDefForNode(tree); - } - - assert(!tree->gtGetOp1()->isContained()); - RefPosition* op1Use = BuildUse(tree->gtGetOp1()); - RefPosition* op2Use = nullptr; - if (!tree->gtGetOp2()->isContained()) - { - op2Use = BuildUse(tree->gtGetOp2()); - } - - // For ARMv8 exclusives the lifetime of the addr and data must be extended because - // it may be used used multiple during retries - if (!compiler->compOpportunisticallyDependsOn(InstructionSet_Atomics)) - { - // Internals may not collide with target - if (dstCount == 1) - { - setDelayFree(op1Use); - if (op2Use != nullptr) - { - setDelayFree(op2Use); - } - setInternalRegsDelayFree = true; - } - } - buildInternalRegisterUses(); - if (dstCount == 1) - { - BuildDef(tree); - } - } - break; + case GT_CMPXCHG: + srcCount = BuildAtomic(tree, dstCount); + break; #if FEATURE_ARG_SPLIT case GT_PUTARG_SPLIT: diff --git a/src/coreclr/jit/lsraarmarch.cpp b/src/coreclr/jit/lsraarmarch.cpp index a99ef06cc3e578..79eed39eac17fb 100644 --- a/src/coreclr/jit/lsraarmarch.cpp +++ b/src/coreclr/jit/lsraarmarch.cpp @@ -893,6 +893,134 @@ int LinearScan::BuildCast(GenTreeCast* cast) return srcCount; } +//------------------------------------------------------------------------ +// BuildCast: Set the NodeInfo for a GT_CAST. +// +// Arguments: +// cast - The GT_CAST node +// +// Return Value: +// The number of sources consumed by this node. +// +int LinearScan::BuildAtomic(GenTree* atomic, int dstCount) +{ + int srcCount; + switch (atomic->OperGet()) + { + case GT_CMPXCHG: + { + GenTreeCmpXchg* cmpXchgNode = atomic->AsCmpXchg(); + srcCount = cmpXchgNode->Comparand()->isContained() ? 
2 : 3; + assert(dstCount == 1); + +#ifdef TARGET_ARM64 + if (!compiler->compOpportunisticallyDependsOn(InstructionSet_Atomics)) +#endif + { + // For legacy exclusives requires a single internal register + buildInternalIntRegisterDefForNode(atomic); + } + + // For legacy exclusives the lifetime of the addr and data must be extended because + // it may be used multiple during retries + + // For ARMv8.1 atomic cas the lifetime of the addr and data must be extended to prevent + // them being reused as the target register which must be destroyed early + + RefPosition* locationUse = BuildUse(atomic->AsCmpXchg()->Addr()); + setDelayFree(locationUse); + RefPosition* valueUse = BuildUse(atomic->AsCmpXchg()->Data()); + setDelayFree(valueUse); + if (!cmpXchgNode->Comparand()->isContained()) + { + RefPosition* comparandUse = BuildUse(atomic->AsCmpXchg()->Comparand()); + + // For legacy exclusives the lifetime of the comparand must be extended because + // it may be used multiple during retries +#ifdef TARGET_ARM64 + if (!compiler->compOpportunisticallyDependsOn(InstructionSet_Atomics)) +#endif + { + setDelayFree(comparandUse); + } + } + + // Internals may not collide with target + setInternalRegsDelayFree = true; + buildInternalRegisterUses(); + BuildDef(atomic); + } + break; + + case GT_LOCKADD: + case GT_XORR: + case GT_XAND: + case GT_XADD: + case GT_XCHG: + { + assert(dstCount == (atomic->TypeIs(TYP_VOID) ? 0 : 1)); + srcCount = atomic->gtGetOp2()->isContained() ? 1 : 2; + +#ifdef TARGET_ARM64 + if (compiler->compOpportunisticallyDependsOn(InstructionSet_Atomics)) + { + if (atomic->OperIs(GT_XAND)) + { + // for ldclral we need an internal register. + buildInternalIntRegisterDefForNode(atomic); + } + } + else +#endif + { + buildInternalIntRegisterDefForNode(atomic); + + // GT_XCHG requires a single internal register; the others require two. + if (atomic->OperGet() != GT_XCHG) + { + buildInternalIntRegisterDefForNode(atomic); + } + } + + assert(!atomic->gtGetOp1()->isContained()); + RefPosition* op1Use = BuildUse(atomic->gtGetOp1()); + RefPosition* op2Use = nullptr; + if (!atomic->gtGetOp2()->isContained()) + { + op2Use = BuildUse(atomic->gtGetOp2()); + } + + // For legacy exclusives the lifetime of the addr and data must be extended because + // it may be used multiple during retries +#ifdef TARGET_ARM64 + if (!compiler->compOpportunisticallyDependsOn(InstructionSet_Atomics)) +#endif + { + // Internals may not collide with target + if (dstCount == 1) + { + setDelayFree(op1Use); + if (op2Use != nullptr) + { + setDelayFree(op2Use); + } + setInternalRegsDelayFree = true; + } + } + buildInternalRegisterUses(); + if (dstCount == 1) + { + BuildDef(atomic); + } + } + break; + + default: + unreached(); + } + return srcCount; +} + //------------------------------------------------------------------------ // BuildSelect: Build RefPositions for a GT_SELECT node. 
// diff --git a/src/coreclr/jit/target.h b/src/coreclr/jit/target.h index d5073aa9ce7f4e..2742c274c1cb61 100644 --- a/src/coreclr/jit/target.h +++ b/src/coreclr/jit/target.h @@ -1164,13 +1164,23 @@ C_ASSERT((RBM_INT_CALLEE_SAVED & RBM_FPBASE) == RBM_NONE); #ifdef TARGET_64BIT typedef uint64_t target_size_t; typedef int64_t target_ssize_t; + #define TARGET_SIGN_BIT (1ULL << 63) +#define TARGET_SIZE_MAX UINT64_MAX +#define TARGET_SSIZE_MAX INT64_MAX +#define TARGET_SIZE_MIN UINT64_MIN +#define TARGET_SSIZE_MIN INT64_MIN #else // !TARGET_64BIT typedef unsigned int target_size_t; typedef int target_ssize_t; + #define TARGET_SIGN_BIT (1ULL << 31) +#define TARGET_SIZE_MAX UINT32_MAX +#define TARGET_SSIZE_MAX INT32_MAX +#define TARGET_SIZE_MIN UINT32_MIN +#define TARGET_SSIZE_MIN INT32_MIN #endif // !TARGET_64BIT C_ASSERT(sizeof(target_size_t) == TARGET_POINTER_SIZE); diff --git a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Threading/Interlocked.cs b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Threading/Interlocked.cs index 09b3609b456997..b3f7c0b2c81280 100644 --- a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Threading/Interlocked.cs +++ b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Threading/Interlocked.cs @@ -14,7 +14,7 @@ public static partial class Interlocked [Intrinsic] public static int CompareExchange(ref int location1, int value, int comparand) { -#if TARGET_X86 || TARGET_AMD64 || TARGET_ARM64 || TARGET_RISCV64 +#if TARGET_X86 || TARGET_AMD64 || TARGET_ARM64 || TARGET_ARM || TARGET_RISCV64 return CompareExchange(ref location1, value, comparand); // Must expand intrinsic #else if (Unsafe.IsNullRef(ref location1)) @@ -61,7 +61,7 @@ public static T CompareExchange(ref T location1, T value, T comparand) where [Intrinsic] public static int Exchange(ref int location1, int value) { -#if TARGET_X86 || TARGET_AMD64 || TARGET_ARM64 || TARGET_RISCV64 +#if TARGET_X86 || TARGET_AMD64 || TARGET_ARM64 || TARGET_ARM || TARGET_RISCV64 return Exchange(ref location1, value); // Must expand intrinsic #else int oldValue; diff --git a/src/coreclr/nativeaot/Test.CoreLib/src/System/Threading/Interlocked.cs b/src/coreclr/nativeaot/Test.CoreLib/src/System/Threading/Interlocked.cs index 9ac6aa5110ad81..995e3ae349ee26 100644 --- a/src/coreclr/nativeaot/Test.CoreLib/src/System/Threading/Interlocked.cs +++ b/src/coreclr/nativeaot/Test.CoreLib/src/System/Threading/Interlocked.cs @@ -21,7 +21,7 @@ public static IntPtr CompareExchange(ref IntPtr location1, IntPtr value, IntPtr [Intrinsic] public static int CompareExchange(ref int location1, int value, int comparand) { -#if TARGET_X86 || TARGET_AMD64 || TARGET_ARM64 || TARGET_RISCV64 +#if TARGET_X86 || TARGET_AMD64 || TARGET_ARM64 || TARGET_ARM || TARGET_RISCV64 return CompareExchange(ref location1, value, comparand); #else return RuntimeImports.InterlockedCompareExchange(ref location1, value, comparand); diff --git a/src/libraries/System.Private.CoreLib/src/System/Threading/Interlocked.cs b/src/libraries/System.Private.CoreLib/src/System/Threading/Interlocked.cs index 6e33ce716fd6a1..4e5a83f8e5a73b 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Threading/Interlocked.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Threading/Interlocked.cs @@ -80,7 +80,7 @@ public static short Exchange(ref short location1, short value) => [MethodImpl(MethodImplOptions.AggressiveInlining)] public static unsafe byte Exchange(ref byte location1, byte value) { -#if !MONO && (TARGET_X86 || TARGET_AMD64 || TARGET_ARM64) 
+#if !MONO && (TARGET_X86 || TARGET_AMD64 || TARGET_ARM64 || TARGET_ARM) return Exchange(ref location1, value); // Must expand intrinsic #else // this relies on GC keeping 4B alignment for refs and on subtracting to such alignment being in the same object @@ -119,7 +119,7 @@ public static unsafe byte Exchange(ref byte location1, byte value) [CLSCompliant(false)] public static unsafe ushort Exchange(ref ushort location1, ushort value) { -#if !MONO && (TARGET_X86 || TARGET_AMD64 || TARGET_ARM64) +#if !MONO && (TARGET_X86 || TARGET_AMD64 || TARGET_ARM64 || TARGET_ARM) return Exchange(ref location1, value); // Must expand intrinsic #else // this relies on GC keeping 4B alignment for refs and on subtracting to such alignment being in the same object @@ -258,7 +258,7 @@ public static short CompareExchange(ref short location1, short value, short comp [MethodImpl(MethodImplOptions.AggressiveInlining)] public static unsafe byte CompareExchange(ref byte location1, byte value, byte comparand) { -#if !MONO && (TARGET_X86 || TARGET_AMD64 || TARGET_ARM64) +#if !MONO && (TARGET_X86 || TARGET_AMD64 || TARGET_ARM64 || TARGET_ARM) return CompareExchange(ref location1, value, comparand); // Must expand intrinsic #else // this relies on GC keeping 4B alignment for refs and on subtracting to such alignment being in the same object @@ -301,7 +301,7 @@ public static unsafe byte CompareExchange(ref byte location1, byte value, byte c [CLSCompliant(false)] public static unsafe ushort CompareExchange(ref ushort location1, ushort value, ushort comparand) { -#if !MONO && (TARGET_X86 || TARGET_AMD64 || TARGET_ARM64) +#if !MONO && (TARGET_X86 || TARGET_AMD64 || TARGET_ARM64 || TARGET_ARM) return CompareExchange(ref location1, value, comparand); // Must expand intrinsic #else // this relies on GC keeping 4B alignment for refs and on subtracting to such alignment being in the same object
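
Illustrative usage, not part of the patch: with TARGET_ARM added to the intrinsic #if blocks above, the 32-bit ARM JIT now expands calls like the ones below inline, via the dmb/ldrex/strex retry loops emitted by the shared genLockedInstructions and genCodeForCmpXchg paths, instead of taking the software fallback. This is a minimal C# sketch; the Interlocked methods shown are the existing public System.Threading API, while the sample class, fields, and method are hypothetical.

using System.Threading;

internal static class InterlockedArm32Sample
{
    private static int _counter;
    private static int _flag;

    internal static void Demo()
    {
        // Compare-and-swap: on arm32 this now expands to a dmb/ldrex/cmp/strex retry loop.
        if (Interlocked.CompareExchange(ref _flag, 1, 0) == 0)
        {
            // Atomic add; built on the ExchangeAdd intrinsic, which also expands inline now.
            Interlocked.Add(ref _counter, 5);

            // Atomic swap releases the flag; it uses an ldrex/strex loop and returns the old value.
            Interlocked.Exchange(ref _flag, 0);
        }
    }
}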