diff --git a/src/coreclr/jit/emit.cpp b/src/coreclr/jit/emit.cpp index 1749c6cc20afc..8d06db83aa0c7 100644 --- a/src/coreclr/jit/emit.cpp +++ b/src/coreclr/jit/emit.cpp @@ -1072,8 +1072,8 @@ insGroup* emitter::emitSavIG(bool emitAdd) // being adding causes a new EXTEND IG to be created. Also, emitLastIns might not be in this IG // at all if this IG is empty. - assert((emitLastIns == nullptr) == (emitLastInsIG == nullptr)); - if ((emitLastIns != nullptr) && (sz != 0)) + assert(emitHasLastIns() == (emitLastInsIG != nullptr)); + if (emitHasLastIns() && (sz != 0)) { // If we get here, emitLastIns must be in the current IG we are saving. assert(emitLastInsIG == emitCurIG); @@ -1499,8 +1499,6 @@ size_t emitter::emitGenEpilogLst(size_t (*fp)(void*, unsigned), void* cp) void* emitter::emitAllocAnyInstr(size_t sz, emitAttr opsz) { - instrDesc* id; - #ifdef DEBUG // Under STRESS_EMITTER, put every instruction in its own instruction group. // We can't do this for a prolog, epilog, funclet prolog, or funclet epilog, @@ -1557,18 +1555,18 @@ void* emitter::emitAllocAnyInstr(size_t sz, emitAttr opsz) assert(IsCodeAligned(emitCurIGsize)); // Make sure we have enough space for the new instruction. - // `igInsCnt` is currently a byte, so we can't have more than 255 instructions in a single insGroup. size_t fullSize = sz + m_debugInfoSize; - if ((emitCurIGfreeNext + fullSize >= emitCurIGfreeEndp) || emitForceNewIG || (emitCurIGinsCnt >= 255)) + if ((emitCurIGfreeNext + fullSize >= emitCurIGfreeEndp) || emitForceNewIG || + (emitCurIGinsCnt >= (EMIT_MAX_IG_INS_COUNT - 1))) { emitNxtIG(true); } /* Grab the space for the instruction */ - emitLastIns = id = (instrDesc*)(emitCurIGfreeNext + m_debugInfoSize); + instrDesc* id = emitLastIns = (instrDesc*)(emitCurIGfreeNext + m_debugInfoSize); #if EMIT_BACKWARDS_NAVIGATION emitCurIG->igLastIns = id; @@ -1606,14 +1604,18 @@ void* emitter::emitAllocAnyInstr(size_t sz, emitAttr opsz) if (m_debugInfoSize > 0) { instrDescDebugInfo* info = (instrDescDebugInfo*)emitGetMem(sizeof(*info)); - info->idNum = emitInsCount; - info->idSize = sz; - info->idVarRefOffs = 0; - info->idMemCookie = 0; - info->idFlags = GTF_EMPTY; - info->idFinallyCall = false; - info->idCatchRet = false; - info->idCallSig = nullptr; + memset(info, 0, sizeof(instrDescDebugInfo)); + + // These fields should have been zero-ed by the above + assert(info->idVarRefOffs == 0); + assert(info->idMemCookie == 0); + assert(info->idFlags == GTF_EMPTY); + assert(info->idFinallyCall == false); + assert(info->idCatchRet == false); + assert(info->idCallSig == nullptr); + + info->idNum = emitInsCount; + info->idSize = sz; id->idDebugOnlyInfo(info); } diff --git a/src/coreclr/jit/emit.h b/src/coreclr/jit/emit.h index 6fdbd8bf5abd9..b9f4f90c8402a 100644 --- a/src/coreclr/jit/emit.h +++ b/src/coreclr/jit/emit.h @@ -38,7 +38,11 @@ #define EMIT_INSTLIST_VERBOSE (emitComp->verbose) #endif -#define EMIT_BACKWARDS_NAVIGATION 0 // If 1, enable backwards navigation code for MIR (insGroup/instrDesc). +#ifdef TARGET_XARCH +#define EMIT_BACKWARDS_NAVIGATION 1 // If 1, enable backwards navigation code for MIR (insGroup/instrDesc). +#else +#define EMIT_BACKWARDS_NAVIGATION 0 +#endif /*****************************************************************************/ @@ -2288,6 +2292,12 @@ class emitter return (emitCurIG && emitCurIGfreeNext > emitCurIGfreeBase); } +#define EMIT_MAX_IG_INS_COUNT 256 + +#if EMIT_BACKWARDS_NAVIGATION +#define EMIT_MAX_PEEPHOLE_INS_COUNT 32 // The max number of previous instructions to navigate through for peepholes. +#endif // EMIT_BACKWARDS_NAVIGATION + instrDesc* emitLastIns; insGroup* emitLastInsIG; @@ -2295,24 +2305,98 @@ class emitter unsigned emitLastInsFullSize; #endif // EMIT_BACKWARDS_NAVIGATION + // Check to see if the last instruction is available. + inline bool emitHasLastIns() const + { + return (emitLastIns != nullptr); + } + + // Checks to see if we can cross between the two given IG boundaries. + // + // We have the following checks: + // 1. Looking backwards across an IG boundary can only be done if we're in an extension IG. + // 2. The IG of the previous instruction must have the same GC interrupt status as the current IG. + inline bool isInsIGSafeForPeepholeOptimization(insGroup* prevInsIG, insGroup* curInsIG) const + { + if (prevInsIG == curInsIG) + { + return true; + } + else + { + return (curInsIG->igFlags & IGF_EXTEND) && + ((prevInsIG->igFlags & IGF_NOGCINTERRUPT) == (curInsIG->igFlags & IGF_NOGCINTERRUPT)); + } + } + // Check if a peephole optimization involving emitLastIns is safe. // // We have the following checks: // 1. There must be a non-null emitLastIns to consult (thus, we have a known "last" instruction). // 2. `emitForceNewIG` is not set: this prevents peepholes from crossing nogc boundaries where // the next instruction is forced to create a new IG. - // 3. Looking backwards across an IG boundary can only be done if we're in an extension IG. - // 4. The IG of the previous instruction must have the same GC interrupt status as the current IG. - // This is related to #2; it disallows peephole when the previous IG is GC and the current is NOGC. - bool emitCanPeepholeLastIns() - { - assert((emitLastIns == nullptr) == (emitLastInsIG == nullptr)); - - return (emitLastIns != nullptr) && !emitForceNewIG && - ((emitCurIGinsCnt > 0) || // we're not at the start of a new IG - ((emitCurIG->igFlags & IGF_EXTEND) != 0)) && // or we are at the start of a new IG, - // and it's an extension IG - ((emitLastInsIG->igFlags & IGF_NOGCINTERRUPT) == (emitCurIG->igFlags & IGF_NOGCINTERRUPT)); + bool emitCanPeepholeLastIns() const + { + assert(emitHasLastIns() == (emitLastInsIG != nullptr)); + + return emitHasLastIns() && // there is an emitLastInstr + !emitForceNewIG && // and we're not about to start a new IG. + isInsIGSafeForPeepholeOptimization(emitLastInsIG, emitCurIG); + } + + enum emitPeepholeResult + { + PEEPHOLE_CONTINUE, + PEEPHOLE_ABORT + }; + + // Visits the last emitted instructions. + // Must be safe to do - use emitCanPeepholeLastIns for checking. + // Action is a function type: instrDesc* -> emitPeepholeResult + template + void emitPeepholeIterateLastInstrs(Action action) + { + assert(emitCanPeepholeLastIns()); + +#if EMIT_BACKWARDS_NAVIGATION + insGroup* curInsIG; + instrDesc* id; + + if (!emitGetLastIns(&curInsIG, &id)) + return; + + for (unsigned i = 0; i < EMIT_MAX_PEEPHOLE_INS_COUNT; i++) + { + assert(id != nullptr); + + switch (action(id)) + { + case PEEPHOLE_ABORT: + return; + case PEEPHOLE_CONTINUE: + { + insGroup* savedInsIG = curInsIG; + if (emitPrevID(curInsIG, id)) + { + if (isInsIGSafeForPeepholeOptimization(curInsIG, savedInsIG)) + { + continue; + } + else + { + return; + } + } + return; + } + + default: + unreached(); + } + } +#else // EMIT_BACKWARDS_NAVIGATION + action(emitLastIns); +#endif // !EMIT_BACKWARDS_NAVIGATION } #ifdef TARGET_ARMARCH diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index 10f56ddf77ff3..ce39079d467ae 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -16030,7 +16030,8 @@ bool emitter::IsRedundantMov(instruction ins, emitAttr size, regNumber dst, regN if (canOptimize && // Don't optimize if unsafe. (emitLastIns->idIns() == INS_mov) && // Don't optimize if last instruction was not 'mov'. - (emitLastIns->idOpSize() == size)) // Don't optimize if operand size is different than previous instruction. + (emitLastIns->idOpSize() == size)) // Don't optimize if operand size is different than previous + // instruction. { // Check if we did same move in prev instruction except dst/src were switched. regNumber prevDst = emitLastIns->idReg1(); diff --git a/src/coreclr/jit/emitxarch.cpp b/src/coreclr/jit/emitxarch.cpp index 267b5a9eac7ed..b08aa2fdab39d 100644 --- a/src/coreclr/jit/emitxarch.cpp +++ b/src/coreclr/jit/emitxarch.cpp @@ -488,12 +488,6 @@ bool emitter::IsFlagsAlwaysModified(instrDesc* id) // true if previous instruction zeroed reg's upper 32 bits. // false if it did not, or if we can't safely determine. // -// Notes: -// Currently only looks back one instruction. -// -// movsx eax, ... might seem viable but we always encode this -// instruction with a 64 bit destination. See TakesRexWPrefix. - bool emitter::AreUpper32BitsZero(regNumber reg) { // Only allow GPRs. @@ -508,113 +502,132 @@ bool emitter::AreUpper32BitsZero(regNumber reg) return false; } - instrDesc* id = emitLastIns; - insFormat fmt = id->idInsFmt(); + bool result = false; - switch (fmt) - { - case IF_RWR: - case IF_RRW: + emitPeepholeIterateLastInstrs([&](instrDesc* id) { + switch ((ID_OPS)emitFmtToOps[id->idInsFmt()]) + { + // This is conservative. + case ID_OP_CALL: + return PEEPHOLE_ABORT; - case IF_RWR_CNS: - case IF_RRW_CNS: - case IF_RRW_SHF: + default: + break; + } - case IF_RWR_RRD: - case IF_RRW_RRD: - case IF_RRW_RRW: - case IF_RRW_RRW_CNS: + // This is a special case for idiv, div, imul, and mul. + // They always write to RAX and RDX. + if (instrHasImplicitRegPairDest(id->idIns())) + { + if (reg == REG_RAX || reg == REG_RDX) + { + result = (id->idOpSize() == EA_4BYTE); + return PEEPHOLE_ABORT; + } + } - case IF_RWR_RRD_RRD: - case IF_RWR_RRD_RRD_CNS: + switch (id->idInsFmt()) + { + case IF_RWR: + case IF_RRW: - case IF_RWR_RRD_RRD_RRD: + case IF_RWR_CNS: + case IF_RRW_CNS: + case IF_RRW_SHF: - case IF_RWR_MRD: - case IF_RRW_MRD: - case IF_RRW_MRD_CNS: + case IF_RWR_RRD: + case IF_RRW_RRD: + case IF_RRW_RRW: + case IF_RRW_RRW_CNS: - case IF_RWR_RRD_MRD: - case IF_RWR_MRD_CNS: - case IF_RWR_RRD_MRD_CNS: - case IF_RWR_RRD_MRD_RRD: - case IF_RWR_MRD_OFF: + case IF_RWR_RRD_RRD: + case IF_RWR_RRD_RRD_CNS: - case IF_RWR_SRD: - case IF_RRW_SRD: - case IF_RRW_SRD_CNS: + case IF_RWR_RRD_RRD_RRD: - case IF_RWR_RRD_SRD: - case IF_RWR_SRD_CNS: - case IF_RWR_RRD_SRD_CNS: - case IF_RWR_RRD_SRD_RRD: + case IF_RWR_MRD: + case IF_RRW_MRD: + case IF_RRW_MRD_CNS: - case IF_RWR_ARD: - case IF_RRW_ARD: - case IF_RRW_ARD_CNS: + case IF_RWR_RRD_MRD: + case IF_RWR_MRD_CNS: + case IF_RWR_RRD_MRD_CNS: + case IF_RWR_RRD_MRD_RRD: + case IF_RWR_MRD_OFF: - case IF_RWR_RRD_ARD: - case IF_RWR_ARD_CNS: - case IF_RWR_ARD_RRD: - case IF_RWR_RRD_ARD_CNS: - case IF_RWR_RRD_ARD_RRD: - { - if (id->idReg1() != reg) + case IF_RWR_SRD: + case IF_RRW_SRD: + case IF_RRW_SRD_CNS: + + case IF_RWR_RRD_SRD: + case IF_RWR_SRD_CNS: + case IF_RWR_RRD_SRD_CNS: + case IF_RWR_RRD_SRD_RRD: + + case IF_RWR_ARD: + case IF_RRW_ARD: + case IF_RRW_ARD_CNS: + + case IF_RWR_RRD_ARD: + case IF_RWR_ARD_CNS: + case IF_RWR_ARD_RRD: + case IF_RWR_RRD_ARD_CNS: + case IF_RWR_RRD_ARD_RRD: { - switch (id->idInsFmt()) + if (id->idReg1() != reg) { - // Handles instructions who write to two registers. - case IF_RRW_RRW: - case IF_RRW_RRW_CNS: + switch (id->idInsFmt()) { - if (id->idReg2() == reg) + // Handles instructions who write to two registers. + case IF_RRW_RRW: + case IF_RRW_RRW_CNS: { - return (id->idOpSize() == EA_4BYTE); + if (id->idReg2() == reg) + { + result = (id->idOpSize() == EA_4BYTE); + return PEEPHOLE_ABORT; + } + break; } - break; + + default: + break; } - default: - break; + return PEEPHOLE_CONTINUE; } - if (instrHasImplicitRegPairDest(id->idIns())) + // movsx always sign extends to 8 bytes. + if (id->idIns() == INS_movsx) { - if (id->idReg2() == reg) - { - return (id->idOpSize() == EA_4BYTE); - } + return PEEPHOLE_ABORT; } - return false; - } + if (id->idIns() == INS_movsxd) + { + return PEEPHOLE_ABORT; + } - // movsx always sign extends to 8 bytes. - if (id->idIns() == INS_movsx) - { - return false; - } + // movzx always zeroes the upper 32 bits. + if (id->idIns() == INS_movzx) + { + result = true; + return PEEPHOLE_ABORT; + } - if (id->idIns() == INS_movsxd) - { - return false; + // otherwise rely on operation size. + result = (id->idOpSize() == EA_4BYTE); + return PEEPHOLE_ABORT; } - // movzx always zeroes the upper 32 bits. - if (id->idIns() == INS_movzx) + default: { - return true; + return PEEPHOLE_CONTINUE; } - - // otherwise rely on operation size. - return (id->idOpSize() == EA_4BYTE); } + }); - default: - break; - } - - return false; + return result; } //------------------------------------------------------------------------ @@ -1970,7 +1983,7 @@ unsigned emitter::emitOutputRexOrSimdPrefixIfNeeded(instruction ins, BYTE* dst, */ bool emitter::emitIsLastInsCall() { - if ((emitLastIns != nullptr) && (emitLastIns->idIns() == INS_call)) + if (emitHasLastIns() && (emitLastIns->idIns() == INS_call)) { return true; } diff --git a/src/tests/JIT/opt/Divide/Regressions/Regression2.cs b/src/tests/JIT/opt/Divide/Regressions/Regression2.cs new file mode 100644 index 0000000000000..923462a26e95b --- /dev/null +++ b/src/tests/JIT/opt/Divide/Regressions/Regression2.cs @@ -0,0 +1,39 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// Generated by Fuzzlyn v1.5 on 2023-02-22 21:24:57 +// Run on X64 Windows +// Seed: 15773855565313675298 +public class Program +{ + public static sbyte s_3 = 10; + public static int Main() + { + var result = 100; + + for (int vr7 = 0; vr7 < 2; vr7++) + { + short vr16 = s_3--; + short vr19 = s_3--; + short vr20 = s_3--; + short vr21 = s_3--; + short vr22 = s_3--; + var vr13 = M23(); + if (vr7 == 0 && vr13 != 0) + { + result = 0; + } + else if (vr7 == 1 && vr13 != -17937) + { + result = 0; + } + } + + return result; + } + + public static short M23() + { + return (short)((uint)(1L / s_3--) / 37963); + } +} diff --git a/src/tests/JIT/opt/Divide/Regressions/Regression2.csproj b/src/tests/JIT/opt/Divide/Regressions/Regression2.csproj new file mode 100644 index 0000000000000..f3e1cbd44b404 --- /dev/null +++ b/src/tests/JIT/opt/Divide/Regressions/Regression2.csproj @@ -0,0 +1,12 @@ + + + Exe + + + None + True + + + + + diff --git a/src/tests/JIT/opt/Divide/Regressions/Regression3.cs b/src/tests/JIT/opt/Divide/Regressions/Regression3.cs new file mode 100644 index 0000000000000..b45f3a9e53887 --- /dev/null +++ b/src/tests/JIT/opt/Divide/Regressions/Regression3.cs @@ -0,0 +1,25 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// Generated by Fuzzlyn v1.5 on 2023-02-22 22:21:43 +// Run on X64 Windows +// Seed: 1391301158118545956 +public class Program +{ + public static ulong s_2 = 12180724836008689112UL; + public static int Main() + { + short vr4 = default(short); + return M3(vr4); + } + + public static int M3(short arg0) + { + s_2 = (uint)(-1L % (-(int)s_2)); + if (s_2 == 4294967295) + { + return 100; + } + return 0; + } +} \ No newline at end of file diff --git a/src/tests/JIT/opt/Divide/Regressions/Regression3.csproj b/src/tests/JIT/opt/Divide/Regressions/Regression3.csproj new file mode 100644 index 0000000000000..f3e1cbd44b404 --- /dev/null +++ b/src/tests/JIT/opt/Divide/Regressions/Regression3.csproj @@ -0,0 +1,12 @@ + + + Exe + + + None + True + + + + + diff --git a/src/tests/JIT/opt/perf/movelim/CalculateChecksum.cs b/src/tests/JIT/opt/perf/movelim/CalculateChecksum.cs new file mode 100644 index 0000000000000..981ef84923c4c --- /dev/null +++ b/src/tests/JIT/opt/perf/movelim/CalculateChecksum.cs @@ -0,0 +1,112 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System; +using System.Runtime.CompilerServices; + +namespace CodeGenTests +{ + static class CalculateChecksumTest + { + [MethodImpl(MethodImplOptions.NoInlining)] + public static ushort CalculateChecksum(ref byte buffer, int length) + { + ref var current = ref buffer; + ulong sum = 0; + + while (length >= sizeof(ulong)) + { + length -= sizeof(ulong); + + var ulong0 = Unsafe.As(ref current); + current = ref Unsafe.Add(ref current, sizeof(ulong)); + + // Add with carry + sum += ulong0; + if (sum < ulong0) + { + sum++; + } + } + + if ((length & sizeof(uint)) != 0) + { + var uint0 = Unsafe.As(ref current); + current = ref Unsafe.Add(ref current, sizeof(uint)); + + // Add with carry + sum += uint0; + if (sum < uint0) + { + sum++; + } + } + + if ((length & sizeof(ushort)) != 0) + { + var ushort0 = Unsafe.As(ref current); + current = ref Unsafe.Add(ref current, sizeof(ushort)); + + // Add with carry + sum += ushort0; + if (sum < ushort0) + { + sum++; + } + } + + if ((length & sizeof(byte)) != 0) + { + var byte0 = current; + + // Add with carry + sum += byte0; + if (sum < byte0) + { + sum++; + } + } + + // Fold down to 16 bits + + var uint1 = (uint)(sum >> 32); + var uint2 = (uint)sum; + + // Add with carry + uint1 += uint2; + if (uint1 < uint2) + { + uint1++; + } + + var ushort2 = (ushort)uint1; + var ushort1 = (ushort)(uint1 >> 16); + + // Add with carry + ushort1 = (ushort)(ushort1 + ushort2); + if (ushort1 < ushort2) + { + ushort1++; + } + + // Invert to get ones-complement result + return (ushort)~ushort1; + + // There should not be these kinds of zero extending 'mov' instructions here. + // example - mov eax, eax + + // X64-NOT: mov [[REG0:[a-z0-9]+]], [[REG0]] + } + + static int Main() + { + var buffer = new byte[] { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 }; + var result = CalculateChecksum(ref buffer[0], buffer.Length); + + if (result != 59115) + return 0; + + return 100; + } + } +} diff --git a/src/tests/JIT/opt/perf/movelim/CalculateChecksum.csproj b/src/tests/JIT/opt/perf/movelim/CalculateChecksum.csproj new file mode 100644 index 0000000000000..42a89c8384d74 --- /dev/null +++ b/src/tests/JIT/opt/perf/movelim/CalculateChecksum.csproj @@ -0,0 +1,17 @@ + + + Exe + + + None + True + + + + true + + + + + +