diff --git a/src/coreclr/jit/codegenriscv64.cpp b/src/coreclr/jit/codegenriscv64.cpp index 45974ea7e1d950..94d3e57aaace04 100644 --- a/src/coreclr/jit/codegenriscv64.cpp +++ b/src/coreclr/jit/codegenriscv64.cpp @@ -4725,8 +4725,8 @@ void CodeGen::genIntrinsic(GenTreeIntrinsic* treeNode) GenTree* op1 = treeNode->gtGetOp1(); GenTree* op2 = treeNode->gtGetOp2IfPresent(); - emitAttr size = emitActualTypeSize(treeNode); - bool is4 = (size == 4); + emitAttr size = emitActualTypeSize(op1); + bool is4 = (size == EA_4BYTE); instruction instr = INS_invalid; switch (treeNode->gtIntrinsicName) @@ -4744,6 +4744,15 @@ void CodeGen::genIntrinsic(GenTreeIntrinsic* treeNode) case NI_System_Math_MaxNumber: instr = is4 ? INS_fmax_s : INS_fmax_d; break; + case NI_PRIMITIVE_LeadingZeroCount: + instr = is4 ? INS_clzw : INS_clz; + break; + case NI_PRIMITIVE_TrailingZeroCount: + instr = is4 ? INS_ctzw : INS_ctz; + break; + case NI_PRIMITIVE_PopCount: + instr = is4 ? INS_cpopw : INS_cpop; + break; default: NO_WAY("Unknown intrinsic"); } diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 891b33a169e46d..e89b79bee58887 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -4850,7 +4850,7 @@ class Compiler bool IsIntrinsicImplementedByUserCall(NamedIntrinsic intrinsicName); bool IsTargetIntrinsic(NamedIntrinsic intrinsicName); bool IsMathIntrinsic(NamedIntrinsic intrinsicName); - bool IsMathIntrinsic(GenTree* tree); + bool IsBitCountingIntrinsic(NamedIntrinsic intrinsicName); private: //----------------- Importing the method ---------------------------------- diff --git a/src/coreclr/jit/emitriscv64.cpp b/src/coreclr/jit/emitriscv64.cpp index f372a6a3670ea8..6b9b9d024e6217 100644 --- a/src/coreclr/jit/emitriscv64.cpp +++ b/src/coreclr/jit/emitriscv64.cpp @@ -648,7 +648,7 @@ void emitter::emitIns_R_R( { code_t code = emitInsCode(ins); - if (INS_mov == ins || INS_sext_w == ins) + if (INS_mov == ins || INS_sext_w == ins || (INS_clz <= ins && ins <= 
INS_cpopw)) { assert(isGeneralRegisterOrR0(reg1)); assert(isGeneralRegisterOrR0(reg2)); @@ -3805,18 +3805,30 @@ void emitter::emitDispInsName( hasImmediate = false; } break; - case 0x1: // SLLI + case 0x1: { - static constexpr unsigned kSlliFunct6 = 0b000000; - unsigned funct6 = (imm12 >> 6) & 0x3f; - // SLLI's instruction code's upper 6 bits have to be equal to zero - if (funct6 != kSlliFunct6) + unsigned shamt = imm12 & 0x3f; // 6 BITS for SHAMT in RISCV64 + switch (funct6) { - return emitDispIllegalInstruction(code); + case 0b011000: + static const char* names[] = {"clz", "ctz", "cpop"}; + // shift amount is treated as additional funct opcode bits + if (shamt >= ARRAY_SIZE(names)) + return emitDispIllegalInstruction(code); + + printLength = printf("%s", names[shamt]); + hasImmediate = false; + break; + + case 0b000000: + printLength = printf("slli"); + imm12 = shamt; + break; + + default: + return emitDispIllegalInstruction(code); } - printLength = printf("slli"); - imm12 &= 0x3f; // 6 BITS for SHAMT in RISCV64 } break; case 0x2: // SLTI @@ -3891,19 +3903,27 @@ void emitter::emitDispInsName( emitDispImmediate(imm12); } return; - case 0x1: // SLLIW + case 0x1: { - static constexpr unsigned kSlliwFunct7 = 0b0000000; - unsigned funct7 = (imm12 >> 5) & 0x7f; - // SLLIW's instruction code's upper 7 bits have to be equal to zero - if (funct7 == kSlliwFunct7) - { - printf("slliw %s, %s, %d\n", rd, rs1, imm12 & 0x1f); // 5 BITS for SHAMT in RISCV64 - } - else + unsigned shamt = imm12 & 0x1f; // 5 BITS for SHAMT in RISCV64 + switch (funct7) { - emitDispIllegalInstruction(code); + case 0b0110000: + static const char* names[] = {"clzw ", "ctzw ", "cpopw"}; + // shift amount is treated as additional funct opcode bits + if (shamt >= ARRAY_SIZE(names)) + return emitDispIllegalInstruction(code); + + printf("%s %s, %s\n", names[shamt], rd, rs1); + return; + + case 0b0000000: + printf("slliw %s, %s, %d\n", rd, rs1, shamt); + return; + + default: + return 
emitDispIllegalInstruction(code); } } return; diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 6a062b02f2b12d..5bf5fb170af2f1 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -5416,6 +5416,9 @@ unsigned Compiler::gtSetEvalOrder(GenTree* tree) case NI_System_Math_Tan: case NI_System_Math_Tanh: case NI_System_Math_Truncate: + case NI_PRIMITIVE_LeadingZeroCount: + case NI_PRIMITIVE_TrailingZeroCount: + case NI_PRIMITIVE_PopCount: { // Giving intrinsics a large fixed execution cost is because we'd like to CSE // them, even if they are implemented by calls. This is different from modeling diff --git a/src/coreclr/jit/importercalls.cpp b/src/coreclr/jit/importercalls.cpp index 7c39a95917644b..208f6212cbd5c9 100644 --- a/src/coreclr/jit/importercalls.cpp +++ b/src/coreclr/jit/importercalls.cpp @@ -5733,7 +5733,14 @@ GenTree* Compiler::impPrimitiveNamedIntrinsic(NamedIntrinsic intrinsic, } #endif // !TARGET_64BIT -#if defined(FEATURE_HW_INTRINSICS) +#ifdef TARGET_RISCV64 + if (compOpportunisticallyDependsOn(InstructionSet_Zbb)) + { + impPopStack(); + result = new (this, GT_INTRINSIC) GenTreeIntrinsic(retType, op1, NI_PRIMITIVE_LeadingZeroCount, + nullptr R2RARG(CORINFO_CONST_LOOKUP{IAT_VALUE})); + } +#elif defined(FEATURE_HW_INTRINSICS) #if defined(TARGET_XARCH) if (compOpportunisticallyDependsOn(InstructionSet_LZCNT)) { @@ -5909,7 +5916,14 @@ GenTree* Compiler::impPrimitiveNamedIntrinsic(NamedIntrinsic intrinsic, } #endif // !TARGET_64BIT -#if defined(FEATURE_HW_INTRINSICS) +#ifdef TARGET_RISCV64 + if (compOpportunisticallyDependsOn(InstructionSet_Zbb)) + { + impPopStack(); + result = new (this, GT_INTRINSIC) GenTreeIntrinsic(retType, op1, NI_PRIMITIVE_PopCount, + nullptr R2RARG(CORINFO_CONST_LOOKUP{IAT_VALUE})); + } +#elif defined(FEATURE_HW_INTRINSICS) #if defined(TARGET_XARCH) if (compOpportunisticallyDependsOn(InstructionSet_POPCNT)) { @@ -6066,7 +6080,14 @@ GenTree* 
Compiler::impPrimitiveNamedIntrinsic(NamedIntrinsic intrinsic, } #endif // !TARGET_64BIT -#if defined(FEATURE_HW_INTRINSICS) +#ifdef TARGET_RISCV64 + if (compOpportunisticallyDependsOn(InstructionSet_Zbb)) + { + impPopStack(); + result = new (this, GT_INTRINSIC) GenTreeIntrinsic(retType, op1, NI_PRIMITIVE_TrailingZeroCount, + nullptr R2RARG(CORINFO_CONST_LOOKUP{IAT_VALUE})); + } +#elif defined(FEATURE_HW_INTRINSICS) #if defined(TARGET_XARCH) if (compOpportunisticallyDependsOn(InstructionSet_BMI1)) { @@ -7927,6 +7948,11 @@ bool Compiler::IsTargetIntrinsic(NamedIntrinsic intrinsicName) case NI_System_Math_ReciprocalSqrtEstimate: return true; + case NI_PRIMITIVE_LeadingZeroCount: + case NI_PRIMITIVE_TrailingZeroCount: + case NI_PRIMITIVE_PopCount: + return compOpportunisticallyDependsOn(InstructionSet_Zbb); + default: return false; } @@ -8024,9 +8050,18 @@ bool Compiler::IsMathIntrinsic(NamedIntrinsic intrinsicName) } } -bool Compiler::IsMathIntrinsic(GenTree* tree) +bool Compiler::IsBitCountingIntrinsic(NamedIntrinsic intrinsicName) { - return (tree->OperGet() == GT_INTRINSIC) && IsMathIntrinsic(tree->AsIntrinsic()->gtIntrinsicName); + switch (intrinsicName) + { + case NI_PRIMITIVE_LeadingZeroCount: + case NI_PRIMITIVE_TrailingZeroCount: + case NI_PRIMITIVE_PopCount: + return true; + + default: + return false; + } } //------------------------------------------------------------------------ diff --git a/src/coreclr/jit/instrsriscv64.h b/src/coreclr/jit/instrsriscv64.h index c61d6ad9103e01..db2a78a25e58e7 100644 --- a/src/coreclr/jit/instrsriscv64.h +++ b/src/coreclr/jit/instrsriscv64.h @@ -260,6 +260,15 @@ INST(amominu_w, "amominu.w", 0, 0xc000202f) // funct5:11000 INST(amominu_d, "amominu.d", 0, 0xc000302f) // funct5:11000 INST(amomaxu_w, "amomaxu.w", 0, 0xe000202f) // funct5:11100 INST(amomaxu_d, "amomaxu.d", 0, 0xe000302f) // funct5:11100 + +// Zbb (RV32 + RV64) +INST(clz, "clz", 0, 0x60001013) +INST(clzw, "clzw", 0, 0x6000101b) +INST(ctz, "ctz", 0, 0x60101013) 
+INST(ctzw, "ctzw", 0, 0x6010101b) +INST(cpop, "cpop", 0, 0x60201013) +INST(cpopw, "cpopw", 0, 0x6020101b) + // clang-format on /*****************************************************************************/ #undef INST diff --git a/src/coreclr/jit/lsrariscv64.cpp b/src/coreclr/jit/lsrariscv64.cpp index 186927b290290e..5428e6b5c8860c 100644 --- a/src/coreclr/jit/lsrariscv64.cpp +++ b/src/coreclr/jit/lsrariscv64.cpp @@ -346,20 +346,41 @@ int LinearScan::BuildNode(GenTree* tree) case GT_INTRINSIC: { - NamedIntrinsic name = tree->AsIntrinsic()->gtIntrinsicName; - noway_assert((name == NI_System_Math_Abs) || (name == NI_System_Math_Sqrt) || - (name == NI_System_Math_MinNumber) || (name == NI_System_Math_MaxNumber)); - - // Both operand and its result must be of the same floating point type. GenTree* op1 = tree->gtGetOp1(); GenTree* op2 = tree->gtGetOp2IfPresent(); - assert(varTypeIsFloating(op1)); - assert(op1->TypeIs(tree->TypeGet())); + + switch (tree->AsIntrinsic()->gtIntrinsicName) + { + // All operands and the result must be of the same floating-point type. 
+ case NI_System_Math_MinNumber: + case NI_System_Math_MaxNumber: + assert(op2 != nullptr); + assert(op2->TypeIs(tree->TypeGet())); + FALLTHROUGH; + case NI_System_Math_Abs: + case NI_System_Math_Sqrt: + assert(op1->TypeIs(tree->TypeGet())); + assert(varTypeIsFloating(tree)); + break; + + // Operand and its result must be integers + case NI_PRIMITIVE_LeadingZeroCount: + case NI_PRIMITIVE_TrailingZeroCount: + case NI_PRIMITIVE_PopCount: + assert(compiler->compOpportunisticallyDependsOn(InstructionSet_Zbb)); + assert(op2 == nullptr); + assert(varTypeIsIntegral(op1)); + assert(varTypeIsIntegral(tree)); + break; + + default: + NO_WAY("Unknown intrinsic"); + } + BuildUse(op1); srcCount = 1; if (op2 != nullptr) { - assert(op2->TypeIs(tree->TypeGet())); BuildUse(op2); srcCount++; } diff --git a/src/coreclr/jit/valuenum.cpp b/src/coreclr/jit/valuenum.cpp index c4c50a11d58cf5..b628f77e3e50ad 100644 --- a/src/coreclr/jit/valuenum.cpp +++ b/src/coreclr/jit/valuenum.cpp @@ -9133,7 +9133,7 @@ ValueNum ValueNumStore::EvalHWIntrinsicFunTernary( ValueNum ValueNumStore::EvalMathFuncUnary(var_types typ, NamedIntrinsic gtMathFN, ValueNum arg0VN) { assert(arg0VN == VNNormalValue(arg0VN)); - assert(m_pComp->IsMathIntrinsic(gtMathFN)); + assert(m_pComp->IsMathIntrinsic(gtMathFN) RISCV64_ONLY(|| m_pComp->IsBitCountingIntrinsic(gtMathFN))); // If the math intrinsic is not implemented by target-specific instructions, such as implemented // by user calls, then don't do constant folding on it during ReadyToRun. This minimizes precision loss. 
@@ -9385,10 +9385,8 @@ ValueNum ValueNumStore::EvalMathFuncUnary(var_types typ, NamedIntrinsic gtMathFN unreached(); } } - else + else if (gtMathFN == NI_System_Math_Round) { - assert(gtMathFN == NI_System_Math_Round); - switch (TypeOfVN(arg0VN)) { case TYP_DOUBLE: @@ -9409,6 +9407,58 @@ ValueNum ValueNumStore::EvalMathFuncUnary(var_types typ, NamedIntrinsic gtMathFN unreached(); } } + else if (gtMathFN == NI_PRIMITIVE_LeadingZeroCount) + { + switch (TypeOfVN(arg0VN)) + { + case TYP_LONG: + res = BitOperations::LeadingZeroCount((uint64_t)GetConstantInt64(arg0VN)); + break; + + case TYP_INT: + res = BitOperations::LeadingZeroCount((uint32_t)GetConstantInt32(arg0VN)); + break; + + default: + unreached(); + } + } + else if (gtMathFN == NI_PRIMITIVE_TrailingZeroCount) + { + switch (TypeOfVN(arg0VN)) + { + case TYP_LONG: + res = BitOperations::TrailingZeroCount((uint64_t)GetConstantInt64(arg0VN)); + break; + + case TYP_INT: + res = BitOperations::TrailingZeroCount((uint32_t)GetConstantInt32(arg0VN)); + break; + + default: + unreached(); + } + } + else if (gtMathFN == NI_PRIMITIVE_PopCount) + { + switch (TypeOfVN(arg0VN)) + { + case TYP_LONG: + res = BitOperations::PopCount((uint64_t)GetConstantInt64(arg0VN)); + break; + + case TYP_INT: + res = BitOperations::PopCount((uint32_t)GetConstantInt32(arg0VN)); + break; + + default: + unreached(); + } + } + else + { + unreached(); + } return VNForIntCon(res); } @@ -9416,7 +9466,10 @@ ValueNum ValueNumStore::EvalMathFuncUnary(var_types typ, NamedIntrinsic gtMathFN else { assert((typ == TYP_DOUBLE) || (typ == TYP_FLOAT) || - ((typ == TYP_INT) && ((gtMathFN == NI_System_Math_ILogB) || (gtMathFN == NI_System_Math_Round)))); + ((typ == TYP_INT) && ((gtMathFN == NI_System_Math_ILogB) || (gtMathFN == NI_System_Math_Round))) || + (((typ == TYP_INT) || (typ == TYP_LONG)) && + ((gtMathFN == NI_PRIMITIVE_LeadingZeroCount) || (gtMathFN == NI_PRIMITIVE_TrailingZeroCount) || + (gtMathFN == NI_PRIMITIVE_PopCount)))); VNFunc vnf = 
VNF_Boundary; switch (gtMathFN) @@ -9508,6 +9561,15 @@ ValueNum ValueNumStore::EvalMathFuncUnary(var_types typ, NamedIntrinsic gtMathFN case NI_System_Math_Truncate: vnf = VNF_Truncate; break; + case NI_PRIMITIVE_LeadingZeroCount: + vnf = VNF_LeadingZeroCount; + break; + case NI_PRIMITIVE_TrailingZeroCount: + vnf = VNF_TrailingZeroCount; + break; + case NI_PRIMITIVE_PopCount: + vnf = VNF_PopCount; + break; default: unreached(); // the above are the only math intrinsics at the time of this writing. } @@ -12829,7 +12891,7 @@ void Compiler::fgValueNumberIntrinsic(GenTree* tree) vnStore->VNPUnpackExc(intrinsic->AsOp()->gtOp2->gtVNPair, &arg1VNP, &arg1VNPx); } - if (IsMathIntrinsic(intrinsic->gtIntrinsicName)) + if (IsMathIntrinsic(intrinsic->gtIntrinsicName) || IsBitCountingIntrinsic(intrinsic->gtIntrinsicName)) { // GT_INTRINSIC is a currently a subtype of binary operators. But most of // the math intrinsics are actually unary operations. diff --git a/src/coreclr/jit/valuenumfuncs.h b/src/coreclr/jit/valuenumfuncs.h index 227f4f26b11e9b..30cac5c244ef10 100644 --- a/src/coreclr/jit/valuenumfuncs.h +++ b/src/coreclr/jit/valuenumfuncs.h @@ -109,6 +109,10 @@ ValueNumFuncDef(Tan, 1, false, false, false) ValueNumFuncDef(Tanh, 1, false, false, false) ValueNumFuncDef(Truncate, 1, false, false, false) +ValueNumFuncDef(LeadingZeroCount, 1, false, false, false) +ValueNumFuncDef(TrailingZeroCount, 1, false, false, false) +ValueNumFuncDef(PopCount, 1, false, false, false) + ValueNumFuncDef(ManagedThreadId, 0, false, false, false) ValueNumFuncDef(ObjGetType, 1, false, true, false)