Skip to content

Commit 44dd838

Browse files
authored
[RISC-V] Bit counting intrinsics (#114337)
* Intrinsify LeadingZeroCount
* TrailingZeroCount
* PopCount
* Build fix
* Remove assertion from default case
* cosmetics
* Separate bit counting from System.Math intrinsics
* Fix assert
* Revert unnecessary diffs
1 parent 6eabb60 commit 44dd838

File tree

9 files changed

+204
-41
lines changed

9 files changed

+204
-41
lines changed

src/coreclr/jit/codegenriscv64.cpp

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4725,8 +4725,8 @@ void CodeGen::genIntrinsic(GenTreeIntrinsic* treeNode)
47254725
GenTree* op1 = treeNode->gtGetOp1();
47264726
GenTree* op2 = treeNode->gtGetOp2IfPresent();
47274727

4728-
emitAttr size = emitActualTypeSize(treeNode);
4729-
bool is4 = (size == 4);
4728+
emitAttr size = emitActualTypeSize(op1);
4729+
bool is4 = (size == EA_4BYTE);
47304730

47314731
instruction instr = INS_invalid;
47324732
switch (treeNode->gtIntrinsicName)
@@ -4744,6 +4744,15 @@ void CodeGen::genIntrinsic(GenTreeIntrinsic* treeNode)
47444744
case NI_System_Math_MaxNumber:
47454745
instr = is4 ? INS_fmax_s : INS_fmax_d;
47464746
break;
4747+
case NI_PRIMITIVE_LeadingZeroCount:
4748+
instr = is4 ? INS_clzw : INS_clz;
4749+
break;
4750+
case NI_PRIMITIVE_TrailingZeroCount:
4751+
instr = is4 ? INS_ctzw : INS_ctz;
4752+
break;
4753+
case NI_PRIMITIVE_PopCount:
4754+
instr = is4 ? INS_cpopw : INS_cpop;
4755+
break;
47474756
default:
47484757
NO_WAY("Unknown intrinsic");
47494758
}

src/coreclr/jit/compiler.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4850,7 +4850,7 @@ class Compiler
48504850
bool IsIntrinsicImplementedByUserCall(NamedIntrinsic intrinsicName);
48514851
bool IsTargetIntrinsic(NamedIntrinsic intrinsicName);
48524852
bool IsMathIntrinsic(NamedIntrinsic intrinsicName);
4853-
bool IsMathIntrinsic(GenTree* tree);
4853+
bool IsBitCountingIntrinsic(NamedIntrinsic intrinsicName);
48544854

48554855
private:
48564856
//----------------- Importing the method ----------------------------------

src/coreclr/jit/emitriscv64.cpp

Lines changed: 39 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -648,7 +648,7 @@ void emitter::emitIns_R_R(
648648
{
649649
code_t code = emitInsCode(ins);
650650

651-
if (INS_mov == ins || INS_sext_w == ins)
651+
if (INS_mov == ins || INS_sext_w == ins || (INS_clz <= ins && ins <= INS_cpopw))
652652
{
653653
assert(isGeneralRegisterOrR0(reg1));
654654
assert(isGeneralRegisterOrR0(reg2));
@@ -3805,18 +3805,30 @@ void emitter::emitDispInsName(
38053805
hasImmediate = false;
38063806
}
38073807
break;
3808-
case 0x1: // SLLI
3808+
case 0x1:
38093809
{
3810-
static constexpr unsigned kSlliFunct6 = 0b000000;
3811-
38123810
unsigned funct6 = (imm12 >> 6) & 0x3f;
3813-
// SLLI's instruction code's upper 6 bits have to be equal to zero
3814-
if (funct6 != kSlliFunct6)
3811+
unsigned shamt = imm12 & 0x3f; // 6 BITS for SHAMT in RISCV64
3812+
switch (funct6)
38153813
{
3816-
return emitDispIllegalInstruction(code);
3814+
case 0b011000:
3815+
static const char* names[] = {"clz", "ctz", "cpop"};
3816+
// the shift-amount field is treated as additional funct opcode bits
3817+
if (shamt >= ARRAY_SIZE(names))
3818+
return emitDispIllegalInstruction(code);
3819+
3820+
printLength = printf("%s", names[shamt]);
3821+
hasImmediate = false;
3822+
break;
3823+
3824+
case 0b000000:
3825+
printLength = printf("slli");
3826+
imm12 = shamt;
3827+
break;
3828+
3829+
default:
3830+
return emitDispIllegalInstruction(code);
38173831
}
3818-
printLength = printf("slli");
3819-
imm12 &= 0x3f; // 6 BITS for SHAMT in RISCV64
38203832
}
38213833
break;
38223834
case 0x2: // SLTI
@@ -3891,19 +3903,27 @@ void emitter::emitDispInsName(
38913903
emitDispImmediate(imm12);
38923904
}
38933905
return;
3894-
case 0x1: // SLLIW
3906+
case 0x1:
38953907
{
3896-
static constexpr unsigned kSlliwFunct7 = 0b0000000;
3897-
38983908
unsigned funct7 = (imm12 >> 5) & 0x7f;
3899-
// SLLIW's instruction code's upper 7 bits have to be equal to zero
3900-
if (funct7 == kSlliwFunct7)
3901-
{
3902-
printf("slliw %s, %s, %d\n", rd, rs1, imm12 & 0x1f); // 5 BITS for SHAMT in RISCV64
3903-
}
3904-
else
3909+
unsigned shamt = imm12 & 0x1f; // 5 BITS for SHAMT in RISCV64
3910+
switch (funct7)
39053911
{
3906-
emitDispIllegalInstruction(code);
3912+
case 0b0110000:
3913+
static const char* names[] = {"clzw ", "ctzw ", "cpopw"};
3914+
// the shift-amount field is treated as additional funct opcode bits
3915+
if (shamt >= ARRAY_SIZE(names))
3916+
return emitDispIllegalInstruction(code);
3917+
3918+
printf("%s %s, %s\n", names[shamt], rd, rs1);
3919+
return;
3920+
3921+
case 0b0000000:
3922+
printf("slliw %s, %s, %d\n", rd, rs1, shamt);
3923+
return;
3924+
3925+
default:
3926+
return emitDispIllegalInstruction(code);
39073927
}
39083928
}
39093929
return;

src/coreclr/jit/gentree.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5416,6 +5416,9 @@ unsigned Compiler::gtSetEvalOrder(GenTree* tree)
54165416
case NI_System_Math_Tan:
54175417
case NI_System_Math_Tanh:
54185418
case NI_System_Math_Truncate:
5419+
case NI_PRIMITIVE_LeadingZeroCount:
5420+
case NI_PRIMITIVE_TrailingZeroCount:
5421+
case NI_PRIMITIVE_PopCount:
54195422
{
54205423
// Giving intrinsics a large fixed execution cost is because we'd like to CSE
54215424
// them, even if they are implemented by calls. This is different from modeling

src/coreclr/jit/importercalls.cpp

Lines changed: 40 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5733,7 +5733,14 @@ GenTree* Compiler::impPrimitiveNamedIntrinsic(NamedIntrinsic intrinsic,
57335733
}
57345734
#endif // !TARGET_64BIT
57355735

5736-
#if defined(FEATURE_HW_INTRINSICS)
5736+
#ifdef TARGET_RISCV64
5737+
if (compOpportunisticallyDependsOn(InstructionSet_Zbb))
5738+
{
5739+
impPopStack();
5740+
result = new (this, GT_INTRINSIC) GenTreeIntrinsic(retType, op1, NI_PRIMITIVE_LeadingZeroCount,
5741+
nullptr R2RARG(CORINFO_CONST_LOOKUP{IAT_VALUE}));
5742+
}
5743+
#elif defined(FEATURE_HW_INTRINSICS)
57375744
#if defined(TARGET_XARCH)
57385745
if (compOpportunisticallyDependsOn(InstructionSet_LZCNT))
57395746
{
@@ -5909,7 +5916,14 @@ GenTree* Compiler::impPrimitiveNamedIntrinsic(NamedIntrinsic intrinsic,
59095916
}
59105917
#endif // !TARGET_64BIT
59115918

5912-
#if defined(FEATURE_HW_INTRINSICS)
5919+
#ifdef TARGET_RISCV64
5920+
if (compOpportunisticallyDependsOn(InstructionSet_Zbb))
5921+
{
5922+
impPopStack();
5923+
result = new (this, GT_INTRINSIC) GenTreeIntrinsic(retType, op1, NI_PRIMITIVE_PopCount,
5924+
nullptr R2RARG(CORINFO_CONST_LOOKUP{IAT_VALUE}));
5925+
}
5926+
#elif defined(FEATURE_HW_INTRINSICS)
59135927
#if defined(TARGET_XARCH)
59145928
if (compOpportunisticallyDependsOn(InstructionSet_POPCNT))
59155929
{
@@ -6066,7 +6080,14 @@ GenTree* Compiler::impPrimitiveNamedIntrinsic(NamedIntrinsic intrinsic,
60666080
}
60676081
#endif // !TARGET_64BIT
60686082

6069-
#if defined(FEATURE_HW_INTRINSICS)
6083+
#ifdef TARGET_RISCV64
6084+
if (compOpportunisticallyDependsOn(InstructionSet_Zbb))
6085+
{
6086+
impPopStack();
6087+
result = new (this, GT_INTRINSIC) GenTreeIntrinsic(retType, op1, NI_PRIMITIVE_TrailingZeroCount,
6088+
nullptr R2RARG(CORINFO_CONST_LOOKUP{IAT_VALUE}));
6089+
}
6090+
#elif defined(FEATURE_HW_INTRINSICS)
60706091
#if defined(TARGET_XARCH)
60716092
if (compOpportunisticallyDependsOn(InstructionSet_BMI1))
60726093
{
@@ -7927,6 +7948,11 @@ bool Compiler::IsTargetIntrinsic(NamedIntrinsic intrinsicName)
79277948
case NI_System_Math_ReciprocalSqrtEstimate:
79287949
return true;
79297950

7951+
case NI_PRIMITIVE_LeadingZeroCount:
7952+
case NI_PRIMITIVE_TrailingZeroCount:
7953+
case NI_PRIMITIVE_PopCount:
7954+
return compOpportunisticallyDependsOn(InstructionSet_Zbb);
7955+
79307956
default:
79317957
return false;
79327958
}
@@ -8024,9 +8050,18 @@ bool Compiler::IsMathIntrinsic(NamedIntrinsic intrinsicName)
80248050
}
80258051
}
80268052

8027-
bool Compiler::IsMathIntrinsic(GenTree* tree)
8053+
bool Compiler::IsBitCountingIntrinsic(NamedIntrinsic intrinsicName)
80288054
{
8029-
return (tree->OperGet() == GT_INTRINSIC) && IsMathIntrinsic(tree->AsIntrinsic()->gtIntrinsicName);
8055+
switch (intrinsicName)
8056+
{
8057+
case NI_PRIMITIVE_LeadingZeroCount:
8058+
case NI_PRIMITIVE_TrailingZeroCount:
8059+
case NI_PRIMITIVE_PopCount:
8060+
return true;
8061+
8062+
default:
8063+
return false;
8064+
}
80308065
}
80318066

80328067
//------------------------------------------------------------------------

src/coreclr/jit/instrsriscv64.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -260,6 +260,15 @@ INST(amominu_w, "amominu.w", 0, 0xc000202f) // funct5:11000
260260
INST(amominu_d, "amominu.d", 0, 0xc000302f) // funct5:11000
261261
INST(amomaxu_w, "amomaxu.w", 0, 0xe000202f) // funct5:11100
262262
INST(amomaxu_d, "amomaxu.d", 0, 0xe000302f) // funct5:11100
263+
264+
// Zbb (RV32 + RV64)
265+
INST(clz, "clz", 0, 0x60001013)
266+
INST(clzw, "clzw", 0, 0x6000101b)
267+
INST(ctz, "ctz", 0, 0x60101013)
268+
INST(ctzw, "ctzw", 0, 0x6010101b)
269+
INST(cpop, "cpop", 0, 0x60201013)
270+
INST(cpopw, "cpopw", 0, 0x6020101b)
271+
263272
// clang-format on
264273
/*****************************************************************************/
265274
#undef INST

src/coreclr/jit/lsrariscv64.cpp

Lines changed: 29 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -346,20 +346,41 @@ int LinearScan::BuildNode(GenTree* tree)
346346

347347
case GT_INTRINSIC:
348348
{
349-
NamedIntrinsic name = tree->AsIntrinsic()->gtIntrinsicName;
350-
noway_assert((name == NI_System_Math_Abs) || (name == NI_System_Math_Sqrt) ||
351-
(name == NI_System_Math_MinNumber) || (name == NI_System_Math_MaxNumber));
352-
353-
// Both operand and its result must be of the same floating point type.
354349
GenTree* op1 = tree->gtGetOp1();
355350
GenTree* op2 = tree->gtGetOp2IfPresent();
356-
assert(varTypeIsFloating(op1));
357-
assert(op1->TypeIs(tree->TypeGet()));
351+
352+
switch (tree->AsIntrinsic()->gtIntrinsicName)
353+
{
354+
// All operands and the result must be of the same floating-point type.
355+
case NI_System_Math_MinNumber:
356+
case NI_System_Math_MaxNumber:
357+
assert(op2 != nullptr);
358+
assert(op2->TypeIs(tree->TypeGet()));
359+
FALLTHROUGH;
360+
case NI_System_Math_Abs:
361+
case NI_System_Math_Sqrt:
362+
assert(op1->TypeIs(tree->TypeGet()));
363+
assert(varTypeIsFloating(tree));
364+
break;
365+
366+
// The operand and the result must both be of integral type.
367+
case NI_PRIMITIVE_LeadingZeroCount:
368+
case NI_PRIMITIVE_TrailingZeroCount:
369+
case NI_PRIMITIVE_PopCount:
370+
assert(compiler->compOpportunisticallyDependsOn(InstructionSet_Zbb));
371+
assert(op2 == nullptr);
372+
assert(varTypeIsIntegral(op1));
373+
assert(varTypeIsIntegral(tree));
374+
break;
375+
376+
default:
377+
NO_WAY("Unknown intrinsic");
378+
}
379+
358380
BuildUse(op1);
359381
srcCount = 1;
360382
if (op2 != nullptr)
361383
{
362-
assert(op2->TypeIs(tree->TypeGet()));
363384
BuildUse(op2);
364385
srcCount++;
365386
}

src/coreclr/jit/valuenum.cpp

Lines changed: 68 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -9133,7 +9133,7 @@ ValueNum ValueNumStore::EvalHWIntrinsicFunTernary(
91339133
ValueNum ValueNumStore::EvalMathFuncUnary(var_types typ, NamedIntrinsic gtMathFN, ValueNum arg0VN)
91349134
{
91359135
assert(arg0VN == VNNormalValue(arg0VN));
9136-
assert(m_pComp->IsMathIntrinsic(gtMathFN));
9136+
assert(m_pComp->IsMathIntrinsic(gtMathFN) RISCV64_ONLY(|| m_pComp->IsBitCountingIntrinsic(gtMathFN)));
91379137

91389138
// If the math intrinsic is not implemented by target-specific instructions, such as implemented
91399139
// by user calls, then don't do constant folding on it during ReadyToRun. This minimizes precision loss.
@@ -9385,10 +9385,8 @@ ValueNum ValueNumStore::EvalMathFuncUnary(var_types typ, NamedIntrinsic gtMathFN
93859385
unreached();
93869386
}
93879387
}
9388-
else
9388+
else if (gtMathFN == NI_System_Math_Round)
93899389
{
9390-
assert(gtMathFN == NI_System_Math_Round);
9391-
93929390
switch (TypeOfVN(arg0VN))
93939391
{
93949392
case TYP_DOUBLE:
@@ -9409,14 +9407,69 @@ ValueNum ValueNumStore::EvalMathFuncUnary(var_types typ, NamedIntrinsic gtMathFN
94099407
unreached();
94109408
}
94119409
}
9410+
else if (gtMathFN == NI_PRIMITIVE_LeadingZeroCount)
9411+
{
9412+
switch (TypeOfVN(arg0VN))
9413+
{
9414+
case TYP_LONG:
9415+
res = BitOperations::LeadingZeroCount((uint64_t)GetConstantInt64(arg0VN));
9416+
break;
9417+
9418+
case TYP_INT:
9419+
res = BitOperations::LeadingZeroCount((uint32_t)GetConstantInt32(arg0VN));
9420+
break;
9421+
9422+
default:
9423+
unreached();
9424+
}
9425+
}
9426+
else if (gtMathFN == NI_PRIMITIVE_TrailingZeroCount)
9427+
{
9428+
switch (TypeOfVN(arg0VN))
9429+
{
9430+
case TYP_LONG:
9431+
res = BitOperations::TrailingZeroCount((uint64_t)GetConstantInt64(arg0VN));
9432+
break;
9433+
9434+
case TYP_INT:
9435+
res = BitOperations::TrailingZeroCount((uint32_t)GetConstantInt32(arg0VN));
9436+
break;
9437+
9438+
default:
9439+
unreached();
9440+
}
9441+
}
9442+
else if (gtMathFN == NI_PRIMITIVE_PopCount)
9443+
{
9444+
switch (TypeOfVN(arg0VN))
9445+
{
9446+
case TYP_LONG:
9447+
res = BitOperations::PopCount((uint64_t)GetConstantInt64(arg0VN));
9448+
break;
9449+
9450+
case TYP_INT:
9451+
res = BitOperations::PopCount((uint32_t)GetConstantInt32(arg0VN));
9452+
break;
9453+
9454+
default:
9455+
unreached();
9456+
}
9457+
}
9458+
else
9459+
{
9460+
unreached();
9461+
}
94129462

94139463
return VNForIntCon(res);
94149464
}
94159465
}
94169466
else
94179467
{
94189468
assert((typ == TYP_DOUBLE) || (typ == TYP_FLOAT) ||
9419-
((typ == TYP_INT) && ((gtMathFN == NI_System_Math_ILogB) || (gtMathFN == NI_System_Math_Round))));
9469+
((typ == TYP_INT) && ((gtMathFN == NI_System_Math_ILogB) || (gtMathFN == NI_System_Math_Round))) ||
9470+
(((typ == TYP_INT) || (typ == TYP_LONG)) &&
9471+
((gtMathFN == NI_PRIMITIVE_LeadingZeroCount) || (gtMathFN == NI_PRIMITIVE_TrailingZeroCount) ||
9472+
(gtMathFN == NI_PRIMITIVE_PopCount))));
94209473

94219474
VNFunc vnf = VNF_Boundary;
94229475
switch (gtMathFN)
@@ -9508,6 +9561,15 @@ ValueNum ValueNumStore::EvalMathFuncUnary(var_types typ, NamedIntrinsic gtMathFN
95089561
case NI_System_Math_Truncate:
95099562
vnf = VNF_Truncate;
95109563
break;
9564+
case NI_PRIMITIVE_LeadingZeroCount:
9565+
vnf = VNF_LeadingZeroCount;
9566+
break;
9567+
case NI_PRIMITIVE_TrailingZeroCount:
9568+
vnf = VNF_TrailingZeroCount;
9569+
break;
9570+
case NI_PRIMITIVE_PopCount:
9571+
vnf = VNF_PopCount;
9572+
break;
95119573
default:
95129574
unreached(); // the above are the only math intrinsics at the time of this writing.
95139575
}
@@ -12829,7 +12891,7 @@ void Compiler::fgValueNumberIntrinsic(GenTree* tree)
1282912891
vnStore->VNPUnpackExc(intrinsic->AsOp()->gtOp2->gtVNPair, &arg1VNP, &arg1VNPx);
1283012892
}
1283112893

12832-
if (IsMathIntrinsic(intrinsic->gtIntrinsicName))
12894+
if (IsMathIntrinsic(intrinsic->gtIntrinsicName) || IsBitCountingIntrinsic(intrinsic->gtIntrinsicName))
1283312895
{
1283412896
// GT_INTRINSIC is currently a subtype of binary operators. But most of
1283512897
// the math intrinsics are actually unary operations.

0 commit comments

Comments
 (0)