Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 11 additions & 2 deletions src/coreclr/jit/codegenriscv64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4725,8 +4725,8 @@ void CodeGen::genIntrinsic(GenTreeIntrinsic* treeNode)
GenTree* op1 = treeNode->gtGetOp1();
GenTree* op2 = treeNode->gtGetOp2IfPresent();

emitAttr size = emitActualTypeSize(treeNode);
bool is4 = (size == 4);
emitAttr size = emitActualTypeSize(op1);
bool is4 = (size == EA_4BYTE);

instruction instr = INS_invalid;
switch (treeNode->gtIntrinsicName)
Expand All @@ -4744,6 +4744,15 @@ void CodeGen::genIntrinsic(GenTreeIntrinsic* treeNode)
case NI_System_Math_MaxNumber:
instr = is4 ? INS_fmax_s : INS_fmax_d;
break;
case NI_PRIMITIVE_LeadingZeroCount:
instr = is4 ? INS_clzw : INS_clz;
break;
case NI_PRIMITIVE_TrailingZeroCount:
instr = is4 ? INS_ctzw : INS_ctz;
break;
case NI_PRIMITIVE_PopCount:
instr = is4 ? INS_cpopw : INS_cpop;
break;
default:
NO_WAY("Unknown intrinsic");
}
Expand Down
2 changes: 1 addition & 1 deletion src/coreclr/jit/compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -4850,7 +4850,7 @@ class Compiler
bool IsIntrinsicImplementedByUserCall(NamedIntrinsic intrinsicName);
bool IsTargetIntrinsic(NamedIntrinsic intrinsicName);
bool IsMathIntrinsic(NamedIntrinsic intrinsicName);
bool IsMathIntrinsic(GenTree* tree);
bool IsBitCountingIntrinsic(NamedIntrinsic intrinsicName);

private:
//----------------- Importing the method ----------------------------------
Expand Down
58 changes: 39 additions & 19 deletions src/coreclr/jit/emitriscv64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -648,7 +648,7 @@ void emitter::emitIns_R_R(
{
code_t code = emitInsCode(ins);

if (INS_mov == ins || INS_sext_w == ins)
if (INS_mov == ins || INS_sext_w == ins || (INS_clz <= ins && ins <= INS_cpopw))
{
assert(isGeneralRegisterOrR0(reg1));
assert(isGeneralRegisterOrR0(reg2));
Expand Down Expand Up @@ -3805,18 +3805,30 @@ void emitter::emitDispInsName(
hasImmediate = false;
}
break;
case 0x1: // SLLI
case 0x1:
{
static constexpr unsigned kSlliFunct6 = 0b000000;

unsigned funct6 = (imm12 >> 6) & 0x3f;
// SLLI's instruction code's upper 6 bits have to be equal to zero
if (funct6 != kSlliFunct6)
unsigned shamt = imm12 & 0x3f; // 6 BITS for SHAMT in RISCV64
switch (funct6)
{
return emitDispIllegalInstruction(code);
case 0b011000:
static const char* names[] = {"clz", "ctz", "cpop"};
// shift amount is treated as additional funct opcode
if (shamt >= ARRAY_SIZE(names))
return emitDispIllegalInstruction(code);

printLength = printf("%s", names[shamt]);
hasImmediate = false;
break;

case 0b000000:
printLength = printf("slli");
imm12 = shamt;
break;

default:
return emitDispIllegalInstruction(code);
}
printLength = printf("slli");
imm12 &= 0x3f; // 6 BITS for SHAMT in RISCV64
}
break;
case 0x2: // SLTI
Expand Down Expand Up @@ -3891,19 +3903,27 @@ void emitter::emitDispInsName(
emitDispImmediate(imm12);
}
return;
case 0x1: // SLLIW
case 0x1:
{
static constexpr unsigned kSlliwFunct7 = 0b0000000;

unsigned funct7 = (imm12 >> 5) & 0x7f;
// SLLIW's instruction code's upper 7 bits have to be equal to zero
if (funct7 == kSlliwFunct7)
{
printf("slliw %s, %s, %d\n", rd, rs1, imm12 & 0x1f); // 5 BITS for SHAMT in RISCV64
}
else
unsigned shamt = imm12 & 0x1f; // 5 BITS for SHAMT in RISCV64
switch (funct7)
{
emitDispIllegalInstruction(code);
case 0b0110000:
static const char* names[] = {"clzw ", "ctzw ", "cpopw"};
// shift amount is treated as funct additional opcode bits
if (shamt >= ARRAY_SIZE(names))
return emitDispIllegalInstruction(code);

printf("%s %s, %s\n", names[shamt], rd, rs1);
return;

case 0b0000000:
printf("slliw %s, %s, %d\n", rd, rs1, shamt);
return;

default:
return emitDispIllegalInstruction(code);
}
}
return;
Expand Down
3 changes: 3 additions & 0 deletions src/coreclr/jit/gentree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5416,6 +5416,9 @@ unsigned Compiler::gtSetEvalOrder(GenTree* tree)
case NI_System_Math_Tan:
case NI_System_Math_Tanh:
case NI_System_Math_Truncate:
case NI_PRIMITIVE_LeadingZeroCount:
case NI_PRIMITIVE_TrailingZeroCount:
case NI_PRIMITIVE_PopCount:
{
// Giving intrinsics a large fixed execution cost is because we'd like to CSE
// them, even if they are implemented by calls. This is different from modeling
Expand Down
45 changes: 40 additions & 5 deletions src/coreclr/jit/importercalls.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5733,7 +5733,14 @@ GenTree* Compiler::impPrimitiveNamedIntrinsic(NamedIntrinsic intrinsic,
}
#endif // !TARGET_64BIT

#if defined(FEATURE_HW_INTRINSICS)
#ifdef TARGET_RISCV64
if (compOpportunisticallyDependsOn(InstructionSet_Zbb))
{
impPopStack();
result = new (this, GT_INTRINSIC) GenTreeIntrinsic(retType, op1, NI_PRIMITIVE_LeadingZeroCount,
nullptr R2RARG(CORINFO_CONST_LOOKUP{IAT_VALUE}));
}
#elif defined(FEATURE_HW_INTRINSICS)
#if defined(TARGET_XARCH)
if (compOpportunisticallyDependsOn(InstructionSet_LZCNT))
{
Expand Down Expand Up @@ -5909,7 +5916,14 @@ GenTree* Compiler::impPrimitiveNamedIntrinsic(NamedIntrinsic intrinsic,
}
#endif // !TARGET_64BIT

#if defined(FEATURE_HW_INTRINSICS)
#ifdef TARGET_RISCV64
if (compOpportunisticallyDependsOn(InstructionSet_Zbb))
{
impPopStack();
result = new (this, GT_INTRINSIC) GenTreeIntrinsic(retType, op1, NI_PRIMITIVE_PopCount,
nullptr R2RARG(CORINFO_CONST_LOOKUP{IAT_VALUE}));
}
#elif defined(FEATURE_HW_INTRINSICS)
#if defined(TARGET_XARCH)
if (compOpportunisticallyDependsOn(InstructionSet_POPCNT))
{
Expand Down Expand Up @@ -6066,7 +6080,14 @@ GenTree* Compiler::impPrimitiveNamedIntrinsic(NamedIntrinsic intrinsic,
}
#endif // !TARGET_64BIT

#if defined(FEATURE_HW_INTRINSICS)
#ifdef TARGET_RISCV64
if (compOpportunisticallyDependsOn(InstructionSet_Zbb))
{
impPopStack();
result = new (this, GT_INTRINSIC) GenTreeIntrinsic(retType, op1, NI_PRIMITIVE_TrailingZeroCount,
nullptr R2RARG(CORINFO_CONST_LOOKUP{IAT_VALUE}));
}
#elif defined(FEATURE_HW_INTRINSICS)
#if defined(TARGET_XARCH)
if (compOpportunisticallyDependsOn(InstructionSet_BMI1))
{
Expand Down Expand Up @@ -7927,6 +7948,11 @@ bool Compiler::IsTargetIntrinsic(NamedIntrinsic intrinsicName)
case NI_System_Math_ReciprocalSqrtEstimate:
return true;

case NI_PRIMITIVE_LeadingZeroCount:
case NI_PRIMITIVE_TrailingZeroCount:
case NI_PRIMITIVE_PopCount:
return compOpportunisticallyDependsOn(InstructionSet_Zbb);

default:
return false;
}
Expand Down Expand Up @@ -8024,9 +8050,18 @@ bool Compiler::IsMathIntrinsic(NamedIntrinsic intrinsicName)
}
}

bool Compiler::IsMathIntrinsic(GenTree* tree)
bool Compiler::IsBitCountingIntrinsic(NamedIntrinsic intrinsicName)
{
return (tree->OperGet() == GT_INTRINSIC) && IsMathIntrinsic(tree->AsIntrinsic()->gtIntrinsicName);
switch (intrinsicName)
{
case NI_PRIMITIVE_LeadingZeroCount:
case NI_PRIMITIVE_TrailingZeroCount:
case NI_PRIMITIVE_PopCount:
return true;

default:
return false;
}
}

//------------------------------------------------------------------------
Expand Down
9 changes: 9 additions & 0 deletions src/coreclr/jit/instrsriscv64.h
Original file line number Diff line number Diff line change
Expand Up @@ -260,6 +260,15 @@ INST(amominu_w, "amominu.w", 0, 0xc000202f) // funct5:11000
INST(amominu_d, "amominu.d", 0, 0xc000302f) // funct5:11000
INST(amomaxu_w, "amomaxu.w", 0, 0xe000202f) // funct5:11100
INST(amomaxu_d, "amomaxu.d", 0, 0xe000302f) // funct5:11100

// Zbb (RV32 + RV64)
//
// Bit-counting instructions: count leading zeros, count trailing zeros, population count.
// Each is a unary operation encoded in the shift-immediate slot (funct3 = 001) with the upper
// immediate bits set to 0b011000(0); the low "shamt" bits select the operation
// (0 = clz, 1 = ctz, 2 = cpop). The base forms use opcode 0x13 and the *w forms use opcode 0x1b.
//
// NOTE(review): the *w forms (clzw/ctzw/cpopw) appear to be RV64-only in the bitmanip spec, so the
// "RV32 + RV64" heading above may only apply to the base forms -- confirm.
//
// NOTE(review): emitter::emitIns_R_R tests `INS_clz <= ins && ins <= INS_cpopw`, so these six entries
// presumably must stay contiguous, with clz first and cpopw last -- confirm the INS_ enum follows
// this table's order before inserting or reordering entries here.
INST(clz, "clz", 0, 0x60001013) // shamt:000000, opcode 0x13
INST(clzw, "clzw", 0, 0x6000101b) // shamt:00000, opcode 0x1b
INST(ctz, "ctz", 0, 0x60101013) // shamt:000001, opcode 0x13
INST(ctzw, "ctzw", 0, 0x6010101b) // shamt:00001, opcode 0x1b
INST(cpop, "cpop", 0, 0x60201013) // shamt:000010, opcode 0x13
INST(cpopw, "cpopw", 0, 0x6020101b) // shamt:00010, opcode 0x1b

// clang-format on
/*****************************************************************************/
#undef INST
Expand Down
37 changes: 29 additions & 8 deletions src/coreclr/jit/lsrariscv64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -346,20 +346,41 @@ int LinearScan::BuildNode(GenTree* tree)

case GT_INTRINSIC:
{
NamedIntrinsic name = tree->AsIntrinsic()->gtIntrinsicName;
noway_assert((name == NI_System_Math_Abs) || (name == NI_System_Math_Sqrt) ||
(name == NI_System_Math_MinNumber) || (name == NI_System_Math_MaxNumber));

// Both operand and its result must be of the same floating point type.
GenTree* op1 = tree->gtGetOp1();
GenTree* op2 = tree->gtGetOp2IfPresent();
assert(varTypeIsFloating(op1));
assert(op1->TypeIs(tree->TypeGet()));

switch (tree->AsIntrinsic()->gtIntrinsicName)
{
// Both operands and its result must be of the same floating-point type.
case NI_System_Math_MinNumber:
case NI_System_Math_MaxNumber:
assert(op2 != nullptr);
assert(op2->TypeIs(tree->TypeGet()));
FALLTHROUGH;
case NI_System_Math_Abs:
case NI_System_Math_Sqrt:
assert(op1->TypeIs(tree->TypeGet()));
assert(varTypeIsFloating(tree));
break;

// Operand and its result must be integers
case NI_PRIMITIVE_LeadingZeroCount:
case NI_PRIMITIVE_TrailingZeroCount:
case NI_PRIMITIVE_PopCount:
assert(compiler->compOpportunisticallyDependsOn(InstructionSet_Zbb));
assert(op2 == nullptr);
assert(varTypeIsIntegral(op1));
assert(varTypeIsIntegral(tree));
break;

default:
NO_WAY("Unknown intrinsic");
}

BuildUse(op1);
srcCount = 1;
if (op2 != nullptr)
{
assert(op2->TypeIs(tree->TypeGet()));
BuildUse(op2);
srcCount++;
}
Expand Down
74 changes: 68 additions & 6 deletions src/coreclr/jit/valuenum.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9133,7 +9133,7 @@ ValueNum ValueNumStore::EvalHWIntrinsicFunTernary(
ValueNum ValueNumStore::EvalMathFuncUnary(var_types typ, NamedIntrinsic gtMathFN, ValueNum arg0VN)
{
assert(arg0VN == VNNormalValue(arg0VN));
assert(m_pComp->IsMathIntrinsic(gtMathFN));
assert(m_pComp->IsMathIntrinsic(gtMathFN) RISCV64_ONLY(|| m_pComp->IsBitCountingIntrinsic(gtMathFN)));

// If the math intrinsic is not implemented by target-specific instructions, such as implemented
// by user calls, then don't do constant folding on it during ReadyToRun. This minimizes precision loss.
Expand Down Expand Up @@ -9385,10 +9385,8 @@ ValueNum ValueNumStore::EvalMathFuncUnary(var_types typ, NamedIntrinsic gtMathFN
unreached();
}
}
else
else if (gtMathFN == NI_System_Math_Round)
{
assert(gtMathFN == NI_System_Math_Round);

switch (TypeOfVN(arg0VN))
{
case TYP_DOUBLE:
Expand All @@ -9409,14 +9407,69 @@ ValueNum ValueNumStore::EvalMathFuncUnary(var_types typ, NamedIntrinsic gtMathFN
unreached();
}
}
else if (gtMathFN == NI_PRIMITIVE_LeadingZeroCount)
{
switch (TypeOfVN(arg0VN))
{
case TYP_LONG:
res = BitOperations::LeadingZeroCount((uint64_t)GetConstantInt64(arg0VN));
break;

case TYP_INT:
res = BitOperations::LeadingZeroCount((uint32_t)GetConstantInt32(arg0VN));
break;

default:
unreached();
}
}
else if (gtMathFN == NI_PRIMITIVE_TrailingZeroCount)
{
switch (TypeOfVN(arg0VN))
{
case TYP_LONG:
res = BitOperations::TrailingZeroCount((uint64_t)GetConstantInt64(arg0VN));
break;

case TYP_INT:
res = BitOperations::TrailingZeroCount((uint32_t)GetConstantInt32(arg0VN));
break;

default:
unreached();
}
}
else if (gtMathFN == NI_PRIMITIVE_PopCount)
{
switch (TypeOfVN(arg0VN))
{
case TYP_LONG:
res = BitOperations::PopCount((uint64_t)GetConstantInt64(arg0VN));
break;

case TYP_INT:
res = BitOperations::PopCount((uint32_t)GetConstantInt32(arg0VN));
break;

default:
unreached();
}
}
else
{
unreached();
}

return VNForIntCon(res);
}
}
else
{
assert((typ == TYP_DOUBLE) || (typ == TYP_FLOAT) ||
((typ == TYP_INT) && ((gtMathFN == NI_System_Math_ILogB) || (gtMathFN == NI_System_Math_Round))));
((typ == TYP_INT) && ((gtMathFN == NI_System_Math_ILogB) || (gtMathFN == NI_System_Math_Round))) ||
(((typ == TYP_INT) || (typ == TYP_LONG)) &&
((gtMathFN == NI_PRIMITIVE_LeadingZeroCount) || (gtMathFN == NI_PRIMITIVE_TrailingZeroCount) ||
(gtMathFN == NI_PRIMITIVE_PopCount))));

VNFunc vnf = VNF_Boundary;
switch (gtMathFN)
Expand Down Expand Up @@ -9508,6 +9561,15 @@ ValueNum ValueNumStore::EvalMathFuncUnary(var_types typ, NamedIntrinsic gtMathFN
case NI_System_Math_Truncate:
vnf = VNF_Truncate;
break;
case NI_PRIMITIVE_LeadingZeroCount:
vnf = VNF_LeadingZeroCount;
break;
case NI_PRIMITIVE_TrailingZeroCount:
vnf = VNF_TrailingZeroCount;
break;
case NI_PRIMITIVE_PopCount:
vnf = VNF_PopCount;
break;
default:
unreached(); // the above are the only math intrinsics at the time of this writing.
}
Expand Down Expand Up @@ -12829,7 +12891,7 @@ void Compiler::fgValueNumberIntrinsic(GenTree* tree)
vnStore->VNPUnpackExc(intrinsic->AsOp()->gtOp2->gtVNPair, &arg1VNP, &arg1VNPx);
}

if (IsMathIntrinsic(intrinsic->gtIntrinsicName))
if (IsMathIntrinsic(intrinsic->gtIntrinsicName) || IsBitCountingIntrinsic(intrinsic->gtIntrinsicName))
{
// GT_INTRINSIC is a currently a subtype of binary operators. But most of
// the math intrinsics are actually unary operations.
Expand Down
Loading
Loading