JIT: Support more scaled addressing modes on arm64 (#97921)
We currently support scaled addressing modes when the index also needs
an extension, via contained `BFIZ` nodes. However, we did not support
scaled addressing modes when the index is already 64 bits wide. This adds
that support as a natural extension of `GT_LEA`.
jakobbotsch authored Feb 5, 2024
1 parent 7c90c57 commit 1ed67a5
Showing 6 changed files with 137 additions and 88 deletions.
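
For context, here is a minimal sketch (not taken from the commit; the function names and exact instruction sequences are illustrative) of the two index shapes the commit message describes:

```cpp
#include <cstdint>

// 32-bit index: the scale plus sign-extension case was already supported
// through a contained BFIZ node, allowing e.g. ldr x0, [x1, w2, sxtw #3].
uint64_t LoadWithIntIndex(uint64_t* data, int32_t index)
{
    return data[index];
}

// 64-bit index: no extension is needed, and with this change the scale can
// fold directly into the GT_LEA, allowing e.g. ldr x0, [x1, x2, lsl #3].
uint64_t LoadWithLongIndex(uint64_t* data, int64_t index)
{
    return data[index];
}
```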
10 changes: 8 additions & 2 deletions src/coreclr/jit/codegen.h
@@ -31,8 +31,14 @@ class CodeGen final : public CodeGenInterface

// TODO-Cleanup: Abstract out the part of this that finds the addressing mode, and
// move it to Lower
virtual bool genCreateAddrMode(
GenTree* addr, bool fold, bool* revPtr, GenTree** rv1Ptr, GenTree** rv2Ptr, unsigned* mulPtr, ssize_t* cnsPtr);
virtual bool genCreateAddrMode(GenTree* addr,
bool fold,
unsigned naturalMul,
bool* revPtr,
GenTree** rv1Ptr,
GenTree** rv2Ptr,
unsigned* mulPtr,
ssize_t* cnsPtr);

#ifdef LATE_DISASM
virtual const char* siStackVarName(size_t offs, size_t size, unsigned reg, unsigned stkOffs);
100 changes: 51 additions & 49 deletions src/coreclr/jit/codegencommon.cpp
@@ -965,7 +965,7 @@ void CodeGen::genDefineInlineTempLabel(BasicBlock* label)
// Notes:
// This only makes an adjustment if !FEATURE_FIXED_OUT_ARGS, if there is no frame pointer,
// and if 'block' is a throw helper block with a non-zero stack level.

//
void CodeGen::genAdjustStackLevel(BasicBlock* block)
{
#if !FEATURE_FIXED_OUT_ARGS
@@ -1003,35 +1003,33 @@ void CodeGen::genAdjustStackLevel(BasicBlock* block)
#endif // !FEATURE_FIXED_OUT_ARGS
}

/*****************************************************************************
*
* Take an address expression and try to find the best set of components to
* form an address mode; returns non-zero if this is successful.
*
* TODO-Cleanup: The RyuJIT backend never uses this to actually generate code.
* Refactor this code so that the underlying analysis can be used in
* the RyuJIT Backend to do lowering, instead of having to call this method with the
* option to not generate the code.
*
* 'fold' specifies if it is OK to fold the array index which hangs off
* a GT_NOP node.
*
* If successful, the parameters will be set to the following values:
*
* *rv1Ptr ... base operand
* *rv2Ptr ... optional operand
* *revPtr ... true if rv2 is before rv1 in the evaluation order
* *mulPtr ... optional multiplier (2/4/8) for rv2
* Note that for [reg1 + reg2] and [reg1 + reg2 + icon], *mulPtr == 0.
* *cnsPtr ... integer constant [optional]
*
* IMPORTANT NOTE: This routine doesn't generate any code, it merely
* identifies the components that might be used to
* form an address mode later on.
*/

bool CodeGen::genCreateAddrMode(
GenTree* addr, bool fold, bool* revPtr, GenTree** rv1Ptr, GenTree** rv2Ptr, unsigned* mulPtr, ssize_t* cnsPtr)
//------------------------------------------------------------------------
// genCreateAddrMode:
// Take an address expression and try to find the best set of components to
// form an address mode; returns true if this is successful.
//
// Parameters:
// addr - Tree that potentially computes an address
// fold - Specifies if it is OK to fold the array index which hangs off a GT_NOP node.
// naturalMul - For arm64 specifies the natural multiplier for the address mode (i.e. the size of the parent
// indirection).
// revPtr - [out] True if rv2 is before rv1 in the evaluation order
// rv1Ptr - [out] Base operand
// rv2Ptr - [out] Optional operand
// mulPtr - [out] Optional multiplier for rv2. If non-zero and naturalMul is non-zero, it must match naturalMul.
// cnsPtr - [out] Integer constant [optional]
//
// Returns:
// True if some address mode components were extracted.
//
bool CodeGen::genCreateAddrMode(GenTree* addr,
bool fold,
unsigned naturalMul,
bool* revPtr,
GenTree** rv1Ptr,
GenTree** rv2Ptr,
unsigned* mulPtr,
ssize_t* cnsPtr)
{
/*
The following indirections are valid address modes on x86/x64:
@@ -1171,8 +1169,7 @@ bool CodeGen::genCreateAddrMode(

goto AGAIN;

#if !defined(TARGET_ARMARCH) && !defined(TARGET_LOONGARCH64) && !defined(TARGET_RISCV64)
// TODO-ARM64-CQ, TODO-ARM-CQ: For now we don't try to create a scaled index.
// TODO-ARM-CQ: For now we don't try to create a scaled index.
case GT_MUL:
if (op1->gtOverflow())
{
@@ -1182,10 +1179,11 @@ bool CodeGen::genCreateAddrMode(
FALLTHROUGH;

case GT_LSH:

mul = op1->GetScaledIndex();
if (mul)
{
unsigned mulCandidate = op1->GetScaledIndex();
if (jitIsScaleIndexMul(mulCandidate, naturalMul))
{
mul = mulCandidate;
/* We can use "[mul*rv2 + icon]" */

rv1 = nullptr;
@@ -1194,7 +1192,7 @@ bool CodeGen::genCreateAddrMode(
goto FOUND_AM;
}
break;
#endif // !defined(TARGET_ARMARCH) && !defined(TARGET_LOONGARCH64) && !defined(TARGET_RISCV64)
}

default:
break;
@@ -1215,8 +1213,8 @@ bool CodeGen::genCreateAddrMode(

switch (op1->gtOper)
{
#if !defined(TARGET_ARMARCH) && !defined(TARGET_LOONGARCH64) && !defined(TARGET_RISCV64)
// TODO-ARM64-CQ, TODO-ARM-CQ: For now we don't try to create a scaled index.
#ifdef TARGET_XARCH
// TODO-ARM-CQ: For now we don't try to create a scaled index.
case GT_ADD:

if (op1->gtOverflow())
@@ -1237,6 +1235,7 @@ bool CodeGen::genCreateAddrMode(
}
}
break;
#endif // TARGET_XARCH

case GT_MUL:

@@ -1248,19 +1247,20 @@ bool CodeGen::genCreateAddrMode(
FALLTHROUGH;

case GT_LSH:

mul = op1->GetScaledIndex();
if (mul)
{
unsigned mulCandidate = op1->GetScaledIndex();
if (jitIsScaleIndexMul(mulCandidate, naturalMul))
{
/* 'op1' is a scaled value */
mul = mulCandidate;

rv1 = op2;
rv2 = op1->AsOp()->gtOp1;

int argScale;
while ((rv2->gtOper == GT_MUL || rv2->gtOper == GT_LSH) && (argScale = rv2->GetScaledIndex()) != 0)
{
if (jitIsScaleIndexMul(argScale * mul))
if (jitIsScaleIndexMul(argScale * mul, naturalMul))
{
mul = mul * argScale;
rv2 = rv2->AsOp()->gtOp1;
@@ -1277,7 +1277,7 @@ bool CodeGen::genCreateAddrMode(
goto FOUND_AM;
}
break;
#endif // !TARGET_ARMARCH && !TARGET_LOONGARCH64 && !TARGET_RISCV64
}

case GT_COMMA:

Expand All @@ -1291,7 +1291,7 @@ bool CodeGen::genCreateAddrMode(
noway_assert(op2);
switch (op2->gtOper)
{
#if !defined(TARGET_ARMARCH) && !defined(TARGET_LOONGARCH64) && !defined(TARGET_RISCV64)
#ifdef TARGET_XARCH
// TODO-ARM64-CQ, TODO-ARM-CQ: For now we only handle MUL and LSH because
// arm doesn't support both scale and offset at the same time. Offset is handled
// at the emitter as a peephole optimization.
@@ -1315,6 +1315,7 @@ bool CodeGen::genCreateAddrMode(
goto AGAIN;
}
break;
#endif // TARGET_XARCH

case GT_MUL:

@@ -1326,16 +1327,17 @@ bool CodeGen::genCreateAddrMode(
FALLTHROUGH;

case GT_LSH:

mul = op2->GetScaledIndex();
if (mul)
{
unsigned mulCandidate = op2->GetScaledIndex();
if (jitIsScaleIndexMul(mulCandidate, naturalMul))
{
mul = mulCandidate;
// 'op2' is a scaled value... is its argument also scaled?
int argScale;
rv2 = op2->AsOp()->gtOp1;
while ((rv2->gtOper == GT_MUL || rv2->gtOper == GT_LSH) && (argScale = rv2->GetScaledIndex()) != 0)
{
if (jitIsScaleIndexMul(argScale * mul))
if (jitIsScaleIndexMul(argScale * mul, naturalMul))
{
mul = mul * argScale;
rv2 = rv2->AsOp()->gtOp1;
@@ -1351,7 +1353,7 @@ bool CodeGen::genCreateAddrMode(
goto FOUND_AM;
}
break;
#endif // TARGET_ARMARCH || TARGET_LOONGARCH64 || TARGET_RISCV64
}

case GT_COMMA:

1 change: 1 addition & 0 deletions src/coreclr/jit/codegeninterface.h
@@ -113,6 +113,7 @@ class CodeGenInterface
// move it to Lower
virtual bool genCreateAddrMode(GenTree* addr,
bool fold,
unsigned naturalMul,
bool* revPtr,
GenTree** rv1Ptr,
GenTree** rv2Ptr,
44 changes: 42 additions & 2 deletions src/coreclr/jit/compiler.hpp
@@ -196,10 +196,45 @@ inline unsigned __int64 BitsBetween(unsigned __int64 value, unsigned __int64 end
(end - 1); // Ones to the right of set bit in the end mask.
}

/*****************************************************************************/

//------------------------------------------------------------------------------
// jitIsScaleIndexMul: Check if the value is a valid addressing mode multiplier
// amount for x64.
//
// Parameters:
// val - The multiplier
//
// Returns:
// True if value is 1, 2, 4 or 8.
//
inline bool jitIsScaleIndexMul(size_t val)
{
// TODO-Cleanup: On arm64 the scale that can be used in addressing modes
// depends on the containing indirection, so this function should be reevaluated.
switch (val)
{
case 1:
case 2:
case 4:
case 8:
return true;
default:
return false;
}
}

//------------------------------------------------------------------------------
// jitIsScaleIndexMul: Check if the value is a valid addressing mode multiplier amount.
//
// Parameters:
// val - The multiplier
// naturalMul - For arm64, the natural multiplier (size of containing indirection)
//
// Returns:
// True if the multiplier can be used in an address mode.
//
inline bool jitIsScaleIndexMul(size_t val, unsigned naturalMul)
{
#if defined(TARGET_XARCH)
switch (val)
{
case 1:
@@ -211,6 +246,11 @@ inline bool jitIsScaleIndexMul(size_t val)
default:
return false;
}
#elif defined(TARGET_ARM64)
return val == naturalMul;
#else
return false;
#endif
}

// Returns "tree" iff "val" is a valid addressing mode scale shift amount on
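
As a standalone illustration of the split the new two-argument overload encodes, here is a simplified model (an assumption-level sketch, not code from the diff) with the target folded into a parameter instead of an `#ifdef`:

```cpp
#include <cstddef>

// Mirrors jitIsScaleIndexMul(val, naturalMul) above: x86/x64 accepts any
// SIB scale regardless of access size, while arm64 only accepts the
// natural scale of the containing indirection.
bool IsScaleIndexMulModel(size_t val, unsigned naturalMul, bool isArm64)
{
    if (isArm64)
    {
        // "[base, index, LSL #n]" is only encodable when (1 << n) equals
        // the access size, i.e. the scale must match the natural multiplier.
        return val == naturalMul;
    }
    // The x86/x64 SIB byte encodes a scale of 1, 2, 4 or 8, independent of
    // the access size.
    return (val == 1) || (val == 2) || (val == 4) || (val == 8);
}

// IsScaleIndexMulModel(8, 8, /*isArm64*/ true)  -> true
// IsScaleIndexMulModel(4, 8, /*isArm64*/ true)  -> false
// IsScaleIndexMulModel(4, 8, /*isArm64*/ false) -> true
```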
27 changes: 14 additions & 13 deletions src/coreclr/jit/gentree.cpp
@@ -4538,20 +4538,21 @@ bool Compiler::gtMarkAddrMode(GenTree* addr, int* pCostEx, int* pCostSz, var_typ
GenTree* base; // This is the base of the address.
GenTree* idx; // This is the index.

if (codeGen->genCreateAddrMode(addr, false /*fold*/, &rev, &base, &idx, &mul, &cns))
{
unsigned naturalMul = 0;
#ifdef TARGET_ARM64
// Multiplier should be a "natural-scale" power of two number which is equal to target's width.
//
// *(ulong*)(data + index * 8); - can be optimized
// *(ulong*)(data + index * 7); - can not be optimized
// *(int*)(data + index * 2); - can not be optimized
//
naturalMul = genTypeSize(type);
#endif

#ifdef TARGET_ARMARCH
// Multiplier should be a "natural-scale" power of two number which is equal to target's width.
//
// *(ulong*)(data + index * 8); - can be optimized
// *(ulong*)(data + index * 7); - can not be optimized
// *(int*)(data + index * 2); - can not be optimized
//
if ((mul > 0) && (genTypeSize(type) != mul))
{
return false;
}
if (codeGen->genCreateAddrMode(addr, false /*fold*/, naturalMul, &rev, &base, &idx, &mul, &cns))
{
#ifdef TARGET_ARM64
assert((mul == 0) || (mul == 1) || (mul == naturalMul));
#endif

// We can form a complex addressing mode, so mark each of the interior
43 changes: 21 additions & 22 deletions src/coreclr/jit/lower.cpp
@@ -6135,14 +6135,28 @@ bool Lowering::TryCreateAddrMode(GenTree* addr, bool isContainable, GenTree* par
ssize_t offset = 0;
bool rev = false;

var_types targetType = parent->OperIsIndir() ? parent->TypeGet() : TYP_UNDEF;

unsigned naturalMul = 0;
#ifdef TARGET_ARM64
// Multiplier should be a "natural-scale" power of two number which is equal to target's width.
//
// *(ulong*)(data + index * 8); - can be optimized
// *(ulong*)(data + index * 7); - can not be optimized
// *(int*)(data + index * 2); - can not be optimized
//
naturalMul = genTypeSize(targetType);
#endif

// Find out if an addressing mode can be constructed
bool doAddrMode = comp->codeGen->genCreateAddrMode(addr, // address
true, // fold
&rev, // reverse ops
&base, // base addr
&index, // index val
&scale, // scaling
&offset); // displacement
bool doAddrMode = comp->codeGen->genCreateAddrMode(addr, // address
true, // fold
naturalMul, // natural multiplier
&rev, // reverse ops
&base, // base addr
&index, // index val
&scale, // scaling
&offset); // displacement

#ifdef TARGET_ARM64
if (parent->OperIsIndir() && parent->AsIndir()->IsVolatile())
@@ -6158,21 +6172,6 @@ bool Lowering::TryCreateAddrMode(GenTree* addr, bool isContainable, GenTree* par
}
#endif

var_types targetType = parent->OperIsIndir() ? parent->TypeGet() : TYP_UNDEF;

#ifdef TARGET_ARMARCH
// Multiplier should be a "natural-scale" power of two number which is equal to target's width.
//
// *(ulong*)(data + index * 8); - can be optimized
// *(ulong*)(data + index * 7); - can not be optimized
// *(int*)(data + index * 2); - can not be optimized
//
if ((scale > 0) && (genTypeSize(targetType) != scale))
{
return false;
}
#endif

if (scale == 0)
{
scale = 1;
