Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement StoreSelectedScalar for Arm64 #93223

Merged
merged 24 commits into from
Nov 16, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
621dc68
Implement StoreSelectedScalar128x2 for Arm64
SwapnilGaikwad Oct 6, 2023
f976275
Fix the xml tags in a comment
SwapnilGaikwad Oct 9, 2023
40d8c9d
Fix formatting issues
SwapnilGaikwad Oct 9, 2023
85a7e24
Rename StoreSelectedScalarNx2 to StoreSelectedScalar
SwapnilGaikwad Oct 16, 2023
2768c81
Implement StoreSelectedScalarNx3 & StoreSelectedScalarNx4
SwapnilGaikwad Oct 16, 2023
5d33bab
Merge main
SwapnilGaikwad Oct 16, 2023
174fd93
Refactor ST2 emit
SwapnilGaikwad Oct 16, 2023
9154a89
Incorporate review comments
SwapnilGaikwad Oct 16, 2023
4eb913c
Fix System.Runtime.Intrinsics for Vector64
SwapnilGaikwad Oct 16, 2023
7341055
Combine separate Nx2, Nx3 & Nx4 into StoreSelectedScalar
SwapnilGaikwad Oct 22, 2023
c53784d
Merge main
SwapnilGaikwad Oct 24, 2023
3307ce2
Incorporate review comments
SwapnilGaikwad Oct 24, 2023
3592685
Refactor - review comments
SwapnilGaikwad Oct 24, 2023
ca7223b
Fix build issues
SwapnilGaikwad Oct 24, 2023
2f0f7d7
Move StoreSelectedScalar128x1 to AdvSimd.Arm64
SwapnilGaikwad Oct 25, 2023
1254d34
Merge main
SwapnilGaikwad Oct 31, 2023
ffa04a2
Move back StoreSelectedScalar for a single vector from AdvSimd.Arm64 …
SwapnilGaikwad Oct 31, 2023
33d4634
Add out of bounds check
SwapnilGaikwad Nov 1, 2023
a1e1364
Move index out of range check after recomputing the simdSize
SwapnilGaikwad Nov 2, 2023
142bcc0
Use assert protected range check
SwapnilGaikwad Nov 2, 2023
ba94c36
Merge main
SwapnilGaikwad Nov 6, 2023
962bd4d
Fix build failures
SwapnilGaikwad Nov 14, 2023
29f19aa
Merge main
SwapnilGaikwad Nov 15, 2023
46b5e34
Fix rebase misses
SwapnilGaikwad Nov 15, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 60 additions & 0 deletions src/coreclr/jit/hwintrinsicarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -260,6 +260,7 @@ void HWIntrinsicInfo::lookupImmBounds(
case NI_AdvSimd_Arm64_LoadAndInsertScalarVector128x3:
case NI_AdvSimd_Arm64_LoadAndInsertScalarVector128x4:
case NI_AdvSimd_StoreSelectedScalar:
case NI_AdvSimd_Arm64_StoreSelectedScalar:
case NI_AdvSimd_Arm64_DuplicateSelectedScalarToVector128:
case NI_AdvSimd_Arm64_InsertSelectedScalar:
immUpperBound = Compiler::getSIMDVectorLength(simdSize, baseType) - 1;
Expand Down Expand Up @@ -1796,6 +1797,65 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
break;
}

case NI_AdvSimd_StoreSelectedScalar:
case NI_AdvSimd_Arm64_StoreSelectedScalar:
{
assert(sig->numArgs == 3);
assert(retType == TYP_VOID);

CORINFO_ARG_LIST_HANDLE arg1 = sig->args;
CORINFO_ARG_LIST_HANDLE arg2 = info.compCompHnd->getArgNext(arg1);
CORINFO_ARG_LIST_HANDLE arg3 = info.compCompHnd->getArgNext(arg2);
var_types argType = TYP_UNKNOWN;
CORINFO_CLASS_HANDLE argClass = NO_CLASS_HANDLE;
argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg3, &argClass)));
op3 = impPopStack().val;
argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg2, &argClass)));
op2 = impPopStack().val;
unsigned fieldCount = info.compCompHnd->getClassNumInstanceFields(argClass);
int immLowerBound = 0;
int immUpperBound = 0;

if (op2->TypeGet() == TYP_STRUCT)
SwapnilGaikwad marked this conversation as resolved.
Show resolved Hide resolved
{
info.compNeedsConsecutiveRegisters = true;

if (!op2->OperIs(GT_LCL_VAR))
{
unsigned tmp = lvaGrabTemp(true DEBUGARG("StoreSelectedScalarN"));

impStoreTemp(tmp, op2, CHECK_SPILL_NONE);
op2 = gtNewLclvNode(tmp, argType);
}
op2 = gtConvertTableOpToFieldList(op2, fieldCount);
}
else
{
// While storing from a single vector, both Vector128 and Vector64 API calls are in AdvSimd class.
// Thus, we get simdSize as 8 for both of the calls. We re-calculate that simd size for such API calls.
getBaseJitTypeAndSizeOfSIMDType(argClass, &simdSize);
}

assert(HWIntrinsicInfo::isImmOp(intrinsic, op3));
HWIntrinsicInfo::lookupImmBounds(intrinsic, simdSize, simdBaseType, &immLowerBound, &immUpperBound);
op3 = addRangeCheckIfNeeded(intrinsic, op3, (!op3->IsCnsIntOrI()), immLowerBound, immUpperBound);
argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg1, &argClass)));
op1 = getArgForHWIntrinsic(argType, argClass);

if (op1->OperIs(GT_CAST))
{
// Although the API specifies a pointer, if what we have is a BYREF, that's what
// we really want, so throw away the cast.
if (op1->gtGetOp1()->TypeGet() == TYP_BYREF)
{
op1 = op1->gtGetOp1();
}
}

retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, op3, intrinsic, simdBaseJitType, simdSize);
break;
}

case NI_Vector64_Sum:
case NI_Vector128_Sum:
{
Expand Down
69 changes: 58 additions & 11 deletions src/coreclr/jit/hwintrinsiccodegenarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -493,6 +493,52 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
ins = varTypeIsUnsigned(intrin.baseType) ? INS_umsubl : INS_smsubl;
break;

case NI_AdvSimd_StoreSelectedScalar:
case NI_AdvSimd_Arm64_StoreSelectedScalar:
{
unsigned regCount = 0;
if (intrin.op2->OperIsFieldList())
{
GenTreeFieldList* fieldList = intrin.op2->AsFieldList();
GenTree* firstField = fieldList->Uses().GetHead()->GetNode();
op2Reg = firstField->GetRegNum();

INDEBUG(regNumber argReg = op2Reg);
for (GenTreeFieldList::Use& use : fieldList->Uses())
{
regCount++;
#ifdef DEBUG
GenTree* argNode = use.GetNode();
assert(argReg == argNode->GetRegNum());
argReg = REG_NEXT(argReg);
#endif
}
}
else
{
regCount = 1;
}

switch (regCount)
{
case 1:
ins = INS_st1;
break;
case 2:
ins = INS_st2;
break;
case 3:
ins = INS_st3;
break;
case 4:
ins = INS_st4;
break;
default:
unreached();
}
break;
}

default:
ins = HWIntrinsicInfo::lookupIns(intrin.id, intrin.baseType);
break;
Expand Down Expand Up @@ -773,7 +819,18 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
op1Reg);
break;

case NI_AdvSimd_Arm64_StorePair:
case NI_AdvSimd_Arm64_StorePairNonTemporal:
GetEmitter()->emitIns_R_R_R(ins, emitSize, op2Reg, op3Reg, op1Reg);
break;

case NI_AdvSimd_Arm64_StorePairScalar:
case NI_AdvSimd_Arm64_StorePairScalarNonTemporal:
GetEmitter()->emitIns_R_R_R(ins, emitTypeSize(intrin.baseType), op2Reg, op3Reg, op1Reg);
break;

case NI_AdvSimd_StoreSelectedScalar:
case NI_AdvSimd_Arm64_StoreSelectedScalar:
{
HWIntrinsicImmOpHelper helper(this, intrin.op3, node);

Expand All @@ -783,18 +840,8 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)

GetEmitter()->emitIns_R_R_I(ins, emitSize, op2Reg, op1Reg, elementIndex, opt);
}
}
break;

case NI_AdvSimd_Arm64_StorePair:
case NI_AdvSimd_Arm64_StorePairNonTemporal:
GetEmitter()->emitIns_R_R_R(ins, emitSize, op2Reg, op3Reg, op1Reg);
break;

case NI_AdvSimd_Arm64_StorePairScalar:
case NI_AdvSimd_Arm64_StorePairScalarNonTemporal:
GetEmitter()->emitIns_R_R_R(ins, emitTypeSize(intrin.baseType), op2Reg, op3Reg, op1Reg);
break;
}

case NI_AdvSimd_StoreVector64x2AndZip:
case NI_AdvSimd_StoreVector64x3AndZip:
Expand Down
3 changes: 2 additions & 1 deletion src/coreclr/jit/hwintrinsiclistarm64.h
Original file line number Diff line number Diff line change
Expand Up @@ -478,7 +478,7 @@ HARDWARE_INTRINSIC(AdvSimd, SignExtendWideningLower,
HARDWARE_INTRINSIC(AdvSimd, SignExtendWideningUpper, 16, 1, true, {INS_sxtl2, INS_invalid, INS_sxtl2, INS_invalid, INS_sxtl2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(AdvSimd, SqrtScalar, 8, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fsqrt, INS_fsqrt}, HW_Category_SIMD, HW_Flag_SIMDScalar)
HARDWARE_INTRINSIC(AdvSimd, Store, -1, 2, true, {INS_st1, INS_st1, INS_st1, INS_st1, INS_st1, INS_st1, INS_st1, INS_st1, INS_st1, INS_st1}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromSecondArg|HW_Flag_NoCodeGen)
HARDWARE_INTRINSIC(AdvSimd, StoreSelectedScalar, -1, 3, true, {INS_st1, INS_st1, INS_st1, INS_st1, INS_st1, INS_st1, INS_st1, INS_st1, INS_st1, INS_st1}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromSecondArg|HW_Flag_HasImmediateOperand|HW_Flag_SIMDScalar|HW_Flag_SpecialCodeGen)
HARDWARE_INTRINSIC(AdvSimd, StoreSelectedScalar, 8, 3, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasImmediateOperand|HW_Flag_SIMDScalar|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport|HW_Flag_NeedsConsecutiveRegisters)
HARDWARE_INTRINSIC(AdvSimd, StoreVector64x2AndZip, 8, 2, true, {INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_invalid, INS_invalid, INS_st2, INS_invalid}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NeedsConsecutiveRegisters)
HARDWARE_INTRINSIC(AdvSimd, StoreVector64x3AndZip, 8, 2, true, {INS_st3, INS_st3, INS_st3, INS_st3, INS_st3, INS_st3, INS_invalid, INS_invalid, INS_st3, INS_invalid}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NeedsConsecutiveRegisters)
HARDWARE_INTRINSIC(AdvSimd, StoreVector64x4AndZip, 8, 2, true, {INS_st4, INS_st4, INS_st4, INS_st4, INS_st4, INS_st4, INS_invalid, INS_invalid, INS_st4, INS_invalid}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NeedsConsecutiveRegisters)
Expand Down Expand Up @@ -676,6 +676,7 @@ HARDWARE_INTRINSIC(AdvSimd_Arm64, StorePair,
HARDWARE_INTRINSIC(AdvSimd_Arm64, StorePairScalar, 8, 3, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_stp, INS_stp, INS_invalid, INS_invalid, INS_stp, INS_invalid}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromSecondArg|HW_Flag_SpecialCodeGen)
HARDWARE_INTRINSIC(AdvSimd_Arm64, StorePairScalarNonTemporal, 8, 3, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_stnp, INS_stnp, INS_invalid, INS_invalid, INS_stnp, INS_invalid}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromSecondArg|HW_Flag_SpecialCodeGen)
HARDWARE_INTRINSIC(AdvSimd_Arm64, StorePairNonTemporal, -1, 3, true, {INS_stnp, INS_stnp, INS_stnp, INS_stnp, INS_stnp, INS_stnp, INS_stnp, INS_stnp, INS_stnp, INS_stp}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromSecondArg|HW_Flag_SpecialCodeGen)
HARDWARE_INTRINSIC(AdvSimd_Arm64, StoreSelectedScalar, 16, 3, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasImmediateOperand|HW_Flag_SIMDScalar|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport|HW_Flag_NeedsConsecutiveRegisters)
HARDWARE_INTRINSIC(AdvSimd_Arm64, StoreVector128x2AndZip, 16, 2, true, {INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_st2}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NeedsConsecutiveRegisters)
HARDWARE_INTRINSIC(AdvSimd_Arm64, StoreVector128x3AndZip, 16, 2, true, {INS_st3, INS_st3, INS_st3, INS_st3, INS_st3, INS_st3, INS_st3, INS_st3, INS_st3, INS_st3}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NeedsConsecutiveRegisters)
HARDWARE_INTRINSIC(AdvSimd_Arm64, StoreVector128x4AndZip, 16, 2, true, {INS_st4, INS_st4, INS_st4, INS_st4, INS_st4, INS_st4, INS_st4, INS_st4, INS_st4, INS_st4}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NeedsConsecutiveRegisters)
Expand Down
1 change: 1 addition & 0 deletions src/coreclr/jit/lowerarmarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3035,6 +3035,7 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node)
case NI_AdvSimd_ExtractVector64:
case NI_AdvSimd_ExtractVector128:
case NI_AdvSimd_StoreSelectedScalar:
case NI_AdvSimd_Arm64_StoreSelectedScalar:
assert(hasImmediateOperand);
assert(varTypeIsIntegral(intrin.op3));
if (intrin.op3->IsCnsIntOrI())
Expand Down
26 changes: 26 additions & 0 deletions src/coreclr/jit/lsraarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1412,6 +1412,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou
case NI_AdvSimd_ExtractVector64:
case NI_AdvSimd_ExtractVector128:
case NI_AdvSimd_StoreSelectedScalar:
case NI_AdvSimd_Arm64_StoreSelectedScalar:
needBranchTargetReg = !intrin.op3->isContainedIntOrIImmed();
break;

Expand Down Expand Up @@ -1577,16 +1578,19 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou
{
case NI_AdvSimd_VectorTableLookup:
case NI_AdvSimd_Arm64_VectorTableLookup:
{
assert(intrin.op2 != nullptr);
srcCount += BuildOperandUses(intrin.op2);
assert(dstCount == 1);
buildInternalRegisterUses();
BuildDef(intrinsicTree);
*pDstCount = 1;
break;
}

case NI_AdvSimd_VectorTableLookupExtension:
case NI_AdvSimd_Arm64_VectorTableLookupExtension:
{
assert(intrin.op2 != nullptr);
assert(intrin.op3 != nullptr);
assert(isRMW);
Expand All @@ -1597,6 +1601,23 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou
BuildDef(intrinsicTree);
*pDstCount = 1;
break;
}

case NI_AdvSimd_StoreSelectedScalar:
case NI_AdvSimd_Arm64_StoreSelectedScalar:
{
assert(intrin.op1 != nullptr);
assert(intrin.op3 != nullptr);
srcCount += BuildConsecutiveRegistersForUse(intrin.op2);
if (!intrin.op3->isContainedIntOrIImmed())
SwapnilGaikwad marked this conversation as resolved.
Show resolved Hide resolved
{
srcCount += BuildOperandUses(intrin.op3);
}
assert(dstCount == 0);
buildInternalRegisterUses();
SwapnilGaikwad marked this conversation as resolved.
Show resolved Hide resolved
*pDstCount = 0;
break;
}

case NI_AdvSimd_StoreVector64x2AndZip:
case NI_AdvSimd_StoreVector64x3AndZip:
Expand All @@ -1610,19 +1631,22 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou
case NI_AdvSimd_Arm64_StoreVector128x2:
case NI_AdvSimd_Arm64_StoreVector128x3:
case NI_AdvSimd_Arm64_StoreVector128x4:
{
assert(intrin.op1 != nullptr);
srcCount += BuildConsecutiveRegistersForUse(intrin.op2);
assert(dstCount == 0);
buildInternalRegisterUses();
*pDstCount = 0;
break;
}

case NI_AdvSimd_LoadAndInsertScalarVector64x2:
case NI_AdvSimd_LoadAndInsertScalarVector64x3:
case NI_AdvSimd_LoadAndInsertScalarVector64x4:
case NI_AdvSimd_Arm64_LoadAndInsertScalarVector128x2:
case NI_AdvSimd_Arm64_LoadAndInsertScalarVector128x3:
case NI_AdvSimd_Arm64_LoadAndInsertScalarVector128x4:
{
assert(intrin.op2 != nullptr);
assert(intrin.op3 != nullptr);
assert(isRMW);
Expand All @@ -1634,6 +1658,8 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou
assert(intrinsicTree->OperIsMemoryLoadOrStore());
srcCount += BuildAddrUses(intrin.op3);
FALLTHROUGH;
}

case NI_AdvSimd_LoadVector64x2AndUnzip:
case NI_AdvSimd_LoadVector64x3AndUnzip:
case NI_AdvSimd_LoadVector64x4AndUnzip:
Expand Down
Loading
Loading