Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Arm64: Implement LoadVector64x*AndUnzip and LoadVector128x*AndUnzip APIs #94128

Merged
merged 16 commits into from
Oct 30, 2023
6 changes: 6 additions & 0 deletions src/coreclr/jit/gentree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25852,31 +25852,37 @@ ClassLayout* GenTreeHWIntrinsic::GetLayout(Compiler* compiler) const
case NI_AdvSimd_Arm64_LoadPairScalarVector64NonTemporal:
case NI_AdvSimd_Arm64_LoadPairVector64:
case NI_AdvSimd_Arm64_LoadPairVector64NonTemporal:
case NI_AdvSimd_LoadVector64x2AndUnzip:
case NI_AdvSimd_LoadVector64x2:
case NI_AdvSimd_LoadAndInsertScalarVector64x2:
case NI_AdvSimd_LoadAndReplicateToVector64x2:
return compiler->typGetBlkLayout(16);

case NI_AdvSimd_Arm64_LoadPairVector128:
case NI_AdvSimd_Arm64_LoadPairVector128NonTemporal:
case NI_AdvSimd_Arm64_LoadVector128x2AndUnzip:
case NI_AdvSimd_Arm64_LoadVector128x2:
case NI_AdvSimd_LoadVector64x4:
case NI_AdvSimd_LoadVector64x4AndUnzip:
case NI_AdvSimd_LoadAndReplicateToVector64x4:
case NI_AdvSimd_Arm64_LoadAndReplicateToVector128x2:
case NI_AdvSimd_Arm64_LoadAndInsertScalarVector128x2:
case NI_AdvSimd_LoadAndInsertScalarVector64x4:
return compiler->typGetBlkLayout(32);

case NI_AdvSimd_LoadVector64x3AndUnzip:
case NI_AdvSimd_LoadVector64x3:
case NI_AdvSimd_LoadAndInsertScalarVector64x3:
case NI_AdvSimd_LoadAndReplicateToVector64x3:
return compiler->typGetBlkLayout(24);

case NI_AdvSimd_Arm64_LoadVector128x3AndUnzip:
case NI_AdvSimd_Arm64_LoadVector128x3:
case NI_AdvSimd_Arm64_LoadAndInsertScalarVector128x3:
case NI_AdvSimd_Arm64_LoadAndReplicateToVector128x3:
return compiler->typGetBlkLayout(48);

case NI_AdvSimd_Arm64_LoadVector128x4AndUnzip:
case NI_AdvSimd_Arm64_LoadVector128x4:
case NI_AdvSimd_Arm64_LoadAndInsertScalarVector128x4:
case NI_AdvSimd_Arm64_LoadAndReplicateToVector128x4:
Expand Down
6 changes: 6 additions & 0 deletions src/coreclr/jit/hwintrinsic.h
Original file line number Diff line number Diff line change
Expand Up @@ -768,6 +768,8 @@ struct HWIntrinsicInfo
case NI_AdvSimd_Arm64_LoadPairVector64NonTemporal:
case NI_AdvSimd_Arm64_LoadPairVector128:
case NI_AdvSimd_Arm64_LoadPairVector128NonTemporal:
case NI_AdvSimd_LoadVector64x2AndUnzip:
case NI_AdvSimd_Arm64_LoadVector128x2AndUnzip:
case NI_AdvSimd_LoadVector64x2:
case NI_AdvSimd_Arm64_LoadVector128x2:
case NI_AdvSimd_LoadAndInsertScalarVector64x2:
Expand All @@ -776,6 +778,8 @@ struct HWIntrinsicInfo
case NI_AdvSimd_Arm64_LoadAndReplicateToVector128x2:
return 2;

case NI_AdvSimd_LoadVector64x3AndUnzip:
case NI_AdvSimd_Arm64_LoadVector128x3AndUnzip:
case NI_AdvSimd_LoadVector64x3:
case NI_AdvSimd_Arm64_LoadVector128x3:
case NI_AdvSimd_LoadAndInsertScalarVector64x3:
Expand All @@ -784,6 +788,8 @@ struct HWIntrinsicInfo
case NI_AdvSimd_Arm64_LoadAndReplicateToVector128x3:
return 3;

case NI_AdvSimd_LoadVector64x4AndUnzip:
case NI_AdvSimd_Arm64_LoadVector128x4AndUnzip:
case NI_AdvSimd_LoadVector64x4:
case NI_AdvSimd_Arm64_LoadVector128x4:
case NI_AdvSimd_LoadAndInsertScalarVector64x4:
Expand Down
6 changes: 6 additions & 0 deletions src/coreclr/jit/hwintrinsicarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1883,6 +1883,12 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
break;
}

case NI_AdvSimd_LoadVector64x2AndUnzip:
case NI_AdvSimd_LoadVector64x3AndUnzip:
case NI_AdvSimd_LoadVector64x4AndUnzip:
case NI_AdvSimd_Arm64_LoadVector128x2AndUnzip:
case NI_AdvSimd_Arm64_LoadVector128x3AndUnzip:
case NI_AdvSimd_Arm64_LoadVector128x4AndUnzip:
case NI_AdvSimd_LoadVector64x2:
case NI_AdvSimd_LoadVector64x3:
case NI_AdvSimd_LoadVector64x4:
Expand Down
6 changes: 6 additions & 0 deletions src/coreclr/jit/hwintrinsiclistarm64.h
Original file line number Diff line number Diff line change
Expand Up @@ -326,6 +326,9 @@ HARDWARE_INTRINSIC(AdvSimd, LoadAndReplicateToVector64x3,
// AdvSimd table rows. Each row names the intrinsic and supplies a ten-entry instruction
// list, a category and a set of flags.
// NOTE(review): the meaning of the leading numeric/bool fields and of each instruction slot
// is defined by the HARDWARE_INTRINSIC macro, which is not visible here - presumably one
// slot per base type, with INS_invalid marking an unsupported one; confirm against the
// table header.
//
// LoadAndReplicateToVector64x4: INS_ld4r in seven of the ten slots, tagged as a multi-register
// memory load that needs consecutive registers.
HARDWARE_INTRINSIC(AdvSimd, LoadAndReplicateToVector64x4, 8, 1, true, {INS_ld4r, INS_ld4r, INS_ld4r, INS_ld4r, INS_ld4r, INS_ld4r, INS_invalid, INS_invalid, INS_ld4r, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_MultiReg|HW_Flag_NeedsConsecutiveRegisters)
// LoadVector64 / LoadVector128: INS_ld1 in every slot, category HW_Category_Helper, flagged
// HW_Flag_SpecialImport|HW_Flag_NoCodeGen.
HARDWARE_INTRINSIC(AdvSimd, LoadVector64, 8, 1, true, {INS_ld1, INS_ld1, INS_ld1, INS_ld1, INS_ld1, INS_ld1, INS_ld1, INS_ld1, INS_ld1, INS_ld1}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
HARDWARE_INTRINSIC(AdvSimd, LoadVector128, 16, 1, true, {INS_ld1, INS_ld1, INS_ld1, INS_ld1, INS_ld1, INS_ld1, INS_ld1, INS_ld1, INS_ld1, INS_ld1}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
// Multi-register Vector64 loads.
//
// The "AndUnzip" variants de-interleave memory across the destination registers, which is
// what the A64 structure loads LD2/LD3/LD4 do. The plain xN variants load consecutive memory
// into consecutive registers with no de-interleaving, which is LD1 with a 2/3/4 register list
// (INS_ld1_2regs / INS_ld1_3regs / INS_ld1_4regs).
//
// The two groups previously had these instructions the other way round. Only the instruction
// in the populated slots is changed; the INS_invalid slots are exactly as before.
HARDWARE_INTRINSIC(AdvSimd, LoadVector64x2AndUnzip, 8, 1, true, {INS_ld2, INS_ld2, INS_ld2, INS_ld2, INS_ld2, INS_ld2, INS_invalid, INS_invalid, INS_ld2, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_MultiReg|HW_Flag_NeedsConsecutiveRegisters)
HARDWARE_INTRINSIC(AdvSimd, LoadVector64x3AndUnzip, 8, 1, true, {INS_ld3, INS_ld3, INS_ld3, INS_ld3, INS_ld3, INS_ld3, INS_invalid, INS_invalid, INS_ld3, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_MultiReg|HW_Flag_NeedsConsecutiveRegisters)
HARDWARE_INTRINSIC(AdvSimd, LoadVector64x4AndUnzip, 8, 1, true, {INS_ld4, INS_ld4, INS_ld4, INS_ld4, INS_ld4, INS_ld4, INS_invalid, INS_invalid, INS_ld4, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_MultiReg|HW_Flag_NeedsConsecutiveRegisters)
HARDWARE_INTRINSIC(AdvSimd, LoadVector64x2, 8, 1, true, {INS_ld1_2regs, INS_ld1_2regs, INS_ld1_2regs, INS_ld1_2regs, INS_ld1_2regs, INS_ld1_2regs, INS_invalid, INS_invalid, INS_ld1_2regs, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_MultiReg|HW_Flag_NeedsConsecutiveRegisters)
HARDWARE_INTRINSIC(AdvSimd, LoadVector64x3, 8, 1, true, {INS_ld1_3regs, INS_ld1_3regs, INS_ld1_3regs, INS_ld1_3regs, INS_ld1_3regs, INS_ld1_3regs, INS_invalid, INS_invalid, INS_ld1_3regs, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_MultiReg|HW_Flag_NeedsConsecutiveRegisters)
HARDWARE_INTRINSIC(AdvSimd, LoadVector64x4, 8, 1, true, {INS_ld1_4regs, INS_ld1_4regs, INS_ld1_4regs, INS_ld1_4regs, INS_ld1_4regs, INS_ld1_4regs, INS_invalid, INS_invalid, INS_ld1_4regs, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_MultiReg|HW_Flag_NeedsConsecutiveRegisters)
Expand Down Expand Up @@ -589,6 +592,9 @@ HARDWARE_INTRINSIC(AdvSimd_Arm64, LoadAndInsertScalar,
// AdvSimd_Arm64 rows for LoadAndInsertScalarVector128x2/x3/x4. Each lists a single
// instruction (ld2, ld3 and ld4 respectively) in all ten instruction slots, under
// HW_Category_MemoryLoad, and is flagged as a multi-register operation with an immediate
// operand, RMW semantics, special import/codegen and a consecutive-register requirement.
// NOTE(review): the meaning of the leading "16, 3, true" fields is defined by the
// HARDWARE_INTRINSIC macro, which is not visible here - confirm against the table header.
HARDWARE_INTRINSIC(AdvSimd_Arm64, LoadAndInsertScalarVector128x2, 16, 3, true, {INS_ld2, INS_ld2, INS_ld2, INS_ld2, INS_ld2, INS_ld2, INS_ld2, INS_ld2, INS_ld2, INS_ld2}, HW_Category_MemoryLoad, HW_Flag_HasImmediateOperand|HW_Flag_SpecialImport|HW_Flag_HasRMWSemantics|HW_Flag_SIMDScalar|HW_Flag_SpecialCodeGen|HW_Flag_MultiReg|HW_Flag_NeedsConsecutiveRegisters)
HARDWARE_INTRINSIC(AdvSimd_Arm64, LoadAndInsertScalarVector128x3, 16, 3, true, {INS_ld3, INS_ld3, INS_ld3, INS_ld3, INS_ld3, INS_ld3, INS_ld3, INS_ld3, INS_ld3, INS_ld3}, HW_Category_MemoryLoad, HW_Flag_HasImmediateOperand|HW_Flag_SpecialImport|HW_Flag_HasRMWSemantics|HW_Flag_SIMDScalar|HW_Flag_SpecialCodeGen|HW_Flag_MultiReg|HW_Flag_NeedsConsecutiveRegisters)
HARDWARE_INTRINSIC(AdvSimd_Arm64, LoadAndInsertScalarVector128x4, 16, 3, true, {INS_ld4, INS_ld4, INS_ld4, INS_ld4, INS_ld4, INS_ld4, INS_ld4, INS_ld4, INS_ld4, INS_ld4}, HW_Category_MemoryLoad, HW_Flag_HasImmediateOperand|HW_Flag_SpecialImport|HW_Flag_HasRMWSemantics|HW_Flag_SIMDScalar|HW_Flag_SpecialCodeGen|HW_Flag_MultiReg|HW_Flag_NeedsConsecutiveRegisters)
// Multi-register Vector128 loads.
//
// The "AndUnzip" variants de-interleave memory across the destination registers, which is
// what the A64 structure loads LD2/LD3/LD4 do. The plain xN variants load consecutive memory
// into consecutive registers with no de-interleaving, which is LD1 with a 2/3/4 register list
// (INS_ld1_2regs / INS_ld1_3regs / INS_ld1_4regs).
//
// The two groups previously had these instructions the other way round; everything else in
// each row (size, argument count, category, flags) is unchanged.
HARDWARE_INTRINSIC(AdvSimd_Arm64, LoadVector128x2AndUnzip, 16, 1, true, {INS_ld2, INS_ld2, INS_ld2, INS_ld2, INS_ld2, INS_ld2, INS_ld2, INS_ld2, INS_ld2, INS_ld2}, HW_Category_MemoryLoad, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_MultiReg|HW_Flag_NeedsConsecutiveRegisters)
HARDWARE_INTRINSIC(AdvSimd_Arm64, LoadVector128x3AndUnzip, 16, 1, true, {INS_ld3, INS_ld3, INS_ld3, INS_ld3, INS_ld3, INS_ld3, INS_ld3, INS_ld3, INS_ld3, INS_ld3}, HW_Category_MemoryLoad, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_MultiReg|HW_Flag_NeedsConsecutiveRegisters)
HARDWARE_INTRINSIC(AdvSimd_Arm64, LoadVector128x4AndUnzip, 16, 1, true, {INS_ld4, INS_ld4, INS_ld4, INS_ld4, INS_ld4, INS_ld4, INS_ld4, INS_ld4, INS_ld4, INS_ld4}, HW_Category_MemoryLoad, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_MultiReg|HW_Flag_NeedsConsecutiveRegisters)
HARDWARE_INTRINSIC(AdvSimd_Arm64, LoadVector128x2, 16, 1, true, {INS_ld1_2regs, INS_ld1_2regs, INS_ld1_2regs, INS_ld1_2regs, INS_ld1_2regs, INS_ld1_2regs, INS_ld1_2regs, INS_ld1_2regs, INS_ld1_2regs, INS_ld1_2regs}, HW_Category_MemoryLoad, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_MultiReg|HW_Flag_NeedsConsecutiveRegisters)
HARDWARE_INTRINSIC(AdvSimd_Arm64, LoadVector128x3, 16, 1, true, {INS_ld1_3regs, INS_ld1_3regs, INS_ld1_3regs, INS_ld1_3regs, INS_ld1_3regs, INS_ld1_3regs, INS_ld1_3regs, INS_ld1_3regs, INS_ld1_3regs, INS_ld1_3regs}, HW_Category_MemoryLoad, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_MultiReg|HW_Flag_NeedsConsecutiveRegisters)
HARDWARE_INTRINSIC(AdvSimd_Arm64, LoadVector128x4, 16, 1, true, {INS_ld1_4regs, INS_ld1_4regs, INS_ld1_4regs, INS_ld1_4regs, INS_ld1_4regs, INS_ld1_4regs, INS_ld1_4regs, INS_ld1_4regs, INS_ld1_4regs, INS_ld1_4regs}, HW_Category_MemoryLoad, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_MultiReg|HW_Flag_NeedsConsecutiveRegisters)
Expand Down
6 changes: 6 additions & 0 deletions src/coreclr/jit/lsraarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1632,6 +1632,12 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou
assert(intrinsicTree->OperIsMemoryLoadOrStore());
srcCount += BuildAddrUses(intrin.op3);
FALLTHROUGH;
case NI_AdvSimd_LoadVector64x2AndUnzip:
case NI_AdvSimd_LoadVector64x3AndUnzip:
case NI_AdvSimd_LoadVector64x4AndUnzip:
case NI_AdvSimd_Arm64_LoadVector128x2AndUnzip:
case NI_AdvSimd_Arm64_LoadVector128x3AndUnzip:
case NI_AdvSimd_Arm64_LoadVector128x4AndUnzip:
case NI_AdvSimd_LoadVector64x2:
case NI_AdvSimd_LoadVector64x3:
case NI_AdvSimd_LoadVector64x4:
Expand Down
Loading
Loading