Skip to content

Commit

Permalink
Arm64: Implement LoadVector64x*AndUnzip and LoadVector128x*AndUnzip A…
Browse files Browse the repository at this point in the history
…PIs (#94128)

* Initial LoadVector64/128AndUnzip APIs

* Added hwintrinsics

* Added generating tests

* Fix semi

* Whitespace

* Whitespace

* Whitespace

* Whitespace

* Whitespace

* Whitespace

* Fix test generation

* Fix missing cases in gentree

* Minor formatting of the test generator

* Fix comments

* Fixing up tests

* Use the right register variants
  • Loading branch information
TIHan authored Oct 30, 2023
1 parent d58dd91 commit 4b5756d
Show file tree
Hide file tree
Showing 9 changed files with 643 additions and 1 deletion.
6 changes: 6 additions & 0 deletions src/coreclr/jit/gentree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25852,31 +25852,37 @@ ClassLayout* GenTreeHWIntrinsic::GetLayout(Compiler* compiler) const
case NI_AdvSimd_Arm64_LoadPairScalarVector64NonTemporal:
case NI_AdvSimd_Arm64_LoadPairVector64:
case NI_AdvSimd_Arm64_LoadPairVector64NonTemporal:
case NI_AdvSimd_LoadVector64x2AndUnzip:
case NI_AdvSimd_LoadVector64x2:
case NI_AdvSimd_LoadAndInsertScalarVector64x2:
case NI_AdvSimd_LoadAndReplicateToVector64x2:
return compiler->typGetBlkLayout(16);

case NI_AdvSimd_Arm64_LoadPairVector128:
case NI_AdvSimd_Arm64_LoadPairVector128NonTemporal:
case NI_AdvSimd_Arm64_LoadVector128x2AndUnzip:
case NI_AdvSimd_Arm64_LoadVector128x2:
case NI_AdvSimd_LoadVector64x4:
case NI_AdvSimd_LoadVector64x4AndUnzip:
case NI_AdvSimd_LoadAndReplicateToVector64x4:
case NI_AdvSimd_Arm64_LoadAndReplicateToVector128x2:
case NI_AdvSimd_Arm64_LoadAndInsertScalarVector128x2:
case NI_AdvSimd_LoadAndInsertScalarVector64x4:
return compiler->typGetBlkLayout(32);

case NI_AdvSimd_LoadVector64x3AndUnzip:
case NI_AdvSimd_LoadVector64x3:
case NI_AdvSimd_LoadAndInsertScalarVector64x3:
case NI_AdvSimd_LoadAndReplicateToVector64x3:
return compiler->typGetBlkLayout(24);

case NI_AdvSimd_Arm64_LoadVector128x3AndUnzip:
case NI_AdvSimd_Arm64_LoadVector128x3:
case NI_AdvSimd_Arm64_LoadAndInsertScalarVector128x3:
case NI_AdvSimd_Arm64_LoadAndReplicateToVector128x3:
return compiler->typGetBlkLayout(48);

case NI_AdvSimd_Arm64_LoadVector128x4AndUnzip:
case NI_AdvSimd_Arm64_LoadVector128x4:
case NI_AdvSimd_Arm64_LoadAndInsertScalarVector128x4:
case NI_AdvSimd_Arm64_LoadAndReplicateToVector128x4:
Expand Down
6 changes: 6 additions & 0 deletions src/coreclr/jit/hwintrinsic.h
Original file line number Diff line number Diff line change
Expand Up @@ -768,6 +768,8 @@ struct HWIntrinsicInfo
case NI_AdvSimd_Arm64_LoadPairVector64NonTemporal:
case NI_AdvSimd_Arm64_LoadPairVector128:
case NI_AdvSimd_Arm64_LoadPairVector128NonTemporal:
case NI_AdvSimd_LoadVector64x2AndUnzip:
case NI_AdvSimd_Arm64_LoadVector128x2AndUnzip:
case NI_AdvSimd_LoadVector64x2:
case NI_AdvSimd_Arm64_LoadVector128x2:
case NI_AdvSimd_LoadAndInsertScalarVector64x2:
Expand All @@ -776,6 +778,8 @@ struct HWIntrinsicInfo
case NI_AdvSimd_Arm64_LoadAndReplicateToVector128x2:
return 2;

case NI_AdvSimd_LoadVector64x3AndUnzip:
case NI_AdvSimd_Arm64_LoadVector128x3AndUnzip:
case NI_AdvSimd_LoadVector64x3:
case NI_AdvSimd_Arm64_LoadVector128x3:
case NI_AdvSimd_LoadAndInsertScalarVector64x3:
Expand All @@ -784,6 +788,8 @@ struct HWIntrinsicInfo
case NI_AdvSimd_Arm64_LoadAndReplicateToVector128x3:
return 3;

case NI_AdvSimd_LoadVector64x4AndUnzip:
case NI_AdvSimd_Arm64_LoadVector128x4AndUnzip:
case NI_AdvSimd_LoadVector64x4:
case NI_AdvSimd_Arm64_LoadVector128x4:
case NI_AdvSimd_LoadAndInsertScalarVector64x4:
Expand Down
6 changes: 6 additions & 0 deletions src/coreclr/jit/hwintrinsicarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1883,6 +1883,12 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
break;
}

case NI_AdvSimd_LoadVector64x2AndUnzip:
case NI_AdvSimd_LoadVector64x3AndUnzip:
case NI_AdvSimd_LoadVector64x4AndUnzip:
case NI_AdvSimd_Arm64_LoadVector128x2AndUnzip:
case NI_AdvSimd_Arm64_LoadVector128x3AndUnzip:
case NI_AdvSimd_Arm64_LoadVector128x4AndUnzip:
case NI_AdvSimd_LoadVector64x2:
case NI_AdvSimd_LoadVector64x3:
case NI_AdvSimd_LoadVector64x4:
Expand Down
6 changes: 6 additions & 0 deletions src/coreclr/jit/hwintrinsiclistarm64.h
Original file line number Diff line number Diff line change
Expand Up @@ -326,6 +326,9 @@ HARDWARE_INTRINSIC(AdvSimd, LoadAndReplicateToVector64x3,
HARDWARE_INTRINSIC(AdvSimd, LoadAndReplicateToVector64x4, 8, 1, true, {INS_ld4r, INS_ld4r, INS_ld4r, INS_ld4r, INS_ld4r, INS_ld4r, INS_invalid, INS_invalid, INS_ld4r, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_MultiReg|HW_Flag_NeedsConsecutiveRegisters)
HARDWARE_INTRINSIC(AdvSimd, LoadVector64, 8, 1, true, {INS_ld1, INS_ld1, INS_ld1, INS_ld1, INS_ld1, INS_ld1, INS_ld1, INS_ld1, INS_ld1, INS_ld1}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
HARDWARE_INTRINSIC(AdvSimd, LoadVector128, 16, 1, true, {INS_ld1, INS_ld1, INS_ld1, INS_ld1, INS_ld1, INS_ld1, INS_ld1, INS_ld1, INS_ld1, INS_ld1}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
// Multi-register loads of 64-bit vectors.
// The "AndUnzip" variants de-interleave the loaded elements across the destination registers, which is the
// behavior of LD2/LD3/LD4 (multiple structures). The plain variants load consecutive memory into consecutive
// registers without de-interleaving, which is LD1 with 2/3/4 registers.
HARDWARE_INTRINSIC(AdvSimd, LoadVector64x2AndUnzip, 8, 1, true, {INS_ld2, INS_ld2, INS_ld2, INS_ld2, INS_ld2, INS_ld2, INS_invalid, INS_invalid, INS_ld2, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_MultiReg|HW_Flag_NeedsConsecutiveRegisters)
HARDWARE_INTRINSIC(AdvSimd, LoadVector64x3AndUnzip, 8, 1, true, {INS_ld3, INS_ld3, INS_ld3, INS_ld3, INS_ld3, INS_ld3, INS_invalid, INS_invalid, INS_ld3, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_MultiReg|HW_Flag_NeedsConsecutiveRegisters)
HARDWARE_INTRINSIC(AdvSimd, LoadVector64x4AndUnzip, 8, 1, true, {INS_ld4, INS_ld4, INS_ld4, INS_ld4, INS_ld4, INS_ld4, INS_invalid, INS_invalid, INS_ld4, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_MultiReg|HW_Flag_NeedsConsecutiveRegisters)
HARDWARE_INTRINSIC(AdvSimd, LoadVector64x2, 8, 1, true, {INS_ld1_2regs, INS_ld1_2regs, INS_ld1_2regs, INS_ld1_2regs, INS_ld1_2regs, INS_ld1_2regs, INS_invalid, INS_invalid, INS_ld1_2regs, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_MultiReg|HW_Flag_NeedsConsecutiveRegisters)
HARDWARE_INTRINSIC(AdvSimd, LoadVector64x3, 8, 1, true, {INS_ld1_3regs, INS_ld1_3regs, INS_ld1_3regs, INS_ld1_3regs, INS_ld1_3regs, INS_ld1_3regs, INS_invalid, INS_invalid, INS_ld1_3regs, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_MultiReg|HW_Flag_NeedsConsecutiveRegisters)
HARDWARE_INTRINSIC(AdvSimd, LoadVector64x4, 8, 1, true, {INS_ld1_4regs, INS_ld1_4regs, INS_ld1_4regs, INS_ld1_4regs, INS_ld1_4regs, INS_ld1_4regs, INS_invalid, INS_invalid, INS_ld1_4regs, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_MultiReg|HW_Flag_NeedsConsecutiveRegisters)
Expand Down Expand Up @@ -589,6 +592,9 @@ HARDWARE_INTRINSIC(AdvSimd_Arm64, LoadAndInsertScalar,
HARDWARE_INTRINSIC(AdvSimd_Arm64, LoadAndInsertScalarVector128x2, 16, 3, true, {INS_ld2, INS_ld2, INS_ld2, INS_ld2, INS_ld2, INS_ld2, INS_ld2, INS_ld2, INS_ld2, INS_ld2}, HW_Category_MemoryLoad, HW_Flag_HasImmediateOperand|HW_Flag_SpecialImport|HW_Flag_HasRMWSemantics|HW_Flag_SIMDScalar|HW_Flag_SpecialCodeGen|HW_Flag_MultiReg|HW_Flag_NeedsConsecutiveRegisters)
HARDWARE_INTRINSIC(AdvSimd_Arm64, LoadAndInsertScalarVector128x3, 16, 3, true, {INS_ld3, INS_ld3, INS_ld3, INS_ld3, INS_ld3, INS_ld3, INS_ld3, INS_ld3, INS_ld3, INS_ld3}, HW_Category_MemoryLoad, HW_Flag_HasImmediateOperand|HW_Flag_SpecialImport|HW_Flag_HasRMWSemantics|HW_Flag_SIMDScalar|HW_Flag_SpecialCodeGen|HW_Flag_MultiReg|HW_Flag_NeedsConsecutiveRegisters)
HARDWARE_INTRINSIC(AdvSimd_Arm64, LoadAndInsertScalarVector128x4, 16, 3, true, {INS_ld4, INS_ld4, INS_ld4, INS_ld4, INS_ld4, INS_ld4, INS_ld4, INS_ld4, INS_ld4, INS_ld4}, HW_Category_MemoryLoad, HW_Flag_HasImmediateOperand|HW_Flag_SpecialImport|HW_Flag_HasRMWSemantics|HW_Flag_SIMDScalar|HW_Flag_SpecialCodeGen|HW_Flag_MultiReg|HW_Flag_NeedsConsecutiveRegisters)
// Multi-register loads of 128-bit vectors.
// The "AndUnzip" variants de-interleave the loaded elements across the destination registers, which is the
// behavior of LD2/LD3/LD4 (multiple structures). The plain variants load consecutive memory into consecutive
// registers without de-interleaving, which is LD1 with 2/3/4 registers.
HARDWARE_INTRINSIC(AdvSimd_Arm64, LoadVector128x2AndUnzip, 16, 1, true, {INS_ld2, INS_ld2, INS_ld2, INS_ld2, INS_ld2, INS_ld2, INS_ld2, INS_ld2, INS_ld2, INS_ld2}, HW_Category_MemoryLoad, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_MultiReg|HW_Flag_NeedsConsecutiveRegisters)
HARDWARE_INTRINSIC(AdvSimd_Arm64, LoadVector128x3AndUnzip, 16, 1, true, {INS_ld3, INS_ld3, INS_ld3, INS_ld3, INS_ld3, INS_ld3, INS_ld3, INS_ld3, INS_ld3, INS_ld3}, HW_Category_MemoryLoad, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_MultiReg|HW_Flag_NeedsConsecutiveRegisters)
HARDWARE_INTRINSIC(AdvSimd_Arm64, LoadVector128x4AndUnzip, 16, 1, true, {INS_ld4, INS_ld4, INS_ld4, INS_ld4, INS_ld4, INS_ld4, INS_ld4, INS_ld4, INS_ld4, INS_ld4}, HW_Category_MemoryLoad, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_MultiReg|HW_Flag_NeedsConsecutiveRegisters)
HARDWARE_INTRINSIC(AdvSimd_Arm64, LoadVector128x2, 16, 1, true, {INS_ld1_2regs, INS_ld1_2regs, INS_ld1_2regs, INS_ld1_2regs, INS_ld1_2regs, INS_ld1_2regs, INS_ld1_2regs, INS_ld1_2regs, INS_ld1_2regs, INS_ld1_2regs}, HW_Category_MemoryLoad, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_MultiReg|HW_Flag_NeedsConsecutiveRegisters)
HARDWARE_INTRINSIC(AdvSimd_Arm64, LoadVector128x3, 16, 1, true, {INS_ld1_3regs, INS_ld1_3regs, INS_ld1_3regs, INS_ld1_3regs, INS_ld1_3regs, INS_ld1_3regs, INS_ld1_3regs, INS_ld1_3regs, INS_ld1_3regs, INS_ld1_3regs}, HW_Category_MemoryLoad, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_MultiReg|HW_Flag_NeedsConsecutiveRegisters)
HARDWARE_INTRINSIC(AdvSimd_Arm64, LoadVector128x4, 16, 1, true, {INS_ld1_4regs, INS_ld1_4regs, INS_ld1_4regs, INS_ld1_4regs, INS_ld1_4regs, INS_ld1_4regs, INS_ld1_4regs, INS_ld1_4regs, INS_ld1_4regs, INS_ld1_4regs}, HW_Category_MemoryLoad, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_MultiReg|HW_Flag_NeedsConsecutiveRegisters)
Expand Down
6 changes: 6 additions & 0 deletions src/coreclr/jit/lsraarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1632,6 +1632,12 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou
assert(intrinsicTree->OperIsMemoryLoadOrStore());
srcCount += BuildAddrUses(intrin.op3);
FALLTHROUGH;
case NI_AdvSimd_LoadVector64x2AndUnzip:
case NI_AdvSimd_LoadVector64x3AndUnzip:
case NI_AdvSimd_LoadVector64x4AndUnzip:
case NI_AdvSimd_Arm64_LoadVector128x2AndUnzip:
case NI_AdvSimd_Arm64_LoadVector128x3AndUnzip:
case NI_AdvSimd_Arm64_LoadVector128x4AndUnzip:
case NI_AdvSimd_LoadVector64x2:
case NI_AdvSimd_LoadVector64x3:
case NI_AdvSimd_LoadVector64x4:
Expand Down
Loading

0 comments on commit 4b5756d

Please sign in to comment.