Skip to content

Commit

Permalink
Improve the codegen of the vector accelerated System.Numerics.* types (#81335)
Browse files Browse the repository at this point in the history

* Allow Quaternion and Plane to be imported as TYP_SIMD16

* Add some minimal intrinsification of Quaternion and Plane

* Ensure Vector indexers are marked readonly

* Ensure the vector constant properties are intrinsic

* Ensure that the vector GetElement and WithElement APIs are intrinsic

* Ensure vector division by scalar is intrinsic

* Minor cleanup to vector files

* Ensure vector arithmetic functions are consistently intrinsic

* Ensure creating a plane from a vector4 is intrinsic

* Ensure accessing the Normal field of a Plane is efficient

* Ensure Quaternion and Plane return the correct SimdAsHWIntrinsicClassId

* Specially optimize Create(Dot(..., ...)) and Create(Sqrt(Dot(..., ...)))

* Ensure vector clamp is intrinsic

* Ensure vector lerp is intrinsic

* Ensure vector length is intrinsic

* Ensure vector normalize is intrinsic

* Ensure vector distance is intrinsic

* Optimize the vector transform by matrix methods

* Ensure quaternion conjugate and inverse are intrinsic

* Fixing assert, formatting, and build failure

* Ensure Quaternion.Inverse uses LengthSquared not Length

* Ensure Create APIs are correctly imported as intrinsic

* Ensure we don't assert for AltJit

* Ensure lowering DotProd doesn't break CSE for scalar vs vector results

* Minimally fixup Mono for the new intrinsics

* Ensure SN_GetElement doesn't raise an assert

* Ensure get_UnitW is ordered correctly for Mono

* Try to fix SN_GetElement

* Fix SN_WithElement for Mono

* Resolving mono formatting feedback
  • Loading branch information
tannergooding authored Feb 2, 2023
1 parent 4354abd commit 90401dd
Show file tree
Hide file tree
Showing 25 changed files with 1,686 additions and 570 deletions.
76 changes: 70 additions & 6 deletions src/coreclr/jit/compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -8417,6 +8417,8 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
CORINFO_CLASS_HANDLE SIMDNIntHandle;
CORINFO_CLASS_HANDLE SIMDNUIntHandle;

CORINFO_CLASS_HANDLE SIMDPlaneHandle;
CORINFO_CLASS_HANDLE SIMDQuaternionHandle;
CORINFO_CLASS_HANDLE SIMDVector2Handle;
CORINFO_CLASS_HANDLE SIMDVector3Handle;
CORINFO_CLASS_HANDLE SIMDVector4Handle;
Expand Down Expand Up @@ -8494,23 +8496,54 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
switch (simdType)
{
case TYP_SIMD8:
{
return m_simdHandleCache->SIMDVector2Handle;
}

case TYP_SIMD12:
{
return m_simdHandleCache->SIMDVector3Handle;
}

case TYP_SIMD16:
if ((getSIMDVectorType() == TYP_SIMD32) ||
(m_simdHandleCache->SIMDVector4Handle != NO_CLASS_HANDLE))
{
// We order the checks roughly by expected hit count so early exits are possible

if (simdBaseJitType != CORINFO_TYPE_FLOAT)
{
// We could be Vector<T>, so handle below
assert(getSIMDVectorType() == TYP_SIMD16);
break;
}

if (m_simdHandleCache->SIMDVector4Handle != NO_CLASS_HANDLE)
{
return m_simdHandleCache->SIMDVector4Handle;
}
break;

if (m_simdHandleCache->SIMDQuaternionHandle != NO_CLASS_HANDLE)
{
return m_simdHandleCache->SIMDQuaternionHandle;
}

if (m_simdHandleCache->SIMDPlaneHandle != NO_CLASS_HANDLE)
{
return m_simdHandleCache->SIMDPlaneHandle;
}

return NO_CLASS_HANDLE;
}

case TYP_SIMD32:
break;

default:
unreached();
}
}

assert(emitTypeSize(simdType) <= largestEnregisterableStructSize());

switch (simdBaseJitType)
{
case CORINFO_TYPE_FLOAT:
Expand Down Expand Up @@ -8540,6 +8573,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
default:
assert(!"Didn't find a class handle for simdType");
}

return NO_CLASS_HANDLE;
}

Expand Down Expand Up @@ -8617,9 +8651,39 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
// actually be declared as having fields.
bool isOpaqueSIMDType(CORINFO_CLASS_HANDLE structHandle) const
{
    // Returns false when there is no SIMD handle cache, or when structHandle is one of the
    // cached Vector2/Vector3/Vector4/Quaternion/Plane handles; returns true for anything else.
    //
    // A single up-front `return` expression that only compared against Vector2/3/4 must not
    // precede these checks: it would make them unreachable and would classify Quaternion and
    // Plane as opaque.
    //
    // We order the checks roughly by expected hit count so early exits are possible

    if (m_simdHandleCache == nullptr)
    {
        return false;
    }

    if (structHandle == m_simdHandleCache->SIMDVector4Handle)
    {
        return false;
    }

    if (structHandle == m_simdHandleCache->SIMDVector3Handle)
    {
        return false;
    }

    if (structHandle == m_simdHandleCache->SIMDVector2Handle)
    {
        return false;
    }

    if (structHandle == m_simdHandleCache->SIMDQuaternionHandle)
    {
        return false;
    }

    if (structHandle == m_simdHandleCache->SIMDPlaneHandle)
    {
        return false;
    }

    // Not one of the explicitly compared handles, so treat it as opaque.
    return true;
}

// Returns true if the lclVar is an opaque SIMD type.
Expand Down
5 changes: 3 additions & 2 deletions src/coreclr/jit/gentree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21407,7 +21407,6 @@ GenTree* Compiler::gtNewSimdDotProdNode(var_types type,
bool isSimdAsHWIntrinsic)
{
assert(IsBaselineSimdIsaSupportedDebugOnly());
assert(varTypeIsArithmetic(type));

var_types simdType = getSIMDTypeForSize(simdSize);
assert(varTypeIsSIMD(simdType));
Expand All @@ -21419,7 +21418,9 @@ GenTree* Compiler::gtNewSimdDotProdNode(var_types type,
assert(op2->TypeIs(simdType));

var_types simdBaseType = JitType2PreciseVarType(simdBaseJitType);
assert(JITtype2varType(simdBaseJitType) == type);

// We support the return type being a SIMD for floating-point as a special optimization
assert(varTypeIsArithmetic(type) || (varTypeIsSIMD(type) && varTypeIsFloating(simdBaseType)));

NamedIntrinsic intrinsic = NI_Illegal;

Expand Down
88 changes: 69 additions & 19 deletions src/coreclr/jit/lclmorph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1156,16 +1156,32 @@ class LocalAddressVisitor final : public GenTreeVisitor<LocalAddressVisitor>
break;

#ifdef FEATURE_HW_INTRINSICS
// We have two cases we want to handle:
// 1. Vector2/3/4 and Quaternion where we have 4x float fields
// 2. Plane where we have 1x Vector3 and 1x float field

case IndirTransform::GetElement:
{
GenTree* hwiNode = nullptr;
var_types elementType = indir->TypeGet();
assert(elementType == TYP_FLOAT);
lclNode = BashToLclVar(indir->gtGetOp1(), lclNum);

if (elementType == TYP_FLOAT)
{
GenTree* indexNode = m_compiler->gtNewIconNode(val.Offset() / genTypeSize(elementType));
hwiNode = m_compiler->gtNewSimdGetElementNode(elementType, lclNode, indexNode, CORINFO_TYPE_FLOAT,
genTypeSize(varDsc),
/* isSimdAsHWIntrinsic */ true);
}
else
{
assert(elementType == TYP_SIMD12);
assert(genTypeSize(varDsc) == 16);
hwiNode =
m_compiler->gtNewSimdHWIntrinsicNode(elementType, lclNode, NI_Vector128_AsVector3,
CORINFO_TYPE_FLOAT, 16, /* isSimdAsHWIntrinsic */ true);
}

lclNode = BashToLclVar(indir->gtGetOp1(), lclNum);
GenTree* indexNode = m_compiler->gtNewIconNode(val.Offset() / genTypeSize(elementType));
GenTree* hwiNode = m_compiler->gtNewSimdGetElementNode(elementType, lclNode, indexNode,
CORINFO_TYPE_FLOAT, genTypeSize(varDsc),
/* isSimdAsHWIntrinsic */ false);
indir = hwiNode;
*val.Use() = hwiNode;
}
Expand All @@ -1174,17 +1190,35 @@ class LocalAddressVisitor final : public GenTreeVisitor<LocalAddressVisitor>
case IndirTransform::WithElement:
{
assert(user->OperIs(GT_ASG) && (user->gtGetOp1() == indir));

GenTree* hwiNode = nullptr;
var_types elementType = indir->TypeGet();
assert(elementType == TYP_FLOAT);

lclNode = BashToLclVar(indir, lclNum);
GenTree* simdLclNode = m_compiler->gtNewLclvNode(lclNum, varDsc->TypeGet());
GenTree* indexNode = m_compiler->gtNewIconNode(val.Offset() / genTypeSize(elementType));
GenTree* elementNode = user->gtGetOp2();
user->AsOp()->gtOp2 =
m_compiler->gtNewSimdWithElementNode(varDsc->TypeGet(), simdLclNode, indexNode, elementNode,
CORINFO_TYPE_FLOAT, genTypeSize(varDsc),
/* isSimdAsHWIntrinsic */ false);
lclNode = BashToLclVar(indir, lclNum);
GenTree* simdLclNode = m_compiler->gtNewLclvNode(lclNum, varDsc->TypeGet());
GenTree* elementNode = user->gtGetOp2();

if (elementType == TYP_FLOAT)
{
GenTree* indexNode = m_compiler->gtNewIconNode(val.Offset() / genTypeSize(elementType));
hwiNode = m_compiler->gtNewSimdWithElementNode(varDsc->TypeGet(), simdLclNode, indexNode,
elementNode, CORINFO_TYPE_FLOAT, genTypeSize(varDsc),
/* isSimdAsHWIntrinsic */ true);
}
else
{
assert(elementType == TYP_SIMD12);
assert(varDsc->TypeGet() == TYP_SIMD16);

// We invert the operands here and take elementNode as the main value and simdLclNode[3] as the
// new value. This gives us a new TYP_SIMD16 with all elements in the right spots

GenTree* indexNode = m_compiler->gtNewIconNode(3, TYP_INT);
hwiNode =
m_compiler->gtNewSimdWithElementNode(TYP_SIMD16, elementNode, indexNode, simdLclNode,
CORINFO_TYPE_FLOAT, 16, /* isSimdAsHWIntrinsic */ true);
}

user->AsOp()->gtOp2 = hwiNode;
user->ChangeType(varDsc->TypeGet());
}
break;
Expand Down Expand Up @@ -1300,10 +1334,26 @@ class LocalAddressVisitor final : public GenTreeVisitor<LocalAddressVisitor>
}

#ifdef FEATURE_HW_INTRINSICS
if (varTypeIsSIMD(varDsc) && indir->TypeIs(TYP_FLOAT) && ((val.Offset() % genTypeSize(TYP_FLOAT)) == 0) &&
m_compiler->IsBaselineSimdIsaSupported())
if (varTypeIsSIMD(varDsc))
{
return isDef ? IndirTransform::WithElement : IndirTransform::GetElement;
// We have two cases we want to handle:
// 1. Vector2/3/4 and Quaternion where we have 4x float fields
// 2. Plane where we have 1x Vector3 and 1x float field

if (indir->TypeIs(TYP_FLOAT))
{
if (((val.Offset() % genTypeSize(TYP_FLOAT)) == 0) && m_compiler->IsBaselineSimdIsaSupported())
{
return isDef ? IndirTransform::WithElement : IndirTransform::GetElement;
}
}
else if (indir->TypeIs(TYP_SIMD12))
{
if ((val.Offset() == 0) && m_compiler->IsBaselineSimdIsaSupported())
{
return isDef ? IndirTransform::WithElement : IndirTransform::GetElement;
}
}
}
#endif // FEATURE_HW_INTRINSICS

Expand Down
38 changes: 28 additions & 10 deletions src/coreclr/jit/lowerarmarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1604,6 +1604,9 @@ GenTree* Lowering::LowerHWIntrinsicDot(GenTreeHWIntrinsic* node)
assert(varTypeIsArithmetic(simdBaseType));
assert(simdSize != 0);

// We support the return type being a SIMD for floating-point as a special optimization
assert(varTypeIsArithmetic(node) || (varTypeIsSIMD(node) && varTypeIsFloating(simdBaseType)));

GenTree* op1 = node->Op(1);
GenTree* op2 = node->Op(2);

Expand Down Expand Up @@ -1859,19 +1862,34 @@ GenTree* Lowering::LowerHWIntrinsicDot(GenTreeHWIntrinsic* node)
}
}

// We will be constructing the following parts:
// ...
// /--* tmp2 simd16
// node = * HWINTRINSIC simd16 T ToScalar
if (varTypeIsSIMD(node->gtType))
{
// We're producing a vector result, so just return the result directly

// This is roughly the following managed code:
// ...
// return tmp2.ToScalar();
LIR::Use use;

node->ResetHWIntrinsicId((simdSize == 8) ? NI_Vector64_ToScalar : NI_Vector128_ToScalar, tmp2);
if (BlockRange().TryGetUse(node, &use))
{
use.ReplaceWith(tmp2);
}

LowerNode(node);
return node->gtNext;
BlockRange().Remove(node);
return tmp2->gtNext;
}
else
{
// We will be constructing the following parts:
// ...
// /--* tmp2 simd16
// node = * HWINTRINSIC simd16 T ToScalar

// This is roughly the following managed code:
// ...
// return tmp2.ToScalar();

node->ResetHWIntrinsicId((simdSize == 8) ? NI_Vector64_ToScalar : NI_Vector128_ToScalar, tmp2);
return LowerNode(node);
}
}
#endif // FEATURE_HW_INTRINSICS

Expand Down
Loading

0 comments on commit 90401dd

Please sign in to comment.