diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/TensorPrimitives.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/TensorPrimitives.cs
index 0da8b6dfcdec2..41fe81416b27a 100644
--- a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/TensorPrimitives.cs
+++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/TensorPrimitives.cs
@@ -22,7 +22,8 @@ public static partial class TensorPrimitives
         /// If a value is equal to <see cref="float.NaN"/>, the result stored into the corresponding destination location is the original NaN value with the sign bit removed.
         /// </para>
         /// <para>
-        /// <paramref name="x"/> and <paramref name="destination"/> may not overlap; if they do, behavior is undefined.
+        /// <paramref name="x"/> may overlap with <paramref name="destination" />, but only if the input and the output span begin at the same memory
+        /// location; otherwise, behavior is undefined. It is safe, for example, to use the same span for all span parameters.
         /// </para>
         /// </remarks>
         public static void Abs(ReadOnlySpan<float> x, Span<float> destination) =>
@@ -39,7 +40,9 @@ public static void Abs(ReadOnlySpan<float> x, Span<float> destination) =>
         /// This method effectively computes <c><paramref name="destination" />[i] = <paramref name="x" />[i] + <paramref name="y" />[i]</c>.
         /// </para>
         /// <para>
-        /// <paramref name="x"/> and <paramref name="y"/> may overlap, but neither may overlap with <paramref name="destination"/>; if they do, behavior is undefined.
+        /// <paramref name="x"/> and <paramref name="y"/> may overlap arbitrarily, but they may only overlap with <paramref name="destination" />
+        /// if the input and the output span begin at the same memory location; otherwise, behavior is undefined. It is safe, for example,
+        /// to use the same span for any subset of the span parameters, such as to perform an in-place operation.
         /// </para>
         /// <para>
         /// If either of the element-wise input values is equal to <see cref="float.NaN"/>, the resulting element-wise value is also NaN.
@@ -58,7 +61,9 @@ public static unsafe void Add(ReadOnlySpan<float> x, ReadOnlySpan<float> y, Span
         /// This method effectively computes <c><paramref name="destination" />[i] = <paramref name="x" />[i] + <paramref name="y" /></c>.
         /// </para>
         /// <para>
-        /// <paramref name="x"/> and <paramref name="destination"/> may not overlap; if they do, behavior is undefined.
+        /// <paramref name="x"/> and <paramref name="destination" /> may overlap, but only if they start at the same memory location;
+        /// otherwise, behavior is undefined. It is safe, for example, to use the same span for all span parameters, such as to perform
+        /// an in-place operation.
         /// </para>
         /// <para>
         /// If either of the element-wise input values is equal to <see cref="float.NaN"/>, the resulting element-wise value is also NaN.
@@ -79,7 +84,9 @@ public static void Add(ReadOnlySpan<float> x, float y, Span<float> destination)
         /// This method effectively computes <c><paramref name="destination" />[i] = (<paramref name="x" />[i] + <paramref name="y" />[i]) * <paramref name="multiplier" />[i]</c>.
         /// </para>
         /// <para>
-        /// <paramref name="x"/>, <paramref name="y"/>, and <paramref name="multiplier"/> may overlap, but none of them may overlap with <paramref name="destination"/>; if they do, behavior is undefined.
+        /// <paramref name="x"/>, <paramref name="y"/>, and <paramref name="multiplier"/> may overlap arbitrarily, but they may only overlap with
+        /// <paramref name="destination" /> if the input and the output span begin at the same memory location; otherwise, behavior is undefined.
+        /// It is safe, for example, to use the same span for any subset of the span parameters, such as to perform an in-place operation.
         /// </para>
         /// <para>
         /// If any of the element-wise input values is equal to <see cref="float.NaN"/>, the resulting element-wise value is also NaN.
@@ -100,7 +107,9 @@ public static void AddMultiply(ReadOnlySpan<float> x, ReadOnlySpan<float> y, Rea
         /// This method effectively computes <c><paramref name="destination" />[i] = (<paramref name="x" />[i] + <paramref name="y" />[i]) * <paramref name="multiplier" /></c>.
         /// </para>
         /// <para>
-        /// <paramref name="x"/> and <paramref name="y"/> may overlap, but neither may overlap with <paramref name="destination"/>; if they do, behavior is undefined.
+        /// <paramref name="x"/> and <paramref name="y"/> may overlap arbitrarily, but they may only overlap with
+        /// <paramref name="destination" /> if the input and the output span begin at the same memory location; otherwise, behavior is undefined.
+        /// It is safe, for example, to use the same span for any subset of the span parameters, such as to perform an in-place operation.
         /// </para>
         /// <para>
         /// If any of the element-wise input values is equal to <see cref="float.NaN"/>, the resulting element-wise value is also NaN.
@@ -121,7 +130,9 @@ public static void AddMultiply(ReadOnlySpan<float> x, ReadOnlySpan<float> y, flo
         /// This method effectively computes <c><paramref name="destination" />[i] = (<paramref name="x" />[i] + <paramref name="y" />) * <paramref name="multiplier" />[i]</c>.
         /// </para>
         /// <para>
-        /// <paramref name="x"/> and <paramref name="multiplier"/> may overlap, but neither may overlap with <paramref name="destination"/>; if they do, behavior is undefined.
+        /// <paramref name="x"/> and <paramref name="multiplier"/> may overlap arbitrarily, but they may only overlap with
+        /// <paramref name="destination" /> if the input and the output span begin at the same memory location; otherwise, behavior is undefined.
+        /// It is safe, for example, to use the same span for any subset of the span parameters, such as to perform an in-place operation.
         /// </para>
         /// <para>
         /// If any of the element-wise input values is equal to <see cref="float.NaN"/>, the resulting element-wise value is also NaN.
@@ -139,7 +150,8 @@ public static void AddMultiply(ReadOnlySpan<float> x, float y, ReadOnlySpan<floa
         /// This method effectively computes <c><paramref name="destination" />[i] = <see cref="MathF" />.Cosh(<paramref name="x" />[i])</c>.
         /// </para>
         /// <para>
-        /// <paramref name="x"/> and <paramref name="destination"/> may not overlap; if they do, behavior is undefined.
+        /// <paramref name="x"/> may overlap with <paramref name="destination" />, but only if the input and the output span begin at the same memory
+        /// location; otherwise, behavior is undefined. It is safe, for example, to use the same span for all span parameters.
         /// </para>
         /// <para>
         /// If a value is equal to <see cref="float.NegativeInfinity"/> or <see cref="float.PositiveInfinity"/>, the result stored into the corresponding destination location is set to <see cref="float.PositiveInfinity"/>.
@@ -250,7 +262,9 @@ public static float Distance(ReadOnlySpan<float> x, ReadOnlySpan<float> y)
         /// This method effectively computes <c><paramref name="destination" />[i] = <paramref name="x" />[i] / <paramref name="y" />[i]</c>.
         /// </para>
         /// <para>
-        /// <paramref name="x"/> and <paramref name="y"/> may overlap, but neither may overlap with <paramref name="destination"/>; if they do, behavior is undefined.
+        /// <paramref name="x"/> and <paramref name="y"/> may overlap arbitrarily, but they may only overlap with <paramref name="destination" />
+        /// if the input and the output span begin at the same memory location; otherwise, behavior is undefined. It is safe, for example,
+        /// to use the same span for any subset of the span parameters, such as to perform an in-place operation.
         /// </para>
         /// <para>
         /// If either of the element-wise input values is equal to <see cref="float.NaN"/>, the resulting element-wise value is also NaN.
@@ -269,7 +283,8 @@ public static void Divide(ReadOnlySpan<float> x, ReadOnlySpan<float> y, Span<flo
         /// This method effectively computes <c><paramref name="destination" />[i] = <paramref name="x" />[i] / <paramref name="y" /></c>.
         /// </para>
         /// <para>
-        /// <paramref name="x"/> and <paramref name="destination"/> may not overlap; if they do, behavior is undefined.
+        /// <paramref name="x"/> may overlap with <paramref name="destination" />, but only if the input and the output span begin at the same memory
+        /// location; otherwise, behavior is undefined. It is safe, for example, to use the same span for all span parameters.
         /// </para>
         /// <para>
         /// If either of the element-wise input values is equal to <see cref="float.NaN"/>, the resulting element-wise value is also NaN.
@@ -320,7 +335,8 @@ public static float Dot(ReadOnlySpan<float> x, ReadOnlySpan<float> y)
         /// This method effectively computes <c><paramref name="destination" />[i] = <see cref="MathF" />.Exp(<paramref name="x" />[i])</c>.
         /// </para>
         /// <para>
-        /// <paramref name="x"/> and <paramref name="destination"/> may not overlap; if they do, behavior is undefined.
+        /// <paramref name="x"/> may overlap with <paramref name="destination" />, but only if the input and the output span begin at the same memory
+        /// location; otherwise, behavior is undefined. It is safe, for example, to use the same span for all span parameters.
         /// </para>
         /// <para>
         /// If a value equals <see cref="float.NaN"/> or <see cref="float.PositiveInfinity"/>, the result stored into the corresponding destination location is set to NaN.
@@ -559,7 +575,8 @@ public static unsafe int IndexOfMinMagnitude(ReadOnlySpan<float> x)
         /// This method effectively computes <c><paramref name="destination" />[i] = <see cref="MathF" />.Log(<paramref name="x" />[i])</c>.
         /// </para>
         /// <para>
-        /// <paramref name="x"/> and <paramref name="destination"/> may not overlap; if they do, behavior is undefined.
+        /// <paramref name="x"/> may overlap with <paramref name="destination" />, but only if the input and the output span begin at the same memory
+        /// location; otherwise, behavior is undefined. It is safe, for example, to use the same span for all span parameters.
         /// </para>
         /// <para>
         /// If a value equals 0, the result stored into the corresponding destination location is set to <see cref="float.NegativeInfinity"/>.
@@ -594,7 +611,8 @@ public static void Log(ReadOnlySpan<float> x, Span<float> destination)
         /// This method effectively computes <c><paramref name="destination" />[i] = <see cref="MathF" />.Log2(<paramref name="x" />[i])</c>.
         /// </para>
         /// <para>
-        /// <paramref name="x"/> and <paramref name="destination"/> may not overlap; if they do, behavior is undefined.
+        /// <paramref name="x"/> may overlap with <paramref name="destination" />, but only if the input and the output span begin at the same memory
+        /// location; otherwise, behavior is undefined. It is safe, for example, to use the same span for all span parameters.
         /// </para>
         /// <para>
         /// If a value equals 0, the result stored into the corresponding destination location is set to <see cref="float.NegativeInfinity"/>.
@@ -648,7 +666,9 @@ public static float Max(ReadOnlySpan<float> x) =>
         /// This method effectively computes <c><paramref name="destination" />[i] = MathF.Max(<paramref name="x" />[i], <paramref name="y" />[i])</c>.
         /// </para>
         /// <para>
-        /// <paramref name="x"/> and <paramref name="y"/> may overlap, but neither may overlap with <paramref name="destination"/>; if they do, behavior is undefined.
+        /// <paramref name="x"/> and <paramref name="y"/> may overlap arbitrarily, but they may only overlap with <paramref name="destination" />
+        /// if the input and the output span begin at the same memory location; otherwise, behavior is undefined. It is safe, for example,
+        /// to use the same span for any subset of the span parameters, such as to perform an in-place operation.
         /// </para>
         /// <para>
         /// The determination of the maximum element matches the IEEE 754:2019 `maximum` function. If either value is equal to <see cref="float.NaN"/>,
@@ -689,12 +709,9 @@ public static float MaxMagnitude(ReadOnlySpan<float> x) =>
         /// <remarks>This method effectively computes <c><paramref name="destination" />[i] = MathF.MaxMagnitude(<paramref name="x" />[i], <paramref name="y" />[i])</c>.</remarks>
         /// <remarks>
         /// <para>
-        /// The determination of the maximum magnitude matches the IEEE 754:2019 `maximumMagnitude` function. If either value is equal to <see cref="float.NaN"/>,
-        /// that value is stored as the result. If the two values have the same magnitude and one is positive and the other is negative,
-        /// the positive value is considered to have the larger magnitude.
-        /// </para>
-        /// <para>
-        /// <paramref name="x"/> and <paramref name="y"/> may overlap, but neither may overlap with <paramref name="destination"/>; if they do, behavior is undefined.
+        /// <paramref name="x"/> and <paramref name="y"/> may overlap arbitrarily, but they may only overlap with <paramref name="destination" />
+        /// if the input and the output span begin at the same memory location; otherwise, behavior is undefined. It is safe, for example,
+        /// to use the same span for any subset of the span parameters, such as to perform an in-place operation.
         /// </para>
         /// <para>
         /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different
@@ -736,7 +753,9 @@ public static float Min(ReadOnlySpan<float> x) =>
         /// that value is stored as the result. Positive 0 is considered greater than negative 0.
         /// </para>
         /// <para>
-        /// <paramref name="x"/> and <paramref name="y"/> may overlap, but neither may overlap with <paramref name="destination"/>; if they do, behavior is undefined.
+        /// <paramref name="x"/> and <paramref name="y"/> may overlap arbitrarily, but they may only overlap with <paramref name="destination" />
+        /// if the input and the output span begin at the same memory location; otherwise, behavior is undefined. It is safe, for example,
+        /// to use the same span for any subset of the span parameters, such as to perform an in-place operation.
         /// </para>
         /// <para>
         /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different
@@ -778,7 +797,9 @@ public static float MinMagnitude(ReadOnlySpan<float> x) =>
         /// the negative value is considered to have the smaller magnitude.
         /// </para>
         /// <para>
-        /// <paramref name="x"/> and <paramref name="y"/> may overlap, but neither may overlap with <paramref name="destination"/>; if they do, behavior is undefined.
+        /// <paramref name="x"/> and <paramref name="y"/> may overlap arbitrarily, but they may only overlap with <paramref name="destination" />
+        /// if the input and the output span begin at the same memory location; otherwise, behavior is undefined. It is safe, for example,
+        /// to use the same span for any subset of the span parameters, such as to perform an in-place operation.
         /// </para>
         /// <para>
         /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different
@@ -799,7 +820,9 @@ public static void MinMagnitude(ReadOnlySpan<float> x, ReadOnlySpan<float> y, Sp
         /// This method effectively computes <c><paramref name="destination" />[i] = <paramref name="x" />[i] * <paramref name="y" />[i]</c>.
         /// </para>
         /// <para>
-        /// <paramref name="x"/> and <paramref name="y"/> may overlap, but neither may overlap with <paramref name="destination"/>; if they do, behavior is undefined.
+        /// <paramref name="x"/> and <paramref name="y"/> may overlap arbitrarily, but they may only overlap with <paramref name="destination" />
+        /// if the input and the output span begin at the same memory location; otherwise, behavior is undefined. It is safe, for example,
+        /// to use the same span for any subset of the span parameters, such as to perform an in-place operation.
         /// </para>
         /// <para>
         /// If either of the element-wise input values is equal to <see cref="float.NaN"/>, the resulting element-wise value is also NaN.
@@ -819,7 +842,8 @@ public static void Multiply(ReadOnlySpan<float> x, ReadOnlySpan<float> y, Span<f
         /// It corresponds to the <c>scal</c> method defined by <c>BLAS1</c>.
         /// </para>
         /// <para>
-        /// <paramref name="x"/> and <paramref name="destination"/> may not overlap; if they do, behavior is undefined.
+        /// <paramref name="x"/> may overlap with <paramref name="destination" />, but only if the input and the output span begin at the same memory
+        /// location; otherwise, behavior is undefined. It is safe, for example, to use the same span for all span parameters.
         /// </para>
         /// <para>
         /// If either of the element-wise input values is equal to <see cref="float.NaN"/>, the resulting element-wise value is also NaN.
@@ -840,12 +864,16 @@ public static void Multiply(ReadOnlySpan<float> x, float y, Span<float> destinat
         /// This method effectively computes <c><paramref name="destination" />[i] = (<paramref name="x" />[i] * <paramref name="y" />[i]) + <paramref name="addend" />[i]</c>.
         /// </para>
         /// <para>
-        /// <paramref name="x"/>, <paramref name="y"/>, and <paramref name="addend"/> may overlap, but none of them may overlap with <paramref name="destination"/>; if they do, behavior is undefined.
+        /// <paramref name="x"/>, <paramref name="y"/>, and <paramref name="addend"/> may overlap arbitrarily, but they may only overlap with
+        /// <paramref name="destination" /> if the input and the output span begin at the same memory location; otherwise, behavior is undefined.
+        /// It is safe, for example, to use the same span for any subset of the span parameters, such as to perform an in-place operation.
         /// </para>
         /// <para>
         /// If either of the element-wise input values is equal to <see cref="float.NaN"/>, the resulting element-wise value is also NaN.
         /// </para>
         /// </remarks>
+
+
         public static void MultiplyAdd(ReadOnlySpan<float> x, ReadOnlySpan<float> y, ReadOnlySpan<float> addend, Span<float> destination) =>
             InvokeSpanSpanSpanIntoSpan<MultiplyAddOperator>(x, y, addend, destination);
 
@@ -862,7 +890,9 @@ public static void MultiplyAdd(ReadOnlySpan<float> x, ReadOnlySpan<float> y, Rea
         /// It corresponds to the <c>axpy</c> method defined by <c>BLAS1</c>.
         /// </para>
         /// <para>
-        /// <paramref name="x"/> and <paramref name="y"/> may overlap, but neither may overlap with <paramref name="destination"/>; if they do, behavior is undefined.
+        /// <paramref name="x"/> and <paramref name="y"/> may overlap arbitrarily, but they may only overlap with
+        /// <paramref name="destination" /> if the input and the output span begin at the same memory location; otherwise, behavior is undefined.
+        /// It is safe, for example, to use the same span for any subset of the span parameters, such as to perform an in-place operation.
         /// </para>
         /// <para>
         /// If either of the element-wise input values is equal to <see cref="float.NaN"/>, the resulting element-wise value is also NaN.
@@ -883,7 +913,9 @@ public static void MultiplyAdd(ReadOnlySpan<float> x, ReadOnlySpan<float> y, flo
         /// This method effectively computes <c><paramref name="destination" />[i] = (<paramref name="x" />[i] * <paramref name="y" />) + <paramref name="addend" />[i]</c>.
         /// </para>
         /// <para>
-        /// <paramref name="x"/> and <paramref name="addend"/> may overlap, but neither may overlap with <paramref name="destination"/>; if they do, behavior is undefined.
+        /// <paramref name="x"/> and <paramref name="addend"/> may overlap arbitrarily, but they may only overlap with
+        /// <paramref name="destination" /> if the input and the output span begin at the same memory location; otherwise, behavior is undefined.
+        /// It is safe, for example, to use the same span for any subset of the span parameters, such as to perform an in-place operation.
         /// </para>
         /// <para>
         /// If either of the element-wise input values is equal to <see cref="float.NaN"/>, the resulting element-wise value is also NaN.
@@ -901,7 +933,8 @@ public static void MultiplyAdd(ReadOnlySpan<float> x, float y, ReadOnlySpan<floa
         /// This method effectively computes <c><paramref name="destination" />[i] = -<paramref name="x" />[i]</c>.
         /// </para>
         /// <para>
-        /// <paramref name="x"/> and <paramref name="destination"/> may not overlap; if they do, behavior is undefined.
+        /// <paramref name="x"/> may overlap with <paramref name="destination" />, but only if the input and the output span begin at the same memory
+        /// location; otherwise, behavior is undefined. It is safe, for example, to use the same span for all span parameters.
         /// </para>
         /// <para>
         /// If any of the element-wise input values is equal to <see cref="float.NaN"/>, the resulting element-wise value is also NaN.
@@ -1035,7 +1068,8 @@ public static float ProductOfSums(ReadOnlySpan<float> x, ReadOnlySpan<float> y)
         /// This method effectively computes <c><paramref name="destination" />[i] = 1f / (1f + <see cref="MathF" />.Exp(-<paramref name="x" />[i]))</c>.
         /// </para>
         /// <para>
-        /// <paramref name="x"/> and <paramref name="destination"/> may not overlap; if they do, behavior is undefined.
+        /// <paramref name="x"/> may overlap with <paramref name="destination" />, but only if the input and the output span begin at the same memory
+        /// location; otherwise, behavior is undefined. It is safe, for example, to use the same span for all span parameters.
         /// </para>
         /// <para>
         /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different
@@ -1069,7 +1103,8 @@ public static void Sigmoid(ReadOnlySpan<float> x, Span<float> destination)
         /// This method effectively computes <c><paramref name="destination" />[i] = <see cref="MathF" />.Sinh(<paramref name="x" />[i])</c>.
         /// </para>
         /// <para>
-        /// <paramref name="x"/> and <paramref name="destination"/> may not overlap; if they do, behavior is undefined.
+        /// <paramref name="x"/> may overlap with <paramref name="destination" />, but only if the input and the output span begin at the same memory
+        /// location; otherwise, behavior is undefined. It is safe, for example, to use the same span for all span parameters.
         /// </para>
         /// <para>
         /// If a value is equal to <see cref="float.NegativeInfinity"/>, <see cref="float.PositiveInfinity"/>, or <see cref="float.NaN"/>,
@@ -1107,7 +1142,8 @@ public static void Sinh(ReadOnlySpan<float> x, Span<float> destination)
         /// It then effectively computes <c><paramref name="destination" />[i] = MathF.Exp(<paramref name="x" />[i]) / sum</c>.
         /// </para>
         /// <para>
-        /// <paramref name="x"/> and <paramref name="destination"/> may not overlap; if they do, behavior is undefined.
+        /// <paramref name="x"/> may overlap with <paramref name="destination" />, but only if the input and the output span begin at the same memory
+        /// location; otherwise, behavior is undefined. It is safe, for example, to use the same span for all span parameters.
         /// </para>
         /// <para>
         /// This method may call into the underlying C runtime or employ instructions specific to the current architecture. Exact results may differ between different
@@ -1150,7 +1186,9 @@ public static void SoftMax(ReadOnlySpan<float> x, Span<float> destination)
         /// This method effectively computes <c><paramref name="destination" />[i] = <paramref name="x" />[i] - <paramref name="y" />[i]</c>.
         /// </para>
         /// <para>
-        /// <paramref name="x"/> and <paramref name="y"/> may overlap, but neither may overlap with <paramref name="destination"/>; if they do, behavior is undefined.
+        /// <paramref name="x"/> and <paramref name="y"/> may overlap arbitrarily, but they may only overlap with
+        /// <paramref name="destination" /> if the input and the output span begin at the same memory location; otherwise, behavior is undefined.
+        /// It is safe, for example, to use the same span for any subset of the span parameters, such as to perform an in-place operation.
         /// </para>
         /// <para>
         /// If either of the element-wise input values is equal to <see cref="float.NaN"/>, the resulting element-wise value is also NaN.
@@ -1169,7 +1207,8 @@ public static void Subtract(ReadOnlySpan<float> x, ReadOnlySpan<float> y, Span<f
         /// This method effectively computes <c><paramref name="destination" />[i] = <paramref name="x" />[i] - <paramref name="y" /></c>.
         /// </para>
         /// <para>
-        /// <paramref name="x"/> and <paramref name="destination"/> may not overlap; if they do, behavior is undefined.
+        /// <paramref name="x"/> may overlap with <paramref name="destination" />, but only if the input and the output span begin at the same memory
+        /// location; otherwise, behavior is undefined. It is safe, for example, to use the same span for all span parameters.
         /// </para>
         /// <para>
         /// If either of the element-wise input values is equal to <see cref="float.NaN"/>, the resulting element-wise value is also NaN.
@@ -1244,7 +1283,8 @@ public static float SumOfSquares(ReadOnlySpan<float> x) =>
         /// This method effectively computes <c><paramref name="destination" />[i] = <see cref="MathF" />.Tanh(<paramref name="x" />[i])</c>.
         /// </para>
         /// <para>
-        /// <paramref name="x"/> and <paramref name="destination"/> may not overlap; if they do, behavior is undefined.
+        /// <paramref name="x"/> may overlap with <paramref name="destination" />, but only if the input and the output span begin at the same memory
+        /// location; otherwise, behavior is undefined. It is safe, for example, to use the same span for all span parameters.
         /// </para>
         /// <para>
         /// If a value is equal to <see cref="float.NegativeInfinity"/>, the corresponding destination location is set to -1.
diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/TensorPrimitives.netcore.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/TensorPrimitives.netcore.cs
index bed07eedfefd1..bd18b16d47b69 100644
--- a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/TensorPrimitives.netcore.cs
+++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/TensorPrimitives.netcore.cs
@@ -23,6 +23,9 @@ public static partial class TensorPrimitives
         /// <para>
         /// This method effectively computes <c><paramref name="destination" />[i] = (Half)<paramref name="source" />[i]</c>.
         /// </para>
+        /// <para>
+        /// <paramref name="source"/> and <paramref name="destination"/> must not overlap. If they do, behavior is undefined.
+        /// </para>
         /// </remarks>
         public static void ConvertToHalf(ReadOnlySpan<float> source, Span<Half> destination)
         {
@@ -48,6 +51,9 @@ public static void ConvertToHalf(ReadOnlySpan<float> source, Span<Half> destinat
         /// <para>
         /// This method effectively computes <c><paramref name="destination" />[i] = (float)<paramref name="source" />[i]</c>.
         /// </para>
+        /// <para>
+        /// <paramref name="source"/> and <paramref name="destination"/> must not overlap. If they do, behavior is undefined.
+        /// </para>
         /// </remarks>
         public static void ConvertToSingle(ReadOnlySpan<Half> source, Span<float> destination)
         {
@@ -519,7 +525,10 @@ private static float MinMaxCore<TMinMax>(ReadOnlySpan<float> x) where TMinMax :
                         return GetFirstNaN(current);
                     }
 
-                    result = TMinMax.Invoke(result, current);
+                    result = Vector512.ConditionalSelect(
+                        Vector512.Equals(LoadRemainderMaskSingleVector512(x.Length - i), Vector512<float>.Zero),
+                        result,
+                        TMinMax.Invoke(result, current));
                 }
 
                 // Aggregate the lanes in the vector to create the final scalar result.
@@ -565,7 +574,10 @@ private static float MinMaxCore<TMinMax>(ReadOnlySpan<float> x) where TMinMax :
                         return GetFirstNaN(current);
                     }
 
-                    result = TMinMax.Invoke(result, current);
+                    result = Vector256.ConditionalSelect(
+                        Vector256.Equals(LoadRemainderMaskSingleVector256(x.Length - i), Vector256<float>.Zero),
+                        result,
+                        TMinMax.Invoke(result, current));
                 }
 
                 // Aggregate the lanes in the vector to create the final scalar result.
@@ -610,7 +622,10 @@ private static float MinMaxCore<TMinMax>(ReadOnlySpan<float> x) where TMinMax :
                         return GetFirstNaN(current);
                     }
 
-                    result = TMinMax.Invoke(result, current);
+                    result = Vector128.ConditionalSelect(
+                        Vector128.Equals(LoadRemainderMaskSingleVector128(x.Length - i), Vector128<float>.Zero),
+                        result,
+                        TMinMax.Invoke(result, current));
                 }
 
                 // Aggregate the lanes in the vector to create the final scalar result.
@@ -672,7 +687,10 @@ private static unsafe void InvokeSpanIntoSpan<TUnaryOperator>(
                     if (i != x.Length)
                     {
                         uint lastVectorIndex = (uint)(x.Length - Vector512<float>.Count);
-                        TUnaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, lastVectorIndex)).StoreUnsafe(ref dRef, lastVectorIndex);
+                        Vector512.ConditionalSelect(
+                            Vector512.Equals(LoadRemainderMaskSingleVector512(x.Length - i), Vector512<float>.Zero),
+                            Vector512.LoadUnsafe(ref dRef, lastVectorIndex),
+                            TUnaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, lastVectorIndex))).StoreUnsafe(ref dRef, lastVectorIndex);
                     }
 
                     return;
@@ -698,7 +716,10 @@ private static unsafe void InvokeSpanIntoSpan<TUnaryOperator>(
                     if (i != x.Length)
                     {
                         uint lastVectorIndex = (uint)(x.Length - Vector256<float>.Count);
-                        TUnaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, lastVectorIndex)).StoreUnsafe(ref dRef, lastVectorIndex);
+                        Vector256.ConditionalSelect(
+                            Vector256.Equals(LoadRemainderMaskSingleVector256(x.Length - i), Vector256<float>.Zero),
+                            Vector256.LoadUnsafe(ref dRef, lastVectorIndex),
+                            TUnaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, lastVectorIndex))).StoreUnsafe(ref dRef, lastVectorIndex);
                     }
 
                     return;
@@ -723,7 +744,10 @@ private static unsafe void InvokeSpanIntoSpan<TUnaryOperator>(
                     if (i != x.Length)
                     {
                         uint lastVectorIndex = (uint)(x.Length - Vector128<float>.Count);
-                        TUnaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, lastVectorIndex)).StoreUnsafe(ref dRef, lastVectorIndex);
+                        Vector128.ConditionalSelect(
+                            Vector128.Equals(LoadRemainderMaskSingleVector128(x.Length - i), Vector128<float>.Zero),
+                            Vector128.LoadUnsafe(ref dRef, lastVectorIndex),
+                            TUnaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, lastVectorIndex))).StoreUnsafe(ref dRef, lastVectorIndex);
                     }
 
                     return;
@@ -777,8 +801,11 @@ private static unsafe void InvokeSpanSpanIntoSpan<TBinaryOperator>(
                     if (i != x.Length)
                     {
                         uint lastVectorIndex = (uint)(x.Length - Vector512<float>.Count);
-                        TBinaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, lastVectorIndex),
-                                               Vector512.LoadUnsafe(ref yRef, lastVectorIndex)).StoreUnsafe(ref dRef, lastVectorIndex);
+                        Vector512.ConditionalSelect(
+                            Vector512.Equals(LoadRemainderMaskSingleVector512(x.Length - i), Vector512<float>.Zero),
+                            Vector512.LoadUnsafe(ref dRef, lastVectorIndex),
+                            TBinaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, lastVectorIndex),
+                                                   Vector512.LoadUnsafe(ref yRef, lastVectorIndex))).StoreUnsafe(ref dRef, lastVectorIndex);
                     }
 
                     return;
@@ -805,8 +832,11 @@ private static unsafe void InvokeSpanSpanIntoSpan<TBinaryOperator>(
                     if (i != x.Length)
                     {
                         uint lastVectorIndex = (uint)(x.Length - Vector256<float>.Count);
-                        TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, lastVectorIndex),
-                                               Vector256.LoadUnsafe(ref yRef, lastVectorIndex)).StoreUnsafe(ref dRef, lastVectorIndex);
+                        Vector256.ConditionalSelect(
+                            Vector256.Equals(LoadRemainderMaskSingleVector256(x.Length - i), Vector256<float>.Zero),
+                            Vector256.LoadUnsafe(ref dRef, lastVectorIndex),
+                            TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, lastVectorIndex),
+                                                   Vector256.LoadUnsafe(ref yRef, lastVectorIndex))).StoreUnsafe(ref dRef, lastVectorIndex);
                     }
 
                     return;
@@ -832,8 +862,11 @@ private static unsafe void InvokeSpanSpanIntoSpan<TBinaryOperator>(
                     if (i != x.Length)
                     {
                         uint lastVectorIndex = (uint)(x.Length - Vector128<float>.Count);
-                        TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, lastVectorIndex),
-                                               Vector128.LoadUnsafe(ref yRef, lastVectorIndex)).StoreUnsafe(ref dRef, lastVectorIndex);
+                        Vector128.ConditionalSelect(
+                            Vector128.Equals(LoadRemainderMaskSingleVector128(x.Length - i), Vector128<float>.Zero),
+                            Vector128.LoadUnsafe(ref dRef, lastVectorIndex),
+                            TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, lastVectorIndex),
+                                                   Vector128.LoadUnsafe(ref yRef, lastVectorIndex))).StoreUnsafe(ref dRef, lastVectorIndex);
                     }
 
                     return;
@@ -884,8 +917,11 @@ private static unsafe void InvokeSpanScalarIntoSpan<TBinaryOperator>(
                     if (i != x.Length)
                     {
                         uint lastVectorIndex = (uint)(x.Length - Vector512<float>.Count);
-                        TBinaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, lastVectorIndex),
-                                               yVec).StoreUnsafe(ref dRef, lastVectorIndex);
+                        Vector512.ConditionalSelect(
+                            Vector512.Equals(LoadRemainderMaskSingleVector512(x.Length - i), Vector512<float>.Zero),
+                            Vector512.LoadUnsafe(ref dRef, lastVectorIndex),
+                            TBinaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, lastVectorIndex),
+                                                   yVec)).StoreUnsafe(ref dRef, lastVectorIndex);
                     }
 
                     return;
@@ -914,8 +950,11 @@ private static unsafe void InvokeSpanScalarIntoSpan<TBinaryOperator>(
                     if (i != x.Length)
                     {
                         uint lastVectorIndex = (uint)(x.Length - Vector256<float>.Count);
-                        TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, lastVectorIndex),
-                                               yVec).StoreUnsafe(ref dRef, lastVectorIndex);
+                        Vector256.ConditionalSelect(
+                            Vector256.Equals(LoadRemainderMaskSingleVector256(x.Length - i), Vector256<float>.Zero),
+                            Vector256.LoadUnsafe(ref dRef, lastVectorIndex),
+                            TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, lastVectorIndex),
+                                                   yVec)).StoreUnsafe(ref dRef, lastVectorIndex);
                     }
 
                     return;
@@ -943,8 +982,11 @@ private static unsafe void InvokeSpanScalarIntoSpan<TBinaryOperator>(
                     if (i != x.Length)
                     {
                         uint lastVectorIndex = (uint)(x.Length - Vector128<float>.Count);
-                        TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, lastVectorIndex),
-                                               yVec).StoreUnsafe(ref dRef, lastVectorIndex);
+                        Vector128.ConditionalSelect(
+                            Vector128.Equals(LoadRemainderMaskSingleVector128(x.Length - i), Vector128<float>.Zero),
+                            Vector128.LoadUnsafe(ref dRef, lastVectorIndex),
+                            TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, lastVectorIndex),
+                                                   yVec)).StoreUnsafe(ref dRef, lastVectorIndex);
                     }
 
                     return;
@@ -1001,9 +1043,12 @@ private static unsafe void InvokeSpanSpanSpanIntoSpan<TTernaryOperator>(
                     if (i != x.Length)
                     {
                         uint lastVectorIndex = (uint)(x.Length - Vector512<float>.Count);
-                        TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, lastVectorIndex),
-                                                Vector512.LoadUnsafe(ref yRef, lastVectorIndex),
-                                                Vector512.LoadUnsafe(ref zRef, lastVectorIndex)).StoreUnsafe(ref dRef, lastVectorIndex);
+                        Vector512.ConditionalSelect(
+                            Vector512.Equals(LoadRemainderMaskSingleVector512(x.Length - i), Vector512<float>.Zero),
+                            Vector512.LoadUnsafe(ref dRef, lastVectorIndex),
+                            TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, lastVectorIndex),
+                                                    Vector512.LoadUnsafe(ref yRef, lastVectorIndex),
+                                                    Vector512.LoadUnsafe(ref zRef, lastVectorIndex))).StoreUnsafe(ref dRef, lastVectorIndex);
                     }
 
                     return;
@@ -1031,9 +1076,12 @@ private static unsafe void InvokeSpanSpanSpanIntoSpan<TTernaryOperator>(
                     if (i != x.Length)
                     {
                         uint lastVectorIndex = (uint)(x.Length - Vector256<float>.Count);
-                        TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, lastVectorIndex),
-                                                Vector256.LoadUnsafe(ref yRef, lastVectorIndex),
-                                                Vector256.LoadUnsafe(ref zRef, lastVectorIndex)).StoreUnsafe(ref dRef, lastVectorIndex);
+                        Vector256.ConditionalSelect(
+                            Vector256.Equals(LoadRemainderMaskSingleVector256(x.Length - i), Vector256<float>.Zero),
+                            Vector256.LoadUnsafe(ref dRef, lastVectorIndex),
+                            TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, lastVectorIndex),
+                                                    Vector256.LoadUnsafe(ref yRef, lastVectorIndex),
+                                                    Vector256.LoadUnsafe(ref zRef, lastVectorIndex))).StoreUnsafe(ref dRef, lastVectorIndex);
                     }
 
                     return;
@@ -1060,9 +1108,12 @@ private static unsafe void InvokeSpanSpanSpanIntoSpan<TTernaryOperator>(
                     if (i != x.Length)
                     {
                         uint lastVectorIndex = (uint)(x.Length - Vector128<float>.Count);
-                        TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, lastVectorIndex),
-                                                Vector128.LoadUnsafe(ref yRef, lastVectorIndex),
-                                                Vector128.LoadUnsafe(ref zRef, lastVectorIndex)).StoreUnsafe(ref dRef, lastVectorIndex);
+                        Vector128.ConditionalSelect(
+                            Vector128.Equals(LoadRemainderMaskSingleVector128(x.Length - i), Vector128<float>.Zero),
+                            Vector128.LoadUnsafe(ref dRef, lastVectorIndex),
+                            TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, lastVectorIndex),
+                                                    Vector128.LoadUnsafe(ref yRef, lastVectorIndex),
+                                                    Vector128.LoadUnsafe(ref zRef, lastVectorIndex))).StoreUnsafe(ref dRef, lastVectorIndex);
                     }
 
                     return;
@@ -1121,9 +1172,12 @@ private static unsafe void InvokeSpanSpanScalarIntoSpan<TTernaryOperator>(
                     if (i != x.Length)
                     {
                         uint lastVectorIndex = (uint)(x.Length - Vector512<float>.Count);
-                        TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, lastVectorIndex),
-                                                Vector512.LoadUnsafe(ref yRef, lastVectorIndex),
-                                                zVec).StoreUnsafe(ref dRef, lastVectorIndex);
+                        Vector512.ConditionalSelect(
+                            Vector512.Equals(LoadRemainderMaskSingleVector512(x.Length - i), Vector512<float>.Zero),
+                            Vector512.LoadUnsafe(ref dRef, lastVectorIndex),
+                            TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, lastVectorIndex),
+                                                    Vector512.LoadUnsafe(ref yRef, lastVectorIndex),
+                                                    zVec)).StoreUnsafe(ref dRef, lastVectorIndex);
                     }
 
                     return;
@@ -1153,9 +1207,12 @@ private static unsafe void InvokeSpanSpanScalarIntoSpan<TTernaryOperator>(
                     if (i != x.Length)
                     {
                         uint lastVectorIndex = (uint)(x.Length - Vector256<float>.Count);
-                        TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, lastVectorIndex),
-                                                Vector256.LoadUnsafe(ref yRef, lastVectorIndex),
-                                                zVec).StoreUnsafe(ref dRef, lastVectorIndex);
+                        Vector256.ConditionalSelect(
+                            Vector256.Equals(LoadRemainderMaskSingleVector256(x.Length - i), Vector256<float>.Zero),
+                            Vector256.LoadUnsafe(ref dRef, lastVectorIndex),
+                            TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, lastVectorIndex),
+                                                    Vector256.LoadUnsafe(ref yRef, lastVectorIndex),
+                                                    zVec)).StoreUnsafe(ref dRef, lastVectorIndex);
                     }
 
                     return;
@@ -1184,9 +1241,12 @@ private static unsafe void InvokeSpanSpanScalarIntoSpan<TTernaryOperator>(
                     if (i != x.Length)
                     {
                         uint lastVectorIndex = (uint)(x.Length - Vector128<float>.Count);
-                        TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, lastVectorIndex),
-                                                Vector128.LoadUnsafe(ref yRef, lastVectorIndex),
-                                                zVec).StoreUnsafe(ref dRef, lastVectorIndex);
+                        Vector128.ConditionalSelect(
+                            Vector128.Equals(LoadRemainderMaskSingleVector128(x.Length - i), Vector128<float>.Zero),
+                            Vector128.LoadUnsafe(ref dRef, lastVectorIndex),
+                            TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, lastVectorIndex),
+                                                    Vector128.LoadUnsafe(ref yRef, lastVectorIndex),
+                                                    zVec)).StoreUnsafe(ref dRef, lastVectorIndex);
                     }
 
                     return;
@@ -1245,9 +1305,12 @@ private static unsafe void InvokeSpanScalarSpanIntoSpan<TTernaryOperator>(
                     if (i != x.Length)
                     {
                         uint lastVectorIndex = (uint)(x.Length - Vector512<float>.Count);
-                        TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, lastVectorIndex),
-                                                yVec,
-                                                Vector512.LoadUnsafe(ref zRef, lastVectorIndex)).StoreUnsafe(ref dRef, lastVectorIndex);
+                        Vector512.ConditionalSelect(
+                            Vector512.Equals(LoadRemainderMaskSingleVector512(x.Length - i), Vector512<float>.Zero),
+                            Vector512.LoadUnsafe(ref dRef, lastVectorIndex),
+                            TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, lastVectorIndex),
+                                                    yVec,
+                                                    Vector512.LoadUnsafe(ref zRef, lastVectorIndex))).StoreUnsafe(ref dRef, lastVectorIndex);
                     }
 
                     return;
@@ -1277,9 +1340,12 @@ private static unsafe void InvokeSpanScalarSpanIntoSpan<TTernaryOperator>(
                     if (i != x.Length)
                     {
                         uint lastVectorIndex = (uint)(x.Length - Vector256<float>.Count);
-                        TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, lastVectorIndex),
-                                                yVec,
-                                                Vector256.LoadUnsafe(ref zRef, lastVectorIndex)).StoreUnsafe(ref dRef, lastVectorIndex);
+                        Vector256.ConditionalSelect(
+                            Vector256.Equals(LoadRemainderMaskSingleVector256(x.Length - i), Vector256<float>.Zero),
+                            Vector256.LoadUnsafe(ref dRef, lastVectorIndex),
+                            TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, lastVectorIndex),
+                                                    yVec,
+                                                    Vector256.LoadUnsafe(ref zRef, lastVectorIndex))).StoreUnsafe(ref dRef, lastVectorIndex);
                     }
 
                     return;
@@ -1308,9 +1374,12 @@ private static unsafe void InvokeSpanScalarSpanIntoSpan<TTernaryOperator>(
                     if (i != x.Length)
                     {
                         uint lastVectorIndex = (uint)(x.Length - Vector128<float>.Count);
-                        TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, lastVectorIndex),
-                                                yVec,
-                                                Vector128.LoadUnsafe(ref zRef, lastVectorIndex)).StoreUnsafe(ref dRef, lastVectorIndex);
+                        Vector128.ConditionalSelect(
+                            Vector128.Equals(LoadRemainderMaskSingleVector128(x.Length - i), Vector128<float>.Zero),
+                            Vector128.LoadUnsafe(ref dRef, lastVectorIndex),
+                            TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, lastVectorIndex),
+                                                    yVec,
+                                                    Vector128.LoadUnsafe(ref zRef, lastVectorIndex))).StoreUnsafe(ref dRef, lastVectorIndex);
                     }
 
                     return;
diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/TensorPrimitives.netstandard.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/TensorPrimitives.netstandard.cs
index e05e54bcad769..70207a5c8995b 100644
--- a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/TensorPrimitives.netstandard.cs
+++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/TensorPrimitives.netstandard.cs
@@ -320,7 +320,11 @@ private static void InvokeSpanIntoSpan<TUnaryOperator>(
                     if (i != x.Length)
                     {
                         int lastVectorIndex = x.Length - Vector<float>.Count;
-                        AsVector(ref dRef, lastVectorIndex) = op.Invoke(AsVector(ref xRef, lastVectorIndex));
+                        ref Vector<float> dest = ref AsVector(ref dRef, lastVectorIndex);
+                        dest = Vector.ConditionalSelect(
+                            Vector.Equals(LoadRemainderMaskSingleVector(x.Length - i), Vector<float>.Zero),
+                            dest,
+                            op.Invoke(AsVector(ref xRef, lastVectorIndex)));
                     }
 
                     return;
@@ -374,8 +378,12 @@ private static void InvokeSpanSpanIntoSpan<TBinaryOperator>(
                     if (i != x.Length)
                     {
                         int lastVectorIndex = x.Length - Vector<float>.Count;
-                        AsVector(ref dRef, lastVectorIndex) = op.Invoke(AsVector(ref xRef, lastVectorIndex),
-                                                                        AsVector(ref yRef, lastVectorIndex));
+                        ref Vector<float> dest = ref AsVector(ref dRef, lastVectorIndex);
+                        dest = Vector.ConditionalSelect(
+                            Vector.Equals(LoadRemainderMaskSingleVector(x.Length - i), Vector<float>.Zero),
+                            dest,
+                            op.Invoke(AsVector(ref xRef, lastVectorIndex),
+                                      AsVector(ref yRef, lastVectorIndex)));
                     }
 
                     return;
@@ -424,8 +432,11 @@ private static void InvokeSpanScalarIntoSpan<TBinaryOperator>(
                     if (i != x.Length)
                     {
                         int lastVectorIndex = x.Length - Vector<float>.Count;
-                        AsVector(ref dRef, lastVectorIndex) = op.Invoke(AsVector(ref xRef, lastVectorIndex),
-                                                                        yVec);
+                        ref Vector<float> dest = ref AsVector(ref dRef, lastVectorIndex);
+                        dest = Vector.ConditionalSelect(
+                            Vector.Equals(LoadRemainderMaskSingleVector(x.Length - i), Vector<float>.Zero),
+                            dest,
+                            op.Invoke(AsVector(ref xRef, lastVectorIndex), yVec));
                     }
 
                     return;
@@ -482,9 +493,13 @@ private static void InvokeSpanSpanSpanIntoSpan<TTernaryOperator>(
                     if (i != x.Length)
                     {
                         int lastVectorIndex = x.Length - Vector<float>.Count;
-                        AsVector(ref dRef, lastVectorIndex) = op.Invoke(AsVector(ref xRef, lastVectorIndex),
-                                                                        AsVector(ref yRef, lastVectorIndex),
-                                                                        AsVector(ref zRef, lastVectorIndex));
+                        ref Vector<float> dest = ref AsVector(ref dRef, lastVectorIndex);
+                        dest = Vector.ConditionalSelect(
+                            Vector.Equals(LoadRemainderMaskSingleVector(x.Length - i), Vector<float>.Zero),
+                            dest,
+                            op.Invoke(AsVector(ref xRef, lastVectorIndex),
+                                      AsVector(ref yRef, lastVectorIndex),
+                                      AsVector(ref zRef, lastVectorIndex)));
                     }
 
                     return;
@@ -543,9 +558,13 @@ private static void InvokeSpanSpanScalarIntoSpan<TTernaryOperator>(
                     if (i != x.Length)
                     {
                         int lastVectorIndex = x.Length - Vector<float>.Count;
-                        AsVector(ref dRef, lastVectorIndex) = op.Invoke(AsVector(ref xRef, lastVectorIndex),
-                                                                        AsVector(ref yRef, lastVectorIndex),
-                                                                        zVec);
+                        ref Vector<float> dest = ref AsVector(ref dRef, lastVectorIndex);
+                        dest = Vector.ConditionalSelect(
+                            Vector.Equals(LoadRemainderMaskSingleVector(x.Length - i), Vector<float>.Zero),
+                            dest,
+                            op.Invoke(AsVector(ref xRef, lastVectorIndex),
+                                      AsVector(ref yRef, lastVectorIndex),
+                                      zVec));
                     }
 
                     return;
@@ -604,9 +623,13 @@ private static void InvokeSpanScalarSpanIntoSpan<TTernaryOperator>(
                     if (i != x.Length)
                     {
                         int lastVectorIndex = x.Length - Vector<float>.Count;
-                        AsVector(ref dRef, lastVectorIndex) = op.Invoke(AsVector(ref xRef, lastVectorIndex),
-                                                                        yVec,
-                                                                        AsVector(ref zRef, lastVectorIndex));
+                        ref Vector<float> dest = ref AsVector(ref dRef, lastVectorIndex);
+                        dest = Vector.ConditionalSelect(
+                            Vector.Equals(LoadRemainderMaskSingleVector(x.Length - i), Vector<float>.Zero),
+                            dest,
+                            op.Invoke(AsVector(ref xRef, lastVectorIndex),
+                                      yVec,
+                                      AsVector(ref zRef, lastVectorIndex)));
                     }
 
                     return;
diff --git a/src/libraries/System.Numerics.Tensors/tests/TensorPrimitivesTests.cs b/src/libraries/System.Numerics.Tensors/tests/TensorPrimitivesTests.cs
index edcebe8eb4775..751e352dd1da5 100644
--- a/src/libraries/System.Numerics.Tensors/tests/TensorPrimitivesTests.cs
+++ b/src/libraries/System.Numerics.Tensors/tests/TensorPrimitivesTests.cs
@@ -75,6 +75,21 @@ public static void Abs(int tensorLength)
             }
         }
 
+        [Theory]
+        [MemberData(nameof(TensorLengthsIncluding0))]
+        public static void Abs_InPlace(int tensorLength)
+        {
+            using BoundedMemory<float> x = CreateAndFillTensor(tensorLength);
+            float[] xOrig = x.Span.ToArray();
+
+            TensorPrimitives.Abs(x, x);
+
+            for (int i = 0; i < x.Length; i++)
+            {
+                Assert.Equal(MathF.Abs(xOrig[i]), x[i], Tolerance);
+            }
+        }
+
         [Theory]
         [MemberData(nameof(TensorLengths))]
         public static void Abs_ThrowsForTooShortDestination(int tensorLength)
@@ -96,11 +111,34 @@ public static void Add_TwoTensors(int tensorLength)
             using BoundedMemory<float> destination = CreateTensor(tensorLength);
 
             TensorPrimitives.Add(x, y, destination);
-
             for (int i = 0; i < tensorLength; i++)
             {
                 Assert.Equal(x[i] + y[i], destination[i], Tolerance);
             }
+
+            float[] xOrig = x.Span.ToArray();
+
+            // Validate that the destination can be the same as an input.
+            TensorPrimitives.Add(x, x, x);
+            for (int i = 0; i < tensorLength; i++)
+            {
+                Assert.Equal(xOrig[i] + xOrig[i], x[i], Tolerance);
+            }
+        }
+
+        [Theory]
+        [MemberData(nameof(TensorLengthsIncluding0))]
+        public static void Add_TwoTensors_InPlace(int tensorLength)
+        {
+            using BoundedMemory<float> x = CreateAndFillTensor(tensorLength);
+            float[] xOrig = x.Span.ToArray();
+
+            TensorPrimitives.Add(x, x, x);
+
+            for (int i = 0; i < tensorLength; i++)
+            {
+                Assert.Equal(xOrig[i] + xOrig[i], x[i], Tolerance);
+            }
         }
 
         [Theory]
@@ -142,6 +180,22 @@ public static void Add_TensorScalar(int tensorLength)
             }
         }
 
+        [Theory]
+        [MemberData(nameof(TensorLengthsIncluding0))]
+        public static void Add_TensorScalar_InPlace(int tensorLength)
+        {
+            using BoundedMemory<float> x = CreateAndFillTensor(tensorLength);
+            float[] xOrig = x.Span.ToArray();
+            float y = NextSingle();
+
+            TensorPrimitives.Add(x, y, x);
+
+            for (int i = 0; i < tensorLength; i++)
+            {
+                Assert.Equal(xOrig[i] + y, x[i], Tolerance);
+            }
+        }
+
         [Theory]
         [MemberData(nameof(TensorLengths))]
         public static void Add_TensorScalar_ThrowsForTooShortDestination(int tensorLength)
@@ -172,6 +226,21 @@ public static void AddMultiply_ThreeTensors(int tensorLength)
             }
         }
 
+        [Theory]
+        [MemberData(nameof(TensorLengthsIncluding0))]
+        public static void AddMultiply_ThreeTensors_InPlace(int tensorLength)
+        {
+            using BoundedMemory<float> x = CreateAndFillTensor(tensorLength);
+            float[] xOrig = x.Span.ToArray();
+
+            TensorPrimitives.AddMultiply(x, x, x, x);
+
+            for (int i = 0; i < tensorLength; i++)
+            {
+                Assert.Equal((xOrig[i] + xOrig[i]) * xOrig[i], x[i], Tolerance);
+            }
+        }
+
         [Theory]
         [MemberData(nameof(TensorLengths))]
         public static void AddMultiply_ThreeTensors_ThrowsForMismatchedLengths(int tensorLength)
@@ -215,6 +284,22 @@ public static void AddMultiply_TensorTensorScalar(int tensorLength)
             }
         }
 
+        [Theory]
+        [MemberData(nameof(TensorLengthsIncluding0))]
+        public static void AddMultiply_TensorTensorScalar_InPlace(int tensorLength)
+        {
+            using BoundedMemory<float> x = CreateAndFillTensor(tensorLength);
+            float[] xOrig = x.Span.ToArray();
+            float multiplier = NextSingle();
+
+            TensorPrimitives.AddMultiply(x, x, multiplier, x);
+
+            for (int i = 0; i < tensorLength; i++)
+            {
+                Assert.Equal((xOrig[i] + xOrig[i]) * multiplier, x[i], Tolerance);
+            }
+        }
+
         [Theory]
         [MemberData(nameof(TensorLengths))]
         public static void AddMultiply_TensorTensorScalar_ThrowsForMismatchedLengths_x_y(int tensorLength)
@@ -257,6 +342,22 @@ public static void AddMultiply_TensorScalarTensor(int tensorLength)
             }
         }
 
+        [Theory]
+        [MemberData(nameof(TensorLengthsIncluding0))]
+        public static void AddMultiply_TensorScalarTensor_InPlace(int tensorLength)
+        {
+            using BoundedMemory<float> x = CreateAndFillTensor(tensorLength);
+            float[] xOrig = x.Span.ToArray();
+            float y = NextSingle();
+
+            TensorPrimitives.AddMultiply(x, y, x, x);
+
+            for (int i = 0; i < tensorLength; i++)
+            {
+                Assert.Equal((xOrig[i] + y) * xOrig[i], x[i], Tolerance);
+            }
+        }
+
         [Theory]
         [MemberData(nameof(TensorLengths))]
         public static void AddMultiply_TensorScalarTensor_ThrowsForMismatchedLengths_x_z(int tensorLength)
@@ -299,6 +400,21 @@ public static void Cosh(int tensorLength)
             }
         }
 
+        [Theory]
+        [MemberData(nameof(TensorLengthsIncluding0))]
+        public static void Cosh_InPlace(int tensorLength)
+        {
+            using BoundedMemory<float> x = CreateAndFillTensor(tensorLength);
+            float[] xOrig = x.Span.ToArray();
+
+            TensorPrimitives.Cosh(x, x);
+
+            for (int i = 0; i < tensorLength; i++)
+            {
+                Assert.Equal(MathF.Cosh(xOrig[i]), x[i], Tolerance);
+            }
+        }
+
         [Theory]
         [MemberData(nameof(TensorLengths))]
         public static void Cosh_ThrowsForTooShortDestination(int tensorLength)
@@ -421,6 +537,21 @@ public static void Divide_TwoTensors(int tensorLength)
             }
         }
 
+        [Theory]
+        [MemberData(nameof(TensorLengthsIncluding0))]
+        public static void Divide_TwoTensors_InPlace(int tensorLength)
+        {
+            using BoundedMemory<float> x = CreateAndFillTensor(tensorLength);
+            float[] xOrig = x.Span.ToArray();
+
+            TensorPrimitives.Divide(x, x, x);
+
+            for (int i = 0; i < tensorLength; i++)
+            {
+                Assert.Equal(xOrig[i] / xOrig[i], x[i], Tolerance);
+            }
+        }
+
         [Theory]
         [MemberData(nameof(TensorLengths))]
         public static void Divide_TwoTensors_ThrowsForMismatchedLengths(int tensorLength)
@@ -460,6 +591,22 @@ public static void Divide_TensorScalar(int tensorLength)
             }
         }
 
+        [Theory]
+        [MemberData(nameof(TensorLengthsIncluding0))]
+        public static void Divide_TensorScalar_InPlace(int tensorLength)
+        {
+            using BoundedMemory<float> x = CreateAndFillTensor(tensorLength);
+            float[] xOrig = x.Span.ToArray();
+            float y = NextSingle();
+
+            TensorPrimitives.Divide(x, y, x);
+
+            for (int i = 0; i < tensorLength; i++)
+            {
+                Assert.Equal(xOrig[i] / y, x[i], Tolerance);
+            }
+        }
+
         [Theory]
         [MemberData(nameof(TensorLengths))]
         public static void Divide_TensorScalar_ThrowsForTooShortDestination(int tensorLength)
@@ -527,6 +674,21 @@ public static void Exp(int tensorLength)
             }
         }
 
+        [Theory]
+        [MemberData(nameof(TensorLengthsIncluding0))]
+        public static void Exp_InPlace(int tensorLength)
+        {
+            using BoundedMemory<float> x = CreateAndFillTensor(tensorLength);
+            float[] xOrig = x.Span.ToArray();
+
+            TensorPrimitives.Exp(x, x);
+
+            for (int i = 0; i < tensorLength; i++)
+            {
+                Assert.Equal(MathF.Exp(xOrig[i]), x[i], Tolerance);
+            }
+        }
+
         [Theory]
         [MemberData(nameof(TensorLengths))]
         public static void Exp_ThrowsForTooShortDestination(int tensorLength)
@@ -735,6 +897,21 @@ public static void Log(int tensorLength)
             }
         }
 
+        [Theory]
+        [MemberData(nameof(TensorLengthsIncluding0))]
+        public static void Log_InPlace(int tensorLength)
+        {
+            using BoundedMemory<float> x = CreateAndFillTensor(tensorLength);
+            float[] xOrig = x.Span.ToArray();
+
+            TensorPrimitives.Log(x, x);
+
+            for (int i = 0; i < tensorLength; i++)
+            {
+                Assert.Equal(MathF.Log(xOrig[i]), x[i], Tolerance);
+            }
+        }
+
         [Theory]
         [MemberData(nameof(TensorLengths))]
         public static void Log_ThrowsForTooShortDestination(int tensorLength)
@@ -762,6 +939,21 @@ public static void Log2(int tensorLength)
             }
         }
 
+        [Theory]
+        [MemberData(nameof(TensorLengthsIncluding0))]
+        public static void Log2_InPlace(int tensorLength)
+        {
+            using BoundedMemory<float> x = CreateAndFillTensor(tensorLength);
+            float[] xOrig = x.Span.ToArray();
+
+            TensorPrimitives.Log2(x, x);
+
+            for (int i = 0; i < tensorLength; i++)
+            {
+                Assert.Equal(MathF.Log(xOrig[i], 2), x[i], Tolerance);
+            }
+        }
+
         [Theory]
         [MemberData(nameof(TensorLengths))]
         public static void Log2_ThrowsForTooShortDestination(int tensorLength)
@@ -834,6 +1026,32 @@ public static void Max_TwoTensors(int tensorLength)
             }
         }
 
+        [Theory]
+        [MemberData(nameof(TensorLengthsIncluding0))]
+        public static void Max_TwoTensors_InPlace(int tensorLength)
+        {
+            using BoundedMemory<float> x = CreateAndFillTensor(tensorLength);
+            using BoundedMemory<float> y = CreateAndFillTensor(tensorLength);
+            float[] xOrig = x.Span.ToArray(), yOrig = y.Span.ToArray();
+
+            TensorPrimitives.Max(x, y, x);
+
+            for (int i = 0; i < tensorLength; i++)
+            {
+                Assert.Equal(MathF.Max(xOrig[i], y[i]), x[i], Tolerance);
+            }
+
+            xOrig.AsSpan().CopyTo(x.Span);
+            yOrig.AsSpan().CopyTo(y.Span);
+
+            TensorPrimitives.Max(x, y, y);
+
+            for (int i = 0; i < tensorLength; i++)
+            {
+                Assert.Equal(MathF.Max(x[i], yOrig[i]), y[i], Tolerance);
+            }
+        }
+
         [Theory]
         [MemberData(nameof(TensorLengths))]
         public static void Max_TwoTensors_SpecialValues(int tensorLength)
@@ -955,6 +1173,32 @@ public static void MaxMagnitude_TwoTensors(int tensorLength)
             }
         }
 
+        [Theory]
+        [MemberData(nameof(TensorLengthsIncluding0))]
+        public static void MaxMagnitude_TwoTensors_InPlace(int tensorLength)
+        {
+            using BoundedMemory<float> x = CreateAndFillTensor(tensorLength);
+            using BoundedMemory<float> y = CreateAndFillTensor(tensorLength);
+            float[] xOrig = x.Span.ToArray(), yOrig = y.Span.ToArray();
+
+            TensorPrimitives.MaxMagnitude(x, y, x);
+
+            for (int i = 0; i < tensorLength; i++)
+            {
+                Assert.Equal(MathFMaxMagnitude(xOrig[i], y[i]), x[i], Tolerance);
+            }
+
+            xOrig.AsSpan().CopyTo(x.Span);
+            yOrig.AsSpan().CopyTo(y.Span);
+
+            TensorPrimitives.MaxMagnitude(x, y, y);
+
+            for (int i = 0; i < tensorLength; i++)
+            {
+                Assert.Equal(MathFMaxMagnitude(x[i], yOrig[i]), y[i], Tolerance);
+            }
+        }
+
         [Theory]
         [MemberData(nameof(TensorLengths))]
         public static void MaxMagnitude_TwoTensors_SpecialValues(int tensorLength)
@@ -1075,6 +1319,32 @@ public static void Min_TwoTensors(int tensorLength)
             }
         }
 
+        [Theory]
+        [MemberData(nameof(TensorLengthsIncluding0))]
+        public static void Min_TwoTensors_InPlace(int tensorLength)
+        {
+            using BoundedMemory<float> x = CreateAndFillTensor(tensorLength);
+            using BoundedMemory<float> y = CreateAndFillTensor(tensorLength);
+            float[] xOrig = x.Span.ToArray(), yOrig = y.Span.ToArray();
+
+            TensorPrimitives.Min(x, y, x);
+
+            for (int i = 0; i < tensorLength; i++)
+            {
+                Assert.Equal(MathF.Min(xOrig[i], y[i]), x[i], Tolerance);
+            }
+
+            xOrig.AsSpan().CopyTo(x.Span);
+            yOrig.AsSpan().CopyTo(y.Span);
+
+            TensorPrimitives.Min(x, y, y);
+
+            for (int i = 0; i < tensorLength; i++)
+            {
+                Assert.Equal(MathF.Min(x[i], yOrig[i]), y[i], Tolerance);
+            }
+        }
+
         [Theory]
         [MemberData(nameof(TensorLengths))]
         public static void Min_TwoTensors_SpecialValues(int tensorLength)
@@ -1194,6 +1464,32 @@ public static void MinMagnitude_TwoTensors(int tensorLength)
             }
         }
 
+        [Theory]
+        [MemberData(nameof(TensorLengthsIncluding0))]
+        public static void MinMagnitude_TwoTensors_InPlace(int tensorLength)
+        {
+            using BoundedMemory<float> x = CreateAndFillTensor(tensorLength);
+            using BoundedMemory<float> y = CreateAndFillTensor(tensorLength);
+            float[] xOrig = x.Span.ToArray(), yOrig = y.Span.ToArray();
+
+            TensorPrimitives.MinMagnitude(x, y, x);
+
+            for (int i = 0; i < tensorLength; i++)
+            {
+                Assert.Equal(MathFMinMagnitude(xOrig[i], y[i]), x[i], Tolerance);
+            }
+
+            xOrig.AsSpan().CopyTo(x.Span);
+            yOrig.AsSpan().CopyTo(y.Span);
+
+            TensorPrimitives.MinMagnitude(x, y, y);
+
+            for (int i = 0; i < tensorLength; i++)
+            {
+                Assert.Equal(MathFMinMagnitude(x[i], yOrig[i]), y[i], Tolerance);
+            }
+        }
+
         [Theory]
         [MemberData(nameof(TensorLengths))]
         public static void MinMagnitude_TwoTensors_SpecialValues(int tensorLength)
@@ -1270,6 +1566,21 @@ public static void Multiply_TwoTensors(int tensorLength)
             }
         }
 
+        [Theory]
+        [MemberData(nameof(TensorLengthsIncluding0))]
+        public static void Multiply_TwoTensors_InPlace(int tensorLength)
+        {
+            using BoundedMemory<float> x = CreateAndFillTensor(tensorLength);
+            float[] xOrig = x.Span.ToArray();
+
+            TensorPrimitives.Multiply(x, x, x);
+
+            for (int i = 0; i < tensorLength; i++)
+            {
+                Assert.Equal(xOrig[i] * xOrig[i], x[i], Tolerance);
+            }
+        }
+
         [Theory]
         [MemberData(nameof(TensorLengths))]
         public static void Multiply_TwoTensors_ThrowsForMismatchedLengths(int tensorLength)
@@ -1309,6 +1620,22 @@ public static void Multiply_TensorScalar(int tensorLength)
             }
         }
 
+        [Theory]
+        [MemberData(nameof(TensorLengthsIncluding0))]
+        public static void Multiply_TensorScalar_InPlace(int tensorLength)
+        {
+            using BoundedMemory<float> x = CreateAndFillTensor(tensorLength);
+            float[] xOrig = x.Span.ToArray();
+            float y = NextSingle();
+
+            TensorPrimitives.Multiply(x, y, x);
+
+            for (int i = 0; i < tensorLength; i++)
+            {
+                Assert.Equal(xOrig[i] * y, x[i], Tolerance);
+            }
+        }
+
         [Theory]
         [MemberData(nameof(TensorLengths))]
         public static void Multiply_TensorScalar_ThrowsForTooShortDestination(int tensorLength)
@@ -1339,6 +1666,21 @@ public static void MultiplyAdd_ThreeTensors(int tensorLength)
             }
         }
 
+        [Theory]
+        [MemberData(nameof(TensorLengthsIncluding0))]
+        public static void MultiplyAdd_ThreeTensors_InPlace(int tensorLength)
+        {
+            using BoundedMemory<float> x = CreateAndFillTensor(tensorLength);
+            float[] xOrig = x.Span.ToArray();
+
+            TensorPrimitives.MultiplyAdd(x, x, x, x);
+
+            for (int i = 0; i < tensorLength; i++)
+            {
+                Assert.Equal((xOrig[i] * xOrig[i]) + xOrig[i], x[i], Tolerance);
+            }
+        }
+
         [Theory]
         [MemberData(nameof(TensorLengths))]
         public static void MultiplyAdd_ThreeTensors_ThrowsForMismatchedLengths_x_y(int tensorLength)
@@ -1382,6 +1724,22 @@ public static void MultiplyAdd_TensorTensorScalar(int tensorLength)
             }
         }
 
+        [Theory]
+        [MemberData(nameof(TensorLengthsIncluding0))]
+        public static void MultiplyAdd_TensorTensorScalar_InPlace(int tensorLength)
+        {
+            using BoundedMemory<float> x = CreateAndFillTensor(tensorLength);
+            float[] xOrig = x.Span.ToArray();
+            float addend = NextSingle();
+
+            TensorPrimitives.MultiplyAdd(x, x, addend, x);
+
+            for (int i = 0; i < tensorLength; i++)
+            {
+                Assert.Equal((xOrig[i] * xOrig[i]) + addend, x[i], Tolerance);
+            }
+        }
+
         [Theory]
         [MemberData(nameof(TensorLengths))]
         public static void MultiplyAdd_TensorTensorScalar_ThrowsForTooShortDestination(int tensorLength)
@@ -1411,6 +1769,22 @@ public static void MultiplyAdd_TensorScalarTensor(int tensorLength)
             }
         }
 
+        [Theory]
+        [MemberData(nameof(TensorLengthsIncluding0))]
+        public static void MultiplyAdd_TensorScalarTensor_InPlace(int tensorLength)
+        {
+            using BoundedMemory<float> x = CreateAndFillTensor(tensorLength);
+            float[] xOrig = x.Span.ToArray();
+            float y = NextSingle();
+
+            TensorPrimitives.MultiplyAdd(x, y, x, x);
+
+            for (int i = 0; i < tensorLength; i++)
+            {
+                Assert.Equal((xOrig[i] * y) + xOrig[i], x[i], Tolerance);
+            }
+        }
+
         [Theory]
         [MemberData(nameof(TensorLengths))]
         public static void MultiplyAdd_TensorScalarTensor_ThrowsForTooShortDestination(int tensorLength)
@@ -1440,6 +1814,21 @@ public static void Negate(int tensorLength)
             }
         }
 
+        [Theory]
+        [MemberData(nameof(TensorLengthsIncluding0))]
+        public static void Negate_InPlace(int tensorLength)
+        {
+            using BoundedMemory<float> x = CreateAndFillTensor(tensorLength);
+            float[] xOrig = x.Span.ToArray();
+
+            TensorPrimitives.Negate(x, x);
+
+            for (int i = 0; i < tensorLength; i++)
+            {
+                Assert.Equal(-xOrig[i], x[i], Tolerance);
+            }
+        }
+
         [Theory]
         [MemberData(nameof(TensorLengths))]
         public static void Negate_ThrowsForTooShortDestination(int tensorLength)
@@ -1598,6 +1987,36 @@ public static void ProductOfSums_KnownValues()
         #endregion
 
         #region Sigmoid
+        [Theory]
+        [MemberData(nameof(TensorLengths))]
+        public static void Sigmoid(int tensorLength)
+        {
+            using BoundedMemory<float> x = CreateAndFillTensor(tensorLength);
+            using BoundedMemory<float> destination = CreateTensor(tensorLength);
+
+            TensorPrimitives.Sigmoid(x, destination);
+
+            for (int i = 0; i < tensorLength; i++)
+            {
+                Assert.Equal(1f / (1f + MathF.Exp(-x[i])), destination[i], Tolerance);
+            }
+        }
+
+        [Theory]
+        [MemberData(nameof(TensorLengths))]
+        public static void Sigmoid_InPlace(int tensorLength)
+        {
+            using BoundedMemory<float> x = CreateAndFillTensor(tensorLength);
+            float[] xOrig = x.Span.ToArray();
+
+            TensorPrimitives.Sigmoid(x, x);
+
+            for (int i = 0; i < tensorLength; i++)
+            {
+                Assert.Equal(1f / (1f + MathF.Exp(-xOrig[i])), x[i], Tolerance);
+            }
+        }
+
         [Theory]
         [MemberData(nameof(TensorLengths))]
         public static void Sigmoid_ThrowsForTooShortDestination(int tensorLength)
@@ -1612,7 +2031,7 @@ public static void Sigmoid_ThrowsForTooShortDestination(int tensorLength)
         [InlineData(new float[] { -5, -4.5f, -4 }, new float[] { 0.0066f, 0.0109f, 0.0179f })]
         [InlineData(new float[] { 4.5f, 5 }, new float[] { 0.9890f, 0.9933f })]
         [InlineData(new float[] { 0, -3, 3, .5f }, new float[] { 0.5f, 0.0474f, 0.9525f, 0.6224f })]
-        public static void Sigmoid(float[] x, float[] expectedResult)
+        public static void Sigmoid_KnownValues(float[] x, float[] expectedResult)
         {
             using BoundedMemory<float> dest = CreateTensor(x.Length);
             TensorPrimitives.Sigmoid(x, dest);
@@ -1663,6 +2082,21 @@ public static void Sinh(int tensorLength)
             }
         }
 
+        [Theory]
+        [MemberData(nameof(TensorLengthsIncluding0))]
+        public static void Sinh_InPlace(int tensorLength)
+        {
+            using BoundedMemory<float> x = CreateAndFillTensor(tensorLength);
+            float[] xOrig = x.Span.ToArray();
+
+            TensorPrimitives.Sinh(x, x);
+
+            for (int i = 0; i < tensorLength; i++)
+            {
+                Assert.Equal(MathF.Sinh(xOrig[i]), x[i], Tolerance);
+            }
+        }
+
         [Theory]
         [MemberData(nameof(TensorLengths))]
         public static void Sinh_ThrowsForTooShortDestination(int tensorLength)
@@ -1675,6 +2109,38 @@ public static void Sinh_ThrowsForTooShortDestination(int tensorLength)
         #endregion
 
         #region SoftMax
+        [Theory]
+        [MemberData(nameof(TensorLengths))]
+        public static void SoftMax(int tensorLength)
+        {
+            using BoundedMemory<float> x = CreateAndFillTensor(tensorLength);
+            using BoundedMemory<float> destination = CreateTensor(tensorLength);
+
+            TensorPrimitives.SoftMax(x, destination);
+
+            float expSum = MemoryMarshal.ToEnumerable<float>(x.Memory).Sum(MathF.Exp);
+            for (int i = 0; i < tensorLength; i++)
+            {
+                Assert.Equal(MathF.Exp(x[i]) / expSum, destination[i], Tolerance);
+            }
+        }
+
+        [Theory]
+        [MemberData(nameof(TensorLengths))]
+        public static void SoftMax_InPlace(int tensorLength)
+        {
+            using BoundedMemory<float> x = CreateAndFillTensor(tensorLength);
+            float[] xOrig = x.Span.ToArray();
+
+            TensorPrimitives.SoftMax(x, x);
+
+            float expSum = xOrig.Sum(MathF.Exp);
+            for (int i = 0; i < tensorLength; i++)
+            {
+                Assert.Equal(MathF.Exp(xOrig[i]) / expSum, x[i], Tolerance);
+            }
+        }
+
         [Theory]
         [MemberData(nameof(TensorLengths))]
         public static void SoftMax_ThrowsForTooShortDestination(int tensorLength)
@@ -1690,7 +2156,7 @@ public static void SoftMax_ThrowsForTooShortDestination(int tensorLength)
         [InlineData(new float[] { 3, 4, 1 }, new float[] { 0.2594f, 0.705384f, 0.0351f })]
         [InlineData(new float[] { 5, 3 }, new float[] { 0.8807f, 0.1192f })]
         [InlineData(new float[] { 4, 2, 1, 9 }, new float[] { 0.0066f, 9.04658e-4f, 3.32805e-4f, 0.9920f })]
-        public static void SoftMax(float[] x, float[] expectedResult)
+        public static void SoftMax_KnownValues(float[] x, float[] expectedResult)
         {
             using BoundedMemory<float> dest = CreateTensor(x.Length);
             TensorPrimitives.SoftMax(x, dest);
@@ -1739,6 +2205,21 @@ public static void Subtract_TwoTensors(int tensorLength)
             }
         }
 
+        [Theory]
+        [MemberData(nameof(TensorLengthsIncluding0))]
+        public static void Subtract_TwoTensors_InPlace(int tensorLength)
+        {
+            using BoundedMemory<float> x = CreateAndFillTensor(tensorLength);
+            float[] xOrig = x.Span.ToArray();
+
+            TensorPrimitives.Subtract(x, x, x);
+
+            for (int i = 0; i < tensorLength; i++)
+            {
+                Assert.Equal(xOrig[i] - xOrig[i], x[i], Tolerance);
+            }
+        }
+
         [Theory]
         [MemberData(nameof(TensorLengths))]
         public static void Subtract_TwoTensors_ThrowsForMismatchedLengths(int tensorLength)
@@ -1778,6 +2259,22 @@ public static void Subtract_TensorScalar(int tensorLength)
             }
         }
 
+        [Theory]
+        [MemberData(nameof(TensorLengthsIncluding0))]
+        public static void Subtract_TensorScalar_InPlace(int tensorLength)
+        {
+            using BoundedMemory<float> x = CreateAndFillTensor(tensorLength);
+            float[] xOrig = x.Span.ToArray();
+            float y = NextSingle();
+
+            TensorPrimitives.Subtract(x, y, x);
+
+            for (int i = 0; i < tensorLength; i++)
+            {
+                Assert.Equal(xOrig[i] - y, x[i], Tolerance);
+            }
+        }
+
         [Theory]
         [MemberData(nameof(TensorLengths))]
         public static void Subtract_TensorScalar_ThrowsForTooShortDestination(int tensorLength)
@@ -1797,7 +2294,7 @@ public static void Sum(int tensorLength)
         {
             using BoundedMemory<float> x = CreateAndFillTensor(tensorLength);
 
-            Assert.Equal(Enumerable.Sum(MemoryMarshal.ToEnumerable<float>(x.Memory)), TensorPrimitives.Sum(x), Tolerance);
+            Assert.Equal(MemoryMarshal.ToEnumerable<float>(x.Memory).Sum(), TensorPrimitives.Sum(x), Tolerance);
 
             float sum = 0;
             foreach (float f in x.Span)
@@ -1890,6 +2387,21 @@ public static void Tanh(int tensorLength)
             }
         }
 
+        [Theory]
+        [MemberData(nameof(TensorLengthsIncluding0))]
+        public static void Tanh_InPlace(int tensorLength)
+        {
+            using BoundedMemory<float> x = CreateAndFillTensor(tensorLength);
+            float[] xOrig = x.Span.ToArray();
+
+            TensorPrimitives.Tanh(x, x);
+
+            for (int i = 0; i < tensorLength; i++)
+            {
+                Assert.Equal(MathF.Tanh(xOrig[i]), x[i], Tolerance);
+            }
+        }
+
         [Theory]
         [MemberData(nameof(TensorLengths))]
         public static void Tanh_ThrowsForTooShortDestination(int tensorLength)