From 9376fedd7708ff107d4e1516d674e4591b9726a5 Mon Sep 17 00:00:00 2001 From: Stephen Toub Date: Wed, 4 Oct 2023 16:53:26 -0400 Subject: [PATCH 1/3] Vectorize TensorPrimitives.Sigmoid and TensorPrimitives.SoftMax - Adds a SigmoidOperator that just wraps the ExpOperator - Vectorizes both passes of SoftMax, on top of ExpOperator. Simplest way to do this was to augment the existing InvokeSpanScalarIntoSpan to take a transform operator. - In doing so, found some naming inconsistencies I'd previously introduced, so I did some automatic renaming to make things more consistent. - Added XML comments to all the internal/private surface area. - Fleshes out some tests (and test values). --- .../Numerics/Tensors/TensorPrimitives.cs | 24 +- .../Tensors/TensorPrimitives.netcore.cs | 368 ++++++++++++------ .../Tensors/TensorPrimitives.netstandard.cs | 231 ++++++++--- .../tests/TensorPrimitivesTests.cs | 146 +++++-- 4 files changed, 535 insertions(+), 234 deletions(-) diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/TensorPrimitives.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/TensorPrimitives.cs index acc311df6e4cb0..5ddec7b23519ac 100644 --- a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/TensorPrimitives.cs +++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/TensorPrimitives.cs @@ -988,17 +988,7 @@ public static void Sigmoid(ReadOnlySpan x, Span destination) ThrowHelper.ThrowArgument_SpansMustBeNonEmpty(); } - if (x.Length > destination.Length) - { - ThrowHelper.ThrowArgument_DestinationTooShort(); - } - - ValidateInputOutputSpanNonOverlapping(x, destination); - - for (int i = 0; i < x.Length; i++) - { - destination[i] = 1f / (1f + MathF.Exp(-x[i])); - } + InvokeSpanIntoSpan(x, destination); } /// Computes the element-wise hyperbolic sine of each single-precision floating-point radian angle in the specified tensor. @@ -1067,17 +1057,9 @@ public static void SoftMax(ReadOnlySpan x, Span destination) ValidateInputOutputSpanNonOverlapping(x, destination); - float expSum = 0f; - - for (int i = 0; i < x.Length; i++) - { - expSum += MathF.Exp(x[i]); - } + float expSum = Aggregate(x); - for (int i = 0; i < x.Length; i++) - { - destination[i] = MathF.Exp(x[i]) / expSum; - } + InvokeSpanScalarIntoSpan(x, expSum, destination); } /// Computes the element-wise difference between single-precision floating-point numbers in the specified tensors. diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/TensorPrimitives.netcore.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/TensorPrimitives.netcore.cs index aa306db678d77f..c8a980d70107aa 100644 --- a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/TensorPrimitives.netcore.cs +++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/TensorPrimitives.netcore.cs @@ -605,6 +605,8 @@ static Vector512 HalfAsWidenedUInt32ToSingle_Vector512(Vector512 va #endif } + /// Computes the cosine similarity between the two specified non-empty, equal-length tensors of single-precision floating-point numbers. + /// Assumes arguments have already been validated to be non-empty and equal length. private static float CosineSimilarityCore(ReadOnlySpan x, ReadOnlySpan y) { // Compute the same as: @@ -643,7 +645,7 @@ private static float CosineSimilarityCore(ReadOnlySpan x, ReadOnlySpan xVec = Vector512.LoadUnsafe(ref xRef, (uint)(x.Length - Vector512.Count)); Vector512 yVec = Vector512.LoadUnsafe(ref yRef, (uint)(x.Length - Vector512.Count)); - Vector512 remainderMask = LoadRemainderMaskSingleVector512(x.Length - i); + Vector512 remainderMask = CreateRemainderMaskSingleVector512(x.Length - i); xVec &= remainderMask; yVec &= remainderMask; @@ -690,7 +692,7 @@ private static float CosineSimilarityCore(ReadOnlySpan x, ReadOnlySpan xVec = Vector256.LoadUnsafe(ref xRef, (uint)(x.Length - Vector256.Count)); Vector256 yVec = Vector256.LoadUnsafe(ref yRef, (uint)(x.Length - Vector256.Count)); - Vector256 remainderMask = LoadRemainderMaskSingleVector256(x.Length - i); + Vector256 remainderMask = CreateRemainderMaskSingleVector256(x.Length - i); xVec &= remainderMask; yVec &= remainderMask; @@ -736,7 +738,7 @@ private static float CosineSimilarityCore(ReadOnlySpan x, ReadOnlySpan xVec = Vector128.LoadUnsafe(ref xRef, (uint)(x.Length - Vector128.Count)); Vector128 yVec = Vector128.LoadUnsafe(ref yRef, (uint)(x.Length - Vector128.Count)); - Vector128 remainderMask = LoadRemainderMaskSingleVector128(x.Length - i); + Vector128 remainderMask = CreateRemainderMaskSingleVector128(x.Length - i); xVec &= remainderMask; yVec &= remainderMask; @@ -767,10 +769,16 @@ private static float CosineSimilarityCore(ReadOnlySpan x, ReadOnlySpan( + /// Performs an aggregation over all elements in to produce a single-precision floating-point value. + /// Specifies the transform operation that should be applied to each element loaded from . + /// + /// Specifies the aggregation binary operation that should be applied to multiple values to aggregate them into a single value. + /// The aggregation is applied after the transform is applied to each element. + /// + private static float Aggregate( ReadOnlySpan x) - where TLoad : struct, IUnaryOperator - where TAggregate : struct, IAggregationOperator + where TTransformOperator : struct, IUnaryOperator + where TAggregationOperator : struct, IAggregationOperator { if (x.Length == 0) { @@ -783,7 +791,7 @@ private static float Aggregate( if (Vector512.IsHardwareAccelerated && x.Length >= Vector512.Count) { // Load the first vector as the initial set of results - Vector512 result = TLoad.Invoke(Vector512.LoadUnsafe(ref xRef, 0)); + Vector512 result = TTransformOperator.Invoke(Vector512.LoadUnsafe(ref xRef, 0)); int oneVectorFromEnd = x.Length - Vector512.Count; int i = Vector512.Count; @@ -791,29 +799,29 @@ private static float Aggregate( // least one full vector left to process. while (i <= oneVectorFromEnd) { - result = TAggregate.Invoke(result, TLoad.Invoke(Vector512.LoadUnsafe(ref xRef, (uint)i))); + result = TAggregationOperator.Invoke(result, TTransformOperator.Invoke(Vector512.LoadUnsafe(ref xRef, (uint)i))); i += Vector512.Count; } // Process the last vector in the span, masking off elements already processed. if (i != x.Length) { - result = TAggregate.Invoke(result, + result = TAggregationOperator.Invoke(result, Vector512.ConditionalSelect( - Vector512.Equals(LoadRemainderMaskSingleVector512(x.Length - i), Vector512.Zero), - Vector512.Create(TAggregate.IdentityValue), - TLoad.Invoke(Vector512.LoadUnsafe(ref xRef, (uint)(x.Length - Vector512.Count))))); + Vector512.Equals(CreateRemainderMaskSingleVector512(x.Length - i), Vector512.Zero), + Vector512.Create(TAggregationOperator.IdentityValue), + TTransformOperator.Invoke(Vector512.LoadUnsafe(ref xRef, (uint)(x.Length - Vector512.Count))))); } // Aggregate the lanes in the vector back into the scalar result - return TAggregate.Invoke(result); + return TAggregationOperator.Invoke(result); } #endif if (Vector256.IsHardwareAccelerated && x.Length >= Vector256.Count) { // Load the first vector as the initial set of results - Vector256 result = TLoad.Invoke(Vector256.LoadUnsafe(ref xRef, 0)); + Vector256 result = TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef, 0)); int oneVectorFromEnd = x.Length - Vector256.Count; int i = Vector256.Count; @@ -821,28 +829,28 @@ private static float Aggregate( // least one full vector left to process. while (i <= oneVectorFromEnd) { - result = TAggregate.Invoke(result, TLoad.Invoke(Vector256.LoadUnsafe(ref xRef, (uint)i))); + result = TAggregationOperator.Invoke(result, TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef, (uint)i))); i += Vector256.Count; } // Process the last vector in the span, masking off elements already processed. if (i != x.Length) { - result = TAggregate.Invoke(result, + result = TAggregationOperator.Invoke(result, Vector256.ConditionalSelect( - Vector256.Equals(LoadRemainderMaskSingleVector256(x.Length - i), Vector256.Zero), - Vector256.Create(TAggregate.IdentityValue), - TLoad.Invoke(Vector256.LoadUnsafe(ref xRef, (uint)(x.Length - Vector256.Count))))); + Vector256.Equals(CreateRemainderMaskSingleVector256(x.Length - i), Vector256.Zero), + Vector256.Create(TAggregationOperator.IdentityValue), + TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef, (uint)(x.Length - Vector256.Count))))); } // Aggregate the lanes in the vector back into the scalar result - return TAggregate.Invoke(result); + return TAggregationOperator.Invoke(result); } if (Vector128.IsHardwareAccelerated && x.Length >= Vector128.Count) { // Load the first vector as the initial set of results - Vector128 result = TLoad.Invoke(Vector128.LoadUnsafe(ref xRef, 0)); + Vector128 result = TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef, 0)); int oneVectorFromEnd = x.Length - Vector128.Count; int i = Vector128.Count; @@ -850,41 +858,47 @@ private static float Aggregate( // least one full vector left to process. while (i <= oneVectorFromEnd) { - result = TAggregate.Invoke(result, TLoad.Invoke(Vector128.LoadUnsafe(ref xRef, (uint)i))); + result = TAggregationOperator.Invoke(result, TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef, (uint)i))); i += Vector128.Count; } // Process the last vector in the span, masking off elements already processed. if (i != x.Length) { - result = TAggregate.Invoke(result, + result = TAggregationOperator.Invoke(result, Vector128.ConditionalSelect( - Vector128.Equals(LoadRemainderMaskSingleVector128(x.Length - i), Vector128.Zero), - Vector128.Create(TAggregate.IdentityValue), - TLoad.Invoke(Vector128.LoadUnsafe(ref xRef, (uint)(x.Length - Vector128.Count))))); + Vector128.Equals(CreateRemainderMaskSingleVector128(x.Length - i), Vector128.Zero), + Vector128.Create(TAggregationOperator.IdentityValue), + TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef, (uint)(x.Length - Vector128.Count))))); } // Aggregate the lanes in the vector back into the scalar result - return TAggregate.Invoke(result); + return TAggregationOperator.Invoke(result); } // Vectorization isn't supported or there are too few elements to vectorize. // Use a scalar implementation. { - float result = TLoad.Invoke(x[0]); + float result = TTransformOperator.Invoke(x[0]); for (int i = 1; i < x.Length; i++) { - result = TAggregate.Invoke(result, TLoad.Invoke(x[i])); + result = TAggregationOperator.Invoke(result, TTransformOperator.Invoke(x[i])); } return result; } } - private static float Aggregate( + /// Performs an aggregation over all pair-wise elements in and to produce a single-precision floating-point value. + /// Specifies the binary operation that should be applied to the pair-wise elements loaded from and . + /// + /// Specifies the aggregation binary operation that should be applied to multiple values to aggregate them into a single value. + /// The aggregation is applied to the results of the binary operations on the pair-wise values. + /// + private static float Aggregate( ReadOnlySpan x, ReadOnlySpan y) - where TBinary : struct, IBinaryOperator - where TAggregate : struct, IAggregationOperator + where TBinaryOperator : struct, IBinaryOperator + where TAggregationOperator : struct, IAggregationOperator { Debug.Assert(x.Length == y.Length); @@ -900,7 +914,7 @@ private static float Aggregate( if (Vector512.IsHardwareAccelerated && x.Length >= Vector512.Count) { // Load the first vector as the initial set of results - Vector512 result = TBinary.Invoke(Vector512.LoadUnsafe(ref xRef, 0), Vector512.LoadUnsafe(ref yRef, 0)); + Vector512 result = TBinaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, 0), Vector512.LoadUnsafe(ref yRef, 0)); int oneVectorFromEnd = x.Length - Vector512.Count; int i = Vector512.Count; @@ -908,31 +922,31 @@ private static float Aggregate( // least one full vector left to process. while (i <= oneVectorFromEnd) { - result = TAggregate.Invoke(result, TBinary.Invoke(Vector512.LoadUnsafe(ref xRef, (uint)i), Vector512.LoadUnsafe(ref yRef, (uint)i))); + result = TAggregationOperator.Invoke(result, TBinaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, (uint)i), Vector512.LoadUnsafe(ref yRef, (uint)i))); i += Vector512.Count; } // Process the last vector in the spans, masking off elements already processed. if (i != x.Length) { - result = TAggregate.Invoke(result, + result = TAggregationOperator.Invoke(result, Vector512.ConditionalSelect( - Vector512.Equals(LoadRemainderMaskSingleVector512(x.Length - i), Vector512.Zero), - Vector512.Create(TAggregate.IdentityValue), - TBinary.Invoke( + Vector512.Equals(CreateRemainderMaskSingleVector512(x.Length - i), Vector512.Zero), + Vector512.Create(TAggregationOperator.IdentityValue), + TBinaryOperator.Invoke( Vector512.LoadUnsafe(ref xRef, (uint)(x.Length - Vector512.Count)), Vector512.LoadUnsafe(ref yRef, (uint)(x.Length - Vector512.Count))))); } // Aggregate the lanes in the vector back into the scalar result - return TAggregate.Invoke(result); + return TAggregationOperator.Invoke(result); } #endif if (Vector256.IsHardwareAccelerated && x.Length >= Vector256.Count) { // Load the first vector as the initial set of results - Vector256 result = TBinary.Invoke(Vector256.LoadUnsafe(ref xRef, 0), Vector256.LoadUnsafe(ref yRef, 0)); + Vector256 result = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, 0), Vector256.LoadUnsafe(ref yRef, 0)); int oneVectorFromEnd = x.Length - Vector256.Count; int i = Vector256.Count; @@ -940,30 +954,30 @@ private static float Aggregate( // least one full vector left to process. while (i <= oneVectorFromEnd) { - result = TAggregate.Invoke(result, TBinary.Invoke(Vector256.LoadUnsafe(ref xRef, (uint)i), Vector256.LoadUnsafe(ref yRef, (uint)i))); + result = TAggregationOperator.Invoke(result, TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, (uint)i), Vector256.LoadUnsafe(ref yRef, (uint)i))); i += Vector256.Count; } // Process the last vector in the spans, masking off elements already processed. if (i != x.Length) { - result = TAggregate.Invoke(result, + result = TAggregationOperator.Invoke(result, Vector256.ConditionalSelect( - Vector256.Equals(LoadRemainderMaskSingleVector256(x.Length - i), Vector256.Zero), - Vector256.Create(TAggregate.IdentityValue), - TBinary.Invoke( + Vector256.Equals(CreateRemainderMaskSingleVector256(x.Length - i), Vector256.Zero), + Vector256.Create(TAggregationOperator.IdentityValue), + TBinaryOperator.Invoke( Vector256.LoadUnsafe(ref xRef, (uint)(x.Length - Vector256.Count)), Vector256.LoadUnsafe(ref yRef, (uint)(x.Length - Vector256.Count))))); } // Aggregate the lanes in the vector back into the scalar result - return TAggregate.Invoke(result); + return TAggregationOperator.Invoke(result); } if (Vector128.IsHardwareAccelerated && x.Length >= Vector128.Count) { // Load the first vector as the initial set of results - Vector128 result = TBinary.Invoke(Vector128.LoadUnsafe(ref xRef, 0), Vector128.LoadUnsafe(ref yRef, 0)); + Vector128 result = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, 0), Vector128.LoadUnsafe(ref yRef, 0)); int oneVectorFromEnd = x.Length - Vector128.Count; int i = Vector128.Count; @@ -971,34 +985,34 @@ private static float Aggregate( // least one full vector left to process. while (i <= oneVectorFromEnd) { - result = TAggregate.Invoke(result, TBinary.Invoke(Vector128.LoadUnsafe(ref xRef, (uint)i), Vector128.LoadUnsafe(ref yRef, (uint)i))); + result = TAggregationOperator.Invoke(result, TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, (uint)i), Vector128.LoadUnsafe(ref yRef, (uint)i))); i += Vector128.Count; } // Process the last vector in the spans, masking off elements already processed. if (i != x.Length) { - result = TAggregate.Invoke(result, + result = TAggregationOperator.Invoke(result, Vector128.ConditionalSelect( - Vector128.Equals(LoadRemainderMaskSingleVector128(x.Length - i), Vector128.Zero), - Vector128.Create(TAggregate.IdentityValue), - TBinary.Invoke( + Vector128.Equals(CreateRemainderMaskSingleVector128(x.Length - i), Vector128.Zero), + Vector128.Create(TAggregationOperator.IdentityValue), + TBinaryOperator.Invoke( Vector128.LoadUnsafe(ref xRef, (uint)(x.Length - Vector128.Count)), Vector128.LoadUnsafe(ref yRef, (uint)(x.Length - Vector128.Count))))); } // Aggregate the lanes in the vector back into the scalar result - return TAggregate.Invoke(result); + return TAggregationOperator.Invoke(result); } // Vectorization isn't supported or there are too few elements to vectorize. // Use a scalar implementation. { - float result = TBinary.Invoke(xRef, yRef); + float result = TBinaryOperator.Invoke(xRef, yRef); for (int i = 1; i < x.Length; i++) { - result = TAggregate.Invoke(result, - TBinary.Invoke( + result = TAggregationOperator.Invoke(result, + TBinaryOperator.Invoke( Unsafe.Add(ref xRef, i), Unsafe.Add(ref yRef, i))); } @@ -1008,10 +1022,11 @@ private static float Aggregate( } /// - /// This is the same as , - /// except it early exits on NaN. + /// This is the same as + /// with an identity transform, except it early exits on NaN. /// - private static float MinMaxCore(ReadOnlySpan x) where TMinMax : struct, IAggregationOperator + private static float MinMaxCore(ReadOnlySpan x) + where TMinMaxOperator : struct, IAggregationOperator { if (x.IsEmpty) { @@ -1049,7 +1064,7 @@ private static float MinMaxCore(ReadOnlySpan x) where TMinMax : return GetFirstNaN(current); } - result = TMinMax.Invoke(result, current); + result = TMinMaxOperator.Invoke(result, current); i += Vector512.Count; } @@ -1063,13 +1078,13 @@ private static float MinMaxCore(ReadOnlySpan x) where TMinMax : } result = Vector512.ConditionalSelect( - Vector512.Equals(LoadRemainderMaskSingleVector512(x.Length - i), Vector512.Zero), + Vector512.Equals(CreateRemainderMaskSingleVector512(x.Length - i), Vector512.Zero), result, - TMinMax.Invoke(result, current)); + TMinMaxOperator.Invoke(result, current)); } // Aggregate the lanes in the vector to create the final scalar result. - return TMinMax.Invoke(result); + return TMinMaxOperator.Invoke(result); } #endif @@ -1098,7 +1113,7 @@ private static float MinMaxCore(ReadOnlySpan x) where TMinMax : return GetFirstNaN(current); } - result = TMinMax.Invoke(result, current); + result = TMinMaxOperator.Invoke(result, current); i += Vector256.Count; } @@ -1112,13 +1127,13 @@ private static float MinMaxCore(ReadOnlySpan x) where TMinMax : } result = Vector256.ConditionalSelect( - Vector256.Equals(LoadRemainderMaskSingleVector256(x.Length - i), Vector256.Zero), + Vector256.Equals(CreateRemainderMaskSingleVector256(x.Length - i), Vector256.Zero), result, - TMinMax.Invoke(result, current)); + TMinMaxOperator.Invoke(result, current)); } // Aggregate the lanes in the vector to create the final scalar result. - return TMinMax.Invoke(result); + return TMinMaxOperator.Invoke(result); } if (Vector128.IsHardwareAccelerated && x.Length >= Vector128.Count) @@ -1146,7 +1161,7 @@ private static float MinMaxCore(ReadOnlySpan x) where TMinMax : return GetFirstNaN(current); } - result = TMinMax.Invoke(result, current); + result = TMinMaxOperator.Invoke(result, current); i += Vector128.Count; } @@ -1160,13 +1175,13 @@ private static float MinMaxCore(ReadOnlySpan x) where TMinMax : } result = Vector128.ConditionalSelect( - Vector128.Equals(LoadRemainderMaskSingleVector128(x.Length - i), Vector128.Zero), + Vector128.Equals(CreateRemainderMaskSingleVector128(x.Length - i), Vector128.Zero), result, - TMinMax.Invoke(result, current)); + TMinMaxOperator.Invoke(result, current)); } // Aggregate the lanes in the vector to create the final scalar result. - return TMinMax.Invoke(result); + return TMinMaxOperator.Invoke(result); } // Scalar path used when either vectorization is not supported or the input is too small to vectorize. @@ -1185,13 +1200,15 @@ private static float MinMaxCore(ReadOnlySpan x) where TMinMax : return current; } - result = TMinMax.Invoke(result, current); + result = TMinMaxOperator.Invoke(result, current); } return result; } } + /// Performs an element-wise operation on and writes the results to . + /// Specifies the operation to perform on each element loaded from . private static unsafe void InvokeSpanIntoSpan( ReadOnlySpan x, Span destination) where TUnaryOperator : struct, IUnaryOperator @@ -1227,7 +1244,7 @@ private static unsafe void InvokeSpanIntoSpan( { uint lastVectorIndex = (uint)(x.Length - Vector512.Count); Vector512.ConditionalSelect( - Vector512.Equals(LoadRemainderMaskSingleVector512(x.Length - i), Vector512.Zero), + Vector512.Equals(CreateRemainderMaskSingleVector512(x.Length - i), Vector512.Zero), Vector512.LoadUnsafe(ref dRef, lastVectorIndex), TUnaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, lastVectorIndex))).StoreUnsafe(ref dRef, lastVectorIndex); } @@ -1256,7 +1273,7 @@ private static unsafe void InvokeSpanIntoSpan( { uint lastVectorIndex = (uint)(x.Length - Vector256.Count); Vector256.ConditionalSelect( - Vector256.Equals(LoadRemainderMaskSingleVector256(x.Length - i), Vector256.Zero), + Vector256.Equals(CreateRemainderMaskSingleVector256(x.Length - i), Vector256.Zero), Vector256.LoadUnsafe(ref dRef, lastVectorIndex), TUnaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, lastVectorIndex))).StoreUnsafe(ref dRef, lastVectorIndex); } @@ -1284,7 +1301,7 @@ private static unsafe void InvokeSpanIntoSpan( { uint lastVectorIndex = (uint)(x.Length - Vector128.Count); Vector128.ConditionalSelect( - Vector128.Equals(LoadRemainderMaskSingleVector128(x.Length - i), Vector128.Zero), + Vector128.Equals(CreateRemainderMaskSingleVector128(x.Length - i), Vector128.Zero), Vector128.LoadUnsafe(ref dRef, lastVectorIndex), TUnaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, lastVectorIndex))).StoreUnsafe(ref dRef, lastVectorIndex); } @@ -1301,6 +1318,13 @@ private static unsafe void InvokeSpanIntoSpan( } } + /// + /// Performs an element-wise operation on and , + /// and writes the results to . + /// + /// + /// Specifies the operation to perform on the pair-wise elements loaded from and . + /// private static unsafe void InvokeSpanSpanIntoSpan( ReadOnlySpan x, ReadOnlySpan y, Span destination) where TBinaryOperator : struct, IBinaryOperator @@ -1344,7 +1368,7 @@ private static unsafe void InvokeSpanSpanIntoSpan( { uint lastVectorIndex = (uint)(x.Length - Vector512.Count); Vector512.ConditionalSelect( - Vector512.Equals(LoadRemainderMaskSingleVector512(x.Length - i), Vector512.Zero), + Vector512.Equals(CreateRemainderMaskSingleVector512(x.Length - i), Vector512.Zero), Vector512.LoadUnsafe(ref dRef, lastVectorIndex), TBinaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, lastVectorIndex), Vector512.LoadUnsafe(ref yRef, lastVectorIndex))).StoreUnsafe(ref dRef, lastVectorIndex); @@ -1375,7 +1399,7 @@ private static unsafe void InvokeSpanSpanIntoSpan( { uint lastVectorIndex = (uint)(x.Length - Vector256.Count); Vector256.ConditionalSelect( - Vector256.Equals(LoadRemainderMaskSingleVector256(x.Length - i), Vector256.Zero), + Vector256.Equals(CreateRemainderMaskSingleVector256(x.Length - i), Vector256.Zero), Vector256.LoadUnsafe(ref dRef, lastVectorIndex), TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, lastVectorIndex), Vector256.LoadUnsafe(ref yRef, lastVectorIndex))).StoreUnsafe(ref dRef, lastVectorIndex); @@ -1405,7 +1429,7 @@ private static unsafe void InvokeSpanSpanIntoSpan( { uint lastVectorIndex = (uint)(x.Length - Vector128.Count); Vector128.ConditionalSelect( - Vector128.Equals(LoadRemainderMaskSingleVector128(x.Length - i), Vector128.Zero), + Vector128.Equals(CreateRemainderMaskSingleVector128(x.Length - i), Vector128.Zero), Vector128.LoadUnsafe(ref dRef, lastVectorIndex), TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, lastVectorIndex), Vector128.LoadUnsafe(ref yRef, lastVectorIndex))).StoreUnsafe(ref dRef, lastVectorIndex); @@ -1424,8 +1448,32 @@ private static unsafe void InvokeSpanSpanIntoSpan( } } + /// + /// Performs an element-wise operation on and , + /// and writes the results to . + /// + /// + /// Specifies the operation to perform on each element loaded from with . + /// private static unsafe void InvokeSpanScalarIntoSpan( ReadOnlySpan x, float y, Span destination) + where TBinaryOperator : struct, IBinaryOperator => + InvokeSpanScalarIntoSpan(x, y, destination); + + /// + /// Performs an element-wise operation on and , + /// and writes the results to . + /// + /// + /// Specifies the operation to perform on each element loaded from . + /// It is not used with . + /// + /// + /// Specifies the operation to perform on the transformed value from with . + /// + private static unsafe void InvokeSpanScalarIntoSpan( + ReadOnlySpan x, float y, Span destination) + where TTransformOperator : struct, IUnaryOperator where TBinaryOperator : struct, IBinaryOperator { if (x.Length > destination.Length) @@ -1450,7 +1498,7 @@ private static unsafe void InvokeSpanScalarIntoSpan( // Loop handling one vector at a time. do { - TBinaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, (uint)i), + TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector512.LoadUnsafe(ref xRef, (uint)i)), yVec).StoreUnsafe(ref dRef, (uint)i); i += Vector512.Count; @@ -1462,9 +1510,9 @@ private static unsafe void InvokeSpanScalarIntoSpan( { uint lastVectorIndex = (uint)(x.Length - Vector512.Count); Vector512.ConditionalSelect( - Vector512.Equals(LoadRemainderMaskSingleVector512(x.Length - i), Vector512.Zero), + Vector512.Equals(CreateRemainderMaskSingleVector512(x.Length - i), Vector512.Zero), Vector512.LoadUnsafe(ref dRef, lastVectorIndex), - TBinaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, lastVectorIndex), + TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector512.LoadUnsafe(ref xRef, lastVectorIndex)), yVec)).StoreUnsafe(ref dRef, lastVectorIndex); } @@ -1483,7 +1531,7 @@ private static unsafe void InvokeSpanScalarIntoSpan( // Loop handling one vector at a time. do { - TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, (uint)i), + TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef, (uint)i)), yVec).StoreUnsafe(ref dRef, (uint)i); i += Vector256.Count; @@ -1495,9 +1543,9 @@ private static unsafe void InvokeSpanScalarIntoSpan( { uint lastVectorIndex = (uint)(x.Length - Vector256.Count); Vector256.ConditionalSelect( - Vector256.Equals(LoadRemainderMaskSingleVector256(x.Length - i), Vector256.Zero), + Vector256.Equals(CreateRemainderMaskSingleVector256(x.Length - i), Vector256.Zero), Vector256.LoadUnsafe(ref dRef, lastVectorIndex), - TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, lastVectorIndex), + TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef, lastVectorIndex)), yVec)).StoreUnsafe(ref dRef, lastVectorIndex); } @@ -1515,7 +1563,7 @@ private static unsafe void InvokeSpanScalarIntoSpan( // Loop handling one vector at a time. do { - TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, (uint)i), + TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef, (uint)i)), yVec).StoreUnsafe(ref dRef, (uint)i); i += Vector128.Count; @@ -1527,9 +1575,9 @@ private static unsafe void InvokeSpanScalarIntoSpan( { uint lastVectorIndex = (uint)(x.Length - Vector128.Count); Vector128.ConditionalSelect( - Vector128.Equals(LoadRemainderMaskSingleVector128(x.Length - i), Vector128.Zero), + Vector128.Equals(CreateRemainderMaskSingleVector128(x.Length - i), Vector128.Zero), Vector128.LoadUnsafe(ref dRef, lastVectorIndex), - TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, lastVectorIndex), + TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef, lastVectorIndex)), yVec)).StoreUnsafe(ref dRef, lastVectorIndex); } @@ -1539,13 +1587,21 @@ private static unsafe void InvokeSpanScalarIntoSpan( while (i < x.Length) { - Unsafe.Add(ref dRef, i) = TBinaryOperator.Invoke(Unsafe.Add(ref xRef, i), + Unsafe.Add(ref dRef, i) = TBinaryOperator.Invoke(TTransformOperator.Invoke(Unsafe.Add(ref xRef, i)), y); i++; } } + /// + /// Performs an element-wise operation on , , and , + /// and writes the results to . + /// + /// + /// Specifies the operation to perform on the pair-wise elements loaded from , , + /// and . + /// private static unsafe void InvokeSpanSpanSpanIntoSpan( ReadOnlySpan x, ReadOnlySpan y, ReadOnlySpan z, Span destination) where TTernaryOperator : struct, ITernaryOperator @@ -1592,7 +1648,7 @@ private static unsafe void InvokeSpanSpanSpanIntoSpan( { uint lastVectorIndex = (uint)(x.Length - Vector512.Count); Vector512.ConditionalSelect( - Vector512.Equals(LoadRemainderMaskSingleVector512(x.Length - i), Vector512.Zero), + Vector512.Equals(CreateRemainderMaskSingleVector512(x.Length - i), Vector512.Zero), Vector512.LoadUnsafe(ref dRef, lastVectorIndex), TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, lastVectorIndex), Vector512.LoadUnsafe(ref yRef, lastVectorIndex), @@ -1625,7 +1681,7 @@ private static unsafe void InvokeSpanSpanSpanIntoSpan( { uint lastVectorIndex = (uint)(x.Length - Vector256.Count); Vector256.ConditionalSelect( - Vector256.Equals(LoadRemainderMaskSingleVector256(x.Length - i), Vector256.Zero), + Vector256.Equals(CreateRemainderMaskSingleVector256(x.Length - i), Vector256.Zero), Vector256.LoadUnsafe(ref dRef, lastVectorIndex), TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, lastVectorIndex), Vector256.LoadUnsafe(ref yRef, lastVectorIndex), @@ -1657,7 +1713,7 @@ private static unsafe void InvokeSpanSpanSpanIntoSpan( { uint lastVectorIndex = (uint)(x.Length - Vector128.Count); Vector128.ConditionalSelect( - Vector128.Equals(LoadRemainderMaskSingleVector128(x.Length - i), Vector128.Zero), + Vector128.Equals(CreateRemainderMaskSingleVector128(x.Length - i), Vector128.Zero), Vector128.LoadUnsafe(ref dRef, lastVectorIndex), TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, lastVectorIndex), Vector128.LoadUnsafe(ref yRef, lastVectorIndex), @@ -1678,6 +1734,14 @@ private static unsafe void InvokeSpanSpanSpanIntoSpan( } } + /// + /// Performs an element-wise operation on , , and , + /// and writes the results to . + /// + /// + /// Specifies the operation to perform on the pair-wise elements loaded from and + /// with . + /// private static unsafe void InvokeSpanSpanScalarIntoSpan( ReadOnlySpan x, ReadOnlySpan y, float z, Span destination) where TTernaryOperator : struct, ITernaryOperator @@ -1724,7 +1788,7 @@ private static unsafe void InvokeSpanSpanScalarIntoSpan( { uint lastVectorIndex = (uint)(x.Length - Vector512.Count); Vector512.ConditionalSelect( - Vector512.Equals(LoadRemainderMaskSingleVector512(x.Length - i), Vector512.Zero), + Vector512.Equals(CreateRemainderMaskSingleVector512(x.Length - i), Vector512.Zero), Vector512.LoadUnsafe(ref dRef, lastVectorIndex), TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, lastVectorIndex), Vector512.LoadUnsafe(ref yRef, lastVectorIndex), @@ -1759,7 +1823,7 @@ private static unsafe void InvokeSpanSpanScalarIntoSpan( { uint lastVectorIndex = (uint)(x.Length - Vector256.Count); Vector256.ConditionalSelect( - Vector256.Equals(LoadRemainderMaskSingleVector256(x.Length - i), Vector256.Zero), + Vector256.Equals(CreateRemainderMaskSingleVector256(x.Length - i), Vector256.Zero), Vector256.LoadUnsafe(ref dRef, lastVectorIndex), TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, lastVectorIndex), Vector256.LoadUnsafe(ref yRef, lastVectorIndex), @@ -1793,7 +1857,7 @@ private static unsafe void InvokeSpanSpanScalarIntoSpan( { uint lastVectorIndex = (uint)(x.Length - Vector128.Count); Vector128.ConditionalSelect( - Vector128.Equals(LoadRemainderMaskSingleVector128(x.Length - i), Vector128.Zero), + Vector128.Equals(CreateRemainderMaskSingleVector128(x.Length - i), Vector128.Zero), Vector128.LoadUnsafe(ref dRef, lastVectorIndex), TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, lastVectorIndex), Vector128.LoadUnsafe(ref yRef, lastVectorIndex), @@ -1814,6 +1878,14 @@ private static unsafe void InvokeSpanSpanScalarIntoSpan( } } + /// + /// Performs an element-wise operation on , , and , + /// and writes the results to . + /// + /// + /// Specifies the operation to perform on the pair-wise element loaded from , with , + /// and the element loaded from . + /// private static unsafe void InvokeSpanScalarSpanIntoSpan( ReadOnlySpan x, float y, ReadOnlySpan z, Span destination) where TTernaryOperator : struct, ITernaryOperator @@ -1860,7 +1932,7 @@ private static unsafe void InvokeSpanScalarSpanIntoSpan( { uint lastVectorIndex = (uint)(x.Length - Vector512.Count); Vector512.ConditionalSelect( - Vector512.Equals(LoadRemainderMaskSingleVector512(x.Length - i), Vector512.Zero), + Vector512.Equals(CreateRemainderMaskSingleVector512(x.Length - i), Vector512.Zero), Vector512.LoadUnsafe(ref dRef, lastVectorIndex), TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, lastVectorIndex), yVec, @@ -1895,7 +1967,7 @@ private static unsafe void InvokeSpanScalarSpanIntoSpan( { uint lastVectorIndex = (uint)(x.Length - Vector256.Count); Vector256.ConditionalSelect( - Vector256.Equals(LoadRemainderMaskSingleVector256(x.Length - i), Vector256.Zero), + Vector256.Equals(CreateRemainderMaskSingleVector256(x.Length - i), Vector256.Zero), Vector256.LoadUnsafe(ref dRef, lastVectorIndex), TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, lastVectorIndex), yVec, @@ -1929,7 +2001,7 @@ private static unsafe void InvokeSpanScalarSpanIntoSpan( { uint lastVectorIndex = (uint)(x.Length - Vector128.Count); Vector128.ConditionalSelect( - Vector128.Equals(LoadRemainderMaskSingleVector128(x.Length - i), Vector128.Zero), + Vector128.Equals(CreateRemainderMaskSingleVector128(x.Length - i), Vector128.Zero), Vector128.LoadUnsafe(ref dRef, lastVectorIndex), TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, lastVectorIndex), yVec, @@ -1950,6 +2022,7 @@ private static unsafe void InvokeSpanScalarSpanIntoSpan( } } + /// Performs (x * y) + z. It will be rounded as one ternary operation if such an operation is accelerated on the current hardware. [MethodImpl(MethodImplOptions.AggressiveInlining)] private static Vector128 FusedMultiplyAdd(Vector128 x, Vector128 y, Vector128 addend) { @@ -1966,6 +2039,7 @@ private static Vector128 FusedMultiplyAdd(Vector128 x, Vector128Performs (x * y) + z. It will be rounded as one ternary operation if such an operation is accelerated on the current hardware. [MethodImpl(MethodImplOptions.AggressiveInlining)] private static Vector256 FusedMultiplyAdd(Vector256 x, Vector256 y, Vector256 addend) { @@ -1978,6 +2052,7 @@ private static Vector256 FusedMultiplyAdd(Vector256 x, Vector256Performs (x * y) + z. It will be rounded as one ternary operation if such an operation is accelerated on the current hardware. [MethodImpl(MethodImplOptions.AggressiveInlining)] private static Vector512 FusedMultiplyAdd(Vector512 x, Vector512 y, Vector512 addend) { @@ -1990,71 +2065,110 @@ private static Vector512 FusedMultiplyAdd(Vector512 x, Vector512Aggregates all of the elements in the into a single value. + /// Specifies the operation to be performed on each pair of values. [MethodImpl(MethodImplOptions.AggressiveInlining)] private static float HorizontalAggregate(Vector128 x) where TAggregate : struct, IBinaryOperator => TAggregate.Invoke( TAggregate.Invoke(x[0], x[1]), TAggregate.Invoke(x[2], x[3])); + /// Aggregates all of the elements in the into a single value. + /// Specifies the operation to be performed on each pair of values. [MethodImpl(MethodImplOptions.AggressiveInlining)] private static float HorizontalAggregate(Vector256 x) where TAggregate : struct, IBinaryOperator => HorizontalAggregate(TAggregate.Invoke(x.GetLower(), x.GetUpper())); #if NET8_0_OR_GREATER + /// Aggregates all of the elements in the into a single value. + /// Specifies the operation to be performed on each pair of values. [MethodImpl(MethodImplOptions.AggressiveInlining)] private static float HorizontalAggregate(Vector512 x) where TAggregate : struct, IBinaryOperator => HorizontalAggregate(TAggregate.Invoke(x.GetLower(), x.GetUpper())); #endif + /// Gets whether the specified is negative. private static bool IsNegative(float f) => float.IsNegative(f); + /// Gets whether each specified is negative. [MethodImpl(MethodImplOptions.AggressiveInlining)] private static Vector128 IsNegative(Vector128 vector) => Vector128.LessThan(vector.AsInt32(), Vector128.Zero).AsSingle(); + /// Gets whether each specified is negative. [MethodImpl(MethodImplOptions.AggressiveInlining)] private static Vector256 IsNegative(Vector256 vector) => Vector256.LessThan(vector.AsInt32(), Vector256.Zero).AsSingle(); #if NET8_0_OR_GREATER + /// Gets whether each specified is negative. [MethodImpl(MethodImplOptions.AggressiveInlining)] private static Vector512 IsNegative(Vector512 vector) => Vector512.LessThan(vector.AsInt32(), Vector512.Zero).AsSingle(); #endif - private static float GetFirstNaN(Vector128 vector) => - vector[BitOperations.TrailingZeroCount((~Vector128.Equals(vector, vector)).ExtractMostSignificantBits())]; + /// Finds and returns the first NaN value in . + /// The vector must have already been validated to contain a NaN. + private static float GetFirstNaN(Vector128 vector) + { + Debug.Assert(!Vector128.EqualsAll(vector, vector), "Expected vector to contain a NaN"); + return vector[BitOperations.TrailingZeroCount((~Vector128.Equals(vector, vector)).ExtractMostSignificantBits())]; + } - private static float GetFirstNaN(Vector256 vector) => - vector[BitOperations.TrailingZeroCount((~Vector256.Equals(vector, vector)).ExtractMostSignificantBits())]; + /// Finds and returns the first NaN value in . + /// The vector must have already been validated to contain a NaN. + private static float GetFirstNaN(Vector256 vector) + { + Debug.Assert(!Vector256.EqualsAll(vector, vector), "Expected vector to contain a NaN"); + return vector[BitOperations.TrailingZeroCount((~Vector256.Equals(vector, vector)).ExtractMostSignificantBits())]; + } #if NET8_0_OR_GREATER - private static float GetFirstNaN(Vector512 vector) => - vector[BitOperations.TrailingZeroCount((~Vector512.Equals(vector, vector)).ExtractMostSignificantBits())]; + /// Finds and returns the first NaN value in . + /// The vector must have already been validated to contain a NaN. + private static float GetFirstNaN(Vector512 vector) + { + Debug.Assert(!Vector512.EqualsAll(vector, vector), "Expected vector to contain a NaN"); + return vector[BitOperations.TrailingZeroCount((~Vector512.Equals(vector, vector)).ExtractMostSignificantBits())]; + } #endif + /// Gets the base 2 logarithm of . private static float Log2(float x) => MathF.Log2(x); + /// + /// Gets a vector mask that will be all-ones-set for the last elements + /// and zero for all other elements. + /// [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static unsafe Vector128 LoadRemainderMaskSingleVector128(int validItems) => + private static unsafe Vector128 CreateRemainderMaskSingleVector128(int count) => Vector128.LoadUnsafe( ref Unsafe.As(ref MemoryMarshal.GetReference(RemainderUInt32Mask_16x16)), - (uint)((validItems * 16) + 12)); // last four floats in the row + (uint)((count * 16) + 12)); // last four floats in the row + /// + /// Gets a vector mask that will be all-ones-set for the last elements + /// and zero for all other elements. + /// [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static unsafe Vector256 LoadRemainderMaskSingleVector256(int validItems) => + private static unsafe Vector256 CreateRemainderMaskSingleVector256(int count) => Vector256.LoadUnsafe( ref Unsafe.As(ref MemoryMarshal.GetReference(RemainderUInt32Mask_16x16)), - (uint)((validItems * 16) + 8)); // last eight floats in the row + (uint)((count * 16) + 8)); // last eight floats in the row #if NET8_0_OR_GREATER + /// + /// Gets a vector mask that will be all-ones-set for the last elements + /// and zero for all other elements. + /// [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static unsafe Vector512 LoadRemainderMaskSingleVector512(int validItems) => + private static unsafe Vector512 CreateRemainderMaskSingleVector512(int count) => Vector512.LoadUnsafe( ref Unsafe.As(ref MemoryMarshal.GetReference(RemainderUInt32Mask_16x16)), - (uint)(validItems * 16)); // all sixteen floats in the row + (uint)(count * 16)); // all sixteen floats in the row #endif + /// x + y private readonly struct AddOperator : IAggregationOperator { public static float Invoke(float x, float y) => x + y; @@ -2073,6 +2187,7 @@ ref Unsafe.As(ref MemoryMarshal.GetReference(RemainderUInt32Mask_16 public static float IdentityValue => 0; } + /// x - y private readonly struct SubtractOperator : IBinaryOperator { public static float Invoke(float x, float y) => x - y; @@ -2083,6 +2198,7 @@ ref Unsafe.As(ref MemoryMarshal.GetReference(RemainderUInt32Mask_16 #endif } + /// (x - y) * (x - y) private readonly struct SubtractSquaredOperator : IBinaryOperator { public static float Invoke(float x, float y) @@ -2112,6 +2228,7 @@ public static Vector512 Invoke(Vector512 x, Vector512 y) #endif } + /// x * y private readonly struct MultiplyOperator : IAggregationOperator { public static float Invoke(float x, float y) => x * y; @@ -2130,6 +2247,7 @@ public static Vector512 Invoke(Vector512 x, Vector512 y) public static float IdentityValue => 1; } + /// x / y private readonly struct DivideOperator : IBinaryOperator { public static float Invoke(float x, float y) => x / y; @@ -2140,6 +2258,7 @@ public static Vector512 Invoke(Vector512 x, Vector512 y) #endif } + /// MathF.Max(x, y) (but NaNs may not be propagated) private readonly struct MaxOperator : IAggregationOperator { [MethodImpl(MethodImplOptions.AggressiveInlining)] @@ -2183,6 +2302,7 @@ public static Vector512 Invoke(Vector512 x, Vector512 y) => #endif } + /// MathF.Max(x, y) private readonly struct MaxPropagateNaNOperator : IBinaryOperator { [MethodImpl(MethodImplOptions.AggressiveInlining)] @@ -2229,6 +2349,7 @@ public static Vector512 Invoke(Vector512 x, Vector512 y) => #endif } + /// Operator to get x or y based on which has the larger MathF.Abs (but NaNs may not be propagated) private readonly struct MaxMagnitudeOperator : IAggregationOperator { [MethodImpl(MethodImplOptions.AggressiveInlining)] @@ -2280,6 +2401,7 @@ public static Vector512 Invoke(Vector512 x, Vector512 y) #endif } + /// Operator to get x or y based on which has the larger MathF.Abs private readonly struct MaxMagnitudePropagateNaNOperator : IBinaryOperator { [MethodImpl(MethodImplOptions.AggressiveInlining)] @@ -2330,6 +2452,7 @@ public static Vector512 Invoke(Vector512 x, Vector512 y) #endif } + /// MathF.Min(x, y) (but NaNs may not be propagated) private readonly struct MinOperator : IAggregationOperator { [MethodImpl(MethodImplOptions.AggressiveInlining)] @@ -2373,6 +2496,7 @@ public static Vector512 Invoke(Vector512 x, Vector512 y) => #endif } + /// MathF.Min(x, y) private readonly struct MinPropagateNaNOperator : IBinaryOperator { [MethodImpl(MethodImplOptions.AggressiveInlining)] @@ -2419,6 +2543,7 @@ public static Vector512 Invoke(Vector512 x, Vector512 y) => #endif } + /// Operator to get x or y based on which has the smaller MathF.Abs (but NaNs may not be propagated) private readonly struct MinMagnitudeOperator : IAggregationOperator { [MethodImpl(MethodImplOptions.AggressiveInlining)] @@ -2469,6 +2594,7 @@ public static Vector512 Invoke(Vector512 x, Vector512 y) #endif } + /// Operator to get x or y based on which has the smaller MathF.Abs private readonly struct MinMagnitudePropagateNaNOperator : IBinaryOperator { [MethodImpl(MethodImplOptions.AggressiveInlining)] @@ -2519,6 +2645,7 @@ public static Vector512 Invoke(Vector512 x, Vector512 y) #endif } + /// -x private readonly struct NegateOperator : IUnaryOperator { public static float Invoke(float x) => -x; @@ -2529,6 +2656,7 @@ public static Vector512 Invoke(Vector512 x, Vector512 y) #endif } + /// (x + y) * z private readonly struct AddMultiplyOperator : ITernaryOperator { public static float Invoke(float x, float y, float z) => (x + y) * z; @@ -2539,6 +2667,7 @@ public static Vector512 Invoke(Vector512 x, Vector512 y) #endif } + /// (x * y) + z private readonly struct MultiplyAddOperator : ITernaryOperator { public static float Invoke(float x, float y, float z) => (x * y) + z; @@ -2549,6 +2678,7 @@ public static Vector512 Invoke(Vector512 x, Vector512 y) #endif } + /// x private readonly struct IdentityOperator : IUnaryOperator { public static float Invoke(float x) => x; @@ -2559,6 +2689,7 @@ public static Vector512 Invoke(Vector512 x, Vector512 y) #endif } + /// x * x private readonly struct SquaredOperator : IUnaryOperator { public static float Invoke(float x) => x * x; @@ -2569,6 +2700,7 @@ public static Vector512 Invoke(Vector512 x, Vector512 y) #endif } + /// MathF.Abs(x) private readonly struct AbsoluteOperator : IUnaryOperator { public static float Invoke(float x) => MathF.Abs(x); @@ -2579,6 +2711,7 @@ public static Vector512 Invoke(Vector512 x, Vector512 y) #endif } + /// MathF.Exp(x) private readonly struct ExpOperator : IUnaryOperator { // This code is based on `vrs4_expf` from amd/aocl-libm-ose @@ -2859,6 +2992,7 @@ public static Vector512 Invoke(Vector512 x) #endif } + /// MathF.Log(x) private readonly struct LogOperator : IUnaryOperator { // This code is based on `vrs4_logf` from amd/aocl-libm-ose @@ -3144,6 +3278,7 @@ public static Vector512 Invoke(Vector512 x) #endif } + /// MathF.Log2(x) private readonly struct Log2Operator : IUnaryOperator { // This code is based on `vrs4_log2f` from amd/aocl-libm-ose @@ -3424,6 +3559,18 @@ public static Vector512 Invoke(Vector512 x) #endif } + /// 1f / (1f + MathF.Exp(-x)) + private readonly struct SigmoidOperator : IUnaryOperator + { + public static float Invoke(float x) => 1.0f / (1.0f + MathF.Exp(-x)); + public static Vector128 Invoke(Vector128 x) => Vector128.Create(1f) / (Vector128.Create(1f) + ExpOperator.Invoke(-x)); + public static Vector256 Invoke(Vector256 x) => Vector256.Create(1f) / (Vector256.Create(1f) + ExpOperator.Invoke(-x)); +#if NET8_0_OR_GREATER + public static Vector512 Invoke(Vector512 x) => Vector512.Create(1f) / (Vector512.Create(1f) + ExpOperator.Invoke(-x)); +#endif + } + + /// Operator that takes one input value and returns a single value. private interface IUnaryOperator { static abstract float Invoke(float x); @@ -3434,6 +3581,7 @@ private interface IUnaryOperator #endif } + /// Operator that takes two input values and returns a single value. private interface IBinaryOperator { static abstract float Invoke(float x, float y); @@ -3444,6 +3592,7 @@ private interface IBinaryOperator #endif } + /// that specializes horizontal aggregation of all elements in a vector. private interface IAggregationOperator : IBinaryOperator { static abstract float Invoke(Vector128 x); @@ -3455,6 +3604,7 @@ private interface IAggregationOperator : IBinaryOperator static virtual float IdentityValue => throw new NotSupportedException(); } + /// Operator that takes three input values and returns a single value. private interface ITernaryOperator { static abstract float Invoke(float x, float y, float z); diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/TensorPrimitives.netstandard.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/TensorPrimitives.netstandard.cs index ae72988bbe00eb..28c5e2ac5d8bd5 100644 --- a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/TensorPrimitives.netstandard.cs +++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/TensorPrimitives.netstandard.cs @@ -9,6 +9,8 @@ namespace System.Numerics.Tensors { public static partial class TensorPrimitives { + /// Computes the cosine similarity between the two specified non-empty, equal-length tensors of single-precision floating-point numbers. + /// Assumes arguments have already been validated to be non-empty and equal length. private static float CosineSimilarityCore(ReadOnlySpan x, ReadOnlySpan y) { // Compute the same as: @@ -52,7 +54,7 @@ private static float CosineSimilarityCore(ReadOnlySpan x, ReadOnlySpan xVec = AsVector(ref xRef, x.Length - Vector.Count); Vector yVec = AsVector(ref yRef, x.Length - Vector.Count); - Vector remainderMask = LoadRemainderMaskSingleVector(x.Length - i); + Vector remainderMask = CreateRemainderMaskSingleVector(x.Length - i); xVec &= remainderMask; yVec &= remainderMask; @@ -85,10 +87,16 @@ private static float CosineSimilarityCore(ReadOnlySpan x, ReadOnlySpan( - ReadOnlySpan x, TLoad load = default, TAggregate aggregate = default) - where TLoad : struct, IUnaryOperator - where TAggregate : struct, IAggregationOperator + /// Performs an aggregation over all elements in to produce a single-precision floating-point value. + /// Specifies the transform operation that should be applied to each element loaded from . + /// + /// Specifies the aggregation binary operation that should be applied to multiple values to aggregate them into a single value. + /// The aggregation is applied after the transform is applied to each element. + /// + private static float Aggregate( + ReadOnlySpan x, TTransformOperator transformOp = default, TAggregationOperator aggregationOp = default) + where TTransformOperator : struct, IUnaryOperator + where TAggregationOperator : struct, IAggregationOperator { if (x.Length == 0) { @@ -97,12 +105,12 @@ private static float Aggregate( float result; - if (Vector.IsHardwareAccelerated && load.CanVectorize && x.Length >= Vector.Count) + if (Vector.IsHardwareAccelerated && transformOp.CanVectorize && x.Length >= Vector.Count) { ref float xRef = ref MemoryMarshal.GetReference(x); // Load the first vector as the initial set of results - Vector resultVector = load.Invoke(AsVector(ref xRef, 0)); + Vector resultVector = transformOp.Invoke(AsVector(ref xRef, 0)); int oneVectorFromEnd = x.Length - Vector.Count; int i = Vector.Count; @@ -110,44 +118,50 @@ private static float Aggregate( // least one full vector left to process. while (i <= oneVectorFromEnd) { - resultVector = aggregate.Invoke(resultVector, load.Invoke(AsVector(ref xRef, i))); + resultVector = aggregationOp.Invoke(resultVector, transformOp.Invoke(AsVector(ref xRef, i))); i += Vector.Count; } // Process the last vector in the span, masking off elements already processed. if (i != x.Length) { - resultVector = aggregate.Invoke(resultVector, + resultVector = aggregationOp.Invoke(resultVector, Vector.ConditionalSelect( - Vector.Equals(LoadRemainderMaskSingleVector(x.Length - i), Vector.Zero), - new Vector(aggregate.IdentityValue), - load.Invoke(AsVector(ref xRef, x.Length - Vector.Count)))); + Vector.Equals(CreateRemainderMaskSingleVector(x.Length - i), Vector.Zero), + new Vector(aggregationOp.IdentityValue), + transformOp.Invoke(AsVector(ref xRef, x.Length - Vector.Count)))); } // Aggregate the lanes in the vector back into the scalar result result = resultVector[0]; for (int f = 1; f < Vector.Count; f++) { - result = aggregate.Invoke(result, resultVector[f]); + result = aggregationOp.Invoke(result, resultVector[f]); } return result; } // Aggregate the remaining items in the input span. - result = load.Invoke(x[0]); + result = transformOp.Invoke(x[0]); for (int i = 1; i < x.Length; i++) { - result = aggregate.Invoke(result, load.Invoke(x[i])); + result = aggregationOp.Invoke(result, transformOp.Invoke(x[i])); } return result; } - private static float Aggregate( - ReadOnlySpan x, ReadOnlySpan y, TBinary binary = default, TAggregate aggregate = default) - where TBinary : struct, IBinaryOperator - where TAggregate : struct, IAggregationOperator + /// Performs an aggregation over all pair-wise elements in and to produce a single-precision floating-point value. + /// Specifies the binary operation that should be applied to the pair-wise elements loaded from and . + /// + /// Specifies the aggregation binary operation that should be applied to multiple values to aggregate them into a single value. + /// The aggregation is applied to the results of the binary operations on the pair-wise values. + /// + private static float Aggregate( + ReadOnlySpan x, ReadOnlySpan y, TBinaryOperator binaryOp = default, TAggregationOperator aggregationOp = default) + where TBinaryOperator : struct, IBinaryOperator + where TAggregationOperator : struct, IAggregationOperator { Debug.Assert(x.Length == y.Length); @@ -164,7 +178,7 @@ private static float Aggregate( if (Vector.IsHardwareAccelerated && x.Length >= Vector.Count) { // Load the first vector as the initial set of results - Vector resultVector = binary.Invoke(AsVector(ref xRef, 0), AsVector(ref yRef, 0)); + Vector resultVector = binaryOp.Invoke(AsVector(ref xRef, 0), AsVector(ref yRef, 0)); int oneVectorFromEnd = x.Length - Vector.Count; int i = Vector.Count; @@ -172,18 +186,18 @@ private static float Aggregate( // least one full vector left to process. while (i <= oneVectorFromEnd) { - resultVector = aggregate.Invoke(resultVector, binary.Invoke(AsVector(ref xRef, i), AsVector(ref yRef, i))); + resultVector = aggregationOp.Invoke(resultVector, binaryOp.Invoke(AsVector(ref xRef, i), AsVector(ref yRef, i))); i += Vector.Count; } // Process the last vector in the spans, masking off elements already processed. if (i != x.Length) { - resultVector = aggregate.Invoke(resultVector, + resultVector = aggregationOp.Invoke(resultVector, Vector.ConditionalSelect( - Vector.Equals(LoadRemainderMaskSingleVector(x.Length - i), Vector.Zero), - new Vector(aggregate.IdentityValue), - binary.Invoke( + Vector.Equals(CreateRemainderMaskSingleVector(x.Length - i), Vector.Zero), + new Vector(aggregationOp.IdentityValue), + binaryOp.Invoke( AsVector(ref xRef, x.Length - Vector.Count), AsVector(ref yRef, x.Length - Vector.Count)))); } @@ -192,23 +206,28 @@ private static float Aggregate( result = resultVector[0]; for (int f = 1; f < Vector.Count; f++) { - result = aggregate.Invoke(result, resultVector[f]); + result = aggregationOp.Invoke(result, resultVector[f]); } return result; } // Aggregate the remaining items in the input span. - result = binary.Invoke(x[0], y[0]); + result = binaryOp.Invoke(x[0], y[0]); for (int i = 1; i < x.Length; i++) { - result = aggregate.Invoke(result, binary.Invoke(x[i], y[i])); + result = aggregationOp.Invoke(result, binaryOp.Invoke(x[i], y[i])); } return result; } - private static float MinMaxCore(ReadOnlySpan x, TMinMax minMax = default) where TMinMax : struct, IBinaryOperator + /// + /// This is the same as + /// with an identity transform, except it early exits on NaN. + /// + private static float MinMaxCore(ReadOnlySpan x, TMinMaxOperator op = default) + where TMinMaxOperator : struct, IBinaryOperator { if (x.IsEmpty) { @@ -245,7 +264,7 @@ private static float MinMaxCore(ReadOnlySpan x, TMinMax minMax = goto Scalar; } - resultVector = minMax.Invoke(resultVector, current); + resultVector = op.Invoke(resultVector, current); i += Vector.Count; } @@ -258,13 +277,13 @@ private static float MinMaxCore(ReadOnlySpan x, TMinMax minMax = goto Scalar; } - resultVector = minMax.Invoke(resultVector, current); + resultVector = op.Invoke(resultVector, current); } // Aggregate the lanes in the vector to create the final scalar result. for (int f = 0; f < Vector.Count; f++) { - result = minMax.Invoke(result, resultVector[f]); + result = op.Invoke(result, resultVector[f]); } return result; @@ -283,12 +302,14 @@ private static float MinMaxCore(ReadOnlySpan x, TMinMax minMax = return current; } - result = minMax.Invoke(result, current); + result = op.Invoke(result, current); } return result; } + /// Performs an element-wise operation on and writes the results to . + /// Specifies the operation to perform on each element loaded from . private static void InvokeSpanIntoSpan( ReadOnlySpan x, Span destination, TUnaryOperator op = default) where TUnaryOperator : struct, IUnaryOperator @@ -324,7 +345,7 @@ private static void InvokeSpanIntoSpan( int lastVectorIndex = x.Length - Vector.Count; ref Vector dest = ref AsVector(ref dRef, lastVectorIndex); dest = Vector.ConditionalSelect( - Vector.Equals(LoadRemainderMaskSingleVector(x.Length - i), Vector.Zero), + Vector.Equals(CreateRemainderMaskSingleVector(x.Length - i), Vector.Zero), dest, op.Invoke(AsVector(ref xRef, lastVectorIndex))); } @@ -342,6 +363,13 @@ private static void InvokeSpanIntoSpan( } } + /// + /// Performs an element-wise operation on and , + /// and writes the results to . + /// + /// + /// Specifies the operation to perform on the pair-wise elements loaded from and . + /// private static void InvokeSpanSpanIntoSpan( ReadOnlySpan x, ReadOnlySpan y, Span destination, TBinaryOperator op = default) where TBinaryOperator : struct, IBinaryOperator @@ -385,7 +413,7 @@ private static void InvokeSpanSpanIntoSpan( int lastVectorIndex = x.Length - Vector.Count; ref Vector dest = ref AsVector(ref dRef, lastVectorIndex); dest = Vector.ConditionalSelect( - Vector.Equals(LoadRemainderMaskSingleVector(x.Length - i), Vector.Zero), + Vector.Equals(CreateRemainderMaskSingleVector(x.Length - i), Vector.Zero), dest, op.Invoke(AsVector(ref xRef, lastVectorIndex), AsVector(ref yRef, lastVectorIndex))); @@ -404,8 +432,32 @@ private static void InvokeSpanSpanIntoSpan( } } + /// + /// Performs an element-wise operation on and , + /// and writes the results to . + /// + /// + /// Specifies the operation to perform on each element loaded from with . + /// private static void InvokeSpanScalarIntoSpan( ReadOnlySpan x, float y, Span destination, TBinaryOperator op = default) + where TBinaryOperator : struct, IBinaryOperator => + InvokeSpanScalarIntoSpan(x, y, destination, default, op); + + /// + /// Performs an element-wise operation on and , + /// and writes the results to . + /// + /// + /// Specifies the operation to perform on each element loaded from . + /// It is not used with . + /// + /// + /// Specifies the operation to perform on the transformed value from with . + /// + private static void InvokeSpanScalarIntoSpan( + ReadOnlySpan x, float y, Span destination, TTransformOperator xTransformOp = default, TBinaryOperator binaryOp = default) + where TTransformOperator : struct, IUnaryOperator where TBinaryOperator : struct, IBinaryOperator { if (x.Length > destination.Length) @@ -419,7 +471,7 @@ private static void InvokeSpanScalarIntoSpan( ref float dRef = ref MemoryMarshal.GetReference(destination); int i = 0, oneVectorFromEnd; - if (Vector.IsHardwareAccelerated) + if (Vector.IsHardwareAccelerated && xTransformOp.CanVectorize) { oneVectorFromEnd = x.Length - Vector.Count; if (oneVectorFromEnd >= 0) @@ -428,7 +480,7 @@ private static void InvokeSpanScalarIntoSpan( Vector yVec = new(y); do { - AsVector(ref dRef, i) = op.Invoke(AsVector(ref xRef, i), + AsVector(ref dRef, i) = binaryOp.Invoke(xTransformOp.Invoke(AsVector(ref xRef, i)), yVec); i += Vector.Count; @@ -441,9 +493,9 @@ private static void InvokeSpanScalarIntoSpan( int lastVectorIndex = x.Length - Vector.Count; ref Vector dest = ref AsVector(ref dRef, lastVectorIndex); dest = Vector.ConditionalSelect( - Vector.Equals(LoadRemainderMaskSingleVector(x.Length - i), Vector.Zero), + Vector.Equals(CreateRemainderMaskSingleVector(x.Length - i), Vector.Zero), dest, - op.Invoke(AsVector(ref xRef, lastVectorIndex), yVec)); + binaryOp.Invoke(xTransformOp.Invoke(AsVector(ref xRef, lastVectorIndex)), yVec)); } return; @@ -453,13 +505,21 @@ private static void InvokeSpanScalarIntoSpan( // Loop handling one element at a time. while (i < x.Length) { - Unsafe.Add(ref dRef, i) = op.Invoke(Unsafe.Add(ref xRef, i), + Unsafe.Add(ref dRef, i) = binaryOp.Invoke(xTransformOp.Invoke(Unsafe.Add(ref xRef, i)), y); i++; } } + /// + /// Performs an element-wise operation on , , and , + /// and writes the results to . + /// + /// + /// Specifies the operation to perform on the pair-wise elements loaded from , , + /// and . + /// private static void InvokeSpanSpanSpanIntoSpan( ReadOnlySpan x, ReadOnlySpan y, ReadOnlySpan z, Span destination, TTernaryOperator op = default) where TTernaryOperator : struct, ITernaryOperator @@ -506,7 +566,7 @@ private static void InvokeSpanSpanSpanIntoSpan( int lastVectorIndex = x.Length - Vector.Count; ref Vector dest = ref AsVector(ref dRef, lastVectorIndex); dest = Vector.ConditionalSelect( - Vector.Equals(LoadRemainderMaskSingleVector(x.Length - i), Vector.Zero), + Vector.Equals(CreateRemainderMaskSingleVector(x.Length - i), Vector.Zero), dest, op.Invoke(AsVector(ref xRef, lastVectorIndex), AsVector(ref yRef, lastVectorIndex), @@ -528,6 +588,14 @@ private static void InvokeSpanSpanSpanIntoSpan( } } + /// + /// Performs an element-wise operation on , , and , + /// and writes the results to . + /// + /// + /// Specifies the operation to perform on the pair-wise elements loaded from and + /// with . + /// private static void InvokeSpanSpanScalarIntoSpan( ReadOnlySpan x, ReadOnlySpan y, float z, Span destination, TTernaryOperator op = default) where TTernaryOperator : struct, ITernaryOperator @@ -574,7 +642,7 @@ private static void InvokeSpanSpanScalarIntoSpan( int lastVectorIndex = x.Length - Vector.Count; ref Vector dest = ref AsVector(ref dRef, lastVectorIndex); dest = Vector.ConditionalSelect( - Vector.Equals(LoadRemainderMaskSingleVector(x.Length - i), Vector.Zero), + Vector.Equals(CreateRemainderMaskSingleVector(x.Length - i), Vector.Zero), dest, op.Invoke(AsVector(ref xRef, lastVectorIndex), AsVector(ref yRef, lastVectorIndex), @@ -596,6 +664,14 @@ private static void InvokeSpanSpanScalarIntoSpan( } } + /// + /// Performs an element-wise operation on , , and , + /// and writes the results to . + /// + /// + /// Specifies the operation to perform on the pair-wise element loaded from , with , + /// and the element loaded from . + /// private static void InvokeSpanScalarSpanIntoSpan( ReadOnlySpan x, float y, ReadOnlySpan z, Span destination, TTernaryOperator op = default) where TTernaryOperator : struct, ITernaryOperator @@ -642,7 +718,7 @@ private static void InvokeSpanScalarSpanIntoSpan( int lastVectorIndex = x.Length - Vector.Count; ref Vector dest = ref AsVector(ref dRef, lastVectorIndex); dest = Vector.ConditionalSelect( - Vector.Equals(LoadRemainderMaskSingleVector(x.Length - i), Vector.Zero), + Vector.Equals(CreateRemainderMaskSingleVector(x.Length - i), Vector.Zero), dest, op.Invoke(AsVector(ref xRef, lastVectorIndex), yVec, @@ -664,27 +740,36 @@ private static void InvokeSpanScalarSpanIntoSpan( } } + /// Loads a that begins at the specified from . [MethodImpl(MethodImplOptions.AggressiveInlining)] private static ref Vector AsVector(ref float start, int offset) => ref Unsafe.As>( ref Unsafe.Add(ref start, offset)); + /// Gets whether the specified is negative. private static unsafe bool IsNegative(float f) => *(int*)&f < 0; + /// Gets whether each specified is negative. private static unsafe Vector IsNegative(Vector f) => (Vector)Vector.LessThan((Vector)f, Vector.Zero); + /// Gets the base 2 logarithm of . private static float Log2(float x) => MathF.Log(x, 2); - private static unsafe Vector LoadRemainderMaskSingleVector(int validItems) + /// + /// Gets a vector mask that will be all-ones-set for the last elements + /// and zero for all other elements. + /// + private static unsafe Vector CreateRemainderMaskSingleVector(int count) { Debug.Assert(Vector.Count is 4 or 8 or 16); return AsVector( ref Unsafe.As(ref MemoryMarshal.GetReference(RemainderUInt32Mask_16x16)), - (validItems * 16) + (16 - Vector.Count)); + (count * 16) + (16 - Vector.Count)); } + /// x + y private readonly struct AddOperator : IAggregationOperator { public float Invoke(float x, float y) => x + y; @@ -692,12 +777,14 @@ ref Unsafe.As(ref MemoryMarshal.GetReference(RemainderUInt32Mask_16 public float IdentityValue => 0; } + /// x - y private readonly struct SubtractOperator : IBinaryOperator { public float Invoke(float x, float y) => x - y; public Vector Invoke(Vector x, Vector y) => x - y; } + /// (x - y) * (x - y) private readonly struct SubtractSquaredOperator : IBinaryOperator { public float Invoke(float x, float y) @@ -713,6 +800,7 @@ public Vector Invoke(Vector x, Vector y) } } + /// x * y private readonly struct MultiplyOperator : IAggregationOperator { public float Invoke(float x, float y) => x * y; @@ -720,12 +808,14 @@ public Vector Invoke(Vector x, Vector y) public float IdentityValue => 1; } + /// x / y private readonly struct DivideOperator : IBinaryOperator { public float Invoke(float x, float y) => x / y; public Vector Invoke(Vector x, Vector y) => x / y; } + /// MathF.Max(x, y) (but without guaranteed NaN propagation) private readonly struct MaxOperator : IBinaryOperator { [MethodImpl(MethodImplOptions.AggressiveInlining)] @@ -741,6 +831,7 @@ public Vector Invoke(Vector x, Vector y) => Vector.Max(x, y)); } + /// MathF.Max(x, y) private readonly struct MaxPropagateNaNOperator : IBinaryOperator { [MethodImpl(MethodImplOptions.AggressiveInlining)] @@ -757,6 +848,7 @@ public Vector Invoke(Vector x, Vector y) => x); } + /// Operator to get x or y based on which has the larger MathF.Abs (but NaNs may not be propagated) private readonly struct MaxMagnitudeOperator : IBinaryOperator { [MethodImpl(MethodImplOptions.AggressiveInlining)] @@ -780,6 +872,7 @@ public Vector Invoke(Vector x, Vector y) } } + /// Operator to get x or y based on which has the larger MathF.Abs private readonly struct MaxMagnitudePropagateNaNOperator : IBinaryOperator { [MethodImpl(MethodImplOptions.AggressiveInlining)] @@ -804,6 +897,7 @@ public Vector Invoke(Vector x, Vector y) } } + /// MathF.Min(x, y) (but NaNs may not be propagated) private readonly struct MinOperator : IBinaryOperator { [MethodImpl(MethodImplOptions.AggressiveInlining)] @@ -819,6 +913,7 @@ public Vector Invoke(Vector x, Vector y) => Vector.Min(x, y)); } + /// MathF.Min(x, y) private readonly struct MinPropagateNaNOperator : IBinaryOperator { [MethodImpl(MethodImplOptions.AggressiveInlining)] @@ -835,6 +930,7 @@ public Vector Invoke(Vector x, Vector y) => x); } + /// Operator to get x or y based on which has the smaller MathF.Abs (but NaNs may not be propagated) private readonly struct MinMagnitudeOperator : IBinaryOperator { [MethodImpl(MethodImplOptions.AggressiveInlining)] @@ -858,6 +954,7 @@ public Vector Invoke(Vector x, Vector y) } } + /// Operator to get x or y based on which has the smaller MathF.Abs private readonly struct MinMagnitudePropagateNaNOperator : IBinaryOperator { [MethodImpl(MethodImplOptions.AggressiveInlining)] @@ -883,6 +980,7 @@ public Vector Invoke(Vector x, Vector y) } } + /// -x private readonly struct NegateOperator : IUnaryOperator { public bool CanVectorize => true; @@ -890,18 +988,21 @@ public Vector Invoke(Vector x, Vector y) public Vector Invoke(Vector x) => -x; } + /// (x + y) * z private readonly struct AddMultiplyOperator : ITernaryOperator { public float Invoke(float x, float y, float z) => (x + y) * z; public Vector Invoke(Vector x, Vector y, Vector z) => (x + y) * z; } + /// (x * y) + z private readonly struct MultiplyAddOperator : ITernaryOperator { public float Invoke(float x, float y, float z) => (x * y) + z; public Vector Invoke(Vector x, Vector y, Vector z) => (x * y) + z; } + /// x private readonly struct IdentityOperator : IUnaryOperator { public bool CanVectorize => true; @@ -909,6 +1010,7 @@ public Vector Invoke(Vector x, Vector y) public Vector Invoke(Vector x) => x; } + /// x * x private readonly struct SquaredOperator : IUnaryOperator { public bool CanVectorize => true; @@ -916,6 +1018,7 @@ public Vector Invoke(Vector x, Vector y) public Vector Invoke(Vector x) => x * x; } + /// MathF.Abs(x) private readonly struct AbsoluteOperator : IUnaryOperator { public bool CanVectorize => true; @@ -926,42 +1029,43 @@ public Vector Invoke(Vector x, Vector y) private readonly struct ExpOperator : IUnaryOperator { public bool CanVectorize => false; - public float Invoke(float x) => MathF.Exp(x); - - public Vector Invoke(Vector x) - { - // Vectorizing requires shift left support, which is .NET 7 or later + public Vector Invoke(Vector x) => + // requires ShiftLeft (.NET 7+) throw new NotImplementedException(); - } } + /// MathF.Log(x) private readonly struct LogOperator : IUnaryOperator { public bool CanVectorize => false; - public float Invoke(float x) => MathF.Log(x); - - public Vector Invoke(Vector x) - { - // Vectorizing requires shift right support, which is .NET 7 or later + public Vector Invoke(Vector x) => + // requires ShiftRightArithmetic (.NET 7+) throw new NotImplementedException(); - } } + /// MathF.Log2(x) private readonly struct Log2Operator : IUnaryOperator { public bool CanVectorize => false; - public float Invoke(float x) => Log2(x); + public Vector Invoke(Vector x) => + // requires ShiftRightArithmetic (.NET 7+) + throw new NotImplementedException(); + } - public Vector Invoke(Vector x) - { - // Vectorizing requires shift right support, which is .NET 7 or later + /// 1f / (1f + MathF.Exp(-x)) + private readonly struct SigmoidOperator : IUnaryOperator + { + public bool CanVectorize => false; + public float Invoke(float x) => 1.0f / (1.0f + MathF.Exp(-x)); + public Vector Invoke(Vector x) => + // requires ShiftRightArithmetic (.NET 7+) throw new NotImplementedException(); - } } + /// Operator that takes one input value and returns a single value. private interface IUnaryOperator { bool CanVectorize { get; } @@ -969,17 +1073,20 @@ private interface IUnaryOperator Vector Invoke(Vector x); } + /// Operator that takes two input values and returns a single value. private interface IBinaryOperator { float Invoke(float x, float y); Vector Invoke(Vector x, Vector y); } + /// that specializes horizontal aggregation of all elements in a vector. private interface IAggregationOperator : IBinaryOperator { float IdentityValue { get; } } + /// Operator that takes three input values and returns a single value. private interface ITernaryOperator { float Invoke(float x, float y, float z); diff --git a/src/libraries/System.Numerics.Tensors/tests/TensorPrimitivesTests.cs b/src/libraries/System.Numerics.Tensors/tests/TensorPrimitivesTests.cs index 652fb07d3fa385..23f39f1bf6b0b7 100644 --- a/src/libraries/System.Numerics.Tensors/tests/TensorPrimitivesTests.cs +++ b/src/libraries/System.Numerics.Tensors/tests/TensorPrimitivesTests.cs @@ -58,30 +58,56 @@ private static unsafe float MathFMinMagnitude(float x, float y) return (ax < ay) || float.IsNaN(ax) || (ax == ay && *(int*)&x < 0) ? x : y; } - private static unsafe int SingleToInt32(float f) => *(int*)&f; + private static unsafe float UInt32ToSingle(uint i) => *(float*)&i; - private static unsafe float Int32ToSingle(int i) => *(float*)&i; - - private static float AnotherSingleNaN = Int32ToSingle(-8388607); - - /// Loads a variety of special values (e.g. NaN) into random positions in . - private static void SetSpecialValues(Span x) + /// Gets a variety of special values (e.g. NaN). + private static IEnumerable GetSpecialValues() { // NaN - x[s_random.Next(x.Length)] = float.NaN; - x[s_random.Next(x.Length)] = AnotherSingleNaN; + yield return UInt32ToSingle(0xFFC0_0000); // -qNaN / float.NaN + yield return UInt32ToSingle(0xFFFF_FFFF); // -qNaN / all-bits-set + yield return UInt32ToSingle(0x7FC0_0000); // +qNaN + yield return UInt32ToSingle(0xFFA0_0000); // -sNaN + yield return UInt32ToSingle(0x7FA0_0000); // +sNaN // +Infinity, -Infinity - x[s_random.Next(x.Length)] = float.PositiveInfinity; - x[s_random.Next(x.Length)] = float.NegativeInfinity; + yield return float.PositiveInfinity; + yield return float.NegativeInfinity; // +Zero, -Zero - x[s_random.Next(x.Length)] = +0.0f; - x[s_random.Next(x.Length)] = -0.0f; + yield return +0.0f; + yield return -0.0f; - // +Epsilon, -Epsilon - x[s_random.Next(x.Length)] = +float.Epsilon; - x[s_random.Next(x.Length)] = -float.Epsilon; + // Subnormals + yield return +float.Epsilon; + yield return -float.Epsilon; + yield return UInt32ToSingle(0x007F_FFFF); + yield return UInt32ToSingle(0x807F_FFFF); + + // Normals + yield return UInt32ToSingle(0x0080_0000); + yield return UInt32ToSingle(0x8080_0000); + yield return UInt32ToSingle(0x7F7F_FFFF); + yield return UInt32ToSingle(0x8F7F_FFFF); + } + + /// + /// Runs the specified action for each special value. Before the action is invoked, + /// the value is stored into a random position in , and the original + /// value is subsequently restored. + /// + private static void RunForEachSpecialValue(Action action, BoundedMemory x) + { + foreach (float value in GetSpecialValues()) + { + int pos = s_random.Next(x.Length); + float orig = x[pos]; + x[pos] = value; + + action(); + + x[pos] = orig; + } } /// @@ -95,7 +121,7 @@ private static void SetSpecialValues(Span x, Span y) // NaNs pos = s_random.Next(x.Length); x[pos] = float.NaN; - y[pos] = AnotherSingleNaN; + y[pos] = UInt32ToSingle(0x7FC0_0000); // +Infinity, -Infinity pos = s_random.Next(x.Length); @@ -835,6 +861,23 @@ public static void Exp_InPlace(int tensorLength) } } + [Theory] + [MemberData(nameof(TensorLengths))] + public static void Exp_SpecialValues(int tensorLength) + { + using BoundedMemory x = CreateAndFillTensor(tensorLength); + using BoundedMemory destination = CreateTensor(tensorLength); + + RunForEachSpecialValue(() => + { + TensorPrimitives.Exp(x, destination); + for (int i = 0; i < tensorLength; i++) + { + Assert.Equal(MathF.Exp(x[i]), destination[i], Tolerance); + } + }, x); + } + [Theory] [MemberData(nameof(TensorLengths))] public static void Exp_ThrowsForTooShortDestination(int tensorLength) @@ -1073,13 +1116,14 @@ public static void Log_SpecialValues(int tensorLength) using BoundedMemory x = CreateAndFillTensor(tensorLength); using BoundedMemory destination = CreateTensor(tensorLength); - SetSpecialValues(x); - - TensorPrimitives.Log(x, destination); - for (int i = 0; i < tensorLength; i++) + RunForEachSpecialValue(() => { - Assert.Equal(MathF.Log(x[i]), destination[i], Tolerance); - } + TensorPrimitives.Log(x, destination); + for (int i = 0; i < tensorLength; i++) + { + Assert.Equal(MathF.Log(x[i]), destination[i], Tolerance); + } + }, x); } [Theory] @@ -1139,13 +1183,14 @@ public static void Log2_SpecialValues(int tensorLength) using BoundedMemory x = CreateAndFillTensor(tensorLength); using BoundedMemory destination = CreateTensor(tensorLength); - SetSpecialValues(x); - - TensorPrimitives.Log2(x, destination); - for (int i = 0; i < tensorLength; i++) + RunForEachSpecialValue(() => { - Assert.Equal(MathF.Log(x[i], 2), destination[i], Tolerance); - } + TensorPrimitives.Log2(x, destination); + for (int i = 0; i < tensorLength; i++) + { + Assert.Equal(MathF.Log(x[i], 2), destination[i], Tolerance); + } + }, x); } [Theory] @@ -2271,12 +2316,19 @@ public static void Sigmoid_InPlace(int tensorLength) [Theory] [MemberData(nameof(TensorLengths))] - public static void Sigmoid_ThrowsForTooShortDestination(int tensorLength) + public static void Sigmoid_SpecialValues(int tensorLength) { using BoundedMemory x = CreateAndFillTensor(tensorLength); - using BoundedMemory destination = CreateTensor(tensorLength - 1); + using BoundedMemory destination = CreateTensor(tensorLength); - AssertExtensions.Throws("destination", () => TensorPrimitives.Sigmoid(x, destination)); + RunForEachSpecialValue(() => + { + TensorPrimitives.Sigmoid(x, destination); + for (int i = 0; i < tensorLength; i++) + { + Assert.Equal(1f / (1f + MathF.Exp(-x[i])), destination[i], Tolerance); + } + }, x); } [Theory] @@ -2311,6 +2363,16 @@ public static void Sigmoid_DestinationLongerThanSource() Assert.Equal(originalLast, dest[dest.Length - 1]); } + [Theory] + [MemberData(nameof(TensorLengths))] + public static void Sigmoid_ThrowsForTooShortDestination(int tensorLength) + { + using BoundedMemory x = CreateAndFillTensor(tensorLength); + using BoundedMemory destination = CreateTensor(tensorLength - 1); + + AssertExtensions.Throws("destination", () => TensorPrimitives.Sigmoid(x, destination)); + } + [Fact] public static void Sigmoid_ThrowsForEmptyInput() { @@ -2409,16 +2471,6 @@ public static void SoftMax_InPlace(int tensorLength) } } - [Theory] - [MemberData(nameof(TensorLengths))] - public static void SoftMax_ThrowsForTooShortDestination(int tensorLength) - { - using BoundedMemory x = CreateAndFillTensor(tensorLength); - using BoundedMemory destination = CreateTensor(tensorLength - 1); - - AssertExtensions.Throws("destination", () => TensorPrimitives.SoftMax(x, destination)); - } - [Theory] [InlineData(new float[] { 3, 1, .2f }, new float[] { 0.8360188f, 0.11314284f, 0.05083836f })] [InlineData(new float[] { 3, 4, 1 }, new float[] { 0.2594f, 0.705384f, 0.0351f })] @@ -2449,6 +2501,16 @@ public static void SoftMax_DestinationLongerThanSource() } } + [Theory] + [MemberData(nameof(TensorLengths))] + public static void SoftMax_ThrowsForTooShortDestination(int tensorLength) + { + using BoundedMemory x = CreateAndFillTensor(tensorLength); + using BoundedMemory destination = CreateTensor(tensorLength - 1); + + AssertExtensions.Throws("destination", () => TensorPrimitives.SoftMax(x, destination)); + } + [Fact] public static void SoftMax_ThrowsForEmptyInput() { From ae8836854baf5d55e4f1e113b41a650e55cab356 Mon Sep 17 00:00:00 2001 From: Stephen Toub Date: Thu, 5 Oct 2023 08:55:59 -0400 Subject: [PATCH 2/3] Disable tests on mono --- .../System.Numerics.Tensors/tests/TensorPrimitivesTests.cs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/libraries/System.Numerics.Tensors/tests/TensorPrimitivesTests.cs b/src/libraries/System.Numerics.Tensors/tests/TensorPrimitivesTests.cs index 23f39f1bf6b0b7..c40715840843b2 100644 --- a/src/libraries/System.Numerics.Tensors/tests/TensorPrimitivesTests.cs +++ b/src/libraries/System.Numerics.Tensors/tests/TensorPrimitivesTests.cs @@ -863,6 +863,7 @@ public static void Exp_InPlace(int tensorLength) [Theory] [MemberData(nameof(TensorLengths))] + [ActiveIssue("https://github.com/dotnet/runtime/issues/92885", TestRuntimes.Mono)] public static void Exp_SpecialValues(int tensorLength) { using BoundedMemory x = CreateAndFillTensor(tensorLength); @@ -2316,6 +2317,7 @@ public static void Sigmoid_InPlace(int tensorLength) [Theory] [MemberData(nameof(TensorLengths))] + [ActiveIssue("https://github.com/dotnet/runtime/issues/92885", TestRuntimes.Mono)] public static void Sigmoid_SpecialValues(int tensorLength) { using BoundedMemory x = CreateAndFillTensor(tensorLength); From c0bf298e20681d597d762b51b2356e93c71bd23e Mon Sep 17 00:00:00 2001 From: Stephen Toub Date: Thu, 5 Oct 2023 18:05:24 -0400 Subject: [PATCH 3/3] Address PR feedback --- .../src/System/Numerics/Tensors/TensorPrimitives.netcore.cs | 6 +++--- .../System.Numerics.Tensors/tests/TensorPrimitivesTests.cs | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/TensorPrimitives.netcore.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/TensorPrimitives.netcore.cs index c8a980d70107aa..75515ed9187c80 100644 --- a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/TensorPrimitives.netcore.cs +++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/TensorPrimitives.netcore.cs @@ -2112,7 +2112,7 @@ private static Vector512 IsNegative(Vector512 vector) => private static float GetFirstNaN(Vector128 vector) { Debug.Assert(!Vector128.EqualsAll(vector, vector), "Expected vector to contain a NaN"); - return vector[BitOperations.TrailingZeroCount((~Vector128.Equals(vector, vector)).ExtractMostSignificantBits())]; + return vector.GetElement(BitOperations.TrailingZeroCount((~Vector128.Equals(vector, vector)).ExtractMostSignificantBits())); } /// Finds and returns the first NaN value in . @@ -2120,7 +2120,7 @@ private static float GetFirstNaN(Vector128 vector) private static float GetFirstNaN(Vector256 vector) { Debug.Assert(!Vector256.EqualsAll(vector, vector), "Expected vector to contain a NaN"); - return vector[BitOperations.TrailingZeroCount((~Vector256.Equals(vector, vector)).ExtractMostSignificantBits())]; + return vector.GetElement(BitOperations.TrailingZeroCount((~Vector256.Equals(vector, vector)).ExtractMostSignificantBits())); } #if NET8_0_OR_GREATER @@ -2129,7 +2129,7 @@ private static float GetFirstNaN(Vector256 vector) private static float GetFirstNaN(Vector512 vector) { Debug.Assert(!Vector512.EqualsAll(vector, vector), "Expected vector to contain a NaN"); - return vector[BitOperations.TrailingZeroCount((~Vector512.Equals(vector, vector)).ExtractMostSignificantBits())]; + return vector.GetElement(BitOperations.TrailingZeroCount((~Vector512.Equals(vector, vector)).ExtractMostSignificantBits())); } #endif diff --git a/src/libraries/System.Numerics.Tensors/tests/TensorPrimitivesTests.cs b/src/libraries/System.Numerics.Tensors/tests/TensorPrimitivesTests.cs index c40715840843b2..1bb23713357364 100644 --- a/src/libraries/System.Numerics.Tensors/tests/TensorPrimitivesTests.cs +++ b/src/libraries/System.Numerics.Tensors/tests/TensorPrimitivesTests.cs @@ -87,8 +87,8 @@ private static IEnumerable GetSpecialValues() // Normals yield return UInt32ToSingle(0x0080_0000); yield return UInt32ToSingle(0x8080_0000); - yield return UInt32ToSingle(0x7F7F_FFFF); - yield return UInt32ToSingle(0x8F7F_FFFF); + yield return UInt32ToSingle(0x7F7F_FFFF); // MaxValue + yield return UInt32ToSingle(0xFF7F_FFFF); // MinValue } ///