From 12ce658ee44ee44b48bc2a784fd428ae0b110605 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Sat, 6 Jun 2020 00:00:03 -0700 Subject: [PATCH 1/8] Optimize SpanHelpers.IndexOf() for byte/char --- .../src/System/SpanHelpers.Byte.cs | 37 +++++++++++++++- .../src/System/SpanHelpers.Char.cs | 44 ++++++++++++++++++- 2 files changed, 79 insertions(+), 2 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs index 5132148ae58325..6ce2abbdac0fa8 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs @@ -6,6 +6,7 @@ using System.Numerics; using System.Runtime.CompilerServices; using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.Arm; using System.Runtime.Intrinsics.X86; using Internal.Runtime.CompilerServices; @@ -194,7 +195,7 @@ public static unsafe int IndexOf(ref byte searchSpace, byte value, int length) nuint offset = 0; // Use nuint for arithmetic to avoid unnecessary 64->32->64 truncations nuint lengthToExamine = (nuint)(uint)length; - if (Avx2.IsSupported || Sse2.IsSupported) + if (Avx2.IsSupported || Sse2.IsSupported || AdvSimd.IsSupported) { // Avx2 branch also operates on Sse2 sizes, so check is combined. if (length >= Vector128.Count * 2) @@ -370,6 +371,40 @@ public static unsafe int IndexOf(ref byte searchSpace, byte value, int length) } } } + else if (AdvSimd.IsSupported) + { + if (offset < (nuint)(uint)length) + { + lengthToExamine = GetByteVector128SpanLength(offset, length); + + Vector128 mask = Vector128.Create((byte)0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + Vector128 values = Vector128.Create(value); + while (lengthToExamine > offset) + { + Vector128 search = LoadVector128(ref searchSpace, offset); + + Vector128 compareResult = AdvSimd.CompareEqual(values, search); + if (AdvSimd.Arm64.MaxAcross(compareResult).ToScalar() == 0) + { + // Zero flags set so no matches + offset += (nuint)Vector128.Count; + continue; + } + + // Try to find the first lane that is set inside compareResult. + Vector128 invertedCompareResult = AdvSimd.Not(compareResult); + Vector128 selectedLanes = AdvSimd.Or(invertedCompareResult, mask); + byte firstIndexMatch = AdvSimd.Arm64.MinAcross(selectedLanes).ToScalar(); + return (int)(offset + firstIndexMatch); + } + + if (offset < (nuint)(uint)length) + { + lengthToExamine = ((nuint)(uint)length - offset); + goto SequentialScan; + } + } + } else if (Vector.IsHardwareAccelerated) { if (offset < (nuint)(uint)length) diff --git a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs index 02f0ba9920724f..d1b8ad4d9b7a9e 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs @@ -6,6 +6,7 @@ using System.Numerics; using System.Runtime.CompilerServices; using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.Arm; using System.Runtime.Intrinsics.X86; using Internal.Runtime.CompilerServices; @@ -222,7 +223,7 @@ public static unsafe int IndexOf(ref char searchSpace, char value, int length) { // Input isn't char aligned, we won't be able to align it to a Vector } - else if (Sse2.IsSupported) + else if (Sse2.IsSupported || AdvSimd.IsSupported) { // Avx2 branch also operates on Sse2 sizes, so check is combined. // Needs to be double length to allow us to align the data first. @@ -408,6 +409,47 @@ public static unsafe int IndexOf(ref char searchSpace, char value, int length) } } } + else if (AdvSimd.IsSupported) + { + if (offset < length) + { + Debug.Assert(length - offset >= Vector128.Count); + + lengthToExamine = GetCharVector128SpanLength(offset, length); + if (lengthToExamine > 0) + { + Vector128 values = Vector128.Create((ushort)value); + Vector128 mask = Vector128.Create((ushort)0, 1, 2, 3, 4, 5, 6, 7); + do + { + Debug.Assert(lengthToExamine >= Vector128.Count); + + Vector128 search = LoadVector128(ref searchSpace, offset); + + Vector128 compareResult = AdvSimd.CompareEqual(values, search); + if (AdvSimd.Arm64.MaxAcross(compareResult).ToScalar() == 0) + { + // Zero flags set so no matches + offset += Vector128.Count; + lengthToExamine -= Vector128.Count; + continue; + } + + // Try to find the first lane that is set inside compareResult. + Vector128 invertedCompareResult = AdvSimd.Not(compareResult); + Vector128 selectedLanes = AdvSimd.Or(invertedCompareResult, mask); + ushort firstIndexMatch = AdvSimd.Arm64.MinAcross(selectedLanes).ToScalar(); + return (int)(offset + firstIndexMatch); + } while (lengthToExamine > 0); + } + + if (offset < length) + { + lengthToExamine = length - offset; + goto SequentialScan; + } + } + } else if (Vector.IsHardwareAccelerated) { if (offset < length) From c5c317c6f71746d18220cb0bbb645142c26bf7a7 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Tue, 9 Jun 2020 17:15:35 -0700 Subject: [PATCH 2/8] Updated the implementation to use addp Also moved the code in a common method that can be used to `FindFirstMatchedLane` in other APIs as well. --- .../src/System/SpanHelpers.Byte.cs | 43 ++++++++++++++----- .../src/System/SpanHelpers.Char.cs | 18 ++++---- 2 files changed, 43 insertions(+), 18 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs index 6ce2abbdac0fa8..de211e52d8a184 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs @@ -195,7 +195,7 @@ public static unsafe int IndexOf(ref byte searchSpace, byte value, int length) nuint offset = 0; // Use nuint for arithmetic to avoid unnecessary 64->32->64 truncations nuint lengthToExamine = (nuint)(uint)length; - if (Avx2.IsSupported || Sse2.IsSupported || AdvSimd.IsSupported) + if (Sse2.IsSupported || AdvSimd.IsSupported) { // Avx2 branch also operates on Sse2 sizes, so check is combined. if (length >= Vector128.Count * 2) @@ -377,25 +377,26 @@ public static unsafe int IndexOf(ref byte searchSpace, byte value, int length) { lengthToExamine = GetByteVector128SpanLength(offset, length); - Vector128 mask = Vector128.Create((byte)0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + // 0x00100001 contant to select the first lane that is set in compareResult. + // The LSB 0x0001 corresponds to 0th lane, the MSB 0x0010 corresponds to 1st lane. + // The pattern is repeated for selecting other lanes. + Vector128 mask = Vector128.Create((uint)0x100001).AsByte(); + int matchedLane = 0; + Vector128 values = Vector128.Create(value); while (lengthToExamine > offset) { Vector128 search = LoadVector128(ref searchSpace, offset); Vector128 compareResult = AdvSimd.CompareEqual(values, search); - if (AdvSimd.Arm64.MaxAcross(compareResult).ToScalar() == 0) + if (!TryFindFirstMatchedLane(mask, compareResult, ref matchedLane)) { // Zero flags set so no matches offset += (nuint)Vector128.Count; continue; } - // Try to find the first lane that is set inside compareResult. - Vector128 invertedCompareResult = AdvSimd.Not(compareResult); - Vector128 selectedLanes = AdvSimd.Or(invertedCompareResult, mask); - byte firstIndexMatch = AdvSimd.Arm64.MinAcross(selectedLanes).ToScalar(); - return (int)(offset + firstIndexMatch); + return (int)(offset + (uint)matchedLane); } if (offset < (nuint)(uint)length) @@ -601,7 +602,7 @@ public static int IndexOfAny(ref byte searchSpace, byte value0, byte value1, int nuint offset = 0; // Use nuint for arithmetic to avoid unnecessary 64->32->64 truncations nuint lengthToExamine = (nuint)(uint)length; - if (Avx2.IsSupported || Sse2.IsSupported) + if (Sse2.IsSupported) { // Avx2 branch also operates on Sse2 sizes, so check is combined. if (length >= Vector128.Count * 2) @@ -842,7 +843,7 @@ public static int IndexOfAny(ref byte searchSpace, byte value0, byte value1, byt nuint offset = 0; // Use nuint for arithmetic to avoid unnecessary 64->32->64 truncations nuint lengthToExamine = (nuint)(uint)length; - if (Avx2.IsSupported || Sse2.IsSupported) + if (Sse2.IsSupported) { // Avx2 branch also operates on Sse2 sizes, so check is combined. if (length >= Vector128.Count * 2) @@ -1838,5 +1839,27 @@ private static unsafe nuint UnalignedCountVectorFromEnd(ref byte searchSpace, in nint unaligned = (nint)Unsafe.AsPointer(ref searchSpace) & (Vector.Count - 1); return (nuint)(uint)(((length & (Vector.Count - 1)) + unaligned) & (Vector.Count - 1)); } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static bool TryFindFirstMatchedLane(Vector128 mask, Vector128 compareResult, ref int matchedLane) + { + if (!AdvSimd.IsSupported) + { + return false; + } + + ulong matches = AdvSimd.Arm64.MaxPairwise(compareResult, compareResult).AsUInt64().ToScalar(); + if (matches == 0) + { + return false; + } + + // Try to find the first lane that is set inside compareResult. + Vector128 selectedLanes = AdvSimd.And(compareResult.AsByte(), mask); + Vector128 pairwiseSelectedLane = AdvSimd.Arm64.AddPairwise(selectedLanes, selectedLanes); + pairwiseSelectedLane = AdvSimd.Arm64.AddPairwise(pairwiseSelectedLane, pairwiseSelectedLane); + matchedLane = BitOperations.TrailingZeroCount(pairwiseSelectedLane.AsUInt64().ToScalar()); + return true; + } } } diff --git a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs index d1b8ad4d9b7a9e..a1636f7eb19f23 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs @@ -419,7 +419,13 @@ public static unsafe int IndexOf(ref char searchSpace, char value, int length) if (lengthToExamine > 0) { Vector128 values = Vector128.Create((ushort)value); - Vector128 mask = Vector128.Create((ushort)0, 1, 2, 3, 4, 5, 6, 7); + + // 0x00100001 contant to select the first lane that is set in compareResult. + // The LSB 0x0001 corresponds to 0th lane, the MSB 0x0010 corresponds to 1st lane. + // The pattern is repeated for selecting other lanes. + Vector128 mask = Vector128.Create((uint)0x100001).AsByte(); + int matchedLane = 0; + do { Debug.Assert(lengthToExamine >= Vector128.Count); @@ -427,19 +433,15 @@ public static unsafe int IndexOf(ref char searchSpace, char value, int length) Vector128 search = LoadVector128(ref searchSpace, offset); Vector128 compareResult = AdvSimd.CompareEqual(values, search); - if (AdvSimd.Arm64.MaxAcross(compareResult).ToScalar() == 0) + + if (!TryFindFirstMatchedLane(mask, compareResult.AsByte(), ref matchedLane)) { // Zero flags set so no matches offset += Vector128.Count; lengthToExamine -= Vector128.Count; continue; } - - // Try to find the first lane that is set inside compareResult. - Vector128 invertedCompareResult = AdvSimd.Not(compareResult); - Vector128 selectedLanes = AdvSimd.Or(invertedCompareResult, mask); - ushort firstIndexMatch = AdvSimd.Arm64.MinAcross(selectedLanes).ToScalar(); - return (int)(offset + firstIndexMatch); + return (int)(offset + (matchedLane >> 2)); } while (lengthToExamine > 0); } From 53c1d4ec5249d4b43680e8d0e6fbe2e184d96b81 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Wed, 10 Jun 2020 11:13:41 -0700 Subject: [PATCH 3/8] correct implementation --- .../src/System/SpanHelpers.Byte.cs | 10 ++++------ .../src/System/SpanHelpers.Char.cs | 8 +++----- 2 files changed, 7 insertions(+), 11 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs index de211e52d8a184..2e80879924375a 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs @@ -377,18 +377,16 @@ public static unsafe int IndexOf(ref byte searchSpace, byte value, int length) { lengthToExamine = GetByteVector128SpanLength(offset, length); - // 0x00100001 contant to select the first lane that is set in compareResult. - // The LSB 0x0001 corresponds to 0th lane, the MSB 0x0010 corresponds to 1st lane. - // The pattern is repeated for selecting other lanes. - Vector128 mask = Vector128.Create((uint)0x100001).AsByte(); + // Mask to help select first lane that is set. Each lane in the mask has different bit pattern. + Vector128 mask = Vector128.Create((byte)1, 4, 16, 64, 1, 4, 16, 64, 1, 4, 16, 64, 1, 4, 16, 64); int matchedLane = 0; Vector128 values = Vector128.Create(value); while (lengthToExamine > offset) { Vector128 search = LoadVector128(ref searchSpace, offset); - Vector128 compareResult = AdvSimd.CompareEqual(values, search); + if (!TryFindFirstMatchedLane(mask, compareResult, ref matchedLane)) { // Zero flags set so no matches @@ -396,7 +394,7 @@ public static unsafe int IndexOf(ref byte searchSpace, byte value, int length) continue; } - return (int)(offset + (uint)matchedLane); + return (int)(offset + (uint)(matchedLane >> 1)); } if (offset < (nuint)(uint)length) diff --git a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs index a1636f7eb19f23..09c574c40a9da6 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs @@ -420,10 +420,8 @@ public static unsafe int IndexOf(ref char searchSpace, char value, int length) { Vector128 values = Vector128.Create((ushort)value); - // 0x00100001 contant to select the first lane that is set in compareResult. - // The LSB 0x0001 corresponds to 0th lane, the MSB 0x0010 corresponds to 1st lane. - // The pattern is repeated for selecting other lanes. - Vector128 mask = Vector128.Create((uint)0x100001).AsByte(); + // Mask to help select first lane that is set. Each lane in the mask has different bit pattern. + Vector128 mask = Vector128.Create((byte)1, 4, 16, 64, 1, 4, 16, 64, 1, 4, 16, 64, 1, 4, 16, 64); int matchedLane = 0; do @@ -431,7 +429,6 @@ public static unsafe int IndexOf(ref char searchSpace, char value, int length) Debug.Assert(lengthToExamine >= Vector128.Count); Vector128 search = LoadVector128(ref searchSpace, offset); - Vector128 compareResult = AdvSimd.CompareEqual(values, search); if (!TryFindFirstMatchedLane(mask, compareResult.AsByte(), ref matchedLane)) @@ -441,6 +438,7 @@ public static unsafe int IndexOf(ref char searchSpace, char value, int length) lengthToExamine -= Vector128.Count; continue; } + return (int)(offset + (matchedLane >> 2)); } while (lengthToExamine > 0); } From de098d3950df31dc285a933b34dad0f8827cad3f Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Wed, 10 Jun 2020 22:21:48 -0700 Subject: [PATCH 4/8] updated comment and code cleanup --- .../System.Private.CoreLib/src/System/SpanHelpers.Byte.cs | 5 +++-- .../System.Private.CoreLib/src/System/SpanHelpers.Char.cs | 5 +++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs index 2e80879924375a..96741132538caf 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs @@ -377,7 +377,8 @@ public static unsafe int IndexOf(ref byte searchSpace, byte value, int length) { lengthToExamine = GetByteVector128SpanLength(offset, length); - // Mask to help select first lane that is set. Each lane in the mask has different bit pattern. + // Mask to help select first lane that is set. + // Each lane in the mask has different bit pattern to distinguish the lane selected. Vector128 mask = Vector128.Create((byte)1, 4, 16, 64, 1, 4, 16, 64, 1, 4, 16, 64, 1, 4, 16, 64); int matchedLane = 0; @@ -394,7 +395,7 @@ public static unsafe int IndexOf(ref byte searchSpace, byte value, int length) continue; } - return (int)(offset + (uint)(matchedLane >> 1)); + return (int)(offset + (uint)(matchedLane >> sizeof(byte))); } if (offset < (nuint)(uint)length) diff --git a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs index 09c574c40a9da6..3be4b0ef7457c5 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs @@ -420,7 +420,8 @@ public static unsafe int IndexOf(ref char searchSpace, char value, int length) { Vector128 values = Vector128.Create((ushort)value); - // Mask to help select first lane that is set. Each lane in the mask has different bit pattern. + // Mask to help select first lane that is set. + // Each lane in the mask has different bit pattern to distinguish the lane selected. Vector128 mask = Vector128.Create((byte)1, 4, 16, 64, 1, 4, 16, 64, 1, 4, 16, 64, 1, 4, 16, 64); int matchedLane = 0; @@ -439,7 +440,7 @@ public static unsafe int IndexOf(ref char searchSpace, char value, int length) continue; } - return (int)(offset + (matchedLane >> 2)); + return (int)(offset + (matchedLane >> sizeof(char))); } while (lengthToExamine > 0); } From 1ee145e82939fa230c4d289b0883dc66cfa6fe3e Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Wed, 10 Jun 2020 23:32:34 -0700 Subject: [PATCH 5/8] converted condition to an assert --- .../System.Private.CoreLib/src/System/SpanHelpers.Byte.cs | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs index 96741132538caf..590f07a08238c3 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs @@ -1842,10 +1842,7 @@ private static unsafe nuint UnalignedCountVectorFromEnd(ref byte searchSpace, in [MethodImpl(MethodImplOptions.AggressiveInlining)] private static bool TryFindFirstMatchedLane(Vector128 mask, Vector128 compareResult, ref int matchedLane) { - if (!AdvSimd.IsSupported) - { - return false; - } + Debug.Assert(AdvSimd.IsSupported); ulong matches = AdvSimd.Arm64.MaxPairwise(compareResult, compareResult).AsUInt64().ToScalar(); if (matches == 0) From b5d2e3a2467d2e1bb810be161a83f80a87ac2791 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Thu, 11 Jun 2020 13:15:54 -0700 Subject: [PATCH 6/8] switch to optimal implementation --- .../src/System/SpanHelpers.Byte.cs | 13 +++++----- .../src/System/SpanHelpers.Char.cs | 26 ++++++++++++++----- 2 files changed, 26 insertions(+), 13 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs index 590f07a08238c3..4285a4ec450128 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs @@ -377,9 +377,9 @@ public static unsafe int IndexOf(ref byte searchSpace, byte value, int length) { lengthToExamine = GetByteVector128SpanLength(offset, length); - // Mask to help select first lane that is set. - // Each lane in the mask has different bit pattern to distinguish the lane selected. - Vector128 mask = Vector128.Create((byte)1, 4, 16, 64, 1, 4, 16, 64, 1, 4, 16, 64, 1, 4, 16, 64); + // Mask to help find the first lane in compareResult that is set. + // MSB 0x10 corresponds to 1st lane, 0x01 corresponds to 0th lane and so forth. + Vector128 mask = Vector128.Create((ushort)0x1001).AsByte(); int matchedLane = 0; Vector128 values = Vector128.Create(value); @@ -395,7 +395,7 @@ public static unsafe int IndexOf(ref byte searchSpace, byte value, int length) continue; } - return (int)(offset + (uint)(matchedLane >> sizeof(byte))); + return (int)(offset + (uint)matchedLane); } if (offset < (nuint)(uint)length) @@ -1851,10 +1851,9 @@ private static bool TryFindFirstMatchedLane(Vector128 mask, Vector128 selectedLanes = AdvSimd.And(compareResult.AsByte(), mask); + Vector128 selectedLanes = AdvSimd.And(compareResult, mask); Vector128 pairwiseSelectedLane = AdvSimd.Arm64.AddPairwise(selectedLanes, selectedLanes); - pairwiseSelectedLane = AdvSimd.Arm64.AddPairwise(pairwiseSelectedLane, pairwiseSelectedLane); - matchedLane = BitOperations.TrailingZeroCount(pairwiseSelectedLane.AsUInt64().ToScalar()); + matchedLane = BitOperations.TrailingZeroCount(pairwiseSelectedLane.AsUInt64().ToScalar()) >> 2; return true; } } diff --git a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs index 3be4b0ef7457c5..d6413cb85c873e 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs @@ -419,10 +419,6 @@ public static unsafe int IndexOf(ref char searchSpace, char value, int length) if (lengthToExamine > 0) { Vector128 values = Vector128.Create((ushort)value); - - // Mask to help select first lane that is set. - // Each lane in the mask has different bit pattern to distinguish the lane selected. - Vector128 mask = Vector128.Create((byte)1, 4, 16, 64, 1, 4, 16, 64, 1, 4, 16, 64, 1, 4, 16, 64); int matchedLane = 0; do @@ -432,7 +428,7 @@ public static unsafe int IndexOf(ref char searchSpace, char value, int length) Vector128 search = LoadVector128(ref searchSpace, offset); Vector128 compareResult = AdvSimd.CompareEqual(values, search); - if (!TryFindFirstMatchedLane(mask, compareResult.AsByte(), ref matchedLane)) + if (!TryFindFirstMatchedLane(compareResult.AsByte(), ref matchedLane)) { // Zero flags set so no matches offset += Vector128.Count; @@ -440,7 +436,7 @@ public static unsafe int IndexOf(ref char searchSpace, char value, int length) continue; } - return (int)(offset + (matchedLane >> sizeof(char))); + return (int)(offset + matchedLane); } while (lengthToExamine > 0); } @@ -1093,5 +1089,23 @@ private static unsafe nint UnalignedCountVector128(ref char searchSpace) // isn't too important to pin to maintain the alignment. return (nint)(uint)(-(int)Unsafe.AsPointer(ref searchSpace) / ElementsPerByte) & (Vector128.Count - 1); } + + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static bool TryFindFirstMatchedLane(Vector128 compareResult, ref int matchedLane) + { + Debug.Assert(AdvSimd.IsSupported); + + ulong matches = AdvSimd.Arm64.MaxPairwise(compareResult, compareResult).AsUInt64().ToScalar(); + if (matches == 0) + { + return false; + } + + // Try to find the first lane that is set inside compareResult. + Vector128 pairwiseSelectedLane = AdvSimd.Arm64.AddPairwise(compareResult, compareResult); + matchedLane = BitOperations.TrailingZeroCount(pairwiseSelectedLane.AsUInt64().ToScalar()) >> 3; + return true; + } } } From ae00e93f415930c724eaa4688b51201efdde4d8f Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Thu, 11 Jun 2020 17:32:40 -0700 Subject: [PATCH 7/8] Use result of AddPairwise instead of MaxPairwise to decide if there is any match --- .../src/System/SpanHelpers.Byte.cs | 14 ++++++++------ .../src/System/SpanHelpers.Char.cs | 13 +++++++------ 2 files changed, 15 insertions(+), 12 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs index 4285a4ec450128..e9b9c9616869d6 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs @@ -1844,16 +1844,18 @@ private static bool TryFindFirstMatchedLane(Vector128 mask, Vector128 maskedSelectedLanes = AdvSimd.And(compareResult, mask); + Vector128 pairwiseSelectedLane = AdvSimd.Arm64.AddPairwise(maskedSelectedLanes, maskedSelectedLanes); + ulong selectedLanes = pairwiseSelectedLane.AsUInt64().ToScalar(); + if (selectedLanes == 0) { + // all lanes are zero, so nothing matched. return false; } - // Try to find the first lane that is set inside compareResult. - Vector128 selectedLanes = AdvSimd.And(compareResult, mask); - Vector128 pairwiseSelectedLane = AdvSimd.Arm64.AddPairwise(selectedLanes, selectedLanes); - matchedLane = BitOperations.TrailingZeroCount(pairwiseSelectedLane.AsUInt64().ToScalar()) >> 2; + // Find the first lane that is set inside compareResult. + matchedLane = BitOperations.TrailingZeroCount(selectedLanes) >> 2; return true; } } diff --git a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs index d6413cb85c873e..8ec5e2960e773a 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs @@ -428,7 +428,7 @@ public static unsafe int IndexOf(ref char searchSpace, char value, int length) Vector128 search = LoadVector128(ref searchSpace, offset); Vector128 compareResult = AdvSimd.CompareEqual(values, search); - if (!TryFindFirstMatchedLane(compareResult.AsByte(), ref matchedLane)) + if (!TryFindFirstMatchedLane(compareResult, ref matchedLane)) { // Zero flags set so no matches offset += Vector128.Count; @@ -1096,15 +1096,16 @@ private static bool TryFindFirstMatchedLane(Vector128 compareResult, ref i { Debug.Assert(AdvSimd.IsSupported); - ulong matches = AdvSimd.Arm64.MaxPairwise(compareResult, compareResult).AsUInt64().ToScalar(); - if (matches == 0) + Vector128 pairwiseSelectedLane = AdvSimd.Arm64.AddPairwise(compareResult.AsByte(), compareResult.AsByte()); + ulong selectedLanes = pairwiseSelectedLane.AsUInt64().ToScalar(); + if (selectedLanes == 0) { + // all lanes are zero, so nothing matched. return false; } - // Try to find the first lane that is set inside compareResult. - Vector128 pairwiseSelectedLane = AdvSimd.Arm64.AddPairwise(compareResult, compareResult); - matchedLane = BitOperations.TrailingZeroCount(pairwiseSelectedLane.AsUInt64().ToScalar()) >> 3; + // Find the first lane that is set inside compareResult. + matchedLane = BitOperations.TrailingZeroCount(selectedLanes) >> 3; return true; } } From 48241acc2ce12b1acd13db990a48c886094bc6a1 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Thu, 11 Jun 2020 17:32:59 -0700 Subject: [PATCH 8/8] Use AdvSimd.Arm64.IsSupported --- .../System.Private.CoreLib/src/System/SpanHelpers.Byte.cs | 6 +++--- .../System.Private.CoreLib/src/System/SpanHelpers.Char.cs | 8 ++++---- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs index e9b9c9616869d6..12e0ea00d2bc54 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs @@ -195,7 +195,7 @@ public static unsafe int IndexOf(ref byte searchSpace, byte value, int length) nuint offset = 0; // Use nuint for arithmetic to avoid unnecessary 64->32->64 truncations nuint lengthToExamine = (nuint)(uint)length; - if (Sse2.IsSupported || AdvSimd.IsSupported) + if (Sse2.IsSupported || AdvSimd.Arm64.IsSupported) { // Avx2 branch also operates on Sse2 sizes, so check is combined. if (length >= Vector128.Count * 2) @@ -371,7 +371,7 @@ public static unsafe int IndexOf(ref byte searchSpace, byte value, int length) } } } - else if (AdvSimd.IsSupported) + else if (AdvSimd.Arm64.IsSupported) { if (offset < (nuint)(uint)length) { @@ -1842,7 +1842,7 @@ private static unsafe nuint UnalignedCountVectorFromEnd(ref byte searchSpace, in [MethodImpl(MethodImplOptions.AggressiveInlining)] private static bool TryFindFirstMatchedLane(Vector128 mask, Vector128 compareResult, ref int matchedLane) { - Debug.Assert(AdvSimd.IsSupported); + Debug.Assert(AdvSimd.Arm64.IsSupported); // Find the first lane that is set inside compareResult. Vector128 maskedSelectedLanes = AdvSimd.And(compareResult, mask); diff --git a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs index 8ec5e2960e773a..b5ee0d05554ced 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs @@ -223,7 +223,7 @@ public static unsafe int IndexOf(ref char searchSpace, char value, int length) { // Input isn't char aligned, we won't be able to align it to a Vector } - else if (Sse2.IsSupported || AdvSimd.IsSupported) + else if (Sse2.IsSupported || AdvSimd.Arm64.IsSupported) { // Avx2 branch also operates on Sse2 sizes, so check is combined. // Needs to be double length to allow us to align the data first. @@ -409,7 +409,7 @@ public static unsafe int IndexOf(ref char searchSpace, char value, int length) } } } - else if (AdvSimd.IsSupported) + else if (AdvSimd.Arm64.IsSupported) { if (offset < length) { @@ -1092,9 +1092,9 @@ private static unsafe nint UnalignedCountVector128(ref char searchSpace) [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static bool TryFindFirstMatchedLane(Vector128 compareResult, ref int matchedLane) + private static bool TryFindFirstMatchedLane(Vector128 compareResult, ref int matchedLane) { - Debug.Assert(AdvSimd.IsSupported); + Debug.Assert(AdvSimd.Arm64.IsSupported); Vector128 pairwiseSelectedLane = AdvSimd.Arm64.AddPairwise(compareResult.AsByte(), compareResult.AsByte()); ulong selectedLanes = pairwiseSelectedLane.AsUInt64().ToScalar();