Skip to content
This repository was archived by the owner on Jan 23, 2023. It is now read-only.

Commit aa80e6c

Browse files
committed
Intrinsicify SpanHelpers.IndexOf(char)
1 parent 6bb19c3 commit aa80e6c

File tree

2 files changed

+126
-3
lines changed

2 files changed

+126
-3
lines changed

src/System.Private.CoreLib/shared/System/SpanHelpers.Byte.cs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -653,6 +653,8 @@ public static unsafe int IndexOfAny(ref byte searchSpace, byte value0, byte valu
653653
offset += 1;
654654
}
655655

656+
// We get past SequentialScan only if IsHardwareAccelerated or intrinsic .IsSupported is true. However, we still have the redundant check to allow
657+
// the JIT to see that the code is unreachable and eliminate it when the platform does not have hardware acceleration.
656658
if (Avx2.IsSupported)
657659
{
658660
if ((int)(byte*)offset < length)

src/System.Private.CoreLib/shared/System/SpanHelpers.Char.cs

Lines changed: 124 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
using System.Runtime.Intrinsics.X86;
99

1010
using Internal.Runtime.CompilerServices;
11+
using System.Runtime.Intrinsics;
1112

1213
namespace System
1314
{
@@ -196,7 +197,15 @@ public static int IndexOf(ref char searchSpace, char value, int length)
196197
int offset = 0;
197198
int lengthToExamine = length;
198199

199-
if (Vector.IsHardwareAccelerated)
200+
if (Avx2.IsSupported || Sse2.IsSupported)
201+
{
202+
// Avx2 branch also operates on Sse2 sizes, so check is combined.
203+
if (length >= Vector128<byte>.Count * 2)
204+
{
205+
lengthToExamine = UnalignedCountVector128(ref searchSpace);
206+
}
207+
}
208+
else if (Vector.IsHardwareAccelerated)
200209
{
201210
if (length >= Vector<ushort>.Count * 2)
202211
{
@@ -231,9 +240,96 @@ public static int IndexOf(ref char searchSpace, char value, int length)
231240
offset += 1;
232241
}
233242

234-
// We get past SequentialScan only if IsHardwareAccelerated is true. However, we still have the redundant check to allow
243+
// We get past SequentialScan only if IsHardwareAccelerated or intrinsic .IsSupported is true. However, we still have the redundant check to allow
235244
// the JIT to see that the code is unreachable and eliminate it when the platform does not have hardware acceleration.
236-
if (Vector.IsHardwareAccelerated)
245+
if (Avx2.IsSupported)
246+
{
247+
if (offset < length)
248+
{
249+
lengthToExamine = GetCharVector256SpanLength(offset, length);
250+
if (lengthToExamine > offset)
251+
{
252+
Vector256<ushort> values = Vector256.Create(value);
253+
do
254+
{
255+
Vector256<ushort> search = LoadVector256(ref searchSpace, offset);
256+
int matches = Avx2.MoveMask(Avx2.CompareEqual(values, search).AsByte());
257+
// Note that MoveMask has converted the equal vector elements into a set of bit flags,
258+
// So the bit position in 'matches' corresponds to the element offset.
259+
if (matches == 0)
260+
{
261+
// Zero flags set so no matches
262+
offset += Vector256<ushort>.Count;
263+
continue;
264+
}
265+
266+
// Find bitflag offset of first match and add to current offset,
267+
// flags are in bytes so divide for chars
268+
return offset + (BitOps.TrailingZeroCount(matches) / sizeof(char));
269+
} while (lengthToExamine > offset);
270+
}
271+
272+
lengthToExamine = GetCharVector128SpanLength(offset, length);
273+
if (lengthToExamine > offset)
274+
{
275+
Vector128<ushort> values = Vector128.Create(value);
276+
Vector128<ushort> search = LoadVector128(ref searchSpace, offset);
277+
278+
// Same method as above
279+
int matches = Sse2.MoveMask(Sse2.CompareEqual(values, search).AsByte());
280+
if (matches == 0)
281+
{
282+
// Zero flags set so no matches
283+
offset += Vector128<ushort>.Count;
284+
}
285+
else
286+
{
287+
// Find bitflag offset of first match and add to current offset,
288+
// flags are in bytes so divide for chars
289+
return offset + (BitOps.TrailingZeroCount(matches) / sizeof(char));
290+
}
291+
}
292+
293+
if (offset < length)
294+
{
295+
lengthToExamine = length - offset;
296+
goto SequentialScan;
297+
}
298+
}
299+
}
300+
else if (Sse2.IsSupported)
301+
{
302+
if (offset < length)
303+
{
304+
lengthToExamine = GetCharVector128SpanLength(offset, length);
305+
306+
Vector128<ushort> values = Vector128.Create(value);
307+
while (lengthToExamine > offset)
308+
{
309+
Vector128<ushort> search = LoadVector128(ref searchSpace, offset);
310+
311+
// Same method as above
312+
int matches = Sse2.MoveMask(Sse2.CompareEqual(values, search).AsByte());
313+
if (matches == 0)
314+
{
315+
// Zero flags set so no matches
316+
offset += Vector128<ushort>.Count;
317+
continue;
318+
}
319+
320+
// Find bitflag offset of first match and add to current offset,
321+
// flags are in bytes so divide for chars
322+
return offset + (BitOps.TrailingZeroCount(matches) / sizeof(char));
323+
}
324+
325+
if (offset < length)
326+
{
327+
lengthToExamine = length - offset;
328+
goto SequentialScan;
329+
}
330+
}
331+
}
332+
else if (Vector.IsHardwareAccelerated)
237333
{
238334
if (offset < length)
239335
{
@@ -842,6 +938,14 @@ private static int LocateLastFoundChar(ulong match)
842938
private static unsafe Vector<ushort> LoadVector(ref char start, int offset)
843939
=> Unsafe.ReadUnaligned<Vector<ushort>>(ref Unsafe.As<char, byte>(ref Unsafe.Add(ref start, offset)));
844940

941+
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static unsafe Vector128<ushort> LoadVector128(ref char start, int offset)
{
    // Reinterpret the chars beginning at start + offset as a 128-bit vector of ushort.
    // The load is unaligned, so the caller does not need any alignment guarantee.
    ref byte address = ref Unsafe.As<char, byte>(ref Unsafe.Add(ref start, offset));
    return Unsafe.ReadUnaligned<Vector128<ushort>>(ref address);
}
944+
945+
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static unsafe Vector256<ushort> LoadVector256(ref char start, int offset)
{
    // Reinterpret the chars beginning at start + offset as a 256-bit vector of ushort.
    // The load is unaligned, so the caller does not need any alignment guarantee.
    ref byte address = ref Unsafe.As<char, byte>(ref Unsafe.Add(ref start, offset));
    return Unsafe.ReadUnaligned<Vector256<ushort>>(ref address);
}
948+
845949
[MethodImpl(MethodImplOptions.AggressiveInlining)]
846950
private static unsafe UIntPtr LoadUIntPtr(ref char start, int offset)
847951
=> Unsafe.ReadUnaligned<UIntPtr>(ref Unsafe.As<char, byte>(ref Unsafe.Add(ref start, offset)));
@@ -850,6 +954,14 @@ private static unsafe UIntPtr LoadUIntPtr(ref char start, int offset)
850954
private static unsafe int GetCharVectorSpanLength(int offset, int length)
851955
=> ((length - offset) & ~(Vector<ushort>.Count - 1));
852956

957+
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static unsafe int GetCharVector128SpanLength(int offset, int length)
{
    // Round the remaining element count down to a whole number of
    // Vector128<ushort> chunks (Count is a power of two, so masking works).
    int remaining = length - offset;
    return remaining & ~(Vector128<ushort>.Count - 1);
}
960+
961+
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static unsafe int GetCharVector256SpanLength(int offset, int length)
{
    // Round the remaining element count down to a whole number of
    // Vector256<ushort> chunks (Count is a power of two, so masking works).
    int remaining = length - offset;
    return remaining & ~(Vector256<ushort>.Count - 1);
}
964+
853965
[MethodImpl(MethodImplOptions.AggressiveInlining)]
854966
private static unsafe int UnalignedCountVector(ref char searchSpace)
855967
{
@@ -862,6 +974,15 @@ private static unsafe int UnalignedCountVector(ref char searchSpace)
862974
return ((Vector<ushort>.Count - unaligned) & (Vector<ushort>.Count - 1));
863975
}
864976

977+
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static unsafe int UnalignedCountVector128(ref char searchSpace)
{
    // A char element occupies this many bytes.
    const int bytesPerChar = sizeof(ushort) / sizeof(byte);

    // Misalignment (in chars) of the start address relative to a Vector128-sized boundary.
    // Only the low bits of the address matter for the mask, so truncating the pointer to int is safe.
    int misalignedChars = ((int)Unsafe.AsPointer(ref searchSpace) & (Unsafe.SizeOf<Vector128<ushort>>() - 1)) / bytesPerChar;

    // Chars to scan sequentially before vector loads become aligned; zero when already aligned.
    return (Vector128<ushort>.Count - misalignedChars) & (Vector128<ushort>.Count - 1);
}
985+
865986
[MethodImpl(MethodImplOptions.AggressiveInlining)]
866987
private static unsafe int UnalignedCountVectorFromEnd(ref char searchSpace, int length)
867988
{

0 commit comments

Comments
 (0)