src/System.Private.CoreLib/shared/System/SpanHelpers.Byte.cs (110 additions, 2 deletions)
@@ -5,6 +5,7 @@
using System.Diagnostics;
using System.Runtime.CompilerServices;
using System.Numerics;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;

using Internal.Runtime.CompilerServices;
@@ -199,7 +200,17 @@ public static unsafe int IndexOf(ref byte searchSpace, byte value, int length)
IntPtr index = (IntPtr)0; // Use IntPtr for arithmetic to avoid unnecessary 64->32->64 truncations
Member:
Do we know why this code is currently using IntPtr rather than the:

#if BIT64
using nint = System.Int64;
#else
using nint = System.Int32;
#endif

code that everywhere else seems to prefer?

Member:

This is left over from when this code lived in CoreFX and was not compiled bitness-specific. It would be good for readability to switch this over to nuint.

Member Author:

It's a bit messy to clean that up in this PR; I'll do a follow-up.
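For concreteness, the bitness-sized alias the thread refers to would look roughly like the sketch below; this is illustrative of the possible follow-up only, not code in this PR, and nuint here is the file-level alias (this predates the C# 9 keyword):

#if BIT64
using nuint = System.UInt64; // alias at the top of the file
#else
using nuint = System.UInt32;
#endif

// Inside IndexOf, the IntPtr/byte* casts would then become plain integer arithmetic, e.g.:
// nuint index = 0;
// nuint nLength = (nuint)length;
// ...
// index += (nuint)Vector256<byte>.Count;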

IntPtr nLength = (IntPtr)length;

- if (Vector.IsHardwareAccelerated && length >= Vector<byte>.Count * 2)
+ if (Avx2.IsSupported && length >= Vector256<byte>.Count * 2)
{
int unaligned = (int)Unsafe.AsPointer(ref searchSpace) & (Vector256<byte>.Count - 1);
nLength = (IntPtr)((Vector256<byte>.Count - unaligned) & (Vector256<byte>.Count - 1));
}
else if (!Avx2.IsSupported && Sse2.IsSupported && length >= Vector128<byte>.Count * 2)
{
int unaligned = (int)Unsafe.AsPointer(ref searchSpace) & (Vector128<byte>.Count - 1);
nLength = (IntPtr)((Vector128<byte>.Count - unaligned) & (Vector128<byte>.Count - 1));
}
else if (!Avx2.IsSupported && !Sse2.IsSupported && Vector.IsHardwareAccelerated && length >= Vector<byte>.Count * 2)
{
int unaligned = (int)Unsafe.AsPointer(ref searchSpace) & (Vector<byte>.Count - 1);
nLength = (IntPtr)((Vector<byte>.Count - unaligned) & (Vector<byte>.Count - 1));
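For readers of the alignment prelude above: (N - (addr & (N - 1))) & (N - 1) is the number of bytes to scan sequentially before the pointer reaches an N-byte boundary, and it is zero when the pointer is already aligned. A small standalone illustration with a hypothetical helper name, assuming N is a power of two:

// Hypothetical illustration of the prelude computation; not part of the PR.
static int BytesUntilBoundary(ulong address, int n) // n is a power of two, e.g. 32 for Vector256<byte>
{
    int unaligned = (int)(address & (ulong)(n - 1)); // address % n
    return (n - unaligned) & (n - 1);                // 0 when already aligned
}

// BytesUntilBoundary(0x1000, 32) == 0   (already on a 32-byte boundary)
// BytesUntilBoundary(0x1001, 32) == 31  (scan 31 bytes, then the vector loads are aligned)
// BytesUntilBoundary(0x101F, 32) == 1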
@@ -255,7 +266,91 @@ public static unsafe int IndexOf(ref byte searchSpace, byte value, int length)
index += 1;
}

- if (Vector.IsHardwareAccelerated && ((int)(byte*)index < length))
+ if (Avx2.IsSupported && ((int)(byte*)index < length))
{
nLength = (IntPtr)((length - (int)(byte*)index) & ~(Vector256<byte>.Count - 1));

Vector256<byte> comparison256 = Vector256.Create(value);
while ((byte*)nLength > (byte*)index)
{
Vector256<byte> vSearch = Unsafe.ReadUnaligned<Vector256<byte>>(ref Unsafe.AddByteOffset(ref searchSpace, index));
int matches = Avx2.MoveMask(Avx2.CompareEqual(comparison256, vSearch));
if (matches == 0)
{
index += Vector256<byte>.Count;
continue;
}
// Find offset of first match
else if (Bmi1.IsSupported)
{
return ((int)(byte*)index) + (int)Bmi1.TrailingZeroCount((uint)matches);
}
else
{
return (int)(byte*)index + TrailingZeroCountFallback(matches);
}
}

nLength = (IntPtr)((length - (int)(byte*)index) & ~(Vector128<byte>.Count - 1));
if ((byte*)nLength > (byte*)index)
{
Vector128<byte> comparison128 = Vector128.Create(value);

Vector128<byte> vSearch = Unsafe.ReadUnaligned<Vector128<byte>>(ref Unsafe.AddByteOffset(ref searchSpace, index));
int matches = Sse2.MoveMask(Sse2.CompareEqual(comparison128, vSearch));
if (matches == 0)
{
index += Vector128<byte>.Count;
}
// Find offset of first match
else if (Bmi1.IsSupported)
{
return ((int)(byte*)index) + (int)Bmi1.TrailingZeroCount((uint)matches);
}
else
{
return (int)(byte*)index + TrailingZeroCountFallback(matches);
}
}

if ((int)(byte*)index < length)
{
nLength = (IntPtr)(length - (int)(byte*)index);
goto SequentialScan;
}
}
else if (!Avx2.IsSupported && Sse2.IsSupported && ((int)(byte*)index < length))
{
nLength = (IntPtr)((length - (int)(byte*)index) & ~(Vector128<byte>.Count - 1));

Vector128<byte> vComparison = Vector128.Create(value);
while ((byte*)nLength > (byte*)index)
{
Vector128<byte> vSearch = Unsafe.ReadUnaligned<Vector128<byte>>(ref Unsafe.AddByteOffset(ref searchSpace, index));
int matches = Sse2.MoveMask(Sse2.CompareEqual(vComparison, vSearch));
if (matches == 0)
{
index += Vector128<byte>.Count;
continue;
}
// Find offset of first match
else if (Bmi1.IsSupported)
{
return ((int)(byte*)index) + (int)Bmi1.TrailingZeroCount((uint)matches);
}
else
{
return (int)(byte*)index + TrailingZeroCountFallback(matches);
}
}

if ((int)(byte*)index < length)
{
nLength = (IntPtr)(length - (int)(byte*)index);
goto SequentialScan;
}
}
else if (!Avx2.IsSupported && !Sse2.IsSupported && Vector.IsHardwareAccelerated && ((int)(byte*)index < length))
{
nLength = (IntPtr)((length - (int)(byte*)index) & ~(Vector<byte>.Count - 1));

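All three vectorized branches above follow the same compare/movemask/tzcnt pattern: compare a whole vector against the target byte, collapse the per-byte results into a bitmask, and use the trailing zero count of that mask as the offset of the first match within the vector. A minimal standalone sketch of one 16-byte probe (hypothetical helper, assumes SSE2 and BMI1 are available):

using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;

static unsafe class IndexOfSketch // hypothetical container, not part of the PR
{
    // Returns the offset of the first occurrence of 'value' in the 16 bytes at 'p', or -1 if absent.
    public static int ProbeSse2(byte* p, byte value)
    {
        Vector128<byte> target = Vector128.Create(value);
        Vector128<byte> data = Sse2.LoadVector128(p);               // unaligned load
        int mask = Sse2.MoveMask(Sse2.CompareEqual(target, data));  // bit i set => p[i] == value
        if (mask == 0)
            return -1;
        return (int)Bmi1.TrailingZeroCount((uint)mask);             // offset of the first set bit
    }
}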
@@ -1152,5 +1247,18 @@ private static int LocateLastFoundByte(ulong match)
0x03ul << 32 |
0x02ul << 40 |
0x01ul << 48) + 1;

[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static int TrailingZeroCountFallback(int matches)
{
// https://graphics.stanford.edu/~seander/bithacks.html#ZerosOnRightMultLookup
return TrailingCountMultiplyDeBruijn[(int)(((uint)((matches & -matches) * 0x077CB531U)) >> 27)];
}

private static ReadOnlySpan<byte> TrailingCountMultiplyDeBruijn => new byte[32]
{
0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9
};
}
}
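For reference on the new fallback: matches & -matches isolates the lowest set bit, multiplying that power of two by the De Bruijn constant 0x077CB531 (modulo 2^32) places a distinct 5-bit pattern in the top bits, and >> 27 turns that into a unique index into the 32-entry table, which stores the bit position. An illustrative check against a naive loop, mirroring the table above (not part of the PR):

static class DeBruijnCheck // hypothetical container, not part of the PR
{
    static readonly byte[] Table =
    {
        0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
        31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9
    };

    public static int DeBruijnTzcnt(int matches) =>
        Table[(int)(((uint)((matches & -matches) * 0x077CB531U)) >> 27)];

    public static int NaiveTzcnt(uint matches) // caller guarantees matches != 0
    {
        int count = 0;
        while ((matches & 1) == 0) { matches >>= 1; count++; }
        return count;
    }
}

// e.g. for (int bit = 0; bit < 32; bit++)
//          Debug.Assert(DeBruijnCheck.DeBruijnTzcnt(1 << bit) == DeBruijnCheck.NaiveTzcnt(1u << bit));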