Skip to content
This repository was archived by the owner on Jan 23, 2023. It is now read-only.
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
284 changes: 213 additions & 71 deletions src/System.Private.CoreLib/shared/System/SpanHelpers.Char.cs
Original file line number Diff line number Diff line change
Expand Up @@ -560,104 +560,246 @@ public static unsafe int IndexOfAny(ref char searchSpace, char value0, char valu
}

/// <summary>
/// Searches <paramref name="length"/> characters, starting at <paramref name="searchStart"/>, for the first
/// character that equals <paramref name="value0"/>, <paramref name="value1"/> or <paramref name="value2"/>.
/// </summary>
/// <param name="searchStart">Reference to the first character of the region to search.</param>
/// <param name="value0">First character to look for.</param>
/// <param name="value1">Second character to look for.</param>
/// <param name="value2">Third character to look for.</param>
/// <param name="length">Number of characters to examine; asserted to be non-negative.</param>
/// <returns>The zero-based index of the first matching character, or -1 when no character matches.</returns>
[MethodImpl(MethodImplOptions.AggressiveOptimization)]
public static int IndexOfAny(ref char searchStart, char value0, char value1, char value2, int length)
{
    Debug.Assert(length >= 0);
    // If vectors are supported, we first align to Vector.
    // If the searchSpan has not been fixed or pinned the GC can relocate it during the
    // execution of this method, so the alignment only acts as best endeavour.
    // The GC cost is likely to dominate over the misalignment that may occur after;
    // so we give the GC a free hand to relocate and it is up to the caller
    // whether they are operating over fixed data.

    // offset: index of the next character to examine.
    // lengthToExamine: how many characters the current phase (sequential or vectorized) still has to cover.
    nint offset = 0;
    nint lengthToExamine = length;

    if (Avx2.IsSupported || Sse2.IsSupported)
    {
        // Avx2 branch also operates on Sse2 sizes, so check is combined.
        if (length >= Vector128<byte>.Count * 2)
        {
            // NOTE(review): presumably the number of chars before the next Vector128 boundary - confirm against helper.
            lengthToExamine = UnalignedCountVector128(ref searchStart);
        }
    }
    else if (Vector.IsHardwareAccelerated)
    {
        if (length >= Vector<ushort>.Count * 2)
        {
            // NOTE(review): presumably the number of chars before the next Vector<T> boundary - confirm against helper.
            lengthToExamine = UnalignedCountVector(ref searchStart);
        }
    }

    // Declared ahead of the label so that jumping back to SequentialScan does not re-enter a declaration.
    int lookUp;

SequentialScan:
    // Unrolled scalar scan, four characters per iteration.
    while (lengthToExamine >= 4)
    {
        ref char current = ref Add(ref searchStart, offset);

        lookUp = current;
        if (value0 == lookUp || value1 == lookUp || value2 == lookUp)
            goto Found;
        lookUp = Add(ref current, 1);
        if (value0 == lookUp || value1 == lookUp || value2 == lookUp)
            goto Found1;
        lookUp = Add(ref current, 2);
        if (value0 == lookUp || value1 == lookUp || value2 == lookUp)
            goto Found2;
        lookUp = Add(ref current, 3);
        if (value0 == lookUp || value1 == lookUp || value2 == lookUp)
            goto Found3;

        offset += 4;
        lengthToExamine -= 4;
    }

    // Scalar scan of the remaining (fewer than four) characters of this phase.
    while (lengthToExamine > 0)
    {
        lookUp = Add(ref searchStart, offset);
        if (value0 == lookUp || value1 == lookUp || value2 == lookUp)
            goto Found;

        offset += 1;
        lengthToExamine -= 1;
    }

    // Characters are left over only when the scan above was limited to the alignment prefix.
    if (offset < length)
    {
        if (Sse2.IsSupported || Vector.IsHardwareAccelerated)
        {
            goto VectorizedScan;
        }
        else
        {
            Debug.Fail("Moving to Vectorized scan when not supported");
        }
    }

NotFound:
    return -1;
Found3:
    return (int)(offset + 3);
Found2:
    return (int)(offset + 2);
Found1:
    return (int)(offset + 1);
Found:
    return (int)offset;

VectorizedScan:
    // We get past SequentialScan only if IsHardwareAccelerated or intrinsic .IsSupported is true. However, we still have the redundant check to allow
    // the JIT to see that the code is unreachable and eliminate it when the platform does not have hardware accelerated.
    if (Avx2.IsSupported)
    {
        lengthToExamine = GetCharVector256SpanLength(offset, length);
        if (lengthToExamine > 0)
        {
            Debug.Assert(length - offset >= Vector256<ushort>.Count);
            Vector256<ushort> values0 = Vector256.Create(value0);
            Vector256<ushort> values1 = Vector256.Create(value1);
            Vector256<ushort> values2 = Vector256.Create(value2);
            do
            {
                Vector256<ushort> search = LoadVector256(ref searchStart, offset);

                Vector256<ushort> matches0 = Avx2.CompareEqual(values0, search);
                Vector256<ushort> matches1 = Avx2.CompareEqual(values1, search);
                Vector256<ushort> matches2 = Avx2.CompareEqual(values2, search);
                // Bitwise Or to combine the flagged matches for the second and third values to our match flags
                int matches = Avx2.MoveMask(
                    Avx2.Or(Avx2.Or(matches0, matches1), matches2)
                        .AsByte());
                // Note that MoveMask has converted the equal vector elements into a set of bit flags,
                // So the bit position in 'matches' corresponds to the element offset.
                if (matches == 0)
                {
                    // Zero flags set so no matches
                    offset += Vector256<ushort>.Count;
                    lengthToExamine -= Vector256<ushort>.Count;
                    continue;
                }

                // Find bitflag offset of first match and add to current offset,
                // flags are in bytes so divide for chars
                return (int)(offset + (BitOperations.TrailingZeroCount(matches) / sizeof(char)));
            } while (lengthToExamine > 0);
        }

        // A single Vector128 step for what the Vector256 loop could not cover.
        lengthToExamine = GetCharVector128SpanLength(offset, length);
        if (lengthToExamine > 0)
        {
            Debug.Assert(length - offset >= Vector128<ushort>.Count);

            Vector128<ushort> values0 = Vector128.Create(value0);
            Vector128<ushort> values1 = Vector128.Create(value1);
            Vector128<ushort> values2 = Vector128.Create(value2);

            Vector128<ushort> search = LoadVector128(ref searchStart, offset);

            Vector128<ushort> matches0 = Sse2.CompareEqual(values0, search);
            Vector128<ushort> matches1 = Sse2.CompareEqual(values1, search);
            Vector128<ushort> matches2 = Sse2.CompareEqual(values2, search);

            // Same method as above
            int matches = Sse2.MoveMask(
                Sse2.Or(Sse2.Or(matches0, matches1), matches2)
                    .AsByte());
            if (matches == 0)
            {
                // Zero flags set so no matches
                offset += Vector128<ushort>.Count;
                // Don't need to change lengthToExamine here as we don't use its current value again.
            }
            else
            {
                // Find bitflag offset of first match and add to current offset,
                // flags are in bytes so divide for chars
                return (int)(offset + (BitOperations.TrailingZeroCount(matches) / sizeof(char)));
            }
        }

        // Hand whatever is left back to the scalar scan.
        lengthToExamine = length - offset;
        if (lengthToExamine > 0)
        {
            goto SequentialScan;
        }
    }
    else if (Sse2.IsSupported)
    {
        Debug.Assert(length - offset >= Vector128<ushort>.Count);
        lengthToExamine = GetCharVector128SpanLength(offset, length);
        Debug.Assert(lengthToExamine > 0);

        Vector128<ushort> values0 = Vector128.Create(value0);
        Vector128<ushort> values1 = Vector128.Create(value1);
        Vector128<ushort> values2 = Vector128.Create(value2);
        do
        {
            Vector128<ushort> search = LoadVector128(ref searchStart, offset);

            Vector128<ushort> matches0 = Sse2.CompareEqual(values0, search);
            Vector128<ushort> matches1 = Sse2.CompareEqual(values1, search);
            Vector128<ushort> matches2 = Sse2.CompareEqual(values2, search);

            // Same method as above
            int matches = Sse2.MoveMask(
                Sse2.Or(Sse2.Or(matches0, matches1), matches2)
                    .AsByte());
            if (matches == 0)
            {
                // Zero flags set so no matches
                offset += Vector128<ushort>.Count;
                lengthToExamine -= Vector128<ushort>.Count;
                continue;
            }

            // Find bitflag offset of first match and add to current offset,
            // flags are in bytes so divide for chars
            return (int)(offset + (BitOperations.TrailingZeroCount(matches) / sizeof(char)));
        } while (lengthToExamine > 0);

        // Hand whatever is left back to the scalar scan.
        lengthToExamine = length - offset;
        if (lengthToExamine > 0)
        {
            goto SequentialScan;
        }
    }
    else if (Vector.IsHardwareAccelerated)
    {
        Debug.Assert(length - offset >= Vector<ushort>.Count);
        lengthToExamine = GetCharVectorSpanLength(offset, length);
        Debug.Assert(lengthToExamine > 0);

        Vector<ushort> values0 = new Vector<ushort>(value0);
        Vector<ushort> values1 = new Vector<ushort>(value1);
        Vector<ushort> values2 = new Vector<ushort>(value2);

        do
        {
            Vector<ushort> search = LoadVector(ref searchStart, offset);
            var matches = Vector.BitwiseOr(
                            Vector.BitwiseOr(
                                Vector.Equals(search, values0),
                                Vector.Equals(search, values1)),
                            Vector.Equals(search, values2));
            if (Vector<ushort>.Zero.Equals(matches))
            {
                offset += Vector<ushort>.Count;
                lengthToExamine -= Vector<ushort>.Count;
                continue;
            }

            // Find offset of first match
            // NOTE(review): relies on LocateFirstFoundChar returning the element index of the first set lane - confirm.
            return (int)(offset + LocateFirstFoundChar(matches));
        } while (lengthToExamine > 0);

        // Hand whatever is left back to the scalar scan.
        lengthToExamine = length - offset;
        if (lengthToExamine > 0)
        {
            goto SequentialScan;
        }
    }

    goto NotFound;
}

[MethodImpl(MethodImplOptions.AggressiveOptimization)]
Expand Down