Skip to content
This repository was archived by the owner on Jan 23, 2023. It is now read-only.

Commit e02e9ab

Browse files
committed
Intrinsicify SpanHelpers.IndexOfAny(char,char,char)
1 parent 981cc45 commit e02e9ab

File tree

1 file changed

+111
-3
lines changed

1 file changed

+111
-3
lines changed

src/System.Private.CoreLib/shared/System/SpanHelpers.Char.cs

Lines changed: 111 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -703,7 +703,15 @@ public static int IndexOfAny(ref char searchSpace, char value0, char value1, cha
703703
int offset = 0;
704704
int lengthToExamine = length;
705705

706-
if (Vector.IsHardwareAccelerated)
706+
if (Avx2.IsSupported || Sse2.IsSupported)
707+
{
708+
// Avx2 branch also operates on Sse2 sizes, so check is combined.
709+
if (length >= Vector128<byte>.Count * 2)
710+
{
711+
lengthToExamine = UnalignedCountVector128(ref searchSpace);
712+
}
713+
}
714+
else if (Vector.IsHardwareAccelerated)
707715
{
708716
if (length >= Vector<ushort>.Count * 2)
709717
{
@@ -744,9 +752,109 @@ public static int IndexOfAny(ref char searchSpace, char value0, char value1, cha
744752
offset += 1;
745753
}
746754

747-
// We get past SequentialScan only if IsHardwareAccelerated is true. However, we still have the redundant check to allow
755+
// We get past SequentialScan only if IsHardwareAccelerated or intrinsic .IsSupported is true. However, we still have the redundant check to allow
748756
// the JIT to see that the code is unreachable and eliminate it when the platform is not hardware accelerated.
749-
if (Vector.IsHardwareAccelerated)
757+
if (Avx2.IsSupported)
758+
{
759+
if (offset < length)
760+
{
761+
lengthToExamine = GetCharVector256SpanLength(offset, length);
762+
if (lengthToExamine > offset)
763+
{
764+
Vector256<ushort> values0 = Vector256.Create(value0);
765+
Vector256<ushort> values1 = Vector256.Create(value1);
766+
Vector256<ushort> values2 = Vector256.Create(value2);
767+
do
768+
{
769+
Vector256<ushort> search = LoadVector256(ref searchSpace, offset);
770+
// Note that MoveMask has converted the equal vector elements into a set of bit flags,
771+
// so the bit position in 'matches' corresponds to the byte offset of the matching element.
772+
int matches = Avx2.MoveMask(Avx2.CompareEqual(values0, search).AsByte());
773+
// Bitwise Or to combine the flagged matches for the second and third values to our match flags
774+
matches |= Avx2.MoveMask(Avx2.CompareEqual(values1, search).AsByte());
775+
matches |= Avx2.MoveMask(Avx2.CompareEqual(values2, search).AsByte());
776+
if (matches == 0)
777+
{
778+
// Zero flags set so no matches
779+
offset += Vector256<ushort>.Count;
780+
continue;
781+
}
782+
783+
// Find bitflag offset of first match and add to current offset,
784+
// flags are in bytes so divide for chars
785+
return offset + (BitOps.TrailingZeroCount(matches) / sizeof(char));
786+
} while (lengthToExamine > offset);
787+
}
788+
789+
lengthToExamine = GetCharVector128SpanLength(offset, length);
790+
if (lengthToExamine > offset)
791+
{
792+
Vector128<ushort> values0 = Vector128.Create(value0);
793+
Vector128<ushort> values1 = Vector128.Create(value1);
794+
Vector128<ushort> values2 = Vector128.Create(value2);
795+
Vector128<ushort> search = LoadVector128(ref searchSpace, offset);
796+
797+
// Same method as above
798+
int matches = Sse2.MoveMask(Sse2.CompareEqual(values0, search).AsByte());
799+
matches |= Sse2.MoveMask(Sse2.CompareEqual(values1, search).AsByte());
800+
matches |= Sse2.MoveMask(Sse2.CompareEqual(values2, search).AsByte());
801+
if (matches == 0)
802+
{
803+
// Zero flags set so no matches
804+
offset += Vector128<ushort>.Count;
805+
}
806+
else
807+
{
808+
// Find bitflag offset of first match and add to current offset,
809+
// flags are in bytes so divide for chars
810+
return offset + (BitOps.TrailingZeroCount(matches) / sizeof(char));
811+
}
812+
}
813+
814+
if (offset < length)
815+
{
816+
lengthToExamine = length - offset;
817+
goto SequentialScan;
818+
}
819+
}
820+
}
821+
else if (Sse2.IsSupported)
822+
{
823+
if (offset < length)
824+
{
825+
lengthToExamine = GetCharVector128SpanLength(offset, length);
826+
827+
Vector128<ushort> values0 = Vector128.Create(value0);
828+
Vector128<ushort> values1 = Vector128.Create(value1);
829+
Vector128<ushort> values2 = Vector128.Create(value2);
830+
while (lengthToExamine > offset)
831+
{
832+
Vector128<ushort> search = LoadVector128(ref searchSpace, offset);
833+
834+
// Same method as above
835+
int matches = Sse2.MoveMask(Sse2.CompareEqual(values0, search).AsByte());
836+
matches |= Sse2.MoveMask(Sse2.CompareEqual(values1, search).AsByte());
837+
matches |= Sse2.MoveMask(Sse2.CompareEqual(values2, search).AsByte());
838+
if (matches == 0)
839+
{
840+
// Zero flags set so no matches
841+
offset += Vector128<ushort>.Count;
842+
continue;
843+
}
844+
845+
// Find bitflag offset of first match and add to current offset,
846+
// flags are in bytes so divide for chars
847+
return offset + (BitOps.TrailingZeroCount(matches) / sizeof(char));
848+
}
849+
850+
if (offset < length)
851+
{
852+
lengthToExamine = length - offset;
853+
goto SequentialScan;
854+
}
855+
}
856+
}
857+
else if (Vector.IsHardwareAccelerated)
750858
{
751859
if (offset < length)
752860
{

0 commit comments

Comments
 (0)