Skip to content
This repository was archived by the owner on Jan 23, 2023. It is now read-only.

Commit 991ced6

Browse files
committed
Intrinsicify SpanHelpers.IndexOfAny(char,char,char,char,char)
1 parent 733a137 commit 991ced6

File tree

1 file changed

+123
-3
lines changed

1 file changed

+123
-3
lines changed

src/System.Private.CoreLib/shared/System/SpanHelpers.Char.cs

Lines changed: 123 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1120,7 +1120,15 @@ public static int IndexOfAny(ref char searchSpace, char value0, char value1, cha
11201120
int offset = 0; // Use nint for arithmetic to avoid unnecessary 64->32->64 truncations
11211121
int lengthToExamine = length;
11221122

1123-
if (Vector.IsHardwareAccelerated)
1123+
if (Avx2.IsSupported || Sse2.IsSupported)
1124+
{
1125+
// Avx2 branch also operates on Sse2 sizes, so check is combined.
1126+
if (length >= Vector128<byte>.Count * 2)
1127+
{
1128+
lengthToExamine = UnalignedCountVector128(ref searchSpace);
1129+
}
1130+
}
1131+
else if (Vector.IsHardwareAccelerated)
11241132
{
11251133
if (length >= Vector<ushort>.Count * 2)
11261134
{
@@ -1161,9 +1169,121 @@ public static int IndexOfAny(ref char searchSpace, char value0, char value1, cha
11611169
offset += 1;
11621170
}
11631171

1164-
// We get past SequentialScan only if IsHardwareAccelerated is true. However, we still have the redundant check to allow
1172+
// We get past SequentialScan only if IsHardwareAccelerated or intrinsic .IsSupported is true. However, we still have the redundant check to allow
11651173
// the JIT to see that the code is unreachable and eliminate it when the platform does not have hardware acceleration.
1166-
if (Vector.IsHardwareAccelerated)
1174+
if (Avx2.IsSupported)
1175+
{
1176+
if (offset < length)
1177+
{
1178+
lengthToExamine = GetCharVector256SpanLength(offset, length);
1179+
if (lengthToExamine > offset)
1180+
{
1181+
Vector256<ushort> values0 = Vector256.Create(value0);
1182+
Vector256<ushort> values1 = Vector256.Create(value1);
1183+
Vector256<ushort> values2 = Vector256.Create(value2);
1184+
Vector256<ushort> values3 = Vector256.Create(value3);
1185+
Vector256<ushort> values4 = Vector256.Create(value4);
1186+
do
1187+
{
1188+
Vector256<ushort> search = LoadVector256(ref searchSpace, offset);
1189+
// Note that MoveMask has converted the equal vector elements into a set of bit flags,
1190+
// so each bit position in 'matches' corresponds to a byte offset within the vector (two bits per matching char).
1191+
int matches = Avx2.MoveMask(Avx2.CompareEqual(values0, search).AsByte());
1192+
// Bitwise Or to combine the flagged matches for the second, third, fourth and fifth values to our match flags
1193+
matches |= Avx2.MoveMask(Avx2.CompareEqual(values1, search).AsByte());
1194+
matches |= Avx2.MoveMask(Avx2.CompareEqual(values2, search).AsByte());
1195+
matches |= Avx2.MoveMask(Avx2.CompareEqual(values3, search).AsByte());
1196+
matches |= Avx2.MoveMask(Avx2.CompareEqual(values4, search).AsByte());
1197+
if (matches == 0)
1198+
{
1199+
// Zero flags set so no matches
1200+
offset += Vector256<ushort>.Count;
1201+
continue;
1202+
}
1203+
1204+
// Find bitflag offset of first match and add to current offset,
1205+
// flags are in bytes so divide for chars
1206+
return offset + (BitOps.TrailingZeroCount(matches) / sizeof(char));
1207+
} while (lengthToExamine > offset);
1208+
}
1209+
1210+
lengthToExamine = GetCharVector128SpanLength(offset, length);
1211+
if (lengthToExamine > offset)
1212+
{
1213+
Vector128<ushort> values0 = Vector128.Create(value0);
1214+
Vector128<ushort> values1 = Vector128.Create(value1);
1215+
Vector128<ushort> values2 = Vector128.Create(value2);
1216+
Vector128<ushort> values3 = Vector128.Create(value3);
1217+
Vector128<ushort> values4 = Vector128.Create(value4);
1218+
Vector128<ushort> search = LoadVector128(ref searchSpace, offset);
1219+
1220+
// Same method as above
1221+
int matches = Sse2.MoveMask(Sse2.CompareEqual(values0, search).AsByte());
1222+
matches |= Sse2.MoveMask(Sse2.CompareEqual(values1, search).AsByte());
1223+
matches |= Sse2.MoveMask(Sse2.CompareEqual(values2, search).AsByte());
1224+
matches |= Sse2.MoveMask(Sse2.CompareEqual(values3, search).AsByte());
1225+
matches |= Sse2.MoveMask(Sse2.CompareEqual(values4, search).AsByte());
1226+
if (matches == 0)
1227+
{
1228+
// Zero flags set so no matches
1229+
offset += Vector128<ushort>.Count;
1230+
}
1231+
else
1232+
{
1233+
// Find bitflag offset of first match and add to current offset,
1234+
// flags are in bytes so divide for chars
1235+
return offset + (BitOps.TrailingZeroCount(matches) / sizeof(char));
1236+
}
1237+
}
1238+
1239+
if (offset < length)
1240+
{
1241+
lengthToExamine = length - offset;
1242+
goto SequentialScan;
1243+
}
1244+
}
1245+
}
1246+
else if (Sse2.IsSupported)
1247+
{
1248+
if (offset < length)
1249+
{
1250+
lengthToExamine = GetCharVector128SpanLength(offset, length);
1251+
1252+
Vector128<ushort> values0 = Vector128.Create(value0);
1253+
Vector128<ushort> values1 = Vector128.Create(value1);
1254+
Vector128<ushort> values2 = Vector128.Create(value2);
1255+
Vector128<ushort> values3 = Vector128.Create(value3);
1256+
Vector128<ushort> values4 = Vector128.Create(value4);
1257+
while (lengthToExamine > offset)
1258+
{
1259+
Vector128<ushort> search = LoadVector128(ref searchSpace, offset);
1260+
1261+
// Same method as above
1262+
int matches = Sse2.MoveMask(Sse2.CompareEqual(values0, search).AsByte());
1263+
matches |= Sse2.MoveMask(Sse2.CompareEqual(values1, search).AsByte());
1264+
matches |= Sse2.MoveMask(Sse2.CompareEqual(values2, search).AsByte());
1265+
matches |= Sse2.MoveMask(Sse2.CompareEqual(values3, search).AsByte());
1266+
matches |= Sse2.MoveMask(Sse2.CompareEqual(values4, search).AsByte());
1267+
if (matches == 0)
1268+
{
1269+
// Zero flags set so no matches
1270+
offset += Vector128<ushort>.Count;
1271+
continue;
1272+
}
1273+
1274+
// Find bitflag offset of first match and add to current offset,
1275+
// flags are in bytes so divide for chars
1276+
return offset + (BitOps.TrailingZeroCount(matches) / sizeof(char));
1277+
}
1278+
1279+
if (offset < length)
1280+
{
1281+
lengthToExamine = length - offset;
1282+
goto SequentialScan;
1283+
}
1284+
}
1285+
}
1286+
else if (Vector.IsHardwareAccelerated)
11671287
{
11681288
if (offset < length)
11691289
{

0 commit comments

Comments
 (0)