Skip to content

Commit 402aa85

Browse files
authored
port SpanHelpers.IndexOfAny(ref byte, byte, byte, byte, int) to Vector128/256 (#73481)
1 parent cbf2e32 commit 402aa85

File tree

1 file changed

+21
-90
lines changed

1 file changed

+21
-90
lines changed

src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs

Lines changed: 21 additions & 90 deletions
Original file line numberDiff line numberDiff line change
@@ -1100,13 +1100,13 @@ public static int IndexOfAny(ref byte searchSpace, byte value0, byte value1, byt
11001100
nuint offset = 0; // Use nuint for arithmetic to avoid unnecessary 64->32->64 truncations
11011101
nuint lengthToExamine = (nuint)(uint)length;
11021102

1103-
if (Sse2.IsSupported || AdvSimd.Arm64.IsSupported)
1103+
if (Vector128.IsHardwareAccelerated)
11041104
{
11051105
// Vector256 branch also operates on Vector128 sizes, so check is combined.
11061106
nint vectorDiff = (nint)length - Vector128<byte>.Count;
11071107
if (vectorDiff >= 0)
11081108
{
1109-
// >= Sse2 intrinsics are supported, and length is enough to use them so use that path.
1109+
// Vector128 (or wider) is accelerated, and length is enough to use it, so use that path.
11101110
// We jump forward to the intrinsics at the end of the method so a naive branch predict
11111111
// will choose the non-intrinsic path so short lengths which don't gain anything aren't
11121112
// overly disadvantaged by having to jump over a lot of code. Whereas the longer lengths
@@ -1215,10 +1215,10 @@ public static int IndexOfAny(ref byte searchSpace, byte value0, byte value1, byt
12151215
// the end and forwards, which may overlap on an earlier compare.
12161216

12171217
// We include the Supported check again here even though path will not be taken, so the asm isn't generated if not supported.
1218-
if (Sse2.IsSupported)
1218+
if (Vector128.IsHardwareAccelerated)
12191219
{
1220-
int matches;
1221-
if (Avx2.IsSupported)
1220+
uint matches;
1221+
if (Vector256.IsHardwareAccelerated)
12221222
{
12231223
Vector256<byte> search;
12241224
// Guard as we may only have a valid size for Vector128; when we will move to the Sse2
@@ -1235,15 +1235,10 @@ public static int IndexOfAny(ref byte searchSpace, byte value0, byte value1, byt
12351235
// First time this checks again against 0, however we will move into final compare if it fails.
12361236
while (lengthToExamine > offset)
12371237
{
1238-
search = LoadVector256(ref searchSpace, offset);
1238+
search = Vector256.LoadUnsafe(ref searchSpace, offset);
12391239
// Bitwise Or to combine the flagged matches for all three values into our match flags
1240-
matches = Avx2.MoveMask(
1241-
Avx2.Or(
1242-
Avx2.Or(
1243-
Avx2.CompareEqual(values0, search),
1244-
Avx2.CompareEqual(values1, search)),
1245-
Avx2.CompareEqual(values2, search)));
1246-
// Note that MoveMask has converted the equal vector elements into a set of bit flags,
1240+
matches = (Vector256.Equals(values0, search) | Vector256.Equals(values1, search) | Vector256.Equals(values2, search)).ExtractMostSignificantBits();
1241+
// Note that ExtractMostSignificantBits has converted the equal vector elements into a set of bit flags,
12471242
// So the bit position in 'matches' corresponds to the element offset.
12481243
if (matches == 0)
12491244
{
@@ -1256,15 +1251,10 @@ public static int IndexOfAny(ref byte searchSpace, byte value0, byte value1, byt
12561251
}
12571252

12581253
// Move to Vector length from end for final compare
1259-
search = LoadVector256(ref searchSpace, lengthToExamine);
1254+
search = Vector256.LoadUnsafe(ref searchSpace, lengthToExamine);
12601255
offset = lengthToExamine;
12611256
// Same method as above
1262-
matches = Avx2.MoveMask(
1263-
Avx2.Or(
1264-
Avx2.Or(
1265-
Avx2.CompareEqual(values0, search),
1266-
Avx2.CompareEqual(values1, search)),
1267-
Avx2.CompareEqual(values2, search)));
1257+
matches = (Vector256.Equals(values0, search) | Vector256.Equals(values1, search) | Vector256.Equals(values2, search)).ExtractMostSignificantBits();
12681258
if (matches == 0)
12691259
{
12701260
// None matched
@@ -1278,105 +1268,46 @@ public static int IndexOfAny(ref byte searchSpace, byte value0, byte value1, byt
12781268
// Initial size check was done on method entry.
12791269
Debug.Assert(length >= Vector128<byte>.Count);
12801270
{
1281-
Vector128<byte> search;
1271+
Vector128<byte> search, compareResult;
12821272
Vector128<byte> values0 = Vector128.Create(value0);
12831273
Vector128<byte> values1 = Vector128.Create(value1);
12841274
Vector128<byte> values2 = Vector128.Create(value2);
12851275
// First time this checks against 0 and we will move into final compare if it fails.
12861276
while (lengthToExamine > offset)
12871277
{
1288-
search = LoadVector128(ref searchSpace, offset);
1278+
search = Vector128.LoadUnsafe(ref searchSpace, offset);
12891279

1290-
matches = Sse2.MoveMask(
1291-
Sse2.Or(
1292-
Sse2.Or(
1293-
Sse2.CompareEqual(values0, search),
1294-
Sse2.CompareEqual(values1, search)),
1295-
Sse2.CompareEqual(values2, search)));
1296-
// Note that MoveMask has converted the equal vector elements into a set of bit flags,
1297-
// So the bit position in 'matches' corresponds to the element offset.
1298-
if (matches == 0)
1280+
compareResult = Vector128.Equals(values0, search) | Vector128.Equals(values1, search) | Vector128.Equals(values2, search);
1281+
if (compareResult == Vector128<byte>.Zero)
12991282
{
13001283
// None matched
13011284
offset += (nuint)Vector128<byte>.Count;
13021285
continue;
13031286
}
13041287

1288+
// Note that ExtractMostSignificantBits converts the equal vector elements into a set of bit flags,
1289+
// So the bit position in 'matches' corresponds to the element offset.
1290+
matches = compareResult.ExtractMostSignificantBits();
13051291
goto IntrinsicsMatch;
13061292
}
13071293
// Move to Vector length from end for final compare
1308-
search = LoadVector128(ref searchSpace, lengthToExamine);
1294+
search = Vector128.LoadUnsafe(ref searchSpace, lengthToExamine);
13091295
offset = lengthToExamine;
13101296
// Same method as above
1311-
matches = Sse2.MoveMask(
1312-
Sse2.Or(
1313-
Sse2.Or(
1314-
Sse2.CompareEqual(values0, search),
1315-
Sse2.CompareEqual(values1, search)),
1316-
Sse2.CompareEqual(values2, search)));
1317-
if (matches == 0)
1297+
compareResult = Vector128.Equals(values0, search) | Vector128.Equals(values1, search) | Vector128.Equals(values2, search);
1298+
if (compareResult == Vector128<byte>.Zero)
13181299
{
13191300
// None matched
13201301
goto NotFound;
13211302
}
1303+
matches = compareResult.ExtractMostSignificantBits();
13221304
}
13231305

13241306
IntrinsicsMatch:
13251307
// Find bitflag offset of first match and add to current offset
13261308
offset += (nuint)BitOperations.TrailingZeroCount(matches);
13271309
goto Found;
13281310
}
1329-
else if (AdvSimd.Arm64.IsSupported)
1330-
{
1331-
Vector128<byte> search;
1332-
Vector128<byte> matches;
1333-
Vector128<byte> values0 = Vector128.Create(value0);
1334-
Vector128<byte> values1 = Vector128.Create(value1);
1335-
Vector128<byte> values2 = Vector128.Create(value2);
1336-
// First time this checks against 0 and we will move into final compare if it fails.
1337-
while (lengthToExamine > offset)
1338-
{
1339-
search = LoadVector128(ref searchSpace, offset);
1340-
1341-
matches = AdvSimd.Or(
1342-
AdvSimd.Or(
1343-
AdvSimd.CompareEqual(values0, search),
1344-
AdvSimd.CompareEqual(values1, search)),
1345-
AdvSimd.CompareEqual(values2, search));
1346-
1347-
if (matches == Vector128<byte>.Zero)
1348-
{
1349-
offset += (nuint)Vector128<byte>.Count;
1350-
continue;
1351-
}
1352-
1353-
// Find bitflag offset of first match and add to current offset
1354-
offset += FindFirstMatchedLane(matches);
1355-
1356-
goto Found;
1357-
}
1358-
1359-
// Move to Vector length from end for final compare
1360-
search = LoadVector128(ref searchSpace, lengthToExamine);
1361-
offset = lengthToExamine;
1362-
// Same as method as above
1363-
matches = AdvSimd.Or(
1364-
AdvSimd.Or(
1365-
AdvSimd.CompareEqual(values0, search),
1366-
AdvSimd.CompareEqual(values1, search)),
1367-
AdvSimd.CompareEqual(values2, search));
1368-
1369-
if (matches == Vector128<byte>.Zero)
1370-
{
1371-
// None matched
1372-
goto NotFound;
1373-
}
1374-
1375-
// Find bitflag offset of first match and add to current offset
1376-
offset += FindFirstMatchedLane(matches);
1377-
1378-
goto Found;
1379-
}
13801311
else if (Vector.IsHardwareAccelerated)
13811312
{
13821313
Vector<byte> values0 = new Vector<byte>(value0);

0 commit comments

Comments
 (0)