@@ -1100,13 +1100,13 @@ public static int IndexOfAny(ref byte searchSpace, byte value0, byte value1, byt
11001100 nuint offset = 0 ; // Use nuint for arithmetic to avoid unnecessary 64->32->64 truncations
11011101 nuint lengthToExamine = ( nuint ) ( uint ) length ;
11021102
1103- if ( Sse2 . IsSupported || AdvSimd . Arm64 . IsSupported )
1103+ if ( Vector128 . IsHardwareAccelerated )
11041104 {
11051105 // The Vector256 branch also operates on Vector128 sizes, so the check is combined.
11061106 nint vectorDiff = ( nint ) length - Vector128 < byte > . Count ;
11071107 if ( vectorDiff >= 0 )
11081108 {
1109- // >= Sse2 intrinsics are supported , and length is enough to use them so use that path.
1109+ // >= Vector128 is accelerated , and length is enough to use them so use that path.
11101110 // We jump forward to the intrinsics at the end of the method so a naive branch predict
11111111 // will choose the non-intrinsic path so short lengths which don't gain anything aren't
11121112 // overly disadvantaged by having to jump over a lot of code. Whereas the longer lengths
@@ -1215,10 +1215,10 @@ public static int IndexOfAny(ref byte searchSpace, byte value0, byte value1, byt
12151215 // the end and forwards, which may overlap on an earlier compare.
12161216
12171217 // We include the Supported check again here even though path will not be taken, so the asm isn't generated if not supported.
1218- if ( Sse2 . IsSupported )
1218+ if ( Vector128 . IsHardwareAccelerated )
12191219 {
1220- int matches ;
1221- if ( Avx2 . IsSupported )
1220+ uint matches ;
1221+ if ( Vector256 . IsHardwareAccelerated )
12221222 {
12231223 Vector256 < byte > search ;
12241224 // Guard as we may only have a valid size for Vector128; when we will move to the Sse2
@@ -1235,15 +1235,10 @@ public static int IndexOfAny(ref byte searchSpace, byte value0, byte value1, byt
12351235 // First time this checks again against 0, however we will move into final compare if it fails.
12361236 while ( lengthToExamine > offset )
12371237 {
1238- search = LoadVector256 ( ref searchSpace , offset ) ;
1238+ search = Vector256 . LoadUnsafe ( ref searchSpace , offset ) ;
12391239 // Bitwise Or to combine the flagged matches for the second value to our match flags
1240- matches = Avx2 . MoveMask (
1241- Avx2 . Or (
1242- Avx2 . Or (
1243- Avx2 . CompareEqual ( values0 , search ) ,
1244- Avx2 . CompareEqual ( values1 , search ) ) ,
1245- Avx2 . CompareEqual ( values2 , search ) ) ) ;
1246- // Note that MoveMask has converted the equal vector elements into a set of bit flags,
1240+ matches = ( Vector256 . Equals ( values0 , search ) | Vector256 . Equals ( values1 , search ) | Vector256 . Equals ( values2 , search ) ) . ExtractMostSignificantBits ( ) ;
1241+ // Note that ExtractMostSignificantBits has converted the equal vector elements into a set of bit flags,
12471242 // So the bit position in 'matches' corresponds to the element offset.
12481243 if ( matches == 0 )
12491244 {
@@ -1256,15 +1251,10 @@ public static int IndexOfAny(ref byte searchSpace, byte value0, byte value1, byt
12561251 }
12571252
12581253 // Move to Vector length from end for final compare
1259- search = LoadVector256 ( ref searchSpace , lengthToExamine ) ;
1254+ search = Vector256 . LoadUnsafe ( ref searchSpace , lengthToExamine ) ;
12601255 offset = lengthToExamine ;
12611256 // Same method as above
1262- matches = Avx2 . MoveMask (
1263- Avx2 . Or (
1264- Avx2 . Or (
1265- Avx2 . CompareEqual ( values0 , search ) ,
1266- Avx2 . CompareEqual ( values1 , search ) ) ,
1267- Avx2 . CompareEqual ( values2 , search ) ) ) ;
1257+ matches = ( Vector256 . Equals ( values0 , search ) | Vector256 . Equals ( values1 , search ) | Vector256 . Equals ( values2 , search ) ) . ExtractMostSignificantBits ( ) ;
12681258 if ( matches == 0 )
12691259 {
12701260 // None matched
@@ -1278,105 +1268,46 @@ public static int IndexOfAny(ref byte searchSpace, byte value0, byte value1, byt
12781268 // Initial size check was done on method entry.
12791269 Debug . Assert ( length >= Vector128 < byte > . Count ) ;
12801270 {
1281- Vector128 < byte > search ;
1271+ Vector128 < byte > search , compareResult ;
12821272 Vector128 < byte > values0 = Vector128 . Create ( value0 ) ;
12831273 Vector128 < byte > values1 = Vector128 . Create ( value1 ) ;
12841274 Vector128 < byte > values2 = Vector128 . Create ( value2 ) ;
12851275 // First time this checks against 0 and we will move into final compare if it fails.
12861276 while ( lengthToExamine > offset )
12871277 {
1288- search = LoadVector128 ( ref searchSpace , offset ) ;
1278+ search = Vector128 . LoadUnsafe ( ref searchSpace , offset ) ;
12891279
1290- matches = Sse2 . MoveMask (
1291- Sse2 . Or (
1292- Sse2 . Or (
1293- Sse2 . CompareEqual ( values0 , search ) ,
1294- Sse2 . CompareEqual ( values1 , search ) ) ,
1295- Sse2 . CompareEqual ( values2 , search ) ) ) ;
1296- // Note that MoveMask has converted the equal vector elements into a set of bit flags,
1297- // So the bit position in 'matches' corresponds to the element offset.
1298- if ( matches == 0 )
1280+ compareResult = Vector128 . Equals ( values0 , search ) | Vector128 . Equals ( values1 , search ) | Vector128 . Equals ( values2 , search ) ;
1281+ if ( compareResult == Vector128 < byte > . Zero )
12991282 {
13001283 // None matched
13011284 offset += ( nuint ) Vector128 < byte > . Count ;
13021285 continue ;
13031286 }
13041287
1288+ // Note that ExtractMostSignificantBits has converted the equal vector elements into a set of bit flags,
1289+ // So the bit position in 'matches' corresponds to the element offset.
1290+ matches = compareResult . ExtractMostSignificantBits ( ) ;
13051291 goto IntrinsicsMatch ;
13061292 }
13071293 // Move to Vector length from end for final compare
1308- search = LoadVector128 ( ref searchSpace , lengthToExamine ) ;
1294+ search = Vector128 . LoadUnsafe ( ref searchSpace , lengthToExamine ) ;
13091295 offset = lengthToExamine ;
13091295 // Same method as above
1311- matches = Sse2 . MoveMask (
1312- Sse2 . Or (
1313- Sse2 . Or (
1314- Sse2 . CompareEqual ( values0 , search ) ,
1315- Sse2 . CompareEqual ( values1 , search ) ) ,
1316- Sse2 . CompareEqual ( values2 , search ) ) ) ;
1317- if ( matches == 0 )
1297+ compareResult = Vector128 . Equals ( values0 , search ) | Vector128 . Equals ( values1 , search ) | Vector128 . Equals ( values2 , search ) ;
1298+ if ( compareResult == Vector128 < byte > . Zero )
13181299 {
13191300 // None matched
13201301 goto NotFound ;
13211302 }
1303+ matches = compareResult . ExtractMostSignificantBits ( ) ;
13221304 }
13231305
13241306 IntrinsicsMatch :
13251307 // Find bitflag offset of first match and add to current offset
13261308 offset += ( nuint ) BitOperations . TrailingZeroCount ( matches ) ;
13271309 goto Found ;
13281310 }
1329- else if ( AdvSimd . Arm64 . IsSupported )
1330- {
1331- Vector128 < byte > search ;
1332- Vector128 < byte > matches ;
1333- Vector128 < byte > values0 = Vector128 . Create ( value0 ) ;
1334- Vector128 < byte > values1 = Vector128 . Create ( value1 ) ;
1335- Vector128 < byte > values2 = Vector128 . Create ( value2 ) ;
1336- // First time this checks against 0 and we will move into final compare if it fails.
1337- while ( lengthToExamine > offset )
1338- {
1339- search = LoadVector128 ( ref searchSpace , offset ) ;
1340-
1341- matches = AdvSimd . Or (
1342- AdvSimd . Or (
1343- AdvSimd . CompareEqual ( values0 , search ) ,
1344- AdvSimd . CompareEqual ( values1 , search ) ) ,
1345- AdvSimd . CompareEqual ( values2 , search ) ) ;
1346-
1347- if ( matches == Vector128 < byte > . Zero )
1348- {
1349- offset += ( nuint ) Vector128 < byte > . Count ;
1350- continue ;
1351- }
1352-
1353- // Find bitflag offset of first match and add to current offset
1354- offset += FindFirstMatchedLane ( matches ) ;
1355-
1356- goto Found ;
1357- }
1358-
1359- // Move to Vector length from end for final compare
1360- search = LoadVector128 ( ref searchSpace , lengthToExamine ) ;
1361- offset = lengthToExamine ;
1362- // Same as method as above
1363- matches = AdvSimd . Or (
1364- AdvSimd . Or (
1365- AdvSimd . CompareEqual ( values0 , search ) ,
1366- AdvSimd . CompareEqual ( values1 , search ) ) ,
1367- AdvSimd . CompareEqual ( values2 , search ) ) ;
1368-
1369- if ( matches == Vector128 < byte > . Zero )
1370- {
1371- // None matched
1372- goto NotFound ;
1373- }
1374-
1375- // Find bitflag offset of first match and add to current offset
1376- offset += FindFirstMatchedLane ( matches ) ;
1377-
1378- goto Found ;
1379- }
13801311 else if ( Vector . IsHardwareAccelerated )
13811312 {
13821313 Vector < byte > values0 = new Vector < byte > ( value0 ) ;
0 commit comments