@@ -1120,7 +1120,15 @@ public static int IndexOfAny(ref char searchSpace, char value0, char value1, cha
11201120 int offset = 0 ; // Use nint for arithmetic to avoid unnecessary 64->32->64 truncations
11211121 int lengthToExamine = length ;
11221122
1123- if ( Vector . IsHardwareAccelerated )
1123+ if ( Avx2 . IsSupported || Sse2 . IsSupported )
1124+ {
1125+ // Avx2 branch also operates on Sse2 sizes, so check is combined.
1126+ if ( length >= Vector128 < byte > . Count * 2 )
1127+ {
1128+ lengthToExamine = UnalignedCountVector128 ( ref searchSpace ) ;
1129+ }
1130+ }
1131+ else if ( Vector . IsHardwareAccelerated )
11241132 {
11251133 if ( length >= Vector < ushort > . Count * 2 )
11261134 {
@@ -1161,9 +1169,121 @@ public static int IndexOfAny(ref char searchSpace, char value0, char value1, cha
11611169 offset += 1 ;
11621170 }
11631171
1164- // We get past SequentialScan only if IsHardwareAccelerated is true. However, we still have the redundant check to allow
1172+ // We get past SequentialScan only if IsHardwareAccelerated or intrinsic .IsSupported is true. However, we still have the redundant check to allow
11651173 // the JIT to see that the code is unreachable and eliminate it when the platform does not have hardware acceleration.
1166- if ( Vector . IsHardwareAccelerated )
1174+ if ( Avx2 . IsSupported )
1175+ {
1176+ if ( offset < length )
1177+ {
1178+ lengthToExamine = GetCharVector256SpanLength ( offset , length ) ;
1179+ if ( lengthToExamine > offset )
1180+ {
1181+ Vector256 < ushort > values0 = Vector256 . Create ( value0 ) ;
1182+ Vector256 < ushort > values1 = Vector256 . Create ( value1 ) ;
1183+ Vector256 < ushort > values2 = Vector256 . Create ( value2 ) ;
1184+ Vector256 < ushort > values3 = Vector256 . Create ( value3 ) ;
1185+ Vector256 < ushort > values4 = Vector256 . Create ( value4 ) ;
1186+ do
1187+ {
1188+ Vector256 < ushort > search = LoadVector256 ( ref searchSpace , offset ) ;
1189+ // Note that MoveMask has converted the equal vector elements into a set of bit flags,
1190+ // So the bit position in 'matches' corresponds to the element offset.
1191+ int matches = Avx2 . MoveMask ( Avx2 . CompareEqual ( values0 , search ) . AsByte ( ) ) ;
1192+ // Bitwise Or to combine the flagged matches for the second, third, fourth and fifth values to our match flags
1193+ matches |= Avx2 . MoveMask ( Avx2 . CompareEqual ( values1 , search ) . AsByte ( ) ) ;
1194+ matches |= Avx2 . MoveMask ( Avx2 . CompareEqual ( values2 , search ) . AsByte ( ) ) ;
1195+ matches |= Avx2 . MoveMask ( Avx2 . CompareEqual ( values3 , search ) . AsByte ( ) ) ;
1196+ matches |= Avx2 . MoveMask ( Avx2 . CompareEqual ( values4 , search ) . AsByte ( ) ) ;
1197+ if ( matches == 0 )
1198+ {
1199+ // Zero flags set so no matches
1200+ offset += Vector256 < ushort > . Count ;
1201+ continue ;
1202+ }
1203+
1204+ // Find bitflag offset of first match and add to current offset,
1205+ // flags are in bytes so divide for chars
1206+ return offset + ( BitOps . TrailingZeroCount ( matches ) / sizeof ( char ) ) ;
1207+ } while ( lengthToExamine > offset ) ;
1208+ }
1209+
1210+ lengthToExamine = GetCharVector128SpanLength ( offset , length ) ;
1211+ if ( lengthToExamine > offset )
1212+ {
1213+ Vector128 < ushort > values0 = Vector128 . Create ( value0 ) ;
1214+ Vector128 < ushort > values1 = Vector128 . Create ( value1 ) ;
1215+ Vector128 < ushort > values2 = Vector128 . Create ( value2 ) ;
1216+ Vector128 < ushort > values3 = Vector128 . Create ( value3 ) ;
1217+ Vector128 < ushort > values4 = Vector128 . Create ( value4 ) ;
1218+ Vector128 < ushort > search = LoadVector128 ( ref searchSpace , offset ) ;
1219+
1220+ // Same method as above
1221+ int matches = Sse2 . MoveMask ( Sse2 . CompareEqual ( values0 , search ) . AsByte ( ) ) ;
1222+ matches |= Sse2 . MoveMask ( Sse2 . CompareEqual ( values1 , search ) . AsByte ( ) ) ;
1223+ matches |= Sse2 . MoveMask ( Sse2 . CompareEqual ( values2 , search ) . AsByte ( ) ) ;
1224+ matches |= Sse2 . MoveMask ( Sse2 . CompareEqual ( values3 , search ) . AsByte ( ) ) ;
1225+ matches |= Sse2 . MoveMask ( Sse2 . CompareEqual ( values4 , search ) . AsByte ( ) ) ;
1226+ if ( matches == 0 )
1227+ {
1228+ // Zero flags set so no matches
1229+ offset += Vector128 < ushort > . Count ;
1230+ }
1231+ else
1232+ {
1233+ // Find bitflag offset of first match and add to current offset,
1234+ // flags are in bytes so divide for chars
1235+ return offset + ( BitOps . TrailingZeroCount ( matches ) / sizeof ( char ) ) ;
1236+ }
1237+ }
1238+
1239+ if ( offset < length )
1240+ {
1241+ lengthToExamine = length - offset ;
1242+ goto SequentialScan ;
1243+ }
1244+ }
1245+ }
1246+ else if ( Sse2 . IsSupported )
1247+ {
1248+ if ( offset < length )
1249+ {
1250+ lengthToExamine = GetCharVector128SpanLength ( offset , length ) ;
1251+
1252+ Vector128 < ushort > values0 = Vector128 . Create ( value0 ) ;
1253+ Vector128 < ushort > values1 = Vector128 . Create ( value1 ) ;
1254+ Vector128 < ushort > values2 = Vector128 . Create ( value2 ) ;
1255+ Vector128 < ushort > values3 = Vector128 . Create ( value3 ) ;
1256+ Vector128 < ushort > values4 = Vector128 . Create ( value4 ) ;
1257+ while ( lengthToExamine > offset )
1258+ {
1259+ Vector128 < ushort > search = LoadVector128 ( ref searchSpace , offset ) ;
1260+
1261+ // Same method as above
1262+ int matches = Sse2 . MoveMask ( Sse2 . CompareEqual ( values0 , search ) . AsByte ( ) ) ;
1263+ matches |= Sse2 . MoveMask ( Sse2 . CompareEqual ( values1 , search ) . AsByte ( ) ) ;
1264+ matches |= Sse2 . MoveMask ( Sse2 . CompareEqual ( values2 , search ) . AsByte ( ) ) ;
1265+ matches |= Sse2 . MoveMask ( Sse2 . CompareEqual ( values3 , search ) . AsByte ( ) ) ;
1266+ matches |= Sse2 . MoveMask ( Sse2 . CompareEqual ( values4 , search ) . AsByte ( ) ) ;
1267+ if ( matches == 0 )
1268+ {
1269+ // Zero flags set so no matches
1270+ offset += Vector128 < ushort > . Count ;
1271+ continue ;
1272+ }
1273+
1274+ // Find bitflag offset of first match and add to current offset,
1275+ // flags are in bytes so divide for chars
1276+ return offset + ( BitOps . TrailingZeroCount ( matches ) / sizeof ( char ) ) ;
1277+ }
1278+
1279+ if ( offset < length )
1280+ {
1281+ lengthToExamine = length - offset ;
1282+ goto SequentialScan ;
1283+ }
1284+ }
1285+ }
1286+ else if ( Vector . IsHardwareAccelerated )
11671287 {
11681288 if ( offset < length )
11691289 {
0 commit comments