@@ -426,53 +426,49 @@ impl<'a> SliceRead<'a> {
426426 }
427427 }
428428
429- #[ inline( always) ]
430429 fn skip_to_escape ( & mut self , forbid_control_characters : bool ) {
431430 let rest = & self . slice [ self . index ..] ;
432- let end = self . index + memchr:: memchr2 ( b'"' , b'\\' , rest) . unwrap_or ( rest. len ( ) ) ;
433431
434432 if !forbid_control_characters {
435- self . index = end ;
433+ self . index += memchr :: memchr2 ( b'"' , b'\\' , rest ) . unwrap_or ( rest . len ( ) ) ;
436434 return ;
437435 }
438436
439- // We now wish to check if the chunk contains a byte in range 0x00..=0x1F. Ideally, this
440- // would be integrated this into the memchr2 check above, but memchr does not support this
441- // at the moment. Therefore, use a variation on Mycroft's algorithm [1] to provide
442- // performance better than a naive loop. It runs faster than just a single memchr call on
443- // benchmarks and is faster than both SSE2 and AVX-based code, and it's cross-platform, so
444- // probably the right fit.
437+ // We wish to find the first byte in range 0x00..=0x1F or " or \. Ideally, we'd use
438+ // something akin to memchr3, but the memchr crate does not support this at the moment.
439+ // Therefore, we use a variation on Mycroft's algorithm [1] to provide performance better
440+ // than a naive loop. It runs faster than equivalent two-pass memchr2+SWAR code on
441+ // benchmarks and it's cross-platform, so probably the right fit.
445442 // [1]: https://groups.google.com/forum/#!original/comp.lang.c/2HtQXvg7iKc/xOJeipH6KLMJ
446- const STEP : usize = mem:: size_of :: < usize > ( ) ;
447-
448- // Moving this to a local variable removes a spill in the hot loop.
449- let mut index = self . index ;
450-
451- if self . slice . len ( ) >= STEP {
452- while index < end. min ( self . slice . len ( ) - STEP + 1 ) {
453- // We can safely overread past end in most cases. This ensures that SWAR code is
454- // used to handle the tail in the hot path.
455- const ONE_BYTES : usize = usize:: MAX / 255 ;
456- let chars = usize:: from_ne_bytes ( self . slice [ index..] [ ..STEP ] . try_into ( ) . unwrap ( ) ) ;
457- let mask = chars. wrapping_sub ( ONE_BYTES * 0x20 ) & !chars & ( ONE_BYTES << 7 ) ;
458-
459- if mask != 0 {
460- index += mask. trailing_zeros ( ) as usize / 8 ;
461- break ;
462- }
463-
464- index += STEP ;
465- }
466- }
467-
468- if index < end {
469- if let Some ( offset) = self . slice [ index..end] . iter ( ) . position ( |& c| c <= 0x1F ) {
470- self . index = index + offset;
443+ type Chunk = usize ;
444+ const STEP : usize = mem:: size_of :: < Chunk > ( ) ;
445+ const ONE_BYTES : Chunk = Chunk :: MAX / 255 ; // 0x0101...01
446+
447+ for chunk in rest. chunks_exact ( STEP ) {
448+ let chars = Chunk :: from_ne_bytes ( chunk. try_into ( ) . unwrap ( ) ) ;
449+ let contains_ctrl = chars. wrapping_sub ( ONE_BYTES * 0x20 ) & !chars;
450+ let chars_quote = chars ^ ( ONE_BYTES * Chunk :: from ( b'"' ) ) ;
451+ let contains_quote = chars_quote. wrapping_sub ( ONE_BYTES ) & !chars_quote;
452+ let chars_backslash = chars ^ ( ONE_BYTES * Chunk :: from ( b'\\' ) ) ;
453+ let contains_backslash = chars_backslash. wrapping_sub ( ONE_BYTES ) & !chars_backslash;
454+ let masked = ( contains_ctrl | contains_quote | contains_backslash) & ( ONE_BYTES << 7 ) ;
455+ if masked != 0 {
456+ // SAFETY: chunk is in-bounds for slice
457+ self . index = unsafe { chunk. as_ptr ( ) . offset_from ( self . slice . as_ptr ( ) ) } as usize
458+ + masked. trailing_zeros ( ) as usize / 8 ;
471459 return ;
472460 }
473461 }
474462
475- self . index = end;
463+ self . skip_to_escape_slow ( ) ;
464+ }
465+
466+ #[ cold]
467+ #[ inline( never) ]
468+ fn skip_to_escape_slow ( & mut self ) {
469+ while self . index < self . slice . len ( ) && !is_escape ( self . slice [ self . index ] ) {
470+ self . index += 1 ;
471+ }
476472 }
477473
478474 /// The big optimization here over IoRead is that if the string contains no
@@ -823,8 +819,6 @@ pub trait Fused: private::Sealed {}
// Marker impls: `SliceRead` and `StrRead` both opt in to the (empty, sealed) `Fused` trait.
823819impl < ' a > Fused for SliceRead < ' a > { }
824820impl < ' a > Fused for StrRead < ' a > { }
825821
/// Returns `true` if `ch` cannot be copied verbatim while scanning a string body:
/// a double quote, a backslash, or any byte below `0x20`.
fn is_escape(ch: u8) -> bool {
    matches!(ch, b'"' | b'\\' | 0x00..=0x1F)
}
0 commit comments