@@ -397,12 +397,16 @@ where
397397 }
398398}
399399
400- // This uses an adaptive system to extend the vector when it fills. We want to
401- // avoid paying to allocate and zero a huge chunk of memory if the reader only
402- // has 4 bytes while still making large reads if the reader does have a ton
403- // of data to return. Simply tacking on an extra DEFAULT_BUF_SIZE space every
404- // time is 4,500 times (!) slower than a default reservation size of 32 if the
405- // reader has a very small amount of data to return.
400+ // Here we must serve many masters with conflicting goals:
401+ //
402+ // - avoid allocating unless necessary
403+ // - avoid overallocating if we know the exact size (#89165)
404+ // - avoid passing large buffers to readers that always initialize the free capacity if they perform short reads (#23815, #23820)
405+ // - pass large buffers to readers that do not initialize the spare capacity. this can amortize per-call overheads
406+ // - and finally pass not-too-small and not-too-large buffers to Windows read APIs because they manage to suffer from both problems
407+ // at the same time, i.e. small reads suffer from syscall overhead, all reads incur initialization cost
408+ // proportional to buffer size (#110650)
409+ //
406410pub ( crate ) fn default_read_to_end < R : Read + ?Sized > (
407411 r : & mut R ,
408412 buf : & mut Vec < u8 > ,
@@ -412,20 +416,58 @@ pub(crate) fn default_read_to_end<R: Read + ?Sized>(
412416 let start_cap = buf. capacity ( ) ;
413417 // Optionally limit the maximum bytes read on each iteration.
414418 // This adds an arbitrary fiddle factor to allow for more data than we expect.
415- let max_read_size =
416- size_hint. and_then ( |s| s. checked_add ( 1024 ) ?. checked_next_multiple_of ( DEFAULT_BUF_SIZE ) ) ;
419+ let mut max_read_size = size_hint
420+ . and_then ( |s| s. checked_add ( 1024 ) ?. checked_next_multiple_of ( DEFAULT_BUF_SIZE ) )
421+ . unwrap_or ( DEFAULT_BUF_SIZE ) ;
417422
418423 let mut initialized = 0 ; // Extra initialized bytes from previous loop iteration
424+
425+ const PROBE_SIZE : usize = 32 ;
426+
427+ fn small_probe_read < R : Read + ?Sized > ( r : & mut R , buf : & mut Vec < u8 > ) -> Result < usize > {
428+ let mut probe = [ 0u8 ; PROBE_SIZE ] ;
429+
430+ loop {
431+ match r. read ( & mut probe) {
432+ Ok ( n) => {
433+ buf. extend_from_slice ( & probe[ ..n] ) ;
434+ return Ok ( n) ;
435+ }
436+ Err ( ref e) if e. is_interrupted ( ) => continue ,
437+ Err ( e) => return Err ( e) ,
438+ }
439+ }
440+ }
441+
442+ // avoid inflating empty/small vecs before we have determined that there's anything to read
443+ if ( size_hint. is_none ( ) || size_hint == Some ( 0 ) ) && buf. capacity ( ) - buf. len ( ) < PROBE_SIZE {
444+ let read = small_probe_read ( r, buf) ?;
445+
446+ if read == 0 {
447+ return Ok ( 0 ) ;
448+ }
449+ }
450+
419451 loop {
452+ if buf. len ( ) == buf. capacity ( ) && buf. capacity ( ) == start_cap {
453+ // The buffer might be an exact fit. Let's read into a probe buffer
454+ // and see if it returns `Ok(0)`. If so, we've avoided an
455+ // unnecessary doubling of the capacity. But if not, append the
456+ // probe buffer to the primary buffer and let its capacity grow.
457+ let read = small_probe_read ( r, buf) ?;
458+
459+ if read == 0 {
460+ return Ok ( buf. len ( ) - start_len) ;
461+ }
462+ }
463+
420464 if buf. len ( ) == buf. capacity ( ) {
421- buf. reserve ( 32 ) ; // buf is full, need more space
465+ buf. reserve ( PROBE_SIZE ) ; // buf is full, need more space
422466 }
423467
424468 let mut spare = buf. spare_capacity_mut ( ) ;
425- if let Some ( size) = max_read_size {
426- let len = cmp:: min ( spare. len ( ) , size) ;
427- spare = & mut spare[ ..len]
428- }
469+ let buf_len = cmp:: min ( spare. len ( ) , max_read_size) ;
470+ spare = & mut spare[ ..buf_len] ;
429471 let mut read_buf: BorrowedBuf < ' _ > = spare. into ( ) ;
430472
431473 // SAFETY: These bytes were initialized but not filled in the previous loop
@@ -434,42 +476,44 @@ pub(crate) fn default_read_to_end<R: Read + ?Sized>(
434476 }
435477
436478 let mut cursor = read_buf. unfilled ( ) ;
437- match r. read_buf ( cursor. reborrow ( ) ) {
438- Ok ( ( ) ) => { }
439- Err ( e) if e. is_interrupted ( ) => continue ,
440- Err ( e) => return Err ( e) ,
479+ loop {
480+ match r. read_buf ( cursor. reborrow ( ) ) {
481+ Ok ( ( ) ) => break ,
482+ Err ( e) if e. is_interrupted ( ) => continue ,
483+ Err ( e) => return Err ( e) ,
484+ }
441485 }
442486
443- if cursor. written ( ) == 0 {
487+ let unfilled_but_initialized = cursor. init_ref ( ) . len ( ) ;
488+ let bytes_read = cursor. written ( ) ;
489+ let was_fully_initialized = read_buf. init_len ( ) == buf_len;
490+
491+ if bytes_read == 0 {
444492 return Ok ( buf. len ( ) - start_len) ;
445493 }
446494
447495 // store how much was initialized but not filled
448- initialized = cursor . init_ref ( ) . len ( ) ;
496+ initialized = unfilled_but_initialized ;
449497
450498 // SAFETY: BorrowedBuf's invariants mean this much memory is initialized.
451499 unsafe {
452- let new_len = read_buf . filled ( ) . len ( ) + buf. len ( ) ;
500+ let new_len = bytes_read + buf. len ( ) ;
453501 buf. set_len ( new_len) ;
454502 }
455503
456- if buf. len ( ) == buf. capacity ( ) && buf. capacity ( ) == start_cap {
457- // The buffer might be an exact fit. Let's read into a probe buffer
458- // and see if it returns `Ok(0)`. If so, we've avoided an
459- // unnecessary doubling of the capacity. But if not, append the
460- // probe buffer to the primary buffer and let its capacity grow.
461- let mut probe = [ 0u8 ; 32 ] ;
462-
463- loop {
464- match r. read ( & mut probe) {
465- Ok ( 0 ) => return Ok ( buf. len ( ) - start_len) ,
466- Ok ( n) => {
467- buf. extend_from_slice ( & probe[ ..n] ) ;
468- break ;
469- }
470- Err ( ref e) if e. is_interrupted ( ) => continue ,
471- Err ( e) => return Err ( e) ,
472- }
504+ // Use heuristics to determine the max read size if no initial size hint was provided
505+ if size_hint. is_none ( ) {
506+ // The reader is returning short reads but it doesn't call ensure_init().
507+ // In that case we no longer need to restrict read sizes to avoid
508+ // initialization costs.
509+ if !was_fully_initialized {
510+ max_read_size = usize:: MAX ;
511+ }
512+
513+ // we have passed a larger buffer than previously and the
514+ // reader still hasn't returned a short read
515+ if buf_len >= max_read_size && bytes_read == buf_len {
516+ max_read_size = max_read_size. saturating_mul ( 2 ) ;
473517 }
474518 }
475519 }
0 commit comments