|  | 
| 2 | 2 | //! systems: just a `Vec<u8>`/`[u8]`. | 
| 3 | 3 | 
 | 
| 4 | 4 | use core::clone::CloneToUninit; | 
|  | 5 | +use core::str::advance_utf8; | 
| 5 | 6 | 
 | 
| 6 | 7 | use crate::borrow::Cow; | 
| 7 | 8 | use crate::collections::TryReserveError; | 
| @@ -64,25 +65,37 @@ impl fmt::Debug for Slice { | 
| 64 | 65 | 
 | 
| 65 | 66 | impl fmt::Display for Slice { | 
| 66 | 67 |     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { | 
| 67 |  | -        // If we're the empty string then our iterator won't actually yield | 
| 68 |  | -        // anything, so perform the formatting manually | 
| 69 |  | -        if self.inner.is_empty() { | 
| 70 |  | -            return "".fmt(f); | 
|  | 68 | +        // Corresponds to `Formatter::pad`, but for `OsStr` instead of `str`. | 
|  | 69 | + | 
|  | 70 | +        // Make sure there's a fast path up front. | 
|  | 71 | +        if f.options().get_width().is_none() && f.options().get_precision().is_none() { | 
|  | 72 | +            return self.write_lossy(f); | 
| 71 | 73 |         } | 
| 72 | 74 | 
 | 
| 73 |  | -        for chunk in self.inner.utf8_chunks() { | 
| 74 |  | -            let valid = chunk.valid(); | 
| 75 |  | -            // If we successfully decoded the whole chunk as a valid string then | 
| 76 |  | -            // we can return a direct formatting of the string which will also | 
| 77 |  | -            // respect various formatting flags if possible. | 
| 78 |  | -            if chunk.invalid().is_empty() { | 
| 79 |  | -                return valid.fmt(f); | 
| 80 |  | -            } | 
|  | 75 | +        // The `precision` field can be interpreted as a maximum width for the | 
|  | 76 | +        // string being formatted; without one, the cap below is `u16::MAX` characters. | 
|  | 77 | +        let max_char_count = f.options().get_precision().unwrap_or(u16::MAX); | 
|  | 78 | +        let (truncated, char_count) = truncate_chars(&self.inner, max_char_count as usize); | 
|  | 79 | + | 
|  | 80 | +        // If our string is longer than the maximum width, truncate it and | 
|  | 81 | +        // handle other flags in terms of the truncated string. | 
|  | 82 | +        // SAFETY: The truncation splits at Unicode scalar value boundaries. | 
|  | 83 | +        let s = unsafe { Slice::from_encoded_bytes_unchecked(truncated) }; | 
| 81 | 84 | 
 | 
| 82 |  | -            f.write_str(valid)?; | 
| 83 |  | -            f.write_char(char::REPLACEMENT_CHARACTER)?; | 
|  | 85 | +        // The `width` field is more of a minimum width parameter at this point. | 
|  | 86 | +        if let Some(width) = f.options().get_width() | 
|  | 87 | +            && char_count < width as usize | 
|  | 88 | +        { | 
|  | 89 | +            // If we're under the minimum width, then fill up the minimum width | 
|  | 90 | +            // with the specified string + some alignment. | 
|  | 91 | +            let post_padding = f.padding(width - char_count as u16, fmt::Alignment::Left)?; | 
|  | 92 | +            s.write_lossy(f)?; | 
|  | 93 | +            post_padding.write(f) | 
|  | 94 | +        } else { | 
|  | 95 | +            // If we're over the minimum width or there is no minimum width, we | 
|  | 96 | +            // can just emit the string. | 
|  | 97 | +            s.write_lossy(f) | 
| 84 | 98 |         } | 
| 85 |  | -        Ok(()) | 
| 86 | 99 |     } | 
| 87 | 100 | } | 
| 88 | 101 | 
 | 
| @@ -302,6 +315,18 @@ impl Slice { | 
| 302 | 315 |         String::from_utf8_lossy(&self.inner) | 
| 303 | 316 |     } | 
| 304 | 317 | 
 | 
|  | 318 | +    /// Writes the string as lossy UTF-8 like [`String::from_utf8_lossy`]. | 
|  | 319 | +    /// It ignores formatter flags. | 
|  | 320 | +    fn write_lossy(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { | 
|  | 321 | +        for chunk in self.inner.utf8_chunks() { | 
|  | 322 | +            f.write_str(chunk.valid())?; | 
|  | 323 | +            if !chunk.invalid().is_empty() { | 
|  | 324 | +                f.write_char(char::REPLACEMENT_CHARACTER)?; | 
|  | 325 | +            } | 
|  | 326 | +        } | 
|  | 327 | +        Ok(()) | 
|  | 328 | +    } | 
|  | 329 | + | 
| 305 | 330 |     #[inline] | 
| 306 | 331 |     pub fn to_owned(&self) -> Buf { | 
| 307 | 332 |         Buf { inner: self.inner.to_vec() } | 
| @@ -376,3 +401,19 @@ unsafe impl CloneToUninit for Slice { | 
| 376 | 401 |         unsafe { self.inner.clone_to_uninit(dst) } | 
| 377 | 402 |     } | 
| 378 | 403 | } | 
|  | 404 | + | 
|  | 405 | +/// Truncates the byte string to at most `max_chars` Unicode scalar values and | 
|  | 406 | +/// counts them, allowing invalid UTF-8 sequences. For invalid sequences, the | 
|  | 407 | +/// maximal prefix of a valid UTF-8 sequence counts as one. Only up to `max_chars` | 
|  | 408 | +/// scalar values are scanned. Returns the truncated bytes and their character count. | 
|  | 409 | +fn truncate_chars(bytes: &[u8], max_chars: usize) -> (&[u8], usize) { | 
|  | 410 | +    let mut iter = bytes.iter(); | 
|  | 411 | +    let mut char_count = 0; | 
|  | 412 | +    while !iter.is_empty() && char_count < max_chars { | 
|  | 413 | +        advance_utf8(&mut iter); | 
|  | 414 | +        char_count += 1; | 
|  | 415 | +    } | 
|  | 416 | +    let byte_len = bytes.len() - iter.len(); | 
|  | 417 | +    let truncated = unsafe { bytes.get_unchecked(..byte_len) }; | 
|  | 418 | +    (truncated, char_count) | 
|  | 419 | +} | 
0 commit comments