|  | 
| 2 | 2 | //! systems: just a `Vec<u8>`/`[u8]`. | 
| 3 | 3 | 
 | 
| 4 | 4 | use core::clone::CloneToUninit; | 
|  | 5 | +use core::str::advance_utf8; | 
| 5 | 6 | 
 | 
| 6 | 7 | use crate::borrow::Cow; | 
| 7 | 8 | use crate::collections::TryReserveError; | 
| @@ -33,25 +34,37 @@ impl fmt::Debug for Slice { | 
| 33 | 34 | 
 | 
| 34 | 35 | impl fmt::Display for Slice { | 
| 35 | 36 |     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { | 
| 36 |  | -        // If we're the empty string then our iterator won't actually yield | 
| 37 |  | -        // anything, so perform the formatting manually | 
| 38 |  | -        if self.inner.is_empty() { | 
| 39 |  | -            return "".fmt(f); | 
|  | 37 | +        // Corresponds to `Formatter::pad`, but for `OsStr` instead of `str`. | 
|  | 38 | + | 
|  | 39 | +        // Make sure there's a fast path up front. | 
|  | 40 | +        if f.options().get_width().is_none() && f.options().get_precision().is_none() { | 
|  | 41 | +            return self.write_lossy(f); | 
| 40 | 42 |         } | 
| 41 | 43 | 
 | 
| 42 |  | -        for chunk in self.inner.utf8_chunks() { | 
| 43 |  | -            let valid = chunk.valid(); | 
| 44 |  | -            // If we successfully decoded the whole chunk as a valid string then | 
| 45 |  | -            // we can return a direct formatting of the string which will also | 
| 46 |  | -            // respect various formatting flags if possible. | 
| 47 |  | -            if chunk.invalid().is_empty() { | 
| 48 |  | -                return valid.fmt(f); | 
| 49 |  | -            } | 
|  | 44 | +        // The `precision` field can be interpreted as a maximum width for the | 
|  | 45 | +        // string being formatted. | 
|  | 46 | +        let max_char_count = f.options().get_precision().unwrap_or(usize::MAX); | 
|  | 47 | +        let (truncated, char_count) = truncate_chars(&self.inner, max_char_count); | 
|  | 48 | + | 
|  | 49 | +        // If our string is longer than the maximum width, truncate it and | 
|  | 50 | +        // handle other flags in terms of the truncated string. | 
|  | 51 | +        // SAFETY: The truncation splits at Unicode scalar value boundaries. | 
|  | 52 | +        let s = unsafe { Slice::from_encoded_bytes_unchecked(truncated) }; | 
| 50 | 53 | 
 | 
| 51 |  | -            f.write_str(valid)?; | 
| 52 |  | -            f.write_char(char::REPLACEMENT_CHARACTER)?; | 
|  | 54 | +        // The `width` field is more of a minimum width parameter at this point. | 
|  | 55 | +        if let Some(width) = f.options().get_width() | 
|  | 56 | +            && char_count < width | 
|  | 57 | +        { | 
|  | 58 | +            // If we're under the minimum width, then fill up the minimum width | 
|  | 59 | +            // with the specified string + some alignment. | 
|  | 60 | +            let post_padding = f.padding(width - char_count, fmt::Alignment::Left)?; | 
|  | 61 | +            s.write_lossy(f)?; | 
|  | 62 | +            post_padding.write(f) | 
|  | 63 | +        } else { | 
|  | 64 | +            // If we're over the minimum width or there is no minimum width, we | 
|  | 65 | +            // can just emit the string. | 
|  | 66 | +            s.write_lossy(f) | 
| 53 | 67 |         } | 
| 54 |  | -        Ok(()) | 
| 55 | 68 |     } | 
| 56 | 69 | } | 
| 57 | 70 | 
 | 
| @@ -286,6 +299,18 @@ impl Slice { | 
| 286 | 299 |         String::from_utf8_lossy(&self.inner) | 
| 287 | 300 |     } | 
| 288 | 301 | 
 | 
|  | 302 | +    /// Writes the string as lossy UTF-8 like [`String::from_utf8_lossy`]. | 
|  | 303 | +    /// It ignores formatter flags. | 
|  | 304 | +    fn write_lossy(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { | 
|  | 305 | +        for chunk in self.inner.utf8_chunks() { | 
|  | 306 | +            f.write_str(chunk.valid())?; | 
|  | 307 | +            if !chunk.invalid().is_empty() { | 
|  | 308 | +                f.write_char(char::REPLACEMENT_CHARACTER)?; | 
|  | 309 | +            } | 
|  | 310 | +        } | 
|  | 311 | +        Ok(()) | 
|  | 312 | +    } | 
|  | 313 | + | 
| 289 | 314 |     pub fn to_owned(&self) -> Buf { | 
| 290 | 315 |         Buf { inner: self.inner.to_vec() } | 
| 291 | 316 |     } | 
| @@ -357,3 +382,19 @@ unsafe impl CloneToUninit for Slice { | 
| 357 | 382 |         unsafe { self.inner.clone_to_uninit(dst) } | 
| 358 | 383 |     } | 
| 359 | 384 | } | 
|  | 385 | + | 
|  | 386 | +/// Counts the number of Unicode scalar values in the byte string, allowing | 
|  | 387 | +/// invalid UTF-8 sequences. For invalid sequences, the maximal prefix of a | 
|  | 388 | +/// valid UTF-8 code unit sequence counts as one. Only up to `max_chars` scalar values | 
|  | 389 | +/// are scanned. Returns the scanned prefix of the bytes and its character count. | 
|  | 390 | +fn truncate_chars(bytes: &[u8], max_chars: usize) -> (&[u8], usize) { | 
|  | 391 | +    let mut iter = bytes.iter(); | 
|  | 392 | +    let mut char_count = 0; | 
|  | 393 | +    while !iter.is_empty() && char_count < max_chars { | 
|  | 394 | +        advance_utf8(&mut iter); | 
|  | 395 | +        char_count += 1; | 
|  | 396 | +    } | 
|  | 397 | +    let byte_len = bytes.len() - iter.len(); | 
|  | 398 | +    let truncated = unsafe { bytes.get_unchecked(..byte_len) }; | 
|  | 399 | +    (truncated, char_count) | 
|  | 400 | +} | 
0 commit comments