From e6860d5c4cbcd2fe801dfa37e0d820e67b966c2f Mon Sep 17 00:00:00 2001
From: Jules Bertholet <julesbertholet@quoi.xyz>
Date: Fri, 10 May 2024 16:29:44 -0400
Subject: [PATCH] Add `fmt::Display` wrapper to pad/truncate using correct
 width

Fixes #9
---
 .github/workflows/rust.yml |  36 +++++++--
 .travis.yml                |  28 -------
 Cargo.toml                 |  16 ++--
 README.md                  |   2 +-
 benches/benches.rs         |  17 ++--
 src/display.rs             | 157 +++++++++++++++++++++++++++++++++++++
 src/lib.rs                 |  46 ++++++++++-
 tests/display.rs           | 141 +++++++++++++++++++++++++++++++++
 8 files changed, 392 insertions(+), 51 deletions(-)
 delete mode 100644 .travis.yml
 create mode 100644 src/display.rs
 create mode 100644 tests/display.rs
diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml
index c68e42d..edb22ea 100644
--- a/.github/workflows/rust.yml
+++ b/.github/workflows/rust.yml
@@ -18,16 +18,40 @@ jobs:
     runs-on: ubuntu-latest
     steps:
     - uses: actions/checkout@v4
-    - name: Build
+
+    - name: Build (all features)
+      run: cargo build --features display --verbose
+    - name: Run tests (all features)
+      run: cargo test --features display --verbose
+    - name: Check clippy (all features)
+      run: cargo clippy --features display --lib --tests --verbose
+
+    - name: Build (default features)
       run: cargo build --verbose
-    - name: Run tests
+    - name: Run tests (default features)
       run: cargo test --verbose
-    - name: Build docs
-      run: cargo doc
+    - name: Check clippy (default features)
+      run: cargo clippy --lib --tests --verbose
+
+  fmt:
+    runs-on: ubuntu-latest
+    steps:
+    - uses: actions/checkout@v4
     - name: Check formatting
       run: cargo fmt --check
-    - name: Check clippy
-      run: cargo clippy --lib --tests
+
+  nightly:
+    env:
+      RUSTDOCFLAGS: -D warnings --cfg docsrs
+    runs-on: ubuntu-latest
+    steps:
+    - uses: actions/checkout@v4
+    - name: Install nightly
+      run: rustup toolchain add nightly --component clippy # clippy is used by "Check benches"
+    - name: Build docs
+      run: cargo +nightly doc --features display --verbose
+    - name: Check benches
+      run: cargo +nightly clippy --benches --features display --verbose
 
   regen:
     runs-on: ubuntu-latest
diff --git a/.travis.yml b/.travis.yml
deleted file mode 100644
index 64196fa..0000000
--- a/.travis.yml
+++ /dev/null
@@ -1,28 +0,0 @@
-language: rust
-rust: 'nightly'
-sudo: false
-script:
-  - cargo build --verbose --features bench
-  - cargo test --verbose --features bench
-  - cargo bench --verbose --features bench
-  - cargo clean
-  - cargo build --verbose
-  - cargo test --verbose
-# next line is an ugly hack to fix an annoying bug where rustdoc tries to use the rustc_private unicode_width crate
-# (there is probably a better fix than this)
-  - rm $(find /home/travis/.rustup -type f -name 'libunicode_width*')
-  - rustdoc --test README.md -L target/debug -L target/debug/deps
-  - cargo doc
-after_success: |
-  [ $TRAVIS_BRANCH = master ] &&
-  [ $TRAVIS_PULL_REQUEST = false ] &&
-  echo '<meta http-equiv=refresh content=0;url=unicode_width/index.html>' > target/doc/index.html &&
-  pip install ghp-import --user $USER &&
-  $HOME/.local/bin/ghp-import -n target/doc &&
-  git push -qf https://${TOKEN}@github.com/${TRAVIS_REPO_SLUG}.git gh-pages
-env:
-  global:
-    secure: vHL3zrN8AF+H79jrB8OfzuPqsUHevo6ECzwqXPj2dMSqcSXEeCY/ENAfiyFg+oW8yEVP8X2BS1a/C9yvVQRLqLbm1HbZ/5vUpoggT9S0IhKqZMyAcLYXfIEUDMDQuaSdFndDaHvq8275ScgX1LRv1kcPjQoZHuaXWMH8y/Suvyo=
-notifications:
-  email:
-    on_success: never
diff --git a/Cargo.toml b/Cargo.toml
index 16fa8db..6939a14 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -24,16 +24,22 @@ according to Unicode Standard Annex #11 rules.
 """
 edition = "2021"
 
-exclude = ["target/*", "Cargo.lock"]
+exclude = ["/.github/*", "/target/*", "/Cargo.lock"]
 
 [dependencies]
-std = { version = "1.0", package = "rustc-std-workspace-std", optional = true }
-core = { version = "1.0", package = "rustc-std-workspace-core", optional = true }
+unicode-segmentation = { version = "1.11.0", optional = true }
+
 compiler_builtins = { version = "0.1", optional = true }
+core = { version = "1.0", package = "rustc-std-workspace-core", optional = true }
+std = { version = "1.0", package = "rustc-std-workspace-std", optional = true }
 
 [features]
-default = []
-rustc-dep-of-std = ['std', 'core', 'compiler_builtins']
+display = ["dep:unicode-segmentation"]
+rustc-dep-of-std = ["dep:compiler_builtins", "dep:core", "dep:std"]
 
 # Legacy, now a no-op
 no_std = []
+
+[package.metadata.docs.rs]
+features = ["display"]
+rustdoc-args = ["--cfg", "docsrs"]
diff --git a/README.md b/README.md
index 2d9ea39..40b5947 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,6 @@
 # `unicode-width`
 
-[![Build status](https://github.com/unicode-rs/unicode-width/actions/workflows/rust.yml/badge.svg)](https://travis-ci.org/unicode-rs/unicode-width)
+[![Build status](https://github.com/unicode-rs/unicode-width/actions/workflows/rust.yml/badge.svg)](https://github.com/unicode-rs/unicode-width/actions/workflows/rust.yml)
 [![crates.io version](https://img.shields.io/crates/v/unicode-width)](https://crates.io/crates/unicode-width)
 [![Docs status](https://img.shields.io/docsrs/unicode-width)](https://docs.rs/unicode-width/)
 
diff --git a/benches/benches.rs b/benches/benches.rs
index 44aaee6..b90227e 100644
--- a/benches/benches.rs
+++ b/benches/benches.rs
@@ -10,16 +10,13 @@
 #![feature(test)]
 
 extern crate test;
-
-use std::iter;
-
 use test::Bencher;
 
 use unicode_width::{UnicodeWidthChar, UnicodeWidthStr};
 
 #[bench]
 fn cargo(b: &mut Bencher) {
-    let string = iter::repeat('a').take(4096).collect::<String>();
+    let string = "a".repeat(4096);
 
     b.iter(|| {
         for c in string.chars() {
@@ -31,7 +28,7 @@ fn cargo(b: &mut Bencher) {
 #[bench]
 #[allow(deprecated)]
 fn stdlib(b: &mut Bencher) {
-    let string = iter::repeat('a').take(4096).collect::<String>();
+    let string = "a".repeat(4096);
 
     b.iter(|| {
         for c in string.chars() {
@@ -42,7 +39,7 @@ fn stdlib(b: &mut Bencher) {
 
 #[bench]
 fn simple_if(b: &mut Bencher) {
-    let string = iter::repeat('a').take(4096).collect::<String>();
+    let string = "a".repeat(4096);
 
     b.iter(|| {
         for c in string.chars() {
@@ -53,7 +50,7 @@ fn simple_if(b: &mut Bencher) {
 
 #[bench]
 fn simple_match(b: &mut Bencher) {
-    let string = iter::repeat('a').take(4096).collect::<String>();
+    let string = "a".repeat(4096);
 
     b.iter(|| {
         for c in string.chars() {
@@ -81,9 +78,9 @@ fn simple_width_if(c: char) -> Option<usize> {
 #[inline]
 fn simple_width_match(c: char) -> Option<usize> {
     match c as u32 {
-        cu if cu == 0 => Some(0),
-        cu if cu < 0x20 => None,
-        cu if cu < 0x7f => Some(1),
+        0 => Some(0),
+        1..=0x1F => None,
+        0x20..=0x7E => Some(1),
         _ => UnicodeWidthChar::width(c),
     }
 }
diff --git a/src/display.rs b/src/display.rs
new file mode 100644
index 0000000..08003a6
--- /dev/null
+++ b/src/display.rs
@@ -0,0 +1,157 @@
+use core::fmt::{self, Write};
+
+use unicode_segmentation::UnicodeSegmentation;
+
+use crate::{UnicodeWidthChar, UnicodeWidthStr};
+
+/// A wrapper around a [`str`] whose [`fmt::Display`] impl
+/// pads, truncates, and aligns based on the string's displayed
+/// width as computed by this crate (non-CJK).
+///
+/// Produced via [`UnicodeWidthStr::using_width`]; see its documentation for more.
+#[derive(PartialEq, Eq, Hash)]
+// The `str` <-> `StrWithWidth` reference conversions in this file rely on this layout.
+#[repr(transparent)]
+pub struct StrWithWidth(str);
+
+impl StrWithWidth {
+    /// The displayed width of the string in columns
+    /// (equivalent to [`UnicodeWidthStr::width`]).
+    #[inline]
+    pub fn width(&self) -> usize {
+        self.0.width()
+    }
+}
+
+impl PartialEq<str> for StrWithWidth {
+    #[inline]
+    fn eq(&self, other: &str) -> bool {
+        &self.0 == other
+    }
+}
+
+impl AsRef<str> for StrWithWidth {
+    #[inline]
+    fn as_ref(&self) -> &str {
+        &self.0
+    }
+}
+
+impl AsMut<str> for StrWithWidth {
+    #[inline]
+    fn as_mut(&mut self) -> &mut str {
+        &mut self.0
+    }
+}
+
+impl AsRef<StrWithWidth> for str {
+    #[inline]
+    fn as_ref(&self) -> &StrWithWidth {
+        // SAFETY: `StrWithWidth` is `repr(transparent)` over `str`, so the pointee layouts match
+        unsafe { &*(self as *const str as *const StrWithWidth) }
+    }
+}
+
+impl AsMut<StrWithWidth> for str {
+    #[inline]
+    fn as_mut(&mut self) -> &mut StrWithWidth {
+        // SAFETY: `StrWithWidth` is `repr(transparent)` over `str`, so the pointee layouts match
+        unsafe { &mut *(self as *mut str as *mut StrWithWidth) }
+    }
+}
+
+impl fmt::Display for StrWithWidth {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        // Fast path: neither a width nor a precision was requested, so write the string as-is
+        if f.width().is_none() && f.precision().is_none() {
+            return f.write_str(&self.0);
+        }
+
+        // Truncate at a grapheme cluster boundary so the prefix fits in `precision` columns
+        let (truncated, truncated_width) = if let Some(max_width) = f.precision() {
+            let mut truncated_width: usize = 0;
+            let mut truncated = &self.0;
+            for (seg_offset, seg) in self.0.grapheme_indices(true) {
+                let new_width = truncated_width + seg.width();
+                if new_width > max_width {
+                    truncated = &self.0[..seg_offset];
+                    break;
+                } else {
+                    truncated_width = new_width;
+                }
+            }
+            (truncated, truncated_width)
+        } else {
+            (&self.0, self.0.width())
+        };
+
+        // Pad the (possibly truncated) string up to the requested minimum width
+        if let Some(padding) = f
+            .width()
+            .and_then(|min_width| min_width.checked_sub(truncated_width))
+            .filter(|&padding| padding > 0)
+        {
+            let align = f.align().unwrap_or(fmt::Alignment::Left);
+
+            let mut fill_char = f.fill();
+            let mut fill_char_width = fill_char.width().unwrap_or(1);
+
+            // If we try to fill with a zero-sized char, we'll never succeed, so fall back to space
+            if fill_char_width == 0 {
+                fill_char = ' ';
+                fill_char_width = 1;
+            }
+            // `pre_pre_pad`/`post_post_pad` count fill chars; `pre_pad`/`post_pad` count spaces
+            let (pre_pre_pad, pre_pad, post_pad, post_post_pad) = match align {
+                fmt::Alignment::Left => {
+                    (0, 0, padding % fill_char_width, padding / fill_char_width)
+                }
+                fmt::Alignment::Right => {
+                    (padding / fill_char_width, padding % fill_char_width, 0, 0)
+                }
+                fmt::Alignment::Center => {
+                    let (left_padding, right_padding) = (padding / 2, (padding + 1) / 2);
+                    let (pre_pre_pad, mut pre_pad, mut post_pad, mut post_post_pad) = {
+                        (
+                            left_padding / fill_char_width,
+                            left_padding % fill_char_width,
+                            right_padding % fill_char_width,
+                            right_padding / fill_char_width,
+                        )
+                    };
+                    if let Some(diff) = pre_pad.checked_sub(fill_char_width - post_pad) {
+                        pre_pad = 0; // spare columns fit one more fill char; put it on the right
+                        post_pad = diff;
+                        post_post_pad += 1;
+                    }
+                    (pre_pre_pad, pre_pad, post_pad, post_post_pad)
+                }
+            };
+
+            for _ in 0..pre_pre_pad {
+                f.write_char(fill_char)?;
+            }
+            for _ in 0..pre_pad {
+                f.write_char(' ')?;
+            }
+            f.write_str(truncated)?;
+            for _ in 0..post_pad {
+                f.write_char(' ')?;
+            }
+            for _ in 0..post_post_pad {
+                f.write_char(fill_char)?;
+            }
+
+            Ok(())
+        } else {
+            f.write_str(truncated)
+        }
+    }
+}
+
+impl fmt::Debug for StrWithWidth {
+    #[inline]
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        fmt::Debug::fmt(&self.0, f)
+    }
+}
diff --git a/src/lib.rs b/src/lib.rs
index 5bcdfa7..97d5df3 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -81,7 +81,7 @@
 //! The non-CJK width methods guarantee that canonically equivalent strings are assigned the same width.
 //! However, this guarantee does not currently hold for the CJK width variants.
 
-#![forbid(unsafe_code)]
+#![cfg_attr(docsrs, feature(doc_cfg))]
 #![deny(missing_docs)]
 #![doc(
     html_logo_url = "https://unicode-rs.github.io/unicode-rs_sm.png",
@@ -92,6 +92,13 @@
 use tables::charwidth as cw;
 pub use tables::UNICODE_VERSION;
 
+#[cfg(feature = "display")]
+mod display;
+
+#[cfg(feature = "display")]
+#[cfg_attr(docsrs, doc(cfg(feature = "display")))]
+pub use display::StrWithWidth;
+
 mod tables;
 
 /// Methods for determining displayed width of Unicode characters.
@@ -160,6 +167,37 @@ pub trait UnicodeWidthStr {
     /// non-CJK contexts, or when the context cannot be reliably determined.
     fn width(&self) -> usize;
 
+    /// Returns a wrapper around the string
+    /// with a [`Display`][core::fmt::Display] impl
+    /// that pads, aligns and truncates according to the string's
+    /// displayed width.
+    ///
+    /// # Examples
+    ///
+    /// ```rust
+    /// use unicode_width::UnicodeWidthStr;
+    ///
+    /// let string = "字".using_width();
+    /// assert_eq!(format!("{string:<4}"), "字  ");
+    /// assert_eq!(format!("{string:^4}"), " 字 ");
+    /// assert_eq!(format!("{string:>4}"), "  字");
+    ///
+    /// let string = "a".using_width();
+    /// assert_eq!(format!("{string:字<7}"), "a字字字");
+    /// assert_eq!(format!("{string:字^7}"), "字a字字");
+    /// assert_eq!(format!("{string:字>7}"), "字字字a");
+    /// assert_eq!(format!("{string:字<8}"), "a 字字字");
+    /// assert_eq!(format!("{string:字^8}"), "字 a字字");
+    /// assert_eq!(format!("{string:字>8}"), "字字字 a");
+    ///
+    /// // Truncation is by extended grapheme cluster
+    /// let string = "🇺🇳🇺🇳".using_width();
+    /// assert_eq!(format!("{string:.3}"), "🇺🇳");
+    /// ```
+    #[cfg_attr(docsrs, doc(cfg(feature = "display")))]
+    #[cfg(feature = "display")]
+    fn using_width(&self) -> &StrWithWidth;
+
     /// Returns the string's displayed width in columns.
     ///
     /// This function treats characters in the Ambiguous category according
@@ -175,6 +213,12 @@ impl UnicodeWidthStr for str {
         str_width(self, false)
     }
 
+    #[cfg(feature = "display")]
+    #[inline]
+    fn using_width(&self) -> &StrWithWidth {
+        self.as_ref()
+    }
+
     #[inline]
     fn width_cjk(&self) -> usize {
         str_width(self, true)
diff --git a/tests/display.rs b/tests/display.rs
new file mode 100644
index 0000000..1eff872
--- /dev/null
+++ b/tests/display.rs
@@ -0,0 +1,141 @@
+#![cfg(feature = "display")]
+
+use unicode_width::UnicodeWidthStr;
+
+#[test]
+fn basic() {
+    for s in [
+        "",
+        "\0",
+        "a",
+        "abc",
+        "¡Olé!",
+        "kilimanjaro",
+        "Κύριε, ἐλέησον",
+    ] {
+        assert_eq!(format!("{}", s.using_width()), s);
+    }
+}
+
+#[test]
+fn basic_with_args() {
+    for min_width in 0..20 {
+        for max_width in 0..20 {
+            for s in [
+                "",
+                "\0",
+                "a",
+                "abc",
+                "¡Olé!",
+                "kilimanjaro",
+                "Κύριε, ἐλέησον",
+            ] {
+                assert_eq!(
+                    format!(
+                        "{:a<min$.max$}",
+                        s.using_width(),
+                        min = min_width,
+                        max = max_width
+                    ),
+                    format!("{:a<min$.max$}", s, min = min_width, max = max_width)
+                );
+
+                assert_eq!(
+                    format!(
+                        "{:a^min$.max$}",
+                        s.using_width(),
+                        min = min_width,
+                        max = max_width
+                    ),
+                    format!("{:a^min$.max$}", s, min = min_width, max = max_width)
+                );
+
+                assert_eq!(
+                    format!(
+                        "{:a>min$.max$}",
+                        s.using_width(),
+                        min = min_width,
+                        max = max_width
+                    ),
+                    format!("{:a>min$.max$}", s, min = min_width, max = max_width)
+                );
+            }
+        }
+    }
+}
+
+#[test]
+fn trunc() {
+    let smol_str = "汉字".using_width();
+    let smol_prefixes = ["", "", "汉", "汉", "汉字", "汉字"]; // indexed by precision
+    for (width, prefix) in smol_prefixes.into_iter().enumerate() {
+        assert_eq!(format!("{smol_str:.width$}"), prefix, "width: {width}");
+    }
+    // Narrow and wide characters mixed: a wide character is never split in half
+    let med_str = "a汉字b".using_width();
+    let med_prefixes = ["", "a", "a", "a汉", "a汉", "a汉字", "a汉字b", "a汉字b"];
+    for (width, prefix) in med_prefixes.into_iter().enumerate() {
+        assert_eq!(format!("{med_str:.width$}"), prefix, "width: {width}");
+    }
+    // Zero-width characters, a combining mark, CRLF, and variation selectors
+    let huge_str =
+        "\u{200B}\u{200E}a\u{0301}汉字\r\nb\u{2764}\u{FE0F}c\u{2648}\u{FE0E}\u{FF9E}".using_width();
+    let huge_prefixes = [
+        "\u{200B}\u{200E}",
+        "\u{200B}\u{200E}a\u{0301}",
+        "\u{200B}\u{200E}a\u{0301}",
+        "\u{200B}\u{200E}a\u{0301}汉",
+        "\u{200B}\u{200E}a\u{0301}汉",
+        "\u{200B}\u{200E}a\u{0301}汉字",
+        "\u{200B}\u{200E}a\u{0301}汉字\r\n",
+        "\u{200B}\u{200E}a\u{0301}汉字\r\nb",
+        "\u{200B}\u{200E}a\u{0301}汉字\r\nb",
+        "\u{200B}\u{200E}a\u{0301}汉字\r\nb\u{2764}\u{FE0F}",
+        "\u{200B}\u{200E}a\u{0301}汉字\r\nb\u{2764}\u{FE0F}c",
+        "\u{200B}\u{200E}a\u{0301}汉字\r\nb\u{2764}\u{FE0F}c\u{2648}\u{FE0E}\u{FF9E}",
+    ];
+
+    for (width, prefix) in huge_prefixes.into_iter().enumerate() {
+        assert_eq!(format!("{huge_str:.width$}"), prefix, "width: {width}");
+    }
+}
+
+#[test]
+fn pad() {
+    let string = "\u{2764}\u{FE0F}a".using_width();
+    // Width-1 fill char; the string itself is 3 columns wide
+    assert_eq!(format!("{string:q<7}"), "\u{2764}\u{FE0F}aqqqq");
+    assert_eq!(format!("{string:q^7}"), "qq\u{2764}\u{FE0F}aqq");
+    assert_eq!(format!("{string:q>7}"), "qqqq\u{2764}\u{FE0F}a");
+    // Width-2 fill char: 4 columns of padding take two fill chars
+    assert_eq!(format!("{string:字<7}"), "\u{2764}\u{FE0F}a字字");
+    assert_eq!(format!("{string:字^7}"), "字\u{2764}\u{FE0F}a字");
+    assert_eq!(format!("{string:字>7}"), "字字\u{2764}\u{FE0F}a");
+    // Zero-width fill char: spaces are used instead
+    assert_eq!(format!("{string:\u{0301}<7}"), "\u{2764}\u{FE0F}a    ");
+    assert_eq!(format!("{string:\u{0301}^7}"), "  \u{2764}\u{FE0F}a  ");
+    assert_eq!(format!("{string:\u{0301}>7}"), "    \u{2764}\u{FE0F}a");
+    // One more column of padding; when centering, the extra column goes on the right
+    assert_eq!(format!("{string:q<8}"), "\u{2764}\u{FE0F}aqqqqq");
+    assert_eq!(format!("{string:q^8}"), "qq\u{2764}\u{FE0F}aqqq");
+    assert_eq!(format!("{string:q>8}"), "qqqqq\u{2764}\u{FE0F}a");
+    // Width-2 fill char, odd padding: the leftover column becomes a space next to the string
+    assert_eq!(format!("{string:字<8}"), "\u{2764}\u{FE0F}a 字字");
+    assert_eq!(format!("{string:字^8}"), "字\u{2764}\u{FE0F}a 字");
+    assert_eq!(format!("{string:字>8}"), "字字 \u{2764}\u{FE0F}a");
+
+    assert_eq!(format!("{string:\u{0301}<8}"), "\u{2764}\u{FE0F}a     ");
+    assert_eq!(format!("{string:\u{0301}^8}"), "  \u{2764}\u{FE0F}a   ");
+    assert_eq!(format!("{string:\u{0301}>8}"), "     \u{2764}\u{FE0F}a");
+    // Centering merges the spare column of each side into one fill char on the right
+    let string = "a".using_width();
+    assert_eq!(format!("{string:字^7}"), "字a字字");
+    // Wide string with the default (space) fill
+    let string = "字".using_width();
+    assert_eq!(format!("{string:<3}"), "字 ");
+    assert_eq!(format!("{string:^3}"), "字 ");
+    assert_eq!(format!("{string:>3}"), " 字");
+    assert_eq!(format!("{string:<4}"), "字  ");
+    assert_eq!(format!("{string:^4}"), " 字 ");
+    assert_eq!(format!("{string:>4}"), "  字");
+}