From e6860d5c4cbcd2fe801dfa37e0d820e67b966c2f Mon Sep 17 00:00:00 2001 From: Jules Bertholet Date: Fri, 10 May 2024 16:29:44 -0400 Subject: [PATCH] Add `fmt::Display` wrapper to pad/truncate using correct width Fixes #9 --- .github/workflows/rust.yml | 36 +++++++-- .travis.yml | 28 ------- Cargo.toml | 16 ++-- README.md | 2 +- benches/benches.rs | 17 ++-- src/display.rs | 157 +++++++++++++++++++++++++++++++++++++ src/lib.rs | 46 ++++++++++- tests/display.rs | 141 +++++++++++++++++++++++++++++++++ 8 files changed, 392 insertions(+), 51 deletions(-) delete mode 100644 .travis.yml create mode 100644 src/display.rs create mode 100644 tests/display.rs diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index c68e42d..edb22ea 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -18,16 +18,40 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - - name: Build + + - name: Build (all features) + run: cargo build --features display --verbose + - name: Run tests (all features) + run: cargo test --features display --verbose + - name: Check clippy (all features) + run: cargo clippy --features display --lib --tests --verbose + + - name: Build (default features) run: cargo build --verbose - - name: Run tests + - name: Run tests (default features) run: cargo test --verbose - - name: Build docs - run: cargo doc + - name: Check clippy (default features) + run: cargo clippy --lib --tests --verbose + + fmt: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 - name: Check formatting run: cargo fmt --check - - name: Check clippy - run: cargo clippy --lib --tests + + nightly: + env: + RUSTDOCFLAGS: -D warnings --cfg docsrs + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Install nightly + run: rustup toolchain add nightly + - name: Build docs + run: cargo +nightly doc --features display --verbose + - name: Check benches + run: cargo +nightly clippy --benches --features display --verbose regen: runs-on: ubuntu-latest diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 64196fa..0000000 --- a/.travis.yml +++ /dev/null @@ -1,28 +0,0 @@ -language: rust -rust: 'nightly' -sudo: false -script: - - cargo build --verbose --features bench - - cargo test --verbose --features bench - - cargo bench --verbose --features bench - - cargo clean - - cargo build --verbose - - cargo test --verbose -# next line is an ugly hack to fix an annoying bug where rustdoc tries to use the rustc_private unicode_width crate -# (there is probably a better fix than this) - - rm $(find /home/travis/.rustup -type f -name 'libunicode_width*') - - rustdoc --test README.md -L target/debug -L target/debug/deps - - cargo doc -after_success: | - [ $TRAVIS_BRANCH = master ] && - [ $TRAVIS_PULL_REQUEST = false ] && - echo '' > target/doc/index.html && - pip install ghp-import --user $USER && - $HOME/.local/bin/ghp-import -n target/doc && - git push -qf https://${TOKEN}@github.com/${TRAVIS_REPO_SLUG}.git gh-pages -env: - global: - secure: vHL3zrN8AF+H79jrB8OfzuPqsUHevo6ECzwqXPj2dMSqcSXEeCY/ENAfiyFg+oW8yEVP8X2BS1a/C9yvVQRLqLbm1HbZ/5vUpoggT9S0IhKqZMyAcLYXfIEUDMDQuaSdFndDaHvq8275ScgX1LRv1kcPjQoZHuaXWMH8y/Suvyo= -notifications: - email: - on_success: never diff --git a/Cargo.toml b/Cargo.toml index 16fa8db..6939a14 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -24,16 +24,22 @@ according to Unicode Standard Annex #11 rules. """ edition = "2021" -exclude = ["target/*", "Cargo.lock"] +exclude = ["/.github/*", "/target/*", "/Cargo.lock"] [dependencies] -std = { version = "1.0", package = "rustc-std-workspace-std", optional = true } -core = { version = "1.0", package = "rustc-std-workspace-core", optional = true } +unicode-segmentation = { version = "1.11.0", optional = true } + compiler_builtins = { version = "0.1", optional = true } +core = { version = "1.0", package = "rustc-std-workspace-core", optional = true } +std = { version = "1.0", package = "rustc-std-workspace-std", optional = true } [features] -default = [] -rustc-dep-of-std = ['std', 'core', 'compiler_builtins'] +display = ["dep:unicode-segmentation"] +rustc-dep-of-std = ["dep:compiler_builtins", "dep:core", "dep:std"] # Legacy, now a no-op no_std = [] + +[package.metadata.docs.rs] +features = ["display"] +rustdoc-args = ["--cfg", "docsrs"] diff --git a/README.md b/README.md index 2d9ea39..40b5947 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # `unicode-width` -[![Build status](https://github.com/unicode-rs/unicode-width/actions/workflows/rust.yml/badge.svg)](https://travis-ci.org/unicode-rs/unicode-width) +[![Build status](https://github.com/unicode-rs/unicode-width/actions/workflows/rust.yml/badge.svg)](https://github.com/unicode-rs/unicode-width/actions/workflows/rust.yml) [![crates.io version](https://img.shields.io/crates/v/unicode-width)](https://crates.io/crates/unicode-width) [![Docs status](https://img.shields.io/docsrs/unicode-width)](https://docs.rs/unicode-width/) diff --git a/benches/benches.rs b/benches/benches.rs index 44aaee6..b90227e 100644 --- a/benches/benches.rs +++ b/benches/benches.rs @@ -10,16 +10,13 @@ #![feature(test)] extern crate test; - -use std::iter; - use test::Bencher; use unicode_width::{UnicodeWidthChar, UnicodeWidthStr}; #[bench] fn cargo(b: &mut Bencher) { - let string = iter::repeat('a').take(4096).collect::(); + let string = "a".repeat(4096); b.iter(|| { for c in string.chars() { @@ -31,7 +28,7 @@ fn cargo(b: &mut Bencher) { #[bench] #[allow(deprecated)] fn stdlib(b: &mut Bencher) { - let string = iter::repeat('a').take(4096).collect::(); + let string = "a".repeat(4096); b.iter(|| { for c in string.chars() { @@ -42,7 +39,7 @@ fn stdlib(b: &mut Bencher) { #[bench] fn simple_if(b: &mut Bencher) { - let string = iter::repeat('a').take(4096).collect::(); + let string = "a".repeat(4096); b.iter(|| { for c in string.chars() { @@ -53,7 +50,7 @@ fn simple_if(b: &mut Bencher) { #[bench] fn simple_match(b: &mut Bencher) { - let string = iter::repeat('a').take(4096).collect::(); + let string = "a".repeat(4096); b.iter(|| { for c in string.chars() { @@ -81,9 +78,9 @@ fn simple_width_if(c: char) -> Option { #[inline] fn simple_width_match(c: char) -> Option { match c as u32 { - cu if cu == 0 => Some(0), - cu if cu < 0x20 => None, - cu if cu < 0x7f => Some(1), + 0 => Some(0), + 1..=0x1F => None, + 0x20..=0x7E => Some(1), _ => UnicodeWidthChar::width(c), } } diff --git a/src/display.rs b/src/display.rs new file mode 100644 index 0000000..08003a6 --- /dev/null +++ b/src/display.rs @@ -0,0 +1,157 @@ +use core::fmt::{self, Write}; + +use unicode_segmentation::UnicodeSegmentation; + +use crate::{UnicodeWidthChar, UnicodeWidthStr}; + +/// A wrapper around a [`str`] with a [`fmt::Display`] impl +/// that performs padding, truncation, and alignment based on +/// the string width according to this crate (non-CJK). +/// +/// Produced via [`UnicodeWidthStr::using_width`]; +/// see its documentation for more. +#[derive(PartialEq, Eq, Hash)] +#[repr(transparent)] +pub struct StrWithWidth(str); + +impl StrWithWidth { + /// The advance width of the `string` + /// (equivalent to [`UnicodeWidthStr::width`]). + #[inline] + pub fn width(&self) -> usize { + self.0.width() + } +} + +impl PartialEq for StrWithWidth { + #[inline] + fn eq(&self, other: &str) -> bool { + &self.0 == other + } +} + +impl AsRef for StrWithWidth { + #[inline] + fn as_ref(&self) -> &str { + &self.0 + } +} + +impl AsMut for StrWithWidth { + #[inline] + fn as_mut(&mut self) -> &mut str { + &mut self.0 + } +} + +impl AsRef for str { + #[inline] + fn as_ref(&self) -> &StrWithWidth { + // SAFETY: `repr(transparent)` ensures compatible types + unsafe { core::mem::transmute(self) } + } +} + +impl AsMut for str { + #[inline] + fn as_mut(&mut self) -> &mut StrWithWidth { + // SAFETY: `repr(transparent)` ensures compatible types + unsafe { core::mem::transmute(self) } + } +} + +impl fmt::Display for StrWithWidth { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + // Fast path + if f.width().is_none() && f.precision().is_none() { + return f.write_str(&self.0); + } + + // Truncate the string to maximum width + let (truncated, truncated_width) = if let Some(max_width) = f.precision() { + let mut truncated_width: usize = 0; + let mut truncated = &self.0; + for (seg_offset, seg) in self.0.grapheme_indices(true) { + let new_width = truncated_width + seg.width(); + if new_width > max_width { + truncated = &self.0[..seg_offset]; + break; + } else { + truncated_width = new_width; + } + } + (truncated, truncated_width) + } else { + (&self.0, self.0.width()) + }; + + // Pad the string to minimum width + if let Some(padding) = f + .width() + .and_then(|min_width| min_width.checked_sub(truncated_width)) + .filter(|&padding| padding > 0) + { + let align = f.align().unwrap_or(fmt::Alignment::Left); + + let mut fill_char = f.fill(); + let mut fill_char_width = fill_char.width().unwrap_or(1); + + // If we try to fill with a zero-sized char, we'll never succeed, so fall back to space + if fill_char_width == 0 { + fill_char = ' '; + fill_char_width = 1; + } + + let (pre_pre_pad, pre_pad, post_pad, post_post_pad) = match align { + fmt::Alignment::Left => { + (0, 0, padding % fill_char_width, padding / fill_char_width) + } + fmt::Alignment::Right => { + (padding / fill_char_width, padding % fill_char_width, 0, 0) + } + fmt::Alignment::Center => { + let (left_padding, right_padding) = (padding / 2, (padding + 1) / 2); + let (pre_pre_pad, mut pre_pad, mut post_pad, mut post_post_pad) = { + ( + left_padding / fill_char_width, + left_padding % fill_char_width, + right_padding % fill_char_width, + right_padding / fill_char_width, + ) + }; + if let Some(diff) = pre_pad.checked_sub(fill_char_width - post_pad) { + pre_pad = 0; + post_pad = diff; + post_post_pad += 1; + } + (pre_pre_pad, pre_pad, post_pad, post_post_pad) + } + }; + + for _ in 0..pre_pre_pad { + f.write_char(fill_char)?; + } + for _ in 0..pre_pad { + f.write_char(' ')?; + } + f.write_str(truncated)?; + for _ in 0..post_pad { + f.write_char(' ')?; + } + for _ in 0..post_post_pad { + f.write_char(fill_char)?; + } + + Ok(()) + } else { + f.write_str(truncated) + } + } +} + +impl fmt::Debug for StrWithWidth { + #[inline] + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt::Debug::fmt(&self.0, f) + } +} diff --git a/src/lib.rs b/src/lib.rs index 5bcdfa7..97d5df3 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -81,7 +81,7 @@ //! The non-CJK width methods guarantee that canonically equivalent strings are assigned the same width. //! However, this guarantee does not currently hold for the CJK width variants. -#![forbid(unsafe_code)] +#![cfg_attr(docsrs, feature(doc_cfg))] #![deny(missing_docs)] #![doc( html_logo_url = "https://unicode-rs.github.io/unicode-rs_sm.png", @@ -92,6 +92,13 @@ use tables::charwidth as cw; pub use tables::UNICODE_VERSION; +#[cfg(feature = "display")] +mod display; + +#[cfg(feature = "display")] +#[cfg_attr(docsrs, doc(cfg(feature = "display")))] +pub use display::StrWithWidth; + mod tables; /// Methods for determining displayed width of Unicode characters. @@ -160,6 +167,37 @@ pub trait UnicodeWidthStr { /// non-CJK contexts, or when the context cannot be reliably determined. fn width(&self) -> usize; + /// Returns a wrapper around the string + /// with a [`Display`][core::fmt::Display] impl + /// that pads, aligns and truncates according to the string's + /// displayed width. + /// + /// # Examples + /// + /// ```rust + /// use unicode_width::UnicodeWidthStr; + /// + /// let string = "字".using_width(); + /// assert_eq!(format!("{string:<4}"), "字 "); + /// assert_eq!(format!("{string:^4}"), " 字 "); + /// assert_eq!(format!("{string:>4}"), " 字"); + /// + /// let string = "a".using_width(); + /// assert_eq!(format!("{string:字<7}"), "a字字字"); + /// assert_eq!(format!("{string:字^7}"), "字a字字"); + /// assert_eq!(format!("{string:字>7}"), "字字字a"); + /// assert_eq!(format!("{string:字<8}"), "a 字字字"); + /// assert_eq!(format!("{string:字^8}"), "字 a字字"); + /// assert_eq!(format!("{string:字>8}"), "字字字 a"); + /// + /// // Truncation is by extended grapheme cluster + /// let string = "🇺🇳🇺🇳".using_width(); + /// assert_eq!(format!("{string:.3}"), "🇺🇳"); + /// ``` + #[cfg_attr(docsrs, doc(cfg(feature = "display")))] + #[cfg(feature = "display")] + fn using_width(&self) -> &StrWithWidth; + /// Returns the string's displayed width in columns. /// /// This function treats characters in the Ambiguous category according @@ -175,6 +213,12 @@ impl UnicodeWidthStr for str { str_width(self, false) } + #[cfg(feature = "display")] + #[inline] + fn using_width(&self) -> &StrWithWidth { + self.as_ref() + } + #[inline] fn width_cjk(&self) -> usize { str_width(self, true) diff --git a/tests/display.rs b/tests/display.rs new file mode 100644 index 0000000..1eff872 --- /dev/null +++ b/tests/display.rs @@ -0,0 +1,141 @@ +#![cfg(feature = "display")] + +use unicode_width::UnicodeWidthStr; + +#[test] +fn basic() { + for s in [ + "", + "\0", + "a", + "abc", + "¡Olé!", + "kilimanjaro", + "Κύριε, ἐλέησον", + ] { + assert_eq!(format!("{}", s.using_width()), s); + } +} + +#[test] +fn basic_with_args() { + for min_width in 0..20 { + for max_width in 0..20 { + for s in [ + "", + "\0", + "a", + "abc", + "¡Olé!", + "kilimanjaro", + "Κύριε, ἐλέησον", + ] { + assert_eq!( + format!( + "{:amin$.max$}", + s.using_width(), + min = min_width, + max = max_width + ), + format!("{:a>min$.max$}", s, min = min_width, max = max_width) + ); + } + } + } +} + +#[test] +fn trunc() { + let smol_str = "汉字".using_width(); + let smol_prefixes = ["", "", "汉", "汉", "汉字", "汉字"]; + for (width, prefix) in smol_prefixes.into_iter().enumerate() { + assert_eq!(format!("{smol_str:.width$}"), prefix, "width: {width}"); + } + + let med_str = "a汉字b".using_width(); + let med_prefixes = ["", "a", "a", "a汉", "a汉", "a汉字", "a汉字b", "a汉字b"]; + for (width, prefix) in med_prefixes.into_iter().enumerate() { + assert_eq!(format!("{med_str:.width$}"), prefix, "width: {width}"); + } + + let huge_str = + "\u{200B}\u{200E}a\u{0301}汉字\r\nb\u{2764}\u{FE0F}c\u{2648}\u{FE0E}\u{FF9E}".using_width(); + let huge_prefixes = [ + "\u{200B}\u{200E}", + "\u{200B}\u{200E}a\u{0301}", + "\u{200B}\u{200E}a\u{0301}", + "\u{200B}\u{200E}a\u{0301}汉", + "\u{200B}\u{200E}a\u{0301}汉", + "\u{200B}\u{200E}a\u{0301}汉字", + "\u{200B}\u{200E}a\u{0301}汉字\r\n", + "\u{200B}\u{200E}a\u{0301}汉字\r\nb", + "\u{200B}\u{200E}a\u{0301}汉字\r\nb", + "\u{200B}\u{200E}a\u{0301}汉字\r\nb\u{2764}\u{FE0F}", + "\u{200B}\u{200E}a\u{0301}汉字\r\nb\u{2764}\u{FE0F}c", + "\u{200B}\u{200E}a\u{0301}汉字\r\nb\u{2764}\u{FE0F}c\u{2648}\u{FE0E}\u{FF9E}", + ]; + + for (width, prefix) in huge_prefixes.into_iter().enumerate() { + assert_eq!(format!("{huge_str:.width$}"), prefix, "width: {width}"); + } +} + +#[test] +fn pad() { + let string = "\u{2764}\u{FE0F}a".using_width(); + + assert_eq!(format!("{string:q<7}"), "\u{2764}\u{FE0F}aqqqq"); + assert_eq!(format!("{string:q^7}"), "qq\u{2764}\u{FE0F}aqq"); + assert_eq!(format!("{string:q>7}"), "qqqq\u{2764}\u{FE0F}a"); + + assert_eq!(format!("{string:字<7}"), "\u{2764}\u{FE0F}a字字"); + assert_eq!(format!("{string:字^7}"), "字\u{2764}\u{FE0F}a字"); + assert_eq!(format!("{string:字>7}"), "字字\u{2764}\u{FE0F}a"); + + assert_eq!(format!("{string:\u{0301}<7}"), "\u{2764}\u{FE0F}a "); + assert_eq!(format!("{string:\u{0301}^7}"), " \u{2764}\u{FE0F}a "); + assert_eq!(format!("{string:\u{0301}>7}"), " \u{2764}\u{FE0F}a"); + + assert_eq!(format!("{string:q<8}"), "\u{2764}\u{FE0F}aqqqqq"); + assert_eq!(format!("{string:q^8}"), "qq\u{2764}\u{FE0F}aqqq"); + assert_eq!(format!("{string:q>8}"), "qqqqq\u{2764}\u{FE0F}a"); + + assert_eq!(format!("{string:字<8}"), "\u{2764}\u{FE0F}a 字字"); + assert_eq!(format!("{string:字^8}"), "字\u{2764}\u{FE0F}a 字"); + assert_eq!(format!("{string:字>8}"), "字字 \u{2764}\u{FE0F}a"); + + assert_eq!(format!("{string:\u{0301}<8}"), "\u{2764}\u{FE0F}a "); + assert_eq!(format!("{string:\u{0301}^8}"), " \u{2764}\u{FE0F}a "); + assert_eq!(format!("{string:\u{0301}>8}"), " \u{2764}\u{FE0F}a"); + + let string = "a".using_width(); + assert_eq!(format!("{string:字^7}"), "字a字字"); + + let string = "字".using_width(); + assert_eq!(format!("{string:<3}"), "字 "); + assert_eq!(format!("{string:^3}"), "字 "); + assert_eq!(format!("{string:>3}"), " 字"); + assert_eq!(format!("{string:<4}"), "字 "); + assert_eq!(format!("{string:^4}"), " 字 "); + assert_eq!(format!("{string:>4}"), " 字"); +}