diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml
index c68e42d..edb22ea 100644
--- a/.github/workflows/rust.yml
+++ b/.github/workflows/rust.yml
@@ -18,16 +18,40 @@ jobs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- - name: Build
+
+ - name: Build (all features)
+ run: cargo build --features display --verbose
+ - name: Run tests (all features)
+ run: cargo test --features display --verbose
+ - name: Check clippy (all features)
+ run: cargo clippy --features display --lib --tests --verbose
+
+ - name: Build (default features)
run: cargo build --verbose
- - name: Run tests
+ - name: Run tests (default features)
run: cargo test --verbose
- - name: Build docs
- run: cargo doc
+ - name: Check clippy (default features)
+ run: cargo clippy --lib --tests --verbose
+
+ fmt:
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/checkout@v4
- name: Check formatting
run: cargo fmt --check
- - name: Check clippy
- run: cargo clippy --lib --tests
+
+ nightly:
+ env:
+ RUSTDOCFLAGS: -D warnings --cfg docsrs
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/checkout@v4
+ - name: Install nightly
+ run: rustup toolchain add nightly
+ - name: Build docs
+ run: cargo +nightly doc --features display --verbose
+ - name: Check benches
+ run: cargo +nightly clippy --benches --features display --verbose
regen:
runs-on: ubuntu-latest
diff --git a/.travis.yml b/.travis.yml
deleted file mode 100644
index 64196fa..0000000
--- a/.travis.yml
+++ /dev/null
@@ -1,28 +0,0 @@
-language: rust
-rust: 'nightly'
-sudo: false
-script:
- - cargo build --verbose --features bench
- - cargo test --verbose --features bench
- - cargo bench --verbose --features bench
- - cargo clean
- - cargo build --verbose
- - cargo test --verbose
-# next line is an ugly hack to fix an annoying bug where rustdoc tries to use the rustc_private unicode_width crate
-# (there is probably a better fix than this)
- - rm $(find /home/travis/.rustup -type f -name 'libunicode_width*')
- - rustdoc --test README.md -L target/debug -L target/debug/deps
- - cargo doc
-after_success: |
- [ $TRAVIS_BRANCH = master ] &&
- [ $TRAVIS_PULL_REQUEST = false ] &&
- echo '' > target/doc/index.html &&
- pip install ghp-import --user $USER &&
- $HOME/.local/bin/ghp-import -n target/doc &&
- git push -qf https://${TOKEN}@github.com/${TRAVIS_REPO_SLUG}.git gh-pages
-env:
- global:
- secure: vHL3zrN8AF+H79jrB8OfzuPqsUHevo6ECzwqXPj2dMSqcSXEeCY/ENAfiyFg+oW8yEVP8X2BS1a/C9yvVQRLqLbm1HbZ/5vUpoggT9S0IhKqZMyAcLYXfIEUDMDQuaSdFndDaHvq8275ScgX1LRv1kcPjQoZHuaXWMH8y/Suvyo=
-notifications:
- email:
- on_success: never
diff --git a/Cargo.toml b/Cargo.toml
index 16fa8db..6939a14 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -24,16 +24,22 @@ according to Unicode Standard Annex #11 rules.
"""
edition = "2021"
-exclude = ["target/*", "Cargo.lock"]
+exclude = ["/.github/*", "/target/*", "/Cargo.lock"]
[dependencies]
-std = { version = "1.0", package = "rustc-std-workspace-std", optional = true }
-core = { version = "1.0", package = "rustc-std-workspace-core", optional = true }
+unicode-segmentation = { version = "1.11.0", optional = true }
+
compiler_builtins = { version = "0.1", optional = true }
+core = { version = "1.0", package = "rustc-std-workspace-core", optional = true }
+std = { version = "1.0", package = "rustc-std-workspace-std", optional = true }
[features]
-default = []
-rustc-dep-of-std = ['std', 'core', 'compiler_builtins']
+display = ["dep:unicode-segmentation"]
+rustc-dep-of-std = ["dep:compiler_builtins", "dep:core", "dep:std"]
# Legacy, now a no-op
no_std = []
+
+[package.metadata.docs.rs]
+features = ["display"]
+rustdoc-args = ["--cfg", "docsrs"]
diff --git a/README.md b/README.md
index 2d9ea39..40b5947 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,6 @@
# `unicode-width`
-[![Build status](https://github.com/unicode-rs/unicode-width/actions/workflows/rust.yml/badge.svg)](https://travis-ci.org/unicode-rs/unicode-width)
+[![Build status](https://github.com/unicode-rs/unicode-width/actions/workflows/rust.yml/badge.svg)](https://github.com/unicode-rs/unicode-width/actions/workflows/rust.yml)
[![crates.io version](https://img.shields.io/crates/v/unicode-width)](https://crates.io/crates/unicode-width)
[![Docs status](https://img.shields.io/docsrs/unicode-width)](https://docs.rs/unicode-width/)
diff --git a/benches/benches.rs b/benches/benches.rs
index 44aaee6..b90227e 100644
--- a/benches/benches.rs
+++ b/benches/benches.rs
@@ -10,16 +10,13 @@
#![feature(test)]
extern crate test;
-
-use std::iter;
-
use test::Bencher;
use unicode_width::{UnicodeWidthChar, UnicodeWidthStr};
#[bench]
fn cargo(b: &mut Bencher) {
- let string = iter::repeat('a').take(4096).collect::();
+ let string = "a".repeat(4096);
b.iter(|| {
for c in string.chars() {
@@ -31,7 +28,7 @@ fn cargo(b: &mut Bencher) {
#[bench]
#[allow(deprecated)]
fn stdlib(b: &mut Bencher) {
- let string = iter::repeat('a').take(4096).collect::();
+ let string = "a".repeat(4096);
b.iter(|| {
for c in string.chars() {
@@ -42,7 +39,7 @@ fn stdlib(b: &mut Bencher) {
#[bench]
fn simple_if(b: &mut Bencher) {
- let string = iter::repeat('a').take(4096).collect::();
+ let string = "a".repeat(4096);
b.iter(|| {
for c in string.chars() {
@@ -53,7 +50,7 @@ fn simple_if(b: &mut Bencher) {
#[bench]
fn simple_match(b: &mut Bencher) {
- let string = iter::repeat('a').take(4096).collect::();
+ let string = "a".repeat(4096);
b.iter(|| {
for c in string.chars() {
@@ -81,9 +78,9 @@ fn simple_width_if(c: char) -> Option {
#[inline]
fn simple_width_match(c: char) -> Option {
match c as u32 {
- cu if cu == 0 => Some(0),
- cu if cu < 0x20 => None,
- cu if cu < 0x7f => Some(1),
+ 0 => Some(0),
+ 1..=0x1F => None,
+ 0x20..=0x7E => Some(1),
_ => UnicodeWidthChar::width(c),
}
}
diff --git a/src/display.rs b/src/display.rs
new file mode 100644
index 0000000..08003a6
--- /dev/null
+++ b/src/display.rs
@@ -0,0 +1,157 @@
+use core::fmt::{self, Write};
+
+use unicode_segmentation::UnicodeSegmentation;
+
+use crate::{UnicodeWidthChar, UnicodeWidthStr};
+
+ /// A wrapper around a [`str`] whose [`fmt::Display`] impl
+ /// pads, truncates, and aligns based on the displayed width
+ /// of the string as computed by this crate (non-CJK rules).
+ ///
+ /// Obtained by reference via [`UnicodeWidthStr::using_width`];
+ /// see its documentation for more.
+ #[derive(PartialEq, Eq, Hash)]
+ #[repr(transparent)] // the `str` <-> `StrWithWidth` reference transmutes in this module rely on this
+ pub struct StrWithWidth(str);
+
+ impl StrWithWidth {
+ /// Returns the displayed width of the wrapped string
+ /// (the same value as [`UnicodeWidthStr::width`] on the underlying `str`).
+ #[inline]
+ pub fn width(&self) -> usize {
+ UnicodeWidthStr::width(&self.0)
+ }
+ }
+
+ impl PartialEq<str> for StrWithWidth { // `PartialEq<Self>` is derived on the struct; this impl compares against a plain `str`
+ #[inline]
+ fn eq(&self, other: &str) -> bool {
+ &self.0 == other
+ }
+ }
+
+ impl AsRef<str> for StrWithWidth { // borrow the wrapped text as a plain `str`
+ #[inline]
+ fn as_ref(&self) -> &str {
+ &self.0
+ }
+ }
+
+ impl AsMut<str> for StrWithWidth { // mutably borrow the wrapped text as a plain `str`
+ #[inline]
+ fn as_mut(&mut self) -> &mut str {
+ &mut self.0
+ }
+ }
+
+ impl AsRef<StrWithWidth> for str { // view a plain `str` as a `StrWithWidth` by reference
+ #[inline]
+ fn as_ref(&self) -> &StrWithWidth {
+ // SAFETY: `StrWithWidth` is `repr(transparent)` over `str`, which ensures compatible types
+ unsafe { core::mem::transmute(self) }
+ }
+ }
+
+ impl AsMut<StrWithWidth> for str { // view a plain `str` as a mutable `StrWithWidth` by reference
+ #[inline]
+ fn as_mut(&mut self) -> &mut StrWithWidth {
+ // SAFETY: `StrWithWidth` is `repr(transparent)` over `str`, which ensures compatible types
+ unsafe { core::mem::transmute(self) }
+ }
+ }
+
+ impl fmt::Display for StrWithWidth { // formats by displayed width: precision = maximum width, width = minimum width
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ // Fast path: with neither a width nor a precision there is nothing to pad or truncate
+ if f.width().is_none() && f.precision().is_none() {
+ return f.write_str(&self.0);
+ }
+
+ // Truncate the string to the maximum width (the format precision), cutting only between grapheme clusters
+ let (truncated, truncated_width) = if let Some(max_width) = f.precision() {
+ let mut truncated_width: usize = 0;
+ let mut truncated = &self.0;
+ for (seg_offset, seg) in self.0.grapheme_indices(true) { // `true` selects extended grapheme clusters
+ let new_width = truncated_width + seg.width();
+ if new_width > max_width { // this cluster would overflow: keep only what precedes it
+ truncated = &self.0[..seg_offset];
+ break;
+ } else {
+ truncated_width = new_width;
+ }
+ }
+ (truncated, truncated_width)
+ } else {
+ (&self.0, self.0.width()) // no precision: nothing is cut, measure the whole string
+ };
+
+ // Pad the string up to the minimum width (the format width) if it is still narrower than that
+ if let Some(padding) = f
+ .width()
+ .and_then(|min_width| min_width.checked_sub(truncated_width)) // `None` when the text is wider than `min_width`
+ .filter(|&padding| padding > 0)
+ {
+ let align = f.align().unwrap_or(fmt::Alignment::Left); // left-aligned unless the format spec says otherwise
+
+ let mut fill_char = f.fill();
+ let mut fill_char_width = fill_char.width().unwrap_or(1); // a fill char with no defined width is counted as 1
+
+ // A zero-width fill char could never make up the missing width, so fall back to a space
+ if fill_char_width == 0 {
+ fill_char = ' ';
+ fill_char_width = 1;
+ }
+
+ let (pre_pre_pad, pre_pad, post_pad, post_post_pad) = match align { // (fill chars before, spaces before, spaces after, fill chars after)
+ fmt::Alignment::Left => {
+ (0, 0, padding % fill_char_width, padding / fill_char_width) // remainder a wide fill char cannot cover becomes spaces
+ }
+ fmt::Alignment::Right => {
+ (padding / fill_char_width, padding % fill_char_width, 0, 0)
+ }
+ fmt::Alignment::Center => {
+ let (left_padding, right_padding) = (padding / 2, (padding + 1) / 2); // the odd unit, if any, goes to the right
+ let (pre_pre_pad, mut pre_pad, mut post_pad, mut post_post_pad) = {
+ (
+ left_padding / fill_char_width,
+ left_padding % fill_char_width,
+ right_padding % fill_char_width,
+ right_padding / fill_char_width,
+ )
+ };
+ if let Some(diff) = pre_pad.checked_sub(fill_char_width - post_pad) { // spaces on both sides add up to a full fill char: trade them for one more on the right
+ pre_pad = 0;
+ post_pad = diff;
+ post_post_pad += 1;
+ }
+ (pre_pre_pad, pre_pad, post_pad, post_post_pad)
+ }
+ };
+
+ for _ in 0..pre_pre_pad {
+ f.write_char(fill_char)?;
+ }
+ for _ in 0..pre_pad {
+ f.write_char(' ')?;
+ }
+ f.write_str(truncated)?;
+ for _ in 0..post_pad {
+ f.write_char(' ')?;
+ }
+ for _ in 0..post_post_pad {
+ f.write_char(fill_char)?;
+ }
+
+ Ok(())
+ } else {
+ f.write_str(truncated) // already wide enough (or no minimum width): emit the possibly truncated text as is
+ }
+ }
+ }
+
+ impl fmt::Debug for StrWithWidth {
+ #[inline]
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ <str as fmt::Debug>::fmt(&self.0, f) // debug output is exactly that of the wrapped `str`
+ }
+ }
diff --git a/src/lib.rs b/src/lib.rs
index 5bcdfa7..97d5df3 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -81,7 +81,7 @@
//! The non-CJK width methods guarantee that canonically equivalent strings are assigned the same width.
//! However, this guarantee does not currently hold for the CJK width variants.
-#![forbid(unsafe_code)]
+#![cfg_attr(docsrs, feature(doc_cfg))]
#![deny(missing_docs)]
#![doc(
html_logo_url = "https://unicode-rs.github.io/unicode-rs_sm.png",
@@ -92,6 +92,13 @@
use tables::charwidth as cw;
pub use tables::UNICODE_VERSION;
+#[cfg(feature = "display")]
+mod display;
+
+#[cfg(feature = "display")]
+#[cfg_attr(docsrs, doc(cfg(feature = "display")))]
+pub use display::StrWithWidth;
+
mod tables;
/// Methods for determining displayed width of Unicode characters.
@@ -160,6 +167,37 @@ pub trait UnicodeWidthStr {
/// non-CJK contexts, or when the context cannot be reliably determined.
fn width(&self) -> usize;
+ /// Returns a wrapper around the string
+ /// with a [`Display`][core::fmt::Display] impl
+ /// that pads, aligns and truncates according to the string's
+ /// displayed width (format width = minimum, precision = maximum).
+ ///
+ /// # Examples
+ ///
+ /// ```rust
+ /// use unicode_width::UnicodeWidthStr;
+ ///
+ /// let string = "字".using_width();
+ /// assert_eq!(format!("{string:<4}"), "字  ");
+ /// assert_eq!(format!("{string:^4}"), " 字 ");
+ /// assert_eq!(format!("{string:>4}"), "  字");
+ ///
+ /// let string = "a".using_width();
+ /// assert_eq!(format!("{string:字<7}"), "a字字字");
+ /// assert_eq!(format!("{string:字^7}"), "字a字字");
+ /// assert_eq!(format!("{string:字>7}"), "字字字a");
+ /// assert_eq!(format!("{string:字<8}"), "a 字字字");
+ /// assert_eq!(format!("{string:字^8}"), "字 a字字");
+ /// assert_eq!(format!("{string:字>8}"), "字字字 a");
+ ///
+ /// // Truncation is by extended grapheme cluster
+ /// let string = "🇺🇳🇺🇳".using_width();
+ /// assert_eq!(format!("{string:.3}"), "🇺🇳");
+ /// ```
+ #[cfg_attr(docsrs, doc(cfg(feature = "display")))]
+ #[cfg(feature = "display")]
+ fn using_width(&self) -> &StrWithWidth;
+
/// Returns the string's displayed width in columns.
///
/// This function treats characters in the Ambiguous category according
@@ -175,6 +213,12 @@ impl UnicodeWidthStr for str {
str_width(self, false)
}
+ #[cfg(feature = "display")]
+ #[inline]
+ fn using_width(&self) -> &StrWithWidth {
+ AsRef::<StrWithWidth>::as_ref(self) // reference-to-reference conversion; the text is not copied
+ }
+
#[inline]
fn width_cjk(&self) -> usize {
str_width(self, true)
diff --git a/tests/display.rs b/tests/display.rs
new file mode 100644
index 0000000..1eff872
--- /dev/null
+++ b/tests/display.rs
@@ -0,0 +1,141 @@
+#![cfg(feature = "display")]
+
+use unicode_width::UnicodeWidthStr;
+
+ #[test]
+ fn basic() { // with no width or precision in the format spec, the output must equal the input
+ for s in [
+ "",
+ "\0",
+ "a",
+ "abc",
+ "¡Olé!",
+ "kilimanjaro",
+ "Κύριε, ἐλέησον",
+ ] {
+ assert_eq!(format!("{}", s.using_width()), s);
+ }
+ }
+
+ #[test]
+ fn basic_with_args() { // compares width-aware formatting with plain `str` formatting for every width/precision pair below 20
+ for min_width in 0..20 {
+ for max_width in 0..20 {
+ for s in [
+ "",
+ "\0",
+ "a",
+ "abc",
+ "¡Olé!",
+ "kilimanjaro",
+ "Κύριε, ἐλέησον",
+ ] {
+ assert_eq!(
+ format!(
+ "{:amin$.max$}", // NOTE(review): fill `a` with no alignment char looks garbled (compare the `a>` spec below) — confirm
+ s.using_width(),
+ min = min_width,
+ max = max_width
+ ),
+ format!("{:a>min$.max$}", s, min = min_width, max = max_width)
+ );
+ }
+ }
+ }
+ }
+
+ #[test]
+ fn trunc() { // in each table below, index `i` holds the expected output for precision `i`
+ let smol_str = "汉字".using_width();
+ let smol_prefixes = ["", "", "汉", "汉", "汉字", "汉字"]; // each character is kept only once two more units of width are allowed
+ for (width, prefix) in smol_prefixes.into_iter().enumerate() {
+ assert_eq!(format!("{smol_str:.width$}"), prefix, "width: {width}");
+ }
+
+ let med_str = "a汉字b".using_width();
+ let med_prefixes = ["", "a", "a", "a汉", "a汉", "a汉字", "a汉字b", "a汉字b"];
+ for (width, prefix) in med_prefixes.into_iter().enumerate() {
+ assert_eq!(format!("{med_str:.width$}"), prefix, "width: {width}");
+ }
+
+ let huge_str =
+ "\u{200B}\u{200E}a\u{0301}汉字\r\nb\u{2764}\u{FE0F}c\u{2648}\u{FE0E}\u{FF9E}".using_width();
+ let huge_prefixes = [
+ "\u{200B}\u{200E}", // precision 0 still keeps the leading characters
+ "\u{200B}\u{200E}a\u{0301}",
+ "\u{200B}\u{200E}a\u{0301}",
+ "\u{200B}\u{200E}a\u{0301}汉",
+ "\u{200B}\u{200E}a\u{0301}汉",
+ "\u{200B}\u{200E}a\u{0301}汉字",
+ "\u{200B}\u{200E}a\u{0301}汉字\r\n", // "\r\n" is added as a unit
+ "\u{200B}\u{200E}a\u{0301}汉字\r\nb",
+ "\u{200B}\u{200E}a\u{0301}汉字\r\nb",
+ "\u{200B}\u{200E}a\u{0301}汉字\r\nb\u{2764}\u{FE0F}",
+ "\u{200B}\u{200E}a\u{0301}汉字\r\nb\u{2764}\u{FE0F}c",
+ "\u{200B}\u{200E}a\u{0301}汉字\r\nb\u{2764}\u{FE0F}c\u{2648}\u{FE0E}\u{FF9E}",
+ ];
+
+ for (width, prefix) in huge_prefixes.into_iter().enumerate() {
+ assert_eq!(format!("{huge_str:.width$}"), prefix, "width: {width}");
+ }
+ }
+
+ #[test]
+ fn pad() { // exercises left/center/right padding with width-1, width-2 and zero-width fill chars
+ let string = "\u{2764}\u{FE0F}a".using_width(); // the `q` cases below show this string has width 3
+
+ assert_eq!(format!("{string:q<7}"), "\u{2764}\u{FE0F}aqqqq");
+ assert_eq!(format!("{string:q^7}"), "qq\u{2764}\u{FE0F}aqq");
+ assert_eq!(format!("{string:q>7}"), "qqqq\u{2764}\u{FE0F}a");
+
+ assert_eq!(format!("{string:字<7}"), "\u{2764}\u{FE0F}a字字");
+ assert_eq!(format!("{string:字^7}"), "字\u{2764}\u{FE0F}a字");
+ assert_eq!(format!("{string:字>7}"), "字字\u{2764}\u{FE0F}a");
+
+ assert_eq!(format!("{string:\u{0301}<7}"), "\u{2764}\u{FE0F}a    "); // zero-width fill falls back to spaces: 7 - 3 = 4
+ assert_eq!(format!("{string:\u{0301}^7}"), "  \u{2764}\u{FE0F}a  ");
+ assert_eq!(format!("{string:\u{0301}>7}"), "    \u{2764}\u{FE0F}a");
+
+ assert_eq!(format!("{string:q<8}"), "\u{2764}\u{FE0F}aqqqqq");
+ assert_eq!(format!("{string:q^8}"), "qq\u{2764}\u{FE0F}aqqq");
+ assert_eq!(format!("{string:q>8}"), "qqqqq\u{2764}\u{FE0F}a");
+
+ assert_eq!(format!("{string:字<8}"), "\u{2764}\u{FE0F}a 字字");
+ assert_eq!(format!("{string:字^8}"), "字\u{2764}\u{FE0F}a 字");
+ assert_eq!(format!("{string:字>8}"), "字字 \u{2764}\u{FE0F}a");
+
+ assert_eq!(format!("{string:\u{0301}<8}"), "\u{2764}\u{FE0F}a     "); // 8 - 3 = 5 spaces
+ assert_eq!(format!("{string:\u{0301}^8}"), "  \u{2764}\u{FE0F}a   "); // the odd space goes to the right
+ assert_eq!(format!("{string:\u{0301}>8}"), "     \u{2764}\u{FE0F}a");
+
+ let string = "a".using_width();
+ assert_eq!(format!("{string:字^7}"), "字a字字");
+
+ let string = "字".using_width(); // width 2
+ assert_eq!(format!("{string:<3}"), "字 ");
+ assert_eq!(format!("{string:^3}"), "字 ");
+ assert_eq!(format!("{string:>3}"), " 字");
+ assert_eq!(format!("{string:<4}"), "字  ");
+ assert_eq!(format!("{string:^4}"), " 字 ");
+ assert_eq!(format!("{string:>4}"), "  字");
+ }