Merge pull request #368 from mgeisler/modules

Move `WordSeparator` and `WordSplitter` traits to separate modules
mgeisler · May 30, 2021 · c66ed13 · c66ed13
2 parents 5db9665 + f46fe0e
commit c66ed13
Show file tree

Hide file tree

Showing 10 changed files with 355 additions and 295 deletions.
diff --git a/benches/linear.rs b/benches/linear.rs
@@ -32,7 +32,7 @@ pub fn benchmark(c: &mut Criterion) {
             {
                 let options = textwrap::Options::new(LINE_LENGTH)
                     .wrap_algorithm(textwrap::wrap_algorithms::OptimalFit)
-                    .word_separator(textwrap::UnicodeBreakProperties);
+                    .word_separator(textwrap::word_separators::UnicodeBreakProperties);
                 group.bench_with_input(
                     BenchmarkId::new("fill_optimal_fit_unicode", length),
                     &text,
@@ -44,7 +44,7 @@ pub fn benchmark(c: &mut Criterion) {
 
             let options = textwrap::Options::new(LINE_LENGTH)
                 .wrap_algorithm(textwrap::wrap_algorithms::OptimalFit)
-                .word_separator(textwrap::AsciiSpace);
+                .word_separator(textwrap::word_separators::AsciiSpace);
             group.bench_with_input(
                 BenchmarkId::new("fill_optimal_fit_ascii", length),
                 &text,
@@ -56,7 +56,7 @@ pub fn benchmark(c: &mut Criterion) {
 
         let options = textwrap::Options::new(LINE_LENGTH)
             .wrap_algorithm(textwrap::wrap_algorithms::FirstFit)
-            .word_separator(textwrap::AsciiSpace);
+            .word_separator(textwrap::word_separators::AsciiSpace);
         group.bench_with_input(
             BenchmarkId::new("fill_first_fit", length),
             &text,

diff --git a/examples/interactive.rs b/examples/interactive.rs
@@ -19,9 +19,8 @@ mod unix_only {
     use termion::raw::{IntoRawMode, RawTerminal};
     use termion::screen::AlternateScreen;
     use termion::{color, cursor, style};
-    use textwrap::wrap_algorithms;
-    use textwrap::{wrap, AsciiSpace, Options, WordSeparator};
-    use textwrap::{HyphenSplitter, NoHyphenation, WordSplitter};
+    use textwrap::{word_separators, word_splitters, wrap_algorithms};
+    use textwrap::{wrap, Options};
 
     #[cfg(feature = "hyphenation")]
     use hyphenation::{Language, Load, Standard};
@@ -59,8 +58,8 @@ mod unix_only {
         options: &Options<
             'a,
             Box<dyn wrap_algorithms::WrapAlgorithm>,
-            Box<dyn WordSeparator>,
-            Box<dyn WordSplitter>,
+            Box<dyn word_separators::WordSeparator>,
+            Box<dyn word_splitters::WordSplitter>,
         >,
         splitter_label: &str,
         stdout: &mut RawTerminal<io::Stdout>,
@@ -238,8 +237,10 @@ mod unix_only {
         #[cfg(feature = "smawk")]
         wrap_algorithms.push(Box::new(wrap_algorithms::OptimalFit));
 
-        let mut splitters: Vec<Box<dyn WordSplitter>> =
-            vec![Box::new(HyphenSplitter), Box::new(NoHyphenation)];
+        let mut splitters: Vec<Box<dyn word_splitters::WordSplitter>> = vec![
+            Box::new(word_splitters::HyphenSplitter),
+            Box::new(word_splitters::NoHyphenation),
+        ];
         let mut splitter_labels: Vec<String> =
             splitters.iter().map(|s| format!("{:?}", s)).collect();
 
@@ -266,7 +267,9 @@ mod unix_only {
             .break_words(false)
             .wrap_algorithm(wrap_algorithms.remove(0))
             .splitter(splitters.remove(0))
-            .word_separator(Box::new(AsciiSpace) as Box<dyn WordSeparator>);
+            .word_separator(
+                Box::new(word_separators::AsciiSpace) as Box<dyn word_separators::WordSeparator>
+            );
         let mut splitter_label = splitter_labels.remove(0);
 
         let args = std::env::args().collect::<Vec<_>>();

diff --git a/examples/layout.rs b/examples/layout.rs
@@ -1,4 +1,5 @@
-use textwrap::{wrap, HyphenSplitter, Options, WordSplitter};
+use textwrap::word_splitters::{HyphenSplitter, WordSplitter};
+use textwrap::{wrap, Options};
 
 fn main() {
     let example = "Memory safety without garbage collection. \

diff --git a/examples/wasm/src/lib.rs b/examples/wasm/src/lib.rs
@@ -3,6 +3,8 @@ use wasm_bindgen::prelude::*;
 use wasm_bindgen::JsCast;
 
 use textwrap::core;
+use textwrap::word_separators::{AsciiSpace, UnicodeBreakProperties, WordSeparator};
+use textwrap::word_splitters::{split_words, HyphenSplitter, NoHyphenation, WordSplitter};
 use textwrap::wrap_algorithms::{wrap_first_fit, wrap_optimal_fit};
 
 #[wasm_bindgen]
@@ -292,22 +294,22 @@ pub fn draw_wrapped_text(
     let line_height = metrics.actual_bounding_box_ascent() + metrics.actual_bounding_box_descent();
     let baseline_distance = 1.5 * line_height;
 
-    let word_separator: Box<dyn textwrap::WordSeparator> = match options.word_separator {
-        WasmWordSeparator::AsciiSpace => Box::new(textwrap::AsciiSpace),
-        WasmWordSeparator::UnicodeBreakProperties => Box::new(textwrap::UnicodeBreakProperties),
+    let word_separator: Box<dyn WordSeparator> = match options.word_separator {
+        WasmWordSeparator::AsciiSpace => Box::new(AsciiSpace),
+        WasmWordSeparator::UnicodeBreakProperties => Box::new(UnicodeBreakProperties),
         _ => Err("WasmOptions has an invalid word_separator field")?,
     };
 
-    let word_splitter: Box<dyn textwrap::WordSplitter> = match options.word_splitter {
-        WasmWordSplitter::NoHyphenation => Box::new(textwrap::NoHyphenation),
-        WasmWordSplitter::HyphenSplitter => Box::new(textwrap::HyphenSplitter),
+    let word_splitter: Box<dyn WordSplitter> = match options.word_splitter {
+        WasmWordSplitter::NoHyphenation => Box::new(NoHyphenation),
+        WasmWordSplitter::HyphenSplitter => Box::new(HyphenSplitter),
         _ => Err("WasmOptions has an invalid word_splitter field")?,
     };
 
     let mut lineno = 0;
     for line in text.split('\n') {
         let words = word_separator.find_words(line);
-        let split_words = core::split_words(words, &word_splitter);
+        let split_words = split_words(words, &word_splitter);
 
         let canvas_words = split_words
             .flat_map(|word| {

diff --git a/src/core.rs b/src/core.rs
@@ -9,12 +9,14 @@
 //!
 //! 1. Split your input into [`Fragment`]s. These are abstract blocks
 //!    of text or content which can be wrapped into lines. See
-//!    [`WordSeparator`](crate::WordSeparator) for how to do this for
-//!    text.
+//!    [`WordSeparator`](crate::word_separators::WordSeparator) for
+//!    how to do this for text.
 //!
 //! 2. Potentially split your fragments into smaller pieces. This
-//!    allows you to implement things like hyphenation. If wrapping
-//!    text, [`split_words`] can help you do this.
+//!    allows you to implement things like hyphenation. If you are
+//!    wrapping text represented as a sequence of [`Word`]s, then
+//!    [`split_words`](crate::word_splitters::split_words) can help
+//!    you do this.
 //!
 //! 3. Potentially break apart fragments that are still too large to
 //!    fit on a single line. This is implemented in [`break_words`].
@@ -33,8 +35,6 @@
 //! the functionality here is not sufficient or if you have ideas for
 //! improving it. We would love to hear from you!
 
-use crate::WordSplitter;
-
 /// The CSI or “Control Sequence Introducer” introduces an ANSI escape
 /// sequence. This is typically used for colored text and will be
 /// ignored when computing the text width.
@@ -221,7 +221,7 @@ pub struct Word<'a> {
     /// Penalty string to insert if the word falls at the end of a line.
     pub penalty: &'a str,
     // Cached width in columns.
-    width: usize,
+    pub(crate) width: usize,
 }
 
 impl std::ops::Deref for Word<'_> {
@@ -323,70 +323,6 @@ impl Fragment for Word<'_> {
     }
 }
 
-/// Split words into smaller words according to the split points given
-/// by `options`.
-///
-/// Note that we split all words, regardless of their length. This is
-/// to more cleanly separate the business of splitting (including
-/// automatic hyphenation) from the business of word wrapping.
-///
-/// # Examples
-///
-/// ```
-/// use textwrap::core::{split_words, Word};
-/// use textwrap::{NoHyphenation, HyphenSplitter};
-///
-/// assert_eq!(
-///     split_words(vec![Word::from("foo-bar")], &HyphenSplitter).collect::<Vec<_>>(),
-///     vec![Word::from("foo-"), Word::from("bar")]
-/// );
-///
-/// // The NoHyphenation splitter ignores the '-':
-/// assert_eq!(
-///     split_words(vec![Word::from("foo-bar")], &NoHyphenation).collect::<Vec<_>>(),
-///     vec![Word::from("foo-bar")]
-/// );
-/// ```
-pub fn split_words<'a, I, WordSplit>(
-    words: I,
-    word_splitter: &'a WordSplit,
-) -> impl Iterator<Item = Word<'a>>
-where
-    I: IntoIterator<Item = Word<'a>>,
-    WordSplit: WordSplitter,
-{
-    words.into_iter().flat_map(move |word| {
-        let mut prev = 0;
-        let mut split_points = word_splitter.split_points(&word).into_iter();
-        std::iter::from_fn(move || {
-            if let Some(idx) = split_points.next() {
-                let need_hyphen = !word[..idx].ends_with('-');
-                let w = Word {
-                    word: &word.word[prev..idx],
-                    width: display_width(&word[prev..idx]),
-                    whitespace: "",
-                    penalty: if need_hyphen { "-" } else { "" },
-                };
-                prev = idx;
-                return Some(w);
-            }
-
-            if prev < word.word.len() || prev == 0 {
-                let w = Word {
-                    word: &word.word[prev..],
-                    width: display_width(&word[prev..]),
-                    whitespace: word.whitespace,
-                    penalty: word.penalty,
-                };
-                prev = word.word.len() + 1;
-                return Some(w);
-            }
-
-            None
-        })
-    })
-}
-
 /// Forcibly break words wider than `line_width` into smaller words.
 ///
 /// This simply calls [`Word::break_apart`] on words that are too
@@ -410,18 +346,10 @@ where
 #[cfg(test)]
 mod tests {
     use super::*;
-    use crate::HyphenSplitter;
 
     #[cfg(feature = "unicode-width")]
     use unicode_width::UnicodeWidthChar;
 
-    // Like assert_eq!, but the left expression is an iterator.
-    macro_rules! assert_iter_eq {
-        ($left:expr, $right:expr) => {
-            assert_eq!($left.collect::<Vec<_>>(), $right);
-        };
-    }
-
     #[test]
     fn skip_ansi_escape_sequence_works() {
         let blue_text = "\u{1b}[34mHello\u{1b}[0m";
@@ -503,80 +431,4 @@ mod tests {
     fn display_width_emojis() {
         assert_eq!(display_width("😂😭🥺🤣✨😍🙏🥰😊🔥"), 20);
     }
-
-    #[test]
-    fn split_words_no_words() {
-        assert_iter_eq!(split_words(vec![], &HyphenSplitter), vec![]);
-    }
-
-    #[test]
-    fn split_words_empty_word() {
-        assert_iter_eq!(
-            split_words(vec![Word::from("   ")], &HyphenSplitter),
-            vec![Word::from("   ")]
-        );
-    }
-
-    #[test]
-    fn split_words_single_word() {
-        assert_iter_eq!(
-            split_words(vec![Word::from("foobar")], &HyphenSplitter),
-            vec![Word::from("foobar")]
-        );
-    }
-
-    #[test]
-    fn split_words_hyphen_splitter() {
-        assert_iter_eq!(
-            split_words(vec![Word::from("foo-bar")], &HyphenSplitter),
-            vec![Word::from("foo-"), Word::from("bar")]
-        );
-    }
-
-    #[test]
-    fn split_words_adds_penalty() {
-        #[derive(Clone, Debug)]
-        struct FixedSplitPoint;
-        impl WordSplitter for FixedSplitPoint {
-            fn split_points(&self, _: &str) -> Vec<usize> {
-                vec![3]
-            }
-        }
-
-        assert_iter_eq!(
-            split_words(vec![Word::from("foobar")].into_iter(), &FixedSplitPoint),
-            vec![
-                Word {
-                    word: "foo",
-                    width: 3,
-                    whitespace: "",
-                    penalty: "-"
-                },
-                Word {
-                    word: "bar",
-                    width: 3,
-                    whitespace: "",
-                    penalty: ""
-                }
-            ]
-        );
-
-        assert_iter_eq!(
-            split_words(vec![Word::from("fo-bar")].into_iter(), &FixedSplitPoint),
-            vec![
-                Word {
-                    word: "fo-",
-                    width: 3,
-                    whitespace: "",
-                    penalty: ""
-                },
-                Word {
-                    word: "bar",
-                    width: 3,
-                    whitespace: "",
-                    penalty: ""
-                }
-            ]
-        );
-    }
 }