Skip to content

Commit

Permalink
Auto-detect line ending in unfill
Browse files Browse the repository at this point in the history
  • Loading branch information
koiuo committed Jun 7, 2022
1 parent 115b927 commit 227b0bf
Show file tree
Hide file tree
Showing 3 changed files with 107 additions and 57 deletions.
10 changes: 10 additions & 0 deletions benches/linear.rs
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,16 @@ pub fn benchmark(c: &mut Criterion) {
});
}

{
group.bench_with_input(
BenchmarkId::new("unfill_lf", length),
&text,
|b, text| {
b.iter(|| textwrap::unfill(text));
},
);
}

#[cfg(all(feature = "smawk", feature = "hyphenation"))]
{
use hyphenation::{Language, Load, Standard};
Expand Down
137 changes: 95 additions & 42 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -197,24 +197,29 @@

// Make `cargo test` execute the README doctests.
//
// The README is attached as the documentation of an otherwise empty module
// that is only compiled when the `doctest` cfg is set. The attribute must
// appear exactly once: repeating it would attach the README twice and run
// each of its examples twice.
#[cfg(doctest)]
#[doc = include_str!("../README.md")]
mod readme_doctest {}

use std::borrow::Cow;

mod indentation;

pub use crate::indentation::{dedent, indent};

mod word_separators;

pub use word_separators::WordSeparator;

pub mod word_splitters;

pub use word_splitters::WordSplitter;

pub mod wrap_algorithms;

pub use wrap_algorithms::WrapAlgorithm;

pub mod line_ending;

pub use line_ending::LineEnding;

pub mod core;
Expand Down Expand Up @@ -563,8 +568,8 @@ pub fn termwidth() -> usize {
/// );
/// ```
pub fn fill<'a, Opt>(text: &str, width_or_options: Opt) -> String
where
Opt: Into<Options<'a>>,
where
Opt: Into<Options<'a>>,
{
let options = width_or_options.into();
let line_ending_str = options.line_ending.as_str();
Expand Down Expand Up @@ -623,19 +628,18 @@ where
/// * This is an
/// example of
/// a list item.
/// ", LineEnding::LF);
/// ");
///
/// assert_eq!(text, "This is an example of a list item.\n");
/// assert_eq!(options.initial_indent, "* ");
/// assert_eq!(options.subsequent_indent, " ");
/// ```
pub fn unfill(text: &str, line_ending: LineEnding) -> (String, Options<'_>) {
let line_ending_pat = line_ending.as_str();
let trimmed = text.trim_end_matches(line_ending_pat);
pub fn unfill(text: &str) -> (String, Options<'_>) {
let trimmed = text.trim_end_matches(&['\r', '\n']);
let prefix_chars: &[_] = &[' ', '-', '+', '*', '>', '#', '/'];

let mut options = Options::new(0).line_ending(line_ending);
for (idx, line) in trimmed.split(line_ending_pat).enumerate() {
let mut options = Options::new(0).line_ending(LineEnding::CRLF);
for (idx, line) in trimmed.split('\n').enumerate() {
options.width = std::cmp::max(options.width, core::display_width(line));
let without_prefix = line.trim_start_matches(prefix_chars);
let prefix = &line[..line.len() - without_prefix.len()];
Expand All @@ -658,12 +662,25 @@ pub fn unfill(text: &str, line_ending: LineEnding) -> (String, Options<'_>) {
}

let mut unfilled = String::with_capacity(text.len());
for (idx, line) in trimmed.split(line_ending_pat).enumerate() {
for (idx, line) in trimmed.split_inclusive('\n').enumerate() {
let mut line_len = line.len();
let line_bytes = line.as_bytes();
if line_len > 1 && line_bytes[line_len - 1] == b'\n' { // if not last line
if line_bytes[line_len - 2] == b'\r' {
line_len = line_len - 2;
} else {
line_len = line_len - 1;
options.line_ending = LineEnding::LF;
}
} else if idx == 0 {
options.line_ending = LineEnding::LF;
}

if idx == 0 {
unfilled.push_str(&line[options.initial_indent.len()..]);
unfilled.push_str(&line[options.initial_indent.len()..line_len]);
} else {
unfilled.push(' ');
unfilled.push_str(&line[options.subsequent_indent.len()..]);
unfilled.push_str(&line[options.subsequent_indent.len()..line_len]);
}
}

Expand Down Expand Up @@ -726,12 +743,12 @@ pub fn unfill(text: &str, line_ending: LineEnding) -> (String, Options<'_>) {
/// ");
/// ```
pub fn refill<'a, Opt>(filled_text: &str, new_width_or_options: Opt) -> String
where
Opt: Into<Options<'a>>,
where
Opt: Into<Options<'a>>,
{
let mut new_options = new_width_or_options.into();
let trimmed = filled_text.trim_end_matches(new_options.line_ending.as_str());
let (text, options) = unfill(trimmed, new_options.line_ending);
let (text, options) = unfill(trimmed);
new_options.initial_indent = options.initial_indent;
new_options.subsequent_indent = options.subsequent_indent;
let mut refilled = fill(&text, new_options);
Expand Down Expand Up @@ -910,8 +927,8 @@ where
/// assert_eq!(wrap(" foo bar", 4), vec!["", "foo", "bar"]);
/// ```
pub fn wrap<'a, Opt>(text: &str, width_or_options: Opt) -> Vec<Cow<'_, str>>
where
Opt: Into<Options<'a>>,
where
Opt: Into<Options<'a>>,
{
let options: Options = width_or_options.into();

Expand Down Expand Up @@ -1061,8 +1078,8 @@ pub fn wrap_columns<'a, Opt>(
middle_gap: &str,
right_gap: &str,
) -> Vec<String>
where
Opt: Into<Options<'a>>,
where
Opt: Into<Options<'a>>,
{
assert!(columns > 0);

Expand Down Expand Up @@ -1232,7 +1249,7 @@ mod tests {
assert_eq!(
wrap(
"To be, or not to be, that is the question.",
Options::new(10).wrap_algorithm(WrapAlgorithm::FirstFit)
Options::new(10).wrap_algorithm(WrapAlgorithm::FirstFit),
),
vec!["To be, or", "not to be,", "that is", "the", "question."]
);
Expand Down Expand Up @@ -1308,7 +1325,7 @@ mod tests {
assert_eq!(
wrap(
"Hello, World!",
Options::new(15).word_separator(WordSeparator::AsciiSpace)
Options::new(15).word_separator(WordSeparator::AsciiSpace),
),
vec!["Hello,", "World!"]
);
Expand All @@ -1319,7 +1336,7 @@ mod tests {
assert_eq!(
wrap(
"Hello, World!",
Options::new(15).word_separator(WordSeparator::UnicodeBreakProperties)
Options::new(15).word_separator(WordSeparator::UnicodeBreakProperties),
),
vec!["Hello, W", "orld!"]
);
Expand Down Expand Up @@ -1592,14 +1609,14 @@ mod tests {
assert_eq!(
fill(
"1 3 5 7\n1 3 5 7",
Options::new(7).wrap_algorithm(WrapAlgorithm::FirstFit)
Options::new(7).wrap_algorithm(WrapAlgorithm::FirstFit),
),
"1 3 5 7\n1 3 5 7"
);
assert_eq!(
fill(
"1 3 5 7\n1 3 5 7",
Options::new(5).wrap_algorithm(WrapAlgorithm::FirstFit)
Options::new(5).wrap_algorithm(WrapAlgorithm::FirstFit),
),
"1 3 5\n7\n1 3 5\n7"
);
Expand Down Expand Up @@ -1721,29 +1738,65 @@ mod tests {

/// Two lines separated by a bare `\n` are joined with a single space and the
/// detected line ending is `LineEnding::LF`.
#[test]
fn unfill_simple() {
    let (text, options) = unfill("foo\nbar");
    assert_eq!(text, "foo bar");
    assert_eq!(options.width, 3);
    assert_eq!(options.line_ending, LineEnding::LF);
}

/// Input without any line break comes back unchanged; the reported width is
/// the width of that single line and the line ending is `LineEnding::LF`.
#[test]
fn unfill_no_new_line() {
    let input = "foo bar";
    let (unfilled, detected) = unfill(input);
    assert_eq!(unfilled, "foo bar");
    assert_eq!(detected.width, 7);
    assert_eq!(detected.line_ending, LineEnding::LF);
}

/// Two lines separated by `\r\n` are joined with a single space and the
/// detected line ending is `LineEnding::CRLF`.
#[test]
fn unfill_simple_crlf() {
    let input = "foo\r\nbar";
    let (unfilled, detected) = unfill(input);
    assert_eq!(unfilled, "foo bar");
    assert_eq!(detected.width, 3);
    assert_eq!(detected.line_ending, LineEnding::CRLF);
}

/// When the input mixes `\r\n` and `\n`, the detected line ending falls back
/// to `\n`, because:
///
/// 1. it is the default, and
/// 2. it still matches both `\n` and `\r\n`, whereas `\r\n` does not match a
///    bare `\n`.
#[test]
fn unfill_mixed_new_lines() {
    let input = "foo\r\nbar\nbaz";
    let (unfilled, detected) = unfill(input);
    assert_eq!(unfilled, "foo bar baz");
    assert_eq!(detected.width, 3);
    assert_eq!(detected.line_ending, LineEnding::LF);
}

/// Trailing newlines are not treated as line breaks to unfill: they are
/// carried over verbatim to the end of the returned text.
#[test]
fn unfill_trailing_newlines() {
    let (text, options) = unfill("foo\nbar\n\n\n");
    assert_eq!(text, "foo bar\n\n\n");
    assert_eq!(options.width, 3);
}

/// A trailing run that mixes `\n` and `\r\n` is carried over verbatim and
/// does not take part in line ending detection: only the `\r\n` between
/// `foo` and `bar` counts, so the result is `LineEnding::CRLF`.
#[test]
fn unfill_mixed_trailing_newlines() {
    let input = "foo\r\nbar\n\r\n\n";
    let (unfilled, detected) = unfill(input);
    assert_eq!(unfilled, "foo bar\n\r\n\n");
    assert_eq!(detected.width, 3);
    assert_eq!(detected.line_ending, LineEnding::CRLF);
}

/// Leading whitespace on the first line is stripped from the text and
/// reported as `initial_indent`.
#[test]
fn unfill_initial_indent() {
    // Two leading spaces plus "foo" give the asserted width of 5.
    let (text, options) = unfill("  foo\nbar\nbaz");
    assert_eq!(text, "foo bar baz");
    assert_eq!(options.width, 5);
    assert_eq!(options.initial_indent, "  ");
}

#[test]
fn unfill_differing_indents() {
let (text, options) = unfill(" foo\n bar\n baz", LineEnding::LF);
let (text, options) = unfill(" foo\n bar\n baz");
assert_eq!(text, "foo bar baz");
assert_eq!(options.width, 7);
assert_eq!(options.initial_indent, " ");
Expand All @@ -1752,7 +1805,7 @@ mod tests {

#[test]
fn unfill_list_item() {
let (text, options) = unfill("* foo\n bar\n baz", LineEnding::LF);
let (text, options) = unfill("* foo\n bar\n baz");
assert_eq!(text, "foo bar baz");
assert_eq!(options.width, 5);
assert_eq!(options.initial_indent, "* ");
Expand All @@ -1761,7 +1814,7 @@ mod tests {

#[test]
fn unfill_multiple_char_prefix() {
let (text, options) = unfill(" // foo bar\n // baz\n // quux", LineEnding::LF);
let (text, options) = unfill(" // foo bar\n // baz\n // quux");
assert_eq!(text, "foo bar baz quux");
assert_eq!(options.width, 14);
assert_eq!(options.initial_indent, " // ");
Expand All @@ -1770,7 +1823,7 @@ mod tests {

#[test]
fn unfill_block_quote() {
let (text, options) = unfill("> foo\n> bar\n> baz", LineEnding::LF);
let (text, options) = unfill("> foo\n> bar\n> baz");
assert_eq!(text, "foo bar baz");
assert_eq!(options.width, 5);
assert_eq!(options.initial_indent, "> ");
Expand All @@ -1779,16 +1832,16 @@ mod tests {

/// A single line of text passes through `unfill` unchanged.
#[test]
fn unfill_whitespace() {
    assert_eq!(unfill("foo bar").0, "foo bar");
}

/// Lines separated by `\r\n` are joined with a single space and the line
/// ending is detected as `LineEnding::CRLF` without being passed in.
#[test]
fn unfill_crlf() {
    let (text, options) = unfill("foo\r\nbar");
    assert_eq!(text, "foo bar");
    assert_eq!(options.width, 3);
    assert_eq!(options.line_ending, LineEnding::CRLF);
}

#[test]
fn wrap_columns_empty_text() {
Expand Down Expand Up @@ -1835,12 +1888,12 @@ mod tests {
30,
"✨ ",
" ⚽ ",
" 👀"
" 👀",
),
vec![
"✨ Words ⚽ wrapped in 👀",
"✨ and a few ⚽ ⓶ columns 👀",
"✨ emojis 😍 ⚽ 👀"
"✨ emojis 😍 ⚽ 👀",
]
);
}
Expand All @@ -1853,7 +1906,7 @@ mod tests {
wrap_columns("xyz", 2, 10, "----> ", " !!! ", " <----"),
vec![
"----> x !!! z <----", //
"----> y !!! <----"
"----> y !!! <----",
]
);
}
Expand Down
17 changes: 2 additions & 15 deletions src/line_ending.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,31 +5,19 @@ use std::str::FromStr;
/// A line ending sequence: a carriage return, a line feed, or a carriage
/// return followed by a line feed.
///
/// `as_str` yields the corresponding string for each variant.
#[derive(Clone, Copy, Debug, PartialEq)]
pub enum LineEnding {
/// Carriage return, `"\r"` (U+000D).
CR,
/// Carriage return followed by line feed, `"\r\n"` (U+000D U+000A).
CRLF,
/// Line feed, `"\n"` (U+000A).
LF,
}

impl LineEnding {
    /// Returns the number of characters in this line ending: 2 for
    /// [`LineEnding::CRLF`], 1 for [`LineEnding::CR`] and [`LineEnding::LF`].
    #[inline]
    pub const fn len_chars(&self) -> usize {
        // Spelled out instead of `_` so that adding a variant is a compile
        // error here rather than a silently wrong length.
        match self {
            Self::CRLF => 2,
            Self::CR | Self::LF => 1,
        }
    }

    /// Returns this line ending as a string slice: `"\r"` for
    /// [`LineEnding::CR`], `"\r\n"` for [`LineEnding::CRLF`] and `"\n"` for
    /// [`LineEnding::LF`].
    #[inline]
    pub const fn as_str(&self) -> &'static str {
        // Exactly one arm per variant; a repeated arm would be unreachable.
        match self {
            Self::CR => "\r",
            Self::CRLF => "\r\n",
            Self::LF => "\n",
        }
    }
}
Expand All @@ -43,7 +31,6 @@ impl FromStr for LineEnding {
match s {
"\u{000D}\u{000A}" => Result::Ok(LineEnding::CRLF),
"\u{000A}" => Result::Ok(LineEnding::LF),
"\u{000D}" => Result::Ok(LineEnding::CR),
_ => Result::Err(()),
}
}
Expand Down

0 comments on commit 227b0bf

Please sign in to comment.