Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
128 changes: 85 additions & 43 deletions src/uu/fmt/src/linebreak.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
// spell-checker:ignore (ToDO) INFTY MULT accum breakwords linebreak linebreaking linebreaks linelen maxlength minlength nchars ostream overlen parasplit plass posn powf punct signum slen sstart tabwidth tlen underlen winfo wlen wordlen

use std::io::{BufWriter, Stdout, Write};
use std::{cmp, mem};
use std::mem;

use crate::FmtOptions;
use crate::parasplit::{ParaWords, Paragraph, WordInfo};
Expand Down Expand Up @@ -215,7 +215,7 @@ struct LineBreak<'a> {
}

#[allow(clippy::cognitive_complexity)]
fn find_kp_breakpoints<'a, T: Iterator<Item = &'a WordInfo<'a>>>(
fn find_kp_breakpoints<'a, T: Clone + Iterator<Item = &'a WordInfo<'a>>>(
iter: T,
args: &BreakArgs<'a>,
) -> Vec<(&'a WordInfo<'a>, bool)> {
Expand All @@ -226,7 +226,7 @@ fn find_kp_breakpoints<'a, T: Iterator<Item = &'a WordInfo<'a>>>(
linebreak: None,
break_before: false,
demerits: 0,
prev_rat: 0.0,
prev_rat: f32::NAN,
length: args.init_len,
fresh: false,
}];
Expand All @@ -243,8 +243,8 @@ fn find_kp_breakpoints<'a, T: Iterator<Item = &'a WordInfo<'a>>>(
};
let mut new_linebreaks = vec![];
let mut is_sentence_start = false;
let mut least_demerits = 0;
while let Some(w) = iter.next() {
let next_word_sentence_final = is_next_word_sentence_final(iter.clone());
// if this is the last word, we don't add additional demerits for this break
let (is_last_word, is_sentence_end) = match iter.peek() {
None => (true, true),
Expand All @@ -258,23 +258,51 @@ fn find_kp_breakpoints<'a, T: Iterator<Item = &'a WordInfo<'a>>>(
// should we be adding extra space at the beginning of the next sentence?
let slen = compute_slen(args.uniform, w.new_line, is_sentence_start, false);

let mut ld_new = i64::MAX;
let mut ld_next = i64::MAX;
let mut best_active_demerits = i64::MAX;
let mut ld_idx = 0;
new_linebreaks.clear();
let mut best_break_before: Option<LineBreak<'_>> = None;
let mut best_break_after: Option<LineBreak<'_>> = None;
next_active_breaks.clear();
// go through each active break, extending it and possibly adding a new active
// break if we are above the minimum required length
#[allow(clippy::explicit_iter_loop)]
for &i in active_breaks.iter() {
let active = &mut linebreaks[i];
// normalize demerits to avoid overflow, and record if this is the least
active.demerits -= least_demerits;
if active.demerits < ld_next {
ld_next = active.demerits;
if active.demerits < best_active_demerits {
best_active_demerits = active.demerits;
ld_idx = i;
}

// Also consider a break before this word, so the previous line can end at the
// prior word when that yields a better global layout.
if !active.fresh && active.length >= minlength {
let (mut new_demerits, new_ratio) = compute_demerits(
args.opts.goal as isize - active.length as isize,
stretch,
w.word_nchars,
active.prev_rat,
);
if is_sentence_end {
new_demerits = new_demerits.saturating_add(ORPHAN_BREAK_PENALTY);
}
let total_demerits = active.demerits.saturating_add(new_demerits);
if best_break_before
.as_ref()
.is_none_or(|best| total_demerits < best.demerits)
{
best_break_before = Some(LineBreak {
prev: i,
linebreak: Some(w),
break_before: true,
demerits: total_demerits,
prev_rat: new_ratio,
length: args.indent_len + w.word_nchars,
fresh: false,
});
}
}

// get the new length
let tlen = w.word_nchars
+ args.compute_width(w, active.length, active.fresh)
Expand All @@ -292,7 +320,7 @@ fn find_kp_breakpoints<'a, T: Iterator<Item = &'a WordInfo<'a>>>(

// if we're above the minlength, we can also consider breaking here
if tlen >= minlength {
let (new_demerits, new_ratio) = if is_last_word {
let (mut new_demerits, new_ratio) = if is_last_word {
// there is no penalty for the final line's length
(0, 0.0)
} else {
Expand All @@ -304,15 +332,16 @@ fn find_kp_breakpoints<'a, T: Iterator<Item = &'a WordInfo<'a>>>(
)
};

// do not even consider adding a line that has too many demerits
// also, try to detect overflow by checking signum
let total_demerits = new_demerits + active.demerits;
if new_demerits < BAD_INFTY_SQ
&& total_demerits < ld_new
&& active.demerits.signum() <= new_demerits.signum()
if !is_last_word && next_word_sentence_final {
new_demerits = new_demerits.saturating_add(ORPHAN_BREAK_PENALTY);
}

let total_demerits = active.demerits.saturating_add(new_demerits);
if best_break_after
.as_ref()
.is_none_or(|best| total_demerits < best.demerits)
{
ld_new = total_demerits;
new_linebreaks.push(LineBreak {
best_break_after = Some(LineBreak {
prev: i,
linebreak: Some(w),
break_before: false,
Expand All @@ -326,15 +355,16 @@ fn find_kp_breakpoints<'a, T: Iterator<Item = &'a WordInfo<'a>>>(
}
}

// if we generated any new linebreaks, add the last one to the list
// the last one is always the best because we don't add to new_linebreaks unless
// it's better than the best one so far
match new_linebreaks.pop() {
None => (),
Some(lb) => {
next_active_breaks.push(linebreaks.len());
linebreaks.push(lb);
}
if let Some(lb) = best_break_before {
new_linebreaks.push(lb);
}
if let Some(lb) = best_break_after {
new_linebreaks.push(lb);
}

for lb in new_linebreaks.drain(..) {
next_active_breaks.push(linebreaks.len());
linebreaks.push(lb);
}

if next_active_breaks.is_empty() {
Expand All @@ -343,11 +373,6 @@ fn find_kp_breakpoints<'a, T: Iterator<Item = &'a WordInfo<'a>>>(
restart_active_breaks(args, &linebreaks[ld_idx], ld_idx, w, slen, minlength);
next_active_breaks.push(linebreaks.len());
linebreaks.push(new_break);
least_demerits = 0;
} else {
// next time around, normalize out the demerits fields
// on active linebreaks to make overflow less likely
least_demerits = cmp::max(ld_next, 0);
}
// swap in new list of active breaks
mem::swap(&mut active_breaks, &mut next_active_breaks);
Expand Down Expand Up @@ -382,15 +407,28 @@ fn build_best_path<'a>(paths: &[LineBreak<'a>], active: &[usize]) -> Vec<(&'a Wo
.unwrap_or_default()
}

// "infinite" badness is more like (1+BAD_INFTY)^2 because of how demerits are computed
const BAD_INFTY: i64 = 10_000_000;
const BAD_INFTY_SQ: i64 = BAD_INFTY * BAD_INFTY;
// badness = BAD_MULT * abs(r) ^ 3
const BAD_MULT: f32 = 200.0;
// DR_MULT is multiplier for delta-R between lines
const DR_MULT: f32 = 600.0;
// DL_MULT is penalty multiplier for short words at end of line
const DL_MULT: f32 = 10.0;
// Penalize breaks that leave the first word on the next line as the sentence-final word.
const ORPHAN_BREAK_PENALTY: i64 = 250_000_000;

fn is_word_sentence_final(current: &WordInfo, next: Option<&WordInfo>) -> bool {
match next {
None => true,
Some(next_word) => next_word.sentence_start || (next_word.new_line && current.ends_punct),
}
}

fn is_next_word_sentence_final<'a, T: Iterator<Item = &'a WordInfo<'a>>>(mut iter: T) -> bool {
let Some(next_word) = iter.next() else {
return false;
};
is_word_sentence_final(next_word, iter.next())
}

fn compute_demerits(delta_len: isize, stretch: usize, wlen: usize, prev_rat: f32) -> (i64, f32) {
// how much stretch are we using?
Expand All @@ -401,11 +439,7 @@ fn compute_demerits(delta_len: isize, stretch: usize, wlen: usize, prev_rat: f32
};

// compute badness given the stretch ratio
let bad_linelen = if ratio.abs() > 1.0f32 {
BAD_INFTY
} else {
(BAD_MULT * ratio.powi(3).abs()) as i64
};
let bad_linelen = (BAD_MULT * ratio.powi(3).abs()) as i64;

// we penalize lines ending in really short words
let bad_wordlen = if wlen >= stretch {
Expand All @@ -418,9 +452,17 @@ fn compute_demerits(delta_len: isize, stretch: usize, wlen: usize, prev_rat: f32
};

// we penalize lines that have very different ratios from previous lines
let bad_delta_r = (DR_MULT * ((ratio - prev_rat) / 2.0).powi(3).abs()) as i64;
let bad_delta_r = if prev_rat.is_nan() {
0
} else {
(DR_MULT * ((ratio - prev_rat) / 2.0).powi(3).abs()) as i64
};

let demerits = i64::pow(1 + bad_linelen + bad_wordlen + bad_delta_r, 2);
let demerits_base = 1_i64
.saturating_add(bad_linelen)
.saturating_add(bad_wordlen)
.saturating_add(bad_delta_r);
let demerits = demerits_base.saturating_mul(demerits_base);

(demerits, ratio)
}
Expand Down
15 changes: 15 additions & 0 deletions tests/by-util/test_fmt.rs
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,21 @@ fn test_fmt_width() {
.stdout_is("this is a\nfile with\none word\nper line\n");
}

#[test]
fn test_fmt_width_max_display_width() {
let input = "aa bb cc dd ee";
new_ucmd!()
.args(&["-w", "8"])
.pipe_in(input)
.succeeds()
.stdout_is("aa bb cc\ndd ee\n");
new_ucmd!()
.args(&["-w", "7"])
.pipe_in(input)
.succeeds()
.stdout_is("aa\nbb cc\ndd ee\n");
}

#[test]
fn test_fmt_width_invalid() {
new_ucmd!()
Expand Down
Loading