Skip to content

Commit

Permalink
Handling of numbered markdown lists.
Browse files Browse the repository at this point in the history
Fixes issue rust-lang#5416
  • Loading branch information
Lukasz Anforowicz committed Feb 2, 2023
1 parent 5391847 commit ca2c68e
Show file tree
Hide file tree
Showing 5 changed files with 289 additions and 38 deletions.
157 changes: 123 additions & 34 deletions src/comment.rs
Original file line number Diff line number Diff line change
Expand Up @@ -432,7 +432,9 @@ impl CodeBlockAttribute {

/// Block that is formatted as an item.
///
/// An item starts with either a star `*` a dash `-` a greater-than `>` or a plus '+'.
/// An item starts with either a star `*`, a dash `-`, a greater-than `>`, a plus `+`,
/// or a number `12.` or `34)` (with at most 2 digits).
///
/// Different levels of indentation are handled by shrinking the shape accordingly.
struct ItemizedBlock {
/// the lines that are identified as part of an itemized block
Expand All @@ -446,37 +448,49 @@ struct ItemizedBlock {
}

impl ItemizedBlock {
/// Returns the length in bytes (the same unit as `str::len`) of the sigil that starts
/// `trimmed`, or `None` if `trimmed` does not start with a sigil.
///
/// Recognized sigils are the bullet / block-quote markers `"* "`, `"- "`, `"> "`, `"+ "` and
/// numbered markers made of one or two ASCII digits followed by `". "` or `") "`
/// (e.g. `"1. "` or `"34) "`).
///
/// `trimmed` is expected to already have its leading whitespace removed; callers pass
/// `line.trim_start()`.
fn get_sigil_length(trimmed: &str) -> Option<usize> {
    // Bullet and block-quote sigils; every entry is exactly 2 bytes long.
    let itemized_start = ["* ", "- ", "> ", "+ "];
    if itemized_start.iter().any(|s| trimmed.starts_with(s)) {
        return Some(2);
    }

    // Numbered sigils. Numbers with 3 or more digits are rejected so that prose such as a
    // year followed by a full stop is not mistaken for a list item.
    for suffix in [". ", ") "] {
        if let Some((prefix, _)) = trimmed.split_once(suffix) {
            // The lower bound matters: `all` is vacuously true for an empty prefix, which
            // would otherwise classify lines such as ". foo" or ") foo" as numbered items.
            let is_short_number =
                (1..=2).contains(&prefix.len()) && prefix.bytes().all(|b| b.is_ascii_digit());
            if is_short_number {
                return Some(prefix.len() + suffix.len());
            }
        }
    }

    None
}

/// Builds the `ItemizedBlock` opened by `line`.
///
/// Returns `None` when `line`, ignoring its leading whitespace, does not begin with an item
/// sigil.
fn new(line: &str) -> Option<ItemizedBlock> {
    let trimmed = line.trim_start();
    let sigil_len = ItemizedBlock::get_sigil_length(trimmed)?;

    // Count of whitespace characters in front of the sigil.
    let leading_ws = line.chars().take_while(|c| c.is_whitespace()).count();

    // Default case: the indent covers the leading whitespace plus the sigil, and
    // `line_start` is that many spaces.
    let plain_indent = leading_ws + sigil_len;
    let plain_start = " ".repeat(plain_indent);

    let (indent, line_start) = if trimmed.starts_with('>') {
        // Markdown block quotes start with "> ". There might be multiple nested block
        // quotes, so instead of adjusting the space-based indent the line start is rebuilt
        // and its length is taken as the final indent. The indent is updated because it
        // affects the max width of each formatted line.
        let quote_start = itemized_block_quote_start(line, plain_start, 2);
        (quote_start.len(), quote_start)
    } else {
        (plain_indent, plain_start)
    };

    Some(ItemizedBlock {
        // Everything after the opener is the first content line of the item.
        lines: vec![line[indent..].to_string()],
        indent,
        opener: line[..indent].to_string(),
        line_start,
    })
}

/// Returns a `StringFormat` used for formatting the content of an item.
Expand All @@ -495,7 +509,7 @@ impl ItemizedBlock {
/// Returns `true` if the line is part of the current itemized block.
/// If it is, then it is added to the internal lines list.
fn add_line(&mut self, line: &str) -> bool {
if !ItemizedBlock::is_itemized_line(line)
if ItemizedBlock::get_sigil_length(line.trim_start()).is_none()
&& self.indent <= line.chars().take_while(|c| c.is_whitespace()).count()
{
self.lines.push(line.to_string());
Expand Down Expand Up @@ -766,10 +780,11 @@ impl<'a> CommentRewrite<'a> {
self.item_block = None;
if let Some(stripped) = line.strip_prefix("```") {
self.code_block_attr = Some(CodeBlockAttribute::new(stripped))
} else if self.fmt.config.wrap_comments() && ItemizedBlock::is_itemized_line(line) {
let ib = ItemizedBlock::new(line);
self.item_block = Some(ib);
return false;
} else if self.fmt.config.wrap_comments() {
if let Some(ib) = ItemizedBlock::new(line) {
self.item_block = Some(ib);
return false;
}
}

if self.result == self.opener {
Expand Down Expand Up @@ -2020,4 +2035,78 @@ fn main() {
"#;
assert_eq!(s, filter_normal_code(s_with_comment));
}

#[test]
/// Checks the fields of the `ItemizedBlock` created from the first line of an item:
/// the remaining content, the indent, the opener and the `line_start`.
fn test_itemized_block_first_line_handling() {
    /// Builds a block from `test_input` (which must be recognized as an item) and compares
    /// each of its fields with the expected value.
    fn run_test(
        test_input: &str,
        expected_line: &str,
        expected_indent: usize,
        expected_opener: &str,
        expected_line_start: &str,
    ) {
        let block = ItemizedBlock::new(test_input).unwrap();
        assert_eq!(1, block.lines.len(), "test_input: {:?}", test_input);
        assert_eq!(
            expected_line, &block.lines[0],
            "test_input: {:?}",
            test_input
        );
        assert_eq!(
            expected_indent, block.indent,
            "test_input: {:?}",
            test_input
        );
        assert_eq!(
            expected_opener, &block.opener,
            "test_input: {:?}",
            test_input
        );
        assert_eq!(
            expected_line_start, &block.line_start,
            "test_input: {:?}",
            test_input
        );
    }

    // For bullets and numbers `line_start` is `indent` spaces; for block quotes it is "> ".
    run_test("- foo", "foo", 2, "- ", "  ");
    run_test("* foo", "foo", 2, "* ", "  ");
    run_test("> foo", "foo", 2, "> ", "> ");

    run_test("1. foo", "foo", 3, "1. ", "   ");
    run_test("12. foo", "foo", 4, "12. ", "    ");
    run_test("1) foo", "foo", 3, "1) ", "   ");
    run_test("12) foo", "foo", 4, "12) ", "    ");

    // Four leading spaces plus the 2-byte sigil give an indent of 6.
    run_test("    - foo", "foo", 6, "    - ", "      ");
}

#[test]
/// Lines that merely resemble list items must not produce an `ItemizedBlock`.
fn test_itemized_block_nonobvious_sigils_are_rejected() {
    let rejected_lines = [
        // Non-numeric sigils (e.g. `a.` or `iv.`) are not supported, because of a risk of
        // misidentifying regular words as sigils. See also the discussion in
        // https://talk.commonmark.org/t/blank-lines-before-lists-revisited/1990
        "word. rest of the paragraph.",
        "a. maybe this is a list item? maybe not?",
        "iv. maybe this is a list item? maybe not?",
        // Numbers with 3 or more digits are not recognized as sigils, to avoid
        // formatting the following example as a list:
        //
        // ```
        // The Captain died in
        // 1868. He was buried in...
        // ```
        "123. only 2-digit numbers are recognized as sigils.",
        // Parens.
        "123) giving some coverage to parens as well.",
        "a) giving some coverage to parens as well.",
    ];

    for &line in &rejected_lines {
        assert!(
            ItemizedBlock::new(line).is_none(),
            "The following line shouldn't be classified as a list item: {}",
            line
        );
    }
}
}
36 changes: 35 additions & 1 deletion tests/source/itemized-blocks/no_wrap.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
// rustfmt-normalize_comments: true
// rustfmt-format_code_in_doc_comments: true

//! This is a list:
//! This is an itemized markdown list (see also issue #3224):
//! * Outer
//! * Outer
//! * Inner
Expand All @@ -13,6 +13,40 @@
//! - when the log level is info, the level name is green and the rest of the line is white
//! - when the log level is debug, the whole line is white
//! - when the log level is trace, the whole line is gray ("bright black")
//!
//! This is a numbered markdown list (see also issue #5416):
//! 1. Long long long long long long long long long long long long long long long long long line
//! 2. Another very long long long long long long long long long long long long long long long line
//! 3. Nested list
//! 1. Long long long long long long long long long long long long long long long long line
//! 2. Another very long long long long long long long long long long long long long long line
//! 4. Last item
//!
//! Using the ')' instead of '.' character after the number:
//! 1) Long long long long long long long long long long long long long long long long long line
//! 2) Another very long long long long long long long long long long long long long long long line
//!
//! Deep list that mixes various bullet and number formats:
//! 1. First level with a long long long long long long long long long long long long long long
//! long long long line
//! 2. First level with another very long long long long long long long long long long long long
//! long long long line
//! * Second level with a long long long long long long long long long long long long long
//! long long long line
//! * Second level with another very long long long long long long long long long long long
//! long long long line
//! 1) Third level with a long long long long long long long long long long long long long
//! long long long line
//! 2) Third level with another very long long long long long long long long long long
//! long long long long line
//! - Forth level with a long long long long long long long long long long long long
//! long long long long line
//! - Forth level with another very long long long long long long long long long long
//! long long long long line
//! 3) One more item at the third level
//! 4) Last item of the third level
//! * Last item of second level
//! 3. Last item of first level

/// All the parameters ***except for `from_theater`*** should be inserted as sent by the remote
/// theater, i.e., as passed to [`Theater::send`] on the remote actor:
Expand Down
36 changes: 35 additions & 1 deletion tests/source/itemized-blocks/wrap.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
// rustfmt-format_code_in_doc_comments: true
// rustfmt-max_width: 50

//! This is a list:
//! This is an itemized markdown list (see also issue #3224):
//! * Outer
//! * Outer
//! * Inner
Expand All @@ -14,6 +14,40 @@
//! - when the log level is info, the level name is green and the rest of the line is white
//! - when the log level is debug, the whole line is white
//! - when the log level is trace, the whole line is gray ("bright black")
//!
//! This is a numbered markdown list (see also issue #5416):
//! 1. Long long long long long long long long long long long long long long long long long line
//! 2. Another very long long long long long long long long long long long long long long long line
//! 3. Nested list
//! 1. Long long long long long long long long long long long long long long long long line
//! 2. Another very long long long long long long long long long long long long long long line
//! 4. Last item
//!
//! Using the ')' instead of '.' character after the number:
//! 1) Long long long long long long long long long long long long long long long long long line
//! 2) Another very long long long long long long long long long long long long long long long line
//!
//! Deep list that mixes various bullet and number formats:
//! 1. First level with a long long long long long long long long long long long long long long
//! long long long line
//! 2. First level with another very long long long long long long long long long long long long
//! long long long line
//! * Second level with a long long long long long long long long long long long long long
//! long long long line
//! * Second level with another very long long long long long long long long long long long
//! long long long line
//! 1) Third level with a long long long long long long long long long long long long long
//! long long long line
//! 2) Third level with another very long long long long long long long long long long
//! long long long long line
//! - Forth level with a long long long long long long long long long long long long
//! long long long long line
//! - Forth level with another very long long long long long long long long long long
//! long long long long line
//! 3) One more item at the third level
//! 4) Last item of the third level
//! * Last item of second level
//! 3. Last item of first level

// This example shows how to configure fern to output really nicely colored logs
// - when the log level is error, the whole line is red
Expand Down
36 changes: 35 additions & 1 deletion tests/target/itemized-blocks/no_wrap.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
// rustfmt-normalize_comments: true
// rustfmt-format_code_in_doc_comments: true

//! This is a list:
//! This is an itemized markdown list (see also issue #3224):
//! * Outer
//! * Outer
//! * Inner
Expand All @@ -13,6 +13,40 @@
//! - when the log level is info, the level name is green and the rest of the line is white
//! - when the log level is debug, the whole line is white
//! - when the log level is trace, the whole line is gray ("bright black")
//!
//! This is a numbered markdown list (see also issue #5416):
//! 1. Long long long long long long long long long long long long long long long long long line
//! 2. Another very long long long long long long long long long long long long long long long line
//! 3. Nested list
//! 1. Long long long long long long long long long long long long long long long long line
//! 2. Another very long long long long long long long long long long long long long long line
//! 4. Last item
//!
//! Using the ')' instead of '.' character after the number:
//! 1) Long long long long long long long long long long long long long long long long long line
//! 2) Another very long long long long long long long long long long long long long long long line
//!
//! Deep list that mixes various bullet and number formats:
//! 1. First level with a long long long long long long long long long long long long long long
//! long long long line
//! 2. First level with another very long long long long long long long long long long long long
//! long long long line
//! * Second level with a long long long long long long long long long long long long long
//! long long long line
//! * Second level with another very long long long long long long long long long long long
//! long long long line
//! 1) Third level with a long long long long long long long long long long long long long
//! long long long line
//! 2) Third level with another very long long long long long long long long long long
//! long long long long line
//! - Forth level with a long long long long long long long long long long long long
//! long long long long line
//! - Forth level with another very long long long long long long long long long long
//! long long long long line
//! 3) One more item at the third level
//! 4) Last item of the third level
//! * Last item of second level
//! 3. Last item of first level

/// All the parameters ***except for `from_theater`*** should be inserted as sent by the remote
/// theater, i.e., as passed to [`Theater::send`] on the remote actor:
Expand Down
Loading

0 comments on commit ca2c68e

Please sign in to comment.