diff --git a/fuzz/fuzz_targets/all_options.rs b/fuzz/fuzz_targets/all_options.rs index 7c677727..464cb048 100644 --- a/fuzz/fuzz_targets/all_options.rs +++ b/fuzz/fuzz_targets/all_options.rs @@ -50,6 +50,8 @@ fuzz_target!(|s: &str| { relaxed_tasklist_matching: true, relaxed_autolinks: true, broken_link_callback: Some(Arc::new(cb)), + ignore_setext: true, + tasklist_in_table: true, }; let render = RenderOptions { @@ -62,7 +64,6 @@ fuzz_target!(|s: &str| { list_style: ListStyleType::Star, sourcepos: true, escaped_char_spans: true, - ignore_setext: true, ignore_empty_links: true, gfm_quirks: true, prefer_fenced: true, diff --git a/src/cm.rs b/src/cm.rs index 66084610..1af2972b 100644 --- a/src/cm.rs +++ b/src/cm.rs @@ -1,15 +1,14 @@ use crate::ctype::{isalpha, isdigit, ispunct, isspace}; use crate::nodes::{ AstNode, ListDelimType, ListType, NodeAlert, NodeCodeBlock, NodeHeading, NodeHtmlBlock, - NodeLink, NodeMath, NodeTable, NodeValue, NodeWikiLink, + NodeLink, NodeList, NodeMath, NodeTable, NodeValue, NodeWikiLink, TableAlignment, }; -use crate::nodes::{NodeList, TableAlignment}; #[cfg(feature = "shortcodes")] use crate::parser::shortcodes::NodeShortCode; use crate::parser::{Options, WikiLinksMode}; use crate::scanners; use crate::strings::trim_start_match; -use crate::{nodes, Plugins}; +use crate::{node_matches, nodes, Plugins}; pub use typed_arena::Arena; use std::cmp::max; @@ -725,7 +724,13 @@ impl<'a, 'o, 'c> CommonMarkFormatter<'a, 'o, 'c> { } fn format_task_item(&mut self, symbol: Option, node: &'a AstNode<'a>, entering: bool) { - self.format_item(node, entering); + if node + .parent() + .map(|p| node_matches!(p, NodeValue::List(_))) + .unwrap_or_default() + { + self.format_item(node, entering); + } if entering { write!(self, "[{}] ", symbol.unwrap_or(' ')).unwrap(); } diff --git a/src/html.rs b/src/html.rs index 001435c5..52006afd 100644 --- a/src/html.rs +++ b/src/html.rs @@ -15,7 +15,7 @@ use crate::nodes::{ TableAlignment, }; use crate::parser::{Options, Plugins}; -use crate::scanners; +use crate::{node_matches, scanners}; use std::collections::HashMap; use std::fmt::{self, Write}; use std::str; @@ -1208,15 +1208,25 @@ fn render_task_item<'a, T>( unreachable!() }; + let write_li = node + .parent() + .map(|p| node_matches!(p, NodeValue::List(_))) + .unwrap_or_default(); + if entering { context.cr()?; - context.write_str("")?; } - render_sourcepos(context, node)?; - context.write_str(">")?; context.write_str("( context.write_str(" checked=\"\"")?; } context.write_str(" disabled=\"\" /> ")?; - } else { + } else if write_li { context.write_str("\n")?; } @@ -1404,7 +1414,7 @@ pub fn render_math<'a, T>( pub fn render_math_code_block<'a, T>( context: &mut Context, node: &'a AstNode<'a>, - literal: &String, + literal: &str, ) -> Result { context.cr()?; diff --git a/src/nodes.rs b/src/nodes.rs index 9aa8857c..62c5c7af 100644 --- a/src/nodes.rs +++ b/src/nodes.rs @@ -630,7 +630,10 @@ impl LineColumn { } impl Ast { - /// Create a new AST node with the given value. + /// Create a new AST node with the given value and starting sourcepos. The + /// end column is set to zero; it is expected this will be set manually + /// or later in the parse. Use [`new_with_sourcepos`] if you have full + /// sourcepos. pub fn new(value: NodeValue, start: LineColumn) -> Self { Ast { value, @@ -643,6 +646,20 @@ impl Ast { line_offsets: Vec::with_capacity(0), } } + + /// Create a new AST node with the given value and sourcepos. + pub fn new_with_sourcepos(value: NodeValue, sourcepos: Sourcepos) -> Self { + Ast { + value, + content: String::new(), + sourcepos, + internal_offset: 0, + open: true, + last_line_blank: false, + table_visited: false, + line_offsets: Vec::with_capacity(0), + } + } } /// The type of a node within the document. @@ -820,6 +837,7 @@ pub fn can_contain_type<'a>(node: &'a AstNode<'a>, child: &NodeValue) -> bool { | NodeValue::SpoileredText | NodeValue::Underline | NodeValue::Subscript + | NodeValue::TaskItem(_) ), #[cfg(feature = "shortcodes")] @@ -841,6 +859,7 @@ pub fn can_contain_type<'a>(node: &'a AstNode<'a>, child: &NodeValue) -> bool { | NodeValue::Underline | NodeValue::Subscript | NodeValue::ShortCode(..) + | NodeValue::TaskItem(_) ), NodeValue::MultilineBlockQuote(_) => { diff --git a/src/parser/mod.rs b/src/parser/mod.rs index bbf38500..6d224ef1 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -47,6 +47,11 @@ const CODE_INDENT: usize = 4; // be nested this deeply. const MAX_LIST_DEPTH: usize = 100; +/// Shorthand for checking if a node's value matches the given expression. +/// +/// Note this will `borrow()` the provided node's data attribute while doing the +/// check, which will fail if the node is already mutably borrowed. +#[macro_export] macro_rules! node_matches { ($node:expr, $( $pat:pat )|+) => {{ matches!( @@ -731,6 +736,25 @@ pub struct ParseOptions<'c> { #[cfg_attr(feature = "bon", builder(default))] pub relaxed_tasklist_matching: bool, + /// Whether tasklist items can be parsed in table cells. At present, the + /// tasklist item must be the only content in the cell. Both tables and + /// tasklists much be enabled for this to work. + /// + /// ``` + /// # use comrak::{markdown_to_html, Options}; + /// let mut options = Options::default(); + /// options.extension.table = true; + /// options.extension.tasklist = true; + /// assert_eq!(markdown_to_html("| val |\n| - |\n| [ ] |\n", &options), + /// "\n\n\n\n\n\n\n\n\n\n\n
val
[ ]
\n"); + /// + /// options.parse.tasklist_in_table = true; + /// assert_eq!(markdown_to_html("| val |\n| - |\n| [ ] |\n", &options), + /// "\n\n\n\n\n\n\n\n\n\n\n
val
\n
\n"); + /// ``` + #[cfg_attr(feature = "bon", builder(default))] + pub tasklist_in_table: bool, + /// Relax parsing of autolinks, allow links to be detected inside brackets /// and allow all url schemes. It is intended to allow a very specific type of autolink /// detection, such as `[this http://and.com that]` or `{http://foo.com}`, on a best can basis. @@ -3053,6 +3077,13 @@ where } } + // Processes tasklist items in a text node. This function + // must not detach `node`, as we iterate through siblings in + // `postprocess_text_nodes_with_context` and may end up relying on it + // remaining in place. + // + // `text` is the mutably borrowed textual content of `node`. If it is empty + // after the call to `process_tasklist`, it will be properly cleaned up. fn process_tasklist( &mut self, node: &'a AstNode<'a>, @@ -3072,49 +3103,73 @@ where } let parent = node.parent().unwrap(); - if node.previous_sibling().is_some() || parent.previous_sibling().is_some() { - return; - } - if !node_matches!(parent, NodeValue::Paragraph) { - return; - } + if node_matches!(parent, NodeValue::TableCell) { + if !self.options.parse.tasklist_in_table { + return; + } - let grandparent = parent.parent().unwrap(); - if !node_matches!(grandparent, NodeValue::Item(..)) { - return; - } + if node.previous_sibling().is_some() || node.next_sibling().is_some() { + return; + } - let great_grandparent = grandparent.parent().unwrap(); - if !node_matches!(great_grandparent, NodeValue::List(..)) { - return; - } + // For now, require the task item is the only content of the table cell. + // If we want to relax this later, we can. + if end != text.len() { + return; + } - // These are sound only because the exact text that we've matched and - // the count thereof (i.e. "end") will precisely map to characters in - // the source document. - text.drain(..end); + text.drain(..end); + parent.prepend( + self.arena.alloc( + Ast::new_with_sourcepos( + NodeValue::TaskItem(if symbol == ' ' { None } else { Some(symbol) }), + *sourcepos, + ) + .into(), + ), + ); + } else if node_matches!(parent, NodeValue::Paragraph) { + if node.previous_sibling().is_some() || parent.previous_sibling().is_some() { + return; + } - let adjust = spx.consume(end) + 1; - assert_eq!( - sourcepos.start.column, - parent.data.borrow().sourcepos.start.column - ); + let grandparent = parent.parent().unwrap(); + if !node_matches!(grandparent, NodeValue::Item(..)) { + return; + } - // See tests::fuzz::echaw9. The paragraph doesn't exist in the source, - // so we remove it. - if sourcepos.end.column < adjust && node.next_sibling().is_none() { - parent.detach(); - } else { - sourcepos.start.column = adjust; - parent.data.borrow_mut().sourcepos.start.column = adjust; - } + let great_grandparent = grandparent.parent().unwrap(); + if !node_matches!(great_grandparent, NodeValue::List(..)) { + return; + } - grandparent.data.borrow_mut().value = - NodeValue::TaskItem(if symbol == ' ' { None } else { Some(symbol) }); + // These are sound only because the exact text that we've matched and + // the count thereof (i.e. "end") will precisely map to characters in + // the source document. + text.drain(..end); - if let NodeValue::List(ref mut list) = &mut great_grandparent.data.borrow_mut().value { - list.is_task_list = true; + let adjust = spx.consume(end) + 1; + assert_eq!( + sourcepos.start.column, + parent.data.borrow().sourcepos.start.column + ); + + // See tests::fuzz::echaw9. The paragraph doesn't exist in the source, + // so we remove it. + if sourcepos.end.column < adjust && node.next_sibling().is_none() { + parent.detach(); + } else { + sourcepos.start.column = adjust; + parent.data.borrow_mut().sourcepos.start.column = adjust; + } + + grandparent.data.borrow_mut().value = + NodeValue::TaskItem(if symbol == ' ' { None } else { Some(symbol) }); + + if let NodeValue::List(ref mut list) = &mut great_grandparent.data.borrow_mut().value { + list.is_task_list = true; + } } } diff --git a/src/tests/tasklist.rs b/src/tests/tasklist.rs index b8230abc..62af79a7 100644 --- a/src/tests/tasklist.rs +++ b/src/tests/tasklist.rs @@ -199,6 +199,117 @@ fn tasklist_32_with_classes() { ); } +#[test] +fn tasklist_in_table() { + html_opts!( + [ + extension.tasklist, + extension.table, + parse.tasklist_in_table, + render.sourcepos + ], + concat!( + "| | name |\n", + "| --- | ---- |\n", + "| [ ] | Rell |\n", + "| [x] | Kai |\n" + ), + concat!( + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
name
\n", + " Rell
\n", + " Kai
\n", + ), + ); + + html_opts!( + [ + extension.tasklist, + extension.table, + parse.tasklist_in_table, + parse.relaxed_tasklist_matching, + render.sourcepos + ], + concat!( + "| | name |\n", + "| ------- | ---- |\n", + "| [~] | Rell |\n", + "| [x] | Kai |\n", + "| [x] No. | Ez |\n" + ), + concat!( + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
name
\n", + " Rell
\n", + " Kai
[x] No.Ez
\n" + ), + ); +} + +#[test] +fn tasklist_in_table_fuzz() { + html_opts!( + [ + extension.tasklist, + extension.table, + extension.autolink, + parse.tasklist_in_table, + parse.ignore_setext + ], + "o\n-\t\r[ ] W@W.I[ ] ", + concat!( + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
o
[ ] W@W.I[ ]
\n", + ), + ); +} + #[test] fn sourcepos() { assert_ast_match!( diff --git a/src/xml.rs b/src/xml.rs index f3be5161..b7a4d99e 100644 --- a/src/xml.rs +++ b/src/xml.rs @@ -300,7 +300,7 @@ impl<'o, 'c> XmlFormatter<'o, 'c> { let title = alert.title.as_ref().unwrap(); self.output.write_str(" title=\"")?; - self.escape(&title)?; + self.escape(title)?; self.output.write_str("\"")?; }