Skip to content

Commit

Permalink
source_range persist special character escape by peeking source
Browse files Browse the repository at this point in the history
new `State::last_event_end_index` to track event end
source_mapping → source_range
  • Loading branch information
SichangHe committed May 25, 2024
1 parent a234ed5 commit 92ad2c8
Show file tree
Hide file tree
Showing 3 changed files with 61 additions and 24 deletions.
9 changes: 6 additions & 3 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,15 @@ use std::{
borrow::{Borrow, Cow},
collections::HashSet,
fmt::{self, Write},
ops::Range,
};

use pulldown_cmark::{Alignment as TableAlignment, Event, HeadingLevel, LinkType, MetadataBlockKind, Tag, TagEnd};

mod source_mapping;
mod source_range;
mod text_modifications;

pub use source_mapping::*;
pub use source_range::*;
use text_modifications::*;

/// Similar to [Pulldown-Cmark-Alignment][Alignment], but with required
Expand All @@ -38,7 +39,7 @@ impl<'a> From<&'a TableAlignment> for Alignment {
/// The state of the [`cmark_resume()`] and [`cmark_resume_with_options()`] functions.
/// This does not only allow introspection, but enables the user
/// to halt the serialization at any time, and resume it later.
#[derive(Clone, Default, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[derive(Clone, Default, Debug, PartialEq, Eq, Hash)]
pub struct State<'a> {
/// The amount of newlines to insert after `Event::Start(...)`
pub newlines_before_start: usize,
Expand Down Expand Up @@ -68,6 +69,8 @@ pub struct State<'a> {
pub current_shortcut_text: Option<String>,
/// A list of shortcuts seen so far for later emission
pub shortcuts: Vec<(String, String, String)>,
/// Index of the end of the last event.
pub last_event_end_index: usize,
}

#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
Expand Down
34 changes: 26 additions & 8 deletions src/source_mapping.rs → src/source_range.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
use std::ops::Range;

use super::*;

/// Serialize a stream of [pulldown-cmark-Events][Event] each with source string into a string-backed buffer.
Expand All @@ -21,19 +19,39 @@ use super::*;
/// *Errors* are only happening if the underlying buffer fails, which is unlikely.
pub fn cmark_resume_with_source_range_and_options<'a, I, E, F>(
event_and_ranges: I,
_source: &'a str,
source: &'a str,
mut formatter: F,
state: Option<State<'a>>,
options: Options<'_>,
) -> Result<State<'a>, fmt::Error>
where
I: Iterator<Item = (E, Range<usize>)>,
I: Iterator<Item = (E, Option<Range<usize>>)>,
E: Borrow<Event<'a>>,
F: fmt::Write,
{
// NOTE(review): this listing is a rendered diff hunk, so a replaced line and
// its replacement appear back to back (`_source`/`source`, the two `I:` bounds,
// the two `for` headers). Read the second line of each pair as the current code.

// Resume from the caller's state when one is passed, otherwise start from `State::default()`.
let mut state = state.unwrap_or_default();
for (event, _range) in event_and_ranges {
for (event, range) in event_and_ranges {
// `Start` events never move `last_event_end_index`; any other event that
// carries a range does (see the end of the loop body).
let update_event_end_index = !matches!(*event.borrow(), Event::Start(_));
// Only `Text` events that come with a source range are candidates for
// suppressing the escape; everything else is serialized as usual.
let prevent_escape_leading_special_characters = match (&range, event.borrow()) {
(Some(range), Event::Text(_)) => {
// Suppress when the text begins at or before the end of the last tracked event.
// This operand is also always true for `range.start == 0`, so the
// `saturating_sub(1)` below is only evaluated with `range.start >= 1`.
range.start <= state.last_event_end_index ||
// Some source characters are not captured by the event ranges,
// so peek at the source byte just before this text: unless it is a
// backslash, the source did not escape the leading character, so the
// output should not either.
source.as_bytes().get(range.start.saturating_sub(1)) != Some(&b'\\')
}
_ => false,
};
// Remember the flag so the temporary override below can be undone.
let was_in_code_block = state.is_in_code_block;
if prevent_escape_leading_special_characters {
// Hack to not escape leading special characters: pretend to be inside a
// code block for this one event (presumably `cmark_resume_one_event`
// writes code-block text without escaping; that function is not visible here).
state.is_in_code_block = true;
}
cmark_resume_one_event(event, &mut formatter, &mut state, &options)?;
// NOTE(review): the restore is unconditional, so if `cmark_resume_one_event`
// itself changes `is_in_code_block` while handling this event, that change is
// overwritten here. Confirm this is intended, or restore only when the
// override above was applied.
state.is_in_code_block = was_in_code_block;

// Record where this event ended in the source, for the comparison against the
// next text event's `range.start`. Skipped for `Start` events and for events without a range.
if let (true, Some(range)) = (update_event_end_index, range) {
state.last_event_end_index = range.end
}
}
Ok(state)
}
Expand All @@ -46,7 +64,7 @@ pub fn cmark_resume_with_source_range<'a, I, E, F>(
state: Option<State<'a>>,
) -> Result<State<'a>, fmt::Error>
where
I: Iterator<Item = (E, Range<usize>)>,
I: Iterator<Item = (E, Option<Range<usize>>)>,
E: Borrow<Event<'a>>,
F: fmt::Write,
{
Expand All @@ -61,7 +79,7 @@ pub fn cmark_with_source_range_and_options<'a, I, E, F>(
options: Options<'_>,
) -> Result<State<'a>, fmt::Error>
where
I: Iterator<Item = (E, Range<usize>)>,
I: Iterator<Item = (E, Option<Range<usize>>)>,
E: Borrow<Event<'a>>,
F: fmt::Write,
{
Expand All @@ -82,7 +100,7 @@ pub fn cmark_with_source_range<'a, I, E, F>(
mut formatter: F,
) -> Result<State<'a>, fmt::Error>
where
I: Iterator<Item = (E, Range<usize>)>,
I: Iterator<Item = (E, Option<Range<usize>>)>,
E: Borrow<Event<'a>>,
F: fmt::Write,
{
Expand Down
42 changes: 29 additions & 13 deletions tests/source_mapping_fmt.rs → tests/source_range_fmt.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,20 +9,33 @@ use pulldown_cmark_to_cmark::{

// Test helper: parses `s` with `Options::all()`, serializes the events together with
// their source offsets through `cmark_with_source_range`, and returns the output
// buffer alongside the resulting `State`.
// (Rendered diff hunk: the one-line `let s = ...` call is the replaced form of the
// multi-line `let mut s = ...` call that follows it.)
fn fmts(s: &str) -> (String, State<'_>) {
let mut buf = String::new();
let s = cmark_with_source_range(Parser::new_ext(s, Options::all()).into_offset_iter(), s, &mut buf).unwrap();
let mut s = cmark_with_source_range(
Parser::new_ext(s, Options::all())
.into_offset_iter()
// Wrap every range in `Some` to match the `(event, Option<Range<usize>>)` item type.
.map(|(e, r)| (e, Some(r))),
s,
&mut buf,
)
.unwrap();
// Not testing this field: reset it to the default so comparisons against an
// expected `State` do not depend on where the last event ended.
s.last_event_end_index = Default::default();
(buf, s)
}

// Test helper: like `fmts`, but calls `cmark_resume_with_source_range_and_options`
// with no prior state (`None`) and the caller-supplied `options`.
// (Rendered diff hunk: `let s = ...(` and the single-line `Parser::new_ext(...)`
// argument are the replaced forms of the lines that follow them.)
fn fmts_with_options<'a>(s: &'a str, options: CmarkToCmarkOptions<'a>) -> (String, State<'a>) {
let mut buf = String::new();
let s = cmark_resume_with_source_range_and_options(
Parser::new_ext(s, Options::all()).into_offset_iter(),
let mut s = cmark_resume_with_source_range_and_options(
Parser::new_ext(s, Options::all())
.into_offset_iter()
// Wrap every range in `Some` to match the `(event, Option<Range<usize>>)` item type.
.map(|(e, r)| (e, Some(r))),
s,
&mut buf,
None,
options,
)
.unwrap();
// Not testing this field: reset it to the default so comparisons against an
// expected `State` do not depend on where the last event ended.
s.last_event_end_index = Default::default();
(buf, s)
}

Expand All @@ -32,7 +45,14 @@ fn assert_events_eq(s: &str) {
let _before_events = Parser::new_ext(s, Options::all());

let mut buf = String::new();
cmark_with_source_range(Parser::new_ext(s, Options::all()).into_offset_iter(), s, &mut buf).unwrap();
cmark_with_source_range(
Parser::new_ext(s, Options::all())
.into_offset_iter()
.map(|(e, r)| (e, Some(r))),
s,
&mut buf,
)
.unwrap();

let before_events = Parser::new_ext(s, Options::all());
let after_events = Parser::new_ext(&buf, Options::all());
Expand Down Expand Up @@ -276,9 +296,8 @@ mod inline_elements {
}

#[test]
#[ignore]
fn rustdoc_link() {
// Brackets are not escaped if necessary.
// Brackets are not escaped if not escaped in the source.
assert_eq!(
fmts("[`Vec`]"),
(
Expand Down Expand Up @@ -611,11 +630,8 @@ mod escapes {
}

#[test]
fn it_does_not_recreate_escapes_for_underscores_in_the_middle_of_a_word() {
assert_eq!(
fmts("\\_hello_world_").0,
"\\_hello_world\\_" // it actually makes mal-formatted markdown better
);
fn it_preserves_underscores_escapes() {
assert_eq!(fmts("\\_hello_world_").0, "\\_hello_world_");
}

#[test]
Expand Down Expand Up @@ -697,10 +713,10 @@ mod escapes {
}

#[test]
fn it_does_esscape_lone_square_brackets_in_text() {
fn it_does_not_escape_lone_square_brackets_in_text_if_the_source_does_not() {
assert_eq!(
fmts("] a closing bracket does nothing").0,
"\\] a closing bracket does nothing"
"] a closing bracket does nothing"
)
}

Expand Down

0 comments on commit 92ad2c8

Please sign in to comment.