Skip to content
This repository has been archived by the owner on May 30, 2022. It is now read-only.

Commit

Permalink
Factor out checking of the end of <! syntax constructs into a funct…
Browse files Browse the repository at this point in the history
…ion and fix part of #344

As a result, the borrowing parser now applies the same optimization to DOCTYPE
parsing as the non-borrowing one.

This fixes the following tests (which are the reason for #344):
- reader::test::buffered::read_bang_element::comment::not_closed3
- reader::test::buffered::read_bang_element::comment::with_content
  • Loading branch information
Mingun committed Mar 8, 2022
1 parent a885278 commit 58c7661
Showing 1 changed file with 25 additions and 25 deletions.
50 changes: 25 additions & 25 deletions src/reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1078,18 +1078,7 @@ impl<'b, 'i, R: BufRead + 'i> BufferedInput<'b, 'i, &'b mut Vec<u8>> for R {
self.consume(used);
read += used;

let finished = match bang_type {
BangType::Comment => read >= 5 && buf.ends_with(b"--"),
BangType::CData => buf.ends_with(b"]]"),
BangType::DocType => {
memchr::memchr2_iter(b'<', b'>', buf)
.map(|p| if buf[p] == b'<' { 1i32 } else { -1 })
.sum::<i32>()
== 0
}
};

if finished {
if bang_type.check_finished(buf, read) {
break;
} else {
// '>' was omitted in the extend_from_slice above
Expand Down Expand Up @@ -1291,20 +1280,9 @@ impl<'a> BufferedInput<'a, 'a, ()> for &'a [u8] {
let bang_type = BangType::new(self[1..].first().copied())?;

for i in memchr::memchr_iter(b'>', self) {
let finished = match bang_type {
BangType::Comment => i >= 5 && self[..i].ends_with(b"--"),
BangType::CData => self[..i].ends_with(b"]]"),
BangType::DocType => {
// Inefficient, but unlikely to happen often
let open = self[..i].iter().filter(|b| **b == b'<').count();
let closed = self[..i].iter().filter(|b| **b == b'>').count();
open == closed
}
};

if finished {
let bytes = &self[..i];
if bang_type.check_finished(bytes, i + 1) {
*position += i + 1;
let bytes = &self[..i];
// Skip the '>' too.
*self = &self[i + 1..];
return Ok(Some(bytes));
Expand Down Expand Up @@ -1416,6 +1394,28 @@ impl BangType {
None => return Err(Error::UnexpectedEof("Bang".to_string())),
})
}

/// Decides whether the `<!...>` construct of this kind is complete.
///
/// # Parameters
/// - `buf`: data from the `!` symbol
/// - `index`: position of the `>` symbol
///
/// # Returns
/// `true` when the terminator rule of the construct is satisfied:
/// - comment: `index` is greater than 5 and `buf` ends with `--`
/// - CDATA: `buf` ends with `]]`
/// - DOCTYPE: `buf` holds the same number of `<` and `>` bytes
#[inline(always)]
fn check_finished(&self, buf: &[u8], index: usize) -> bool {
    match self {
        // The shortest properly finished comment needs 6 symbols after `<`:
        // <!----> - XML comment
        // 012345 - index
        // so a `>` found at a smaller position cannot terminate a comment.
        Self::Comment => {
            let long_enough = index > 5;
            long_enough && buf.ends_with(b"--")
        }
        Self::CData => buf.ends_with(b"]]"),
        Self::DocType => {
            // Running balance of angle brackets: +1 for every `<`, -1 for every `>`.
            let mut balance = 0i32;
            for pos in memchr::memchr2_iter(b'<', b'>', buf) {
                balance += if buf[pos] == b'<' { 1 } else { -1 };
            }
            balance == 0
        }
    }
}
}

/// A function to check whether the byte is a whitespace (blank, new line, carriage return or tab)
Expand Down

0 comments on commit 58c7661

Please sign in to comment.