Skip to content

Commit

Permalink
feat: support for doctype retrieval (#26)
Browse files Browse the repository at this point in the history
  • Loading branch information
EstebanBorai authored Sep 2, 2024
1 parent a885c17 commit 6dfaa1f
Show file tree
Hide file tree
Showing 4 changed files with 39 additions and 0 deletions.
5 changes: 5 additions & 0 deletions src/reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,11 @@ impl<R: Read> EventReader<R> {
pub fn into_inner(self) -> R {
// Takes `self` by value and moves the `source` field out to the caller;
// everything else owned by the reader is dropped when this call returns.
self.source
}

/// Returns the DOCTYPE text reported by the underlying parser.
///
/// This is a thin accessor: it borrows the reader's `parser` and forwards to
/// its `doctype()` method, so the result is `None` whenever the parser has
/// no DOCTYPE text to report.
pub fn doctype(&self) -> Option<&String> {
    let parser = &self.parser;
    parser.doctype()
}
}

impl<B: Read> Position for EventReader<B> {
Expand Down
8 changes: 8 additions & 0 deletions src/reader/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,7 @@ impl PullParser {

data: MarkupData {
name: String::new(),
doctype: None,
version: None,
encoding: None,
standalone: None,
Expand All @@ -146,6 +147,12 @@ impl PullParser {
/// Reports whether this parser is configured to ignore end-of-stream errors.
///
/// Returns the `ignore_end_of_stream` flag stored in the parser's configuration.
pub fn is_ignoring_end_of_stream(&self) -> bool {
    let config = &self.config.c;
    config.ignore_end_of_stream
}

/// Returns a borrow of the DOCTYPE text held in the parser's markup data.
///
/// Yields `None` when no DOCTYPE text is currently stored. The returned
/// reference borrows from the parser; clone it if an owned copy is needed.
#[inline]
pub fn doctype(&self) -> Option<&String> {
    let stored = &self.data.doctype;
    stored.as_ref()
}

#[inline(never)]
fn set_encountered(&mut self, new_encounter: Encountered) -> Option<Result> {
if new_encounter <= self.encountered {
Expand Down Expand Up @@ -299,6 +306,7 @@ struct MarkupData {
name: String, // used for processing instruction name
ref_data: String, // used for reference content

doctype: Option<String>, // keeps a copy of the original doctype
version: Option<XmlVersion>, // used for XML declaration version
encoding: Option<String>, // used for XML declaration encoding
standalone: Option<bool>, // used for XML declaration standalone parameter
Expand Down
6 changes: 6 additions & 0 deletions src/reader/parser/inside_doctype.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
use std::fmt::Write;

use crate::common::{is_name_char, is_name_start_char, is_whitespace_char};
use crate::reader::error::SyntaxError;
use crate::reader::lexer::Token;
Expand All @@ -6,6 +8,10 @@ use super::{DoctypeSubstate, PullParser, QuoteToken, Result, State};

impl PullParser {
pub fn inside_doctype(&mut self, t: Token, substate: DoctypeSubstate) -> Option<Result> {
if let Some(ref mut doctype) = self.data.doctype {
write!(doctype, "{t}").ok()?;
}

match substate {
DoctypeSubstate::Outside => match t {
Token::TagEnd => self.into_state_continue(State::OutsideTag),
Expand Down
20 changes: 20 additions & 0 deletions tests/event_reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -714,6 +714,26 @@ fn push_pos_issue() {
parser.into_iter().for_each(|e| { e.unwrap(); });
}

// Checks that the DOCTYPE declaration of a small SVG document can be read
// back from the reader through `doctype()`.
//
// NOTE(review): no events are pulled from the reader before the assertion;
// confirm that the DOCTYPE is already available at that point.
// NOTE(review): the expected text has a single space where the input has a
// line break between the two quoted identifiers; confirm this is intended.
#[test]
fn retrieve_doctype() {
    let xml = r#"<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN"
"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
<svg xmlns="http://www.w3.org/2000/svg" version="1.1"
width="120" height="120">
<rect x="14" y="23" width="200" height="50" fill="lime"
stroke="black" />
</svg>"#;

    let config = ParserConfig::new()
        .cdata_to_characters(true)
        .ignore_comments(true)
        .coalesce_characters(false);
    let reader = config.create_reader(std::io::Cursor::new(xml));

    let expected = String::from(
        r#"<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">"#,
    );
    assert_eq!(reader.doctype(), Some(&expected));
}

// clones a lot but that's fine
fn trim_until_bar(s: String) -> String {
match s.trim() {
Expand Down

0 comments on commit 6dfaa1f

Please sign in to comment.