diff --git a/html5ever/Cargo.toml b/html5ever/Cargo.toml index 836d20b2..c9ef393d 100644 --- a/html5ever/Cargo.toml +++ b/html5ever/Cargo.toml @@ -17,7 +17,6 @@ trace_tokenizer = [] [dependencies] log = "0.4" -mac = "0.1" markup5ever = { version = "0.17", path = "../markup5ever" } match_token = { workspace = true } diff --git a/html5ever/src/lib.rs b/html5ever/src/lib.rs index e1415f60..e4200d6b 100644 --- a/html5ever/src/lib.rs +++ b/html5ever/src/lib.rs @@ -18,13 +18,12 @@ pub use markup5ever::*; pub use serialize::serialize; -#[macro_use] -mod macros; - mod util { pub(crate) mod str; } +pub(crate) mod macros; + pub mod driver; pub mod serialize; pub mod tokenizer; diff --git a/html5ever/src/macros.rs b/html5ever/src/macros.rs index f38b5cf3..cea6f395 100644 --- a/html5ever/src/macros.rs +++ b/html5ever/src/macros.rs @@ -21,13 +21,14 @@ macro_rules! unwrap_or_return { x }}; } +pub(crate) use unwrap_or_return; macro_rules! time { ($e:expr) => {{ - let now = ::std::time::Instant::now(); + let t0 = ::std::time::Instant::now(); let result = $e; - let d = now.elapsed(); - let dt = d.as_secs() * 1_000_000_000 + u64::from(d.subsec_nanos()); + let dt = t0.elapsed().as_nanos() as u64; (result, dt) }}; } +pub(crate) use time; diff --git a/html5ever/src/tokenizer/char_ref/mod.rs b/html5ever/src/tokenizer/char_ref/mod.rs index c97780cc..20842073 100644 --- a/html5ever/src/tokenizer/char_ref/mod.rs +++ b/html5ever/src/tokenizer/char_ref/mod.rs @@ -13,8 +13,7 @@ use crate::data; use crate::tendril::StrTendril; use log::debug; -use mac::format_if; -use std::borrow::Cow::Borrowed; +use std::borrow::Cow::{self, Borrowed}; use std::char::from_u32; use self::State::*; @@ -257,12 +256,14 @@ impl CharRefTokenizer { }; if error { - let msg = format_if!( - tokenizer.opts.exact_errors, - "Invalid numeric character reference", - "Invalid numeric character reference value 0x{:06X}", - self.num - ); + let msg = if tokenizer.opts.exact_errors { + Cow::from(format!( + "Invalid numeric 
character reference value 0x{:06X}", + self.num + )) + } else { + Cow::from("Invalid numeric character reference") + }; tokenizer.emit_error(msg); } @@ -299,12 +300,11 @@ impl CharRefTokenizer { } fn emit_name_error(&mut self, tokenizer: &Tokenizer) { - let msg = format_if!( - tokenizer.opts.exact_errors, - "Invalid character reference", - "Invalid character reference &{}", - self.name_buf() - ); + let msg = if tokenizer.opts.exact_errors { + Cow::from(format!("Invalid character reference &{}", self.name_buf())) + } else { + Cow::from("Invalid character reference") + }; tokenizer.emit_error(msg); } diff --git a/html5ever/src/tokenizer/mod.rs b/html5ever/src/tokenizer/mod.rs index edc42fc8..23d737fb 100644 --- a/html5ever/src/tokenizer/mod.rs +++ b/html5ever/src/tokenizer/mod.rs @@ -24,7 +24,6 @@ use self::char_ref::{CharRef, CharRefTokenizer}; use crate::util::str::lower_ascii_letter; use log::{debug, trace}; -use mac::format_if; use markup5ever::{ns, small_char_set, TokenizerResult}; use std::borrow::Cow::{self, Borrowed}; use std::cell::{Cell, RefCell, RefMut}; @@ -32,6 +31,7 @@ use std::collections::BTreeMap; use std::mem; pub use crate::buffer_queue::{BufferQueue, FromSet, NotFromSet, SetResult}; +use crate::macros::{time, unwrap_or_return}; use crate::tendril::StrTendril; use crate::{Attribute, LocalName, QualName, SmallCharSet}; @@ -376,13 +376,13 @@ impl Tokenizer { #[cfg(feature = "trace_tokenizer")] trace!(" error"); - let msg = format_if!( - self.opts.exact_errors, - "Bad character", - "Saw {} in state {:?}", - self.current_char.get(), - self.state.get() - ); + let msg = if self.opts.exact_errors { + let c = self.current_char.get(); + let state = self.state.get(); + Cow::from(format!("Saw {c} in state {state:?}")) + } else { + Cow::from("Bad character") + }; self.emit_error(msg); } @@ -391,12 +391,12 @@ impl Tokenizer { #[cfg(feature = "trace_tokenizer")] trace!(" error_eof"); - let msg = format_if!( - self.opts.exact_errors, - "Unexpected EOF", - "Saw 
EOF in state {:?}", - self.state.get() - ); + let msg = if self.opts.exact_errors { + let state = self.state.get(); + Cow::from(format!("Saw EOF in state {state:?}")) + } else { + Cow::from("Unexpected EOF") + }; self.emit_error(msg); } diff --git a/html5ever/src/tree_builder/mod.rs b/html5ever/src/tree_builder/mod.rs index 3de1e4f7..f0d89b92 100644 --- a/html5ever/src/tree_builder/mod.rs +++ b/html5ever/src/tree_builder/mod.rs @@ -22,7 +22,7 @@ use crate::tokenizer; use crate::tokenizer::states as tok_state; use crate::tokenizer::{Doctype, EndTag, StartTag, Tag, TokenSink, TokenSinkResult}; -use std::borrow::Cow::Borrowed; +use std::borrow::Cow::{self, Borrowed}; use std::cell::{Cell, Ref, RefCell}; use std::collections::VecDeque; use std::iter::{Enumerate, Rev}; @@ -32,7 +32,6 @@ use crate::tokenizer::states::RawKind; use crate::tree_builder::tag_sets::*; use crate::util::str::to_escaped_string; use log::{debug, log_enabled, warn, Level}; -use mac::format_if; use markup5ever::{expanded_name, local_name, namespace_prefix, ns}; #[macro_use] @@ -488,12 +487,11 @@ where if self.mode.get() == InsertionMode::Initial { let (err, quirk) = data::doctype_error_and_quirks(&dt, self.opts.iframe_srcdoc); if err { - self.sink.parse_error(format_if!( - self.opts.exact_errors, - "Bad DOCTYPE", - "Bad DOCTYPE: {:?}", - dt - )); + self.sink.parse_error(if self.opts.exact_errors { + Cow::from(format!("Bad DOCTYPE: {dt:?}")) + } else { + Cow::from("Bad DOCTYPE") + }); } let Doctype { name, @@ -513,12 +511,11 @@ where self.mode.set(InsertionMode::BeforeHtml); return tokenizer::TokenSinkResult::Continue; } else { - self.sink.parse_error(format_if!( - self.opts.exact_errors, - "DOCTYPE in body", - "DOCTYPE in insertion mode {:?}", - self.mode.get() - )); + self.sink.parse_error(if self.opts.exact_errors { + Cow::from(format!("DOCTYPE in insertion mode {:?}", self.mode.get())) + } else { + Cow::from("DOCTYPE in body") + }); return tokenizer::TokenSinkResult::Continue; } }, @@ -618,13 
+615,15 @@ where Sink: TreeSink, { fn unexpected(&self, _thing: &T) -> ProcessResult { - self.sink.parse_error(format_if!( - self.opts.exact_errors, - "Unexpected token", - "Unexpected token {} in insertion mode {:?}", - to_escaped_string(_thing), - self.mode.get() - )); + self.sink.parse_error(if self.opts.exact_errors { + Cow::from(format!( + "Unexpected token {} in insertion mode {:?}", + to_escaped_string(_thing), + self.mode.get() + )) + } else { + Cow::from("Unexpected token") + }); ProcessResult::Done } @@ -1053,20 +1052,19 @@ where "thead" "tr" "body" "html"); for elem in self.open_elems.borrow().iter() { - let error; - { + let error = { let elem_name = self.sink.elem_name(elem); let name = elem_name.expanded(); if body_end_ok(name) { continue; } - error = format_if!( - self.opts.exact_errors, - "Unexpected open tag at end of body", - "Unexpected open tag {:?} at end of body", - name - ); - } + + if self.opts.exact_errors { + Cow::from(format!("Unexpected open tag {name:?} at end of body")) + } else { + Cow::from("Unexpected open tag at end of body") + } + }; self.sink.parse_error(error); // FIXME: Do we keep checking after finding one bad tag? // The spec suggests not. @@ -1193,12 +1191,11 @@ where /// Signal an error if it was not the first one. 
fn expect_to_close(&self, name: LocalName) { if self.pop_until_named(name.clone()) != 1 { - self.sink.parse_error(format_if!( - self.opts.exact_errors, - "Unexpected open element", - "Unexpected open element while closing {:?}", - name - )); + self.sink.parse_error(if self.opts.exact_errors { + Cow::from(format!("Unexpected open element while closing {name:?}")) + } else { + Cow::from("Unexpected open element") + }); } } @@ -1242,12 +1239,14 @@ where self.orig_mode.set(Some(self.mode.get())); ProcessResult::Reprocess(InsertionMode::InTableText, token) } else { - self.sink.parse_error(format_if!( - self.opts.exact_errors, - "Unexpected characters in table", - "Unexpected characters {} in table", - to_escaped_string(&token) - )); + self.sink.parse_error(if self.opts.exact_errors { + Cow::from(format!( + "Unexpected characters {} in table", + to_escaped_string(&token) + )) + } else { + Cow::from("Unexpected characters in table") + }); self.foster_parent_in_body(token) } } @@ -1553,15 +1552,11 @@ where } } - // Can't use unwrap_or_return!() due to rust-lang/rust#16617. - let match_idx = match match_idx { - None => { - // I believe this is impossible, because the root - // element is in special_tag. - self.unexpected(&tag); - return; - }, - Some(x) => x, + let Some(match_idx) = match_idx else { + // I believe this is impossible, because the root + // element is in special_tag. + self.unexpected(&tag); + return; }; self.generate_implied_end_except(tag.name.clone()); diff --git a/html5ever/src/tree_builder/rules.rs b/html5ever/src/tree_builder/rules.rs index 68ccadac..e3326ff0 100644 --- a/html5ever/src/tree_builder/rules.rs +++ b/html5ever/src/tree_builder/rules.rs @@ -491,13 +491,9 @@ where => { if !self.in_html_elem_named(local_name!("template")) { - // Can't use unwrap_or_return!() due to rust-lang/rust#16617. 
- let node = match self.form_elem.take() { - None => { - self.sink.parse_error(Borrowed("Null form element pointer on ")); - return ProcessResult::Done; - } - Some(x) => x, + let Some(node) = self.form_elem.take() else { + self.sink.parse_error(Borrowed("Null form element pointer on ")); + return ProcessResult::Done; }; if !self.in_scope(default_scope, |n| self.sink.same_node(&node, &n)) { self.sink.parse_error(Borrowed("Form element not in scope on ")); diff --git a/html5ever/src/util/str.rs b/html5ever/src/util/str.rs index 2c0ec3e2..9d788365 100644 --- a/html5ever/src/util/str.rs +++ b/html5ever/src/util/str.rs @@ -29,14 +29,24 @@ pub(crate) fn lower_ascii_letter(c: char) -> Option { #[allow(non_snake_case)] mod test { use super::lower_ascii_letter; - use mac::test_eq; - - test_eq!(lower_letter_a_is_a, lower_ascii_letter('a'), Some('a')); - test_eq!(lower_letter_A_is_a, lower_ascii_letter('A'), Some('a')); - test_eq!(lower_letter_symbol_is_None, lower_ascii_letter('!'), None); - test_eq!( - lower_letter_nonascii_is_None, - lower_ascii_letter('\u{a66e}'), - None - ); + + #[test] + fn lower_letter_a_is_a() { + assert_eq!(lower_ascii_letter('a'), Some('a')); + } + + #[test] + fn lower_letter_A_is_a() { + assert_eq!(lower_ascii_letter('A'), Some('a')); + } + + #[test] + fn lower_letter_symbol_is_None() { + assert_eq!(lower_ascii_letter('!'), None); + } + + #[test] + fn lower_letter_nonascii_is_None() { + assert_eq!(lower_ascii_letter('\u{a66e}'), None); + } } diff --git a/xml5ever/Cargo.toml b/xml5ever/Cargo.toml index e7bbfb8f..0194ac37 100644 --- a/xml5ever/Cargo.toml +++ b/xml5ever/Cargo.toml @@ -19,7 +19,6 @@ trace_tokenizer = [] [dependencies] log = "0.4" -mac = "0.1" markup5ever = { version = "0.17", path = "../markup5ever" } [dev-dependencies] diff --git a/xml5ever/src/lib.rs b/xml5ever/src/lib.rs index 9d7d2557..c9e557ff 100644 --- a/xml5ever/src/lib.rs +++ b/xml5ever/src/lib.rs @@ -35,14 +35,7 @@ pub use markup5ever::*; -macro_rules! 
time { - ($e:expr) => {{ - let t0 = ::std::time::Instant::now(); - let result = $e; - let dt = t0.elapsed().as_nanos() as u64; - (result, dt) - }}; -} +pub(crate) mod macros; /// Driver pub mod driver; diff --git a/xml5ever/src/macros.rs b/xml5ever/src/macros.rs new file mode 100644 index 00000000..cea6f395 --- /dev/null +++ b/xml5ever/src/macros.rs @@ -0,0 +1,34 @@ +// Copyright 2014-2017 The html5ever Project Developers. See the +// COPYRIGHT file at the top-level directory of this distribution. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +macro_rules! unwrap_or_return { + ($opt:expr) => {{ + let Some(x) = $opt else { + return; + }; + x + }}; + ($opt:expr, $retval:expr) => {{ + let Some(x) = $opt else { + return $retval; + }; + x + }}; +} +pub(crate) use unwrap_or_return; + +macro_rules! time { + ($e:expr) => {{ + let t0 = ::std::time::Instant::now(); + let result = $e; + let dt = t0.elapsed().as_nanos() as u64; + (result, dt) + }}; +} +pub(crate) use time; diff --git a/xml5ever/src/tokenizer/char_ref/mod.rs b/xml5ever/src/tokenizer/char_ref/mod.rs index 84e5e70f..1a515c40 100644 --- a/xml5ever/src/tokenizer/char_ref/mod.rs +++ b/xml5ever/src/tokenizer/char_ref/mod.rs @@ -9,11 +9,11 @@ use super::{TokenSink, XmlTokenizer}; use crate::data; +use crate::macros::unwrap_or_return; use crate::tendril::StrTendril; use log::debug; -use mac::{format_if, unwrap_or_return}; use markup5ever::buffer_queue::BufferQueue; -use std::borrow::Cow::Borrowed; +use std::borrow::Cow::{self, Borrowed}; use std::char::from_u32; use self::State::*; @@ -258,12 +258,14 @@ impl CharRefTokenizer { }; if error { - let msg = format_if!( - tokenizer.opts.exact_errors, - "Invalid numeric character reference", - "Invalid numeric character reference value 0x{:06X}", - self.num - ); + let msg = if tokenizer.opts.exact_errors { + Cow::from(format!( + 
"Invalid numeric character reference value 0x{:06X}", + self.num + )) + } else { + Cow::from("Invalid numeric character reference") + }; tokenizer.emit_error(msg); } @@ -295,12 +297,11 @@ impl CharRefTokenizer { } fn emit_name_error(&mut self, tokenizer: &XmlTokenizer) { - let msg = format_if!( - tokenizer.opts.exact_errors, - "Invalid character reference", - "Invalid character reference &{}", - self.name_buf() - ); + let msg = if tokenizer.opts.exact_errors { + Cow::from(format!("Invalid character reference &{}", self.name_buf())) + } else { + Cow::from("Invalid character reference") + }; tokenizer.emit_error(msg); } diff --git a/xml5ever/src/tokenizer/mod.rs b/xml5ever/src/tokenizer/mod.rs index 22cacba5..e69e45e5 100644 --- a/xml5ever/src/tokenizer/mod.rs +++ b/xml5ever/src/tokenizer/mod.rs @@ -17,10 +17,10 @@ pub use self::interface::{ }; pub use crate::{LocalName, Namespace, Prefix}; +use crate::macros::{time, unwrap_or_return}; use crate::tendril::StrTendril; use crate::{buffer_queue, Attribute, QualName, SmallCharSet}; use log::debug; -use mac::{format_if, unwrap_or_return}; use markup5ever::{local_name, namespace_prefix, ns, small_char_set, TokenizerResult}; use std::borrow::Cow::{self, Borrowed}; use std::cell::{Cell, RefCell, RefMut}; @@ -233,7 +233,7 @@ impl XmlTokenizer { if self.ignore_lf.get() { self.ignore_lf.set(false); if c == '\n' { - c = unwrap_or_return!(input.next(), None); + c = input.next()?; } } @@ -265,12 +265,11 @@ impl XmlTokenizer { } fn bad_eof_error(&self) { - let msg = format_if!( - self.opts.exact_errors, - "Unexpected EOF", - "Saw EOF in state {:?}", - self.state - ); + let msg = if self.opts.exact_errors { + Cow::from(format!("Saw EOF in state {:?}", self.state)) + } else { + Cow::from("Unexpected EOF") + }; self.emit_error(msg); } @@ -365,13 +364,13 @@ impl XmlTokenizer { } fn bad_char_error(&self) { - let msg = format_if!( - self.opts.exact_errors, - "Bad character", - "Saw {} in state {:?}", - self.current_char.get(), - 
self.state.get() - ); + let msg = if self.opts.exact_errors { + let c = self.current_char.get(); + let state = self.state.get(); + Cow::from(format!("Saw {c} in state {state:?}")) + } else { + Cow::from("Bad character") + }; self.emit_error(msg); } diff --git a/xml5ever/src/tree_builder/mod.rs b/xml5ever/src/tree_builder/mod.rs index 059956d2..1f672ad6 100644 --- a/xml5ever/src/tree_builder/mod.rs +++ b/xml5ever/src/tree_builder/mod.rs @@ -10,7 +10,6 @@ mod types; use log::{debug, warn}; -use mac::unwrap_or_return; use markup5ever::{local_name, namespace_prefix, ns}; use std::borrow::Cow; use std::borrow::Cow::Borrowed; @@ -24,6 +23,7 @@ pub use self::interface::{ElemName, NodeOrText, Tracer, TreeSink}; use self::types::*; use crate::interface::{self, create_element, AppendNode, Attribute, QualName}; use crate::interface::{AppendText, ExpandedName}; +use crate::macros::unwrap_or_return; use crate::tokenizer::{self, EndTag, ProcessResult, StartTag, Tag, TokenSink}; use crate::tokenizer::{Doctype, EmptyTag, Pi, ShortTag}; use crate::{LocalName, Namespace, Prefix};