diff --git a/benches/microbenches.rs b/benches/microbenches.rs index 95568224..c9ddd14b 100644 --- a/benches/microbenches.rs +++ b/benches/microbenches.rs @@ -5,8 +5,8 @@ use quick_xml::events::Event; use quick_xml::name::QName; use quick_xml::{NsReader, Reader}; -static SAMPLE: &[u8] = include_bytes!("../tests/documents/sample_rss.xml"); -static PLAYERS: &[u8] = include_bytes!("../tests/documents/players.xml"); +static SAMPLE: &str = include_str!("../tests/documents/sample_rss.xml"); +static PLAYERS: &str = include_str!("../tests/documents/players.xml"); static LOREM_IPSUM_TEXT: &str = "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt @@ -29,17 +29,15 @@ fn read_event(c: &mut Criterion) { let mut group = c.benchmark_group("read_event"); group.bench_function("trim_text = false", |b| { b.iter(|| { - let mut r = Reader::from_reader(SAMPLE); + let mut r = Reader::from_str(SAMPLE); r.check_end_names(false).check_comments(false); let mut count = criterion::black_box(0); - let mut buf = Vec::new(); loop { - match r.read_event_into(&mut buf) { + match r.read_event() { Ok(Event::Start(_)) | Ok(Event::Empty(_)) => count += 1, Ok(Event::Eof) => break, _ => (), } - buf.clear(); } assert_eq!( count, 1550, @@ -50,19 +48,17 @@ fn read_event(c: &mut Criterion) { group.bench_function("trim_text = true", |b| { b.iter(|| { - let mut r = Reader::from_reader(SAMPLE); + let mut r = Reader::from_str(SAMPLE); r.check_end_names(false) .check_comments(false) .trim_text(true); let mut count = criterion::black_box(0); - let mut buf = Vec::new(); loop { - match r.read_event_into(&mut buf) { + match r.read_event() { Ok(Event::Start(_)) | Ok(Event::Empty(_)) => count += 1, Ok(Event::Eof) => break, _ => (), } - buf.clear(); } assert_eq!( count, 1550, @@ -79,17 +75,15 @@ fn read_resolved_event_into(c: &mut Criterion) { let mut group = c.benchmark_group("NsReader::read_resolved_event_into"); group.bench_function("trim_text = false", |b| { b.iter(|| { - let mut r = NsReader::from_bytes(SAMPLE); + let mut r = NsReader::from_str(SAMPLE); r.check_end_names(false).check_comments(false); let mut count = criterion::black_box(0); - let mut buf = Vec::new(); loop { - match r.read_resolved_event_into(&mut buf) { + match r.read_resolved_event() { Ok((_, Event::Start(_))) | Ok((_, Event::Empty(_))) => count += 1, Ok((_, Event::Eof)) => break, _ => (), } - buf.clear(); } assert_eq!( count, 1550, @@ -100,19 +94,17 @@ fn read_resolved_event_into(c: &mut Criterion) { group.bench_function("trim_text = true", |b| { b.iter(|| { - let mut r = NsReader::from_bytes(SAMPLE); + let mut r = NsReader::from_str(SAMPLE); r.check_end_names(false) .check_comments(false) .trim_text(true); let mut count = criterion::black_box(0); - let mut buf = Vec::new(); loop { - match r.read_resolved_event_into(&mut buf) { + match r.read_resolved_event() { Ok((_, Event::Start(_))) | Ok((_, Event::Empty(_))) => count += 1, Ok((_, Event::Eof)) => break, _ => (), } - buf.clear(); } assert_eq!( count, 1550, @@ -127,79 +119,67 @@ fn read_resolved_event_into(c: &mut Criterion) { fn one_event(c: &mut Criterion) { let mut group = c.benchmark_group("One event"); group.bench_function("StartText", |b| { - let src = "Hello world!".repeat(512 / 12).into_bytes(); - let mut buf = Vec::with_capacity(1024); + let src = "Hello world!".repeat(512 / 12); b.iter(|| { - let mut r = Reader::from_reader(src.as_ref()); + let mut r = Reader::from_str(&src); let mut nbtxt = criterion::black_box(0); r.check_end_names(false).check_comments(false); - match r.read_event_into(&mut buf) { + match r.read_event() { Ok(Event::StartText(e)) => nbtxt += e.len(), something_else => panic!("Did not expect {:?}", something_else), }; - buf.clear(); - assert_eq!(nbtxt, 504); }) }); group.bench_function("Start", |b| { - let src = format!(r#""#, "world".repeat(512 / 5)).into_bytes(); - let mut buf = Vec::with_capacity(1024); + let src = format!(r#""#, "world".repeat(512 / 5)); b.iter(|| { - let mut r = Reader::from_reader(src.as_ref()); + let mut r = Reader::from_str(&src); let mut nbtxt = criterion::black_box(0); r.check_end_names(false) .check_comments(false) .trim_text(true); - match r.read_event_into(&mut buf) { + match r.read_event() { Ok(Event::Start(ref e)) => nbtxt += e.len(), something_else => panic!("Did not expect {:?}", something_else), }; - buf.clear(); - assert_eq!(nbtxt, 525); }) }); group.bench_function("Comment", |b| { - let src = format!(r#""#, "world".repeat(512 / 5)).into_bytes(); - let mut buf = Vec::with_capacity(1024); + let src = format!(r#""#, "world".repeat(512 / 5)); b.iter(|| { - let mut r = Reader::from_reader(src.as_ref()); + let mut r = Reader::from_str(&src); let mut nbtxt = criterion::black_box(0); r.check_end_names(false) .check_comments(false) .trim_text(true); - match r.read_event_into(&mut buf) { + match r.read_event() { Ok(Event::Comment(e)) => nbtxt += e.unescape().unwrap().len(), something_else => panic!("Did not expect {:?}", something_else), }; - buf.clear(); - assert_eq!(nbtxt, 520); }) }); group.bench_function("CData", |b| { - let src = format!(r#""#, "world".repeat(512 / 5)).into_bytes(); - let mut buf = Vec::with_capacity(1024); + let src = format!(r#""#, "world".repeat(512 / 5)); b.iter(|| { - let mut r = Reader::from_reader(src.as_ref()); + let mut r = Reader::from_str(&src); let mut nbtxt = criterion::black_box(0); r.check_end_names(false) .check_comments(false) .trim_text(true); - match r.read_event_into(&mut buf) { + match r.read_event() { Ok(Event::CData(ref e)) => nbtxt += e.len(), something_else => panic!("Did not expect {:?}", something_else), }; - buf.clear(); - assert_eq!(nbtxt, 518); }) }); @@ -211,12 +191,11 @@ fn attributes(c: &mut Criterion) { let mut group = c.benchmark_group("attributes"); group.bench_function("with_checks = true", |b| { b.iter(|| { - let mut r = Reader::from_reader(PLAYERS); + let mut r = Reader::from_str(PLAYERS); r.check_end_names(false).check_comments(false); let mut count = criterion::black_box(0); - let mut buf = Vec::new(); loop { - match r.read_event_into(&mut buf) { + match r.read_event() { Ok(Event::Empty(e)) => { for attr in e.attributes() { let _attr = attr.unwrap(); @@ -226,7 +205,6 @@ fn attributes(c: &mut Criterion) { Ok(Event::Eof) => break, _ => (), } - buf.clear(); } assert_eq!(count, 1041); }) @@ -234,12 +212,11 @@ fn attributes(c: &mut Criterion) { group.bench_function("with_checks = false", |b| { b.iter(|| { - let mut r = Reader::from_reader(PLAYERS); + let mut r = Reader::from_str(PLAYERS); r.check_end_names(false).check_comments(false); let mut count = criterion::black_box(0); - let mut buf = Vec::new(); loop { - match r.read_event_into(&mut buf) { + match r.read_event() { Ok(Event::Empty(e)) => { for attr in e.attributes().with_checks(false) { let _attr = attr.unwrap(); @@ -249,7 +226,6 @@ fn attributes(c: &mut Criterion) { Ok(Event::Eof) => break, _ => (), } - buf.clear(); } assert_eq!(count, 1041); }) @@ -257,12 +233,11 @@ fn attributes(c: &mut Criterion) { group.bench_function("try_get_attribute", |b| { b.iter(|| { - let mut r = Reader::from_reader(PLAYERS); + let mut r = Reader::from_str(PLAYERS); r.check_end_names(false).check_comments(false); let mut count = criterion::black_box(0); - let mut buf = Vec::new(); loop { - match r.read_event_into(&mut buf) { + match r.read_event() { Ok(Event::Empty(e)) if e.name() == QName(b"player") => { for name in ["num", "status", "avg"] { if let Some(_attr) = e.try_get_attribute(name).unwrap() { @@ -277,7 +252,6 @@ fn attributes(c: &mut Criterion) { Ok(Event::Eof) => break, _ => (), } - buf.clear(); } assert_eq!(count, 150); }) diff --git a/src/de/mod.rs b/src/de/mod.rs index 1d7dd4a7..e4d88a0d 100644 --- a/src/de/mod.rs +++ b/src/de/mod.rs @@ -296,15 +296,6 @@ where T::deserialize(&mut de) } -/// Deserialize an instance of type `T` from bytes of XML text. -pub fn from_slice<'de, T>(s: &'de [u8]) -> Result -where - T: Deserialize<'de>, -{ - let mut de = Deserializer::from_slice(s); - T::deserialize(&mut de) -} - /// Deserialize from a reader. This method will do internal copies of data /// readed from `reader`. If you want have a `&[u8]` or `&str` input and want /// to borrow as much as possible, use [`from_slice`] or [`from_str`] @@ -684,17 +675,7 @@ where impl<'de> Deserializer<'de, SliceReader<'de>> { /// Create new deserializer that will borrow data from the specified string pub fn from_str(s: &'de str) -> Self { - Self::from_borrowing_reader(Reader::from_str(s)) - } - - /// Create new deserializer that will borrow data from the specified byte array - pub fn from_slice(bytes: &'de [u8]) -> Self { - Self::from_borrowing_reader(Reader::from_bytes(bytes)) - } - - /// Create new deserializer that will borrow data from the specified borrowing reader - #[inline] - fn from_borrowing_reader(mut reader: Reader<&'de [u8]>) -> Self { + let mut reader = Reader::from_str(s); reader .expand_empty_elements(true) .check_end_names(true) @@ -1024,8 +1005,8 @@ mod tests { /// Checks that `peek()` and `read()` behaves correctly after `skip()` #[test] fn read_and_peek() { - let mut de = Deserializer::from_slice( - br#" + let mut de = Deserializer::from_str( + r#" text @@ -1144,8 +1125,8 @@ mod tests { /// Checks that `read_to_end()` behaves correctly after `skip()` #[test] fn read_to_end() { - let mut de = Deserializer::from_slice( - br#" + let mut de = Deserializer::from_str( + r#" text @@ -1239,8 +1220,8 @@ mod tests { item: Vec<()>, } - let mut de = Deserializer::from_slice( - br#" + let mut de = Deserializer::from_str( + r#" @@ -1265,8 +1246,8 @@ mod tests { fn read_to_end() { use crate::de::DeEvent::*; - let mut de = Deserializer::from_slice( - br#" + let mut de = Deserializer::from_str( + r#" textcontent @@ -1303,15 +1284,14 @@ mod tests { Some text - "## - .as_bytes(); + "##; let mut reader1 = IoReader { - reader: Reader::from_reader(s), + reader: Reader::from_reader(s.as_bytes()), buf: Vec::new(), }; let mut reader2 = SliceReader { - reader: Reader::from_bytes(s), + reader: Reader::from_str(s), }; loop { @@ -1333,11 +1313,10 @@ mod tests { - "## - .as_bytes(); + "##; let mut reader = SliceReader { - reader: Reader::from_bytes(s), + reader: Reader::from_str(s), }; reader diff --git a/src/events/mod.rs b/src/events/mod.rs index bc9ff0ae..e267e6f0 100644 --- a/src/events/mod.rs +++ b/src/events/mod.rs @@ -959,11 +959,12 @@ pub enum Event<'a> { /// use quick_xml::events::Event; /// /// // XML in UTF-8 with BOM - /// let xml = b"\xEF\xBB\xBF"; - /// let mut reader = Reader::from_bytes(xml); + /// let xml = b"\xEF\xBB\xBF".as_ref(); + /// let mut reader = Reader::from_reader(xml); + /// let mut buf = Vec::new(); /// let mut events_processed = 0; /// loop { - /// match reader.read_event() { + /// match reader.read_event_into(&mut buf) { /// Ok(Event::StartText(e)) => { /// assert_eq!(events_processed, 0); /// // Content contains BOM diff --git a/src/reader/buffered_reader.rs b/src/reader/buffered_reader.rs index a9fed368..d667e69b 100644 --- a/src/reader/buffered_reader.rs +++ b/src/reader/buffered_reader.rs @@ -426,3 +426,49 @@ impl<'b, R: BufRead> XmlSource<'b, &'b mut Vec> for R { } } } + +#[cfg(test)] +mod test { + #[cfg(feature = "encoding")] + mod encoding { + use crate::events::Event; + use crate::reader::Reader; + use encoding_rs::{UTF_16LE, UTF_8, WINDOWS_1251}; + use pretty_assertions::assert_eq; + + /// Checks that encoding is detected by BOM and changed after XML declaration + #[test] + fn bom_detected() { + let mut reader = + Reader::from_reader(b"\xFF\xFE".as_ref()); + let mut buf = Vec::new(); + + assert_eq!(reader.decoder().encoding(), UTF_8); + reader.read_event_into(&mut buf).unwrap(); + assert_eq!(reader.decoder().encoding(), UTF_16LE); + + reader.read_event_into(&mut buf).unwrap(); + assert_eq!(reader.decoder().encoding(), WINDOWS_1251); + + assert_eq!(reader.read_event_into(&mut buf).unwrap(), Event::Eof); + } + + /// Checks that encoding is changed by XML declaration, but only once + #[test] + fn xml_declaration() { + let mut reader = Reader::from_reader( + b"".as_ref(), + ); + let mut buf = Vec::new(); + + assert_eq!(reader.decoder().encoding(), UTF_8); + reader.read_event_into(&mut buf).unwrap(); + assert_eq!(reader.decoder().encoding(), UTF_16LE); + + reader.read_event_into(&mut buf).unwrap(); + assert_eq!(reader.decoder().encoding(), UTF_16LE); + + assert_eq!(reader.read_event_into(&mut buf).unwrap(), Event::Eof); + } + } +} diff --git a/src/reader/mod.rs b/src/reader/mod.rs index ef663f90..9797edcd 100644 --- a/src/reader/mod.rs +++ b/src/reader/mod.rs @@ -1793,62 +1793,6 @@ mod test { ); } } - - #[cfg(feature = "encoding")] - mod encoding { - use crate::events::Event; - use crate::reader::Reader; - use encoding_rs::{UTF_8, UTF_16LE, WINDOWS_1251}; - use pretty_assertions::assert_eq; - - mod bytes { - use super::*; - use pretty_assertions::assert_eq; - - /// Checks that encoding is detected by BOM and changed after XML declaration - #[test] - fn bom_detected() { - let mut reader = Reader::from_bytes(b"\xFF\xFE"); - - assert_eq!(reader.decoder().encoding(), UTF_8); - reader.read_event_impl($buf).unwrap(); - assert_eq!(reader.decoder().encoding(), UTF_16LE); - - reader.read_event_impl($buf).unwrap(); - assert_eq!(reader.decoder().encoding(), WINDOWS_1251); - - assert_eq!(reader.read_event_impl($buf).unwrap(), Event::Eof); - } - - /// Checks that encoding is changed by XML declaration, but only once - #[test] - fn xml_declaration() { - let mut reader = Reader::from_bytes(b""); - - assert_eq!(reader.decoder().encoding(), UTF_8); - reader.read_event_impl($buf).unwrap(); - assert_eq!(reader.decoder().encoding(), UTF_16LE); - - reader.read_event_impl($buf).unwrap(); - assert_eq!(reader.decoder().encoding(), UTF_16LE); - - assert_eq!(reader.read_event_impl($buf).unwrap(), Event::Eof); - } - } - - /// Checks that XML declaration cannot change the encoding from UTF-8 if - /// a `Reader` was created using `from_str` method - #[test] - fn str_always_has_utf8() { - let mut reader = Reader::from_str(""); - - assert_eq!(reader.decoder().encoding(), UTF_8); - reader.read_event_impl($buf).unwrap(); - assert_eq!(reader.decoder().encoding(), UTF_8); - - assert_eq!(reader.read_event_impl($buf).unwrap(), Event::Eof); - } - } }; } diff --git a/src/reader/ns_reader.rs b/src/reader/ns_reader.rs index 868e88f4..66561011 100644 --- a/src/reader/ns_reader.rs +++ b/src/reader/ns_reader.rs @@ -522,12 +522,6 @@ impl<'i> NsReader<&'i [u8]> { Self::new(Reader::from_str(s)) } - /// Creates an XML reader from a slice of bytes. - #[inline] - pub fn from_bytes(bytes: &'i [u8]) -> Self { - Self::new(Reader::from_bytes(bytes)) - } - /// Reads the next event, borrow its content from the input buffer. /// /// This method manages namespaces but doesn't resolve them automatically. diff --git a/src/reader/slice_reader.rs b/src/reader/slice_reader.rs index 6cedfe2f..18306366 100644 --- a/src/reader/slice_reader.rs +++ b/src/reader/slice_reader.rs @@ -32,11 +32,6 @@ impl<'a> Reader<&'a [u8]> { Self::from_reader(s.as_bytes()) } - /// Creates an XML reader from a slice of bytes. - pub fn from_bytes(s: &'a [u8]) -> Self { - Self::from_reader(s) - } - /// Read an event that borrows from the input rather than a buffer. #[inline] pub fn read_event(&mut self) -> Result> { @@ -234,3 +229,28 @@ impl<'a> XmlSource<'a, ()> for &'a [u8] { Ok(self.first().copied()) } } + +#[cfg(test)] +mod test { + + #[cfg(feature = "encoding")] + mod encoding { + use crate::events::Event; + use crate::reader::Reader; + use encoding_rs::UTF_8; + use pretty_assertions::assert_eq; + + /// Checks that XML declaration cannot change the encoding from UTF-8 if + /// a `Reader` was created using `from_str` method + #[test] + fn str_always_has_utf8() { + let mut reader = Reader::from_str(""); + + assert_eq!(reader.decoder().encoding(), UTF_8); + reader.read_event().unwrap(); + assert_eq!(reader.decoder().encoding(), UTF_8); + + assert_eq!(reader.read_event().unwrap(), Event::Eof); + } + } +} diff --git a/tests/test.rs b/tests/test.rs index 819f8ab6..5f804652 100644 --- a/tests/test.rs +++ b/tests/test.rs @@ -9,8 +9,8 @@ use pretty_assertions::assert_eq; #[test] fn test_sample() { - let src: &[u8] = include_bytes!("documents/sample_rss.xml"); - let mut r = Reader::from_bytes(src); + let src = include_str!("documents/sample_rss.xml"); + let mut r = Reader::from_str(src); let mut count = 0; loop { match r.read_event().unwrap() { @@ -25,8 +25,8 @@ fn test_sample() { #[test] fn test_attributes_empty() { - let src = b""; - let mut r = Reader::from_bytes(src); + let src = ""; + let mut r = Reader::from_str(src); r.trim_text(true).expand_empty_elements(false); match r.read_event() { Ok(Empty(e)) => { @@ -53,8 +53,8 @@ fn test_attributes_empty() { #[test] fn test_attribute_equal() { - let src = b""; - let mut r = Reader::from_reader(src as &[u8]); + let src = ""; + let mut r = Reader::from_str(src); r.trim_text(true).expand_empty_elements(false); match r.read_event() { Ok(Empty(e)) => { @@ -74,8 +74,8 @@ fn test_attribute_equal() { #[test] fn test_comment_starting_with_gt() { - let src = b"-->"; - let mut r = Reader::from_reader(src as &[u8]); + let src = "-->"; + let mut r = Reader::from_str(src); r.trim_text(true).expand_empty_elements(false); loop { match r.read_event() { @@ -92,11 +92,12 @@ fn test_comment_starting_with_gt() { #[test] #[cfg(feature = "encoding")] fn test_koi8_r_encoding() { - let src = include_bytes!("documents/opennews_all.rss"); - let mut r = Reader::from_bytes(src); + let src = include_bytes!("documents/opennews_all.rss").as_ref(); + let mut buf = vec![]; + let mut r = Reader::from_reader(src); r.trim_text(true).expand_empty_elements(false); loop { - match r.read_event() { + match r.read_event_into(&mut buf) { Ok(Text(e)) => { e.unescape().unwrap(); } diff --git a/tests/unit_tests.rs b/tests/unit_tests.rs index 80407736..b9d62054 100644 --- a/tests/unit_tests.rs +++ b/tests/unit_tests.rs @@ -751,13 +751,14 @@ mod decode_with_bom_removal { #[cfg(feature = "encoding")] #[ignore = "Non-ASCII compatible encodings not properly supported yet. See https://github.com/tafia/quick-xml/issues/158"] fn removes_utf16be_bom() { - let mut reader = Reader::from_bytes(include_bytes!("./documents/utf16be.xml")); + let mut reader = Reader::from_reader(include_bytes!("./documents/utf16be.xml").as_ref()); reader.trim_text(true); + let mut buf = Vec::new(); let mut txt = Vec::new(); loop { - match reader.read_event() { + match reader.read_event_into(&mut buf) { Ok(StartText(e)) => txt.push(e.decode_with_bom_removal().unwrap()), Ok(Eof) => break, _ => (), @@ -769,13 +770,13 @@ mod decode_with_bom_removal { #[test] #[cfg(feature = "encoding")] fn removes_utf16le_bom() { - let mut reader = Reader::from_bytes(include_bytes!("./documents/utf16le.xml")); + let mut reader = Reader::from_reader(include_bytes!("./documents/utf16le.xml").as_ref()); reader.trim_text(true); - + let mut buf = Vec::new(); let mut txt = Vec::new(); loop { - match reader.read_event() { + match reader.read_event_into(&mut buf) { Ok(StartText(e)) => txt.push(e.decode_with_bom_removal().unwrap()), Ok(Eof) => break, _ => (), diff --git a/tests/xmlrs_reader_tests.rs b/tests/xmlrs_reader_tests.rs index 14024bb8..b466a8b1 100644 --- a/tests/xmlrs_reader_tests.rs +++ b/tests/xmlrs_reader_tests.rs @@ -362,7 +362,7 @@ fn test(input: &str, output: &str, trim: bool) { #[track_caller] fn test_bytes(input: &[u8], output: &[u8], trim: bool) { - let mut reader = NsReader::from_bytes(input); + let mut reader = NsReader::from_reader(input); reader .trim_text(trim) .check_comments(true)