Skip to content

Commit

Permalink
Remove the ability to use SliceReader with raw bytes.
Browse files Browse the repository at this point in the history
In the near future, decoding will be performed automatically as the
input is read. If the input has an unknown encoding, it must be decoded
first, which requires a buffer. Therefore, raw bytes that previously went
through `Reader::from_bytes()` can only be read with the buffered
implementation.
  • Loading branch information
dralley committed Jul 25, 2022
1 parent 3fc4aa9 commit 682a6bb
Show file tree
Hide file tree
Showing 9 changed files with 108 additions and 122 deletions.
49 changes: 14 additions & 35 deletions src/de/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -296,15 +296,6 @@ where
T::deserialize(&mut de)
}

/// Deserialize an instance of type `T` from bytes of XML text.
///
/// Builds a [`Deserializer`] over `s` with [`Deserializer::from_slice`] and
/// drives `T::deserialize` with it. `T` is bound by `Deserialize<'de>`, where
/// `'de` is the lifetime of the input slice.
///
/// # Errors
///
/// Returns the [`DeError`] produced by `T::deserialize`, if any.
pub fn from_slice<'de, T>(s: &'de [u8]) -> Result<T, DeError>
where
T: Deserialize<'de>,
{
let mut de = Deserializer::from_slice(s);
T::deserialize(&mut de)
}

/// Deserialize from a reader. This method will do internal copies of data
/// read from `reader`. If you have a `&[u8]` or `&str` input and want
/// to borrow as much as possible, use [`from_slice`] or [`from_str`]
Expand Down Expand Up @@ -684,17 +675,7 @@ where
impl<'de> Deserializer<'de, SliceReader<'de>> {
/// Create new deserializer that will borrow data from the specified string
pub fn from_str(s: &'de str) -> Self {
Self::from_borrowing_reader(Reader::from_str(s))
}

/// Create new deserializer that will borrow data from the specified byte array
pub fn from_slice(bytes: &'de [u8]) -> Self {
Self::from_borrowing_reader(Reader::from_bytes(bytes))
}

/// Create new deserializer that will borrow data from the specified borrowing reader
#[inline]
fn from_borrowing_reader(mut reader: Reader<&'de [u8]>) -> Self {
let mut reader = Reader::from_str(s);
reader
.expand_empty_elements(true)
.check_end_names(true)
Expand Down Expand Up @@ -1024,8 +1005,8 @@ mod tests {
/// Checks that `peek()` and `read()` behaves correctly after `skip()`
#[test]
fn read_and_peek() {
let mut de = Deserializer::from_slice(
br#"
let mut de = Deserializer::from_str(
r#"
<root>
<inner>
text
Expand Down Expand Up @@ -1144,8 +1125,8 @@ mod tests {
/// Checks that `read_to_end()` behaves correctly after `skip()`
#[test]
fn read_to_end() {
let mut de = Deserializer::from_slice(
br#"
let mut de = Deserializer::from_str(
r#"
<root>
<skip>
text
Expand Down Expand Up @@ -1239,8 +1220,8 @@ mod tests {
item: Vec<()>,
}

let mut de = Deserializer::from_slice(
br#"
let mut de = Deserializer::from_str(
r#"
<any-name>
<item/>
<another-item>
Expand All @@ -1265,8 +1246,8 @@ mod tests {
fn read_to_end() {
use crate::de::DeEvent::*;

let mut de = Deserializer::from_slice(
br#"
let mut de = Deserializer::from_str(
r#"
<root>
<tag a="1"><tag>text</tag>content</tag>
<tag a="2"><![CDATA[cdata content]]></tag>
Expand Down Expand Up @@ -1303,15 +1284,14 @@ mod tests {
<item name="hello" source="world.rs">Some text</item>
<item2/>
<item3 value="world" />
"##
.as_bytes();
"##;

let mut reader1 = IoReader {
reader: Reader::from_reader(s),
reader: Reader::from_reader(s.as_bytes()),
buf: Vec::new(),
};
let mut reader2 = SliceReader {
reader: Reader::from_bytes(s),
reader: Reader::from_str(s),
};

loop {
Expand All @@ -1333,11 +1313,10 @@ mod tests {
<item2></item2>
<item3/>
<item4 value="world" />
"##
.as_bytes();
"##;

let mut reader = SliceReader {
reader: Reader::from_bytes(s),
reader: Reader::from_str(s),
};

reader
Expand Down
7 changes: 4 additions & 3 deletions src/events/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -959,11 +959,12 @@ pub enum Event<'a> {
/// use quick_xml::events::Event;
///
/// // XML in UTF-8 with BOM
/// let xml = b"\xEF\xBB\xBF<?xml version='1.0'?>";
/// let mut reader = Reader::from_bytes(xml);
/// let xml = b"\xEF\xBB\xBF<?xml version='1.0'?>".as_ref();
/// let mut reader = Reader::from_reader(xml);
/// let mut buf = Vec::new();
/// let mut events_processed = 0;
/// loop {
/// match reader.read_event() {
/// match reader.read_event_into(&mut buf) {
/// Ok(Event::StartText(e)) => {
/// assert_eq!(events_processed, 0);
/// // Content contains BOM
Expand Down
46 changes: 46 additions & 0 deletions src/reader/buffered_reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -426,3 +426,49 @@ impl<'b, R: BufRead> XmlSource<'b, &'b mut Vec<u8>> for R {
}
}
}

// Tests for encoding detection when the reader is built with
// `Reader::from_reader` over a byte slice and events are read into a buffer.
#[cfg(test)]
mod test {
#[cfg(feature = "encoding")]
mod encoding {
use crate::events::Event;
use crate::reader::Reader;
use encoding_rs::{UTF_16LE, UTF_8, WINDOWS_1251};
use pretty_assertions::assert_eq;

/// Checks that encoding is detected by BOM and changed after XML declaration
#[test]
fn bom_detected() {
// Input: the bytes `\xFF\xFE` followed by a declaration naming windows-1251.
let mut reader =
Reader::from_reader(b"\xFF\xFE<?xml encoding='windows-1251'?>".as_ref());
let mut buf = Vec::new();

// Before any event is read, the decoder reports UTF-8.
assert_eq!(reader.decoder().encoding(), UTF_8);
// After the first event, the decoder is expected to report UTF-16LE.
reader.read_event_into(&mut buf).unwrap();
assert_eq!(reader.decoder().encoding(), UTF_16LE);

// After the second event, the encoding named in the declaration is expected.
reader.read_event_into(&mut buf).unwrap();
assert_eq!(reader.decoder().encoding(), WINDOWS_1251);

// Nothing is left in the input.
assert_eq!(reader.read_event_into(&mut buf).unwrap(), Event::Eof);
}

/// Checks that encoding is changed by XML declaration, but only once
#[test]
fn xml_declaration() {
// Input: two consecutive declarations naming different encodings and no BOM.
let mut reader = Reader::from_reader(
b"<?xml encoding='UTF-16'?><?xml encoding='windows-1251'?>".as_ref(),
);
let mut buf = Vec::new();

// Before any event is read, the decoder reports UTF-8.
assert_eq!(reader.decoder().encoding(), UTF_8);
// The first declaration (`UTF-16`) is expected to switch the decoder to UTF-16LE.
reader.read_event_into(&mut buf).unwrap();
assert_eq!(reader.decoder().encoding(), UTF_16LE);

// The second declaration (`windows-1251`) must not change the encoding again.
reader.read_event_into(&mut buf).unwrap();
assert_eq!(reader.decoder().encoding(), UTF_16LE);

// Nothing is left in the input.
assert_eq!(reader.read_event_into(&mut buf).unwrap(), Event::Eof);
}
}
}
56 changes: 0 additions & 56 deletions src/reader/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1793,62 +1793,6 @@ mod test {
);
}
}

// Encoding-detection tests, compiled only with the `encoding` feature.
// NOTE(review): `$buf` is a macro metavariable; the enclosing macro that
// supplies it is not visible in this hunk.
#[cfg(feature = "encoding")]
mod encoding {
use crate::events::Event;
use crate::reader::Reader;
use encoding_rs::{UTF_8, UTF_16LE, WINDOWS_1251};
use pretty_assertions::assert_eq;

// Tests for readers created from raw bytes with `Reader::from_bytes`.
mod bytes {
use super::*;
use pretty_assertions::assert_eq;

/// Checks that encoding is detected by BOM and changed after XML declaration
#[test]
fn bom_detected() {
// Input: the bytes `\xFF\xFE` followed by a declaration naming windows-1251.
let mut reader = Reader::from_bytes(b"\xFF\xFE<?xml encoding='windows-1251'?>");

// Before any event is read, the decoder reports UTF-8.
assert_eq!(reader.decoder().encoding(), UTF_8);
// After the first event, the decoder is expected to report UTF-16LE.
reader.read_event_impl($buf).unwrap();
assert_eq!(reader.decoder().encoding(), UTF_16LE);

// After the second event, the encoding named in the declaration is expected.
reader.read_event_impl($buf).unwrap();
assert_eq!(reader.decoder().encoding(), WINDOWS_1251);

// Nothing is left in the input.
assert_eq!(reader.read_event_impl($buf).unwrap(), Event::Eof);
}

/// Checks that encoding is changed by XML declaration, but only once
#[test]
fn xml_declaration() {
// Input: two consecutive declarations naming different encodings and no BOM.
let mut reader = Reader::from_bytes(b"<?xml encoding='UTF-16'?><?xml encoding='windows-1251'?>");

// Before any event is read, the decoder reports UTF-8.
assert_eq!(reader.decoder().encoding(), UTF_8);
// The first declaration (`UTF-16`) is expected to switch the decoder to UTF-16LE.
reader.read_event_impl($buf).unwrap();
assert_eq!(reader.decoder().encoding(), UTF_16LE);

// The second declaration (`windows-1251`) must not change the encoding again.
reader.read_event_impl($buf).unwrap();
assert_eq!(reader.decoder().encoding(), UTF_16LE);

// Nothing is left in the input.
assert_eq!(reader.read_event_impl($buf).unwrap(), Event::Eof);
}
}

/// Checks that XML declaration cannot change the encoding from UTF-8 if
/// a `Reader` was created using `from_str` method
#[test]
fn str_always_has_utf8() {
// The declaration names UTF-16, but the reader was built from a `&str`.
let mut reader = Reader::from_str("<?xml encoding='UTF-16'?>");

// The decoder reports UTF-8 both before and after the declaration is read.
assert_eq!(reader.decoder().encoding(), UTF_8);
reader.read_event_impl($buf).unwrap();
assert_eq!(reader.decoder().encoding(), UTF_8);

// Nothing is left in the input.
assert_eq!(reader.read_event_impl($buf).unwrap(), Event::Eof);
}
}
};
}

Expand Down
6 changes: 0 additions & 6 deletions src/reader/ns_reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -522,12 +522,6 @@ impl<'i> NsReader<&'i [u8]> {
Self::new(Reader::from_str(s))
}

/// Creates an XML reader from a slice of bytes.
///
/// Constructs the underlying reader with `Reader::from_bytes` and wraps it
/// using `Self::new`.
#[inline]
pub fn from_bytes(bytes: &'i [u8]) -> Self {
Self::new(Reader::from_bytes(bytes))
}

/// Reads the next event, borrow its content from the input buffer.
///
/// This method manages namespaces but doesn't resolve them automatically.
Expand Down
30 changes: 25 additions & 5 deletions src/reader/slice_reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,11 +32,6 @@ impl<'a> Reader<&'a [u8]> {
Self::from_reader(s.as_bytes())
}

/// Creates an XML reader from a slice of bytes.
///
/// Forwards the slice unchanged to `Self::from_reader`.
pub fn from_bytes(s: &'a [u8]) -> Self {
Self::from_reader(s)
}

/// Read an event that borrows from the input rather than a buffer.
#[inline]
pub fn read_event(&mut self) -> Result<Event<'a>> {
Expand Down Expand Up @@ -234,3 +229,28 @@ impl<'a> XmlSource<'a, ()> for &'a [u8] {
Ok(self.first().copied())
}
}

// Tests for the borrowing reader created with `Reader::from_str`.
#[cfg(test)]
mod test {

#[cfg(feature = "encoding")]
mod encoding {
use crate::events::Event;
use crate::reader::Reader;
use encoding_rs::UTF_8;
use pretty_assertions::assert_eq;

/// Checks that XML declaration cannot change the encoding from UTF-8 if
/// a `Reader` was created using `from_str` method
#[test]
fn str_always_has_utf8() {
// The declaration names UTF-16, but the reader was built from a `&str`.
let mut reader = Reader::from_str("<?xml encoding='UTF-16'?>");

// The decoder reports UTF-8 both before and after the declaration is read.
assert_eq!(reader.decoder().encoding(), UTF_8);
reader.read_event().unwrap();
assert_eq!(reader.decoder().encoding(), UTF_8);

// Nothing is left in the input.
assert_eq!(reader.read_event().unwrap(), Event::Eof);
}
}
}
23 changes: 12 additions & 11 deletions tests/test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@ use pretty_assertions::assert_eq;

#[test]
fn test_sample() {
let src: &[u8] = include_bytes!("documents/sample_rss.xml");
let mut r = Reader::from_bytes(src);
let src = include_str!("documents/sample_rss.xml");
let mut r = Reader::from_str(src);
let mut count = 0;
loop {
match r.read_event().unwrap() {
Expand All @@ -25,8 +25,8 @@ fn test_sample() {

#[test]
fn test_attributes_empty() {
let src = b"<a att1='a' att2='b'/>";
let mut r = Reader::from_bytes(src);
let src = "<a att1='a' att2='b'/>";
let mut r = Reader::from_str(src);
r.trim_text(true).expand_empty_elements(false);
match r.read_event() {
Ok(Empty(e)) => {
Expand All @@ -53,8 +53,8 @@ fn test_attributes_empty() {

#[test]
fn test_attribute_equal() {
let src = b"<a att1=\"a=b\"/>";
let mut r = Reader::from_reader(src as &[u8]);
let src = "<a att1=\"a=b\"/>";
let mut r = Reader::from_str(src);
r.trim_text(true).expand_empty_elements(false);
match r.read_event() {
Ok(Empty(e)) => {
Expand All @@ -74,8 +74,8 @@ fn test_attribute_equal() {

#[test]
fn test_comment_starting_with_gt() {
let src = b"<a /><!-->-->";
let mut r = Reader::from_reader(src as &[u8]);
let src = "<a /><!-->-->";
let mut r = Reader::from_str(src);
r.trim_text(true).expand_empty_elements(false);
loop {
match r.read_event() {
Expand All @@ -92,11 +92,12 @@ fn test_comment_starting_with_gt() {
#[test]
#[cfg(feature = "encoding")]
fn test_koi8_r_encoding() {
let src = include_bytes!("documents/opennews_all.rss");
let mut r = Reader::from_bytes(src);
let src = include_bytes!("documents/opennews_all.rss").as_ref();
let mut buf = vec![];
let mut r = Reader::from_reader(src);
r.trim_text(true).expand_empty_elements(false);
loop {
match r.read_event() {
match r.read_event_into(&mut buf) {
Ok(Text(e)) => {
e.unescape().unwrap();
}
Expand Down
11 changes: 6 additions & 5 deletions tests/unit_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -751,13 +751,14 @@ mod decode_with_bom_removal {
#[cfg(feature = "encoding")]
#[ignore = "Non-ASCII compatible encodings not properly supported yet. See https://github.com/tafia/quick-xml/issues/158"]
fn removes_utf16be_bom() {
let mut reader = Reader::from_bytes(include_bytes!("./documents/utf16be.xml"));
let mut reader = Reader::from_reader(include_bytes!("./documents/utf16be.xml").as_ref());
reader.trim_text(true);

let mut buf = Vec::new();
let mut txt = Vec::new();

loop {
match reader.read_event() {
match reader.read_event_into(&mut buf) {
Ok(StartText(e)) => txt.push(e.decode_with_bom_removal().unwrap()),
Ok(Eof) => break,
_ => (),
Expand All @@ -769,13 +770,13 @@ mod decode_with_bom_removal {
#[test]
#[cfg(feature = "encoding")]
fn removes_utf16le_bom() {
let mut reader = Reader::from_bytes(include_bytes!("./documents/utf16le.xml"));
let mut reader = Reader::from_reader(include_bytes!("./documents/utf16le.xml").as_ref());
reader.trim_text(true);

let mut buf = Vec::new();
let mut txt = Vec::new();

loop {
match reader.read_event() {
match reader.read_event_into(&mut buf) {
Ok(StartText(e)) => txt.push(e.decode_with_bom_removal().unwrap()),
Ok(Eof) => break,
_ => (),
Expand Down
2 changes: 1 addition & 1 deletion tests/xmlrs_reader_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -362,7 +362,7 @@ fn test(input: &str, output: &str, trim: bool) {

#[track_caller]
fn test_bytes(input: &[u8], output: &[u8], trim: bool) {
let mut reader = NsReader::from_bytes(input);
let mut reader = NsReader::from_reader(input);
reader
.trim_text(trim)
.check_comments(true)
Expand Down

0 comments on commit 682a6bb

Please sign in to comment.