Skip to content

Commit 6cf8c9f

Browse files
authored
Merge pull request #884 from Mingun/eol-normalization
Proper EOL normalization
2 parents dee4b1b + 38b44d4 commit 6cf8c9f

File tree

12 files changed

+487
-18
lines changed

12 files changed

+487
-18
lines changed

Changelog.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,11 +22,18 @@
2222
- `Deserializer::buffering_with_resolver`
2323
- [#878]: Add ability to serialize structs in `$value` fields. The struct name will
2424
be used as a tag name. Previously only enums were allowed there.
25+
- [#806]: Add `BytesText::xml_content`, `BytesCData::xml_content` and `BytesRef::xml_content`
26+
methods which return XML EOL normalized strings.
27+
- [#806]: Add `BytesText::html_content`, `BytesCData::html_content` and `BytesRef::html_content`
28+
methods which return HTML EOL normalized strings.
2529

2630
### Bug Fixes
2731

32+
- [#806]: Properly normalize EOL characters in `Deserializer`.
33+
2834
### Misc Changes
2935

36+
[#806]: https://github.com/tafia/quick-xml/issues/806
3037
[#878]: https://github.com/tafia/quick-xml/pull/878
3138
[#882]: https://github.com/tafia/quick-xml/pull/882
3239

benches/macrobenches.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ fn parse_document_from_str(doc: &str) -> XmlResult<()> {
5555
}
5656
}
5757
Event::Text(e) => {
58-
black_box(e.decode()?);
58+
black_box(e.xml_content()?);
5959
}
6060
Event::CData(e) => {
6161
black_box(e.into_inner());
@@ -80,7 +80,7 @@ fn parse_document_from_bytes(doc: &[u8]) -> XmlResult<()> {
8080
}
8181
}
8282
Event::Text(e) => {
83-
black_box(e.decode()?);
83+
black_box(e.xml_content()?);
8484
}
8585
Event::CData(e) => {
8686
black_box(e.into_inner());
@@ -106,7 +106,7 @@ fn parse_document_from_str_with_namespaces(doc: &str) -> XmlResult<()> {
106106
}
107107
}
108108
(resolved_ns, Event::Text(e)) => {
109-
black_box(e.decode()?);
109+
black_box(e.xml_content()?);
110110
black_box(resolved_ns);
111111
}
112112
(resolved_ns, Event::CData(e)) => {
@@ -134,7 +134,7 @@ fn parse_document_from_bytes_with_namespaces(doc: &[u8]) -> XmlResult<()> {
134134
}
135135
}
136136
(resolved_ns, Event::Text(e)) => {
137-
black_box(e.decode()?);
137+
black_box(e.xml_content()?);
138138
black_box(resolved_ns);
139139
}
140140
(resolved_ns, Event::CData(e)) => {

benches/microbenches.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -146,7 +146,7 @@ fn one_event(c: &mut Criterion) {
146146
config.trim_text(true);
147147
config.check_end_names = false;
148148
match r.read_event() {
149-
Ok(Event::Comment(e)) => nbtxt += e.decode().unwrap().len(),
149+
Ok(Event::Comment(e)) => nbtxt += e.xml_content().unwrap().len(),
150150
something_else => panic!("Did not expect {:?}", something_else),
151151
};
152152

src/de/mod.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2439,8 +2439,8 @@ impl<'i, R: XmlRead<'i>, E: EntityResolver> XmlReader<'i, R, E> {
24392439
}
24402440

24412441
match self.next_impl()? {
2442-
PayloadEvent::Text(e) => result.to_mut().push_str(&e.decode()?),
2443-
PayloadEvent::CData(e) => result.to_mut().push_str(&e.decode()?),
2442+
PayloadEvent::Text(e) => result.to_mut().push_str(&e.xml_content()?),
2443+
PayloadEvent::CData(e) => result.to_mut().push_str(&e.xml_content()?),
24442444
PayloadEvent::GeneralRef(e) => self.resolve_reference(result.to_mut(), e)?,
24452445

24462446
// SAFETY: current_event_is_last_text checks that event is Text, CData or GeneralRef
@@ -2456,8 +2456,8 @@ impl<'i, R: XmlRead<'i>, E: EntityResolver> XmlReader<'i, R, E> {
24562456
return match self.next_impl()? {
24572457
PayloadEvent::Start(e) => Ok(DeEvent::Start(e)),
24582458
PayloadEvent::End(e) => Ok(DeEvent::End(e)),
2459-
PayloadEvent::Text(e) => self.drain_text(e.decode()?),
2460-
PayloadEvent::CData(e) => self.drain_text(e.decode()?),
2459+
PayloadEvent::Text(e) => self.drain_text(e.xml_content()?),
2460+
PayloadEvent::CData(e) => self.drain_text(e.xml_content()?),
24612461
PayloadEvent::DocType(e) => {
24622462
self.entity_resolver
24632463
.capture(e)

src/encoding.rs

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,30 @@ impl Decoder {
150150
Cow::Owned(bytes) => Ok(self.decode(bytes)?.into_owned().into()),
151151
}
152152
}
153+
154+
/// Decodes the `Cow` buffer, normalizes EOLs using the supplied `normalize_eol` function, and preserves the `'b` lifetime when the input is borrowed
155+
pub(crate) fn content<'b>(
156+
&self,
157+
bytes: &Cow<'b, [u8]>,
158+
normalize_eol: impl Fn(&str) -> Cow<str>,
159+
) -> Result<Cow<'b, str>, EncodingError> {
160+
match bytes {
161+
Cow::Borrowed(bytes) => {
162+
let text = self.decode(bytes)?;
163+
match normalize_eol(&text) {
164+
// If the text is still borrowed after normalization, it was not changed, so return the decoded `text` itself
165+
Cow::Borrowed(_) => Ok(text),
166+
Cow::Owned(s) => Ok(Cow::Owned(s)),
167+
}
168+
}
169+
Cow::Owned(bytes) => {
170+
let text = self.decode(bytes)?;
171+
let text = normalize_eol(&text);
172+
// Convert to owned, because otherwise the `Cow` would be bound to the lifetime of the `bytes` reference instead of `'b`
173+
Ok(text.into_owned().into())
174+
}
175+
}
176+
}
153177
}
154178

155179
/// Decodes the provided bytes using the specified encoding.

0 commit comments

Comments
 (0)