From c6538e2df218c5c344b5374557751524b6d422aa Mon Sep 17 00:00:00 2001 From: Stephen Wakely Date: Sun, 30 Jul 2023 21:10:31 +0100 Subject: [PATCH 1/2] Escape only certain characters Signed-off-by: Stephen Wakely --- src/structured_data.rs | 175 ++++++++++++++++++++++------------------- 1 file changed, 93 insertions(+), 82 deletions(-) diff --git a/src/structured_data.rs b/src/structured_data.rs index e42a953..3f2f430 100644 --- a/src/structured_data.rs +++ b/src/structured_data.rs @@ -1,7 +1,7 @@ use nom::{ branch::alt, bytes::complete::{escaped, tag, take_till1, take_until, take_while1}, - character::complete::{one_of, space0}, + character::complete::{anychar, space0}, combinator::map, multi::{many1, separated_list0}, sequence::{delimited, separated_pair, terminated, tuple}, @@ -97,6 +97,11 @@ impl<'a, S: AsRef + Ord + Clone> Iterator for ParamsIter<'a, S> { } else if c == 'n' && escaped { escaped = false; trimmed.push('\n'); + } else if c != '"' && c != ']' && c != '\\' && escaped { + // If the character following the escape isn't a ', " or ] we treat it like an unescaped character. 
+ escaped = false; + trimmed.push('\\'); + trimmed.push(c); } else { escaped = false; trimmed.push(c); @@ -115,11 +120,7 @@ fn param_value(input: &str) -> IResult<&str, &str> { map(tag(r#""""#), |_| ""), delimited( tag("\""), - escaped( - take_while1(|c: char| c != '\\' && c != '"'), - '\\', - one_of(r#""n\]"#), - ), + escaped(take_while1(|c: char| c != '\\' && c != '"'), '\\', anychar), tag("\""), ), ))(input) @@ -190,86 +191,86 @@ pub(crate) fn structured_data_optional( } } -#[test] -fn parse_param_value() { - assert_eq!( - param_value("\"Some \\\"lovely\\\" string\"").unwrap(), - ("", "Some \\\"lovely\\\" string") - ); -} +#[cfg(test)] +mod tests { + use super::*; -#[test] -fn parse_empty_param_value() { - assert_eq!(param_value(r#""""#).unwrap(), ("", "")); -} + #[test] + fn parse_param_value() { + assert_eq!( + param_value("\"Some \\\"lovely\\\" string\"").unwrap(), + ("", "Some \\\"lovely\\\" string") + ); + } -#[test] -fn parse_structured_data() { - assert_eq!( - structured_datum_strict( - "[exampleSDID@32473 iut=\"3\" eventSource=\"Application\" eventID=\"1011\"]" - ) - .unwrap(), - ( - "", - Some(StructuredElement { - id: "exampleSDID@32473", - params: vec![ - ("iut", "3"), - ("eventSource", "Application"), - ("eventID", "1011"), - ] - }) - ) - ); -} + #[test] + fn parse_empty_param_value() { + assert_eq!(param_value(r#""""#).unwrap(), ("", "")); + } -#[test] -fn parse_structured_data_no_values() { - assert_eq!( - structured_datum(false)("[exampleSDID@32473]").unwrap(), - ( - "", - Some(StructuredElement { - id: "exampleSDID@32473", - params: vec![] - }) - ) - ); -} + #[test] + fn parse_structured_data() { + assert_eq!( + structured_datum_strict( + "[exampleSDID@32473 iut=\"3\" eventSource=\"Application\" eventID=\"1011\"]" + ) + .unwrap(), + ( + "", + Some(StructuredElement { + id: "exampleSDID@32473", + params: vec![ + ("iut", "3"), + ("eventSource", "Application"), + ("eventID", "1011"), + ] + }) + ) + ); + } -#[test] -fn 
parse_structured_data_with_space() { - assert_eq!( - structured_datum(false)( - "[exampleSDID@32473 iut=\"3\" eventSource= \"Application\" eventID=\"1011\"]" - ) - .unwrap(), - ( - "", - Some(StructuredElement { - id: "exampleSDID@32473", - params: vec![ - ("iut", "3"), - ("eventSource", "Application"), - ("eventID", "1011"), - ] - }) - ) - ); -} + #[test] + fn parse_structured_data_no_values() { + assert_eq!( + structured_datum(false)("[exampleSDID@32473]").unwrap(), + ( + "", + Some(StructuredElement { + id: "exampleSDID@32473", + params: vec![] + }) + ) + ); + } -#[test] -fn parse_invalid_structured_data() { - assert_eq!( - structured_datum(true)("[exampleSDID@32473 iut=]"), - Ok(("", None)) - ); -} + #[test] + fn parse_structured_data_with_space() { + assert_eq!( + structured_datum(false)( + "[exampleSDID@32473 iut=\"3\" eventSource= \"Application\" eventID=\"1011\"]" + ) + .unwrap(), + ( + "", + Some(StructuredElement { + id: "exampleSDID@32473", + params: vec![ + ("iut", "3"), + ("eventSource", "Application"), + ("eventID", "1011"), + ] + }) + ) + ); + } -#[cfg(test)] -mod tests { - use super::*; + #[test] + fn parse_invalid_structured_data() { + assert_eq!( + structured_datum(true)("[exampleSDID@32473 iut=]"), + Ok(("", None)) + ); + } #[test] fn parse_multiple_structured_data() { @@ -351,7 +352,7 @@ mod tests { #[test] fn params_remove_escapes() { let data = structured_data( - r#"[id aa="hullo \"there\"" bb="let's \\\\do this\\\\" cc="hello [bye\]" dd="hello\nbye"]"#, + r#"[id aa="hullo \"there\"" bb="let's \\\\do this\\\\" cc="hello [bye\]" dd="hello\nbye" ee="not \esc\aped"]"#, ) .unwrap(); let params = data.1[0].params().collect::>(); @@ -367,8 +368,18 @@ mod tests { r#"hello bye"# .to_string(), - ) + ), + (&"ee", r#"not \esc\aped"#.to_string()) ] ); } + + #[test] + fn sd_param_escapes() { + let (_, value) = param_value(r#""Here are some escaped characters -> \"\\\]""#).unwrap(); + assert_eq!(r#"Here are some escaped characters -> \"\\\]"#, value); + + 
let (_, value) = param_value(r#""These should not be escaped -> \n\m\o""#).unwrap(); + assert_eq!(r#"These should not be escaped -> \n\m\o"#, value); + } } From 00052a35326d353f6bd291933a3bc1f22a9ac271 Mon Sep 17 00:00:00 2001 From: Stephen Wakely Date: Sun, 30 Jul 2023 21:15:53 +0100 Subject: [PATCH 2/2] Clippy Signed-off-by: Stephen Wakely --- src/parsers.rs | 2 +- src/rfc3164.rs | 2 +- src/rfc5424.rs | 2 +- src/structured_data.rs | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/parsers.rs b/src/parsers.rs index 605f3d0..ba7dbfc 100644 --- a/src/parsers.rs +++ b/src/parsers.rs @@ -1,4 +1,4 @@ -///! Parsers shared by both protocols. +//! Parsers shared by both protocols. use nom::{ bytes::complete::take_while1, character::complete::digit1, combinator::map, combinator::map_res, IResult, diff --git a/src/rfc3164.rs b/src/rfc3164.rs index 1f0de5c..d5d4a63 100644 --- a/src/rfc3164.rs +++ b/src/rfc3164.rs @@ -1,4 +1,4 @@ -///! Parsers for rfc 3164 specific formats. +//! Parsers for rfc 3164 specific formats. use crate::{ message::{Message, Protocol}, parsers::{hostname, tagname}, diff --git a/src/rfc5424.rs b/src/rfc5424.rs index d671ec0..b88e06b 100644 --- a/src/rfc5424.rs +++ b/src/rfc5424.rs @@ -1,4 +1,4 @@ -///! Parsers for rfc 5424 specific formats. +//! Parsers for rfc 5424 specific formats. use crate::{ message::{Message, Protocol}, parsers::{appname, digits, hostname, msgid, procid}, diff --git a/src/structured_data.rs b/src/structured_data.rs index 3f2f430..b646f3d 100644 --- a/src/structured_data.rs +++ b/src/structured_data.rs @@ -98,7 +98,7 @@ impl<'a, S: AsRef + Ord + Clone> Iterator for ParamsIter<'a, S> { escaped = false; trimmed.push('\n'); } else if c != '"' && c != ']' && c != '\\' && escaped { - // If the character following the escape isn't a ', " or ] we treat it like an unescaped character. + // If the character following the escape isn't a \, " or ] we treat it like a normal unescaped character. 
escaped = false; trimmed.push('\\'); trimmed.push(c);