diff --git a/Cargo.lock b/Cargo.lock
index 44eb1db4a7..7374b167f4 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -4632,6 +4632,7 @@ dependencies = [
  "libs",
  "serde",
  "tempfile",
+ "time",
 ]
 
 [[package]]
diff --git a/components/content/src/front_matter/page.rs b/components/content/src/front_matter/page.rs
index b4bacd307c..bdc0491572 100644
--- a/components/content/src/front_matter/page.rs
+++ b/components/content/src/front_matter/page.rs
@@ -7,7 +7,7 @@ use time::macros::{format_description, time};
 use time::{Date, OffsetDateTime, PrimitiveDateTime};
 
 use errors::{bail, Result};
-use utils::de::{fix_toml_dates, from_toml_datetime};
+use utils::de::{fix_toml_dates, from_unknown_datetime};
 
 use crate::front_matter::split::RawFrontMatter;
 
@@ -20,7 +20,7 @@ pub struct PageFrontMatter {
     /// Description in that appears when linked, e.g. on twitter
     pub description: Option<String>,
     /// Updated date
-    #[serde(default, deserialize_with = "from_toml_datetime")]
+    #[serde(default, deserialize_with = "from_unknown_datetime")]
     pub updated: Option<String>,
     /// Datetime content was last updated
     #[serde(default, skip_deserializing)]
@@ -29,7 +29,7 @@ pub struct PageFrontMatter {
     #[serde(default, skip_deserializing)]
     pub updated_datetime_tuple: Option<(i32, u8, u8)>,
     /// Date if we want to order pages (ie blog post)
-    #[serde(default, deserialize_with = "from_toml_datetime")]
+    #[serde(default, deserialize_with = "from_unknown_datetime")]
     pub date: Option<String>,
     /// Datetime content was created
     #[serde(default, skip_deserializing)]
@@ -333,6 +333,36 @@ date: 2002-10-02T15:00:00.123456Z
         assert_eq!(res.datetime.unwrap(), datetime!(2002 - 10 - 02 15:00:00.123456 UTC));
     }
 
+    #[test_case(&RawFrontMatter::Yaml(r#"
+title: Hello
+description: hey there
+date: 2001-12-15T02:59:43.1Z
+"#); "canonical")]
+    #[test_case(&RawFrontMatter::Yaml(r#"
+title: Hello
+description: hey there
+date: 2001-12-14t21:59:43.10-05:00
+"#); "iso8601")]
+    #[test_case(&RawFrontMatter::Yaml(r#"
+title: Hello
+description: hey there
+date: 2001-12-14 21:59:43.10 -5
+"#); "space separated")]
+    #[test_case(&RawFrontMatter::Yaml(r#"
+title: Hello
+description: hey there
+date: 2001-12-15 2:59:43.10
+"#); "no time zone")]
+    #[test_case(&RawFrontMatter::Yaml(r#"
+title: Hello
+description: hey there
+date: 2001-12-15
+"#); "date only")]
+    fn can_parse_yaml_dates(content: &RawFrontMatter) {
+        let res = PageFrontMatter::parse(content).unwrap();
+        assert!(res.datetime.is_some());
+    }
+
     #[test_case(&RawFrontMatter::Toml(r#"
 title = "Hello"
 description = "hey there"
diff --git a/components/utils/Cargo.toml b/components/utils/Cargo.toml
index 65c4513381..73c2f86845 100644
--- a/components/utils/Cargo.toml
+++ b/components/utils/Cargo.toml
@@ -13,3 +13,4 @@ libs = { path = "../libs" }
 
 [dev-dependencies]
 tempfile = "3"
+time = { version = "0.3", features = ["macros"] }
diff --git a/components/utils/src/de.rs b/components/utils/src/de.rs
index 983b31d9da..c257fd0e04 100644
--- a/components/utils/src/de.rs
+++ b/components/utils/src/de.rs
@@ -1,12 +1,80 @@
+use core::convert::TryFrom;
+use errors::{anyhow, Result};
+use libs::regex::Regex;
 use libs::tera::{Map, Value};
+use libs::time;
+use libs::time::format_description::well_known::Rfc3339;
 use libs::toml;
 use serde::{Deserialize, Deserializer};
 
+/// Parses a YAML timestamp string into a [`time::OffsetDateTime`].
+///
+/// The canonical, ISO 8601, space separated and date-only forms are accepted. A missing
+/// time of day is read as midnight and a missing time zone as UTC. Fractional seconds are
+/// kept up to nanosecond precision; any further digits are dropped.
+///
+/// Returns an error if the string is not shaped like a timestamp or if one of its
+/// components is out of range.
+pub fn parse_yaml_datetime(date_string: &str) -> Result<time::OffsetDateTime> {
+    // See https://github.com/getzola/zola/issues/2071#issuecomment-1530610650
+    // The alternation is wrapped in a group so that `^` and `$` anchor both the full
+    // timestamp form and the date-only form, and the fractional part is optional.
+    let re = Regex::new(r#"^"?(?:([0-9]{4})-([0-9][0-9]?)-([0-9][0-9]?)([Tt]|[ \t]+)([0-9][0-9]?):([0-9]{2}):([0-9]{2})(?:\.([0-9]*))?Z?([ \t]([-+][0-9][0-9]?)(:([0-9][0-9]?))?Z?|([-+][0-9]{2})?:([0-9]{2})?)?|([0-9]{4})-([0-9]{2})-([0-9]{2}))"?$"#).unwrap();
+    let captures = re.captures(date_string).ok_or_else(|| anyhow!("Error parsing YAML datetime"))?;
+    // Groups 1-3 come from the full timestamp form, 15-17 from the date-only form; the
+    // regex guarantees that one of the two sets matched.
+    let year = captures.get(1).or_else(|| captures.get(15)).unwrap().as_str();
+    let month = captures.get(2).or_else(|| captures.get(16)).unwrap().as_str();
+    let day = captures.get(3).or_else(|| captures.get(17)).unwrap().as_str();
+    let hours = captures.get(5).map_or("0", |m| m.as_str());
+    let minutes = captures.get(6).map_or("0", |m| m.as_str());
+    let seconds = captures.get(7).map_or("0", |m| m.as_str());
+    // Right-pad the fraction to nine digits so that it reads as nanoseconds: "05" must
+    // become 050_000_000 and not 5 * 100_000_000.
+    let nanoseconds: u32 = match captures.get(8) {
+        Some(fraction) => {
+            let digits = fraction.as_str();
+            format!("{:0<9}", &digits[..digits.len().min(9)]).parse().unwrap()
+        }
+        None => 0,
+    };
+    // The offset is captured either by groups 10 and 12 (" -5", " -5:30") or by groups
+    // 13 and 14 ("-05:00").
+    let (timezone_hour, timezone_minute) = match captures.get(13) {
+        Some(hour) => (Some(hour), captures.get(14)),
+        None => (captures.get(10), captures.get(12)),
+    };
+    let offset = match timezone_hour {
+        Some(hour) => {
+            let minute = timezone_minute.map_or("0", |m| m.as_str());
+            time::UtcOffset::from_hms(hour.as_str().parse()?, minute.parse()?, 0)?
+        }
+        None => time::UtcOffset::UTC,
+    };
+
+    // Build the date and time from their parts instead of replacing the fields of the
+    // Unix epoch one by one: shifted to a negative offset the epoch falls on December
+    // 31st, so switching to any shorter month was rejected as an invalid date.
+    // Free parse unwraps since we know everything is a digit courtesy of prior regex.
+    let date = time::Date::from_calendar_date(
+        year.parse().unwrap(),
+        time::Month::try_from(month.parse::<u8>().unwrap())?,
+        day.parse().unwrap(),
+    )?;
+    let time_of_day = time::Time::from_hms_nano(
+        hours.parse().unwrap(),
+        minutes.parse().unwrap(),
+        seconds.parse().unwrap(),
+        nanoseconds,
+    )?;
+    Ok(time::PrimitiveDateTime::new(date, time_of_day).assume_offset(offset))
+}
+
 /// Used as an attribute when we want to convert from TOML to a string date
 /// If a TOML datetime isn't present, it will accept a string and push it through
 /// TOML's date time parser to ensure only valid dates are accepted.
 /// Inspired by this proposal: <https://github.com/alexcrichton/toml-rs/issues/269>
-pub fn from_toml_datetime<'de, D>(deserializer: D) -> Result<Option<String>, D::Error>
+pub fn from_unknown_datetime<'de, D>(deserializer: D) -> Result<Option<String>, D::Error>
 where
     D: Deserializer<'de>,
 {
@@ -22,10 +90,19 @@ where
 
     match MaybeDatetime::deserialize(deserializer)? {
         MaybeDatetime::Datetime(d) => Ok(Some(d.to_string())),
-        MaybeDatetime::String(s) => match toml::value::Datetime::from_str(&s) {
-            Ok(d) => Ok(Some(d.to_string())),
-            Err(e) => Err(D::Error::custom(e)),
-        },
+        MaybeDatetime::String(s) => {
+            if let Ok(d) = toml::value::Datetime::from_str(&s) {
+                Ok(Some(d.to_string()))
+            } else if let Ok(d) = parse_yaml_datetime(&s) {
+                // Ensure that the resulting string is easily reparseable down the line.
+                // In content::front_matter::page.rs where these strings are currently used,
+                // Rfc3339 works with the explicit demands in that code but not always with the result of
+                // _to_string.
+                Ok(Some(d.format(&Rfc3339).unwrap()))
+            } else {
+                Err(D::Error::custom("Unable to parse datetime"))
+            }
+        }
     }
 }
 
@@ -80,3 +157,88 @@ pub fn fix_toml_dates(table: Map<String, Value>) -> Value {
 
     Value::Object(new)
 }
+
+#[cfg(test)]
+mod tests {
+    use super::parse_yaml_datetime;
+    use time::macros::datetime;
+
+    #[test]
+    fn yaml_spec_examples_pass() {
+        let canonical = "2001-12-15T02:59:43.1Z";
+        let valid_iso8601 = "2001-12-14t21:59:43.10-05:00";
+        let space_separated = "2001-12-14 21:59:43.10 -5";
+        let no_time_zone = "2001-12-15 2:59:43.10";
+        let date = "2002-12-14";
+        assert_eq!(parse_yaml_datetime(canonical).unwrap(), datetime!(2001-12-15 2:59:43.1 +0));
+        assert_eq!(
+            parse_yaml_datetime(valid_iso8601).unwrap(),
+            datetime!(2001-12-14 21:59:43.1 -5)
+        );
+        assert_eq!(
+            parse_yaml_datetime(space_separated).unwrap(),
+            datetime!(2001-12-14 21:59:43.1 -5)
+        );
+        assert_eq!(parse_yaml_datetime(no_time_zone).unwrap(), datetime!(2001-12-15 2:59:43.1 +0));
+        assert_eq!(parse_yaml_datetime(date).unwrap(), datetime!(2002-12-14 0:00:00 +0));
+    }
+
+    #[test]
+    fn yaml_spec_invalid_dates_fail() {
+        let invalid_month = "2001-13-15";
+        assert!(parse_yaml_datetime(invalid_month).is_err());
+
+        let invalid_month = "2001-13-15T02:59:43.1Z";
+        assert!(parse_yaml_datetime(invalid_month).is_err());
+
+        let no_digits_in_year = "xxxx-12-15";
+        assert!(parse_yaml_datetime(no_digits_in_year).is_err());
+
+        let no_digits_in_year = "xxxx-12-15T02:59:43.1Z";
+        assert!(parse_yaml_datetime(no_digits_in_year).is_err());
+
+        let no_digits_in_month = "2001-xx-15";
+        assert!(parse_yaml_datetime(no_digits_in_month).is_err());
+
+        let no_digits_in_month = "2001-xx-15T02:59:43.1Z";
+        assert!(parse_yaml_datetime(no_digits_in_month).is_err());
+
+        let no_digits_in_day = "2001-12-xx";
+        assert!(parse_yaml_datetime(no_digits_in_day).is_err());
+
+        let no_digits_in_day = "2001-12-xx:59:43.1Z";
+        assert!(parse_yaml_datetime(no_digits_in_day).is_err());
+
+        let unparseable_time = "2001-12-15:69:43.1Z";
+        assert!(parse_yaml_datetime(unparseable_time).is_err());
+
+        let unparseable_time = "2001-12-15:59:4x.1Z";
+        assert!(parse_yaml_datetime(unparseable_time).is_err());
+    }
+
+    #[test]
+    fn yaml_fractions_offsets_and_anchors() {
+        // Multi-digit fractions scale to the matching number of nanoseconds.
+        assert_eq!(
+            parse_yaml_datetime("2001-12-15T02:59:43.05Z").unwrap(),
+            datetime!(2001-12-15 2:59:43.05 +0)
+        );
+        assert_eq!(
+            parse_yaml_datetime("2001-12-15T02:59:43.123456Z").unwrap(),
+            datetime!(2001-12-15 2:59:43.123456 +0)
+        );
+        // The fractional part is optional.
+        assert_eq!(
+            parse_yaml_datetime("2001-12-15 2:59:43").unwrap(),
+            datetime!(2001-12-15 2:59:43 +0)
+        );
+        // A negative offset must not break months shorter than 31 days.
+        assert_eq!(
+            parse_yaml_datetime("2001-02-14 21:59:43.10 -5").unwrap(),
+            datetime!(2001-02-14 21:59:43.1 -5)
+        );
+        // Both forms are anchored, so surrounding garbage is rejected.
+        assert!(parse_yaml_datetime("garbage 2001-12-15").is_err());
+        assert!(parse_yaml_datetime("2001-12-15T02:59:43.1Z garbage").is_err());
+    }
+}