diff --git a/benches/parse_language_id.rs b/benches/parse_language_id.rs index 9fc1dba..f3e3b3b 100644 --- a/benches/parse_language_id.rs +++ b/benches/parse_language_id.rs @@ -2,40 +2,40 @@ use criterion::{black_box, criterion_group, criterion_main, Criterion}; use unicode_locale_parser::parse_language_id; fn language_identifier_parser_bench(c: &mut Criterion) { - let strings = [ - "en-US", - "en-GB", - "es-AR", - "it", - "zh-Hans-CN", - "de-AT", - "pl", - "fr-FR", - "de-AT", - "sr-Cyrl-SR", - "nb-NO", - "fr-FR", - "mk", - "uk", - "en-US", - "en-GB", - "es-AR", - "th", - "de", - "zh-Cyrl-HN", - "en-Latn-US", - ]; + let strings = [ + "en-US", + "en-GB", + "es-AR", + "it", + "zh-Hans-CN", + "de-AT", + "pl", + "fr-FR", + "de-AT", + "sr-Cyrl-SR", + "nb-NO", + "fr-FR", + "mk", + "uk", + "en-US", + "en-GB", + "es-AR", + "th", + "de", + "zh-Cyrl-HN", + "en-Latn-US", + ]; - c.bench_function( - "unicode_locale_id_parser::lang:::parse_unicode_language_id", - |b| { - b.iter(|| { - for s in strings { - let _ = parse_language_id(black_box(s)); - } - }) - }, - ); + c.bench_function( + "unicode_locale_id_parser::lang:::parse_unicode_language_id", + |b| { + b.iter(|| { + for s in strings { + let _ = parse_language_id(black_box(s)); + } + }) + }, + ); } criterion_group!(benches, language_identifier_parser_bench); diff --git a/benches/parse_locale_id.rs b/benches/parse_locale_id.rs index 7fd1fcd..e0b416f 100644 --- a/benches/parse_locale_id.rs +++ b/benches/parse_locale_id.rs @@ -2,40 +2,40 @@ use criterion::{black_box, criterion_group, criterion_main, Criterion}; use unicode_locale_parser::parse_locale_id; fn locale_identifier_parser_bench(c: &mut Criterion) { - let strings = [ - "en-US", - "en-GB", - "es-AR", - "it", - "zh-Hans-CN", - "de-AT", - "pl", - "fr-FR", - "de-AT", - "sr-Cyrl-SR", - "nb-NO", - "fr-FR", - "mk", - "uk", - "en-US", - "en-GB", - "es-AR", - "th", - "de", - "zh-Cyrl-HN", - "en-Latn-US", - ]; + let strings = [ + "en-US", + "en-GB", + "es-AR", + "it", + "zh-Hans-CN", + "de-AT", + "pl", + "fr-FR", + "de-AT", + "sr-Cyrl-SR", + "nb-NO", + "fr-FR", + "mk", + "uk", + "en-US", + "en-GB", + "es-AR", + "th", + "de", + "zh-Cyrl-HN", + "en-Latn-US", + ]; - c.bench_function( - "unicode_locale_id_parser::locale::parse_unicode_locale_id", - |b| { - b.iter(|| { - for s in strings { - let _ = parse_locale_id(black_box(s)); - } - }) - }, - ); + c.bench_function( + "unicode_locale_id_parser::locale::parse_unicode_locale_id", + |b| { + b.iter(|| { + for s in strings { + let _ = parse_locale_id(black_box(s)); + } + }) + }, + ); } criterion_group!(benches, locale_identifier_parser_bench); diff --git a/examples/parse.rs b/examples/parse.rs index 1cd6e73..8e8da13 100644 --- a/examples/parse.rs +++ b/examples/parse.rs @@ -1,11 +1,11 @@ use unicode_locale_parser::parse_locale_id; fn main() { - // simple language - let locale = parse_locale_id("ja-JP"); - println!("{:#?}", locale); + // simple language + let locale = parse_locale_id("ja-JP"); + println!("{:#?}", locale); - // language & unicode locale extension - let locale = parse_locale_id("de-Latn-DE-u-ca-buddhist"); - println!("{:#?}", locale); + // language & unicode locale extension + let locale = parse_locale_id("de-Latn-DE-u-ca-buddhist"); + println!("{:#?}", locale); } diff --git a/rustfmt.toml b/rustfmt.toml new file mode 100644 index 0000000..6f2e075 --- /dev/null +++ b/rustfmt.toml @@ -0,0 +1 @@ +tab_spaces = 2 \ No newline at end of file diff --git a/src/errors.rs b/src/errors.rs index 84f2d5f..9e20165 100644 --- a/src/errors.rs +++ b/src/errors.rs @@ -4,32 +4,32 @@ use std::fmt::{Display, Formatter, Result}; /// Enum representing the possible errors that can occur when parsing [Unicode UTS #35 Language and Locale Identifiers](https://unicode.org/reports/tr35/#Identifiers). #[derive(Debug, PartialEq)] pub enum ParserError { - /// A missing identifier error. - Missing, - /// An invalid language identifier error. - InvalidLanguage, - /// An invalid subtag error. - InvalidSubtag, - /// An invalid unicode extensions error. - InvalidExtension, - /// An invalid unicode subdivision error. - InvalidSubdivision, - /// An unexpected error. - Unexpected, + /// A missing identifier error. + Missing, + /// An invalid language identifier error. + InvalidLanguage, + /// An invalid subtag error. + InvalidSubtag, + /// An invalid unicode extensions error. + InvalidExtension, + /// An invalid unicode subdivision error. + InvalidSubdivision, + /// An unexpected error. + Unexpected, } impl Error for ParserError {} impl Display for ParserError { - fn fmt(&self, f: &mut Formatter) -> Result { - let value = match self { - ParserError::Missing => "Missing identifier", - ParserError::InvalidLanguage => "Invalid language identifier", - ParserError::InvalidSubtag => "Invalid subtag", - ParserError::InvalidExtension => "Invalid extension", - ParserError::InvalidSubdivision => "Invalid subdivision", - ParserError::Unexpected => "Unexpected error", - }; - f.write_str(value) - } + fn fmt(&self, f: &mut Formatter) -> Result { + let value = match self { + ParserError::Missing => "Missing identifier", + ParserError::InvalidLanguage => "Invalid language identifier", + ParserError::InvalidSubtag => "Invalid subtag", + ParserError::InvalidExtension => "Invalid extension", + ParserError::InvalidSubdivision => "Invalid subdivision", + ParserError::Unexpected => "Unexpected error", + }; + f.write_str(value) + } } diff --git a/src/extensions.rs b/src/extensions.rs index 6630476..cbbac60 100644 --- a/src/extensions.rs +++ b/src/extensions.rs @@ -17,144 +17,144 @@ use std::iter::Peekable; #[derive(Debug, PartialEq)] enum ExtensionKind { - UnicodeLocale, - Transformed, - Pu, - Other(char), + UnicodeLocale, + Transformed, + Pu, + Other(char), } impl ExtensionKind { - fn from_byte(key: u8) -> Result { - let key = key.to_ascii_lowercase(); - match key { - b'u' => Ok(ExtensionKind::UnicodeLocale), - b't' => Ok(ExtensionKind::Transformed), - b'x' => Ok(ExtensionKind::Pu), - other if other.is_ascii_alphanumeric() => Ok(ExtensionKind::Other(char::from(other))), - _ => Err(ParserError::InvalidExtension), - } + fn from_byte(key: u8) -> Result { + let key = key.to_ascii_lowercase(); + match key { + b'u' => Ok(ExtensionKind::UnicodeLocale), + b't' => Ok(ExtensionKind::Transformed), + b'x' => Ok(ExtensionKind::Pu), + other if other.is_ascii_alphanumeric() => Ok(ExtensionKind::Other(char::from(other))), + _ => Err(ParserError::InvalidExtension), } + } } impl fmt::Display for ExtensionKind { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - let c = match self { - ExtensionKind::UnicodeLocale => 'u', - ExtensionKind::Transformed => 't', - ExtensionKind::Pu => 'x', - ExtensionKind::Other(c) => *c, - }; - f.write_char(c) - } + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let c = match self { + ExtensionKind::UnicodeLocale => 'u', + ExtensionKind::Transformed => 't', + ExtensionKind::Pu => 'x', + ExtensionKind::Other(c) => *c, + }; + f.write_char(c) + } } #[derive(Debug)] pub struct Extensions { - pub unicode_locale: Option>, - pub transformed: Option>, - pub other: Option>, - pub pu: Option, + pub unicode_locale: Option>, + pub transformed: Option>, + pub other: Option>, + pub pu: Option, } impl fmt::Display for Extensions { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - let mut messages = vec![]; - if let Some(unicode_locale) = &self.unicode_locale { - for u in unicode_locale { - messages.push(format!("{}", u)); - } - } - if let Some(transformed) = &self.transformed { - for t in transformed { - messages.push(format!("{}", t)); - } - } - if let Some(other) = &self.other { - for o in other { - messages.push(format!("{}", o)); - } - } - if let Some(pu) = &self.pu { - messages.push(format!("{}", pu)); - } + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let mut messages = vec![]; + if let Some(unicode_locale) = &self.unicode_locale { + for u in unicode_locale { + messages.push(format!("{}", u)); + } + } + if let Some(transformed) = &self.transformed { + for t in transformed { + messages.push(format!("{}", t)); + } + } + if let Some(other) = &self.other { + for o in other { + messages.push(format!("{}", o)); + } + } + if let Some(pu) = &self.pu { + messages.push(format!("{}", pu)); + } - if !messages.is_empty() { - f.write_str(&messages.join(&SEP.to_string()))?; - } - Ok(()) + if !messages.is_empty() { + f.write_str(&messages.join(&SEP.to_string()))?; } + Ok(()) + } } #[allow(dead_code)] pub fn parse_extensions(chunk: &str) -> Result { - // check empty - if chunk.is_empty() { - return Err(ParserError::Missing); - } + // check empty + if chunk.is_empty() { + return Err(ParserError::Missing); + } - parse_extensions_from_iter(&mut split_str(chunk).peekable()) + parse_extensions_from_iter(&mut split_str(chunk).peekable()) } pub fn parse_extensions_from_iter<'a>( - iter: &mut Peekable>, + iter: &mut Peekable>, ) -> Result { - let mut unicode_locale = vec![]; - let mut transformed = vec![]; - let mut other = vec![]; - let mut pu = None; + let mut unicode_locale = vec![]; + let mut transformed = vec![]; + let mut other = vec![]; + let mut pu = None; - let mut chunk = iter.next(); - while let Some(subtag) = chunk { - match subtag - .as_bytes() - .first() - .map(|c| ExtensionKind::from_byte(*c)) - { - Some(Ok(ExtensionKind::UnicodeLocale)) => { - unicode_locale.push(parse_unicode_locale_extensions(iter)?); - } - Some(Ok(ExtensionKind::Transformed)) => { - transformed.push(parse_transformed_extensions(iter)?); - } - Some(Ok(ExtensionKind::Pu)) => { - if pu.is_some() { - return Err(ParserError::Unexpected); - } - pu = Some(parse_pu_extensions(iter)?); - } - Some(Ok(ExtensionKind::Other(c))) => { - other.push(parse_other_extensions(iter, c)?); - } - None => {} - _ => unreachable!(), + let mut chunk = iter.next(); + while let Some(subtag) = chunk { + match subtag + .as_bytes() + .first() + .map(|c| ExtensionKind::from_byte(*c)) + { + Some(Ok(ExtensionKind::UnicodeLocale)) => { + unicode_locale.push(parse_unicode_locale_extensions(iter)?); + } + Some(Ok(ExtensionKind::Transformed)) => { + transformed.push(parse_transformed_extensions(iter)?); + } + Some(Ok(ExtensionKind::Pu)) => { + if pu.is_some() { + return Err(ParserError::Unexpected); } - - chunk = iter.next(); + pu = Some(parse_pu_extensions(iter)?); + } + Some(Ok(ExtensionKind::Other(c))) => { + other.push(parse_other_extensions(iter, c)?); + } + None => {} + _ => unreachable!(), } - // normalize unicode locale extensions - let unicode_locale = if unicode_locale.is_empty() { - None - } else { - Some(unicode_locale) - }; + chunk = iter.next(); + } - // normalize transformed extensions - let transformed = if transformed.is_empty() { - None - } else { - Some(transformed) - }; + // normalize unicode locale extensions + let unicode_locale = if unicode_locale.is_empty() { + None + } else { + Some(unicode_locale) + }; + + // normalize transformed extensions + let transformed = if transformed.is_empty() { + None + } else { + Some(transformed) + }; - // normalize other extensions - let other = if other.is_empty() { None } else { Some(other) }; + // normalize other extensions + let other = if other.is_empty() { None } else { Some(other) }; - Ok(Extensions { - unicode_locale, - transformed, - pu, - other, - }) + Ok(Extensions { + unicode_locale, + transformed, + pu, + other, + }) } /** @@ -163,52 +163,51 @@ pub fn parse_extensions_from_iter<'a>( #[test] fn success_parse_extensions() { - // basic - let extensions = parse_extensions( + // basic + let extensions = + parse_extensions("U-attr1-kz-value2-t-en-Latn-US-macos-t1-value1-value2-a-vue-rust-x-foo-123") + .unwrap(); + let unicode_locale = extensions.unicode_locale.unwrap(); + assert_eq!( + ["u-attr1-kz-value2"], + unicode_locale + .iter() + .map(|u| format!("{}", u)) + .collect::>() + .as_slice() + ); + let transformed = extensions.transformed.unwrap(); + assert_eq!( + ["t-en-Latn-US-macos-t1-value1-value2"], + transformed + .iter() + .map(|t| format!("{}", t)) + .collect::>() + .as_slice() + ); + let other = extensions.other.unwrap(); + assert_eq!( + ["a-vue-rust"], + other + .iter() + .map(|o| format!("{}", o)) + .collect::>() + .as_slice() + ); + let pu = extensions.pu.unwrap(); + assert_eq!("x-foo-123", format!("{}", pu)); + + // Display trait implementation + assert_eq!( + "u-attr1-kz-value2-t-en-Latn-US-macos-t1-value1-value2-a-vue-rust-x-foo-123", + format!( + "{}", + parse_extensions( "U-attr1-kz-value2-t-en-Latn-US-macos-t1-value1-value2-a-vue-rust-x-foo-123", + ) + .unwrap() ) - .unwrap(); - let unicode_locale = extensions.unicode_locale.unwrap(); - assert_eq!( - ["u-attr1-kz-value2"], - unicode_locale - .iter() - .map(|u| format!("{}", u)) - .collect::>() - .as_slice() - ); - let transformed = extensions.transformed.unwrap(); - assert_eq!( - ["t-en-Latn-US-macos-t1-value1-value2"], - transformed - .iter() - .map(|t| format!("{}", t)) - .collect::>() - .as_slice() - ); - let other = extensions.other.unwrap(); - assert_eq!( - ["a-vue-rust"], - other - .iter() - .map(|o| format!("{}", o)) - .collect::>() - .as_slice() - ); - let pu = extensions.pu.unwrap(); - assert_eq!("x-foo-123", format!("{}", pu)); - - // Display trait implementation - assert_eq!( - "u-attr1-kz-value2-t-en-Latn-US-macos-t1-value1-value2-a-vue-rust-x-foo-123", - format!( - "{}", - parse_extensions( - "U-attr1-kz-value2-t-en-Latn-US-macos-t1-value1-value2-a-vue-rust-x-foo-123", - ) - .unwrap() - ) - ); + ); } /* @@ -217,33 +216,33 @@ fn success_parse_extensions() { #[test] fn fail_parse_unicode_extensions() { - // missing locale - assert_eq!(ParserError::Missing, parse_extensions("").unwrap_err()); + // missing locale + assert_eq!(ParserError::Missing, parse_extensions("").unwrap_err()); } #[test] fn success_extension_kind_from_byte() { - assert_eq!( - ExtensionKind::UnicodeLocale, - ExtensionKind::from_byte(b'u').unwrap() - ); - assert_eq!( - ExtensionKind::Transformed, - ExtensionKind::from_byte(b't').unwrap() - ); - assert_eq!( - ExtensionKind::Transformed, - ExtensionKind::from_byte(b'T').unwrap() - ); - assert_eq!(ExtensionKind::Pu, ExtensionKind::from_byte(b'x').unwrap()); - assert_eq!( - ExtensionKind::Other('a'), - ExtensionKind::from_byte(b'a').unwrap() - ); - assert_eq!( - ExtensionKind::Other('1'), - ExtensionKind::from_byte(b'1').unwrap() - ); + assert_eq!( + ExtensionKind::UnicodeLocale, + ExtensionKind::from_byte(b'u').unwrap() + ); + assert_eq!( + ExtensionKind::Transformed, + ExtensionKind::from_byte(b't').unwrap() + ); + assert_eq!( + ExtensionKind::Transformed, + ExtensionKind::from_byte(b'T').unwrap() + ); + assert_eq!(ExtensionKind::Pu, ExtensionKind::from_byte(b'x').unwrap()); + assert_eq!( + ExtensionKind::Other('a'), + ExtensionKind::from_byte(b'a').unwrap() + ); + assert_eq!( + ExtensionKind::Other('1'), + ExtensionKind::from_byte(b'1').unwrap() + ); } /** @@ -252,20 +251,20 @@ fn success_extension_kind_from_byte() { #[test] fn fail_extension_kind_from_byte() { - assert_eq!( - ParserError::InvalidExtension, - ExtensionKind::from_byte(b'!').unwrap_err() - ); - assert_eq!( - ParserError::InvalidExtension, - ExtensionKind::from_byte(b' ').unwrap_err() - ); + assert_eq!( + ParserError::InvalidExtension, + ExtensionKind::from_byte(b'!').unwrap_err() + ); + assert_eq!( + ParserError::InvalidExtension, + ExtensionKind::from_byte(b' ').unwrap_err() + ); } #[test] fn extention_kind_display() { - assert_eq!("u", format!("{}", ExtensionKind::UnicodeLocale)); - assert_eq!("t", format!("{}", ExtensionKind::Transformed)); - assert_eq!("x", format!("{}", ExtensionKind::Pu)); - assert_eq!("a", format!("{}", ExtensionKind::Other('a'))); + assert_eq!("u", format!("{}", ExtensionKind::UnicodeLocale)); + assert_eq!("t", format!("{}", ExtensionKind::Transformed)); + assert_eq!("x", format!("{}", ExtensionKind::Pu)); + assert_eq!("a", format!("{}", ExtensionKind::Other('a'))); } diff --git a/src/extensions/other.rs b/src/extensions/other.rs index 67f8585..378abb5 100644 --- a/src/extensions/other.rs +++ b/src/extensions/other.rs @@ -6,51 +6,51 @@ use std::iter::Peekable; #[derive(Debug, PartialEq)] pub struct OtherExtensions { - pub values: Vec, - pub extension: char, + pub values: Vec, + pub extension: char, } impl fmt::Display for OtherExtensions { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - f.write_char(self.extension)?; - for value in &self.values { - f.write_char(SEP)?; - f.write_str(value)?; - } - Ok(()) + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.write_char(self.extension)?; + for value in &self.values { + f.write_char(SEP)?; + f.write_str(value)?; } + Ok(()) + } } pub fn parse_other_extensions<'a>( - iter: &mut Peekable>, - extension: char, + iter: &mut Peekable>, + extension: char, ) -> Result { - // other_extensions - // https://www.unicode.org/reports/tr35/tr35-71/tr35.html#other_extensions - let mut values = vec![]; + // other_extensions + // https://www.unicode.org/reports/tr35/tr35-71/tr35.html#other_extensions + let mut values = vec![]; - while let Some(subtag) = iter.peek() { - if subtag.len() == 1 { - break; - } else { - values.push(String::from(parse_value(subtag)?)); - iter.next(); - } + while let Some(subtag) = iter.peek() { + if subtag.len() == 1 { + break; + } else { + values.push(String::from(parse_value(subtag)?)); + iter.next(); } + } - Ok(OtherExtensions { values, extension }) + Ok(OtherExtensions { values, extension }) } fn is_other_value_subtag(subtag: &[u8]) -> bool { - (2..=8).contains(&subtag.len()) && subtag.iter().all(|c| c.is_ascii_alphanumeric()) + (2..=8).contains(&subtag.len()) && subtag.iter().all(|c| c.is_ascii_alphanumeric()) } fn parse_value(subtag: &str) -> Result<&str, ParserError> { - if !is_other_value_subtag(subtag.as_bytes()) { - Err(ParserError::InvalidSubtag) - } else { - Ok(subtag) - } + if !is_other_value_subtag(subtag.as_bytes()) { + Err(ParserError::InvalidSubtag) + } else { + Ok(subtag) + } } /** @@ -62,27 +62,27 @@ use crate::shared::split_str; #[test] fn success_other_extensions() { - // full case - let mut iter = split_str("abc-123").peekable(); - assert_eq!( - vec!["abc", "123"], - parse_other_extensions(&mut iter, 'a').unwrap().values - ); + // full case + let mut iter = split_str("abc-123").peekable(); + assert_eq!( + vec!["abc", "123"], + parse_other_extensions(&mut iter, 'a').unwrap().values + ); - // Display trait implementation - let mut iter = split_str("abc-123").peekable(); - assert_eq!( - "b-abc-123", - format!("{}", parse_other_extensions(&mut iter, 'b').unwrap()) - ); + // Display trait implementation + let mut iter = split_str("abc-123").peekable(); + assert_eq!( + "b-abc-123", + format!("{}", parse_other_extensions(&mut iter, 'b').unwrap()) + ); } #[test] fn fail_pu_extensions() { - // invalid subtag - let mut iter = split_str("abc-123456789").peekable(); - assert_eq!( - ParserError::InvalidSubtag, - parse_other_extensions(&mut iter, '1').unwrap_err() - ); + // invalid subtag + let mut iter = split_str("abc-123456789").peekable(); + assert_eq!( + ParserError::InvalidSubtag, + parse_other_extensions(&mut iter, '1').unwrap_err() + ); } diff --git a/src/extensions/pu.rs b/src/extensions/pu.rs index 5cfa245..0720677 100644 --- a/src/extensions/pu.rs +++ b/src/extensions/pu.rs @@ -7,44 +7,44 @@ use std::iter::Peekable; #[derive(Debug)] pub struct PuExtensions { - pub values: Vec, + pub values: Vec, } impl fmt::Display for PuExtensions { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "{}", ExtensionKind::Pu)?; - for value in &self.values { - f.write_char(SEP)?; - f.write_str(value)?; - } - Ok(()) + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{}", ExtensionKind::Pu)?; + for value in &self.values { + f.write_char(SEP)?; + f.write_str(value)?; } + Ok(()) + } } pub fn parse_pu_extensions<'a>( - iter: &mut Peekable>, + iter: &mut Peekable>, ) -> Result { - // pu_extensions - // https://www.unicode.org/reports/tr35/tr35-71/tr35.html#pu_extensions - let mut values = vec![]; + // pu_extensions + // https://www.unicode.org/reports/tr35/tr35-71/tr35.html#pu_extensions + let mut values = vec![]; - for subtag in iter { - values.push(String::from(parse_value(subtag)?)); - } + for subtag in iter { + values.push(String::from(parse_value(subtag)?)); + } - Ok(PuExtensions { values }) + Ok(PuExtensions { values }) } fn is_pu_value_subtag(subtag: &[u8]) -> bool { - (1..=8).contains(&subtag.len()) && subtag.iter().all(|c| c.is_ascii_alphanumeric()) + (1..=8).contains(&subtag.len()) && subtag.iter().all(|c| c.is_ascii_alphanumeric()) } fn parse_value(subtag: &str) -> Result<&str, ParserError> { - if !is_pu_value_subtag(subtag.as_bytes()) { - Err(ParserError::InvalidSubtag) - } else { - Ok(subtag) - } + if !is_pu_value_subtag(subtag.as_bytes()) { + Err(ParserError::InvalidSubtag) + } else { + Ok(subtag) + } } /** @@ -56,27 +56,27 @@ use crate::shared::split_str; #[test] fn success_pu_extensions() { - // full case - let mut iter = split_str("abc-123").peekable(); - assert_eq!( - vec!["abc", "123"], - parse_pu_extensions(&mut iter).unwrap().values - ); + // full case + let mut iter = split_str("abc-123").peekable(); + assert_eq!( + vec!["abc", "123"], + parse_pu_extensions(&mut iter).unwrap().values + ); - // Display trait implementation - let mut iter = split_str("abc-123").peekable(); - assert_eq!( - "x-abc-123", - format!("{}", parse_pu_extensions(&mut iter).unwrap()) - ); + // Display trait implementation + let mut iter = split_str("abc-123").peekable(); + assert_eq!( + "x-abc-123", + format!("{}", parse_pu_extensions(&mut iter).unwrap()) + ); } #[test] fn fail_pu_extensions() { - // invalid subtag - let mut iter = split_str("abc-123456789").peekable(); - assert_eq!( - ParserError::InvalidSubtag, - parse_pu_extensions(&mut iter).unwrap_err() - ); + // invalid subtag + let mut iter = split_str("abc-123456789").peekable(); + assert_eq!( + ParserError::InvalidSubtag, + parse_pu_extensions(&mut iter).unwrap_err() + ); } diff --git a/src/extensions/transformed.rs b/src/extensions/transformed.rs index 6eadd41..b5eabcc 100644 --- a/src/extensions/transformed.rs +++ b/src/extensions/transformed.rs @@ -10,89 +10,87 @@ use std::iter::Peekable; #[derive(Debug)] pub struct TransformedExtensions { - pub tlang: Option, - pub tfield: BTreeMap>, + pub tlang: Option, + pub tfield: BTreeMap>, } impl fmt::Display for TransformedExtensions { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "{}", ExtensionKind::Transformed)?; - if let Some(tlang) = &self.tlang { - f.write_char(SEP)?; - write!(f, "{}", tlang)?; - } - for (key, values) in &self.tfield { - f.write_char(SEP)?; - f.write_str(key)?; - for value in values { - f.write_char(SEP)?; - f.write_str(value)?; - } - } - Ok(()) + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{}", ExtensionKind::Transformed)?; + if let Some(tlang) = &self.tlang { + f.write_char(SEP)?; + write!(f, "{}", tlang)?; + } + for (key, values) in &self.tfield { + f.write_char(SEP)?; + f.write_str(key)?; + for value in values { + f.write_char(SEP)?; + f.write_str(value)?; + } } + Ok(()) + } } pub fn parse_transformed_extensions<'a>( - iter: &mut Peekable>, + iter: &mut Peekable>, ) -> Result { - // transformed_extensions - // https://unicode.org/reports/tr35/#transformed_extensions - let mut tlang = None; - let mut tfield = BTreeMap::new(); - let mut tkey: Option = None; - let mut tvalue: Vec = vec![]; - - while let Some(subtag) = iter.peek() { - let subtag_bytes = subtag.as_bytes(); - let len = subtag_bytes.len(); - if len == 1 { - break; - } else if len == 2 - && subtag_bytes[0].is_ascii_alphabetic() - && subtag_bytes[1].is_ascii_digit() - { - // for tkey - if let Some(tkey) = tkey { - if !tfield.contains_key(&tkey) { - tfield.insert(tkey.clone(), vec![]); - } - let values = tfield.get_mut(&tkey).unwrap(); - for value in tvalue { - values.push(value); - } - tvalue = vec![]; - } - tkey = Some(subtag.to_string()); - iter.next(); - } else if (3..=8).contains(&len) && subtag_bytes.iter().all(|c| c.is_ascii_alphanumeric()) { - // for tvalue - if tkey.is_none() { - return Err(ParserError::InvalidSubtag); - } - tvalue.push(subtag.to_string()); - iter.next(); - } else if is_language_subtag(subtag_bytes) { - tlang = Some(parse_unicode_language_id_from_iter(iter)?); - } else { - return Err(ParserError::InvalidSubtag); - } - } - - if let Some(tkey) = tkey { - if tvalue.is_empty() { - return Err(ParserError::InvalidSubtag); - } + // transformed_extensions + // https://unicode.org/reports/tr35/#transformed_extensions + let mut tlang = None; + let mut tfield = BTreeMap::new(); + let mut tkey: Option = None; + let mut tvalue: Vec = vec![]; + + while let Some(subtag) = iter.peek() { + let subtag_bytes = subtag.as_bytes(); + let len = subtag_bytes.len(); + if len == 1 { + break; + } else if len == 2 && subtag_bytes[0].is_ascii_alphabetic() && subtag_bytes[1].is_ascii_digit() + { + // for tkey + if let Some(tkey) = tkey { if !tfield.contains_key(&tkey) { - tfield.insert(tkey.clone(), vec![]); + tfield.insert(tkey.clone(), vec![]); } let values = tfield.get_mut(&tkey).unwrap(); for value in tvalue { - values.push(value); + values.push(value); } + tvalue = vec![]; + } + tkey = Some(subtag.to_string()); + iter.next(); + } else if (3..=8).contains(&len) && subtag_bytes.iter().all(|c| c.is_ascii_alphanumeric()) { + // for tvalue + if tkey.is_none() { + return Err(ParserError::InvalidSubtag); + } + tvalue.push(subtag.to_string()); + iter.next(); + } else if is_language_subtag(subtag_bytes) { + tlang = Some(parse_unicode_language_id_from_iter(iter)?); + } else { + return Err(ParserError::InvalidSubtag); + } + } + + if let Some(tkey) = tkey { + if tvalue.is_empty() { + return Err(ParserError::InvalidSubtag); + } + if !tfield.contains_key(&tkey) { + tfield.insert(tkey.clone(), vec![]); + } + let values = tfield.get_mut(&tkey).unwrap(); + for value in tvalue { + values.push(value); } + } - Ok(TransformedExtensions { tlang, tfield }) + Ok(TransformedExtensions { tlang, tfield }) } /* @@ -104,55 +102,55 @@ use crate::shared::split_str; #[test] fn success_transformed_extensions() { - // basic case - let mut iter = split_str("en-US-a1-foo").peekable(); - assert_eq!( - "t-en-US-a1-foo", - format!("{}", parse_transformed_extensions(&mut iter).unwrap()) - ); - - // no tlang - let mut iter = split_str("a1-foo").peekable(); - assert_eq!( - "t-a1-foo", - format!("{}", parse_transformed_extensions(&mut iter).unwrap()) - ); - - // tvalue multiple - let mut iter = split_str("en-a1-foo-b1-bar").peekable(); - assert_eq!( - "t-en-a1-foo-b1-bar", - format!("{}", parse_transformed_extensions(&mut iter).unwrap()) - ); - - // tlang only - let mut iter = split_str("en-Latn-US-macos").peekable(); - assert_eq!( - "t-en-Latn-US-macos", - format!("{}", parse_transformed_extensions(&mut iter).unwrap()) - ); + // basic case + let mut iter = split_str("en-US-a1-foo").peekable(); + assert_eq!( + "t-en-US-a1-foo", + format!("{}", parse_transformed_extensions(&mut iter).unwrap()) + ); + + // no tlang + let mut iter = split_str("a1-foo").peekable(); + assert_eq!( + "t-a1-foo", + format!("{}", parse_transformed_extensions(&mut iter).unwrap()) + ); + + // tvalue multiple + let mut iter = split_str("en-a1-foo-b1-bar").peekable(); + assert_eq!( + "t-en-a1-foo-b1-bar", + format!("{}", parse_transformed_extensions(&mut iter).unwrap()) + ); + + // tlang only + let mut iter = split_str("en-Latn-US-macos").peekable(); + assert_eq!( + "t-en-Latn-US-macos", + format!("{}", parse_transformed_extensions(&mut iter).unwrap()) + ); } #[test] fn fail_transformed_extensions() { - // invalid tkey - let mut iter = split_str("1a-foo").peekable(); - assert_eq!( - ParserError::InvalidSubtag, - parse_transformed_extensions(&mut iter).unwrap_err() - ); - - // missing tkey - let mut iter = split_str("foo").peekable(); - assert_eq!( - ParserError::InvalidSubtag, - parse_transformed_extensions(&mut iter).unwrap_err() - ); - - // missing tvalue - let mut iter = split_str("a1-foo-b1").peekable(); - assert_eq!( - ParserError::InvalidSubtag, - parse_transformed_extensions(&mut iter).unwrap_err() - ); + // invalid tkey + let mut iter = split_str("1a-foo").peekable(); + assert_eq!( + ParserError::InvalidSubtag, + parse_transformed_extensions(&mut iter).unwrap_err() + ); + + // missing tkey + let mut iter = split_str("foo").peekable(); + assert_eq!( + ParserError::InvalidSubtag, + parse_transformed_extensions(&mut iter).unwrap_err() + ); + + // missing tvalue + let mut iter = split_str("a1-foo-b1").peekable(); + assert_eq!( + ParserError::InvalidSubtag, + parse_transformed_extensions(&mut iter).unwrap_err() + ); } diff --git a/src/extensions/unicode_locale.rs b/src/extensions/unicode_locale.rs index ea64b5a..5925113 100644 --- a/src/extensions/unicode_locale.rs +++ b/src/extensions/unicode_locale.rs @@ -8,87 +8,87 @@ use std::iter::Peekable; #[derive(Debug)] pub struct UnicodeLocaleExtensions { - pub attribute: Vec, - pub ufield: BTreeMap>, + pub attribute: Vec, + pub ufield: BTreeMap>, } impl fmt::Display for UnicodeLocaleExtensions { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "{}", ExtensionKind::UnicodeLocale)?; - for attribute in &self.attribute { - f.write_char(SEP)?; - f.write_str(attribute)?; - } - for (key, values) in &self.ufield { - f.write_char(SEP)?; - f.write_str(key)?; - for value in values { - f.write_char(SEP)?; - f.write_str(value)?; - } - } - Ok(()) + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{}", ExtensionKind::UnicodeLocale)?; + for attribute in &self.attribute { + f.write_char(SEP)?; + f.write_str(attribute)?; + } + for (key, values) in &self.ufield { + f.write_char(SEP)?; + f.write_str(key)?; + for value in values { + f.write_char(SEP)?; + f.write_str(value)?; + } } + Ok(()) + } } pub fn parse_unicode_locale_extensions<'a>( - iter: &mut Peekable>, + iter: &mut Peekable>, ) -> Result { - // unicode_locale_extensions - // https://unicode.org/reports/tr35/#unicode_locale_extensions - - let mut attribute = vec![]; - let mut ufield = BTreeMap::new(); - let mut ukey: Option = None; - let mut uvalue: Vec = vec![]; - - while let Some(subtag) = iter.peek() { - let subtag_bytes = subtag.as_bytes(); - let len = subtag_bytes.len(); - if len == 1 { - break; - } else if len == 2 - && subtag_bytes[0].is_ascii_alphanumeric() - && subtag_bytes[1].is_ascii_alphabetic() - { - // for ukey - if let Some(ukey) = ukey { - if !ufield.contains_key(&ukey) { - ufield.insert(ukey.clone(), vec![]); - } - let values = ufield.get_mut(&ukey).unwrap(); - for value in uvalue { - values.push(value); - } - uvalue = vec![]; - } - ukey = Some(subtag.to_string()); - iter.next(); - } else if (3..=8).contains(&len) && subtag_bytes.iter().all(|c| c.is_ascii_alphanumeric()) { - if ukey.is_some() { - // for uvalue - uvalue.push(subtag.to_string()); - } else { - // for attribute - attribute.push(subtag.to_string()); - } - iter.next(); - } else { - return Err(ParserError::InvalidSubtag); - } - } - - if let Some(ukey) = ukey { + // unicode_locale_extensions + // https://unicode.org/reports/tr35/#unicode_locale_extensions + + let mut attribute = vec![]; + let mut ufield = BTreeMap::new(); + let mut ukey: Option = None; + let mut uvalue: Vec = vec![]; + + while let Some(subtag) = iter.peek() { + let subtag_bytes = subtag.as_bytes(); + let len = subtag_bytes.len(); + if len == 1 { + break; + } else if len == 2 + && subtag_bytes[0].is_ascii_alphanumeric() + && subtag_bytes[1].is_ascii_alphabetic() + { + // for ukey + if let Some(ukey) = ukey { if !ufield.contains_key(&ukey) { - ufield.insert(ukey.clone(), vec![]); + ufield.insert(ukey.clone(), vec![]); } let values = ufield.get_mut(&ukey).unwrap(); for value in uvalue { - values.push(value); + values.push(value); } + uvalue = vec![]; + } + ukey = Some(subtag.to_string()); + iter.next(); + } else if (3..=8).contains(&len) && subtag_bytes.iter().all(|c| c.is_ascii_alphanumeric()) { + if ukey.is_some() { + // for uvalue + uvalue.push(subtag.to_string()); + } else { + // for attribute + attribute.push(subtag.to_string()); + } + iter.next(); + } else { + return Err(ParserError::InvalidSubtag); + } + } + + if let Some(ukey) = ukey { + if !ufield.contains_key(&ukey) { + ufield.insert(ukey.clone(), vec![]); + } + let values = ufield.get_mut(&ukey).unwrap(); + for value in uvalue { + values.push(value); } + } - Ok(UnicodeLocaleExtensions { attribute, ufield }) + Ok(UnicodeLocaleExtensions { attribute, ufield }) } /* @@ -100,60 +100,60 @@ use crate::shared::split_str; #[test] fn success_unicode_locale_extensions() { - // basic case - let mut iter = split_str("attr1-ky-value1").peekable(); - let result = parse_unicode_locale_extensions(&mut iter).unwrap(); - assert_eq!("u-attr1-ky-value1", format!("{}", result)); - - // no attribute - let mut iter = split_str("ky-value1").peekable(); - assert_eq!( - "u-ky-value1", - format!("{}", parse_unicode_locale_extensions(&mut iter).unwrap()) - ); - - // attribute multiple - let mut iter = split_str("attr1-attr2-ky-value1").peekable(); - assert_eq!( - "u-attr1-attr2-ky-value1", - format!("{}", parse_unicode_locale_extensions(&mut iter).unwrap()) - ); - - // uvalue multiple - let mut iter = split_str("ky-value1-value2").peekable(); - assert_eq!( - "u-ky-value1-value2", - format!("{}", parse_unicode_locale_extensions(&mut iter).unwrap()) - ); - - // no uvalue - let mut iter = split_str("ky").peekable(); - assert_eq!( - "u-ky", - format!("{}", parse_unicode_locale_extensions(&mut iter).unwrap()) - ); + // basic case + let mut iter = split_str("attr1-ky-value1").peekable(); + let result = parse_unicode_locale_extensions(&mut iter).unwrap(); + assert_eq!("u-attr1-ky-value1", format!("{}", result)); + + // no attribute + let mut iter = split_str("ky-value1").peekable(); + assert_eq!( + "u-ky-value1", + format!("{}", parse_unicode_locale_extensions(&mut iter).unwrap()) + ); + + // attribute multiple + let mut iter = split_str("attr1-attr2-ky-value1").peekable(); + assert_eq!( + "u-attr1-attr2-ky-value1", + format!("{}", parse_unicode_locale_extensions(&mut iter).unwrap()) + ); + + // uvalue multiple + let mut iter = split_str("ky-value1-value2").peekable(); + assert_eq!( + "u-ky-value1-value2", + format!("{}", parse_unicode_locale_extensions(&mut iter).unwrap()) + ); + + // no uvalue + let mut iter = split_str("ky").peekable(); + assert_eq!( + "u-ky", + format!("{}", parse_unicode_locale_extensions(&mut iter).unwrap()) + ); } #[test] fn fail_unicode_locale_extensions() { - // invalid ukey - let mut iter = split_str("k1").peekable(); - assert_eq!( - ParserError::InvalidSubtag, - parse_unicode_locale_extensions(&mut iter).unwrap_err() - ); - - // invalid uvalue - let mut iter = split_str("ky-{}").peekable(); - assert_eq!( - ParserError::InvalidSubtag, - parse_unicode_locale_extensions(&mut iter).unwrap_err() - ); - - // invalid attribute - let mut iter = split_str("ky-value1-{?}").peekable(); - assert_eq!( - ParserError::InvalidSubtag, - parse_unicode_locale_extensions(&mut iter).unwrap_err() - ); + // invalid ukey + let mut iter = split_str("k1").peekable(); + assert_eq!( + ParserError::InvalidSubtag, + parse_unicode_locale_extensions(&mut iter).unwrap_err() + ); + + // invalid uvalue + let mut iter = split_str("ky-{}").peekable(); + assert_eq!( + ParserError::InvalidSubtag, + parse_unicode_locale_extensions(&mut iter).unwrap_err() + ); + + // invalid attribute + let mut iter = split_str("ky-value1-{?}").peekable(); + assert_eq!( + ParserError::InvalidSubtag, + parse_unicode_locale_extensions(&mut iter).unwrap_err() + ); } diff --git a/src/lang.rs b/src/lang.rs index be7fcfc..9fc0961 100644 --- a/src/lang.rs +++ b/src/lang.rs @@ -9,10 +9,10 @@ use std::str::FromStr; #[derive(Debug, PartialEq)] pub struct UnicodeLanguageIdentifier { - pub language: String, - pub script: Option, - pub region: Option, - pub variants: Option>, + pub language: String, + pub script: Option, + pub region: Option, + pub variants: Option>, } /// Parse the given string as an Unicode Language Identifier. @@ -40,109 +40,109 @@ pub struct UnicodeLanguageIdentifier { /// - [`ParserError::InvalidSubtag`] if the given language id is not a valid subtag. /// pub fn parse_unicode_language_id(lang_id: &str) -> Result { - // check empty - if lang_id.is_empty() { - return Err(ParserError::Missing); - } + // check empty + if lang_id.is_empty() { + return Err(ParserError::Missing); + } - parse_unicode_language_id_from_iter(&mut split_str(lang_id).peekable()) + parse_unicode_language_id_from_iter(&mut split_str(lang_id).peekable()) } pub fn parse_unicode_language_id_from_iter<'a>( - iter: &mut Peekable>, + iter: &mut Peekable>, ) -> Result { - // language subtag - let language = if let Some(lang) = iter.next() { - language_subtag(lang)? - } else { - return Err(ParserError::Unexpected); - }; - let language = String::from(language); + // language subtag + let language = if let Some(lang) = iter.next() { + language_subtag(lang)? + } else { + return Err(ParserError::Unexpected); + }; + let language = String::from(language); - // other subtags - let mut script = None; - let mut region = None; - let mut variants = vec![]; - let mut current = 1; - while let Some(subtag) = iter.peek() { - if current == 1 { - if let Ok(script_subtag) = script_subtag(subtag) { - script = Some(String::from(script_subtag)); - current = 2; - } else if let Ok(region_subtag) = region_subtag(subtag) { - region = Some(String::from(region_subtag)); - current = 3; - } else if let Ok(variant_subtag) = variant_subtag(subtag) { - variants.push(String::from(variant_subtag)); - current = 3; - } else { - break; - } - } else if current == 2 { - if let Ok(region_subtag) = region_subtag(subtag) { - region = Some(String::from(region_subtag)); - current = 3; - } else if let Ok(variant_subtag) = variant_subtag(subtag) { - variants.push(String::from(variant_subtag)); - current = 3; - } else { - break; - } - } else if let Ok(variant_subtag) = variant_subtag(subtag) { - variants.push(String::from(variant_subtag)); - } else { - break; - } - iter.next(); + // other subtags + let mut script = None; + let mut region = None; + let mut variants = vec![]; + let mut current = 1; + while let Some(subtag) = iter.peek() { + if current == 1 { + if let Ok(script_subtag) = script_subtag(subtag) { + script = Some(String::from(script_subtag)); + current = 2; + } else if let Ok(region_subtag) = region_subtag(subtag) { + region = Some(String::from(region_subtag)); + current = 3; + } else if let Ok(variant_subtag) = variant_subtag(subtag) { + variants.push(String::from(variant_subtag)); + current = 3; + } else { + break; + } + } else if current == 2 { + if let Ok(region_subtag) = region_subtag(subtag) { + region = Some(String::from(region_subtag)); + current = 3; + } else if let Ok(variant_subtag) = variant_subtag(subtag) { + variants.push(String::from(variant_subtag)); + current = 3; + } else { + break; + } + } else if let Ok(variant_subtag) = variant_subtag(subtag) { + variants.push(String::from(variant_subtag)); + } else { + break; } + iter.next(); + } - // normalize variants - let variants = if variants.is_empty() { - None - } else { - variants.dedup(); - Some(variants) - }; + // normalize variants + let variants = if variants.is_empty() { + None + } else { + variants.dedup(); + Some(variants) + }; - Ok(UnicodeLanguageIdentifier { - language, - script, - region, - variants, - }) + Ok(UnicodeLanguageIdentifier { + language, + script, + region, + variants, + }) } impl fmt::Display for UnicodeLanguageIdentifier { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - if self.language.is_empty() { - f.write_str(LANG_UND)?; - } else { - self.language.fmt(f)?; - } - if let Some(ref script) = self.script { - f.write_char(SEP)?; - script.fmt(f)?; - } - if let Some(ref region) = self.region { - f.write_char(SEP)?; - region.fmt(f)?; - } - if let Some(ref variants) = self.variants { - for variant in variants.iter() { - f.write_char(SEP)?; - variant.fmt(f)?; - } - } - Ok(()) + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + if self.language.is_empty() { + f.write_str(LANG_UND)?; + } else { + self.language.fmt(f)?; } + if let Some(ref script) = self.script { + f.write_char(SEP)?; + script.fmt(f)?; + } + if let Some(ref region) = self.region { + f.write_char(SEP)?; + region.fmt(f)?; + } + if let Some(ref variants) = self.variants { + for variant in variants.iter() { + f.write_char(SEP)?; + variant.fmt(f)?; + } + } + Ok(()) + } } impl FromStr for UnicodeLanguageIdentifier { - type Err = ParserError; + type Err = ParserError; - fn from_str(source: &str) -> Result { - parse_unicode_language_id(source) - } + fn from_str(source: &str) -> Result { + parse_unicode_language_id(source) + } } /** @@ -151,109 +151,109 @@ impl FromStr for UnicodeLanguageIdentifier { #[test] fn success_parse_unicode_language_id() { - // full case - let result = parse_unicode_language_id("en-Latn-US-macos-windows-linux").unwrap(); - assert_eq!(result.language, "en"); - assert_eq!(result.script, Some("Latn".to_string())); - assert_eq!(result.region, Some("US".to_string())); - assert_eq!( - result.variants, - Some(vec![ - "macos".to_string(), - "windows".to_string(), - "linux".to_string() - ]) - ); + // full case + let result = parse_unicode_language_id("en-Latn-US-macos-windows-linux").unwrap(); + assert_eq!(result.language, "en"); + assert_eq!(result.script, Some("Latn".to_string())); + assert_eq!(result.region, Some("US".to_string())); + assert_eq!( + result.variants, + Some(vec![ + "macos".to_string(), + "windows".to_string(), + "linux".to_string() + ]) + ); - // use sep with underscore - let result = parse_unicode_language_id("en_Latn_US").unwrap(); - assert_eq!(result.language, "en"); - assert_eq!(result.script, Some("Latn".to_string())); - assert_eq!(result.region, Some("US".to_string())); + // use sep with underscore + let result = parse_unicode_language_id("en_Latn_US").unwrap(); + assert_eq!(result.language, "en"); + assert_eq!(result.script, Some("Latn".to_string())); + assert_eq!(result.region, Some("US".to_string())); - // language subtag only - let result = parse_unicode_language_id("en").unwrap(); - assert_eq!(result.language, "en"); - assert_eq!(result.script, None); - assert_eq!(result.region, None); - assert_eq!(result.variants, None); + // language subtag only + let result = parse_unicode_language_id("en").unwrap(); + assert_eq!(result.language, "en"); + assert_eq!(result.script, None); + assert_eq!(result.region, None); + assert_eq!(result.variants, None); - // language subtag and region subtag - let result = parse_unicode_language_id("en-US").unwrap(); - assert_eq!(result.language, "en"); - assert_eq!(result.script, None); - assert_eq!(result.region, Some("US".to_string())); - assert_eq!(result.variants, None); + // language subtag and region subtag + let result = parse_unicode_language_id("en-US").unwrap(); + assert_eq!(result.language, "en"); + assert_eq!(result.script, None); + assert_eq!(result.region, Some("US".to_string())); + assert_eq!(result.variants, None); - // language subtag and script subtag - let result = parse_unicode_language_id("en-Latn").unwrap(); - assert_eq!(result.language, "en"); - assert_eq!(result.script, Some("Latn".to_string())); - assert_eq!(result.region, None); - assert_eq!(result.variants, None); + // language subtag and script subtag + let result = parse_unicode_language_id("en-Latn").unwrap(); + assert_eq!(result.language, "en"); + assert_eq!(result.script, Some("Latn".to_string())); + assert_eq!(result.region, None); + assert_eq!(result.variants, None); - // language subtag and variant subtag - let result = parse_unicode_language_id("en-macos").unwrap(); - assert_eq!(result.language, "en"); - assert_eq!(result.script, None); - assert_eq!(result.region, None); - assert_eq!(result.variants, Some(vec!["macos".to_string()])); + // language subtag and variant subtag + let result = parse_unicode_language_id("en-macos").unwrap(); + assert_eq!(result.language, "en"); + assert_eq!(result.script, None); + assert_eq!(result.region, None); + assert_eq!(result.variants, Some(vec!["macos".to_string()])); - // language subtag, script subtag and region subtag - let result = parse_unicode_language_id("en-Latn-US").unwrap(); - assert_eq!(result.language, "en"); - assert_eq!(result.script, Some("Latn".to_string())); - assert_eq!(result.region, Some("US".to_string())); - assert_eq!(result.variants, None); + // language subtag, script subtag and region subtag + let result = parse_unicode_language_id("en-Latn-US").unwrap(); + assert_eq!(result.language, "en"); + assert_eq!(result.script, Some("Latn".to_string())); + assert_eq!(result.region, Some("US".to_string())); + assert_eq!(result.variants, None); - // language subtag: 'root' - let result = parse_unicode_language_id("root").unwrap(); - assert_eq!(result.language, ""); - assert_eq!(result.script, None); - assert_eq!(result.region, None); - assert_eq!(result.variants, None); + // language subtag: 'root' + let result = parse_unicode_language_id("root").unwrap(); + assert_eq!(result.language, ""); + assert_eq!(result.script, None); + assert_eq!(result.region, None); + assert_eq!(result.variants, None); - // include language subtag: 'und' - let result = parse_unicode_language_id("und-Latn-AT-macos").unwrap(); - assert_eq!(result.language, ""); - assert_eq!(result.script, Some("Latn".to_string())); - assert_eq!(result.region, Some("AT".to_string())); - assert_eq!(result.variants, Some(vec!["macos".to_string()])); + // include language subtag: 'und' + let result = parse_unicode_language_id("und-Latn-AT-macos").unwrap(); + assert_eq!(result.language, ""); + assert_eq!(result.script, Some("Latn".to_string())); + assert_eq!(result.region, Some("AT".to_string())); + assert_eq!(result.variants, Some(vec!["macos".to_string()])); - // Display trait implementation - assert_eq!( - "en-Latn-US-macos", - format!("{}", parse_unicode_language_id("en-Latn-US-macos").unwrap()) - ); - assert_eq!( - "und-Latn-US-macos", - format!( - "{}", - parse_unicode_language_id("und-Latn-US-macos").unwrap() - ) - ); + // Display trait implementation + assert_eq!( + "en-Latn-US-macos", + format!("{}", parse_unicode_language_id("en-Latn-US-macos").unwrap()) + ); + assert_eq!( + "und-Latn-US-macos", + format!( + "{}", + parse_unicode_language_id("und-Latn-US-macos").unwrap() + ) + ); - // PartialEq trait implementation - assert_eq!( - parse_unicode_language_id("en-Latn-US").unwrap(), - parse_unicode_language_id("en-Latn-US").unwrap() - ); + // PartialEq trait implementation + assert_eq!( + parse_unicode_language_id("en-Latn-US").unwrap(), + parse_unicode_language_id("en-Latn-US").unwrap() + ); - // FromStr trait implementation - let result: UnicodeLanguageIdentifier = "en-Latn-US-macos".parse().unwrap(); - assert_eq!("en", result.language); - assert_eq!(Some("Latn".to_string()), result.script); - assert_eq!(Some("US".to_string()), result.region); - assert_eq!(Some(vec!["macos".to_string()]), result.variants); - let result: UnicodeLanguageIdentifier = "en-Latn-US".parse().unwrap(); - assert_eq!("en-Latn-US", format!("{}", result)); + // FromStr trait implementation + let result: UnicodeLanguageIdentifier = "en-Latn-US-macos".parse().unwrap(); + assert_eq!("en", result.language); + assert_eq!(Some("Latn".to_string()), result.script); + assert_eq!(Some("US".to_string()), result.region); + assert_eq!(Some(vec!["macos".to_string()]), result.variants); + let result: UnicodeLanguageIdentifier = "en-Latn-US".parse().unwrap(); + assert_eq!("en-Latn-US", format!("{}", result)); } #[test] fn fail_parse_unicode_language_id() { - // missing language - assert_eq!( - ParserError::Missing, - parse_unicode_language_id("").unwrap_err() - ); + // missing language + assert_eq!( + ParserError::Missing, + parse_unicode_language_id("").unwrap_err() + ); } diff --git a/src/lib.rs b/src/lib.rs index ba5df77..c50e745 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -40,5 +40,5 @@ pub use crate::lang::{parse_unicode_language_id as parse_language_id, UnicodeLan pub use crate::locale::{parse_unicode_locale_id as parse_locale_id, UnicodeLocaleIdentifier}; pub use crate::measure::{parse_unicode_measure_unit as parse_measure_unit, UnicodeMeasureUnit}; pub use crate::subdivision::{ - parse_unicode_subdivision_id as parse_subdivision_id, UnicodeSubdivisionIdentifier, + parse_unicode_subdivision_id as parse_subdivision_id, UnicodeSubdivisionIdentifier, }; diff --git a/src/locale.rs b/src/locale.rs index aee58b9..adc6a59 100644 --- a/src/locale.rs +++ b/src/locale.rs @@ -10,29 +10,29 @@ use std::str::FromStr; #[derive(Debug)] pub struct UnicodeLocaleIdentifier { - pub language: UnicodeLanguageIdentifier, - pub extensions: Extensions, + pub language: UnicodeLanguageIdentifier, + pub extensions: Extensions, } impl fmt::Display for UnicodeLocaleIdentifier { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - let mut msg = vec![]; - msg.push(format!("{}", self.language)); - let extensions_msg = format!("{}", self.extensions); - if !extensions_msg.is_empty() { - msg.push(extensions_msg); - } - f.write_str(&msg.join(&SEP.to_string()))?; - Ok(()) + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let mut msg = vec![]; + msg.push(format!("{}", self.language)); + let extensions_msg = format!("{}", self.extensions); + if !extensions_msg.is_empty() { + msg.push(extensions_msg); } + f.write_str(&msg.join(&SEP.to_string()))?; + Ok(()) + } } impl FromStr for UnicodeLocaleIdentifier { - type Err = ParserError; + type Err = ParserError; - fn from_str(source: &str) -> Result { - parse_unicode_locale_id(source) - } + fn from_str(source: &str) -> Result { + parse_unicode_locale_id(source) + } } /// Parse the given string as an Unicode Locale Identifier. @@ -65,19 +65,19 @@ impl FromStr for UnicodeLocaleIdentifier { /// - [`ParserError::InvalidSubtag`] if the given locale id is not a valid subtag. /// - [`ParserError::InvalidExtension`] if the given locale id is not a valid unicode extensions pub fn parse_unicode_locale_id(locale_id: &str) -> Result { - // check empty - if locale_id.is_empty() { - return Err(ParserError::Missing); - } + // check empty + if locale_id.is_empty() { + return Err(ParserError::Missing); + } - let mut iter = split_str(locale_id).peekable(); - let language = parse_unicode_language_id_from_iter(&mut iter)?; - let extensions = parse_extensions_from_iter(&mut iter)?; + let mut iter = split_str(locale_id).peekable(); + let language = parse_unicode_language_id_from_iter(&mut iter)?; + let extensions = parse_extensions_from_iter(&mut iter)?; - Ok(UnicodeLocaleIdentifier { - language, - extensions, - }) + Ok(UnicodeLocaleIdentifier { + language, + extensions, + }) } /* @@ -86,44 +86,46 @@ pub fn parse_unicode_locale_id(locale_id: &str) -> Result, + pub values: Vec, } impl fmt::Display for UnicodeMeasureUnit { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - let mut messages = vec![]; - for value in &self.values { - messages.push(value.to_string()); - } - write!(f, "{}", messages.join(&SEP.to_string()))?; - Ok(()) + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let mut messages = vec![]; + for value in &self.values { + messages.push(value.to_string()); } + write!(f, "{}", messages.join(&SEP.to_string()))?; + Ok(()) + } } impl FromStr for UnicodeMeasureUnit { - type Err = ParserError; + type Err = ParserError; - fn from_str(source: &str) -> Result { - parse_unicode_measure_unit(source) - } + fn from_str(source: &str) -> Result { + parse_unicode_measure_unit(source) + } } /// Parse the given string as an Unicode Measure Unit @@ -50,40 +50,40 @@ impl FromStr for UnicodeMeasureUnit { /// - [`ParserError::Missing`] if the given measure unit is empty. /// - [`ParserError::InvalidSubtag`] if the given measure unit is not a valid. pub fn parse_unicode_measure_unit(measure_unit: &str) -> Result { - if measure_unit.is_empty() { - return Err(ParserError::Missing); - } + if measure_unit.is_empty() { + return Err(ParserError::Missing); + } - parse_unicode_measure_unit_from_iter(&mut split_str(measure_unit).peekable()) + parse_unicode_measure_unit_from_iter(&mut split_str(measure_unit).peekable()) } fn parse_unicode_measure_unit_from_iter<'a>( - iter: &mut Peekable>, + iter: &mut Peekable>, ) -> Result { - // unicode_measure_unit - // https://unicode.org/reports/tr35/#unicode_measure_unit - let mut values = vec![]; + // unicode_measure_unit + // https://unicode.org/reports/tr35/#unicode_measure_unit + let mut values = vec![]; - while let Some(subtag) = iter.peek() { - let subtag_bytes = subtag.as_bytes(); + while let Some(subtag) = iter.peek() { + let subtag_bytes = subtag.as_bytes(); - if !(3..=8).contains(&subtag_bytes.len()) - || !subtag_bytes.iter().all(|b: &u8| b.is_ascii_alphanumeric()) - { - return Err(ParserError::InvalidSubtag); - } - - values.push(subtag.to_string()); - iter.next(); + if !(3..=8).contains(&subtag_bytes.len()) + || !subtag_bytes.iter().all(|b: &u8| b.is_ascii_alphanumeric()) + { + return Err(ParserError::InvalidSubtag); } - let values = if values.is_empty() { - return Err(ParserError::Missing); - } else { - values - }; + values.push(subtag.to_string()); + iter.next(); + } + + let values = if values.is_empty() { + return Err(ParserError::Missing); + } else { + values + }; - Ok(UnicodeMeasureUnit { values }) + Ok(UnicodeMeasureUnit { values }) } /* @@ -92,38 +92,38 @@ fn parse_unicode_measure_unit_from_iter<'a>( #[test] fn success_parse_unicode_measure_unit() { - // basic - let measure = parse_unicode_measure_unit("area-hectare").unwrap(); - assert_eq!(vec!["area", "hectare"], measure.values); - - // Display trait implementation - assert_eq!( - "area-hectare", - format!("{}", parse_unicode_measure_unit("area-hectare").unwrap()) - ); - - // PartialEq trait implementation - assert_eq!( - parse_unicode_measure_unit("area-hectare").unwrap(), - parse_unicode_measure_unit("area-hectare").unwrap() - ); - - // FromStr trait implementation - let measure: UnicodeMeasureUnit = "area-hectare".parse().unwrap(); - assert_eq!(vec!["area", "hectare"], measure.values); + // basic + let measure = parse_unicode_measure_unit("area-hectare").unwrap(); + assert_eq!(vec!["area", "hectare"], measure.values); + + // Display trait implementation + assert_eq!( + "area-hectare", + format!("{}", parse_unicode_measure_unit("area-hectare").unwrap()) + ); + + // PartialEq trait implementation + assert_eq!( + parse_unicode_measure_unit("area-hectare").unwrap(), + parse_unicode_measure_unit("area-hectare").unwrap() + ); + + // FromStr trait implementation + let measure: UnicodeMeasureUnit = "area-hectare".parse().unwrap(); + assert_eq!(vec!["area", "hectare"], measure.values); } #[test] fn fail_parse_unicode_measure_unit() { - // missing - assert_eq!( - ParserError::Missing, - parse_unicode_measure_unit("").unwrap_err() - ); - - // invalid subtag - assert_eq!( - ParserError::InvalidSubtag, - parse_unicode_measure_unit("acceleration-g-force").unwrap_err() - ); + // missing + assert_eq!( + ParserError::Missing, + parse_unicode_measure_unit("").unwrap_err() + ); + + // invalid subtag + assert_eq!( + ParserError::InvalidSubtag, + parse_unicode_measure_unit("acceleration-g-force").unwrap_err() + ); } diff --git a/src/shared.rs b/src/shared.rs index 4c616dd..5e8184b 100644 --- a/src/shared.rs +++ b/src/shared.rs @@ -3,5 +3,5 @@ use crate::constants::{LEGACY_SEP, SEP}; use std::str::Split; pub fn split_str(s: &str) -> Split<'_, impl Fn(char) -> bool> { - s.split(|c| c == SEP || c == LEGACY_SEP) + s.split(|c| c == SEP || c == LEGACY_SEP) } diff --git a/src/subdivision.rs b/src/subdivision.rs index 044ea5a..23dfbae 100644 --- a/src/subdivision.rs +++ b/src/subdivision.rs @@ -6,23 +6,23 @@ use std::str::FromStr; #[derive(Debug, PartialEq)] pub struct UnicodeSubdivisionIdentifier { - pub region: String, - pub suffix: String, + pub region: String, + pub suffix: String, } impl fmt::Display for UnicodeSubdivisionIdentifier { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "{}{}", self.region, self.suffix)?; - Ok(()) - } + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{}{}", self.region, self.suffix)?; + Ok(()) + } } impl FromStr for UnicodeSubdivisionIdentifier { - type Err = ParserError; + type Err = ParserError; - fn from_str(source: &str) -> Result { - parse_unicode_subdivision_id(source) - } + fn from_str(source: &str) -> Result { + parse_unicode_subdivision_id(source) + } } /// Parse the given string as an Unicode Subdivision Identifier. @@ -46,55 +46,55 @@ impl FromStr for UnicodeSubdivisionIdentifier { /// - [`ParserError::Missing`] if the given subdivision id is empty. /// - [`ParserError::InvalidSubdivision`] if the given subdivision id is not a valid subdivision identifier. pub fn parse_unicode_subdivision_id( - subdivision_id: &str, + subdivision_id: &str, ) -> Result { - // unicode_subdivision_id - // https://unicode.org/reports/tr35/#unicode_subdivision_id - - let chunks = subdivision_id.as_bytes(); - - if chunks.is_empty() { - return Err(ParserError::Missing); - } - - let len = chunks.len(); - if !(2..=7).contains(&len) { - return Err(ParserError::InvalidSubdivision); - } - - let region_index = region_index(chunks)?; - let region = match str::from_utf8(&chunks[0..region_index]) { - Ok(s) => s, - Err(_) => return Err(ParserError::Unexpected), + // unicode_subdivision_id + // https://unicode.org/reports/tr35/#unicode_subdivision_id + + let chunks = subdivision_id.as_bytes(); + + if chunks.is_empty() { + return Err(ParserError::Missing); + } + + let len = chunks.len(); + if !(2..=7).contains(&len) { + return Err(ParserError::InvalidSubdivision); + } + + let region_index = region_index(chunks)?; + let region = match str::from_utf8(&chunks[0..region_index]) { + Ok(s) => s, + Err(_) => return Err(ParserError::Unexpected), + }; + + let suffix_len = len - region_index; + if !(3..7).contains(&suffix_len) + || !chunks[region_index..] + .iter() + .all(|b: &u8| b.is_ascii_alphanumeric()) + { + Err(ParserError::InvalidSubdivision) + } else { + let suffix = match str::from_utf8(&chunks[region_index..]) { + Ok(s) => s, + Err(_) => return Err(ParserError::Unexpected), }; - - let suffix_len = len - region_index; - if !(3..7).contains(&suffix_len) - || !chunks[region_index..] - .iter() - .all(|b: &u8| b.is_ascii_alphanumeric()) - { - Err(ParserError::InvalidSubdivision) - } else { - let suffix = match str::from_utf8(&chunks[region_index..]) { - Ok(s) => s, - Err(_) => return Err(ParserError::Unexpected), - }; - Ok(UnicodeSubdivisionIdentifier { - region: String::from(region), - suffix: String::from(suffix), - }) - } + Ok(UnicodeSubdivisionIdentifier { + region: String::from(region), + suffix: String::from(suffix), + }) + } } fn region_index(chunks: &[u8]) -> Result { - if chunks[0..2].iter().all(|b| b.is_ascii_alphabetic()) { - Ok(2) - } else if chunks[0..3].iter().all(|b| b.is_ascii_digit()) { - Ok(3) - } else { - Err(ParserError::InvalidSubdivision) - } + if chunks[0..2].iter().all(|b| b.is_ascii_alphabetic()) { + Ok(2) + } else if chunks[0..3].iter().all(|b| b.is_ascii_digit()) { + Ok(3) + } else { + Err(ParserError::InvalidSubdivision) + } } /* @@ -103,63 +103,63 @@ fn region_index(chunks: &[u8]) -> Result { #[test] fn success_parse_unicode_subdivision_id() { - // alpha region + suffix - let subdivision = parse_unicode_subdivision_id("ussct").unwrap(); - assert_eq!("us", subdivision.region); - assert_eq!("sct", subdivision.suffix); - - // digit region + suffix - let subdivision = parse_unicode_subdivision_id("123abcd").unwrap(); - assert_eq!("123", subdivision.region); - assert_eq!("abcd", subdivision.suffix); - - // Display trait implementation - assert_eq!( - "123abcd", - format!("{}", parse_unicode_subdivision_id("123abcd").unwrap()) - ); - - // PartialEq trait implementation - assert_eq!( - parse_unicode_subdivision_id("123abcd").unwrap(), - parse_unicode_subdivision_id("123abcd").unwrap() - ); - - // FromStr trait implementation - let subdivision: UnicodeSubdivisionIdentifier = "ussct".parse().unwrap(); - assert_eq!("us", subdivision.region); - assert_eq!("sct", subdivision.suffix); + // alpha region + suffix + let subdivision = parse_unicode_subdivision_id("ussct").unwrap(); + assert_eq!("us", subdivision.region); + assert_eq!("sct", subdivision.suffix); + + // digit region + suffix + let subdivision = parse_unicode_subdivision_id("123abcd").unwrap(); + assert_eq!("123", subdivision.region); + assert_eq!("abcd", subdivision.suffix); + + // Display trait implementation + assert_eq!( + "123abcd", + format!("{}", parse_unicode_subdivision_id("123abcd").unwrap()) + ); + + // PartialEq trait implementation + assert_eq!( + parse_unicode_subdivision_id("123abcd").unwrap(), + parse_unicode_subdivision_id("123abcd").unwrap() + ); + + // FromStr trait implementation + let subdivision: UnicodeSubdivisionIdentifier = "ussct".parse().unwrap(); + assert_eq!("us", subdivision.region); + assert_eq!("sct", subdivision.suffix); } #[test] fn fail_parse_unicode_subdivision_id() { - // missing - assert_eq!( - ParserError::Missing, - parse_unicode_subdivision_id("").unwrap_err() - ); - - // 2 characters - assert_eq!( - ParserError::InvalidSubdivision, - parse_unicode_subdivision_id("ab").unwrap_err() - ); - - // 8 characters - assert_eq!( - ParserError::InvalidSubdivision, - parse_unicode_subdivision_id("12312345").unwrap_err() - ); - - // invalid region - assert_eq!( - ParserError::InvalidSubdivision, - parse_unicode_subdivision_id("1b123").unwrap_err() - ); - - // invalid suffix - assert_eq!( - ParserError::InvalidSubdivision, - parse_unicode_subdivision_id("ab{}").unwrap_err() - ); + // missing + assert_eq!( + ParserError::Missing, + parse_unicode_subdivision_id("").unwrap_err() + ); + + // 2 characters + assert_eq!( + ParserError::InvalidSubdivision, + parse_unicode_subdivision_id("ab").unwrap_err() + ); + + // 8 characters + assert_eq!( + ParserError::InvalidSubdivision, + parse_unicode_subdivision_id("12312345").unwrap_err() + ); + + // invalid region + assert_eq!( + ParserError::InvalidSubdivision, + parse_unicode_subdivision_id("1b123").unwrap_err() + ); + + // invalid suffix + assert_eq!( + ParserError::InvalidSubdivision, + parse_unicode_subdivision_id("ab{}").unwrap_err() + ); } diff --git a/src/subtags.rs b/src/subtags.rs index 530b3e6..9127c31 100644 --- a/src/subtags.rs +++ b/src/subtags.rs @@ -2,79 +2,79 @@ use crate::constants::{LANG_EMPTY, LANG_ROOT, LANG_UND}; use crate::errors::ParserError; pub fn is_language_subtag(subtag: &[u8]) -> bool { - let len = subtag.len(); - (2..=8).contains(&len) && len != 4 && subtag.iter().all(|b| b.is_ascii_alphabetic()) + let len = subtag.len(); + (2..=8).contains(&len) && len != 4 && subtag.iter().all(|b| b.is_ascii_alphabetic()) } pub fn language_subtag(subtag: &str) -> Result<&str, ParserError> { - // unicode_language_subtag - // https://unicode.org/reports/tr35/#unicode_language_subtag + // unicode_language_subtag + // https://unicode.org/reports/tr35/#unicode_language_subtag - // 'root' is a special case - if LANG_ROOT.eq(subtag) { - return Ok(LANG_EMPTY); - } + // 'root' is a special case + if LANG_ROOT.eq(subtag) { + return Ok(LANG_EMPTY); + } - if !is_language_subtag(subtag.as_bytes()) { - return Err(ParserError::InvalidLanguage); - } + if !is_language_subtag(subtag.as_bytes()) { + return Err(ParserError::InvalidLanguage); + } - if LANG_UND == subtag { - Ok(LANG_EMPTY) - } else { - Ok(subtag) - } + if LANG_UND == subtag { + Ok(LANG_EMPTY) + } else { + Ok(subtag) + } } fn is_scritp_subtag(subtag: &[u8]) -> bool { - subtag.len() == 4 && subtag.iter().all(|b| b.is_ascii_alphabetic()) + subtag.len() == 4 && subtag.iter().all(|b| b.is_ascii_alphabetic()) } pub fn script_subtag(subtag: &str) -> Result<&str, ParserError> { - // unicode_script_subtag - // https://unicode.org/reports/tr35/#unicode_script_subtag + // unicode_script_subtag + // https://unicode.org/reports/tr35/#unicode_script_subtag - if is_scritp_subtag(subtag.as_bytes()) { - Ok(subtag) - } else { - Err(ParserError::InvalidSubtag) - } + if is_scritp_subtag(subtag.as_bytes()) { + Ok(subtag) + } else { + Err(ParserError::InvalidSubtag) + } } fn is_region_subtag(subtag: &[u8]) -> bool { - let len = subtag.len(); - len == 2 && subtag.iter().all(|b| b.is_ascii_alphabetic()) - || len == 3 && subtag.iter().all(|b| b.is_ascii_digit()) + let len = subtag.len(); + len == 2 && subtag.iter().all(|b| b.is_ascii_alphabetic()) + || len == 3 && subtag.iter().all(|b| b.is_ascii_digit()) } pub fn region_subtag(subtag: &str) -> Result<&str, ParserError> { - // unicode_region_subtag - // https://unicode.org/reports/tr35/#unicode_region_subtag + // unicode_region_subtag + // https://unicode.org/reports/tr35/#unicode_region_subtag - if is_region_subtag(subtag.as_bytes()) { - Ok(subtag) - } else { - Err(ParserError::InvalidSubtag) - } + if is_region_subtag(subtag.as_bytes()) { + Ok(subtag) + } else { + Err(ParserError::InvalidSubtag) + } } fn is_variant_subtag(subtag: &[u8]) -> bool { - let len = subtag.len(); - (4..=8).contains(&len) - && ((len >= 5 && subtag.iter().all(|b| b.is_ascii_alphanumeric())) - || (len == 4 && subtag[0].is_ascii_digit()) - && subtag[1..].iter().all(|b: &u8| b.is_ascii_alphanumeric())) + let len = subtag.len(); + (4..=8).contains(&len) + && ((len >= 5 && subtag.iter().all(|b| b.is_ascii_alphanumeric())) + || (len == 4 && subtag[0].is_ascii_digit()) + && subtag[1..].iter().all(|b: &u8| b.is_ascii_alphanumeric())) } pub fn variant_subtag(subtag: &str) -> Result<&str, ParserError> { - // unicode_variant_subtag - // https://unicode.org/reports/tr35/#unicode_variant_subtag + // unicode_variant_subtag + // https://unicode.org/reports/tr35/#unicode_variant_subtag - if is_variant_subtag(subtag.as_bytes()) { - Ok(subtag) - } else { - Err(ParserError::InvalidSubtag) - } + if is_variant_subtag(subtag.as_bytes()) { + Ok(subtag) + } else { + Err(ParserError::InvalidSubtag) + } } /** @@ -83,131 +83,131 @@ pub fn variant_subtag(subtag: &str) -> Result<&str, ParserError> { #[test] fn success_language_subtag() { - // 'root' - assert_eq!(LANG_EMPTY, language_subtag("root").unwrap()); + // 'root' + assert_eq!(LANG_EMPTY, language_subtag("root").unwrap()); - // language subtag only - assert_eq!("en", language_subtag("en").unwrap()); + // language subtag only + assert_eq!("en", language_subtag("en").unwrap()); - // 3 characters - assert_eq!("jpn", language_subtag("jpn").unwrap()); + // 3 characters + assert_eq!("jpn", language_subtag("jpn").unwrap()); - // 'und' - assert_eq!(LANG_EMPTY, language_subtag("und").unwrap()); + // 'und' + assert_eq!(LANG_EMPTY, language_subtag("und").unwrap()); } #[test] fn fail_get_language_subtag() { - // 1 character - assert_eq!( - ParserError::InvalidLanguage, - language_subtag("i").unwrap_err() - ); - - // 4 characters - assert_eq!( - ParserError::InvalidLanguage, - language_subtag("food").unwrap_err() - ); - - // 9 characters - assert_eq!( - ParserError::InvalidLanguage, - language_subtag("unicodela").unwrap_err() - ); - - // not alphabet - assert_eq!( - ParserError::InvalidLanguage, - language_subtag("12").unwrap_err() - ); + // 1 character + assert_eq!( + ParserError::InvalidLanguage, + language_subtag("i").unwrap_err() + ); + + // 4 characters + assert_eq!( + ParserError::InvalidLanguage, + language_subtag("food").unwrap_err() + ); + + // 9 characters + assert_eq!( + ParserError::InvalidLanguage, + language_subtag("unicodela").unwrap_err() + ); + + // not alphabet + assert_eq!( + ParserError::InvalidLanguage, + language_subtag("12").unwrap_err() + ); } #[test] fn success_script_subtag() { - assert_eq!("Latn", script_subtag("Latn").unwrap()); + assert_eq!("Latn", script_subtag("Latn").unwrap()); } #[test] fn fail_script_subtag() { - // 3 character - assert_eq!( - ParserError::InvalidSubtag, - script_subtag("foo").unwrap_err() - ); + // 3 character + assert_eq!( + ParserError::InvalidSubtag, + script_subtag("foo").unwrap_err() + ); - // 5 characters - assert_eq!( - ParserError::InvalidSubtag, - script_subtag("Japan").unwrap_err() - ); + // 5 characters + assert_eq!( + ParserError::InvalidSubtag, + script_subtag("Japan").unwrap_err() + ); - // not alphabet - assert_eq!( - ParserError::InvalidSubtag, - script_subtag("123").unwrap_err() - ); + // not alphabet + assert_eq!( + ParserError::InvalidSubtag, + script_subtag("123").unwrap_err() + ); } #[test] fn success_region_subtag() { - // ascii alphabet - assert_eq!("JP", region_subtag("JP").unwrap()); + // ascii alphabet + assert_eq!("JP", region_subtag("JP").unwrap()); - // 3 digit number - assert_eq!("001", region_subtag("001").unwrap()); + // 3 digit number + assert_eq!("001", region_subtag("001").unwrap()); } #[test] fn fail_region_subtag() { - // 1 character - assert_eq!(ParserError::InvalidSubtag, region_subtag("J").unwrap_err()); + // 1 character + assert_eq!(ParserError::InvalidSubtag, region_subtag("J").unwrap_err()); - // 3 ascii characters - assert_eq!( - ParserError::InvalidSubtag, - region_subtag("JPN").unwrap_err() - ); + // 3 ascii characters + assert_eq!( + ParserError::InvalidSubtag, + region_subtag("JPN").unwrap_err() + ); - // 4 digit characters - assert_eq!( - ParserError::InvalidSubtag, - region_subtag("1234").unwrap_err() - ); + // 4 digit characters + assert_eq!( + ParserError::InvalidSubtag, + region_subtag("1234").unwrap_err() + ); } #[test] fn success_variant_subtag() { - // 4 characters with digit - assert_eq!("1996", variant_subtag("1996").unwrap()); + // 4 characters with digit + assert_eq!("1996", variant_subtag("1996").unwrap()); - // 4 characters with digit & alphabet - assert_eq!("1ABC", variant_subtag("1ABC").unwrap()); + // 4 characters with digit & alphabet + assert_eq!("1ABC", variant_subtag("1ABC").unwrap()); - // 5 characters with alphabet and digit - assert_eq!("abcd1", variant_subtag("abcd1").unwrap()); + // 5 characters with alphabet and digit + assert_eq!("abcd1", variant_subtag("abcd1").unwrap()); - // 8 characters with alphabet and digit - assert_eq!("abcdefgh", variant_subtag("abcdefgh").unwrap()); + // 8 characters with alphabet and digit + assert_eq!("abcdefgh", variant_subtag("abcdefgh").unwrap()); } #[test] fn fail_variant_subtag() { - // 3 characters - assert_eq!( - ParserError::InvalidSubtag, - variant_subtag("abc").unwrap_err() - ); - - // 9 characters - assert_eq!( - ParserError::InvalidSubtag, - variant_subtag("abcdefghi").unwrap_err() - ); - - // 4 characters with alphabet - assert_eq!( - ParserError::InvalidSubtag, - variant_subtag("aBCD").unwrap_err() - ); + // 3 characters + assert_eq!( + ParserError::InvalidSubtag, + variant_subtag("abc").unwrap_err() + ); + + // 9 characters + assert_eq!( + ParserError::InvalidSubtag, + variant_subtag("abcdefghi").unwrap_err() + ); + + // 4 characters with alphabet + assert_eq!( + ParserError::InvalidSubtag, + variant_subtag("aBCD").unwrap_err() + ); }