Skip to content

Commit 919d9c1

Browse files
committed
feat(matching): improve matching, more tests
1 parent b541c32 commit 919d9c1

File tree

2 files changed

+53
-62
lines changed

2 files changed

+53
-62
lines changed

src/lib.rs

Lines changed: 45 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,7 @@ pub enum Error {
8686
InvalidLanguage,
8787
/// A subtag may be eight characters in length at maximum.
8888
SubtagTooLong,
89-
/// At maximum three extlangss are allowed, but zero to one extlangss are preferred.
89+
/// At maximum three extlangs are allowed, but zero to one extlangs are preferred.
9090
TooManyExtlangs,
9191
}
9292

@@ -100,7 +100,7 @@ impl ErrorTrait for Error {
100100
Error::InvalidSubtag => "A subtag fails to parse, it does not match any other subtags",
101101
Error::InvalidLanguage => "The given language subtag is invalid",
102102
Error::SubtagTooLong => "A subtag may be eight characters in length at maximum",
103-
Error::TooManyExtlangs => "At maximum three extlangss are allowed",
103+
Error::TooManyExtlangs => "At maximum three extlangs are allowed",
104104
}
105105
}
106106
}
@@ -153,7 +153,7 @@ pub const GRANDFATHERED_REGULAR: [&'static str; 9] = [
153153
/// communication. This includes constructed and artificial languages
154154
/// but excludes languages not intended primarily for human
155155
/// communication, such as programming languages.
156-
#[derive(Debug, Eq, Clone)]
156+
#[derive(Debug, Default, Eq, Clone)]
157157
pub struct LanguageTag {
158158
/// Language subtags are used to indicate the language, ignoring all
159159
/// other aspects such as script, region or specific invariants.
@@ -208,71 +208,60 @@ impl LanguageTag {
208208
/// ```
209209
/// # #[macro_use] extern crate language_tags;
210210
/// # fn main() {
211-
/// let language_range1 = langtag!(it);
212-
/// let language_tag1 = langtag!(de);
213-
/// let language_tag2 = langtag!(it;;;CH);
214-
/// assert!(!language_range1.matches(&language_tag1));
215-
/// assert!(language_range1.matches(&language_tag2));
211+
/// let range_italian = langtag!(it);
212+
/// let tag_german = langtag!(de);
213+
/// let tag_italian_switzerland = langtag!(it;;;CH);
214+
/// assert!(!range_italian.matches(&tag_german));
215+
/// assert!(range_italian.matches(&tag_italian_switzerland));
216216
///
217-
/// let language_range2 = langtag!(es;;;BR);
218-
/// let language_tag3 = langtag!(es);
219-
/// assert!(!language_range2.matches(&language_tag3));
217+
/// let range_spanish_brazil = langtag!(es;;;BR);
218+
/// let tag_spanish = langtag!(es);
219+
/// assert!(!range_spanish_brazil.matches(&tag_spanish));
220220
/// # }
221221
/// ```
222222
pub fn matches(&self, other: &LanguageTag) -> bool {
223223
assert!(self.extensions.is_empty());
224224
assert!(self.privateuse.is_empty());
225-
return matches_option(&self.language, &other.language) &&
226-
self.extlangs.iter().all(|x| other.extlangs.iter().all(|y| x.eq_ignore_ascii_case(y))) &&
227-
matches_option(&self.script, &other.script) &&
228-
matches_option(&self.region, &other.region);
225+
return matches_option(&self.language, &other.language)
226+
&& matches_vec(&self.extlangs, &other.extlangs)
227+
&& matches_option(&self.script, &other.script)
228+
&& matches_option(&self.region, &other.region)
229+
&& matches_vec(&self.variants, &other.variants);
229230

230231
fn matches_option(a: &Option<String>, b: &Option<String>) -> bool {
231-
match (a.is_some(), b.is_some()) {
232-
(true, true) => a.as_ref().unwrap().eq_ignore_ascii_case(b.as_ref().unwrap()),
233-
(true, false) => false,
234-
(false, _) => true,
232+
match (a, b) {
233+
(&Some(ref a), &Some(ref b)) => a.eq_ignore_ascii_case(b),
234+
(&None, _) => true,
235+
(_, &None) => false,
235236
}
236237
}
238+
fn matches_vec(a: &Vec<String>, b: &Vec<String>) -> bool {
239+
a.iter().zip(b.iter()).all(|(x, y)| x.eq_ignore_ascii_case(y))
240+
}
237241
}
238242
}
239243

240244
impl PartialEq for LanguageTag {
241245
fn eq(&self, other: &LanguageTag) -> bool {
242-
return eq_option(&self.language, &other.language) &&
243-
eq_vec(&self.extlangs, &other.extlangs) &&
244-
eq_option(&self.script, &other.script) &&
245-
eq_option(&self.region, &other.region) &&
246-
self.variants.iter().all(|x| other.variants.iter().all(|y| x.eq_ignore_ascii_case(y))) &&
247-
BTreeSet::from_iter(&self.extensions) == BTreeSet::from_iter(&other.extensions) &&
248-
self.extensions.keys().all(|a| eq_vec(self.extensions.get(a).unwrap(),
249-
other.extensions.get(a).unwrap())) &&
250-
eq_vec(&self.privateuse, &other.privateuse);
246+
return eq_option(&self.language, &other.language)
247+
&& eq_vec(&self.extlangs, &other.extlangs)
248+
&& eq_option(&self.script, &other.script)
249+
&& eq_option(&self.region, &other.region)
250+
&& eq_vec(&self.variants, &other.variants)
251+
&& BTreeSet::from_iter(&self.extensions) == BTreeSet::from_iter(&other.extensions)
252+
&& self.extensions.keys().all(|a|
253+
eq_vec(self.extensions.get(a).unwrap(), other.extensions.get(a).unwrap()))
254+
&& eq_vec(&self.privateuse, &other.privateuse);
251255

252256
fn eq_option(a: &Option<String>, b: &Option<String>) -> bool {
253-
match (a.is_some(), b.is_some()) {
254-
(true, true) => a.as_ref().unwrap().eq_ignore_ascii_case(b.as_ref().unwrap()),
255-
(false, false) => true,
257+
match (a, b) {
258+
(&Some(ref a), &Some(ref b)) => a.eq_ignore_ascii_case(b),
259+
(&None, &None) => true,
256260
_ => false,
257261
}
258262
}
259263
fn eq_vec(a: &Vec<String>, b: &Vec<String>) -> bool {
260-
a.len() == b.len() &&
261-
a.iter().zip(b.iter()).all(|(x, y)| x.eq_ignore_ascii_case(y))
262-
}
263-
}
264-
}
265-
266-
impl Default for LanguageTag {
267-
fn default() -> LanguageTag {
268-
LanguageTag {
269-
language: None,
270-
extlangs: Vec::new(),
271-
script: None,
272-
region: None,
273-
variants: Vec::new(),
274-
extensions: BTreeMap::new(),
275-
privateuse: Vec::new(),
264+
a.len() == b.len() && a.iter().zip(b.iter()).all(|(x, y)| x.eq_ignore_ascii_case(y))
276265
}
277266
}
278267
}
@@ -284,19 +273,19 @@ impl std::str::FromStr for LanguageTag {
284273
if !is_alphanumeric_or_dash(t) {
285274
return Err(Error::ForbiddenChar);
286275
}
276+
let mut langtag: LanguageTag = Default::default();
287277
// Handle grandfathered tags
288-
if let Some(tag) = GRANDFATHERED_IRREGULAR.iter().find(|x| x.eq_ignore_ascii_case(t)) {
289-
return Ok(simple_langtag(tag))
290-
}
291-
if let Some(tag) = GRANDFATHERED_REGULAR.iter().find(|x| x.eq_ignore_ascii_case(t)) {
292-
return Ok(simple_langtag(tag))
278+
if let Some(tag) = GRANDFATHERED_IRREGULAR.iter()
279+
.chain(GRANDFATHERED_REGULAR.iter())
280+
.find(|x| x.eq_ignore_ascii_case(t)) {
281+
langtag.language = Some((*tag).to_owned());
282+
return Ok(langtag)
293283
}
294284
// Handle normal tags
295285
// The parser has a position from 0 to 6. Bigger positions represent the ASCII codes of
296286
// single character extensions
297287
// language-extlangs-script-region-variant-extension-privateuse
298288
// --- 0 -- -- 1 -- -- 2 - -- 3 - -- 4 -- --- x --- ---- 6 ---
299-
let mut langtag: LanguageTag = Default::default();
300289
let mut position: u8 = 0;
301290
for subtag in t.split('-') {
302291
if subtag.len() > 8 {
@@ -314,7 +303,7 @@ impl std::str::FromStr for LanguageTag {
314303
}
315304
langtag.language = Some(subtag.to_owned());
316305
if subtag.len() < 4 {
317-
// extlangss are only allowed for short language tags
306+
// extlangs are only allowed for short language tags
318307
position = 1;
319308
} else {
320309
position = 2;
@@ -325,9 +314,9 @@ impl std::str::FromStr for LanguageTag {
325314
position = 2;
326315
} else if position == 2 && subtag.len() == 3 && is_alphabetic(subtag)
327316
&& !langtag.extlangs.is_empty() {
328-
// Multiple extlangss
317+
// Multiple extlangs
329318
if langtag.extlangs.len() > 2 {
330-
// maximum 3 extlangss
319+
// maximum 3 extlangs
331320
return Err(Error::TooManyExtlangs);
332321
}
333322
langtag.extlangs.push(subtag.to_owned());
@@ -364,12 +353,6 @@ impl std::str::FromStr for LanguageTag {
364353
return Err(Error::EmptyPrivateUse);
365354
}
366355
return Ok(langtag);
367-
368-
fn simple_langtag(s: &str) -> LanguageTag {
369-
let mut x: LanguageTag = Default::default();
370-
x.language = Some(s.to_owned());
371-
x
372-
}
373356
}
374357
}
375358

tests/tests.rs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -204,6 +204,14 @@ fn test_private_tag() {
204204
assert_eq!(format!("{}", tag), "x-foo-bar");
205205
}
206206

207+
#[test]
208+
fn test_grandfathered_tag() {
209+
let tag_irregular: LanguageTag = "i-klingon".parse().unwrap();
210+
assert_eq!(tag_irregular.language.unwrap(), "i-klingon");
211+
let tag_regular: LanguageTag = "zh-hakka".parse().unwrap();
212+
assert_eq!(tag_regular.language.unwrap(), "zh-hakka");
213+
}
214+
207215
#[test]
208216
fn test_eq() {
209217
let mut tag1: LanguageTag = Default::default();

0 commit comments

Comments
 (0)