Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add to_ascii_upper, to_ascii_lower and eq_ignore_ascii_case in std::ascii #8231

Merged
merged 1 commit into from
Aug 7, 2013
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
180 changes: 169 additions & 11 deletions src/libstd/str/ascii.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,10 @@
use to_str::{ToStr,ToStrConsume};
use str;
use str::StrSlice;
use str::OwnedStr;
use container::Container;
use cast;
use ptr;
use iterator::{Iterator, IteratorUtil};
use vec::{CopyableVector, ImmutableVector, OwnedVector};
use to_bytes::IterBytes;
Expand All @@ -39,27 +42,19 @@ impl Ascii {
/// Convert to lowercase.
///
/// The byte is looked up in `ASCII_LOWER_MAP`: 'A' to 'Z' come back as
/// 'a' to 'z', every other value comes back unchanged.
#[inline]
pub fn to_lower(self) -> Ascii {
    // A single table lookup; this supersedes the earlier `65..90` range
    // check followed by `| 0x20`, which must not be kept alongside it.
    Ascii{chr: ASCII_LOWER_MAP[self.chr]}
}

/// Convert to uppercase.
///
/// The byte is looked up in `ASCII_UPPER_MAP`: 'a' to 'z' come back as
/// 'A' to 'Z', every other value comes back unchanged.
#[inline]
pub fn to_upper(self) -> Ascii {
    // A single table lookup; this supersedes the earlier `97..122` range
    // check followed by `& !0x20`, which must not be kept alongside it.
    Ascii{chr: ASCII_UPPER_MAP[self.chr]}
}

/// Compares two ASCII characters for equality, ignoring case.
#[inline]
pub fn eq_ignore_case(self, other: Ascii) -> bool {
    // Fold both sides through the lowercase table and compare the raw
    // bytes; no intermediate `Ascii` values are constructed.
    ASCII_LOWER_MAP[self.chr] == ASCII_LOWER_MAP[other.chr]
}
}

Expand Down Expand Up @@ -261,10 +256,124 @@ impl ToBytesConsume for ~[Ascii] {
}
}


/// Return a newly allocated copy of `string` in which the ASCII letters
/// 'a' to 'z' have been replaced by 'A' to 'Z'.
/// Everything else, non-ASCII letters included, is copied as is.
#[inline]
pub fn to_ascii_upper(string: &str) -> ~str {
    let upper = map_bytes(string, ASCII_UPPER_MAP);
    upper
}

/// Return a newly allocated copy of `string` in which the ASCII letters
/// 'A' to 'Z' have been replaced by 'a' to 'z'.
/// Everything else, non-ASCII letters included, is copied as is.
#[inline]
pub fn to_ascii_lower(string: &str) -> ~str {
    let lower = map_bytes(string, ASCII_LOWER_MAP);
    lower
}

// Build a new string by translating every byte of `string` through `map`.
//
// `map` is indexed by the byte value, so it has to provide an entry for
// every byte that can occur in `string`; the two tables in this file have
// 256 entries each.
// NOTE(review): the result is only valid UTF-8 if `map` maps ASCII bytes to
// ASCII bytes and leaves bytes >= 0x80 untouched, as both tables here do.
#[inline]
priv fn map_bytes(string: &str, map: &'static [u8]) -> ~str {
let len = string.len();
// Reserve room for exactly as many bytes as the input has.
let mut result = str::with_capacity(len);
unsafe {
// Write the translated bytes straight into the string's buffer,
// advancing the raw pointer by one byte per input byte.
do result.as_mut_buf |mut buf, _| {
for c in string.as_bytes().iter() {
*buf = map[*c];
buf = ptr::mut_offset(buf, 1)
}
}
// The buffer was filled behind the string's back, so its length has to
// be set by hand to the number of bytes just written.
str::raw::set_len(&mut result, len);
}
result
}

/// Test whether `a` and `b` are equal when ASCII case is ignored.
/// Gives the same answer as `to_ascii_lower(a) == to_ascii_lower(b)`,
/// but compares byte by byte instead of allocating temporary strings.
#[inline]
pub fn eq_ignore_ascii_case(a: &str, b: &str) -> bool {
    // Strings of different byte length can never match.
    if a.len() != b.len() {
        return false;
    }
    let mut byte_pairs = a.as_bytes().iter().zip(b.as_bytes().iter());
    byte_pairs.all(|(lhs, rhs)| ASCII_LOWER_MAP[*lhs] == ASCII_LOWER_MAP[*rhs])
}

// Byte-to-byte lowercase translation table with 256 entries, indexed by the
// input byte. Entries 0x41..0x5a ('A'..'Z') hold 0x61..0x7a ('a'..'z');
// every other entry holds its own index, so those bytes map to themselves.
priv static ASCII_LOWER_MAP: &'static [u8] = &[
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,
0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
// The next four rows (indices 0x40..0x5f) are where 'A'..'Z' are folded.
0x40, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
0x78, 0x79, 0x7a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,
0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f,
// Bytes 0x80 and above are identity-mapped.
0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f,
0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,
0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7,
0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf,
0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7,
0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf,
0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7,
0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf,
0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7,
0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef,
0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7,
0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff,
];

// Byte-to-byte uppercase translation table with 256 entries, indexed by the
// input byte. Entries 0x61..0x7a ('a'..'z') hold 0x41..0x5a ('A'..'Z');
// every other entry holds its own index, so those bytes map to themselves.
priv static ASCII_UPPER_MAP: &'static [u8] = &[
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,
0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f,
0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,
// The next four rows (indices 0x60..0x7f) are where 'a'..'z' are folded.
0x60, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f,
0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
0x58, 0x59, 0x5a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f,
// Bytes 0x80 and above are identity-mapped.
0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f,
0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,
0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7,
0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf,
0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7,
0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf,
0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7,
0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf,
0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7,
0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef,
0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7,
0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff,
];


#[cfg(test)]
mod tests {
use super::*;
use to_bytes::ToBytes;
use str::from_char;

macro_rules! v2ascii (
( [$($e:expr),*]) => ( [$(Ascii{chr:$e}),*]);
Expand Down Expand Up @@ -347,4 +456,53 @@ mod tests {

// Marked `#[should_fail]`: this test passes only if converting the
// non-ASCII character 'λ' fails.
#[test]
#[should_fail]
fn test_ascii_fail_char_slice() {
    'λ'.to_ascii();
}

#[test]
fn test_to_ascii_upper() {
assert_eq!(to_ascii_upper("url()URL()uRl()ürl"), ~"URL()URL()URL()üRL");
assert_eq!(to_ascii_upper("hıKß"), ~"HıKß");

let mut i = 0;
while i <= 500 {
let c = i as char;
let upper = if 'a' <= c && c <= 'z' { c + 'A' - 'a' } else { c };
assert_eq!(to_ascii_upper(from_char(i as char)), from_char(upper))
i += 1;
}
}

#[test]
fn test_to_ascii_lower() {
assert_eq!(to_ascii_lower("url()URL()uRl()Ürl"), ~"url()url()url()Ürl");
// Dotted capital I, Kelvin sign, Sharp S.
assert_eq!(to_ascii_lower("HİKß"), ~"hİKß");

let mut i = 0;
while i <= 500 {
let c = i as char;
let lower = if 'A' <= c && c <= 'Z' { c + 'a' - 'A' } else { c };
assert_eq!(to_ascii_lower(from_char(i as char)), from_char(lower))
i += 1;
}
}


#[test]
fn test_eq_ignore_ascii_case() {
assert!(eq_ignore_ascii_case("url()URL()uRl()Ürl", "url()url()url()Ürl"));
assert!(!eq_ignore_ascii_case("Ürl", "ürl"));
// Dotted capital I, Kelvin sign, Sharp S.
assert!(eq_ignore_ascii_case("HİKß", "hİKß"));
assert!(!eq_ignore_ascii_case("İ", "i"));
assert!(!eq_ignore_ascii_case("K", "k"));
assert!(!eq_ignore_ascii_case("ß", "s"));

let mut i = 0;
while i <= 500 {
let c = i as char;
let lower = if 'A' <= c && c <= 'Z' { c + 'a' - 'A' } else { c };
assert!(eq_ignore_ascii_case(from_char(i as char), from_char(lower)));
i += 1;
}
}
}