From d8c2956906eae34516f3d215cda16dc16656766a Mon Sep 17 00:00:00 2001 From: BO41 Date: Sun, 6 Oct 2019 15:59:49 +0200 Subject: [PATCH] Improve docs on some char boolean methods --- src/libcore/char/methods.rs | 172 +++++++++++++++++++++++------------- 1 file changed, 109 insertions(+), 63 deletions(-) diff --git a/src/libcore/char/methods.rs b/src/libcore/char/methods.rs index a69eb0f6d4b20..971d89e004446 100644 --- a/src/libcore/char/methods.rs +++ b/src/libcore/char/methods.rs @@ -116,9 +116,9 @@ impl char { // the code is split up here to improve execution speed for cases where // the `radix` is constant and 10 or smaller - let val = if radix <= 10 { + let val = if radix <= 10 { match self { - '0' ..= '9' => self as u32 - '0' as u32, + '0'..='9' => self as u32 - '0' as u32, _ => return None, } } else { @@ -130,8 +130,11 @@ impl char { } }; - if val < radix { Some(val) } - else { None } + if val < radix { + Some(val) + } else { + None + } } /// Returns an iterator that yields the hexadecimal Unicode escape of a @@ -303,8 +306,8 @@ impl char { '\r' => EscapeDefaultState::Backslash('r'), '\n' => EscapeDefaultState::Backslash('n'), '\\' | '\'' | '"' => EscapeDefaultState::Backslash(self), - '\x20' ..= '\x7e' => EscapeDefaultState::Char(self), - _ => EscapeDefaultState::Unicode(self.escape_unicode()) + '\x20'..='\x7e' => EscapeDefaultState::Char(self), + _ => EscapeDefaultState::Unicode(self.escape_unicode()), }; EscapeDefault { state: init_state } } @@ -436,30 +439,31 @@ impl char { pub fn encode_utf8(self, dst: &mut [u8]) -> &mut str { let code = self as u32; unsafe { - let len = - if code < MAX_ONE_B && !dst.is_empty() { + let len = if code < MAX_ONE_B && !dst.is_empty() { *dst.get_unchecked_mut(0) = code as u8; 1 } else if code < MAX_TWO_B && dst.len() >= 2 { *dst.get_unchecked_mut(0) = (code >> 6 & 0x1F) as u8 | TAG_TWO_B; *dst.get_unchecked_mut(1) = (code & 0x3F) as u8 | TAG_CONT; 2 - } else if code < MAX_THREE_B && dst.len() >= 3 { + } else if code < MAX_THREE_B && dst.len() >= 3 { *dst.get_unchecked_mut(0) = (code >> 12 & 0x0F) as u8 | TAG_THREE_B; - *dst.get_unchecked_mut(1) = (code >> 6 & 0x3F) as u8 | TAG_CONT; + *dst.get_unchecked_mut(1) = (code >> 6 & 0x3F) as u8 | TAG_CONT; *dst.get_unchecked_mut(2) = (code & 0x3F) as u8 | TAG_CONT; 3 } else if dst.len() >= 4 { *dst.get_unchecked_mut(0) = (code >> 18 & 0x07) as u8 | TAG_FOUR_B; *dst.get_unchecked_mut(1) = (code >> 12 & 0x3F) as u8 | TAG_CONT; - *dst.get_unchecked_mut(2) = (code >> 6 & 0x3F) as u8 | TAG_CONT; + *dst.get_unchecked_mut(2) = (code >> 6 & 0x3F) as u8 | TAG_CONT; *dst.get_unchecked_mut(3) = (code & 0x3F) as u8 | TAG_CONT; 4 } else { - panic!("encode_utf8: need {} bytes to encode U+{:X}, but the buffer has {}", + panic!( + "encode_utf8: need {} bytes to encode U+{:X}, but the buffer has {}", from_u32_unchecked(code).len_utf8(), code, - dst.len()) + dst.len(), + ) }; from_utf8_unchecked_mut(dst.get_unchecked_mut(..len)) } @@ -515,15 +519,24 @@ impl char { *dst.get_unchecked_mut(1) = 0xDC00 | ((code as u16) & 0x3FF); slice::from_raw_parts_mut(dst.as_mut_ptr(), 2) } else { - panic!("encode_utf16: need {} units to encode U+{:X}, but the buffer has {}", + panic!( + "encode_utf16: need {} units to encode U+{:X}, but the buffer has {}", from_u32_unchecked(code).len_utf16(), code, - dst.len()) + dst.len(), + ) } } } - /// Returns `true` if this `char` is an alphabetic code point, and false if not. + /// Returns `true` if this `char` has the `Alphabetic` property. + /// + /// `Alphabetic` is described in Chapter 4 (Character Properties) of the [Unicode Standard] and + /// specified in the [Unicode Character Database][ucd] [`DerivedCoreProperties.txt`]. + /// + /// [Unicode Standard]: https://www.unicode.org/versions/latest/ + /// [ucd]: https://www.unicode.org/reports/tr44/ + /// [`DerivedCoreProperties.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt /// /// # Examples /// @@ -547,10 +560,14 @@ impl char { } } - /// Returns `true` if this `char` is lowercase. + /// Returns `true` if this `char` has the `Lowercase` property. /// - /// 'Lowercase' is defined according to the terms of the Unicode Derived Core - /// Property `Lowercase`. + /// `Lowercase` is described in Chapter 4 (Character Properties) of the [Unicode Standard] and + /// specified in the [Unicode Character Database][ucd] [`DerivedCoreProperties.txt`]. + /// + /// [Unicode Standard]: https://www.unicode.org/versions/latest/ + /// [ucd]: https://www.unicode.org/reports/tr44/ + /// [`DerivedCoreProperties.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt /// /// # Examples /// @@ -575,10 +592,14 @@ impl char { } } - /// Returns `true` if this `char` is uppercase. + /// Returns `true` if this `char` has the `Uppercase` property. + /// + /// `Uppercase` is described in Chapter 4 (Character Properties) of the [Unicode Standard] and + /// specified in the [Unicode Character Database][ucd] [`DerivedCoreProperties.txt`]. /// - /// 'Uppercase' is defined according to the terms of the Unicode Derived Core - /// Property `Uppercase`. + /// [Unicode Standard]: https://www.unicode.org/versions/latest/ + /// [ucd]: https://www.unicode.org/reports/tr44/ + /// [`DerivedCoreProperties.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt /// /// # Examples /// @@ -603,10 +624,12 @@ impl char { } } - /// Returns `true` if this `char` is whitespace. + /// Returns `true` if this `char` has the `White_Space` property. /// - /// 'Whitespace' is defined according to the terms of the Unicode Derived Core - /// Property `White_Space`. + /// `White_Space` is specified in the [Unicode Character Database][ucd] [`PropList.txt`]. + /// + /// [ucd]: https://www.unicode.org/reports/tr44/ + /// [`PropList.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/PropList.txt /// /// # Examples /// @@ -630,10 +653,10 @@ impl char { } } - /// Returns `true` if this `char` is alphanumeric. + /// Returns `true` if this `char` satisfies either [`is_alphabetic()`] or [`is_numeric()`]. /// - /// 'Alphanumeric'-ness is defined in terms of the Unicode General Categories - /// `Nd`, `Nl`, `No` and the Derived Core Property `Alphabetic`. + /// [`is_alphabetic()`]: #method.is_alphabetic + /// [`is_numeric()`]: #method.is_numeric /// /// # Examples /// @@ -655,10 +678,15 @@ impl char { self.is_alphabetic() || self.is_numeric() } - /// Returns `true` if this `char` is a control code point. + /// Returns `true` if this `char` has the general category for control codes. + /// + /// Control codes (code points with the general category of `Cc`) are described in Chapter 4 + /// (Character Properties) of the [Unicode Standard] and specified in the [Unicode Character + /// Database][ucd] [`UnicodeData.txt`]. /// - /// 'Control code point' is defined in terms of the Unicode General - /// Category `Cc`. + /// [Unicode Standard]: https://www.unicode.org/versions/latest/ + /// [ucd]: https://www.unicode.org/reports/tr44/ + /// [`UnicodeData.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt /// /// # Examples /// @@ -675,19 +703,29 @@ impl char { general_category::Cc(self) } - /// Returns `true` if this `char` is an extended grapheme character. + /// Returns `true` if this `char` has the `Grapheme_Extend` property. /// - /// 'Extended grapheme character' is defined in terms of the Unicode Shaping and Rendering - /// Category `Grapheme_Extend`. + /// `Grapheme_Extend` is described in [Unicode Standard Annex #29 (Unicode Text + /// Segmentation)][uax29] and specified in the [Unicode Character Database][ucd] + /// [`DerivedCoreProperties.txt`]. + /// + /// [uax29]: https://www.unicode.org/reports/tr29/ + /// [ucd]: https://www.unicode.org/reports/tr44/ + /// [`DerivedCoreProperties.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt #[inline] pub(crate) fn is_grapheme_extended(self) -> bool { derived_property::Grapheme_Extend(self) } - /// Returns `true` if this `char` is numeric. + /// Returns `true` if this `char` has one of the general categories for numbers. + /// + /// The general categories for numbers (`Nd` for decimal digits, `Nl` for letter-like numeric + /// characters, and `No` for other numeric characters) are specified in the [Unicode Character + /// Database][ucd] [`UnicodeData.txt`]. /// - /// 'Numeric'-ness is defined in terms of the Unicode General Categories - /// `Nd`, `Nl`, `No`. + /// [Unicode Standard]: https://www.unicode.org/versions/latest/ + /// [ucd]: https://www.unicode.org/reports/tr44/ + /// [`UnicodeData.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt /// /// # Examples /// @@ -713,25 +751,29 @@ impl char { } } - /// Returns an iterator that yields the lowercase equivalent of a `char` - /// as one or more `char`s. + /// Returns an iterator that yields the lowercase mapping of this `char` as one or more + /// `char`s. /// - /// If a character does not have a lowercase equivalent, the same character - /// will be returned back by the iterator. + /// If this `char` does not have a lowercase mapping, the iterator yields the same `char`. /// - /// This performs complex unconditional mappings with no tailoring: it maps - /// one Unicode character to its lowercase equivalent according to the - /// [Unicode database] and the additional complex mappings - /// [`SpecialCasing.txt`]. Conditional mappings (based on context or - /// language) are not considered here. + /// If this `char` has a one-to-one lowercase mapping given by the [Unicode Character + /// Database][ucd] [`UnicodeData.txt`], the iterator yields that `char`. /// - /// For a full reference, see [here][reference]. + /// [ucd]: https://www.unicode.org/reports/tr44/ + /// [`UnicodeData.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt /// - /// [Unicode database]: ftp://ftp.unicode.org/Public/UNIDATA/UnicodeData.txt + /// If this `char` requires special considerations (e.g. multiple `char`s) the iterator yields + /// the `char`(s) given by [`SpecialCasing.txt`]. /// - /// [`SpecialCasing.txt`]: ftp://ftp.unicode.org/Public/UNIDATA/SpecialCasing.txt + /// [`SpecialCasing.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/SpecialCasing.txt /// - /// [reference]: http://www.unicode.org/versions/Unicode7.0.0/ch03.pdf#G33992 + /// This operation performs an unconditional mapping without tailoring. That is, the conversion + /// is independent of context and language. + /// + /// In the [Unicode Standard], Chapter 4 (Character Properties) discusses case mapping in + /// general and Chapter 3 (Conformance) discusses the default algorithm for case conversion. + /// + /// [Unicode Standard]: https://www.unicode.org/versions/latest/ /// /// # Examples /// @@ -774,25 +816,29 @@ impl char { ToLowercase(CaseMappingIter::new(conversions::to_lower(self))) } - /// Returns an iterator that yields the uppercase equivalent of a `char` - /// as one or more `char`s. + /// Returns an iterator that yields the uppercase mapping of this `char` as one or more + /// `char`s. + /// + /// If this `char` does not have a uppercase mapping, the iterator yields the same `char`. + /// + /// If this `char` has a one-to-one uppercase mapping given by the [Unicode Character + /// Database][ucd] [`UnicodeData.txt`], the iterator yields that `char`. /// - /// If a character does not have an uppercase equivalent, the same character - /// will be returned back by the iterator. + /// [ucd]: https://www.unicode.org/reports/tr44/ + /// [`UnicodeData.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt /// - /// This performs complex unconditional mappings with no tailoring: it maps - /// one Unicode character to its uppercase equivalent according to the - /// [Unicode database] and the additional complex mappings - /// [`SpecialCasing.txt`]. Conditional mappings (based on context or - /// language) are not considered here. + /// If this `char` requires special considerations (e.g. multiple `char`s) the iterator yields + /// the `char`(s) given by [`SpecialCasing.txt`]. /// - /// For a full reference, see [here][reference]. + /// [`SpecialCasing.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/SpecialCasing.txt /// - /// [Unicode database]: ftp://ftp.unicode.org/Public/UNIDATA/UnicodeData.txt + /// This operation performs an unconditional mapping without tailoring. That is, the conversion + /// is independent of context and language. /// - /// [`SpecialCasing.txt`]: ftp://ftp.unicode.org/Public/UNIDATA/SpecialCasing.txt + /// In the [Unicode Standard], Chapter 4 (Character Properties) discusses case mapping in + /// general and Chapter 3 (Conformance) discusses the default algorithm for case conversion. /// - /// [reference]: http://www.unicode.org/versions/Unicode7.0.0/ch03.pdf#G33992 + /// [Unicode Standard]: https://www.unicode.org/versions/latest/ /// /// # Examples ///