diff --git a/src/lib.rs b/src/lib.rs index 9cba1da..cb0672d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,12 +1,18 @@ -//! Substring method for string types. -//! -//! This crate provides a [`substring()`] method on Rust string types. The method takes a start and -//! end character index and returns a string slice of the characters within that range. -//! -//! The method is provided via the [`Substring`] trait which is implemented on the [`str`] -//! primitive. -//! -//! # Example +//! Substring methods for string types. +//! +//! This crate provides substring methods for Rust string types. These methods take an index (a type implementing [`RangeBounds`]) and return a string slice containing the specified substring. +//! +//! These methods are provided via traits which are implemented on the [`str`] primitive. There are two traits, providing two different types of substrings: +//! +//! - [`CharSubstring`] - Obtains a substring using character indexes. This won't always match your intuition, as some graphemes are made of multiple characters, leading to unexpected results. +//! - [`GraphemeSubstring`] - Obtains a substring using grapheme indexes. **Requires the `grapheme` feature to be enabled.** +//! +//! As Rust strings are UTF-8 encoded, the algorithm for finding a substring is `O(n)`, where `n` +//! is the byte length of the string. This is due to characters not being of predictable byte +//! lengths. +//! +//! # Examples +//! ## Character-based Substring //! ``` //! use substring::CharSubstring; //! @@ -16,29 +22,20 @@ //! // Also works on a String. //! assert_eq!("foobar".to_string().char_substring(1..6), "oobar"); //! ``` -//! -//! As Rust strings are UTF-8 encoded, the algorithm for finding a character substring is `O(n)`, -//! where `n` is the byte length of the string. This is due to characters not being of predictible -//! byte lengths. -//! -//! # Note -//! The indexing of substrings is based on [*Unicode Scalar Value*]. As such, substrings may not -//! 
always match your intuition: -//! +//! +//! ## Grapheme-based Substring +//! First, enable the `grapheme` feature: +//! ``` toml +//! substring = {version = "2", features = ["grapheme"]} //! ``` -//! use substring::CharSubstring; -//! -//! assert_eq!("ã".char_substring(0..1), "a"); // As opposed to "ã". -//! assert_eq!("ã".char_substring(1..2), "\u{0303}") +//! +//! Then, use the `GraphemeSubstring` trait in your code. //! ``` +//! use substring::GraphemeSubstring; //! -//! The above example occurs because "ã" is technically made up of two UTF-8 scalar values. -//! -//! [`str`]: https://doc.rust-lang.org/std/primitive.str.html -//! [`Substring`]: trait.Substring.html -//! [`substring()`]: trait.Substring.html#tymethod.substring -//! -//! [*Unicode Scalar Value*]: http://www.unicode.org/glossary/#unicode_scalar_value +//! // Obtains the full "ã" grapheme (consisting of two `char`s). +//! assert_eq!("foobãr".grapheme_substring(2..5), "obã"); +//! ``` #![no_std]