From f309dde65c6d70a706a366c1691452aa6e15a234 Mon Sep 17 00:00:00 2001 From: Sergey Potapov Date: Fri, 23 Jun 2023 23:36:35 +0200 Subject: [PATCH] Make max_len and min_len check length in chars, not bytes --- CHANGELOG.md | 1 + README.md | 4 +- dummy/src/main.rs | 67 ++--------------------------- nutype/src/lib.rs | 4 +- nutype_macros/src/string/gen/mod.rs | 19 +++++++- test_suite/tests/string.rs | 6 +++ 6 files changed, 31 insertions(+), 70 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1364242..e65beae 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,5 @@ ### v0.3.0 - 2023-??-?? +* [BREAKING] `min_len` and `max_len` validators run against number of characters in a string (`val.chars().count()`), not number of bytes (`val.len()`). * Add `finite` validation for float types which checks against NaN and infinity. * Enable deriving of `Eq` and `Ord` on float types (if `finite` validation is present) * Enable deriving of `TryFrom` for types without validation (in this case Error type is `std::convert::Infallible`) diff --git a/README.md b/README.md index b578827..7a5f6d4 100644 --- a/README.md +++ b/README.md @@ -127,8 +127,8 @@ At the moment the string inner type supports only `String` (owned) type. | Validator | Description | Error variant | Example | |-------------|---------------------------------------------------------------------------------|-----------------|----------------------------------------------| -| `max_len` | Max length of the string | `TooLong` | `max_len = 255` | -| `min_len` | Min length of the string | `TooShort` | `min_len = 5` | +| `max_len` | Max length of the string (in chars, not bytes) | `TooLong` | `max_len = 255` | +| `min_len` | Min length of the string (in chars, not bytes) | `TooShort` | `min_len = 5` | | `not_empty` | Rejects an empty string | `Empty` | `not_empty` | | `regex` | Validates format with a regex. Requires `regex` feature. 
| `RegexMismatch` | `regex = "^[0-9]{7}$"` or `regex = ID_REGEX` | | `with` | Custom validator. A function or closure that receives `&str` and returns `bool` | `Invalid` | `with = \|s: &str\| s.contains('@')` | diff --git a/dummy/src/main.rs b/dummy/src/main.rs index 7836cfb..7d05bc3 100644 --- a/dummy/src/main.rs +++ b/dummy/src/main.rs @@ -1,67 +1,6 @@ use nutype::nutype; -use schemars::JsonSchema; -#[nutype(validate(finite, max = 12.34))] -#[derive(FromStr, Display, Clone, Copy, Serialize, Deserialize, JsonSchema, PartialEq, Eq)] -pub struct Dist(f64); -#[nutype( - new_unchecked - validate(min = 18, max = 99) -)] -#[derive(FromStr, Display, Clone, Copy, Serialize, Deserialize, JsonSchema)] -pub struct Age(u8); +#[nutype(validate(min_len = 3, max_len = 255))] +pub struct Name(String); -#[nutype(new_unchecked)] -#[derive(Debug, FromStr, Display, Clone, Serialize, JsonSchema)] -pub struct Username(String); - -use lazy_static::lazy_static; -use regex::Regex; - -lazy_static! { - static ref PIN_CODE_REGEX_LAZY_STATIC: Regex = Regex::new("^[0-9]{4}$").unwrap(); -} - -#[nutype(validate(regex = PIN_CODE_REGEX_LAZY_STATIC))] -// #[nutype(validate(regex = "^[0-9]{4}$"))] -#[derive(Debug)] -pub struct PinCode(String); - -#[nutype( - new_unchecked - validate(finite) -)] -#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone, Copy)] -pub struct Coefficient(f64); - -fn main() { - let dist: Dist = "11.4".parse().unwrap(); - println!("dist = {}", dist.into_inner()); - - let age: Age = "77".parse().unwrap(); - let json = serde_json::to_string(&age).unwrap(); - println!("AGE JSON = {json}"); - - let username: Username = "greyblake".parse().unwrap(); - let json = serde_json::to_string(&username).unwrap(); - println!("USERNAME JSON = {json}"); - - let dist: Dist = serde_json::from_str("12.339999999999").unwrap(); - println!("Dist = {dist}"); - - let name = "Bang".to_string(); - - let username = unsafe { Username::new_unchecked(name) }; - println!("{username:#?}"); - - let pin_result 
= PinCode::new("1223 "); - println!("\npin_result = {pin_result:?}\n"); - - let k1 = Coefficient::new(0.0).unwrap(); - let k2 = Coefficient::new(1.21).unwrap(); - let k3 = Coefficient::new(3.21).unwrap(); - - let mut ks = [k3, k1, k2, k1, k3]; - ks.sort(); - println!("{ks:?}"); -} +fn main() {} diff --git a/nutype/src/lib.rs b/nutype/src/lib.rs index 099ef1a..1c03e4f 100644 --- a/nutype/src/lib.rs +++ b/nutype/src/lib.rs @@ -121,8 +121,8 @@ //! //! | Validator | Description | Error variant | Example | //! |-------------|---------------------------------------------------------------------------------|-----------------|----------------------------------------------| -//! | `max_len` | Max length of the string | `TooLong` | `max_len = 255` | -//! | `min_len` | Min length of the string | `TooShort` | `min_len = 5` | +//! | `max_len` | Max length of the string (in chars, not bytes) | `TooLong` | `max_len = 255` | +//! | `min_len` | Min length of the string (in chars, not bytes) | `TooShort` | `min_len = 5` | //! | `not_empty` | Rejects an empty string | `Empty` | `not_empty` | //! | `regex` | Validates format with a regex. Requires `regex` feature. | `RegexMismatch` | `regex = "^[0-9]{7}$"` or `regex = ID_REGEX` | //! | `with` | Custom validator. A function or closure that receives `&str` and returns `bool` | `Invalid` | `with = \|s: &str\| s.contains('@')` | diff --git a/nutype_macros/src/string/gen/mod.rs b/nutype_macros/src/string/gen/mod.rs index 8545e66..e38f578 100644 --- a/nutype_macros/src/string/gen/mod.rs +++ b/nutype_macros/src/string/gen/mod.rs @@ -177,19 +177,25 @@ pub fn gen_string_sanitize_fn(sanitizers: &[StringSanitizer]) -> TokenStream { pub fn gen_string_validate_fn(type_name: &TypeName, validators: &[StringValidator]) -> TokenStream { let error_name = gen_error_type_name(type_name); + // Indicates that `chars_count` variable needs to be set, which is used within + // min_len and max_len validations. 
+ let mut requires_chars_count = false; + let validations: TokenStream = validators .iter() .map(|validator| match validator { StringValidator::MaxLen(max_len) => { + requires_chars_count = true; quote!( - if val.len() > #max_len { + if chars_count > #max_len { return Err(#error_name::TooLong); } ) } StringValidator::MinLen(min_len) => { + requires_chars_count = true; quote!( - if val.len() < #min_len { + if chars_count < #min_len { return Err(#error_name::TooShort); } ) @@ -237,8 +243,17 @@ pub fn gen_string_validate_fn(type_name: &TypeName, validators: &[StringValidato }) .collect(); + let chars_count_if_required = if requires_chars_count { + quote!( + let chars_count = val.chars().count(); + ) + } else { + quote!() + }; + quote!( fn validate(val: &str) -> ::core::result::Result<(), #error_name> { + #chars_count_if_required #validations Ok(()) } diff --git a/test_suite/tests/string.rs b/test_suite/tests/string.rs index b8cfef0..a0d3fa3 100644 --- a/test_suite/tests/string.rs +++ b/test_suite/tests/string.rs @@ -99,6 +99,9 @@ mod validators { assert_eq!(Name::new("Anton").unwrap().into_inner(), "Anton"); assert_eq!(Name::new("Serhii"), Err(NameError::TooLong)); + + // Ukrainian, Cyrillic. Every char is 2 bytes. + assert_eq!(Name::new("Антон").unwrap().into_inner(), "Антон"); } #[test] @@ -109,6 +112,9 @@ mod validators { assert_eq!(Name::new("Anton"), Err(NameError::TooShort)); assert_eq!(Name::new("Serhii").unwrap().into_inner(), "Serhii"); + + // Ukrainian, Cyrillic. Every char is 2 bytes. + assert_eq!(Name::new("Антон"), Err(NameError::TooShort)); } #[test]