Skip to content

Commit

Permalink
Improve the formatting API.
Browse files Browse the repository at this point in the history
This addresses #96 and #97. It fixes consecutive digit separators not being
processed by enhancing the internal logic, adds dedicated logic for internal
and first digit separators to simplify the code and improve performance,
fixes the unit tests, and makes the errors consistent by adding checks,
when formatting is enabled, to ensure the correct logic is used.

Closes #96
Closes #97
  • Loading branch information
Alexhuszagh committed Sep 24, 2024
1 parent c102122 commit 6d82e08
Show file tree
Hide file tree
Showing 19 changed files with 2,292 additions and 262 deletions.
6 changes: 6 additions & 0 deletions CHANGELOG
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,17 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [Unreleased]

### Changed

- Higher performance when parsing floats with digit separators.

### Fixed

- Inlining inconsistency between public API methods (credit to @zheland)
- Incorrectly accepting leading zeros when `no_integer_leading_zeros` was enabled.
- Have consistent errors when an invalid leading digit is found for floating point numbers to always be `Error::InvalidDigit`.
- Incorrect parsing of consecutive digit separators.
- Inaccuracies when parsing digit separators at various positions, leading to incorrect errors being returned.

## [1.0.1] 2024-09-16

Expand Down
1 change: 1 addition & 0 deletions ci/comprehensive.sh
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ run_tests() {
cd "${home}"
cd lexical-parse-float/etc/correctness
cargo run "${@}" --release --bin test-parse-golang
cargo run "${@}" --release --bin test-parse-golang --features digit-separator
cargo run "${@}" --release --bin test-parse-unittests

# Test the write-float correctness tests.
Expand Down
2 changes: 2 additions & 0 deletions clippy.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ disallowed-macros = [
{ path = "std::println", reason = "no IO allowed" },
{ path = "std::format", reason = "no string allocation allowed" },
{ path = "std::debug", reason = "debugging macros should not be present in any release" },
# NOTE: unimplemented is fine because this can be for intentionally disabled methods
{ path = "std::todo", reason = "should never have TODO macros in releases" },
]
disallowed-methods = [
{ path = "std::io::stdout", reason = "no IO allowed" },
Expand Down
16 changes: 12 additions & 4 deletions lexical-parse-float/etc/correctness/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,19 +10,27 @@ path = "../.."
default-features = false
features = []

[dependencies.lexical-util]
path = "../../../lexical-util"
default-features = false
features = []

[dependencies]
rand = "0.8"
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
toml = "0.8"
rand_isaac = ">=0.3.0"
regex = { version = ">=1.10.6", optional = true}
lazy_static = { version = ">=1.5.0", optional = true }

[features]
std = ["lexical-parse-float/std"]
power-of-two = ["lexical-parse-float/power-of-two"]
radix = ["lexical-parse-float/radix"]
format = ["lexical-parse-float/format"]
std = ["lexical-parse-float/std", "lexical-util/std"]
power-of-two = ["lexical-parse-float/power-of-two", "lexical-util/power-of-two"]
radix = ["lexical-parse-float/radix", "lexical-util/radix"]
format = ["lexical-parse-float/format", "lexical-util/format"]
compact = ["lexical-parse-float/compact"]
digit-separator = ["format", "regex", "lazy_static"]

[workspace]

Expand Down
72 changes: 67 additions & 5 deletions lexical-parse-float/etc/correctness/test-parse-golang/main.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,70 @@
// Copyright 2021, Alex Huszagh. Unlicensed.
// See https://unlicense.org/

use lexical_parse_float::FromLexical;
#![allow(unused_imports)]

use lexical_parse_float::{FromLexicalWithOptions, Options};
use lexical_util::format::{NumberFormatBuilder, STANDARD};
use rand::{Rng, SeedableRng};
use rand_isaac::Isaac64Rng;
use core::{num, str};
use std::collections::HashMap;

fn run_test(line: &str) {
/// Fixed 32-byte seed passed to `Isaac64Rng::from_seed`, so the random
/// choices made by the test driver are the same on every run.
///
/// The bytes are the ASCII digits `14159265358979323846264338327950`.
#[allow(dead_code)]
pub const ISAAC_SEED: [u8; 32] = [
49, 52, 49, 53, 57, 50, 54, 53, 51, 53, 56, 57, 55, 57, 51, 50, 51, 56, 52, 54, 50, 54, 52, 51,
51, 56, 51, 50, 55, 57, 53, 48,
];

// Lazily-compiled regex matching a run of one or more `_` that is immediately
// followed by a `+` or `-`. Capture group 1 is the underscore run and capture
// group 2 is the sign, so a replacement can swap their order.
// Only compiled when the `digit-separator` feature is enabled.
#[cfg(feature = "digit-separator")]
lazy_static::lazy_static! {
static ref SIGN: regex::Regex = regex::Regex::new("(_+)([+-])").unwrap();
}

/// Checks one line of conversion test data after injecting random digit
/// separators into the number being parsed.
///
/// Each line is sliced by byte offset:
/// - `5..13`: hexadecimal bits of the expected `f32` (8 characters),
/// - `14..30`: hexadecimal bits of the expected `f64` (16 characters),
/// - `31..`: the decimal string to parse.
///
/// Between 1 and 4 `_` bytes are inserted at random offsets of the decimal
/// string (offsets are drawn from the original, pre-insertion length), and
/// the result is parsed with a format that enables leading, internal,
/// trailing and consecutive `_` separators.
///
/// # Panics
///
/// Panics if the line is shorter than the expected layout, if the decimal
/// string is empty, if either parse fails, or if the parsed bits differ from
/// the expected hexadecimal values.
#[cfg(feature = "digit-separator")]
fn run_test<Random: Rng>(line: &str, rng: &mut Random) {
    const FMT: u128 = NumberFormatBuilder::new()
        .digit_separator(num::NonZeroU8::new(b'_'))
        .leading_digit_separator(true)
        .internal_digit_separator(true)
        .trailing_digit_separator(true)
        .consecutive_digit_separator(true)
        .build();

    // The leading `hhhh` column (f16 bits) is present in the data but unused.
    let hex32 = line[5..13].to_lowercase();
    let hex64 = line[14..30].to_lowercase();
    let string = &line[31..];
    let options = Options::new();

    // Pick how many digit separators to inject, then inject them. The RNG is
    // consumed in a fixed order so a given seed always yields the same inputs.
    let count = rng.gen_range(1..=4);
    let mut vec = string.as_bytes().to_vec();
    let length = vec.len();
    for _ in 0..count {
        let idx = rng.gen_range(0..length);
        vec.insert(idx, b'_');
    }

    // Digit separators must not sit before a `+`/`-` sign, so move every run
    // of separators that precedes a sign to just after it. A number can carry
    // two signs (mantissa and exponent), hence `replace_all`: a plain
    // `replace` would only repair the first occurrence.
    let string = str::from_utf8(&vec).unwrap();
    let valid = SIGN.replace_all(string, "${2}${1}");

    let float32 = f32::from_lexical_with_options::<FMT>(valid.as_bytes(), &options).unwrap();
    let float64 = f64::from_lexical_with_options::<FMT>(valid.as_bytes(), &options).unwrap();
    assert_eq!(hex32, format!("{:0>8x}", float32.to_bits()));
    assert_eq!(hex64, format!("{:0>16x}", float64.to_bits()));
}

#[cfg(not(feature = "digit-separator"))]
fn run_test<Random: Rng>(line: &str, _: &mut Random) {
const FMT: u128 = STANDARD;

// Tests have the following format:
// hhhh ssssssss dddddddddddddddddd ....
// The `hhhh` part is the hexadecimal representation for f16,
Expand All @@ -14,9 +74,10 @@ fn run_test(line: &str) {
let hex32 = line[5..13].to_lowercase();
let hex64 = line[14..30].to_lowercase();
let string = &line[31..];
let options = Options::new();

let float32 = f32::from_lexical(string.as_bytes()).unwrap();
let float64 = f64::from_lexical(string.as_bytes()).unwrap();
let float32 = f32::from_lexical_with_options::<FMT>(string.as_bytes(), &options).unwrap();
let float64 = f64::from_lexical_with_options::<FMT>(string.as_bytes(), &options).unwrap();
assert_eq!(hex32, format!("{:0>8x}", float32.to_bits()));
assert_eq!(hex64, format!("{:0>16x}", float64.to_bits()));
}
Expand Down Expand Up @@ -68,13 +129,14 @@ fn main() {
]);

// Unfortunately, randomizing the data with miri is too expensive, so we just use it normally.
let mut rng = Isaac64Rng::from_seed(ISAAC_SEED);
for (&filename, data) in tests.iter() {
println!("Running Test: {}", filename);
for (count, line) in data.lines().enumerate() {
if cfg!(miri) && count % 10 == 0 {
println!("Running test {count} for conversion tests.");
}
run_test(line);
run_test(line, &mut rng);
if cfg!(miri) && count > 3000 {
break;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,10 @@ use rand_isaac::Isaac64Rng;
use std::mem::transmute;

fn main() {
let mut rnd = Isaac64Rng::from_seed(ISAAC_SEED);
let mut rng = Isaac64Rng::from_seed(ISAAC_SEED);
let mut i = 0;
while i < 10_000_000 {
let bits = rnd.next_u64();
let bits = rng.next_u64();
let x: f64 = unsafe { transmute(bits) };
if x.is_finite() {
validate(&format!("{:e}", x));
Expand Down
Loading

0 comments on commit 6d82e08

Please sign in to comment.