Skip to content

Commit

Permalink
Merge pull request #161 from Alexhuszagh/issue_96
Browse files Browse the repository at this point in the history
Improve the formatting API.
  • Loading branch information
Alexhuszagh authored Sep 24, 2024
2 parents c102122 + df828cd commit eb3eb29
Show file tree
Hide file tree
Showing 19 changed files with 2,299 additions and 262 deletions.
6 changes: 6 additions & 0 deletions CHANGELOG
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,17 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [Unreleased]

### Changed

- Higher performance when parsing floats with digit separators.

### Fixed

- Inlining inconsistency between public API methods (credit to @zheland)
- Incorrectly accepting leading zeros when `no_integer_leading_zeros` was enabled.
- Have consistent errors when an invalid leading digit is found for floating point numbers to always be `Error::InvalidDigit`.
- Incorrect parsing of consecutive digit separators.
- Inaccuracies when parsing digit separators at various positions, leading to incorrect errors being returned.

## [1.0.1] 2024-09-16

Expand Down
1 change: 1 addition & 0 deletions ci/comprehensive.sh
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ run_tests() {
cd "${home}"
cd lexical-parse-float/etc/correctness
cargo run "${@}" --release --bin test-parse-golang
cargo run "${@}" --release --bin test-parse-golang --features digit-separator
cargo run "${@}" --release --bin test-parse-unittests

# Test the write-float correctness tests.
Expand Down
2 changes: 2 additions & 0 deletions clippy.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ disallowed-macros = [
{ path = "std::println", reason = "no IO allowed" },
{ path = "std::format", reason = "no string allocation allowed" },
{ path = "std::debug", reason = "debugging macros should not be present in any release" },
# NOTE: unimplemented is fine because this can be for intentionally disabled methods
{ path = "std::todo", reason = "should never have TODO macros in releases" },
]
disallowed-methods = [
{ path = "std::io::stdout", reason = "no IO allowed" },
Expand Down
16 changes: 12 additions & 4 deletions lexical-parse-float/etc/correctness/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,19 +10,27 @@ path = "../.."
default-features = false
features = []

[dependencies.lexical-util]
path = "../../../lexical-util"
default-features = false
features = []

[dependencies]
rand = "0.8"
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
toml = "0.8"
rand_isaac = ">=0.3.0"
regex = { version = ">=1.10.6", optional = true}
lazy_static = { version = ">=1.5.0", optional = true }

[features]
std = ["lexical-parse-float/std"]
power-of-two = ["lexical-parse-float/power-of-two"]
radix = ["lexical-parse-float/radix"]
format = ["lexical-parse-float/format"]
std = ["lexical-parse-float/std", "lexical-util/std"]
power-of-two = ["lexical-parse-float/power-of-two", "lexical-util/power-of-two"]
radix = ["lexical-parse-float/radix", "lexical-util/radix"]
format = ["lexical-parse-float/format", "lexical-util/format"]
compact = ["lexical-parse-float/compact"]
digit-separator = ["format", "regex", "lazy_static"]

[workspace]

Expand Down
72 changes: 67 additions & 5 deletions lexical-parse-float/etc/correctness/test-parse-golang/main.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,70 @@
// Copyright 2021, Alex Huszagh. Unlicensed.
// See https://unlicense.org/

use lexical_parse_float::FromLexical;
#![allow(unused_imports)]

use lexical_parse_float::{FromLexicalWithOptions, Options};
use lexical_util::format::{NumberFormatBuilder, STANDARD};
use rand::{Rng, SeedableRng};
use rand_isaac::Isaac64Rng;
use core::{num, str};
use std::collections::HashMap;

fn run_test(line: &str) {
/// Fixed 32-byte seed for the ISAAC random number generator, so every run
/// draws the same pseudo-random sequence and failures are reproducible.
///
/// The bytes are the ASCII digits of the first 32 decimal places of pi.
// Not every build configuration reads the seed, hence the lint allowance.
#[allow(dead_code)]
pub const ISAAC_SEED: [u8; 32] = *b"14159265358979323846264338327950";

#[cfg(feature = "digit-separator")]
lazy_static::lazy_static! {
// Matches a run of one or more `_` digit separators immediately followed by
// a `+` or `-` sign. Capture group 1 holds the underscores and group 2 the
// sign, so a replacement of `${2}${1}` moves the separators behind the sign.
// The pattern is a fixed literal, so the `unwrap` is only expected to fire
// if the literal itself is edited into an invalid expression.
static ref SIGN: regex::Regex = regex::Regex::new("(_+)([+-])").unwrap();
}

#[cfg(feature = "digit-separator")]
/// Checks one line of conformance data after injecting random digit separators.
///
/// Between 1 and 4 `_` bytes are inserted at random offsets into the number
/// string, any separators that ended up directly before a `+`/`-` sign are
/// moved behind that sign, and the result is parsed as both `f32` and `f64`
/// using a format that accepts leading, internal, trailing and consecutive
/// `_` separators. The parsed bit patterns must match the hexadecimal values
/// recorded on the line.
///
/// # Panics
///
/// Panics if the line is too short for the fixed-width columns, if either
/// parse fails, or if the parsed bits differ from the expected ones.
fn run_test<Random: Rng>(line: &str, rng: &mut Random) {
    const FMT: u128 = NumberFormatBuilder::new()
        .digit_separator(num::NonZeroU8::new(b'_'))
        .leading_digit_separator(true)
        .internal_digit_separator(true)
        .trailing_digit_separator(true)
        .consecutive_digit_separator(true)
        .build();

    // Tests have the following format:
    //      hhhh ssssssss dddddddddddddddddd ....
    // The `hhhh` part is the hexadecimal representation for f16,
    // the `ssssssss` part is the hexadecimal representation of f32,
    // the `dddddddddddddddddd` is the hex representation of f64,
    // and the remaining bytes are the string to parse.
    let hex32 = line[5..13].to_lowercase();
    let hex64 = line[14..30].to_lowercase();
    let string = &line[31..];
    let options = Options::new();

    // Insert a random number of digit separators. The insertion index is
    // always drawn from the original length, so it stays in bounds as the
    // buffer grows.
    let count = rng.gen_range(1..=4);
    let mut vec = string.as_bytes().to_vec();
    let length = vec.len();
    for _ in 0..count {
        let idx = rng.gen_range(0..length);
        vec.insert(idx, b'_');
    }

    // Digit separators cannot precede a `+-` symbol. Several separators are
    // inserted and a number can carry both a leading sign and an exponent
    // sign, so every occurrence has to be fixed, not just the first one.
    let string = str::from_utf8(&vec).unwrap();
    let valid = SIGN.replace_all(string, "${2}${1}");

    let float32 = f32::from_lexical_with_options::<FMT>(valid.as_bytes(), &options).unwrap();
    let float64 = f64::from_lexical_with_options::<FMT>(valid.as_bytes(), &options).unwrap();
    assert_eq!(hex32, format!("{:0>8x}", float32.to_bits()));
    assert_eq!(hex64, format!("{:0>16x}", float64.to_bits()));
}

#[cfg(not(feature = "digit-separator"))]
fn run_test<Random: Rng>(line: &str, _: &mut Random) {
const FMT: u128 = STANDARD;

// Tests have the following format:
// hhhh ssssssss dddddddddddddddddd ....
// The `hhhh` part is the hexadecimal representation for f16,
Expand All @@ -14,9 +74,10 @@ fn run_test(line: &str) {
let hex32 = line[5..13].to_lowercase();
let hex64 = line[14..30].to_lowercase();
let string = &line[31..];
let options = Options::new();

let float32 = f32::from_lexical(string.as_bytes()).unwrap();
let float64 = f64::from_lexical(string.as_bytes()).unwrap();
let float32 = f32::from_lexical_with_options::<FMT>(string.as_bytes(), &options).unwrap();
let float64 = f64::from_lexical_with_options::<FMT>(string.as_bytes(), &options).unwrap();
assert_eq!(hex32, format!("{:0>8x}", float32.to_bits()));
assert_eq!(hex64, format!("{:0>16x}", float64.to_bits()));
}
Expand Down Expand Up @@ -68,13 +129,14 @@ fn main() {
]);

// Unfortunately, randomizing the data with miri is too expensive, so we just use it normally.
let mut rng = Isaac64Rng::from_seed(ISAAC_SEED);
for (&filename, data) in tests.iter() {
println!("Running Test: {}", filename);
for (count, line) in data.lines().enumerate() {
if cfg!(miri) && count % 10 == 0 {
println!("Running test {count} for conversion tests.");
}
run_test(line);
run_test(line, &mut rng);
if cfg!(miri) && count > 3000 {
break;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,10 @@ use rand_isaac::Isaac64Rng;
use std::mem::transmute;

fn main() {
let mut rnd = Isaac64Rng::from_seed(ISAAC_SEED);
let mut rng = Isaac64Rng::from_seed(ISAAC_SEED);
let mut i = 0;
while i < 10_000_000 {
let bits = rnd.next_u64();
let bits = rng.next_u64();
let x: f64 = unsafe { transmute(bits) };
if x.is_finite() {
validate(&format!("{:e}", x));
Expand Down
Loading

0 comments on commit eb3eb29

Please sign in to comment.