From 54c88d66bc67744e0f1a476cd85c9d4f5333b2fc Mon Sep 17 00:00:00 2001 From: Orson Peters Date: Thu, 18 Sep 2025 17:39:19 +0200 Subject: [PATCH 01/18] feat: Use fixed-scale Decimals --- Cargo.lock | 3 + Cargo.toml | 2 + crates/polars-arrow/src/compute/decimal.rs | 304 +----- crates/polars-arrow/src/types/native.rs | 8 + crates/polars-compute/Cargo.toml | 2 + crates/polars-compute/src/cast/binview_to.rs | 14 +- crates/polars-compute/src/cast/decimal_to.rs | 99 +- crates/polars-compute/src/cast/mod.rs | 2 +- crates/polars-compute/src/decimal.rs | 971 ++++++++++++++++++ crates/polars-compute/src/lib.rs | 2 + .../src/chunked_array/arithmetic/decimal.rs | 128 ++- .../src/chunked_array/arithmetic/mod.rs | 2 - .../src/chunked_array/builder/list/mod.rs | 2 +- crates/polars-core/src/chunked_array/cast.rs | 27 +- .../src/chunked_array/logical/decimal.rs | 191 ++-- crates/polars-core/src/chunked_array/mod.rs | 30 +- .../src/chunked_array/ops/any_value.rs | 4 +- .../src/chunked_array/ops/decimal.rs | 33 +- .../src/chunked_array/ops/fill_null.rs | 6 +- .../src/chunked_array/ops/row_encode.rs | 4 +- crates/polars-core/src/datatypes/_serde.rs | 6 +- crates/polars-core/src/datatypes/any_value.rs | 141 +-- crates/polars-core/src/datatypes/dtype.rs | 56 +- crates/polars-core/src/datatypes/field.rs | 4 +- crates/polars-core/src/datatypes/proptest.rs | 9 +- crates/polars-core/src/fmt.rs | 8 +- .../frame/group_by/aggregations/dispatch.rs | 4 +- crates/polars-core/src/frame/row/av_buffer.rs | 7 + crates/polars-core/src/scalar/new.rs | 6 +- crates/polars-core/src/scalar/serde.rs | 6 +- crates/polars-core/src/series/any_value.rs | 68 +- crates/polars-core/src/series/from.rs | 67 +- .../src/series/implementations/decimal.rs | 14 +- crates/polars-core/src/series/into.rs | 2 +- crates/polars-core/src/series/mod.rs | 36 +- crates/polars-core/src/series/ops/downcast.rs | 2 +- crates/polars-core/src/series/ops/null.rs | 4 +- crates/polars-core/src/utils/supertype.rs | 31 +- crates/polars-expr/src/expressions/binary.rs | 2 +- crates/polars-expr/src/groups/mod.rs | 2 +- crates/polars-expr/src/hash_keys.rs | 4 +- crates/polars-expr/src/hot_groups/mod.rs | 2 +- crates/polars-expr/src/idx_table/mod.rs | 2 +- crates/polars-expr/src/reduce/mean.rs | 6 +- crates/polars-expr/src/reduce/min_max.rs | 4 +- crates/polars-expr/src/reduce/sum.rs | 2 +- crates/polars-expr/src/reduce/var_std.rs | 2 +- crates/polars-io/Cargo.toml | 1 + crates/polars-io/src/catalog/unity/schema.rs | 11 +- crates/polars-io/src/csv/read/reader.rs | 13 +- .../src/csv/write/write_impl/serializer.rs | 8 +- .../polars-json/src/json/write/serialize.rs | 4 +- .../src/chunked_array/gather/chunked.rs | 4 +- crates/polars-ops/src/series/ops/abs.rs | 2 +- crates/polars-ops/src/series/ops/clip.rs | 12 +- crates/polars-ops/src/series/ops/cum_agg.rs | 41 +- crates/polars-ops/src/series/ops/index_of.rs | 2 +- .../series/ops/interpolation/interpolate.rs | 6 +- crates/polars-ops/src/series/ops/is_in.rs | 26 +- crates/polars-ops/src/series/ops/negate.rs | 2 +- .../src/plans/aexpr/function_expr/schema.rs | 4 +- .../src/plans/aexpr/function_expr/strings.rs | 6 +- .../src/plans/aexpr/predicates/column_expr.rs | 2 +- crates/polars-plan/src/plans/aexpr/schema.rs | 33 +- .../plans/conversion/type_coercion/is_in.rs | 4 +- .../polars-python/src/conversion/any_value.rs | 39 +- .../src/conversion/chunked_array.rs | 26 +- crates/polars-python/src/conversion/mod.rs | 35 +- .../src/dataframe/construction.rs | 16 +- .../src/interop/numpy/to_numpy_series.rs | 2 +- crates/polars-python/src/series/comparison.rs | 25 +- crates/polars-python/src/series/export.rs | 2 +- crates/polars-python/src/series/map.rs | 2 +- crates/polars-sql/src/types.rs | 6 +- crates/polars-testing/src/asserts/series.rs | 8 +- crates/polars-utils/src/decimal.rs | 1 + crates/polars-utils/src/float.rs | 2 + crates/polars-utils/src/lib.rs | 1 + crates/polars/tests/it/lazy/group_by.rs | 2 +- .../polars/_utils/construction/series.py | 22 +- py-polars/polars/_utils/convert.py | 7 +- py-polars/polars/datatypes/classes.py | 3 + .../constructors/test_any_value_fallbacks.py | 8 +- .../unit/constructors/test_constructors.py | 2 +- .../tests/unit/datatypes/test_decimal.py | 97 +- py-polars/tests/unit/expr/test_serde.py | 2 +- .../tests/unit/functions/test_when_then.py | 2 +- py-polars/tests/unit/operations/test_cast.py | 4 +- .../tests/unit/operations/test_fill_null.py | 2 +- .../tests/unit/operations/test_rolling.py | 2 +- py-polars/tests/unit/test_datatype_exprs.py | 5 +- py-polars/tests/unit/test_format.py | 7 +- py-polars/tests/unit/test_selectors.py | 2 +- pyo3-polars/pyo3-polars/src/types.rs | 8 +- 94 files changed, 1767 insertions(+), 1085 deletions(-) create mode 100644 crates/polars-compute/src/decimal.rs create mode 100644 crates/polars-utils/src/decimal.rs diff --git a/Cargo.lock b/Cargo.lock index 6eeac905c330..384481b7e838 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3115,12 +3115,14 @@ name = "polars-compute" version = "0.51.0" dependencies = [ "atoi_simd", + "bigdecimal", "bytemuck", "chrono", "either", "fast-float2", "hashbrown 0.15.4", "itoa", + "num-bigint", "num-traits", "polars-arrow", "polars-error", @@ -3285,6 +3287,7 @@ dependencies = [ "object_store", "percent-encoding", "polars-arrow", + "polars-compute", "polars-core", "polars-error", "polars-json", diff --git a/Cargo.toml b/Cargo.toml index 267bcfa7a59d..c5a269f91e38 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -31,6 +31,7 @@ atoi_simd = "0.16" atomic-waker = "1" avro-schema = { version = "0.3" } base64 = "0.22.0" +bigdecimal = "0.4.8" bincode = { version = "2.0", features = ["serde", "std"] } bitflags = "2" boxcar = "0.2.12" @@ -59,6 +60,7 @@ libm = "0.2" memchr = "2.6" memmap = { package = "memmap2", version = "0.9" } ndarray = { version = "0.16", default-features = false } +num-bigint = "0.4.6" num-traits = "0.2" numpy = "0.25" object_store = { version = "0.12", default-features = false, features = ["fs"] } diff --git a/crates/polars-arrow/src/compute/decimal.rs b/crates/polars-arrow/src/compute/decimal.rs index c8417f780085..16926609c517 100644 --- a/crates/polars-arrow/src/compute/decimal.rs +++ b/crates/polars-arrow/src/compute/decimal.rs @@ -1,4 +1,3 @@ -use num_traits::Euclid; use polars_utils::relaxed_cell::RelaxedCell; static TRIM_DECIMAL_ZEROS: RelaxedCell = RelaxedCell::new_bool(false); @@ -6,308 +5,7 @@ static TRIM_DECIMAL_ZEROS: RelaxedCell = RelaxedCell::new_bool(false); pub fn get_trim_decimal_zeros() -> bool { TRIM_DECIMAL_ZEROS.load() } + pub fn set_trim_decimal_zeros(trim: Option) { TRIM_DECIMAL_ZEROS.store(trim.unwrap_or(false)) } - -/// Assuming bytes are a well-formed decimal number (with or without a separator), -/// infer the scale of the number. If no separator is present, the scale is 0. -pub fn infer_scale(bytes: &[u8]) -> u8 { - let Some(separator) = bytes.iter().position(|b| *b == b'.') else { - return 0; - }; - (bytes.len() - (1 + separator)) as u8 -} - -/// Deserialize bytes to a single i128 representing a decimal, at a specified -/// precision (optional) and scale (required). The number is checked to ensure -/// it fits within the specified precision and scale. Consistent with float -/// parsing, no decimal separator is required (eg "500", "500.", and "500.0" are -/// all accepted); this allows mixed integer/decimal sequences to be parsed as -/// decimals. All trailing zeros are assumed to be significant, whether or not -/// a separator is present: 1200 requires precision >= 4, while 1200.200 -/// requires precision >= 7 and scale >= 3. Returns None if the number is not -/// well-formed, or does not fit. Only b'.' is allowed as a decimal separator -/// (issue #6698). -#[inline] -pub fn deserialize_decimal(bytes: &[u8], precision: Option, scale: u8) -> Option { - let precision_digits = precision.unwrap_or(38).min(38) as usize; - if scale as usize > precision_digits { - return None; - } - - let separator = bytes.iter().position(|b| *b == b'.').unwrap_or(bytes.len()); - let (mut int, mut frac) = bytes.split_at(separator); - if frac.len() <= 1 || scale == 0 { - // Only integer fast path. - let n: i128 = atoi_simd::parse(int).ok()?; - let ret = n.checked_mul(POW10[scale as usize] as i128)?; - if precision.is_some() && ret >= POW10[precision_digits] as i128 { - return None; - } - return Some(ret); - } - - // Skip period. - frac = &frac[1..]; - - // Skip sign. - let negative = match bytes.first() { - Some(s @ (b'+' | b'-')) => { - int = &int[1..]; - *s == b'-' - }, - _ => false, - }; - - // Truncate trailing digits that extend beyond the scale. - let frac_scale = if scale as usize <= frac.len() { - frac = &frac[..scale as usize]; - 0 - } else { - scale as usize - frac.len() - }; - - // Parse and combine parts. - let pint: u128 = if int.is_empty() { - 0 - } else { - atoi_simd::parse_pos(int).ok()? - }; - let pfrac: u128 = atoi_simd::parse_pos(frac).ok()?; - - let ret = pint - .checked_mul(POW10[scale as usize])? - .checked_add(pfrac.checked_mul(POW10[frac_scale])?)?; - if precision.is_some() && ret >= POW10[precision_digits] { - return None; - } - if negative { - if ret > (1 << 127) { - None - } else { - Some(ret.wrapping_neg() as i128) - } - } else { - ret.try_into().ok() - } -} - -const MAX_DECIMAL_LEN: usize = 48; - -#[derive(Clone, Copy)] -pub struct DecimalFmtBuffer { - data: [u8; MAX_DECIMAL_LEN], - len: usize, -} - -impl Default for DecimalFmtBuffer { - fn default() -> Self { - Self::new() - } -} - -impl DecimalFmtBuffer { - #[inline] - pub const fn new() -> Self { - Self { - data: [0; MAX_DECIMAL_LEN], - len: 0, - } - } - - pub fn format(&mut self, x: i128, scale: usize, trim_zeros: bool) -> &str { - let factor = POW10[scale]; - let mut itoa_buf = itoa::Buffer::new(); - - self.len = 0; - let (div, rem) = x.unsigned_abs().div_rem_euclid(&factor); - if x < 0 { - self.data[0] = b'-'; - self.len += 1; - } - - let div_fmt = itoa_buf.format(div); - self.data[self.len..self.len + div_fmt.len()].copy_from_slice(div_fmt.as_bytes()); - self.len += div_fmt.len(); - - if scale == 0 { - return unsafe { std::str::from_utf8_unchecked(&self.data[..self.len]) }; - } - - self.data[self.len] = b'.'; - self.len += 1; - - let rem_fmt = itoa_buf.format(rem + factor); // + factor adds leading 1 where period would be. - self.data[self.len..self.len + rem_fmt.len() - 1].copy_from_slice(&rem_fmt.as_bytes()[1..]); - self.len += rem_fmt.len() - 1; - - if trim_zeros { - while self.data.get(self.len - 1) == Some(&b'0') { - self.len -= 1; - } - if self.data.get(self.len - 1) == Some(&b'.') { - self.len -= 1; - } - } - - unsafe { std::str::from_utf8_unchecked(&self.data[..self.len]) } - } -} - -const POW10: [u128; 39] = [ - 1, - 10, - 100, - 1000, - 10000, - 100000, - 1000000, - 10000000, - 100000000, - 1000000000, - 10000000000, - 100000000000, - 1000000000000, - 10000000000000, - 100000000000000, - 1000000000000000, - 10000000000000000, - 100000000000000000, - 1000000000000000000, - 10000000000000000000, - 100000000000000000000, - 1000000000000000000000, - 10000000000000000000000, - 100000000000000000000000, - 1000000000000000000000000, - 10000000000000000000000000, - 100000000000000000000000000, - 1000000000000000000000000000, - 10000000000000000000000000000, - 100000000000000000000000000000, - 1000000000000000000000000000000, - 10000000000000000000000000000000, - 100000000000000000000000000000000, - 1000000000000000000000000000000000, - 10000000000000000000000000000000000, - 100000000000000000000000000000000000, - 1000000000000000000000000000000000000, - 10000000000000000000000000000000000000, - 100000000000000000000000000000000000000, -]; - -#[cfg(test)] -mod test { - use super::*; - #[test] - fn test_decimal() { - let precision = Some(8); - let scale = 2; - - let val = "12.09"; - assert_eq!( - deserialize_decimal(val.as_bytes(), precision, scale), - Some(1209) - ); - - let val = "1200.90"; - assert_eq!( - deserialize_decimal(val.as_bytes(), precision, scale), - Some(120090) - ); - - let val = "143.9"; - assert_eq!( - deserialize_decimal(val.as_bytes(), precision, scale), - Some(14390) - ); - - let val = "+000000.5"; - assert_eq!( - deserialize_decimal(val.as_bytes(), precision, scale), - Some(50) - ); - - let val = "-0.5"; - assert_eq!( - deserialize_decimal(val.as_bytes(), precision, scale), - Some(-50) - ); - - let val = "-1.5"; - assert_eq!( - deserialize_decimal(val.as_bytes(), precision, scale), - Some(-150) - ); - - let scale = 20; - let val = "0.01"; - assert_eq!(deserialize_decimal(val.as_bytes(), precision, scale), None); - assert_eq!( - deserialize_decimal(val.as_bytes(), None, scale), - Some(1000000000000000000) - ); - - let scale = 5; - let val = "12ABC.34"; - assert_eq!(deserialize_decimal(val.as_bytes(), precision, scale), None); - - let val = "1ABC2.34"; - assert_eq!(deserialize_decimal(val.as_bytes(), precision, scale), None); - - let val = "12.3ABC4"; - assert_eq!(deserialize_decimal(val.as_bytes(), precision, scale), None); - - let val = "12.3.ABC4"; - assert_eq!(deserialize_decimal(val.as_bytes(), precision, scale), None); - - let val = "12.-3"; - assert_eq!(deserialize_decimal(val.as_bytes(), precision, scale), None); - - let val = ""; - assert_eq!(deserialize_decimal(val.as_bytes(), precision, scale), None); - - let val = "5."; - assert_eq!( - deserialize_decimal(val.as_bytes(), precision, scale), - Some(500000i128) - ); - - let val = "5"; - assert_eq!( - deserialize_decimal(val.as_bytes(), precision, scale), - Some(500000i128) - ); - - let val = ".5"; - assert_eq!( - deserialize_decimal(val.as_bytes(), precision, scale), - Some(50000i128) - ); - - // Precision and scale fitting: - let val = b"1200"; - assert_eq!(deserialize_decimal(val, None, 0), Some(1200)); - assert_eq!(deserialize_decimal(val, Some(4), 0), Some(1200)); - assert_eq!(deserialize_decimal(val, Some(3), 0), None); - assert_eq!(deserialize_decimal(val, Some(4), 1), None); - - let val = b"1200.010"; - assert_eq!(deserialize_decimal(val, None, 0), Some(1200)); // truncate scale - assert_eq!(deserialize_decimal(val, None, 3), Some(1200010)); // exact scale - assert_eq!(deserialize_decimal(val, None, 6), Some(1200010000)); // excess scale - assert_eq!(deserialize_decimal(val, Some(7), 0), Some(1200)); // sufficient precision and truncate scale - assert_eq!(deserialize_decimal(val, Some(7), 3), Some(1200010)); // exact precision and scale - assert_eq!(deserialize_decimal(val, Some(10), 6), Some(1200010000)); // exact precision, excess scale - assert_eq!(deserialize_decimal(val, Some(5), 6), None); // insufficient precision, excess scale - assert_eq!(deserialize_decimal(val, Some(5), 3), None); // insufficient precision, exact scale - assert_eq!(deserialize_decimal(val, Some(12), 5), Some(120001000)); // excess precision, excess scale - assert_eq!( - deserialize_decimal(val, None, 35), - Some(120001000000000000000000000000000000000) - ); - assert_eq!(deserialize_decimal(val, None, 36), None); - assert_eq!(deserialize_decimal(val, Some(38), 35), None); // scale causes insufficient precision - } -} diff --git a/crates/polars-arrow/src/types/native.rs b/crates/polars-arrow/src/types/native.rs index 253572088b47..07b32bc26e3d 100644 --- a/crates/polars-arrow/src/types/native.rs +++ b/crates/polars-arrow/src/types/native.rs @@ -710,6 +710,14 @@ impl i256 { } } +impl TryFrom for i128 { + type Error = core::num::TryFromIntError; + + fn try_from(value: i256) -> Result { + value.0.try_into() + } +} + impl IsNull for i256 { const HAS_NULLS: bool = false; type Inner = i256; diff --git a/crates/polars-compute/Cargo.toml b/crates/polars-compute/Cargo.toml index ea321c86852e..08cd1c18cdf6 100644 --- a/crates/polars-compute/Cargo.toml +++ b/crates/polars-compute/Cargo.toml @@ -29,6 +29,8 @@ strength_reduce = { workspace = true } strum_macros = { workspace = true } [dev-dependencies] +bigdecimal = { workspace = true } +num-bigint = { workspace = true } rand = { workspace = true } arrow = { workspace = true, features = ["proptest"] } diff --git a/crates/polars-compute/src/cast/binview_to.rs b/crates/polars-compute/src/cast/binview_to.rs index f6f6e99f6b5d..50173a952f52 100644 --- a/crates/polars-compute/src/cast/binview_to.rs +++ b/crates/polars-compute/src/cast/binview_to.rs @@ -2,8 +2,6 @@ use std::ptr::copy_nonoverlapping; use arrow::array::*; use arrow::bitmap::MutableBitmap; -#[cfg(feature = "dtype-decimal")] -use arrow::compute::decimal::deserialize_decimal; use arrow::datatypes::{ArrowDataType, Field, TimeUnit}; use arrow::offset::Offset; use arrow::types::NativeType; @@ -15,6 +13,8 @@ use polars_error::{PolarsResult, polars_err}; use super::CastOptionsImpl; use super::binary_to::Parse; use super::temporal::EPOCH_DAYS_FROM_CE; +#[cfg(feature = "dtype-decimal")] +use crate::decimal::str_to_dec128; pub(super) const RFC3339: &str = "%Y-%m-%dT%H:%M:%S%.f%:z"; @@ -104,19 +104,15 @@ where #[cfg(feature = "dtype-decimal")] pub fn binview_to_decimal( array: &BinaryViewArray, - precision: Option, + precision: usize, scale: usize, ) -> PrimitiveArray { - let precision = precision.map(|p| p as u8); PrimitiveArray::::from_trusted_len_iter( array .iter() - .map(|val| val.and_then(|val| deserialize_decimal(val, precision, scale as u8))), + .map(|val| val.and_then(|val| str_to_dec128(val, precision, scale))), ) - .to(ArrowDataType::Decimal( - precision.unwrap_or(38).into(), - scale, - )) + .to(ArrowDataType::Decimal(precision, scale)) } pub(super) fn utf8view_to_naive_timestamp_dyn( diff --git a/crates/polars-compute/src/cast/decimal_to.rs b/crates/polars-compute/src/cast/decimal_to.rs index e19481ff6154..c80b04e390c0 100644 --- a/crates/polars-compute/src/cast/decimal_to.rs +++ b/crates/polars-compute/src/cast/decimal_to.rs @@ -4,32 +4,9 @@ use arrow::types::NativeType; use num_traits::{AsPrimitive, Float, NumCast}; use polars_error::PolarsResult; -#[inline] -fn decimal_to_decimal_impl Option>( - from: &PrimitiveArray, - op: F, - to_precision: usize, - to_scale: usize, -) -> PrimitiveArray { - let upper_bound_for_precision = 10_i128.saturating_pow(to_precision as u32); - let lower_bound_for_precision = upper_bound_for_precision.saturating_neg(); - - let values = from.iter().map(|x| { - x.and_then(|x| { - op(*x).and_then(|x| { - if x >= upper_bound_for_precision || x <= lower_bound_for_precision { - None - } else { - Some(x) - } - }) - }) - }); - PrimitiveArray::::from_trusted_len_iter(values) - .to(ArrowDataType::Decimal(to_precision, to_scale)) -} +use crate::decimal::{dec128_fits, dec128_rescale, dec128_to_f64, dec128_to_i128}; -/// Returns a [`PrimitiveArray`] with the cast values. Values are `None` on overflow +/// Returns a [`PrimitiveArray`] with the cast values. Values become null on overflow. pub fn decimal_to_decimal( from: &PrimitiveArray, to_precision: usize, @@ -42,32 +19,26 @@ pub fn decimal_to_decimal( panic!("internal error: i128 is always a decimal") }; - if to_scale == from_scale && to_precision >= from_precision { - // fast path - return from - .clone() - .to(ArrowDataType::Decimal(to_precision, to_scale)); - } - // todo: other fast paths include increasing scale and precision by so that - // a number will never overflow (validity is preserved) - - if from_scale > to_scale { - let factor = 10_i128.pow((from_scale - to_scale) as u32); - decimal_to_decimal_impl( - from, - |x: i128| x.checked_div(factor), - to_precision, - to_scale, - ) - } else { - let factor = 10_i128.pow((to_scale - from_scale) as u32); - decimal_to_decimal_impl( - from, - |x: i128| x.checked_mul(factor), - to_precision, - to_scale, - ) + if to_scale == from_scale { + if to_precision >= from_precision { + // Increasing precision is always allowed. + return from + .clone() + .to(ArrowDataType::Decimal(to_precision, to_scale)); + } else { + let it = from + .iter() + .map(|opt_x| opt_x.copied().filter(|x| dec128_fits(*x, to_precision))); + return PrimitiveArray::::from_trusted_len_iter(it) + .to(ArrowDataType::Decimal(to_precision, to_scale)); + } } + + let it = from + .iter() + .map(|opt_x| dec128_rescale(*(opt_x?), from_scale, to_precision, to_scale)); + PrimitiveArray::::from_trusted_len_iter(it) + .to(ArrowDataType::Decimal(to_precision, to_scale)) } pub(super) fn decimal_to_decimal_dyn( @@ -88,17 +59,13 @@ where let (_, from_scale) = if let ArrowDataType::Decimal(p, s) = from.dtype().to_logical_type() { (*p, *s) } else { - panic!("internal error: i128 is always a decimal") + unreachable!() }; - let div = 10_f64.powi(from_scale as i32); - let values = from - .values() + let it = from .iter() - .map(|x| (*x as f64 / div).as_()) - .collect(); - - PrimitiveArray::::new(T::PRIMITIVE.into(), values, from.validity().cloned()) + .map(|opt_x| Some(dec128_to_f64(*(opt_x?), from_scale).as_())); + PrimitiveArray::::from_trusted_len_iter(it) } pub(super) fn decimal_to_float_dyn(from: &dyn Array) -> PolarsResult> @@ -118,13 +85,13 @@ where let (_, from_scale) = if let ArrowDataType::Decimal(p, s) = from.dtype().to_logical_type() { (*p, *s) } else { - panic!("internal error: i128 is always a decimal") + unreachable!() }; - let factor = 10_i128.pow(from_scale as u32); - let values = from.iter().map(|x| x.and_then(|x| T::from(*x / factor))); - - PrimitiveArray::from_trusted_len_iter(values) + let it = from + .iter() + .map(|opt_x| T::from(dec128_to_i128(*(opt_x?), from_scale))); + PrimitiveArray::::from_trusted_len_iter(it) } pub(super) fn decimal_to_integer_dyn(from: &dyn Array) -> PolarsResult> @@ -138,18 +105,18 @@ where /// Returns a [`Utf8Array`] where every element is the utf8 representation of the decimal. #[cfg(feature = "dtype-decimal")] pub(super) fn decimal_to_utf8view(from: &PrimitiveArray) -> Utf8ViewArray { - use arrow::compute::decimal::DecimalFmtBuffer; + use crate::decimal::DecimalFmtBuffer; let (_, from_scale) = if let ArrowDataType::Decimal(p, s) = from.dtype().to_logical_type() { (*p, *s) } else { - panic!("internal error: i128 is always a decimal") + unreachable!() }; let mut mutable = MutableBinaryViewArray::with_capacity(from.len()); let mut fmt_buf = DecimalFmtBuffer::new(); for &x in from.values().iter() { - mutable.push_value_ignore_validity(fmt_buf.format(x, from_scale, false)) + mutable.push_value_ignore_validity(fmt_buf.format_dec128(x, from_scale, false)) } mutable.freeze().with_validity(from.validity().cloned()) diff --git a/crates/polars-compute/src/cast/mod.rs b/crates/polars-compute/src/cast/mod.rs index 720aff9160ad..13140de8cce7 100644 --- a/crates/polars-compute/src/cast/mod.rs +++ b/crates/polars-compute/src/cast/mod.rs @@ -552,7 +552,7 @@ pub fn cast( Date32 => utf8view_to_date32_dyn(array), #[cfg(feature = "dtype-decimal")] Decimal(precision, scale) => { - Ok(binview_to_decimal(&arr.to_binview(), Some(*precision), *scale).to_boxed()) + Ok(binview_to_decimal(&arr.to_binview(), *precision, *scale).to_boxed()) }, _ => polars_bail!(InvalidOperation: "casting from {from_type:?} to {to_type:?} not supported", diff --git a/crates/polars-compute/src/decimal.rs b/crates/polars-compute/src/decimal.rs new file mode 100644 index 000000000000..19aca1a72de7 --- /dev/null +++ b/crates/polars-compute/src/decimal.rs @@ -0,0 +1,971 @@ +/* + Decimal implementation. + + Throughout this module it's assumed that p and s fit in the maximum precision, + giving panics otherwise. + + Constants for division have been generated with the following Python code: + + # Finds integer (c, s) such that floor(n / d) == (n * c) >> s for all n in [0, N]. + # From Integer division by constants: optimal bounds by Lemire et. al, Theorem 1. + # This constant allows for fast division, as well as a divisibility check, + # namely we find that n % d == 0 for all n in [0, N] iff (n * c) % 2**s < c. + def inv_mult_shift(d, N): + s = 0 + m = 1 + c = 1 + K = N - (N + 1) % d + while True: + if c * d * K < (1 + K)*m: + break + s += 1 + m *= 2 + c = (m + d - 1) // d # ceil(m / d) + return (c, s) + + Also from that paper is the algorithm we use for round-to-nearest division. + We compute z = n + floor(d/2), and then return floor(z / d) unless z % d == 0 + and the result is odd in which case we subtract 1. +*/ + +use std::cmp::Ordering; + +use polars_error::{PolarsResult, polars_ensure}; + +/// The maximum precision of a Decimal128. +pub const DEC128_MAX_PREC: usize = 38; + +pub fn dec128_verify_prec_scale(p: usize, s: usize) -> PolarsResult<()> { + polars_ensure!((1..=DEC128_MAX_PREC).contains(&p), InvalidOperation: "precision must be between 1 and 38"); + polars_ensure!(s <= p, InvalidOperation: "scale must be less than or equal to precision"); + Ok(()) +} + +pub const POW10_I128: &[i128; 39] = &{ + let mut out = [0; 39]; + let mut i = 0; + while i < 39 { + out[i] = 10_i128.pow(i as u32); + i += 1; + } + out +}; + +pub const POW10_F64: &[f64; 39] = &{ + let mut out = [0.0; 39]; + let mut i = 0; + while i < 39 { + out[i] = POW10_I128[i] as f64; + i += 1; + } + out +}; + +// for e in range(39): +// c, s = inv_mult_shift(10**e, 2**127-1) +#[rustfmt::skip] +const POW10_127_INV_MUL: &[u128; 39] = &[ + 0x00000000000000000000000000000001, 0x66666666666666666666666666666667, + 0x28f5c28f5c28f5c28f5c28f5c28f5c29, 0x4189374bc6a7ef9db22d0e5604189375, + 0x68db8bac710cb295e9e1b089a0275255, 0x29f16b11c6d1e108c3f3e0370cdc8755, + 0x08637bd05af6c69b5a63f9a49c2c1b11, 0xd6bf94d5e57a42bc3d32907604691b4d, + 0x55e63b88c230e77e7ee106959b5d3e1f, 0x89705f4136b4a59731680a88f8953031, + 0x36f9bfb3af7b756fad5cd10396a21347, 0x57f5ff85e592557f7bc7b4d28a9ceba5, + 0x232f33025bd42232fe4fe1edd10b9175, 0x709709a125da07099432d2f9035837dd, + 0xb424dc35095cd80f538484c19ef38c95, 0x901d7cf73ab0acd90f9d37014bf60a11, + 0x39a5652fb1137856d30baf9a1e626a6d, 0x2e1dea8c8da92d12426fbfae7eb521f1, + 0x09392ee8e921d5d073aff322e62439fd, 0x760f253edb4ab0d29598f4f1e8361973, + 0xbce5086492111aea88f4bb1ca6bcf585, 0x25c768141d369efbb4fdbf05baf29781, + 0xf1c90080baf72cb15324c68b12dd6339, 0x305b66802564a289dd6dc14f03c5e0a5, + 0x9abe14cd44753b52c4926a9672793543, 0x7bcb43d769f762a89d41eedec1fa9103, + 0x63090312bb2c4eed4a9b257f019540cf, 0x4f3a68dbc8f03f243baf513267aa9a3f, + 0xfd87b5f28300ca0d8bca9d6e188853fd, 0xcad2f7f5359a3b3e096ee45813a04331, + 0x51212ffbaf0a7e18d092c1bcd4a68147, 0x1039d66589687f9e901d59f290ee19db, + 0xcfb11ead453994ba67de18eda5814af3, 0xa6274bbdd0fadd61ecb1ad8aeacdd58f, + 0x84ec3c97da624ab4bd5af13bef0b113f, 0xd4ad2dbfc3d07787955e4ec64b44e865, + 0x5512124cb4b9c9696ef285e8eae85cf5, 0x881cea14545c75757e50d64177da2e55, + 0x6ce3ee76a9e3912acb73de9ac6482511, +]; + +const POW10_127_SHIFT: &[u8; 39] = &[ + 0, 2, 4, 8, 12, 14, 15, 23, 25, 29, 31, 35, 37, 42, 46, 49, 51, 54, 55, 62, 66, 67, 73, 74, 79, + 82, 85, 88, 93, 96, 98, 99, 106, 109, 112, 116, 118, 122, 125, +]; + +// for e in range(39): +// c, s = inv_mult_shift(10**e, 2**255-1) +#[rustfmt::skip] +const POW10_255_INV_MUL: &[U256; 39] = &[ + U256([0x0000000000000001, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000]), + U256([0x6666666666666667, 0x6666666666666666, 0x6666666666666666, 0x6666666666666666]), + U256([0xae147ae147ae147b, 0x7ae147ae147ae147, 0x47ae147ae147ae14, 0x147ae147ae147ae1]), + U256([0x5604189374bc6a7f, 0x4bc6a7ef9db22d0e, 0xdb22d0e560418937, 0x04189374bc6a7ef9]), + U256([0x19ce075f6fd21ff3, 0x305532617c1bda51, 0xf4f0d844d013a92a, 0x346dc5d63886594a]), + U256([0x85c67dfe32a0663d, 0xcddd6e04c0592103, 0x0fcf80dc33721d53, 0xa7c5ac471b478423]), + U256([0x37d1fe64f54d1e97, 0xd7e45803cd141a69, 0xa63f9a49c2c1b10f, 0x8637bd05af6c69b5]), + U256([0xc6419850c43db213, 0x4650466970dce1ed, 0x1e99483b02348da6, 0x6b5fca6af2bd215e]), + U256([0x7068f3b46d2f8351, 0x3d4d3d758161697c, 0xfdc20d2b36ba7c3d, 0xabcc77118461cefc]), + U256([0xf387295d242602a7, 0xfdd7645e011abac9, 0x31680a88f8953030, 0x89705f4136b4a597]), + U256([0x2e36108ba80f3443, 0xcbefc1bf33a44ab7, 0xad5cd10396a21346, 0x36f9bfb3af7b756f]), + U256([0x49f01a790ce5206b, 0x797f9c651f6d4458, 0x7bc7b4d28a9ceba4, 0x57f5ff85e592557f]), + U256([0x4319c3f4e16e9a45, 0xf598fa3b657ba08d, 0xf93f87b7442e45d3, 0x8cbccc096f5088cb]), + U256([0x409ec0ca937c8541, 0x311e9872477f201c, 0x650cb4be40d60df7, 0x1c25c268497681c2]), + U256([0x01fc02883e5b4403, 0x36c84e3a7e6399f4, 0xa9c24260cf79c64a, 0x5a126e1a84ae6c07]), + U256([0x3660040d3092066b, 0x57a6e390ca38f653, 0x0f9d37014bf60a10, 0x901d7cf73ab0acd9]), + U256([0x48f334d2136d9c2b, 0xefdc5b06b749fc21, 0xd30baf9a1e626a6c, 0x39a5652fb1137856]), + U256([0x4fd70f6d0af85a23, 0xff8df0157db98d37, 0x09befeb9fad487c2, 0xb877aa3236a4b449]), + U256([0x8656062b9dfcf0db, 0x996bf9a2324a387c, 0x9d7f99173121cfe7, 0x49c97747490eae83]), + U256([0xe11346f1f98fcf89, 0x1e2652070753e7f4, 0x2b31e9e3d06c32e5, 0xec1e4a7db69561a5]), + U256([0x26d482c7309fec9d, 0x0c0f5402cfbb2995, 0x447a5d8e535e7ac2, 0x5e72843249088d75]), + U256([0xa48737a51a997a95, 0x467eecd14c5ea8ee, 0xd3f6fc16ebca5e03, 0x971da05074da7bee]), + U256([0x1d38f950e2146211, 0x38658a4109e553f2, 0xa9926345896eb19c, 0x78e480405d7b9658]), + U256([0x05d831dcfa04139d, 0x71ade873686110ca, 0xeeb6e0a781e2f052, 0x182db34012b25144]), + U256([0xf23472530ce6e3ed, 0xd78c3615cf3a050c, 0xc4926a9672793542, 0x9abe14cd44753b52]), + U256([0xe9ed83b814a49fe1, 0x8c1389bc7ec33b47, 0x3a83ddbd83f52204, 0xf79687aed3eec551]), + U256([0x87f1362cdd507fe7, 0x3cdc6e306568fc39, 0x95364afe032a819d, 0xc612062576589dda]), + U256([0xcffa15ab8bb9ccc3, 0xe524f8e0289064e3, 0x3baf513267aa9a3e, 0x4f3a68dbc8f03f24]), + U256([0xe65cef78df8fae05, 0x3b6e5b0040e707d2, 0xc5e54eb70c4429fe, 0x7ec3daf941806506]), + U256([0x28f1f9638c9fdf35, 0x17c5be0019f60321, 0x825bb91604e810cc, 0x32b4bdfd4d668ecf]), + U256([0x3b6398471c1ff971, 0xd18df2ccd1fe00a0, 0x1a1258379a94d028, 0x0a2425ff75e14fc3]), + U256([0x7c1701c71a663c6d, 0xa38c78520cc00401, 0x407567ca43b8676b, 0x40e7599625a1fe7a]), + U256([0x59e338e387ad8e29, 0x0b5b1aa028ccd99e, 0x67de18eda5814af2, 0xcfb11ead453994ba]), + U256([0x11fa3e93e7ef82d5, 0x9bdf05533b5c2b86, 0x7b2c6b62bab37563, 0x2989d2ef743eb758]), + U256([0xe990641fd97f37bb, 0x5fcb3bb85ef9df3c, 0x5ead789df785889f, 0x42761e4bed31255a]), + U256([0x90a0280cbd66164b, 0x8cb7b17cf2ca594b, 0xf2abc9d8c9689d0c, 0x1a95a5b7f87a0ef0]), + U256([0xb4337347957023ab, 0x7abf82618476f545, 0xb77942f475742e7a, 0x2a8909265a5ce4b4]), + U256([0x40a4a418449a0bbd, 0xbbfe6e04db164412, 0x7e50d64177da2e54, 0x881cea14545c7575]), + U256([0x735420d1a7520259, 0x259949342bd140d0, 0xb2dcf7a6b1920944, 0x1b38fb9daa78e44a]), +]; + +const POW10_255_SHIFT: &[u8; 39] = &[ + 0, 2, 3, 4, 11, 16, 19, 22, 26, 29, 31, 35, 39, 40, 45, 49, 51, 56, 58, 63, 65, 69, 72, 73, 79, + 83, 86, 88, 92, 94, 95, 101, 106, 107, 111, 113, 117, 122, 123, +]; + +// Limbs in little-endian order (limb 0 is least significant). +#[derive(Copy, Clone, PartialEq, Eq)] +struct U256([u64; 4]); + +impl U256 { + #[inline(always)] + fn from_lo_hi(lo: u128, hi: u128) -> Self { + Self([lo as u64, (lo >> 64) as u64, hi as u64, (hi >> 64) as u64]) + } +} + +impl PartialOrd for U256 { + #[inline(always)] + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} + +impl Ord for U256 { + #[inline(always)] + fn cmp(&self, other: &Self) -> Ordering { + self.0[3] + .cmp(&other.0[3]) + .then(self.0[2].cmp(&other.0[2])) + .then(self.0[1].cmp(&other.0[1])) + .then(self.0[0].cmp(&other.0[0])) + } +} + +#[inline] +fn u128_from_lo_hi(lo: u64, hi: u64) -> u128 { + (lo as u128) | ((hi as u128) << 64) +} + +#[inline(always)] +fn widening_mul_64(a: u64, b: u64) -> (u64, u64) { + let t = (a as u128) * (b as u128); + (t as u64, (t >> 64) as u64) +} + +#[inline(always)] +fn carrying_add_64(a: u64, b: u64, carry: bool) -> (u64, bool) { + let (t0, c1) = a.overflowing_add(b); + let (t1, c2) = t0.overflowing_add(carry as u64); + (t1, c1 | c2) +} + +#[inline] +fn widening_mul_128(a: u128, b: u128) -> (u128, u128) { + let a_lo = a as u64; + let a_hi = (a >> 64) as u64; + let b_lo = b as u64; + let b_hi = (b >> 64) as u64; + let (x0, x1) = widening_mul_64(a_lo, b_lo); + let (y1, y2) = widening_mul_64(a_hi, b_lo); + let (z1, z2) = widening_mul_64(a_lo, b_hi); + let (w2, w3) = widening_mul_64(a_hi, b_hi); + + let mut out = [0; 4]; + let (c1, c2, c3, c4); + out[0] = x0; + (out[1], c1) = carrying_add_64(x1, y1, false); + (out[1], c2) = carrying_add_64(out[1], z1, false); + (out[2], c3) = carrying_add_64(y2, z2, c1); + (out[2], c4) = carrying_add_64(out[2], w2, c2); + out[3] = w3.wrapping_add(c3 as u64 + c4 as u64); + + ( + out[0] as u128 | ((out[1] as u128) << 64), + out[2] as u128 | ((out[3] as u128) << 64), + ) +} + +fn widening_mul_256(a: U256, b: U256) -> (U256, U256) { + let mut out = [0; 8]; + + // Algorithm M from TAOCP: Seminumerical algorithms, ch 4.3.1. + // We represent the carry as carry_word + c1 + c2, which fits in a u64. + for i in 0..4 { + let mut carry_word = 0; + let mut c1 = false; + let mut c2 = false; + for j in 0..4 { + let (mut lo, hi) = widening_mul_64(a.0[i], b.0[j]); + (lo, c1) = carrying_add_64(lo, out[i + j], c1); + (lo, c2) = carrying_add_64(lo, carry_word, c2); + out[i + j] = lo; + carry_word = hi; + } + out[i + 4] = carry_word + c1 as u64 + c2 as u64; + } + + let (lo, hi) = out.split_at(4); + (U256(lo.try_into().unwrap()), U256(hi.try_into().unwrap())) +} + +/// Returns x * 10^e, with e <= DEC128_MAX_PREC. +/// +/// Returns None if the multiplication overflows. +#[inline] +fn mul_128_pow10(x: i128, e: usize) -> Option { + x.checked_mul(POW10_I128[e]) +} + +/// Returns round(x / 10^e), with e <= DEC128_MAX_PREC, rounding to nearest even. +#[inline] +fn div_128_pow10(x: i128, e: usize) -> i128 { + if e == 0 { + return x; + } + + let n = x.unsigned_abs(); + let z = n + ((POW10_I128[e] as u128) / 2); // Can't overflow. + let c = POW10_127_INV_MUL[e]; + let s = POW10_127_SHIFT[e]; + let (lo, hi) = widening_mul_128(z, c); + let mut ret = (hi >> s) as i128; + if lo < c && ret % 2 == 1 && (hi << (128 - s)) == 0 { + ret -= 1; + } + if x < 0 { -ret } else { ret } +} + +/// Returns round(x / 10^e), with e <= DEC128_MAX_PREC, rounding to nearest even. +/// x is assumed to be < 2^255. Returns None if the result doesn't fit in a u128. +#[inline] +fn div_255_pow10(x: U256, e: usize) -> Option { + if e == 0 { + if x.0[2] == 0 && x.0[3] == 0 { + return Some(u128_from_lo_hi(x.0[0], x.0[1])); + } else { + return None; + } + } + + let half = (POW10_I128[e] as u128) / 2; + let mut carry; + let mut z = x; + (z.0[0], carry) = z.0[0].overflowing_add(half as u64); + (z.0[1], carry) = carrying_add_64(z.0[1], (half >> 64) as u64, carry); + (z.0[2], carry) = z.0[2].overflowing_add(carry as u64); + z.0[3] += carry as u64; + let c = POW10_255_INV_MUL[e]; + let s = POW10_255_SHIFT[e]; + let (lo, hi) = widening_mul_256(z, c); + let shifted_out_is_zero; + let mut ret = if s < 64 { + if (hi.0[2] >> s) != 0 || hi.0[3] != 0 { + return None; + } + shifted_out_is_zero = (hi.0[0] << (64 - s)) == 0; + (u128_from_lo_hi(hi.0[0], hi.0[1]) >> s) | u128_from_lo_hi(0, hi.0[2] << (64 - s)) + } else { + debug_assert!(s < 128); + let s = s - 64; + if (hi.0[3] >> s) != 0 { + return None; + } + shifted_out_is_zero = hi.0[0] == 0 && (hi.0[1] << (64 - s)) == 0; + (u128_from_lo_hi(hi.0[1], hi.0[2]) >> s) | u128_from_lo_hi(0, hi.0[3] << (64 - s)) + }; + + if lo < c && ret % 2 == 1 && shifted_out_is_zero { + ret -= 1; + } + + Some(ret) +} + +/// Calculates n / d, returning quotient and remainder. +/// +/// # Safety +/// Assumes quotient fits in u64, and d != 0. +unsafe fn divrem_128_64(n: u128, d: u64) -> (u64, u64) { + let quo: u64; + let rem: u64; + + #[cfg(target_arch = "x86_64")] + unsafe { + let nlo = n as u64; + let nhi = (n >> 64) as u64; + std::arch::asm!( + "div {d}", + d = in(reg) d, + inlateout("rax") nlo => quo, + inlateout("rdx") nhi => rem, + options(pure, nomem, nostack) + ); + } + + #[cfg(not(target_arch = "x86_64"))] + unsafe { + // TODO: more optimized implementation. + if n < (1 << 64) { + quo = (n as u64).checked_div(d).unwrap_unchecked(); + rem = (n as u64).checked_rem(d).unwrap_unchecked(); + } else { + quo = n.checked_div(d as u128).unwrap_unchecked() as u64; + rem = n.checked_rem(d as u128).unwrap_unchecked() as u64; + } + } + + (quo, rem) +} + +/// Calculates the quotient and remainder of ((hi << 128) | lo) / d. +/// Returns None if the quotient overflows a 128-bit integer. +fn divrem_256_128(lo: u128, hi: u128, d: u128) -> Option<(u128, u128)> { + if d == 0 || hi >= d { + return None; + } + + if hi == 0 { + return Some(((lo / d), (lo % d))); + } + + if d < (1 << 64) { + // Short division (exercise 16, TAOCP, 4.3.1). + let d = d as u64; + let (q_hi, r_hi) = + unsafe { divrem_128_64(u128_from_lo_hi((lo >> 64) as u64, hi as u64), d) }; + let (q_lo, r_lo) = unsafe { divrem_128_64(u128_from_lo_hi(lo as u64, r_hi), d) }; + return Some((u128_from_lo_hi(q_lo, q_hi), u128_from_lo_hi(r_lo, 0))); + } + + // Long division (algorithm D, TAOCP, 4.3.1). + // Normalize d, n so that d has the top bit set. + let shift = ((d >> 64) as u64).leading_zeros(); + let d1 = ((d << shift) >> 64) as u64; + let d0 = (d as u64) << shift; + let mut n3 = (hi >> 64) as u64; + let mut n2 = hi as u64; + let mut n1 = (lo >> 64) as u64; + let mut n0 = lo as u64; + n3 = ((u128_from_lo_hi(n2, n3) << shift) >> 64) as u64; + n2 = ((u128_from_lo_hi(n1, n2) << shift) >> 64) as u64; + n1 = ((u128_from_lo_hi(n0, n1) << shift) >> 64) as u64; + n0 <<= shift; + + // We want to calculate + // (qhat, rhat) = divmod(n3n2, d1) + // and then do the test qhat * d0 > (rhat << 64) + n1, possibly twice, to + // adjust qhat downwards. But we have to be very careful around overflow, + // as both the division and intermediate steps can overflow. + let (mut qhat, mut rhat, mut qhd0_lo, mut qhd0_hi, mut borrow); + if n3 < d1 { + (qhat, rhat) = unsafe { divrem_128_64(u128_from_lo_hi(n2, n3), d1) }; + (qhd0_lo, qhd0_hi) = widening_mul_64(qhat, d0); + } else { + qhat = 0; // Represents 1 << 64, will be corrected below. + rhat = n2; + qhd0_lo = 0; + qhd0_hi = d0; + }; + + if qhd0_hi > rhat || qhd0_hi == rhat && qhd0_lo > n1 { + qhat = qhat.wrapping_sub(1); + let rhat_overflow; + (rhat, rhat_overflow) = rhat.overflowing_add(d1); + (qhd0_lo, borrow) = qhd0_lo.overflowing_sub(d0); + qhd0_hi -= borrow as u64; + if !rhat_overflow && (qhd0_hi > rhat || qhd0_hi == rhat && qhd0_lo > n1) { + qhat -= 1; + (qhd0_lo, borrow) = qhd0_lo.overflowing_sub(d0); + qhd0_hi -= borrow as u64; + } + } + + // Subtract qhat*d from n3n2n1, this zeroes out n3. We don't need to worry + // about our number going negative like in the original Algorithm D because + // we only have two limbs worth of divisor (making qhat exact). + let q_hi = qhat; + n2 = n2.wrapping_sub(qhat.wrapping_mul(d1)); + (n1, borrow) = n1.overflowing_sub(qhd0_lo); + n2 = n2.wrapping_sub(qhd0_hi + borrow as u64); + + // Repeat the whole process again with n2n1n0. + if n2 < d1 { + (qhat, rhat) = unsafe { divrem_128_64(u128_from_lo_hi(n1, n2), d1) }; + (qhd0_lo, qhd0_hi) = widening_mul_64(qhat, d0); + } else { + qhat = 0; // Represents 1 << 64, will be corrected below. + rhat = n1; + qhd0_lo = 0; + qhd0_hi = d0; + }; + + if qhd0_hi > rhat || qhd0_hi == rhat && qhd0_lo > n0 { + qhat = qhat.wrapping_sub(1); + let rhat_overflow; + (rhat, rhat_overflow) = rhat.overflowing_add(d1); + (qhd0_lo, borrow) = qhd0_lo.overflowing_sub(d0); + qhd0_hi -= borrow as u64; + if !rhat_overflow && (qhd0_hi > rhat || qhd0_hi == rhat && qhd0_lo > n0) { + qhat -= 1; + (qhd0_lo, borrow) = qhd0_lo.overflowing_sub(d0); + qhd0_hi -= borrow as u64; + } + } + + let q_lo = qhat; + n1 = n1.wrapping_sub(qhat.wrapping_mul(d1)); + (n0, borrow) = n0.overflowing_sub(qhd0_lo); + n1 = n1.wrapping_sub(qhd0_hi + borrow as u64); + + // n1n0 is now our remainder, once we account for the shift. + let r_lo = (u128_from_lo_hi(n0, n1) >> shift) as u64; + let r_hi = n1 >> shift; + + Some((u128_from_lo_hi(q_lo, q_hi), u128_from_lo_hi(r_lo, r_hi))) +} + +/// Returns whether the given Decimal128 fits in the given precision. +#[inline] +pub fn dec128_fits(x: i128, p: usize) -> bool { + x.abs() < POW10_I128[p] +} + +#[inline] +pub fn dec128_to_i128(x: i128, s: usize) -> i128 { + if s == 0 { x } else { div_128_pow10(x, s) } +} + +/// Converts an i128 to a Decimal128 with the given precision and scale, +/// returning None if the value doesn't fit. +#[inline] +pub fn i128_to_dec128(x: i128, p: usize, s: usize) -> Option { + let r = x.checked_mul(POW10_I128[s])?; + dec128_fits(r, p).then_some(r) +} + +/// Converts a Decimal128 with the given scale to a f64. +#[inline] +pub fn dec128_to_f64(x: i128, s: usize) -> f64 { + // TODO: correctly rounded result. This rounds multiple times. + x as f64 / POW10_F64[s] +} + +/// Converts a f64 to a Decimal128 with the given precision and scale, returning +/// None if the value doesn't fit. +#[inline] +pub fn f64_to_dec128(x: f64, p: usize, s: usize) -> Option { + // TODO: correctly rounded result. This rounds multiple times. + let n = (x * POW10_F64[s]) as i128; + dec128_fits(n, p).then_some(n) +} + +/// Converts between two Decimal128s, with a new precision and scale, returning +/// None if the value doesn't fit. +#[inline] +pub fn dec128_rescale(x: i128, old_s: usize, new_p: usize, new_s: usize) -> Option { + let r = if new_s < old_s { + div_128_pow10(x, old_s - new_s) + } else if new_s > old_s { + mul_128_pow10(x, new_s - old_s)? + } else { + return Some(x); + }; + + dec128_fits(r, new_p).then_some(r) +} + +/// Adds two Decimal128s, assuming they have the same scale. +#[inline] +pub fn dec128_add(l: i128, r: i128, p: usize) -> Option { + l.checked_add(r).filter(|x| dec128_fits(*x, p)) +} + +/// Subs two Decimal128s, assuming they have the same scale. +#[inline] +pub fn dec128_sub(l: i128, r: i128, p: usize) -> Option { + l.checked_sub(r).filter(|x| dec128_fits(*x, p)) +} + +/// Multiplies two Decimal128s, assuming they have the same scale s. +#[inline] +pub fn dec128_mul(l: i128, r: i128, p: usize, s: usize) -> Option { + // Computes round(l * r / 10^s), rounding to nearest even. + if let (Ok(ls), Ok(rs)) = (i64::try_from(l), i64::try_from(r)) { + // Fast path, both small. + let ret = div_128_pow10(ls as i128 * rs as i128, s); + dec128_fits(ret, p).then_some(ret) + } else { + let negative = (l < 0) ^ (r < 0); + let lu = l.unsigned_abs(); + let ru = r.unsigned_abs(); + + let (lo, hi) = widening_mul_128(lu, ru); + let retu = if hi == 0 && lo <= i128::MAX as u128 { + div_128_pow10(lo as i128, s) as u128 + } else { + div_255_pow10(U256::from_lo_hi(lo, hi), s)? + }; + if retu >= POW10_I128[p] as u128 { + return None; + } + if negative { + Some(-(retu as i128)) + } else { + Some(retu as i128) + } + } +} + +/// Divides two Decimal128s, assuming they have the same scale s. +#[inline] +pub fn dec128_div(l: i128, r: i128, p: usize, s: usize) -> Option { + if r == 0 { + return None; + } + + let negative = (l < 0) ^ (r < 0); + let lu = l.unsigned_abs(); + let ru = r.unsigned_abs(); + + // Computes round((l / r) * 10^s), rounding to nearest even. + let (mut retu, rem) = if s == 0 { + // Fast path, integer division. + let z = lu + ru / 2; // Can't overflow, 10^38 + 10^38 / 2 < 2^128. + (z / ru, z % ru) + } else { + let m = POW10_I128[s]; + + if let (Ok(ls), Ok(ms)) = (i64::try_from(l), u64::try_from(m)) { + // Fast path, intermediate product representable as u128. + let lsu = ls.unsigned_abs(); + let mut tmp = lsu as u128 * ms as u128; + tmp += ru / 2; // Checked that adding this can't overflow, assuming l < 2^63 and m, r < POW10_I128[DEC128_MAX_PREC]. + (tmp / ru, tmp % ru) + } else { + let (mut lo, mut hi) = widening_mul_128(lu, m as u128); + let carry; + (lo, carry) = lo.overflowing_add(ru / 2); + hi += carry as u128; + divrem_256_128(lo, hi, ru)? + } + }; + + // Round to nearest even. + if r % 2 == 0 && retu % 2 == 1 && rem == 0 { + retu -= 1; + } + + if retu >= POW10_I128[p] as u128 { + return None; + } + if negative { + Some(-(retu as i128)) + } else { + Some(retu as i128) + } +} + +/// Checks if two Decimal128s are equal in value. +#[inline] +pub fn dec128_eq(mut lv: i128, ls: usize, mut rv: i128, rs: usize) -> bool { + // Rescale to largest scale. If this overflows the numbers can't be equal anyway. + if ls < rs { + let Some(scaled_lv) = mul_128_pow10(lv, rs - ls) else { + return false; + }; + lv = scaled_lv; + } else if ls > rs { + let Some(scaled_rv) = mul_128_pow10(rv, ls - rs) else { + return false; + }; + rv = scaled_rv; + } + + lv == rv +} + +/// Checks how two Decimal128s compare. +#[inline] +pub fn dec128_cmp(mut lv: i128, ls: usize, mut rv: i128, rs: usize) -> Ordering { + // Rescale to largest scale. If this overflows we know the magnitude of the + // (attempted) rescaled number is larger and we can resolve the answer just + // using its sign. + if ls < rs { + let Some(scaled_lv) = mul_128_pow10(lv, rs - ls) else { + return if lv < 0 { + Ordering::Less + } else { + Ordering::Greater + }; + }; + lv = scaled_lv; + } else if ls > rs { + let Some(scaled_rv) = mul_128_pow10(rv, ls - rs) else { + return if 0 < rv { + Ordering::Less + } else { + Ordering::Greater + }; + }; + rv = scaled_rv; + } + + lv.cmp(&rv) +} + +/// Deserialize bytes to a single i128 representing a decimal, at a specified +/// precision and scale. The number is checked to ensure it fits within the +/// specified precision and scale. Consistent with float parsing, no decimal +/// separator is required (eg "500", "500.", and "500.0" are all accepted); +/// this allows mixed integer/decimal sequences to be parsed as decimals. +/// Returns None if the number is not well-formed, or does not fit. +/// Only b'.' is allowed as a decimal separator (issue #6698). +#[inline] +pub fn str_to_dec128(bytes: &[u8], p: usize, s: usize) -> Option { + // TODO + // assert!(dec128_verify_prec_scale(p, s).is_ok()); + + let separator = bytes.iter().position(|b| *b == b'.').unwrap_or(bytes.len()); + let (mut int, mut frac) = bytes.split_at(separator); + + // Trim trailing zeroes. + while let Some((b'0', rest)) = frac.split_last() { + frac = rest; + } + + if frac.len() <= 1 || s == 0 { + // Only integer fast path. + let n: i128 = atoi_simd::parse(int).ok()?; + return i128_to_dec128(n, p, s); + } + + // Skip period. + frac = &frac[1..]; + + // Skip sign. + let negative = match int.first() { + Some(s @ (b'+' | b'-')) => { + int = &int[1..]; + *s == b'-' + }, + _ => false, + }; + + // Round if digits extend beyond the scale. + let next_digit; + let frac_scale = if frac.len() > s { + if !frac[s..].iter().all(|b| b.is_ascii_digit()) { + return None; + } + next_digit = frac[s]; + frac = &frac[..s]; + 0 + } else { + next_digit = b'0'; + s - frac.len() + }; + + // Parse and combine parts. + let pint: i128 = if int.is_empty() { + 0 + } else { + atoi_simd::parse_pos(int).ok()? + }; + + let mut pfrac: i128 = if frac.is_empty() { + 0 + } else { + atoi_simd::parse_pos(frac).ok()? + }; + + // Round-to-even. + if next_digit > b'5' { + pfrac += 1; + } else if next_digit == b'5' { + pfrac += pfrac % 2; + } + + let ret = mul_128_pow10(pint, s)? + mul_128_pow10(pfrac, frac_scale)?; + if !dec128_fits(ret, p) { + return None; + } + if negative { Some(-ret) } else { Some(ret) } +} + +const DEC128_MAX_LEN: usize = 39 + 2; + +#[derive(Clone, Copy)] +pub struct DecimalFmtBuffer { + data: [u8; DEC128_MAX_LEN], + len: usize, +} + +impl Default for DecimalFmtBuffer { + fn default() -> Self { + Self::new() + } +} + +impl DecimalFmtBuffer { + #[inline] + pub const fn new() -> Self { + Self { + data: [0; DEC128_MAX_LEN], + len: 0, + } + } + + pub fn format_dec128(&mut self, x: i128, scale: usize, trim_zeros: bool) -> &str { + let mut itoa_buf = itoa::Buffer::new(); + let xs = itoa_buf.format(x.unsigned_abs()).as_bytes(); + + if x >= 0 { + self.len = 0; + } else { + self.data[0] = b'-'; + self.len = 1; + } + + if scale == 0 { + self.data[self.len..self.len + xs.len()].copy_from_slice(xs); + self.len += xs.len(); + } else { + let whole_len = xs.len().saturating_sub(scale); + let frac_len = xs.len() - whole_len; + if whole_len == 0 { + self.data[self.len] = b'0'; + self.data[self.len + 1] = b'.'; + self.data[self.len + 2..self.len + 2 + scale - frac_len].fill(b'0'); + self.len += 2 + scale - frac_len; + } else { + self.data[self.len..self.len + whole_len].copy_from_slice(&xs[..whole_len]); + self.data[self.len + whole_len] = b'.'; + self.len += whole_len + 1; + } + + self.data[self.len..self.len + frac_len].copy_from_slice(&xs[whole_len..]); + self.len += frac_len; + + if trim_zeros { + while self.data.get(self.len - 1) == Some(&b'0') { + self.len -= 1; + } + if self.data.get(self.len - 1) == Some(&b'.') { + self.len -= 1; + } + } + } + + unsafe { std::str::from_utf8_unchecked(&self.data[..self.len]) } + } +} + +#[cfg(test)] +mod test { + use std::sync::LazyLock; + + use bigdecimal::{BigDecimal, RoundingMode}; + use num_bigint::BigInt; + use num_traits::Signed; + use polars_utils::aliases::PlHashSet; + use rand::prelude::*; + + use super::*; + + fn bigdecimal_to_dec128(x: &BigDecimal, p: usize, s: usize) -> Option { + let n = x + .with_scale_round(s as i64, RoundingMode::HalfEven) + .into_bigint_and_scale() + .0; + if n.abs() < POW10_I128[p].into() { + Some(n.try_into().unwrap()) + } else { + None + } + } + + fn dec128_to_bigdecimal(x: i128, s: usize) -> BigDecimal { + BigDecimal::from_bigint(BigInt::from(x), s as i64) + } + + static INTERESTING_SCALE_PREC: [usize; 13] = [0, 1, 2, 3, 5, 8, 11, 16, 21, 27, 32, 37, 38]; + + static INTERESTING_VALUES: LazyLock> = LazyLock::new(|| { + let mut r = SmallRng::seed_from_u64(42); + let mut base = Vec::new(); + base.extend((0..128).map(|e| BigDecimal::from(1i128 << e))); + base.extend((0..39).map(|e| BigDecimal::from(POW10_I128[e]))); + base.extend((0..32).map(BigDecimal::from)); + base.extend((0..32).map(|_| BigDecimal::from(r.random::()))); + base.extend((0..32).map(|_| BigDecimal::from(r.random::()))); + base.extend((0..32).map(|_| BigDecimal::from(r.random::()))); + base.extend(base.clone().into_iter().map(|x| -x)); + + let mut out = PlHashSet::default(); + out.extend(base.iter().cloned()); + + let zero = BigDecimal::from(0u8); + for l in &base { + for r in &base { + out.insert(l + r); + out.insert(l * r); + if *r != zero { + out.insert(l / r); + } + } + } + + let mut out: Vec<_> = out.into_iter().collect(); + out.sort_by_key(|d| d.abs()); + out + }); + + #[test] + fn test_str_to_dec() { + assert_eq!(str_to_dec128(b"12.09", 8, 2), Some(1209)); + assert_eq!(str_to_dec128(b"1200.90", 8, 2), Some(120090)); + assert_eq!(str_to_dec128(b"143.9", 8, 2), Some(14390)); + + assert_eq!(str_to_dec128(b"+000000.5", 8, 2), Some(50)); + assert_eq!(str_to_dec128(b"-0.5", 8, 2), Some(-50)); + assert_eq!(str_to_dec128(b"-1.5", 8, 2), Some(-150)); + + assert_eq!(str_to_dec128(b"12ABC.34", 8, 5), None); + assert_eq!(str_to_dec128(b"1ABC2.34", 8, 5), None); + assert_eq!(str_to_dec128(b"12.3ABC4", 8, 5), None); + assert_eq!(str_to_dec128(b"12.3.ABC4", 8, 5), None); + + assert_eq!(str_to_dec128(b"12.-3", 8, 5), None); + assert_eq!(str_to_dec128(b"", 8, 5), None); + assert_eq!(str_to_dec128(b"5.", 8, 5), Some(500000i128)); + assert_eq!(str_to_dec128(b"5", 8, 5), Some(500000i128)); + assert_eq!(str_to_dec128(b".5", 8, 5), Some(50000i128)); + + // Precision and scale fitting. + let val = b"1200"; + assert_eq!(str_to_dec128(val, 4, 0), Some(1200)); + assert_eq!(str_to_dec128(val, 3, 0), None); + assert_eq!(str_to_dec128(val, 4, 1), None); + + let val = b"1200.010"; + assert_eq!(str_to_dec128(val, 7, 0), Some(1200)); + assert_eq!(str_to_dec128(val, 7, 3), Some(1200010)); + assert_eq!(str_to_dec128(val, 10, 6), Some(1200010000)); + assert_eq!(str_to_dec128(val, 5, 3), None); + assert_eq!(str_to_dec128(val, 12, 5), Some(120001000)); + assert_eq!(str_to_dec128(val, 38, 35), None); + + // Rounding. + assert_eq!(str_to_dec128(b"2.10", 5, 1), Some(21)); + assert_eq!(str_to_dec128(b"2.14", 5, 1), Some(21)); + assert_eq!(str_to_dec128(b"2.15", 5, 1), Some(22)); + assert_eq!(str_to_dec128(b"2.24", 5, 1), Some(22)); + assert_eq!(str_to_dec128(b"2.25", 5, 1), Some(22)); + assert_eq!(str_to_dec128(b"2.26", 5, 1), Some(23)); + } + + #[test] + fn str_dec_roundtrip() { + let mut buf = DecimalFmtBuffer::new(); + for &p in &INTERESTING_SCALE_PREC { + for &s in &INTERESTING_SCALE_PREC { + if s > p { + continue; + } + for x in INTERESTING_VALUES.iter() { + if let Some(d) = bigdecimal_to_dec128(x, p, s) { + let fmt = buf.format_dec128(d, s, false); + let d2 = str_to_dec128(fmt.as_bytes(), p, s); + assert_eq!(d, d2.unwrap()); + } else { + break; + } + } + } + } + } + + #[test] + fn test_mul() { + for &p in &INTERESTING_SCALE_PREC { + for &s in &INTERESTING_SCALE_PREC { + if s > p { + continue; + } + let values: Vec<_> = INTERESTING_VALUES + .iter() + .map_while(|x| bigdecimal_to_dec128(x, p, s)) + .map(|d| (d, dec128_to_bigdecimal(d, s))) + .collect(); + let mut r = SmallRng::seed_from_u64(42); + for _ in 0..1_000 { + // Kept small for CI, ran with 10 million during development. + let (x, xb) = values.choose(&mut r).unwrap(); + let (y, yb) = values.choose(&mut r).unwrap(); + let prod = dec128_mul(*x, *y, p, s); + let prodb = bigdecimal_to_dec128(&(xb * yb), p, s); + assert_eq!(prod, prodb); + } + } + } + } + + #[test] + fn test_div() { + for &p in &INTERESTING_SCALE_PREC { + for &s in &INTERESTING_SCALE_PREC { + if s > p { + continue; + } + let values: Vec<_> = INTERESTING_VALUES + .iter() + .map_while(|x| bigdecimal_to_dec128(x, p, s)) + .map(|d| (d, dec128_to_bigdecimal(d, s))) + .collect(); + let mut r = SmallRng::seed_from_u64(42); + for _ in 0..1_000 { + // Kept small for CI, ran with 10 million during development. + let (x, xb) = values.choose(&mut r).unwrap(); + let (y, yb) = values.choose(&mut r).unwrap(); + if *y == 0 { + assert!(dec128_div(*x, *y, p, s).is_none()); + continue; + } + let prod = dec128_mul(*x, *y, p, s); + let prodb = bigdecimal_to_dec128(&(xb * yb), p, s); + assert_eq!(prod, prodb); + } + } + } + } +} diff --git a/crates/polars-compute/src/lib.rs b/crates/polars-compute/src/lib.rs index a2c4d1b406e7..2c2025db6157 100644 --- a/crates/polars-compute/src/lib.rs +++ b/crates/polars-compute/src/lib.rs @@ -11,6 +11,8 @@ pub mod cardinality; #[cfg(feature = "cast")] pub mod cast; pub mod comparisons; +#[cfg(feature = "dtype-decimal")] +pub mod decimal; pub mod filter; #[cfg(feature = "cast")] pub mod find_validity_mismatch; diff --git a/crates/polars-core/src/chunked_array/arithmetic/decimal.rs b/crates/polars-core/src/chunked_array/arithmetic/decimal.rs index cdc693513289..debb847c02a0 100644 --- a/crates/polars-core/src/chunked_array/arithmetic/decimal.rs +++ b/crates/polars-core/src/chunked_array/arithmetic/decimal.rs @@ -1,13 +1,38 @@ +use polars_compute::decimal::{ + DEC128_MAX_PREC, dec128_add, dec128_div, dec128_mul, dec128_rescale, dec128_sub, +}; + use super::*; +use crate::prelude::arity::broadcast_try_binary_elementwise; impl Add for &DecimalChunked { type Output = PolarsResult; fn add(self, rhs: Self) -> Self::Output { - let scale = _get_decimal_scale_add_sub(self.scale(), rhs.scale()); - let lhs = self.to_scale(scale)?; - let rhs = rhs.to_scale(scale)?; - Ok((&lhs.phys + &rhs.phys).into_decimal_unchecked(None, scale)) + let left_s = self.scale(); + let right_s = rhs.scale(); + let scale = left_s.max(right_s); + let prec = DEC128_MAX_PREC; + let phys = broadcast_try_binary_elementwise( + self.physical(), + rhs.physical(), + |opt_l, opt_r| { + let (Some(l), Some(r)) = (opt_l, opt_r) else { + return PolarsResult::Ok(None); + }; + let ls = dec128_rescale(l, left_s, prec, scale).ok_or_else(|| { + polars_err!(ComputeError: "overflow in Decimal cast for {l} from scale {left_s} to {scale}") + })?; + let rs = dec128_rescale(r, right_s, prec, scale).ok_or_else(|| { + polars_err!(ComputeError: "overflow in Decimal cast for {r} from scale {right_s} to {scale}") + })?; + let ret = dec128_add(ls, rs, prec).ok_or_else( + || polars_err!(ComputeError: "overflow in decimal addition for {ls} + {rs}"), + )?; + Ok(Some(ret)) + }, + ); + Ok(phys?.into_decimal_unchecked(prec, scale)) } } @@ -15,10 +40,30 @@ impl Sub for &DecimalChunked { type Output = PolarsResult; fn sub(self, rhs: Self) -> Self::Output { - let scale = _get_decimal_scale_add_sub(self.scale(), rhs.scale()); - let lhs = self.to_scale(scale)?; - let rhs = rhs.to_scale(scale)?; - Ok((&lhs.phys - &rhs.phys).into_decimal_unchecked(None, scale)) + let left_s = self.scale(); + let right_s = rhs.scale(); + let scale = left_s.max(right_s); + let prec = DEC128_MAX_PREC; + let phys = broadcast_try_binary_elementwise( + self.physical(), + rhs.physical(), + |opt_l, opt_r| { + let (Some(l), Some(r)) = (opt_l, opt_r) else { + return PolarsResult::Ok(None); + }; + let ls = dec128_rescale(l, left_s, prec, scale).ok_or_else(|| { + polars_err!(ComputeError: "overflow in Decimal cast for {l} from scale {left_s} to {scale}") + })?; + let rs = dec128_rescale(r, right_s, prec, scale).ok_or_else(|| { + polars_err!(ComputeError: "overflow in Decimal cast for {r} from scale {right_s} to {scale}") + })?; + let ret = dec128_sub(ls, rs, prec).ok_or_else( + || polars_err!(ComputeError: "overflow in decimal subtraction for {ls} + {rs}"), + )?; + Ok(Some(ret)) + }, + ); + Ok(phys?.into_decimal_unchecked(prec, scale)) } } @@ -26,8 +71,30 @@ impl Mul for &DecimalChunked { type Output = PolarsResult; fn mul(self, rhs: Self) -> Self::Output { - let scale = _get_decimal_scale_mul(self.scale(), rhs.scale()); - Ok((&self.phys * &rhs.phys).into_decimal_unchecked(None, scale)) + let left_s = self.scale(); + let right_s = rhs.scale(); + let scale = left_s.max(right_s); + let prec = DEC128_MAX_PREC; + let phys = broadcast_try_binary_elementwise( + self.physical(), + rhs.physical(), + |opt_l, opt_r| { + let (Some(l), Some(r)) = (opt_l, opt_r) else { + return PolarsResult::Ok(None); + }; + let ls = dec128_rescale(l, left_s, prec, scale).ok_or_else(|| { + polars_err!(ComputeError: "overflow in Decimal cast for {l} from scale {left_s} to {scale}") + })?; + let rs = dec128_rescale(r, right_s, prec, scale).ok_or_else(|| { + polars_err!(ComputeError: "overflow in Decimal cast for {r} from scale {right_s} to {scale}") + })?; + let ret = dec128_mul(ls, rs, prec, scale).ok_or_else(|| { + polars_err!(ComputeError: "overflow in decimal multiplication for {ls} * {rs}") + })?; + Ok(Some(ret)) + }, + ); + Ok(phys?.into_decimal_unchecked(prec, scale)) } } @@ -35,22 +102,29 @@ impl Div for &DecimalChunked { type Output = PolarsResult; fn div(self, rhs: Self) -> Self::Output { - let scale = _get_decimal_scale_div(self.scale()); - let lhs = self.to_scale(scale + rhs.scale())?; - Ok((&lhs.phys / &rhs.phys).into_decimal_unchecked(None, scale)) + let left_s = self.scale(); + let right_s = rhs.scale(); + let scale = left_s.max(right_s); + let prec = DEC128_MAX_PREC; + let phys = broadcast_try_binary_elementwise( + self.physical(), + rhs.physical(), + |opt_l, opt_r| { + let (Some(l), Some(r)) = (opt_l, opt_r) else { + return PolarsResult::Ok(None); + }; + let ls = dec128_rescale(l, left_s, prec, scale).ok_or_else(|| { + polars_err!(ComputeError: "overflow in Decimal cast for {l} from scale {left_s} to {scale}") + })?; + let rs = dec128_rescale(r, right_s, prec, scale).ok_or_else(|| { + polars_err!(ComputeError: "overflow in Decimal cast for {r} from scale {right_s} to {scale}") + })?; + let ret = dec128_div(ls, rs, prec, scale).ok_or_else( + || polars_err!(ComputeError: "overflow in decimal division for {ls} * {rs}"), + )?; + Ok(Some(ret)) + }, + ); + Ok(phys?.into_decimal_unchecked(prec, scale)) } } - -// Used by polars-plan to determine schema. -pub fn _get_decimal_scale_add_sub(scale_left: usize, scale_right: usize) -> usize { - scale_left.max(scale_right) -} - -pub fn _get_decimal_scale_mul(scale_left: usize, scale_right: usize) -> usize { - scale_left + scale_right -} - -pub fn _get_decimal_scale_div(scale_left: usize) -> usize { - // Follow postgres and MySQL adding a fixed scale increment of 4 - scale_left + 4 -} diff --git a/crates/polars-core/src/chunked_array/arithmetic/mod.rs b/crates/polars-core/src/chunked_array/arithmetic/mod.rs index 0285f2b6a81b..4c5d2229bf94 100644 --- a/crates/polars-core/src/chunked_array/arithmetic/mod.rs +++ b/crates/polars-core/src/chunked_array/arithmetic/mod.rs @@ -6,8 +6,6 @@ mod numeric; use std::ops::{Add, Div, Mul, Rem, Sub}; use arrow::compute::utils::combine_validities_and; -#[cfg(feature = "dtype-decimal")] -pub use decimal::{_get_decimal_scale_add_sub, _get_decimal_scale_div, _get_decimal_scale_mul}; use num_traits::{Num, NumCast, ToPrimitive}; pub use numeric::ArithmeticChunked; diff --git a/crates/polars-core/src/chunked_array/builder/list/mod.rs b/crates/polars-core/src/chunked_array/builder/list/mod.rs index 8bc8c62b93d0..92163a78f27b 100644 --- a/crates/polars-core/src/chunked_array/builder/list/mod.rs +++ b/crates/polars-core/src/chunked_array/builder/list/mod.rs @@ -113,7 +113,7 @@ pub fn get_list_builder( Some(inner_type_logical.clone()), )), #[cfg(feature = "dtype-decimal")] - DataType::Decimal(_, _) => Box::new( + DataType::NewDecimal(_, _) => Box::new( ListPrimitiveChunkedBuilder::::new_with_values_type( name, list_capacity, diff --git a/crates/polars-core/src/chunked_array/cast.rs b/crates/polars-core/src/chunked_array/cast.rs index ea7af762d73b..1b595babb2ee 100644 --- a/crates/polars-core/src/chunked_array/cast.rs +++ b/crates/polars-core/src/chunked_array/cast.rs @@ -79,7 +79,7 @@ fn cast_impl_inner( ) -> PolarsResult { let chunks = match dtype { #[cfg(feature = "dtype-decimal")] - DataType::Decimal(_, _) => { + DataType::NewDecimal(_, _) => { let mut chunks = cast_chunks(chunks, dtype, options)?; // @NOTE: We cannot cast here as that will lower the scale. for chunk in chunks.iter_mut() { @@ -113,7 +113,7 @@ fn cast_impl_inner( #[cfg(feature = "dtype-time")] Time => out.into_time(), #[cfg(feature = "dtype-decimal")] - Decimal(precision, scale) => out.into_decimal(*precision, scale.unwrap_or(0))?, + NewDecimal(precision, scale) => out.into_decimal(*precision, *scale)?, _ => out, }; @@ -295,24 +295,13 @@ impl ChunkCast for StringChunked { cast_single_to_struct(self.name().clone(), &self.chunks, fields, options) }, #[cfg(feature = "dtype-decimal")] - DataType::Decimal(precision, scale) => match (precision, scale) { - (precision, Some(scale)) => { - let chunks = self.downcast_iter().map(|arr| { - polars_compute::cast::binview_to_decimal( - &arr.to_binview(), - *precision, - *scale, - ) + DataType::NewDecimal(precision, scale) => { + let chunks = self.downcast_iter().map(|arr| { + polars_compute::cast::binview_to_decimal(&arr.to_binview(), *precision, *scale) .to(ArrowDataType::Int128) - }); - Ok(Int128Chunked::from_chunk_iter(self.name().clone(), chunks) - .into_decimal_unchecked(*precision, *scale) - .into_series()) - }, - (None, None) => self.to_decimal_infer(100), - _ => { - polars_bail!(ComputeError: "expected 'precision' or 'scale' when casting to Decimal") - }, + }); + let ca = Int128Chunked::from_chunk_iter(self.name().clone(), chunks); + Ok(ca.into_decimal_unchecked(*precision, *scale).into_series()) }, #[cfg(feature = "dtype-date")] DataType::Date => { diff --git a/crates/polars-core/src/chunked_array/logical/decimal.rs b/crates/polars-core/src/chunked_array/logical/decimal.rs index ee3a69e86644..15366644deb1 100644 --- a/crates/polars-core/src/chunked_array/logical/decimal.rs +++ b/crates/polars-core/src/chunked_array/logical/decimal.rs @@ -1,35 +1,32 @@ use std::borrow::Cow; +use arrow::bitmap::Bitmap; +use polars_compute::decimal::{dec128_fits, dec128_rescale, dec128_verify_prec_scale}; + use super::*; use crate::chunked_array::cast::cast_chunks; +use crate::prelude::arity::{unary_elementwise, unary_kernel}; use crate::prelude::*; pub type DecimalChunked = Logical; impl Int128Chunked { #[inline] - pub fn into_decimal_unchecked(self, precision: Option, scale: usize) -> DecimalChunked { - // SAFETY: no invalid states. - unsafe { DecimalChunked::new_logical(self, DataType::Decimal(precision, Some(scale))) } + pub fn into_decimal_unchecked(self, precision: usize, scale: usize) -> DecimalChunked { + // SAFETY: no invalid states (from a safety perspective). + unsafe { DecimalChunked::new_logical(self, DataType::NewDecimal(precision, scale)) } } - pub fn into_decimal( - self, - precision: Option, - scale: usize, - ) -> PolarsResult { - // TODO: if precision is None, do we check that the value fits within precision of 38?... - if let Some(precision) = precision { - let precision_max = 10_i128.pow(precision as u32); - if let Some((min, max)) = self.min_max() { - let max_abs = max.abs().max(min.abs()); - polars_ensure!( - max_abs < precision_max, - ComputeError: "decimal precision {} can't fit values with {} digits", - precision, - max_abs.to_string().len() - ); - } + pub fn into_decimal(self, precision: usize, scale: usize) -> PolarsResult { + dec128_verify_prec_scale(precision, scale)?; + if let Some((min, max)) = self.min_max() { + let max_abs = max.abs().max(min.abs()); + polars_ensure!( + dec128_fits(max_abs, precision), + ComputeError: "decimal precision {} can't fit values with {} digits", + precision, + max_abs.to_string().len() + ); } Ok(self.into_decimal_unchecked(precision, scale)) } @@ -49,7 +46,7 @@ impl LogicalType for DecimalChunked { #[inline] unsafe fn get_any_value_unchecked(&self, i: usize) -> AnyValue<'_> { match self.phys.get_unchecked(i) { - Some(v) => AnyValue::Decimal(v, self.scale()), + Some(v) => AnyValue::NewDecimal(v, self.precision(), self.scale()), None => AnyValue::Null, } } @@ -59,72 +56,136 @@ impl LogicalType for DecimalChunked { dtype: &DataType, cast_options: CastOptions, ) -> PolarsResult { - let mut dtype = Cow::Borrowed(dtype); - if let DataType::Decimal(to_precision, to_scale) = dtype.as_ref() { - let from_precision = self.precision(); - let from_scale = self.scale(); - - let to_precision = to_precision.or(from_precision); - let to_scale = to_scale.unwrap_or(from_scale); - - if to_precision == from_precision && to_scale == from_scale { - return Ok(self.clone().into_series()); - } - - dtype = Cow::Owned(DataType::Decimal(to_precision, Some(to_scale))); + if let DataType::NewDecimal(to_prec, to_scale) = dtype { + return Ok(self + .with_prec_scale(*to_prec, *to_scale, cast_options.is_strict())? + .into_owned() + .into_series()); } - let arrow_dtype = self.dtype().to_arrow(CompatLevel::newest()); - let chunks = self - .physical() - .chunks - .iter() - .map(|arr| { - arr.as_any() - .downcast_ref::>() - .unwrap() - .clone() - .to(arrow_dtype.clone()) - .to_boxed() - }) - .collect::>(); - let chunks = cast_chunks(&chunks, dtype.as_ref(), cast_options)?; - Series::try_from((self.name().clone(), chunks)) + match dtype { + DataType::NewDecimal(to_prec, to_scale) => { + return Ok(self + .with_prec_scale(*to_prec, *to_scale, cast_options.is_strict())? + .into_owned() + .into_series()); + }, + + dt if dt.is_primitive_numeric() + | matches!(dt, DataType::String | DataType::Boolean) => + { + // Normally we don't set the Arrow logical type, but now we temporarily set it so + // we can re-use the compute cast kernels. + let arrow_dtype = self.dtype().to_arrow(CompatLevel::newest()); + let chunks = self + .physical() + .chunks + .iter() + .map(|arr| { + arr.as_any() + .downcast_ref::>() + .unwrap() + .clone() + .to(arrow_dtype.clone()) + .to_boxed() + }) + .collect::>(); + let chunks = cast_chunks(&chunks, dtype, cast_options)?; + Series::try_from((self.name().clone(), chunks)) + }, + + dt => { + polars_bail!( + InvalidOperation: + "casting from {:?} to {:?} not supported", + self.dtype(), dt + ) + }, + } } } impl DecimalChunked { - pub fn precision(&self) -> Option { + pub fn precision(&self) -> usize { match &self.dtype { - DataType::Decimal(precision, _) => *precision, + DataType::NewDecimal(precision, _) => *precision, _ => unreachable!(), } } pub fn scale(&self) -> usize { match &self.dtype { - DataType::Decimal(_, scale) => scale.unwrap_or_else(|| unreachable!()), + DataType::NewDecimal(_, scale) => *scale, _ => unreachable!(), } } - pub fn to_scale(&self, scale: usize) -> PolarsResult> { - if self.scale() == scale { + pub fn with_prec_scale( + &self, + prec: usize, + scale: usize, + strict: bool, + ) -> PolarsResult> { + if self.precision() == prec && self.scale() == scale { return Ok(Cow::Borrowed(self)); } - let mut precision = self.precision(); - if let Some(ref mut precision) = precision { - if self.scale() < scale { - *precision += scale; - *precision = (*precision).min(38); + dec128_verify_prec_scale(prec, scale)?; + let phys = if self.scale() == scale { + if prec >= self.precision() { + // Increasing precision is always allowed. + self.phys.clone() + } else if strict { + if let Some((min, max)) = self.phys.min_max() { + let max_abs = max.abs().max(min.abs()); + polars_ensure!( + dec128_fits(max_abs, prec), + ComputeError: "decimal precision {} can't fit values with {} digits", + prec, + max_abs.to_string().len() + ); + } + self.phys.clone() + } else { + unary_kernel(&self.phys, |arr| { + let new_valid: Bitmap = arr + .iter() + .map(|opt_x| { + if let Some(x) = opt_x { + dec128_fits(*x, prec) + } else { + false + } + }) + .collect(); + arr.clone().with_validity_typed(Some(new_valid)) + }) } + } else { + let old_s = self.scale(); + unary_elementwise(&self.phys, |x| dec128_rescale(x?, old_s, prec, scale)) + }; + + let ca = unsafe { DecimalChunked::new_logical(phys, DataType::NewDecimal(prec, scale)) }; + Ok(Cow::Owned(ca)) + } + + /// Converts self to a physical representation with the given precision and + /// scale, returning the given sentinel value instead for values which don't + /// fit in the given precision and scale. This can be useful for comparisons. + pub fn into_phys_with_prec_scale_or_sentinel( + &self, + prec: usize, + scale: usize, + sentinel: i128, + ) -> Int128Chunked { + if self.precision() <= prec && self.scale() == scale { + return self.phys.clone(); } - let s = self.cast_with_options( - &DataType::Decimal(precision, Some(scale)), - CastOptions::NonStrict, - )?; - Ok(Cow::Owned(s.decimal().unwrap().clone())) + let old_s = self.scale(); + unary_elementwise(&self.phys, |x| { + Some(dec128_rescale(x?, old_s, prec, scale).unwrap_or(sentinel)) + }) } } diff --git a/crates/polars-core/src/chunked_array/mod.rs b/crates/polars-core/src/chunked_array/mod.rs index 7e2b8bd37f6d..498430cd5d4f 100644 --- a/crates/polars-core/src/chunked_array/mod.rs +++ b/crates/polars-core/src/chunked_array/mod.rs @@ -172,27 +172,15 @@ impl ChunkedArray { &self, series: &'a Series, ) -> PolarsResult<&'a ChunkedArray> { - match self.dtype() { - #[cfg(feature = "dtype-decimal")] - DataType::Decimal(_, _) => { - let logical = series.decimal()?; - - let ca = logical.physical(); - Ok(ca.as_any().downcast_ref::>().unwrap()) - }, - dt => { - polars_ensure!( - dt == series.dtype(), - SchemaMismatch: "cannot unpack series of type `{}` into `{}`", - series.dtype(), - dt, - ); - - // SAFETY: - // dtype will be correct. - Ok(unsafe { self.unpack_series_matching_physical_type(series) }) - }, - } + polars_ensure!( + self.dtype() == series.dtype(), + SchemaMismatch: "cannot unpack series of type `{}` into `{}`", + series.dtype(), + self.dtype(), + ); + + // SAFETY: dtype will be correct. + Ok(unsafe { self.unpack_series_matching_physical_type(series) }) } /// Create a new [`ChunkedArray`] and compute its `length` and `null_count`. diff --git a/crates/polars-core/src/chunked_array/ops/any_value.rs b/crates/polars-core/src/chunked_array/ops/any_value.rs index c191e4e867f1..e7f1f17930b4 100644 --- a/crates/polars-core/src/chunked_array/ops/any_value.rs +++ b/crates/polars-core/src/chunked_array/ops/any_value.rs @@ -128,10 +128,10 @@ pub(crate) unsafe fn arr_to_any_value<'a>( AnyValue::Time(v) }, #[cfg(feature = "dtype-decimal")] - DataType::Decimal(precision, scale) => { + DataType::NewDecimal(precision, scale) => { let arr = &*(arr as *const dyn Array as *const Int128Array); let v = arr.value_unchecked(idx); - AnyValue::Decimal(v, scale.unwrap_or_else(|| unreachable!())) + AnyValue::NewDecimal(v, *precision, *scale) }, #[cfg(feature = "object")] DataType::Object(_) => { diff --git a/crates/polars-core/src/chunked_array/ops/decimal.rs b/crates/polars-core/src/chunked_array/ops/decimal.rs index 94f0d32b47ef..e0b373606a34 100644 --- a/crates/polars-core/src/chunked_array/ops/decimal.rs +++ b/crates/polars-core/src/chunked_array/ops/decimal.rs @@ -11,25 +11,32 @@ impl StringChunked { /// using the `cast` method. pub fn to_decimal_infer(&self, infer_length: usize) -> PolarsResult { let mut scale = 0; + let mut prec = 0; let mut iter = self.into_iter(); let mut valid_count = 0; while let Some(Some(v)) = iter.next() { - let scale_value = arrow::compute::decimal::infer_scale(v.as_bytes()); - scale = std::cmp::max(scale, scale_value); + let mut bytes = v.as_bytes(); + if bytes.first() == Some(&b'-') { + bytes = &bytes[1..]; + } + if let Some(separator) = bytes.iter().position(|b| *b == b'.') { + scale = scale.max(bytes.len() - 1 - separator); + prec = prec.max(bytes.len() - 1); + } else { + prec = prec.max(bytes.len()); + } + valid_count += 1; if valid_count == infer_length { break; } } - self.to_decimal(scale as usize) + self.to_decimal(prec, scale) } - pub fn to_decimal(&self, scale: usize) -> PolarsResult { - self.cast_with_options( - &DataType::Decimal(None, Some(scale)), - CastOptions::NonStrict, - ) + pub fn to_decimal(&self, prec: usize, scale: usize) -> PolarsResult { + self.cast_with_options(&DataType::NewDecimal(prec, scale), CastOptions::NonStrict) } } @@ -49,12 +56,12 @@ mod test { ]; let s = StringChunked::from_slice(PlSmallStr::from_str("test"), &vals); let s = s.to_decimal_infer(6).unwrap(); - assert_eq!(s.dtype(), &DataType::Decimal(None, Some(5))); + assert_eq!(s.dtype(), &DataType::NewDecimal(12, 5)); assert_eq!(s.len(), 7); - assert_eq!(s.get(0).unwrap(), AnyValue::Decimal(100000, 5)); + assert_eq!(s.get(0).unwrap(), AnyValue::NewDecimal(100000, 12, 5)); assert_eq!(s.get(1).unwrap(), AnyValue::Null); - assert_eq!(s.get(3).unwrap(), AnyValue::Decimal(300045, 5)); - assert_eq!(s.get(4).unwrap(), AnyValue::Decimal(-400000, 5)); - assert_eq!(s.get(6).unwrap(), AnyValue::Decimal(525251, 5)); + assert_eq!(s.get(3).unwrap(), AnyValue::NewDecimal(300045, 12, 5)); + assert_eq!(s.get(4).unwrap(), AnyValue::NewDecimal(-400000, 12, 5)); + assert_eq!(s.get(6).unwrap(), AnyValue::NewDecimal(525251, 12, 5)); } } diff --git a/crates/polars-core/src/chunked_array/ops/fill_null.rs b/crates/polars-core/src/chunked_array/ops/fill_null.rs index b591a0062a5b..391ad0c24c66 100644 --- a/crates/polars-core/src/chunked_array/ops/fill_null.rs +++ b/crates/polars-core/src/chunked_array/ops/fill_null.rs @@ -85,10 +85,14 @@ impl Series { FillNullStrategy::Backward(Some(limit)) => fill_backward_gather_limit(self, limit), #[cfg(feature = "dtype-decimal")] FillNullStrategy::One if self.dtype().is_decimal() => { + use polars_compute::decimal::i128_to_dec128; + let ca = self.decimal().unwrap(); let precision = ca.precision(); let scale = ca.scale(); - let fill_value = 10i128.pow(scale as u32); + let fill_value = i128_to_dec128(1, precision, scale).ok_or_else(|| { + polars_err!(ComputeError: "value '1' is out of range for Decimal({precision}, {scale})") + })?; let phys = ca.physical().fill_null_with_values(fill_value)?; Ok(phys.into_decimal_unchecked(precision, scale).into_series()) }, diff --git a/crates/polars-core/src/chunked_array/ops/row_encode.rs b/crates/polars-core/src/chunked_array/ops/row_encode.rs index d2a21ef9a113..413f3a66b7c6 100644 --- a/crates/polars-core/src/chunked_array/ops/row_encode.rs +++ b/crates/polars-core/src/chunked_array/ops/row_encode.rs @@ -114,9 +114,7 @@ pub fn get_row_encoding_context(dtype: &DataType) -> Option DataType::Object(_) => panic!("Unsupported in row encoding"), #[cfg(feature = "dtype-decimal")] - DataType::Decimal(precision, _) => { - Some(RowEncodingContext::Decimal(precision.unwrap_or(38))) - }, + DataType::NewDecimal(precision, _) => Some(RowEncodingContext::Decimal(*precision)), #[cfg(feature = "dtype-array")] DataType::Array(dtype, _) => get_row_encoding_context(dtype), diff --git a/crates/polars-core/src/datatypes/_serde.rs b/crates/polars-core/src/datatypes/_serde.rs index ec69a1c29ceb..e0f472e799a9 100644 --- a/crates/polars-core/src/datatypes/_serde.rs +++ b/crates/polars-core/src/datatypes/_serde.rs @@ -92,7 +92,7 @@ enum SerializableDataType { strings: Series, }, #[cfg(feature = "dtype-decimal")] - Decimal(Option, Option), + NewDecimal(usize, usize), #[cfg(feature = "object")] Object(String), } @@ -143,7 +143,7 @@ impl From<&DataType> for SerializableDataType { .into_series(), }, #[cfg(feature = "dtype-decimal")] - Decimal(precision, scale) => Self::Decimal(*precision, *scale), + NewDecimal(precision, scale) => Self::NewDecimal(*precision, *scale), #[cfg(feature = "object")] Object(name) => Self::Object(name.to_string()), } @@ -202,7 +202,7 @@ impl From for DataType { Self::Enum(fcats, mapping) }, #[cfg(feature = "dtype-decimal")] - Decimal(precision, scale) => Self::Decimal(precision, scale), + NewDecimal(precision, scale) => Self::NewDecimal(precision, scale), #[cfg(feature = "object")] Object(_) => Self::Object("unknown"), } diff --git a/crates/polars-core/src/datatypes/any_value.rs b/crates/polars-core/src/datatypes/any_value.rs index 5c4e2d565244..92529a0313c9 100644 --- a/crates/polars-core/src/datatypes/any_value.rs +++ b/crates/polars-core/src/datatypes/any_value.rs @@ -22,6 +22,12 @@ impl Clone for OwnedObject { } } +#[cfg(feature = "dtype-decimal")] +use polars_compute::decimal::{ + dec128_cmp, dec128_eq, dec128_rescale, dec128_to_f64, dec128_to_i128, f64_to_dec128, + i128_to_dec128, +}; + #[derive(Debug, Clone, Default)] pub enum AnyValue<'a> { #[default] @@ -101,9 +107,9 @@ pub enum AnyValue<'a> { StringOwned(PlSmallStr), Binary(&'a [u8]), BinaryOwned(Vec), - /// A 128-bit fixed point decimal number with a scale. + /// A 128-bit fixed point decimal number with a precision and scale. #[cfg(feature = "dtype-decimal")] - Decimal(i128, usize), + NewDecimal(i128, usize, usize), } impl AnyValue<'static> { @@ -121,9 +127,7 @@ impl AnyValue<'static> { #[cfg(feature = "dtype-duration")] DataType::Duration(unit) => AnyValue::Duration(0, *unit), #[cfg(feature = "dtype-decimal")] - DataType::Decimal(_p, s) => { - AnyValue::Decimal(0, s.expect("unknown scale during execution")) - }, + DataType::NewDecimal(p, s) => AnyValue::NewDecimal(0, *p, *s), _ => AnyValue::Null, } } @@ -155,7 +159,7 @@ impl AnyValue<'static> { DT::Float32 => AV::Float32(numeric_to_one.into()), DT::Float64 => AV::Float64(numeric_to_one.into()), #[cfg(feature = "dtype-decimal")] - DT::Decimal(_, scale) => AV::Decimal(0, scale.unwrap()), + DT::NewDecimal(p, s) => AV::NewDecimal(0, *p, *s), DT::String => AV::String(""), DT::Binary => AV::Binary(&[]), DT::BinaryOffset => AV::Binary(&[]), @@ -262,7 +266,7 @@ impl<'a> AnyValue<'a> { #[cfg(feature = "dtype-struct")] StructOwned(payload) => DataType::Struct(payload.1.clone()), #[cfg(feature = "dtype-decimal")] - Decimal(_, scale) => DataType::Decimal(None, Some(*scale)), + NewDecimal(_, p, s) => DataType::NewDecimal(*p, *s), #[cfg(feature = "object")] Object(o) => DataType::Object(o.type_name()), #[cfg(feature = "object")] @@ -273,7 +277,7 @@ impl<'a> AnyValue<'a> { /// Extract a numerical value from the AnyValue #[doc(hidden)] #[inline] - pub fn extract(&self) -> Option { + pub fn extract(&self) -> Option { use AnyValue::*; match self { Int8(v) => NumCast::from(*v), @@ -297,12 +301,11 @@ impl<'a> AnyValue<'a> { #[cfg(feature = "dtype-duration")] Duration(v, _) => NumCast::from(*v), #[cfg(feature = "dtype-decimal")] - Decimal(v, scale) => { - if *scale == 0 { - NumCast::from(*v) + NewDecimal(v, _p, s) => { + if T::is_float() { + NumCast::from(dec128_to_f64(*v, *s)) } else { - let f: Option = NumCast::from(*v); - NumCast::from(f? / 10f64.powi(*scale as _)) + NumCast::from(dec128_to_i128(*v, *s)) } }, Boolean(v) => NumCast::from(if *v { 1 } else { 0 }), @@ -319,7 +322,7 @@ impl<'a> AnyValue<'a> { } #[inline] - pub fn try_extract(&self) -> PolarsResult { + pub fn try_extract(&self) -> PolarsResult { self.extract().ok_or_else(|| { polars_err!( ComputeError: "could not extract number from any-value of dtype: '{:?}'", @@ -595,33 +598,24 @@ impl<'a> AnyValue<'a> { *tu_r, ), - // to decimal #[cfg(feature = "dtype-decimal")] - (av, DataType::Decimal(prec, scale)) if av.is_integer() => { - let value = av.try_extract::().unwrap(); - let scale = scale.unwrap_or(0); - let factor = 10_i128.pow(scale as _); // Conversion to u32 is safe, max value is 38. - let converted = value.checked_mul(factor)?; - - // Check if the converted value fits into the specified precision - let prec = prec.unwrap_or(38) as u32; - let num_digits = (converted.abs() as f64).log10().ceil() as u32; - if num_digits > prec { - return None; - } + (av, DataType::NewDecimal(p, s)) if av.is_integer() => { + let int = av.try_extract::().ok()?; + let dec = i128_to_dec128(int, *p, *s)?; + AnyValue::NewDecimal(dec, *p, *s) + }, - AnyValue::Decimal(converted, scale) + #[cfg(feature = "dtype-decimal")] + (av, DataType::NewDecimal(p, s)) if av.is_float() => { + let f = av.try_extract::().unwrap(); + let dec = f64_to_dec128(f, *p, *s)?; + AnyValue::NewDecimal(dec, *p, *s) }, + #[cfg(feature = "dtype-decimal")] - (AnyValue::Decimal(value, scale_av), DataType::Decimal(_, scale)) => { - let Some(scale) = scale else { - return Some(self.clone()); - }; - // TODO: Allow lossy conversion? - let scale_diff = scale.checked_sub(*scale_av)?; - let factor = 10_i128.pow(scale_diff as _); // Conversion is safe, max value is 38. - let converted = value.checked_mul(factor)?; - AnyValue::Decimal(converted, *scale) + (AnyValue::NewDecimal(value, _old_p, old_s), DataType::NewDecimal(p, s)) => { + let converted = dec128_rescale(*value, *old_s, *p, *s)?; + AnyValue::NewDecimal(converted, *p, *s) }, // to self @@ -736,7 +730,7 @@ impl<'a> AnyValue<'a> { ))), #[cfg(feature = "dtype-decimal")] - Self::Decimal(v, _) => Self::Int128(v), + Self::NewDecimal(v, _, _) => Self::Int128(v), } } @@ -853,9 +847,10 @@ impl AnyValue<'_> { #[cfg(feature = "dtype-struct")] StructOwned(v) => v.0.hash(state), #[cfg(feature = "dtype-decimal")] - Decimal(v, k) => { + NewDecimal(v, s, p) => { v.hash(state); - k.hash(state); + s.hash(state); + p.hash(state); }, Null => {}, } @@ -964,12 +959,14 @@ impl<'a> AnyValue<'a> { Duration(l + r, *lu) }, #[cfg(feature = "dtype-decimal")] - (Decimal(l, ls), Decimal(r, rs)) => { - if ls != rs { - unimplemented!("adding decimals with different scales is not supported here"); + (NewDecimal(l, lp, ls), NewDecimal(r, rp, rs)) => { + if (lp, ls) != (rp, rs) { + unimplemented!( + "adding decimals with different precisions/scales is not supported here" + ); } - Decimal(l + r, *ls) + NewDecimal(l + r, *lp, *ls) }, _ => unimplemented!(), } @@ -1049,7 +1046,7 @@ impl<'a> AnyValue<'a> { unsafe { std::mem::transmute::, AnyValue<'static>>(av) } }, #[cfg(feature = "dtype-decimal")] - Decimal(val, scale) => Decimal(val, scale), + NewDecimal(val, s, p) => NewDecimal(val, s, p), #[cfg(feature = "dtype-categorical")] Categorical(cat, map) => CategoricalOwned(cat, map.clone()), #[cfg(feature = "dtype-categorical")] @@ -1236,32 +1233,7 @@ impl AnyValue<'_> { null_equal, ), #[cfg(feature = "dtype-decimal")] - (Decimal(l_v, l_s), Decimal(r_v, r_s)) => { - // l_v / 10**l_s == r_v / 10**r_s - if l_s == r_s && l_v == r_v || *l_v == 0 && *r_v == 0 { - true - } else if l_s < r_s { - // l_v * 10**(r_s - l_s) == r_v - if let Some(lhs) = (|| { - let exp = i128::checked_pow(10, (r_s - l_s).try_into().ok()?)?; - l_v.checked_mul(exp) - })() { - lhs == *r_v - } else { - false - } - } else { - // l_v == r_v * 10**(l_s - r_s) - if let Some(rhs) = (|| { - let exp = i128::checked_pow(10, (l_s - r_s).try_into().ok()?)?; - r_v.checked_mul(exp) - })() { - *l_v == rhs - } else { - false - } - } - }, + (NewDecimal(lv, _lp, ls), NewDecimal(rv, _rp, rs)) => dec128_eq(*lv, *ls, *rv, *rs), #[cfg(feature = "object")] (Object(l), Object(r)) => l == r, #[cfg(feature = "dtype-array")] @@ -1410,31 +1382,8 @@ impl PartialOrd for AnyValue<'_> { unimplemented!("ordering for Struct dtype is not supported") }, #[cfg(feature = "dtype-decimal")] - (Decimal(l_v, l_s), Decimal(r_v, r_s)) => { - // l_v / 10**l_s <=> r_v / 10**r_s - if l_s == r_s && l_v == r_v || *l_v == 0 && *r_v == 0 { - Some(Ordering::Equal) - } else if l_s < r_s { - // l_v * 10**(r_s - l_s) <=> r_v - if let Some(lhs) = (|| { - let exp = i128::checked_pow(10, (r_s - l_s).try_into().ok()?)?; - l_v.checked_mul(exp) - })() { - lhs.partial_cmp(r_v) - } else { - Some(Ordering::Greater) - } - } else { - // l_v <=> r_v * 10**(l_s - r_s) - if let Some(rhs) = (|| { - let exp = i128::checked_pow(10, (l_s - r_s).try_into().ok()?)?; - r_v.checked_mul(exp) - })() { - l_v.partial_cmp(&rhs) - } else { - Some(Ordering::Less) - } - } + (NewDecimal(lv, _lp, ls), NewDecimal(rv, _rp, rs)) => { + Some(dec128_cmp(*lv, *ls, *rv, *rs)) }, (_, _) => { diff --git a/crates/polars-core/src/datatypes/dtype.rs b/crates/polars-core/src/datatypes/dtype.rs index 7f75d152949e..81db20ca2801 100644 --- a/crates/polars-core/src/datatypes/dtype.rs +++ b/crates/polars-core/src/datatypes/dtype.rs @@ -103,7 +103,7 @@ pub enum DataType { /// This is backed by a signed 128-bit integer which allows for up to 38 significant digits. /// Meaning max precision is 38. #[cfg(feature = "dtype-decimal")] - Decimal(Option, Option), // precision/scale; scale being None means "infer" + NewDecimal(usize, usize), // (precision, scale), invariant: 1 <= precision <= 38. /// String data String, Binary, @@ -169,12 +169,7 @@ impl PartialEq for DataType { #[cfg(feature = "dtype-duration")] (Duration(tu_l), Duration(tu_r)) => tu_l == tu_r, #[cfg(feature = "dtype-decimal")] - (Decimal(l_prec, l_scale), Decimal(r_prec, r_scale)) => { - let is_prec_eq = l_prec.is_none() || r_prec.is_none() || l_prec == r_prec; - let is_scale_eq = l_scale.is_none() || r_scale.is_none() || l_scale == r_scale; - - is_prec_eq && is_scale_eq - }, + (NewDecimal(p1, s1), NewDecimal(p2, s2)) => (p1, s1) == (p2, s2), #[cfg(feature = "object")] (Object(lhs), Object(rhs)) => lhs == rhs, #[cfg(feature = "dtype-struct")] @@ -408,7 +403,7 @@ impl DataType { (D::Boolean, dt) | (dt, D::Boolean) => match dt { dt if dt.is_primitive_numeric() => true, #[cfg(feature = "dtype-decimal")] - D::Decimal(_, _) => true, + D::NewDecimal(_, _) => true, D::String | D::Binary => true, _ => false, }, @@ -456,7 +451,7 @@ impl DataType { Duration(_) => Int64, Time => Int64, #[cfg(feature = "dtype-decimal")] - Decimal(_, _) => Int128, + NewDecimal(_, _) => Int128, #[cfg(feature = "dtype-categorical")] Categorical(cats, _) => cats.physical().dtype(), #[cfg(feature = "dtype-categorical")] @@ -659,7 +654,7 @@ impl DataType { pub fn is_decimal(&self) -> bool { match self { #[cfg(feature = "dtype-decimal")] - DataType::Decimal(_, _) => true, + DataType::NewDecimal(_, _) => true, _ => false, } } @@ -860,14 +855,9 @@ impl DataType { Float32 => Ok(ArrowDataType::Float32), Float64 => Ok(ArrowDataType::Float64), #[cfg(feature = "dtype-decimal")] - Decimal(precision, scale) => { - let precision = (*precision).unwrap_or(38); - polars_ensure!(precision <= 38 && precision > 0, InvalidOperation: "decimal precision should be <= 38 & >= 1"); - - Ok(ArrowDataType::Decimal( - precision, - scale.unwrap_or(0), // and what else can we do here? - )) + NewDecimal(precision, scale) => { + assert!(*precision >= 1 && *precision <= 38); + Ok(ArrowDataType::Decimal(*precision, *scale)) }, String => { let dt = if compat_level.0 >= 1 { @@ -984,7 +974,9 @@ impl DataType { }, (DataType::Null, DataType::Null) => Ok(false), #[cfg(feature = "dtype-decimal")] - (DataType::Decimal(_, s1), DataType::Decimal(_, s2)) => Ok(s1 != s2), + (DataType::NewDecimal(p1, s1), DataType::NewDecimal(p2, s2)) => { + Ok((p1, s1) != (p2, s2)) + }, // We don't allow the other way around, only if our current type is // null and the schema isn't we allow it. (DataType::Null, _) => Ok(true), @@ -1079,25 +1071,12 @@ impl Display for DataType { DataType::Float32 => "f32", DataType::Float64 => "f64", #[cfg(feature = "dtype-decimal")] - DataType::Decimal(precision, scale) => { - return match (precision, scale) { - (Some(precision), Some(scale)) => { - f.write_str(&format!("decimal[{precision},{scale}]")) - }, - (None, Some(scale)) => f.write_str(&format!("decimal[*,{scale}]")), - _ => f.write_str("decimal[?]"), // shouldn't happen - }; - }, + DataType::NewDecimal(p, s) => return write!(f, "decimal[{p},{s}]"), DataType::String => "str", DataType::Binary => "binary", DataType::Date => "date", - DataType::Datetime(tu, tz) => { - let s = match tz { - None => format!("datetime[{tu}]"), - Some(tz) => format!("datetime[{tu}, {tz}]"), - }; - return f.write_str(&s); - }, + DataType::Datetime(tu, None) => return write!(f, "datetime[{tu}]"), + DataType::Datetime(tu, Some(tz)) => return write!(f, "datetime[{tu}, {tz}]"), DataType::Duration(tu) => return write!(f, "duration[{tu}]"), DataType::Time => "time", #[cfg(feature = "dtype-array")] @@ -1165,12 +1144,7 @@ impl std::fmt::Debug for DataType { } }, #[cfg(feature = "dtype-decimal")] - Decimal(opt_p, opt_s) => match (opt_p, opt_s) { - (None, None) => write!(f, "Decimal(None, None)"), - (None, Some(s)) => write!(f, "Decimal(None, {s})"), - (Some(p), None) => write!(f, "Decimal({p}, None)"), - (Some(p), Some(s)) => write!(f, "Decimal({p}, {s})"), - }, + NewDecimal(p, s) => write!(f, "Decimal({p}, {s})"), #[cfg(feature = "dtype-array")] Array(inner, size) => write!(f, "Array({inner:?}, {size})"), List(inner) => write!(f, "List({inner:?})"), diff --git a/crates/polars-core/src/datatypes/field.rs b/crates/polars-core/src/datatypes/field.rs index 7caf435126f4..accd8f36b98c 100644 --- a/crates/polars-core/src/datatypes/field.rs +++ b/crates/polars-core/src/datatypes/field.rs @@ -261,9 +261,7 @@ impl DataType { } }, #[cfg(feature = "dtype-decimal")] - ArrowDataType::Decimal(precision, scale) => { - DataType::Decimal(Some(*precision), Some(*scale)) - }, + ArrowDataType::Decimal(precision, scale) => DataType::NewDecimal(*precision, *scale), ArrowDataType::Utf8View | ArrowDataType::LargeUtf8 | ArrowDataType::Utf8 => { DataType::String }, diff --git a/crates/polars-core/src/datatypes/proptest.rs b/crates/polars-core/src/datatypes/proptest.rs index 06f2f299da8f..15c5e495d01e 100644 --- a/crates/polars-core/src/datatypes/proptest.rs +++ b/crates/polars-core/src/datatypes/proptest.rs @@ -150,14 +150,13 @@ pub fn dtypes( fn decimal_strategy( decimal_precision_range: RangeInclusive, ) -> impl Strategy { - prop::option::of(decimal_precision_range.clone()) + decimal_precision_range + .clone() .prop_flat_map(move |precision| { - let max_scale = precision.unwrap_or(*decimal_precision_range.end()); - let scale_strategy = prop::option::of(0_usize..=max_scale); - + let scale_strategy = (0_usize..=precision); (Just(precision), scale_strategy) }) - .prop_map(|(precision, scale)| DataType::Decimal(precision, scale)) + .prop_map(|(precision, scale)| DataType::NewDecimal(precision, scale)) } fn datetime_strategy() -> impl Strategy { diff --git a/crates/polars-core/src/fmt.rs b/crates/polars-core/src/fmt.rs index b605951e50ec..5b733e2712aa 100644 --- a/crates/polars-core/src/fmt.rs +++ b/crates/polars-core/src/fmt.rs @@ -392,7 +392,7 @@ impl Debug for Series { format_array!(f, self.duration().unwrap(), &dt, self.name(), "Series") }, #[cfg(feature = "dtype-decimal")] - DataType::Decimal(_, _) => { + DataType::NewDecimal(_, _) => { let dt = format!("{}", self.dtype()); format_array!(f, self.decimal().unwrap(), &dt, self.name(), "Series") }, @@ -1212,7 +1212,7 @@ impl Display for AnyValue<'_> { #[cfg(feature = "dtype-struct")] AnyValue::StructOwned(payload) => fmt_struct(f, &payload.0), #[cfg(feature = "dtype-decimal")] - AnyValue::Decimal(v, scale) => fmt_decimal(f, *v, *scale), + AnyValue::NewDecimal(v, _prec, scale) => fmt_decimal(f, *v, *scale), } } } @@ -1309,9 +1309,9 @@ impl Series { #[inline] #[cfg(feature = "dtype-decimal")] fn fmt_decimal(f: &mut Formatter<'_>, v: i128, scale: usize) -> fmt::Result { - let mut fmt_buf = arrow::compute::decimal::DecimalFmtBuffer::new(); + let mut fmt_buf = polars_compute::decimal::DecimalFmtBuffer::new(); let trim_zeros = get_trim_decimal_zeros(); - f.write_str(fmt_float_string(fmt_buf.format(v, scale, trim_zeros)).as_str()) + f.write_str(fmt_float_string(fmt_buf.format_dec128(v, scale, trim_zeros)).as_str()) } #[cfg(all( diff --git a/crates/polars-core/src/frame/group_by/aggregations/dispatch.rs b/crates/polars-core/src/frame/group_by/aggregations/dispatch.rs index fedff4357f18..58a4e70dd197 100644 --- a/crates/polars-core/src/frame/group_by/aggregations/dispatch.rs +++ b/crates/polars-core/src/frame/group_by/aggregations/dispatch.rs @@ -137,7 +137,7 @@ impl Series { Float64 => SeriesWrap(s.f64().unwrap().clone()).agg_mean(groups), dt if dt.is_primitive_numeric() => apply_method_physical_integer!(s, agg_mean, groups), #[cfg(feature = "dtype-decimal")] - Decimal(_, _) => self.cast(&Float64).unwrap().agg_mean(groups), + NewDecimal(_, _) => self.cast(&Float64).unwrap().agg_mean(groups), #[cfg(feature = "dtype-datetime")] dt @ Datetime(_, _) => self .to_physical_repr() @@ -193,7 +193,7 @@ impl Series { apply_method_physical_integer!(s, agg_median, groups) }, #[cfg(feature = "dtype-decimal")] - Decimal(_, _) => self.cast(&Float64).unwrap().agg_median(groups), + NewDecimal(_, _) => self.cast(&Float64).unwrap().agg_median(groups), #[cfg(feature = "dtype-datetime")] dt @ Datetime(_, _) => self .to_physical_repr() diff --git a/crates/polars-core/src/frame/row/av_buffer.rs b/crates/polars-core/src/frame/row/av_buffer.rs index fe98597bbddc..c20b1921dfad 100644 --- a/crates/polars-core/src/frame/row/av_buffer.rs +++ b/crates/polars-core/src/frame/row/av_buffer.rs @@ -1,6 +1,8 @@ use std::hint::unreachable_unchecked; use arrow::bitmap::BitmapBuilder; +#[cfg(feature = "dtype-decimal")] +use polars_compute::decimal::DecimalFmtBuffer; #[cfg(feature = "dtype-struct")] use polars_utils::pl_str::PlSmallStr; @@ -138,6 +140,11 @@ impl<'a> AnyValueBuffer<'a> { AnyValue::Float64(v) => builder.append_value(format!("{v}")), AnyValue::Boolean(true) => builder.append_value("true"), AnyValue::Boolean(false) => builder.append_value("false"), + #[cfg(feature = "dtype-decimal")] + AnyValue::NewDecimal(v, _p, s) => { + let mut fmt = DecimalFmtBuffer::new(); + builder.append_value(fmt.format_dec128(v, s, false)); + }, _ => return None, }, _ => return None, diff --git a/crates/polars-core/src/scalar/new.rs b/crates/polars-core/src/scalar/new.rs index 94a1e7377020..567b770ed11b 100644 --- a/crates/polars-core/src/scalar/new.rs +++ b/crates/polars-core/src/scalar/new.rs @@ -51,10 +51,10 @@ impl Scalar { } #[cfg(feature = "dtype-decimal")] - pub fn new_decimal(value: i128, scale: usize) -> Self { + pub fn new_decimal(value: i128, precision: usize, scale: usize) -> Self { Scalar::new( - DataType::Decimal(Some(38), Some(scale)), - AnyValue::Decimal(value, scale), + DataType::NewDecimal(precision, scale), + AnyValue::NewDecimal(value, precision, scale), ) } diff --git a/crates/polars-core/src/scalar/serde.rs b/crates/polars-core/src/scalar/serde.rs index 80353c591f2e..0d8124ec0cf9 100644 --- a/crates/polars-core/src/scalar/serde.rs +++ b/crates/polars-core/src/scalar/serde.rs @@ -114,7 +114,7 @@ pub enum SerializableScalar { /// A 128-bit fixed point decimal number with a scale. #[cfg(feature = "dtype-decimal")] - Decimal(i128, usize), + Decimal(i128, usize, usize), #[cfg(feature = "dtype-categorical")] Categorical { @@ -257,7 +257,7 @@ impl TryFrom for SerializableScalar { }, #[cfg(feature = "dtype-decimal")] - AnyValue::Decimal(v, scale) => Self::Decimal(v, scale), + AnyValue::NewDecimal(v, prec, scale) => Self::Decimal(v, prec, scale), }; Ok(out) } @@ -297,7 +297,7 @@ impl TryFrom for Scalar { #[cfg(feature = "dtype-array")] S::Array(v, width) => Self::new_array(v, width), #[cfg(feature = "dtype-decimal")] - S::Decimal(v, scale) => Self::new_decimal(v, scale), + S::Decimal(v, prec, scale) => Self::new_decimal(v, prec, scale), #[cfg(feature = "dtype-categorical")] S::Categorical { diff --git a/crates/polars-core/src/series/any_value.rs b/crates/polars-core/src/series/any_value.rs index 4dd2cbffbb9d..6e470d7c5a4e 100644 --- a/crates/polars-core/src/series/any_value.rs +++ b/crates/polars-core/src/series/any_value.rs @@ -30,7 +30,8 @@ impl Series { /// and the `strict` parameter: /// - If `strict` is `true`, the data type is equal to the data type of the /// first non-null value. If any other non-null values do not match this - /// data type, an error is raised. + /// data type, an error is raised. If the first non-null value is a + /// decimal the slice is scanned for the maximum precision and scale possible. /// - If `strict` is `false`, the data type is the supertype of the `values`. /// An error is returned if no supertype can be determined. /// **WARNING**: A full pass over the values is required to determine the supertype. @@ -63,19 +64,22 @@ impl Series { } } let dtype = if strict { - get_first_non_null_dtype(values) + match get_first_non_null_dtype(values) { + DataType::NewDecimal(mut prec, mut scale) => { + for v in values { + if let DataType::NewDecimal(p, s) = v.dtype() { + prec = prec.max(p); + scale = scale.max(s); + } + } + DataType::NewDecimal(prec, scale) + }, + dt => dt, + } } else { - // Currently does not work correctly for Decimal because equality is not implemented. any_values_to_supertype(values)? }; - // TODO: Remove this when Decimal data type equality is implemented. - #[cfg(feature = "dtype-decimal")] - if dtype.is_decimal() { - let dtype = DataType::Decimal(None, None); - return Self::from_any_values_and_dtype(name, values, &dtype, strict); - } - Self::from_any_values_and_dtype(name, values, &dtype, strict) } @@ -134,7 +138,7 @@ impl Series { any_values_to_categorical(values, dt, strict)? }, #[cfg(feature = "dtype-decimal")] - DataType::Decimal(precision, scale) => { + DataType::NewDecimal(precision, scale) => { any_values_to_decimal(values, *precision, *scale, strict)?.into_series() }, DataType::List(inner) => any_values_to_list(values, inner, strict)?.into_series(), @@ -507,49 +511,22 @@ fn any_values_to_categorical( #[cfg(feature = "dtype-decimal")] fn any_values_to_decimal( values: &[AnyValue], - precision: Option, - scale: Option, // If None, we're inferring the scale. + precision: usize, + scale: usize, strict: bool, ) -> PolarsResult { - /// Get the maximum scale among AnyValues - fn infer_scale( - values: &[AnyValue], - precision: Option, - strict: bool, - ) -> PolarsResult { - let mut max_scale = 0; - for av in values { - let av_scale = match av { - AnyValue::Decimal(_, scale) => *scale, - AnyValue::Null => continue, - av => { - if strict { - let target_dtype = DataType::Decimal(precision, None); - return Err(invalid_value_error(&target_dtype, av)); - } - continue; - }, - }; - max_scale = max_scale.max(av_scale); - } - Ok(max_scale) - } - let scale = match scale { - Some(s) => s, - None => infer_scale(values, precision, strict)?, - }; - let target_dtype = DataType::Decimal(precision, Some(scale)); + let target_dtype = DataType::NewDecimal(precision, scale); let mut builder = PrimitiveChunkedBuilder::::new(PlSmallStr::EMPTY, values.len()); for av in values { match av { // Allow equal or less scale. We do want to support different scales even in 'strict' mode. - AnyValue::Decimal(v, s) if *s <= scale => { - if *s == scale { + AnyValue::NewDecimal(v, p, s) if *s <= scale => { + if *p <= precision && *s == scale { builder.append_value(*v) } else { match av.strict_cast(&target_dtype) { - Some(AnyValue::Decimal(i, _)) => builder.append_value(i), + Some(AnyValue::NewDecimal(i, _, _)) => builder.append_value(i), _ => builder.append_null(), } } @@ -559,9 +536,8 @@ fn any_values_to_decimal( if strict { return Err(invalid_value_error(&target_dtype, av)); } - // TODO: Precision check, else set to null match av.strict_cast(&target_dtype) { - Some(AnyValue::Decimal(i, _)) => builder.append_value(i), + Some(AnyValue::NewDecimal(i, _, _)) => builder.append_value(i), _ => builder.append_null(), } }, diff --git a/crates/polars-core/src/series/from.rs b/crates/polars-core/src/series/from.rs index 0aeebc11e45a..5f8fd3bfb352 100644 --- a/crates/polars-core/src/series/from.rs +++ b/crates/polars-core/src/series/from.rs @@ -9,6 +9,7 @@ use arrow::offset::OffsetsBuffer; use arrow::temporal_conversions::*; use arrow::types::months_days_ns; use polars_compute::cast::cast_unchecked as cast; +use polars_compute::decimal::dec128_fits; use polars_error::feature_gated; use polars_utils::check_allow_importing_interval_as_struct; use polars_utils::itertools::Itertools; @@ -48,9 +49,9 @@ impl Series { Ok(series) } - /// Takes chunks and a polars datatype and constructs the Series + /// Takes chunks and a polars datatype and constructs the Series. /// This is faster than creating from chunks and an arrow datatype because there is no - /// casting involved + /// casting involved. /// /// # Safety /// @@ -91,11 +92,8 @@ impl Series { .into_datetime(*tu, tz.clone()) .into_series(), #[cfg(feature = "dtype-decimal")] - Decimal(precision, scale) => Int128Chunked::from_chunks(name, chunks) - .into_decimal_unchecked( - *precision, - scale.unwrap_or_else(|| unreachable!("scale should be set")), - ) + NewDecimal(precision, scale) => Int128Chunked::from_chunks(name, chunks) + .into_decimal_unchecked(*precision, *scale) .into_series(), #[cfg(feature = "dtype-array")] Array(_, _) => { @@ -306,8 +304,7 @@ impl Series { }, ArrowDataType::Decimal32(precision, scale) => { feature_gated!("dtype-decimal", { - polars_ensure!(*scale <= *precision, InvalidOperation: "invalid decimal precision and scale (prec={precision}, scale={scale})"); - polars_ensure!(*precision <= 38, InvalidOperation: "polars does not support decimals above 38 precision"); + polars_compute::decimal::dec128_verify_prec_scale(*precision, *scale)?; let mut chunks = chunks; for chunk in chunks.iter_mut() { @@ -326,17 +323,15 @@ impl Series { .to_boxed(); } - // @NOTE: We cannot cast here as that will lower the scale. let s = Int128Chunked::from_chunks(name, chunks) - .into_decimal_unchecked(Some(*precision), *scale) + .into_decimal_unchecked(*precision, *scale) .into_series(); Ok(s) }) }, ArrowDataType::Decimal64(precision, scale) => { feature_gated!("dtype-decimal", { - polars_ensure!(*scale <= *precision, InvalidOperation: "invalid decimal precision and scale (prec={precision}, scale={scale})"); - polars_ensure!(*precision <= 38, InvalidOperation: "polars does not support decimals above 38 precision"); + polars_compute::decimal::dec128_verify_prec_scale(*precision, *scale)?; let mut chunks = chunks; for chunk in chunks.iter_mut() { @@ -355,20 +350,16 @@ impl Series { .to_boxed(); } - // @NOTE: We cannot cast here as that will lower the scale. let s = Int128Chunked::from_chunks(name, chunks) - .into_decimal_unchecked(Some(*precision), *scale) + .into_decimal_unchecked(*precision, *scale) .into_series(); Ok(s) }) }, - ArrowDataType::Decimal(precision, scale) - | ArrowDataType::Decimal256(precision, scale) => { + ArrowDataType::Decimal(precision, scale) => { feature_gated!("dtype-decimal", { - polars_ensure!(*scale <= *precision, InvalidOperation: "invalid decimal precision and scale (prec={precision}, scale={scale})"); - polars_ensure!(*precision <= 38, InvalidOperation: "polars does not support decimals above 38 precision"); + polars_compute::decimal::dec128_verify_prec_scale(*precision, *scale)?; - // Q? I don't think this is correct for Decimal256? let mut chunks = chunks; for chunk in chunks.iter_mut() { *chunk = std::mem::take( @@ -381,13 +372,45 @@ impl Series { .to_boxed(); } - // @NOTE: We cannot cast here as that will lower the scale. let s = Int128Chunked::from_chunks(name, chunks) - .into_decimal_unchecked(Some(*precision), *scale) + .into_decimal_unchecked(*precision, *scale) .into_series(); Ok(s) }) }, + ArrowDataType::Decimal256(precision, scale) => { + use arrow::types::i256; + + polars_compute::decimal::dec128_verify_prec_scale(*precision, *scale)?; + + let mut chunks = chunks; + for chunk in chunks.iter_mut() { + let arr = std::mem::take( + chunk + .as_any_mut() + .downcast_mut::>() + .unwrap(), + ); + let arr_128: PrimitiveArray = arr.iter().map(|opt_v| { + if let Some(v) = opt_v { + let smaller: Option = (*v).try_into().ok(); + let smaller = smaller.filter(|v| dec128_fits(*v, *precision)); + smaller.ok_or_else(|| { + polars_err!(ComputeError: "Decimal256 to Decimal128 conversion overflowed, Decimal256 is not (yet) supported in Polars") + }).map(Some) + } else { + Ok(None) + } + }).try_collect_arr_trusted()?; + + *chunk = arr_128.to(ArrowDataType::Int128).to_boxed(); + } + + let s = Int128Chunked::from_chunks(name, chunks) + .into_decimal_unchecked(*precision, *scale) + .into_series(); + Ok(s) + }, ArrowDataType::Null => Ok(new_null(name, &chunks)), #[cfg(not(feature = "dtype-categorical"))] ArrowDataType::Dictionary(_, _, _) => { diff --git a/crates/polars-core/src/series/implementations/decimal.rs b/crates/polars-core/src/series/implementations/decimal.rs index 96669dee89d0..14db9d3556bf 100644 --- a/crates/polars-core/src/series/implementations/decimal.rs +++ b/crates/polars-core/src/series/implementations/decimal.rs @@ -84,7 +84,7 @@ impl SeriesWrap { ListChunked::from_chunks_and_dtype_unchecked( agg_s.name().clone(), vec![Box::new(new_arr)], - DataType::List(Box::new(DataType::Decimal(precision, Some(scale)))), + DataType::List(Box::new(DataType::NewDecimal(precision, scale))), ) .into_series() } @@ -393,21 +393,21 @@ impl SeriesTrait for SeriesWrap { fn sum_reduce(&self) -> PolarsResult { Ok(self.apply_physical(|ca| { let sum = ca.sum(); - let DataType::Decimal(_, Some(scale)) = self.dtype() else { + let DataType::NewDecimal(prec, scale) = self.dtype() else { unreachable!() }; - let av = AnyValue::Decimal(sum.unwrap(), *scale); + let av = AnyValue::NewDecimal(sum.unwrap(), *prec, *scale); Scalar::new(self.dtype().clone(), av) })) } fn min_reduce(&self) -> PolarsResult { Ok(self.apply_physical(|ca| { let min = ca.min(); - let DataType::Decimal(_, Some(scale)) = self.dtype() else { + let DataType::NewDecimal(prec, scale) = self.dtype() else { unreachable!() }; let av = if let Some(min) = min { - AnyValue::Decimal(min, *scale) + AnyValue::NewDecimal(min, *prec, *scale) } else { AnyValue::Null }; @@ -417,11 +417,11 @@ impl SeriesTrait for SeriesWrap { fn max_reduce(&self) -> PolarsResult { Ok(self.apply_physical(|ca| { let max = ca.max(); - let DataType::Decimal(_, Some(scale)) = self.dtype() else { + let DataType::NewDecimal(prec, scale) = self.dtype() else { unreachable!() }; let av = if let Some(m) = max { - AnyValue::Decimal(m, *scale) + AnyValue::NewDecimal(m, *prec, *scale) } else { AnyValue::Null }; diff --git a/crates/polars-core/src/series/into.rs b/crates/polars-core/src/series/into.rs index 5b7d24609815..53d212d2dec5 100644 --- a/crates/polars-core/src/series/into.rs +++ b/crates/polars-core/src/series/into.rs @@ -150,7 +150,7 @@ impl Series { ) .unwrap(), #[cfg(feature = "dtype-decimal")] - DataType::Decimal(_, _) => self.decimal().unwrap().physical().chunks()[chunk_idx] + DataType::NewDecimal(_, _) => self.decimal().unwrap().physical().chunks()[chunk_idx] .as_any() .downcast_ref::>() .unwrap() diff --git a/crates/polars-core/src/series/mod.rs b/crates/polars-core/src/series/mod.rs index 6881ca7d1a1f..f78e89965370 100644 --- a/crates/polars-core/src/series/mod.rs +++ b/crates/polars-core/src/series/mod.rs @@ -37,6 +37,7 @@ pub use from::*; pub use iterator::{SeriesIter, SeriesPhysIter}; use num_traits::NumCast; use polars_error::feature_gated; +use polars_utils::float::IsFloat; pub use series_trait::{IsSorted, *}; use crate::POOL; @@ -505,8 +506,12 @@ impl Series { use DataType as D; match (self.dtype(), dtype) { #[cfg(feature = "dtype-decimal")] - (D::Int128, D::Decimal(precision, scale)) => { - self.clone().into_decimal(*precision, scale.unwrap()) + (D::Int128, D::NewDecimal(precision, scale)) => { + let ca = self.i128().unwrap(); + Ok(ca + .clone() + .into_decimal_unchecked(*precision, *scale) + .into_series()) }, #[cfg(feature = "dtype-categorical")] @@ -585,7 +590,7 @@ impl Series { /// first cast to `Int64` to prevent overflow issues. pub fn sum(&self) -> PolarsResult where - T: NumCast, + T: NumCast + IsFloat, { let sum = self.sum_reduce()?; let sum = sum.value().extract().unwrap(); @@ -596,7 +601,7 @@ impl Series { /// Returns an option because the array is nullable. pub fn min(&self) -> PolarsResult> where - T: NumCast, + T: NumCast + IsFloat, { let min = self.min_reduce()?; let min = min.value().extract::(); @@ -607,7 +612,7 @@ impl Series { /// Returns an option because the array is nullable. pub fn max(&self) -> PolarsResult> where - T: NumCast, + T: NumCast + IsFloat, { let max = self.max_reduce()?; let max = max.value().extract::(); @@ -725,7 +730,7 @@ impl Series { }) }, #[cfg(feature = "dtype-decimal")] - Decimal(_, _) => Cow::Owned(self.decimal().unwrap().phys.clone().into_series()), + NewDecimal(_, _) => Cow::Owned(self.decimal().unwrap().phys.clone().into_series()), List(_) => match self.list().unwrap().to_physical_repr() { Cow::Borrowed(_) => Cow::Borrowed(self), Cow::Owned(ca) => Cow::Owned(ca.into_series()), @@ -811,11 +816,7 @@ impl Series { } #[cfg(feature = "dtype-decimal")] - pub(crate) fn into_decimal( - self, - precision: Option, - scale: usize, - ) -> PolarsResult { + pub(crate) fn into_decimal(self, precision: usize, scale: usize) -> PolarsResult { match self.dtype() { DataType::Int128 => Ok(self .i128() @@ -823,9 +824,8 @@ impl Series { .clone() .into_decimal(precision, scale)? .into_series()), - DataType::Decimal(cur_prec, cur_scale) - if (cur_prec.is_none() || precision.is_none() || *cur_prec == precision) - && *cur_scale == Some(scale) => + DataType::NewDecimal(cur_prec, cur_scale) + if scale == *cur_scale && precision >= *cur_prec => { Ok(self) }, @@ -1171,23 +1171,23 @@ mod test { #[cfg(feature = "dtype-decimal")] fn series_append_decimal() { let s1 = Series::new("a".into(), &[1.1, 2.3]) - .cast(&DataType::Decimal(None, Some(2))) + .cast(&DataType::NewDecimal(38, 2)) .unwrap(); let s2 = Series::new("b".into(), &[3]) - .cast(&DataType::Decimal(None, Some(0))) + .cast(&DataType::NewDecimal(38, 0)) .unwrap(); { let mut s1 = s1.clone(); s1.append(&s2).unwrap(); assert_eq!(s1.len(), 3); - assert_eq!(s1.get(2).unwrap(), AnyValue::Decimal(300, 2)); + assert_eq!(s1.get(2).unwrap(), AnyValue::NewDecimal(300, 38, 2)); } { let mut s2 = s2; s2.extend(&s1).unwrap(); - assert_eq!(s2.get(2).unwrap(), AnyValue::Decimal(2, 0)); + assert_eq!(s2.get(2).unwrap(), AnyValue::NewDecimal(2, 38, 0)); } } diff --git a/crates/polars-core/src/series/ops/downcast.rs b/crates/polars-core/src/series/ops/downcast.rs index 8afe3c14d827..d44d80e0b43e 100644 --- a/crates/polars-core/src/series/ops/downcast.rs +++ b/crates/polars-core/src/series/ops/downcast.rs @@ -151,7 +151,7 @@ impl Series { /// Unpack to [`ChunkedArray`] of dtype [`DataType::Decimal`] #[cfg(feature = "dtype-decimal")] pub fn try_decimal(&self) -> Option<&DecimalChunked> { - try_unpack_chunked!(self, DataType::Decimal(_, _) => DecimalChunked) + try_unpack_chunked!(self, DataType::NewDecimal(_, _) => DecimalChunked) } /// Unpack to [`ChunkedArray`] of dtype list diff --git a/crates/polars-core/src/series/ops/null.rs b/crates/polars-core/src/series/ops/null.rs index fed384336bdd..8c430a7f0568 100644 --- a/crates/polars-core/src/series/ops/null.rs +++ b/crates/polars-core/src/series/ops/null.rs @@ -45,8 +45,8 @@ impl Series { .into_time() .into_series(), #[cfg(feature = "dtype-decimal")] - DataType::Decimal(precision, scale) => Int128Chunked::full_null(name, size) - .into_decimal_unchecked(*precision, scale.unwrap_or(0)) + DataType::NewDecimal(precision, scale) => Int128Chunked::full_null(name, size) + .into_decimal_unchecked(*precision, *scale) .into_series(), #[cfg(feature = "dtype-struct")] DataType::Struct(fields) => { diff --git a/crates/polars-core/src/utils/supertype.rs b/crates/polars-core/src/utils/supertype.rs index 63029a9b65e7..4bf92bea7cda 100644 --- a/crates/polars-core/src/utils/supertype.rs +++ b/crates/polars-core/src/utils/supertype.rs @@ -468,13 +468,34 @@ pub fn get_supertype_with_options( Some(Struct(new_fields)) } #[cfg(feature = "dtype-decimal")] - (Decimal(p1, s1), Decimal(p2, s2)) => { - Some(Decimal((*p1).zip(*p2).map(|(p1, p2)| p1.max(p2)), (*s1).max(*s2))) - } + (NewDecimal(p1, s1), NewDecimal(p2, s2)) => { + Some(NewDecimal((*p1).max(*p2), (*s1).max(*s2))) + }, #[cfg(feature = "dtype-decimal")] - (Decimal(_, _), f @ (Float32 | Float64)) => Some(f.clone()), + (NewDecimal(_, _), Float32 | Float64) => Some(Float64), #[cfg(feature = "dtype-decimal")] - (d @ Decimal(_, _), dt) if dt.is_signed_integer() || dt.is_unsigned_integer() => Some(d.clone()), + (NewDecimal(prec, scale), dt) if dt.is_signed_integer() || dt.is_unsigned_integer() => { + use polars_compute::decimal::{i128_to_dec128, DEC128_MAX_PREC}; + let fits = |v| { i128_to_dec128(v, *prec, *scale).is_some() }; + let fits_orig_prec_scale = match dt { + UInt8 => fits(u8::MAX as i128), + UInt16 => fits(u16::MAX as i128), + UInt32 => fits(u32::MAX as i128), + UInt64 => fits(u64::MAX as i128), + UInt128 => false, + Int8 => fits(i8::MAX as i128), + Int16 => fits(i16::MAX as i128), + Int32 => fits(i32::MAX as i128), + Int64 => fits(i64::MAX as i128), + Int128 => false, + _ => unreachable!(), + }; + if fits_orig_prec_scale { + Some(NewDecimal(*prec, *scale)) + } else { + Some(NewDecimal(DEC128_MAX_PREC, *scale)) + } + } _ => None, } } diff --git a/crates/polars-expr/src/expressions/binary.rs b/crates/polars-expr/src/expressions/binary.rs index 639efbaa17f6..5021f6e25cd5 100644 --- a/crates/polars-expr/src/expressions/binary.rs +++ b/crates/polars-expr/src/expressions/binary.rs @@ -74,7 +74,7 @@ pub fn apply_operator(left: &Column, right: &Column, op: Operator) -> PolarsResu Operator::Divide => left / right, Operator::TrueDivide => match left.dtype() { #[cfg(feature = "dtype-decimal")] - Decimal(_, _) => left / right, + NewDecimal(_, _) => left / right, Duration(_) | Date | Datetime(_, _) | Float32 | Float64 => left / right, #[cfg(feature = "dtype-array")] Array(..) => left / right, diff --git a/crates/polars-expr/src/groups/mod.rs b/crates/polars-expr/src/groups/mod.rs index c78c61c00940..5778e5c44f05 100644 --- a/crates/polars-expr/src/groups/mod.rs +++ b/crates/polars-expr/src/groups/mod.rs @@ -83,7 +83,7 @@ pub fn new_hash_grouper(key_schema: Arc) -> Box { }, #[cfg(feature = "dtype-decimal")] - DataType::Decimal(_, _) => { + DataType::NewDecimal(_, _) => { Box::new(single_key::SingleKeyHashGrouper::::new()) }, #[cfg(feature = "dtype-categorical")] diff --git a/crates/polars-expr/src/hash_keys.rs b/crates/polars-expr/src/hash_keys.rs index 84c28ea00bdd..cadff93f9bbe 100644 --- a/crates/polars-expr/src/hash_keys.rs +++ b/crates/polars-expr/src/hash_keys.rs @@ -27,7 +27,7 @@ pub fn hash_keys_variant_for_dtype(dt: &DataType) -> HashKeysVariant { dt if dt.is_primitive_numeric() | dt.is_temporal() => HashKeysVariant::Single, #[cfg(feature = "dtype-decimal")] - DataType::Decimal(_, _) => HashKeysVariant::Single, + DataType::NewDecimal(_, _) => HashKeysVariant::Single, #[cfg(feature = "dtype-categorical")] DataType::Enum(_, _) | DataType::Categorical(_, _) => HashKeysVariant::Single, @@ -76,7 +76,7 @@ macro_rules! downcast_single_key_ca { DataType::Duration(..) => { let $ca = $self.duration().unwrap().physical(); $($body)* }, #[cfg(feature = "dtype-decimal")] - DataType::Decimal(..) => { let $ca = $self.decimal().unwrap().physical(); $($body)* }, + DataType::NewDecimal(..) => { let $ca = $self.decimal().unwrap().physical(); $($body)* }, #[cfg(feature = "dtype-categorical")] dt @ (DataType::Enum(_, _) | DataType::Categorical(_, _)) => { match dt.cat_physical().unwrap() { diff --git a/crates/polars-expr/src/hot_groups/mod.rs b/crates/polars-expr/src/hot_groups/mod.rs index a7a19be0c2de..b527f9b88205 100644 --- a/crates/polars-expr/src/hot_groups/mod.rs +++ b/crates/polars-expr/src/hot_groups/mod.rs @@ -83,7 +83,7 @@ pub fn new_hash_hot_grouper(key_schema: Arc, num_groups: usize) -> Box Box::new(SK::::new(dt, ng)), #[cfg(feature = "dtype-decimal")] - DataType::Decimal(_, _) => Box::new(SK::::new(dt, ng)), + DataType::NewDecimal(_, _) => Box::new(SK::::new(dt, ng)), #[cfg(feature = "dtype-categorical")] dt @ (DataType::Enum(_, _) | DataType::Categorical(_, _)) => { with_match_categorical_physical_type!(dt.cat_physical().unwrap(), |$C| { diff --git a/crates/polars-expr/src/idx_table/mod.rs b/crates/polars-expr/src/idx_table/mod.rs index b61e38402d7c..5e8daeb0e992 100644 --- a/crates/polars-expr/src/idx_table/mod.rs +++ b/crates/polars-expr/src/idx_table/mod.rs @@ -104,7 +104,7 @@ pub fn new_idx_table(key_schema: Arc) -> Box { DataType::Time => Box::new(SKIT::::new()), #[cfg(feature = "dtype-decimal")] - DataType::Decimal(_, _) => Box::new(SKIT::::new()), + DataType::NewDecimal(_, _) => Box::new(SKIT::::new()), #[cfg(feature = "dtype-categorical")] dt @ (DataType::Enum(_, _) | DataType::Categorical(_, _)) => { with_match_categorical_physical_type!(dt.cat_physical().unwrap(), |$C| { diff --git a/crates/polars-expr/src/reduce/mean.rs b/crates/polars-expr/src/reduce/mean.rs index 44aa4a500ffb..34879e57b5a0 100644 --- a/crates/polars-expr/src/reduce/mean.rs +++ b/crates/polars-expr/src/reduce/mean.rs @@ -17,7 +17,7 @@ pub fn new_mean_reduction(dtype: DataType) -> Box { }) }, #[cfg(feature = "dtype-decimal")] - Decimal(_, _) => Box::new(VGR::new(dtype, NumMeanReducer::(PhantomData))), + NewDecimal(_, _) => Box::new(VGR::new(dtype, NumMeanReducer::(PhantomData))), // For compatibility with the current engine, should probably be an error. String | Binary => Box::new(super::NullGroupedReduction::new(dtype)), @@ -43,8 +43,8 @@ fn finish_output(values: Vec<(f64, usize)>, dtype: &DataType) -> Series { ca.into_series() }, #[cfg(feature = "dtype-decimal")] - DataType::Decimal(_prec, scale) => { - let inv_scale_factor = 1.0 / 10u128.pow(scale.unwrap() as u32) as f64; + DataType::NewDecimal(_prec, scale) => { + let inv_scale_factor = 1.0 / 10u128.pow(*scale as u32) as f64; let ca: Float64Chunked = values .into_iter() .map(|(s, c)| (c != 0).then(|| s / c as f64 * inv_scale_factor)) diff --git a/crates/polars-expr/src/reduce/min_max.rs b/crates/polars-expr/src/reduce/min_max.rs index a199230888a6..44a6991c2c09 100644 --- a/crates/polars-expr/src/reduce/min_max.rs +++ b/crates/polars-expr/src/reduce/min_max.rs @@ -36,7 +36,7 @@ pub fn new_min_reduction(dtype: DataType, propagate_nans: bool) -> Box Box::new(VMGR::new(dtype, NumReducer::>::new())), + NewDecimal(_, _) => Box::new(VMGR::new(dtype, NumReducer::>::new())), #[cfg(feature = "dtype-categorical")] Categorical(cats, map) => with_match_categorical_physical_type!(cats.physical(), |$C| { Box::new(VMGR::new(dtype.clone(), CatMinReducer::<$C>(map.clone(), PhantomData))) @@ -68,7 +68,7 @@ pub fn new_max_reduction(dtype: DataType, propagate_nans: bool) -> Box Box::new(VMGR::new(dtype, NumReducer::>::new())), + NewDecimal(_, _) => Box::new(VMGR::new(dtype, NumReducer::>::new())), #[cfg(feature = "dtype-categorical")] Categorical(cats, map) => with_match_categorical_physical_type!(cats.physical(), |$C| { Box::new(VMGR::new(dtype.clone(), CatMaxReducer::<$C>(map.clone(), PhantomData))) diff --git a/crates/polars-expr/src/reduce/sum.rs b/crates/polars-expr/src/reduce/sum.rs index 9d2fe4cf8e8d..57214d8eabcd 100644 --- a/crates/polars-expr/src/reduce/sum.rs +++ b/crates/polars-expr/src/reduce/sum.rs @@ -53,7 +53,7 @@ pub fn new_sum_reduction(dtype: DataType) -> Box { }) }, #[cfg(feature = "dtype-decimal")] - Decimal(_, _) => Box::new(VGR::new(dtype, NumSumReducer::(PhantomData))), + NewDecimal(_, _) => Box::new(VGR::new(dtype, NumSumReducer::(PhantomData))), Duration(_) => Box::new(VGR::new(dtype, NumSumReducer::(PhantomData))), // For compatibility with the current engine, should probably be an error. String | Binary => Box::new(super::NullGroupedReduction::new(dtype)), diff --git a/crates/polars-expr/src/reduce/var_std.rs b/crates/polars-expr/src/reduce/var_std.rs index f48508e2f86b..977ccaad86b5 100644 --- a/crates/polars-expr/src/reduce/var_std.rs +++ b/crates/polars-expr/src/reduce/var_std.rs @@ -22,7 +22,7 @@ pub fn new_var_std_reduction(dtype: DataType, is_std: bool, ddof: u8) -> Box Box::new(VGR::new( + NewDecimal(_, _) => Box::new(VGR::new( dtype, VarStdReducer:: { is_std, diff --git a/crates/polars-io/Cargo.toml b/crates/polars-io/Cargo.toml index b97b17535997..2cb11d07c7b4 100644 --- a/crates/polars-io/Cargo.toml +++ b/crates/polars-io/Cargo.toml @@ -9,6 +9,7 @@ repository = { workspace = true } description = "IO related logic for the Polars DataFrame library" [dependencies] +polars-compute = { workspace = true } polars-core = { workspace = true } polars-error = { workspace = true } polars-json = { workspace = true, optional = true } diff --git a/crates/polars-io/src/catalog/unity/schema.rs b/crates/polars-io/src/catalog/unity/schema.rs index 8dba964a7d09..a81492140df9 100644 --- a/crates/polars-io/src/catalog/unity/schema.rs +++ b/crates/polars-io/src/catalog/unity/schema.rs @@ -219,7 +219,7 @@ fn parse_type_text(type_text: &str) -> PolarsResult { let precision: usize = precision.parse().ok()?; let scale: usize = scale.parse().ok()?; - Some(DataType::Decimal(Some(precision), Some(scale))) + Some(DataType::NewDecimal(precision, scale)) })() .ok_or_else(|| { polars_err!( @@ -297,10 +297,7 @@ fn dtype_to_type_text(dtype: &DataType) -> PolarsResult { Null => S!("null"), - Decimal(precision, scale) => { - let precision = precision.unwrap_or(38); - let scale = scale.unwrap_or(0); - + NewDecimal(precision, scale) => { format_pl_smallstr!("decimal({},{})", precision, scale) }, @@ -376,7 +373,7 @@ fn dtype_to_type_name(dtype: &DataType) -> PolarsResult { Null => S!("NULL"), - Decimal(..) => S!("DECIMAL"), + NewDecimal(..) => S!("DECIMAL"), List(inner) => { if get_list_map_type(inner).is_some() { @@ -440,7 +437,7 @@ fn dtype_to_type_json(dtype: &DataType) -> PolarsResult { Null => S!("null"), - Decimal(..) => ColumnTypeJsonType::TypeName(dtype_to_type_text(dtype)?), + NewDecimal(..) => ColumnTypeJsonType::TypeName(dtype_to_type_text(dtype)?), List(inner) => { let out = if let Some((key_type, value_type)) = get_list_map_type(inner) { diff --git a/crates/polars-io/src/csv/read/reader.rs b/crates/polars-io/src/csv/read/reader.rs index 592d335e3b4b..c978c063e928 100644 --- a/crates/polars-io/src/csv/read/reader.rs +++ b/crates/polars-io/src/csv/read/reader.rs @@ -219,15 +219,10 @@ pub fn prepare_csv_schema( PolarsResult::Ok(fld) }, #[cfg(feature = "dtype-decimal")] - Decimal(precision, scale) => match (precision, scale) { - (_, Some(_)) => { - fields_to_cast.push(fld.clone()); - fld.coerce(String); - PolarsResult::Ok(fld) - }, - _ => Err(PolarsError::ComputeError( - "'scale' must be set when reading csv column as Decimal".into(), - )), + NewDecimal(_, _) => { + fields_to_cast.push(fld.clone()); + fld.coerce(String); + PolarsResult::Ok(fld) }, _ => { matched = false; diff --git a/crates/polars-io/src/csv/write/write_impl/serializer.rs b/crates/polars-io/src/csv/write/write_impl/serializer.rs index 18420f67128e..fd0d74019436 100644 --- a/crates/polars-io/src/csv/write/write_impl/serializer.rs +++ b/crates/polars-io/src/csv/write/write_impl/serializer.rs @@ -372,9 +372,9 @@ fn bool_serializer(array: &BooleanArray) -> impl Ser fn decimal_serializer(array: &PrimitiveArray, scale: usize) -> impl Serializer<'_> { let trim_zeros = arrow::compute::decimal::get_trim_decimal_zeros(); - let mut fmt_buf = arrow::compute::decimal::DecimalFmtBuffer::new(); + let mut fmt_buf = polars_compute::decimal::DecimalFmtBuffer::new(); let f = move |&item, buf: &mut Vec, _options: &SerializeOptions| { - buf.extend_from_slice(fmt_buf.format(item, scale, trim_zeros).as_bytes()); + buf.extend_from_slice(fmt_buf.format_dec128(item, scale, trim_zeros).as_bytes()); }; make_serializer::<_, _, false>(f, array.iter(), |array| { @@ -906,8 +906,8 @@ pub(super) fn serializer_for<'a>( }) }, #[cfg(feature = "dtype-decimal")] - DataType::Decimal(_, scale) => { - quote_wrapper!(decimal_serializer, scale.unwrap_or(0)) + DataType::NewDecimal(_, scale) => { + quote_wrapper!(decimal_serializer, *scale) }, _ => { polars_bail!(ComputeError: "datatype {dtype} cannot be written to CSV\n\nConsider using JSON or a binary format.") diff --git a/crates/polars-json/src/json/write/serialize.rs b/crates/polars-json/src/json/write/serialize.rs index 2a5cbbc31959..778f5a77ebbf 100644 --- a/crates/polars-json/src/json/write/serialize.rs +++ b/crates/polars-json/src/json/write/serialize.rs @@ -122,10 +122,10 @@ fn decimal_serializer<'a>( take: usize, ) -> Box + 'a + Send + Sync> { let trim_zeros = get_trim_decimal_zeros(); - let mut fmt_buf = arrow::compute::decimal::DecimalFmtBuffer::new(); + let mut fmt_buf = polars_compute::decimal::DecimalFmtBuffer::new(); let f = move |x: Option<&i128>, buf: &mut Vec| { if let Some(x) = x { - utf8::write_str(buf, fmt_buf.format(*x, scale, trim_zeros)).unwrap() + utf8::write_str(buf, fmt_buf.format_dec128(*x, scale, trim_zeros)).unwrap() } else { buf.extend(b"null") } diff --git a/crates/polars-ops/src/chunked_array/gather/chunked.rs b/crates/polars-ops/src/chunked_array/gather/chunked.rs index 2321ad455283..b2cb878710b3 100644 --- a/crates/polars-ops/src/chunked_array/gather/chunked.rs +++ b/crates/polars-ops/src/chunked_array/gather/chunked.rs @@ -180,7 +180,7 @@ impl TakeChunked for Series { #[cfg(feature = "object")] Object(_) => take_unchecked_object(self, by, sorted), #[cfg(feature = "dtype-decimal")] - Decimal(_, _) => { + NewDecimal(_, _) => { let ca = self.decimal().unwrap(); let out = ca.phys.take_chunked_unchecked(by, sorted, avoid_sharing); out.into_decimal_unchecked(ca.precision(), ca.scale()) @@ -280,7 +280,7 @@ impl TakeChunked for Series { #[cfg(feature = "object")] Object(_) => take_opt_unchecked_object(self, by, avoid_sharing), #[cfg(feature = "dtype-decimal")] - Decimal(_, _) => { + NewDecimal(_, _) => { let ca = self.decimal().unwrap(); let out = ca.phys.take_opt_chunked_unchecked(by, avoid_sharing); out.into_decimal_unchecked(ca.precision(), ca.scale()) diff --git a/crates/polars-ops/src/series/ops/abs.rs b/crates/polars-ops/src/series/ops/abs.rs index 0046b8031fcc..19c87cc3ef1d 100644 --- a/crates/polars-ops/src/series/ops/abs.rs +++ b/crates/polars-ops/src/series/ops/abs.rs @@ -15,7 +15,7 @@ pub fn abs(s: &Series) -> PolarsResult { Float32 => s.f32().unwrap().wrapping_abs().into_series(), Float64 => s.f64().unwrap().wrapping_abs().into_series(), #[cfg(feature = "dtype-decimal")] - Decimal(_, _) => { + NewDecimal(_, _) => { let ca = s.decimal().unwrap(); let precision = ca.precision(); let scale = ca.scale(); diff --git a/crates/polars-ops/src/series/ops/clip.rs b/crates/polars-ops/src/series/ops/clip.rs index 2d7b21a05c93..85cafe80050a 100644 --- a/crates/polars-ops/src/series/ops/clip.rs +++ b/crates/polars-ops/src/series/ops/clip.rs @@ -43,9 +43,9 @@ pub fn clip(s: &Series, min: &Series, max: &Series) -> PolarsResult { let out = clip_helper_both_bounds(ca, min, max).into_series(); match original_type { #[cfg(feature = "dtype-decimal")] - DataType::Decimal(precision, scale) => { + DataType::NewDecimal(precision, scale) => { let phys = out.i128()?.as_ref().clone(); - Ok(phys.into_decimal_unchecked(*precision, scale.unwrap()).into_series()) + Ok(phys.into_decimal_unchecked(*precision, *scale).into_series()) }, dt if dt.is_logical() => out.cast(original_type), _ => Ok(out) @@ -77,9 +77,9 @@ pub fn clip_max(s: &Series, max: &Series) -> PolarsResult { let out = clip_helper_single_bound(ca, max, num_traits::clamp_max).into_series(); match original_type { #[cfg(feature = "dtype-decimal")] - DataType::Decimal(precision, scale) => { + DataType::NewDecimal(precision, scale) => { let phys = out.i128()?.as_ref().clone(); - Ok(phys.into_decimal_unchecked(*precision, scale.unwrap()).into_series()) + Ok(phys.into_decimal_unchecked(*precision, *scale).into_series()) }, dt if dt.is_logical() => out.cast(original_type), _ => Ok(out) @@ -111,9 +111,9 @@ pub fn clip_min(s: &Series, min: &Series) -> PolarsResult { let out = clip_helper_single_bound(ca, min, num_traits::clamp_min).into_series(); match original_type { #[cfg(feature = "dtype-decimal")] - DataType::Decimal(precision, scale) => { + DataType::NewDecimal(precision, scale) => { let phys = out.i128()?.as_ref().clone(); - Ok(phys.into_decimal_unchecked(*precision, scale.unwrap()).into_series()) + Ok(phys.into_decimal_unchecked(*precision, *scale).into_series()) }, dt if dt.is_logical() => out.cast(original_type), _ => Ok(out) diff --git a/crates/polars-ops/src/series/ops/cum_agg.rs b/crates/polars-ops/src/series/ops/cum_agg.rs index 4ac900107d51..014bec6dc825 100644 --- a/crates/polars-ops/src/series/ops/cum_agg.rs +++ b/crates/polars-ops/src/series/ops/cum_agg.rs @@ -214,6 +214,32 @@ where cum_scan_numeric(ca, reverse, init, det_sum) } +#[cfg(feature = "dtype-decimal")] +fn cum_sum_decimal( + ca: &Int128Chunked, + reverse: bool, + init: Option, +) -> PolarsResult { + use polars_compute::decimal::{DEC128_MAX_PREC, dec128_add}; + + let mut value = init.unwrap_or(0); + let update = |opt_v| { + if let Some(v) = opt_v { + value = dec128_add(value, v, DEC128_MAX_PREC).ok_or_else( + || polars_err!(ComputeError: "overflow in decimal addition in cum_sum"), + )?; + Ok(Some(value)) + } else { + Ok(None) + } + }; + if reverse { + ca.iter().rev().map(update).try_collect_ca_trusted_like(ca) + } else { + ca.iter().map(update).try_collect_ca_trusted_like(ca) + } +} + fn cum_prod_numeric( ca: &ChunkedArray, reverse: bool, @@ -285,10 +311,11 @@ pub fn cum_sum_with_init( Float32 => cum_sum_numeric(s.f32()?, reverse, init.extract()).into_series(), Float64 => cum_sum_numeric(s.f64()?, reverse, init.extract()).into_series(), #[cfg(feature = "dtype-decimal")] - Decimal(precision, scale) => { + NewDecimal(_precision, scale) => { + use polars_compute::decimal::DEC128_MAX_PREC; let ca = s.decimal().unwrap().physical(); - cum_sum_numeric(ca, reverse, init.clone().to_physical().extract()) - .into_decimal_unchecked(*precision, scale.unwrap()) + cum_sum_decimal(ca, reverse, init.clone().to_physical().extract())? + .into_decimal_unchecked(DEC128_MAX_PREC, *scale) .into_series() }, #[cfg(feature = "dtype-duration")] @@ -320,10 +347,10 @@ pub fn cum_min_with_init( Ok(cum_min_bool(s.bool()?, reverse, init.extract_bool()).into_series()) }, #[cfg(feature = "dtype-decimal")] - DataType::Decimal(precision, scale) => { + DataType::NewDecimal(precision, scale) => { let ca = s.decimal().unwrap().physical(); let out = cum_min_numeric(ca, reverse, init.clone().to_physical().extract()) - .into_decimal_unchecked(*precision, scale.unwrap()) + .into_decimal_unchecked(*precision, *scale) .into_series(); Ok(out) }, @@ -358,10 +385,10 @@ pub fn cum_max_with_init( Ok(cum_max_bool(s.bool()?, reverse, init.extract_bool()).into_series()) }, #[cfg(feature = "dtype-decimal")] - DataType::Decimal(precision, scale) => { + DataType::NewDecimal(precision, scale) => { let ca = s.decimal().unwrap().physical(); let out = cum_max_numeric(ca, reverse, init.clone().to_physical().extract()) - .into_decimal_unchecked(*precision, scale.unwrap()) + .into_decimal_unchecked(*precision, *scale) .into_series(); Ok(out) }, diff --git a/crates/polars-ops/src/series/ops/index_of.rs b/crates/polars-ops/src/series/ops/index_of.rs index e1580db70a21..9fea4e9c5c43 100644 --- a/crates/polars-ops/src/series/ops/index_of.rs +++ b/crates/polars-ops/src/series/ops/index_of.rs @@ -151,7 +151,7 @@ pub fn index_of(series: &Series, needle: Scalar) -> PolarsResult> // to_physical #[cfg(feature = "dtype-decimal")] - DT::Decimal(..) => unreachable!(), + DT::NewDecimal(..) => unreachable!(), #[cfg(feature = "dtype-categorical")] DT::Categorical(..) | DT::Enum(..) => unreachable!(), DT::Date | DT::Datetime(..) | DT::Duration(..) | DT::Time => unreachable!(), diff --git a/crates/polars-ops/src/series/ops/interpolation/interpolate.rs b/crates/polars-ops/src/series/ops/interpolation/interpolate.rs index f3bc1f3af9f1..53fd15fbc9f8 100644 --- a/crates/polars-ops/src/series/ops/interpolation/interpolate.rs +++ b/crates/polars-ops/src/series/ops/interpolation/interpolate.rs @@ -127,7 +127,9 @@ fn interpolate_nearest(s: &Series) -> Series { let out = downcast_as_macro_arg_physical!(s, dispatch); match logical { #[cfg(feature = "dtype-decimal")] - DataType::Decimal(_, _) => unsafe { out.from_physical_unchecked(logical).unwrap() }, + DataType::NewDecimal(_, _) => unsafe { + out.from_physical_unchecked(logical).unwrap() + }, _ => out.cast(logical).unwrap(), } }, @@ -149,7 +151,7 @@ fn interpolate_linear(s: &Series) -> Series { #[cfg(feature = "dtype-decimal")] { - if matches!(logical, DataType::Decimal(_, _)) { + if matches!(logical, DataType::NewDecimal(_, _)) { let out = linear_interp_signed(s.i128().unwrap()); return unsafe { out.from_physical_unchecked(logical).unwrap() }; } diff --git a/crates/polars-ops/src/series/ops/is_in.rs b/crates/polars-ops/src/series/ops/is_in.rs index 722e7e6ea0b6..cf7f110f4022 100644 --- a/crates/polars-ops/src/series/ops/is_in.rs +++ b/crates/polars-ops/src/series/ops/is_in.rs @@ -509,36 +509,40 @@ fn is_in_decimal( other: &Series, nulls_equal: bool, ) -> PolarsResult { - let Some(DataType::Decimal(_, other_scale)) = other.dtype().inner_dtype() else { + let Some(DataType::NewDecimal(other_precision, other_scale)) = other.dtype().inner_dtype() + else { polars_bail!(opq = is_in, ca_in.dtype(), other.dtype()); }; - let other_scale = other_scale.unwrap(); - let scale = ca_in.scale().max(other_scale); - let ca_in = ca_in.to_scale(scale)?; + let prec = ca_in.precision().max(*other_precision); + let scale = ca_in.scale().max(*other_scale); + + // We convert both sides to a common scale, mapping any out-of-range values to unique integers, + // allowing us to then use is_in on the integer representation. + let sentinel_in = i128::MAX; + let sentinel_other = i128::MAX - 1; + let ca_in_phys = ca_in.into_phys_with_prec_scale_or_sentinel(prec, scale, sentinel_in); match other.dtype() { DataType::List(_) => { let other = other.list()?; let other = other.apply_to_inner(&|s| { let s = s.decimal()?; - let s = s.to_scale(scale)?; - let s = s.physical(); + let s = s.into_phys_with_prec_scale_or_sentinel(prec, scale, sentinel_other); Ok(s.to_owned().into_series()) })?; let other = other.into_series(); - is_in_numeric(ca_in.physical(), &other, nulls_equal) + is_in_numeric(&ca_in_phys, &other, nulls_equal) }, #[cfg(feature = "dtype-array")] DataType::Array(_, _) => { let other = other.array()?; let other = other.apply_to_inner(&|s| { let s = s.decimal()?; - let s = s.to_scale(scale)?; - let s = s.physical(); + let s = s.into_phys_with_prec_scale_or_sentinel(prec, scale, sentinel_other); Ok(s.to_owned().into_series()) })?; let other = other.into_series(); - is_in_numeric(ca_in.physical(), &other, nulls_equal) + is_in_numeric(&ca_in_phys, &other, nulls_equal) }, _ => unreachable!(), } @@ -647,7 +651,7 @@ pub fn is_in(s: &Series, other: &Series, nulls_equal: bool) -> PolarsResult is_in_null(s, other, nulls_equal), #[cfg(feature = "dtype-decimal")] - DataType::Decimal(_, _) => { + DataType::NewDecimal(_, _) => { let ca_in = s.decimal()?; is_in_decimal(ca_in, other, nulls_equal) }, diff --git a/crates/polars-ops/src/series/ops/negate.rs b/crates/polars-ops/src/series/ops/negate.rs index d47698e09573..95c2b26b143d 100644 --- a/crates/polars-ops/src/series/ops/negate.rs +++ b/crates/polars-ops/src/series/ops/negate.rs @@ -12,7 +12,7 @@ pub fn negate(s: &Series) -> PolarsResult { Float32 => s.f32().unwrap().wrapping_neg().into_series(), Float64 => s.f64().unwrap().wrapping_neg().into_series(), #[cfg(feature = "dtype-decimal")] - Decimal(_, _) => { + NewDecimal(_, _) => { let ca = s.decimal().unwrap(); let precision = ca.precision(); let scale = ca.scale(); diff --git a/crates/polars-plan/src/plans/aexpr/function_expr/schema.rs b/crates/polars-plan/src/plans/aexpr/function_expr/schema.rs index 09cd0b8b3625..6eb14d7d070c 100644 --- a/crates/polars-plan/src/plans/aexpr/function_expr/schema.rs +++ b/crates/polars-plan/src/plans/aexpr/function_expr/schema.rs @@ -502,7 +502,7 @@ impl<'a> FieldsMapper<'a> { #[cfg(feature = "dtype-time")] dt @ DataType::Time => dt.clone(), #[cfg(feature = "dtype-decimal")] - DataType::Decimal(..) => DataType::Float64, + DataType::NewDecimal(..) => DataType::Float64, // All other types get mapped to a single `null` of the same type. dt => dt.clone(), @@ -530,7 +530,7 @@ impl<'a> FieldsMapper<'a> { let should_coerce = match dt { DataType::Float32 => false, #[cfg(feature = "dtype-decimal")] - DataType::Decimal(..) => coerce_decimal, + DataType::NewDecimal(..) => coerce_decimal, DataType::Boolean => true, dt => dt.is_primitive_numeric(), }; diff --git a/crates/polars-plan/src/plans/aexpr/function_expr/strings.rs b/crates/polars-plan/src/plans/aexpr/function_expr/strings.rs index a487c180fa8f..be683e809ee9 100644 --- a/crates/polars-plan/src/plans/aexpr/function_expr/strings.rs +++ b/crates/polars-plan/src/plans/aexpr/function_expr/strings.rs @@ -1,6 +1,8 @@ use std::borrow::Cow; use arrow::legacy::utils::CustomIterTools; +#[cfg(feature = "dtype-decimal")] +use polars_compute::decimal::DEC128_MAX_PREC; use polars_core::utils::handle_casting_failures; #[cfg(feature = "dtype-struct")] use polars_utils::format_pl_smallstr; @@ -191,7 +193,7 @@ impl IRStringFunction { #[cfg(feature = "nightly")] Titlecase => mapper.with_same_dtype(), #[cfg(feature = "dtype-decimal")] - ToDecimal { scale } => mapper.with_dtype(DataType::Decimal(None, Some(*scale))), + ToDecimal { scale } => mapper.with_dtype(DataType::NewDecimal(DEC128_MAX_PREC, *scale)), #[cfg(feature = "string_encoding")] HexEncode => mapper.with_same_dtype(), #[cfg(feature = "binary_encoding")] @@ -1201,7 +1203,7 @@ pub(super) fn base64_decode(s: &Column, strict: bool) -> PolarsResult { #[cfg(feature = "dtype-decimal")] pub(super) fn to_decimal(s: &Column, scale: usize) -> PolarsResult { let ca = s.str()?; - ca.to_decimal(scale).map(Column::from) + ca.to_decimal(DEC128_MAX_PREC, scale).map(Column::from) } #[cfg(feature = "extract_jsonpath")] diff --git a/crates/polars-plan/src/plans/aexpr/predicates/column_expr.rs b/crates/polars-plan/src/plans/aexpr/predicates/column_expr.rs index 072b25498638..e0b431a0b41f 100644 --- a/crates/polars-plan/src/plans/aexpr/predicates/column_expr.rs +++ b/crates/polars-plan/src/plans/aexpr/predicates/column_expr.rs @@ -60,7 +60,7 @@ pub fn aexpr_to_column_predicates( continue; }, #[cfg(feature = "dtype-decimal")] - D::Decimal(_, _) => { + D::NewDecimal(_, _) => { is_sumwise_complete = false; continue; }, diff --git a/crates/polars-plan/src/plans/aexpr/schema.rs b/crates/polars-plan/src/plans/aexpr/schema.rs index 2823d8680aae..bd4cd3e8bb5e 100644 --- a/crates/polars-plan/src/plans/aexpr/schema.rs +++ b/crates/polars-plan/src/plans/aexpr/schema.rs @@ -1,7 +1,5 @@ #[cfg(feature = "dtype-decimal")] -use polars_core::chunked_array::arithmetic::{ - _get_decimal_scale_add_sub, _get_decimal_scale_div, _get_decimal_scale_mul, -}; +use polars_compute::decimal::DEC128_MAX_PREC; use polars_utils::format_pl_smallstr; use recursive::recursive; @@ -492,9 +490,8 @@ fn get_arithmetic_field( )?) }, #[cfg(feature = "dtype-decimal")] - (Decimal(_, Some(scale_left)), Decimal(_, Some(scale_right))) => { - let scale = _get_decimal_scale_add_sub(*scale_left, *scale_right); - Decimal(None, Some(scale)) + (NewDecimal(_, scale_left), NewDecimal(_, scale_right)) => { + NewDecimal(DEC128_MAX_PREC, *scale_left.max(scale_right)) }, (left, right) => try_get_supertype(left, right)?, } @@ -555,9 +552,8 @@ fn get_arithmetic_field( )?) }, #[cfg(feature = "dtype-decimal")] - (Decimal(_, Some(scale_left)), Decimal(_, Some(scale_right))) => { - let scale = _get_decimal_scale_add_sub(*scale_left, *scale_right); - Decimal(None, Some(scale)) + (NewDecimal(_, scale_left), NewDecimal(_, scale_right)) => { + NewDecimal(DEC128_MAX_PREC, *scale_left.max(scale_right)) }, (left, right) => try_get_supertype(left, right)?, } @@ -605,18 +601,8 @@ fn get_arithmetic_field( }, }, #[cfg(feature = "dtype-decimal")] - (Decimal(_, Some(scale_left)), Decimal(_, Some(scale_right))) => { - let scale = match op { - Operator::Multiply => _get_decimal_scale_mul(*scale_left, *scale_right), - Operator::Divide | Operator::TrueDivide => { - _get_decimal_scale_div(*scale_left) - }, - _ => { - debug_assert!(false); - *scale_left - }, - }; - let dtype = Decimal(None, Some(scale)); + (NewDecimal(_, scale_left), NewDecimal(_, scale_right)) => { + let dtype = NewDecimal(DEC128_MAX_PREC, *scale_left.max(scale_right)); left_field.coerce(dtype); return Ok(left_field); }, @@ -770,9 +756,8 @@ fn get_truediv_dtype(left_dtype: &DataType, right_dtype: &DataType) -> PolarsRes InvalidOperation: "division with 'String' datatypes is not allowed" ), #[cfg(feature = "dtype-decimal")] - (Decimal(_, Some(scale_left)), Decimal(_, _)) => { - let scale = _get_decimal_scale_div(*scale_left); - Decimal(None, Some(scale)) + (NewDecimal(_, scale_left), NewDecimal(_, scale_right)) => { + NewDecimal(DEC128_MAX_PREC, *scale_left.max(scale_right)) }, #[cfg(feature = "dtype-u8")] (UInt8 | Int8, Float32) => Float32, diff --git a/crates/polars-plan/src/plans/conversion/type_coercion/is_in.rs b/crates/polars-plan/src/plans/conversion/type_coercion/is_in.rs index d84f276debb6..f72b3d0af075 100644 --- a/crates/polars-plan/src/plans/conversion/type_coercion/is_in.rs +++ b/crates/polars-plan/src/plans/conversion/type_coercion/is_in.rs @@ -94,14 +94,14 @@ See https://github.com/pola-rs/polars/issues/22149 for more information." }, #[cfg(feature = "dtype-decimal")] - (DataType::Decimal(_, _), dt) if dt.is_primitive_numeric() => { + (DataType::NewDecimal(_, _), dt) if dt.is_primitive_numeric() => { IsInTypeCoercionResult::OtherCast { dtype: cast_type, strict: false, } }, #[cfg(feature = "dtype-decimal")] - (DataType::Decimal(_, _), _) | (_, DataType::Decimal(_, _)) => { + (DataType::NewDecimal(_, _), _) | (_, DataType::NewDecimal(_, _)) => { polars_bail!(InvalidOperation: "'{op}' cannot check for {:?} values in {:?} data", &type_other, &type_left) }, // can't check for more granular time_unit in less-granular time_unit data, diff --git a/crates/polars-python/src/conversion/any_value.rs b/crates/polars-python/src/conversion/any_value.rs index 26b168893230..d49b55e6bf39 100644 --- a/crates/polars-python/src/conversion/any_value.rs +++ b/crates/polars-python/src/conversion/any_value.rs @@ -12,6 +12,7 @@ use polars::chunked_array::object::PolarsObjectSafe; use polars::datatypes::OwnedObject; use polars::datatypes::{DataType, Field, TimeUnit}; use polars::prelude::{AnyValue, PlSmallStr, Series, TimeZone}; +use polars_compute::decimal::{DEC128_MAX_PREC, DecimalFmtBuffer, dec128_fits}; use polars_core::utils::any_values_to_supertype_and_n_dtypes; use polars_core::utils::arrow::temporal_conversions::date32_to_date; use polars_utils::aliases::PlFixedStateQuality; @@ -27,7 +28,7 @@ use pyo3::{IntoPyObjectExt, PyTypeCheck, intern}; use super::datetime::{ datetime_to_py_object, elapsed_offset_to_timedelta, nanos_since_midnight_to_naivetime, }; -use super::{ObjectValue, Wrap, decimal_to_digits, struct_dict}; +use super::{ObjectValue, Wrap, struct_dict}; use crate::error::PyPolarsErr; use crate::py_modules::{pl_series, pl_utils}; use crate::series::PySeries; @@ -120,19 +121,11 @@ pub(crate) fn any_value_into_py_object<'py>( }, AnyValue::Binary(v) => PyBytes::new(py, v).into_bound_py_any(py), AnyValue::BinaryOwned(v) => PyBytes::new(py, &v).into_bound_py_any(py), - AnyValue::Decimal(v, scale) => { + AnyValue::NewDecimal(v, prec, scale) => { let convert = utils.getattr(intern!(py, "to_py_decimal"))?; - const N: usize = 3; - let mut buf = [0_u128; N]; - let n_digits = decimal_to_digits(v.abs(), &mut buf); - let buf = unsafe { - std::slice::from_raw_parts( - buf.as_slice().as_ptr() as *const u8, - N * size_of::(), - ) - }; - let digits = PyTuple::new(py, buf.iter().take(n_digits))?; - convert.call1((v.is_negative() as u8, digits, n_digits, -(scale as i32))) + let mut buf = DecimalFmtBuffer::new(); + let s = buf.format_dec128(v, scale, false); + convert.call1((prec, s)) }, } } @@ -331,27 +324,17 @@ pub(crate) fn py_object_to_any_value( digits: impl IntoIterator, exp: i32, ) -> Option<(i128, usize)> { - const MAX_ABS_DEC: i128 = 10_i128.pow(38) - 1; let mut v = 0_i128; - for (i, d) in digits.into_iter().map(i128::from).enumerate() { - if i < 38 { - v = v * 10 + d; - } else { - v = v.checked_mul(10).and_then(|v| v.checked_add(d))?; - } + for d in digits { + v = v.checked_mul(10)?.checked_add(d as i128)?; } - // We only support non-negative scale (=> non-positive exponent). let scale = if exp > 0 { - // The decimal may be in a non-canonical representation, try to fix it first. - v = 10_i128 - .checked_pow(exp as u32) - .and_then(|factor| v.checked_mul(factor))?; + v = 10_i128.checked_pow(exp as u32)?.checked_mul(v)?; 0 } else { (-exp) as usize }; - // TODO: Do we care for checking if it fits in MAX_ABS_DEC? (if we set precision to None anyway?) - (v <= MAX_ABS_DEC).then_some((v, scale)) + dec128_fits(v, DEC128_MAX_PREC).then_some((v, scale)) } // Note: Using Vec is not the most efficient thing here (input is a tuple) @@ -368,7 +351,7 @@ pub(crate) fn py_object_to_any_value( if sign > 0 { v = -v; // Won't overflow since -i128::MAX > i128::MIN } - Ok(AnyValue::Decimal(v, scale)) + Ok(AnyValue::NewDecimal(v, DEC128_MAX_PREC, scale)) } fn get_list(ob: &Bound<'_, PyAny>, strict: bool) -> PyResult> { diff --git a/crates/polars-python/src/conversion/chunked_array.rs b/crates/polars-python/src/conversion/chunked_array.rs index 0f68cf329cc5..516f8ad44c11 100644 --- a/crates/polars-python/src/conversion/chunked_array.rs +++ b/crates/polars-python/src/conversion/chunked_array.rs @@ -1,13 +1,14 @@ use chrono::NaiveTime; +use polars_compute::decimal::DecimalFmtBuffer; use polars_core::utils::arrow::temporal_conversions::date32_to_date; use pyo3::prelude::*; -use pyo3::types::{PyBytes, PyList, PyNone, PyTuple}; +use pyo3::types::{PyBytes, PyList, PyNone}; use pyo3::{BoundObject, intern}; use super::datetime::{ datetime_to_py_object, elapsed_offset_to_timedelta, nanos_since_midnight_to_naivetime, }; -use super::{decimal_to_digits, struct_dict}; +use super::struct_dict; use crate::prelude::*; use crate::py_modules::pl_utils; @@ -138,25 +139,12 @@ pub(crate) fn decimal_to_pyobject_iter<'py, 'a>( ) -> PyResult>> + use<'py, 'a>> { let utils = pl_utils(py).bind(py); let convert = utils.getattr(intern!(py, "to_py_decimal"))?; - let py_scale = (-(ca.scale() as i32)).into_pyobject(py)?; - // if we don't know precision, the only safe bet is to set it to 39 - let py_precision = ca.precision().unwrap_or(39).into_pyobject(py)?; + let py_precision = ca.precision().into_pyobject(py)?; + let mut buf = DecimalFmtBuffer::new(); Ok(ca.physical().iter().map(move |opt_v| { opt_v.map(|v| { - // TODO! use AnyValue so that we have a single impl. - const N: usize = 3; - let mut buf = [0_u128; N]; - let n_digits = decimal_to_digits(v.abs(), &mut buf); - let buf = unsafe { - std::slice::from_raw_parts( - buf.as_slice().as_ptr() as *const u8, - N * size_of::(), - ) - }; - let digits = PyTuple::new(py, buf.iter().take(n_digits)).unwrap(); - convert - .call1((v.is_negative() as u8, digits, &py_precision, &py_scale)) - .unwrap() + let s = buf.format_dec128(v, ca.scale(), false); + convert.call1((&py_precision, s)).unwrap() }) })) } diff --git a/crates/polars-python/src/conversion/mod.rs b/crates/polars-python/src/conversion/mod.rs index d6f4fb564c0a..aebbec3d8e60 100644 --- a/crates/polars-python/src/conversion/mod.rs +++ b/crates/polars-python/src/conversion/mod.rs @@ -22,6 +22,7 @@ use polars::prelude::default_values::{ }; use polars::prelude::deletion::DeletionFilesList; use polars::series::ops::NullBehavior; +use polars_compute::decimal::dec128_verify_prec_scale; use polars_core::schema::iceberg::IcebergSchema; use polars_core::utils::arrow::array::Array; use polars_core::utils::arrow::types::NativeType; @@ -168,29 +169,6 @@ fn struct_dict<'a, 'py>( Ok(dict) } -// accept u128 array to ensure alignment is correct -fn decimal_to_digits(v: i128, buf: &mut [u128; 3]) -> usize { - const ZEROS: i128 = 0x3030_3030_3030_3030_3030_3030_3030_3030; - // SAFETY: transmute is safe as there are 48 bytes in 3 128bit ints - // and the minimal alignment of u8 fits u16 - let buf = unsafe { std::mem::transmute::<&mut [u128; 3], &mut [u8; 48]>(buf) }; - let mut buffer = itoa::Buffer::new(); - let value = buffer.format(v); - let len = value.len(); - for (dst, src) in buf.iter_mut().zip(value.as_bytes().iter()) { - *dst = *src - } - - let ptr = buf.as_mut_ptr() as *mut i128; - unsafe { - // this is safe because we know that the buffer is exactly 48 bytes long - *ptr -= ZEROS; - *ptr.add(1) -= ZEROS; - *ptr.add(2) -= ZEROS; - } - len -} - impl<'py> IntoPyObject<'py> for &Wrap { type Target = PyAny; type Output = Bound<'py, Self::Target>; @@ -248,7 +226,7 @@ impl<'py> IntoPyObject<'py> for &Wrap { let class = pl.getattr(intern!(py, "Float64"))?; class.call0() }, - DataType::Decimal(precision, scale) => { + DataType::NewDecimal(precision, scale) => { let class = pl.getattr(intern!(py, "Decimal"))?; let args = (*precision, *scale); class.call1(args) @@ -390,7 +368,6 @@ impl<'py> FromPyObject<'py> for Wrap { "Time" => DataType::Time, "Datetime" => DataType::Datetime(TimeUnit::Microseconds, None), "Duration" => DataType::Duration(TimeUnit::Microseconds), - "Decimal" => DataType::Decimal(None, None), // "none" scale => "infer" "List" => DataType::List(Box::new(DataType::Null)), "Array" => DataType::Array(Box::new(DataType::Null), 0), "Struct" => DataType::Struct(vec![]), @@ -398,6 +375,11 @@ impl<'py> FromPyObject<'py> for Wrap { #[cfg(feature = "object")] "Object" => DataType::Object(OBJECT_NAME), "Unknown" => DataType::Unknown(Default::default()), + "Decimal" => { + return Err(PyTypeError::new_err( + "Decimal without precision/scale set is not a valid Polars datatype", + )); + }, dt => { return Err(PyTypeError::new_err(format!( "'{dt}' is not a Polars data type", @@ -456,7 +438,8 @@ impl<'py> FromPyObject<'py> for Wrap { "Decimal" => { let precision = ob.getattr(intern!(py, "precision"))?.extract()?; let scale = ob.getattr(intern!(py, "scale"))?.extract()?; - DataType::Decimal(precision, Some(scale)) + dec128_verify_prec_scale(precision, scale).map_err(to_py_err)?; + DataType::NewDecimal(precision, scale) }, "List" => { let inner = ob.getattr(intern!(py, "inner")).unwrap(); diff --git a/crates/polars-python/src/dataframe/construction.rs b/crates/polars-python/src/dataframe/construction.rs index 62455bb9a2d0..5edf04709834 100644 --- a/crates/polars-python/src/dataframe/construction.rs +++ b/crates/polars-python/src/dataframe/construction.rs @@ -87,7 +87,7 @@ fn finish_from_rows( schema_overrides: Option, infer_schema_length: Option, ) -> PyResult { - let mut schema = if let Some(mut schema) = schema { + let schema = if let Some(mut schema) = schema { resolve_schema_overrides(&mut schema, schema_overrides); update_schema_from_rows(&mut schema, &rows, infer_schema_length)?; schema @@ -95,11 +95,6 @@ fn finish_from_rows( rows_to_schema_supertypes(&rows, infer_schema_length).map_err(PyPolarsErr::from)? }; - // TODO: Remove this step when Decimals are supported properly. - // Erasing the decimal precision/scale here will just require us to infer it again later. - // https://github.com/pola-rs/polars/issues/14427 - erase_decimal_precision_scale(&mut schema); - let df = DataFrame::from_rows_and_schema(&rows, &schema).map_err(PyPolarsErr::from)?; Ok(df.into()) } @@ -142,15 +137,6 @@ fn resolve_schema_overrides(schema: &mut Schema, schema_overrides: Option(column_names: I) -> Schema where I: IntoIterator, diff --git a/crates/polars-python/src/interop/numpy/to_numpy_series.rs b/crates/polars-python/src/interop/numpy/to_numpy_series.rs index a40d6a738410..9bd40eb4349d 100644 --- a/crates/polars-python/src/interop/numpy/to_numpy_series.rs +++ b/crates/polars-python/src/interop/numpy/to_numpy_series.rs @@ -257,7 +257,7 @@ fn series_to_numpy_with_copy(py: Python<'_>, s: &Series, writable: bool) -> PyOb PyArray1::from_iter(py, values).into_py_any(py).unwrap() }) }, - Decimal(_, _) => { + NewDecimal(_, _) => { let ca = s.decimal().unwrap(); let values = decimal_to_pyobject_iter(py, ca) .unwrap() diff --git a/crates/polars-python/src/series/comparison.rs b/crates/polars-python/src/series/comparison.rs index 8249d47a5c3c..f6874e5829da 100644 --- a/crates/polars-python/src/series/comparison.rs +++ b/crates/polars-python/src/series/comparison.rs @@ -1,3 +1,4 @@ +use polars_compute::decimal::{DEC128_MAX_PREC, dec128_fits}; use pyo3::prelude::*; use crate::PySeries; @@ -121,14 +122,18 @@ impl_op!(lt_eq, lt_eq_f32, f32); impl_op!(lt_eq, lt_eq_f64, f64); impl_op!(lt_eq, lt_eq_str, &str); -struct PyDecimal(i128, usize); +struct PyDecimal(i128, usize, usize); impl<'source> FromPyObject<'source> for PyDecimal { fn extract_bound(obj: &Bound<'source, PyAny>) -> PyResult { if let Ok(val) = obj.extract() { - return Ok(PyDecimal(val, 0)); + return Ok(PyDecimal(val, DEC128_MAX_PREC, 0)); } + let err = || { + Err(PyPolarsErr::from(polars_err!(ComputeError: "overflow in Python Decimal to Polars Decimal conversion")).into()) + }; + let (sign, digits, exponent) = obj .call_method0("as_tuple")? .extract::<(i8, Vec, i8)>()?; @@ -137,23 +142,27 @@ impl<'source> FromPyObject<'source> for PyDecimal { if let Some(v) = val.checked_mul(10).and_then(|val| val.checked_add(d as _)) { val = v; } else { - return Err(PyPolarsErr::from(polars_err!(ComputeError: "overflow")).into()); + return err(); } } - let exponent = if exponent > 0 { + let scale = if exponent > 0 { if let Some(v) = val.checked_mul(10_i128.pow((-exponent) as u32)) { val = v; } else { - return Err(PyPolarsErr::from(polars_err!(ComputeError: "overflow")).into()); + return err(); }; 0_usize } else { - -exponent as _ + (-exponent) as usize }; if sign == 1 { val = -val }; - Ok(PyDecimal(val, exponent)) + if dec128_fits(val, DEC128_MAX_PREC) { + Ok(PyDecimal(val, DEC128_MAX_PREC, scale)) + } else { + err() + } } } @@ -164,7 +173,7 @@ macro_rules! impl_decimal { fn $name(&self, py: Python<'_>, rhs: PyDecimal) -> PyResult { let rhs = Series::new( PlSmallStr::from_static("decimal"), - &[AnyValue::Decimal(rhs.0, rhs.1)], + &[AnyValue::NewDecimal(rhs.0, rhs.1, rhs.2)], ); py.enter_polars_series(|| self.series.read().$method(&rhs)) } diff --git a/crates/polars-python/src/series/export.rs b/crates/polars-python/src/series/export.rs index 015a567c156c..4f93d37d3286 100644 --- a/crates/polars-python/src/series/export.rs +++ b/crates/polars-python/src/series/export.rs @@ -90,7 +90,7 @@ impl PySeries { let ca = series.datetime().map_err(PyPolarsErr::from)?; return Wrap(ca).into_bound_py_any(py); }, - DataType::Decimal(_, _) => { + DataType::NewDecimal(_, _) => { let ca = series.decimal().map_err(PyPolarsErr::from)?; return Wrap(ca).into_bound_py_any(py); }, diff --git a/crates/polars-python/src/series/map.rs b/crates/polars-python/src/series/map.rs index 14adc5368ee5..d283ca4ece4d 100644 --- a/crates/polars-python/src/series/map.rs +++ b/crates/polars-python/src/series/map.rs @@ -67,7 +67,7 @@ impl PySeries { | DataType::Binary | DataType::Array(_, _) | DataType::Time - | DataType::Decimal(_, _) + | DataType::NewDecimal(_, _) ) || !skip_nulls { let mut avs = Vec::with_capacity(series.len()); diff --git a/crates/polars-sql/src/types.rs b/crates/polars-sql/src/types.rs index 83e2cf5fe8d2..319b8e19c2fb 100644 --- a/crates/polars-sql/src/types.rs +++ b/crates/polars-sql/src/types.rs @@ -132,10 +132,10 @@ pub(crate) fn map_sql_dtype_to_polars(dtype: &SQLDataType) -> PolarsResult match *info { ExactNumberInfo::PrecisionAndScale(p, s) => { - DataType::Decimal(Some(p as usize), Some(s as usize)) + DataType::NewDecimal(p as usize, s as usize) }, - ExactNumberInfo::Precision(p) => DataType::Decimal(Some(p as usize), Some(0)), - ExactNumberInfo::None => DataType::Decimal(Some(38), Some(9)), + ExactNumberInfo::Precision(p) => DataType::NewDecimal(p as usize, 0), + ExactNumberInfo::None => DataType::NewDecimal(38, 9), }, // --------------------------------- diff --git a/crates/polars-testing/src/asserts/series.rs b/crates/polars-testing/src/asserts/series.rs index 03cc1a22d4bf..a0698eeec674 100644 --- a/crates/polars-testing/src/asserts/series.rs +++ b/crates/polars-testing/src/asserts/series.rs @@ -601,10 +601,10 @@ mod tests { #[should_panic(expected = "exact value mismatch")] fn test_series_decimal_values_mismatch() { let s1 = Series::new("".into(), &[1, 2]) - .cast(&DataType::Decimal(Some(10), Some(2))) + .cast(&DataType::NewDecimal(10, 2)) .unwrap(); let s2 = Series::new("".into(), &[1, 3]) - .cast(&DataType::Decimal(Some(10), Some(2))) + .cast(&DataType::NewDecimal(10, 2)) .unwrap(); assert_series_equal!(&s1, &s2); @@ -613,10 +613,10 @@ mod tests { #[test] fn test_series_decimal_values_match() { let s1 = Series::new("".into(), &[1, 2]) - .cast(&DataType::Decimal(Some(10), Some(2))) + .cast(&DataType::NewDecimal(10, 2)) .unwrap(); let s2 = Series::new("".into(), &[1, 2]) - .cast(&DataType::Decimal(Some(10), Some(2))) + .cast(&DataType::NewDecimal(10, 2)) .unwrap(); assert_series_equal!(&s1, &s2); diff --git a/crates/polars-utils/src/decimal.rs b/crates/polars-utils/src/decimal.rs new file mode 100644 index 000000000000..8b137891791f --- /dev/null +++ b/crates/polars-utils/src/decimal.rs @@ -0,0 +1 @@ + diff --git a/crates/polars-utils/src/float.rs b/crates/polars-utils/src/float.rs index 0f8fbbd67de0..6d45cba1edf2 100644 --- a/crates/polars-utils/src/float.rs +++ b/crates/polars-utils/src/float.rs @@ -51,6 +51,7 @@ unsafe impl IsFloat for u16 {} unsafe impl IsFloat for u32 {} unsafe impl IsFloat for u64 {} unsafe impl IsFloat for u128 {} +unsafe impl IsFloat for usize {} unsafe impl IsFloat for &str {} unsafe impl IsFloat for &[u8] {} unsafe impl IsFloat for bool {} @@ -68,6 +69,7 @@ mod private { impl Sealed for u32 {} impl Sealed for u64 {} impl Sealed for u128 {} + impl Sealed for usize {} impl Sealed for f32 {} impl Sealed for f64 {} impl Sealed for &str {} diff --git a/crates/polars-utils/src/lib.rs b/crates/polars-utils/src/lib.rs index c281662c3ea0..769b2ac852e4 100644 --- a/crates/polars-utils/src/lib.rs +++ b/crates/polars-utils/src/lib.rs @@ -17,6 +17,7 @@ pub mod clmul; mod config; pub use config::check_allow_importing_interval_as_struct; pub mod cpuid; +pub mod decimal; pub mod enum_unit_vec; pub mod error; pub mod floor_divmod; diff --git a/crates/polars/tests/it/lazy/group_by.rs b/crates/polars/tests/it/lazy/group_by.rs index 81cd759db30b..98b07b335b5d 100644 --- a/crates/polars/tests/it/lazy/group_by.rs +++ b/crates/polars/tests/it/lazy/group_by.rs @@ -141,7 +141,7 @@ fn test_logical_mean_partitioned_group_by_block() -> PolarsResult<()> { let out = df .lazy() - .with_column(col("decimal").cast(DataType::Decimal(None, Some(2)))) + .with_column(col("decimal").cast(DataType::NewDecimal(38, 2))) .with_column(col("duration").cast(DataType::Duration(TimeUnit::Microseconds))) .group_by([col("decimal")]) .agg([col("duration").mean()]) diff --git a/py-polars/polars/_utils/construction/series.py b/py-polars/polars/_utils/construction/series.py index 24412da8b5c6..c406ace73812 100644 --- a/py-polars/polars/_utils/construction/series.py +++ b/py-polars/polars/_utils/construction/series.py @@ -37,6 +37,7 @@ List, Null, Object, + String, Struct, Time, Unknown, @@ -164,10 +165,27 @@ def sequence_to_pyseries( Boolean, Categorical, Enum, - Decimal, - ) or isinstance(dtype, Categorical): + ) or isinstance(dtype, (Categorical, Decimal)): if pyseries.dtype() != dtype: pyseries = pyseries.cast(dtype, strict=strict, wrap_numerical=False) + + # Uninstanced Decimal is a bit special and has various inference paths + if dtype == Decimal: + if pyseries.dtype() == String: + pyseries = pyseries.str_to_decimal_infer(inference_length=0) + elif pyseries.dtype().is_float(): + # Go through string so we infer an appropriate scale. + pyseries = pyseries.cast( + String, strict=strict, wrap_numerical=False + ).str_to_decimal_infer(inference_length=0) + elif pyseries.dtype().is_integer() or pyseries.dtype() == Null: + pyseries = pyseries.cast( + Decimal(scale=0), strict=strict, wrap_numerical=False + ) + elif not isinstance(pyseries.dtype(), Decimal): + msg = f"can't convert {pyseries.dtype()} to Decimal" + raise TypeError(msg) + return pyseries elif dtype == Struct: diff --git a/py-polars/polars/_utils/convert.py b/py-polars/polars/_utils/convert.py index 341bdd5eab7b..bf3fcc581b9c 100644 --- a/py-polars/polars/_utils/convert.py +++ b/py-polars/polars/_utils/convert.py @@ -24,7 +24,6 @@ ) if TYPE_CHECKING: - from collections.abc import Sequence from datetime import date, tzinfo from decimal import Decimal @@ -207,15 +206,15 @@ def to_py_timedelta(value: int | float, time_unit: TimeUnit) -> timedelta: _raise_invalid_time_unit(time_unit) -def to_py_decimal(sign: int, digits: Sequence[int], prec: int, scale: int) -> Decimal: +def to_py_decimal(prec: int, value: str) -> Decimal: """Convert decimal components to a Python Decimal object.""" - return _create_decimal_with_prec(prec)((sign, digits, scale)) + return _create_decimal_with_prec(prec)(value) @lru_cache(None) def _create_decimal_with_prec( precision: int, -) -> Callable[[tuple[int, Sequence[int], int]], Decimal]: +) -> Callable[[str], Decimal]: # pre-cache contexts so we don't have to spend time on recreating them every time return Context(prec=precision).create_decimal diff --git a/py-polars/polars/datatypes/classes.py b/py-polars/polars/datatypes/classes.py index 8d2471979139..f8b64c747cb5 100644 --- a/py-polars/polars/datatypes/classes.py +++ b/py-polars/polars/datatypes/classes.py @@ -449,6 +449,9 @@ def __init__( " It is a work-in-progress feature and may not always work as expected." ) + if precision is None: + precision = 38 + self.precision = precision self.scale = scale diff --git a/py-polars/tests/unit/constructors/test_any_value_fallbacks.py b/py-polars/tests/unit/constructors/test_any_value_fallbacks.py index 04ad3c1b27d1..f189a572c766 100644 --- a/py-polars/tests/unit/constructors/test_any_value_fallbacks.py +++ b/py-polars/tests/unit/constructors/test_any_value_fallbacks.py @@ -212,7 +212,7 @@ def test_fallback_with_dtype_strict_failure( [ D("12"), D("1.2345"), - # D("123456"), + D("123456"), False, True, 0, @@ -225,14 +225,14 @@ def test_fallback_with_dtype_strict_failure( ], [ D("12.000"), + D("1.234"), None, - # None, None, None, D("0.000"), D("-1.000"), - None, - None, + D("0.000"), + D("2.500"), None, None, None, diff --git a/py-polars/tests/unit/constructors/test_constructors.py b/py-polars/tests/unit/constructors/test_constructors.py index 8aa64bd6abe2..3c30ff7e557a 100644 --- a/py-polars/tests/unit/constructors/test_constructors.py +++ b/py-polars/tests/unit/constructors/test_constructors.py @@ -844,7 +844,7 @@ def test_init_series() -> None: (time, pl.Time), (datetime, pl.Datetime("us")), (timedelta, pl.Duration("us")), - (Decimal, pl.Decimal(precision=None, scale=0)), + (Decimal, pl.Decimal(scale=0)), ], ) def test_init_py_dtype(dtype: Any, expected_dtype: PolarsDataType) -> None: diff --git a/py-polars/tests/unit/datatypes/test_decimal.py b/py-polars/tests/unit/datatypes/test_decimal.py index a95488f8d29b..db3c4443f843 100644 --- a/py-polars/tests/unit/datatypes/test_decimal.py +++ b/py-polars/tests/unit/datatypes/test_decimal.py @@ -152,11 +152,8 @@ def test_decimal_cast() -> None: def test_decimal_cast_no_scale() -> None: - s = pl.Series().cast(pl.Decimal) - assert s.dtype == pl.Decimal(precision=None, scale=0) - - s = pl.Series([D("10.0")]).cast(pl.Decimal) - assert s.dtype == pl.Decimal(precision=None, scale=1) + with pytest.raises(TypeError): + pl.Series().cast(pl.Decimal) def test_decimal_scale_precision_roundtrip(monkeypatch: Any) -> None: @@ -176,7 +173,7 @@ def test_string_to_decimal() -> None: ] s = pl.Series(values).str.to_decimal() - assert s.dtype == pl.Decimal(scale=2) + assert s.dtype == pl.Decimal(precision=8, scale=2) assert s.to_list() == [D(v) for v in values] @@ -229,41 +226,35 @@ def test_decimal_compare( def test_decimal_arithmetic() -> None: + dt = pl.Decimal(20, 10) df = pl.DataFrame( { "a": [D("0.1"), D("10.1"), D("100.01")], "b": [D("20.1"), D("10.19"), D("39.21")], }, strict=False, + schema={"a": dt, "b": dt}, ) - dt = pl.Decimal(20, 10) out = df.select( - out1=pl.col("a") * pl.col("b"), - out2=pl.col("a") + pl.col("b"), - out3=pl.col("a") / pl.col("b"), - out4=pl.col("a") - pl.col("b"), - out5=pl.col("a").cast(dt) / pl.col("b").cast(dt), - ) - assert out.dtypes == [ - pl.Decimal(precision=None, scale=4), - pl.Decimal(precision=None, scale=2), - pl.Decimal(precision=None, scale=6), - pl.Decimal(precision=None, scale=2), - pl.Decimal(precision=None, scale=14), - ] + out1=pl.col("a") + pl.col("b"), + out2=pl.col("a") - pl.col("b"), + out3=pl.col("a") * pl.col("b"), + out4=pl.col("a") / pl.col("b"), + ) + assert all(dt == pl.Decimal(38, 10) for dt in out.dtypes) assert out.to_dict(as_series=False) == { - "out1": [D("2.0100"), D("102.9190"), D("3921.3921")], - "out2": [D("20.20"), D("20.29"), D("139.22")], - "out3": [D("0.004975"), D("0.991167"), D("2.550624")], - "out4": [D("-20.00"), D("-0.09"), D("60.80")], - "out5": [D("0.00497512437810"), D("0.99116781157998"), D("2.55062484060188")], + "out1": [D("20.2"), D("20.29"), D("139.22")], + "out2": [D("-20.0"), D("-0.09"), D("60.80")], + "out3": [D("2.01"), D("102.919"), D("3921.3921")], + "out4": [D("0.0049751244"), D("0.9911678116"), D("2.5506248406")], } def test_decimal_series_value_arithmetic() -> None: s = pl.Series([D("0.10"), D("10.10"), D("100.01")]) + assert s.dtype == pl.Decimal(scale=2) out1 = s + 10 out2 = s + D("10") @@ -272,22 +263,22 @@ def test_decimal_series_value_arithmetic() -> None: out5 = s / D("1.5") out6 = s - 5 - assert out1.dtype == pl.Decimal(precision=None, scale=2) - assert out2.dtype == pl.Decimal(precision=None, scale=2) - assert out3.dtype == pl.Decimal(precision=None, scale=4) - assert out4.dtype == pl.Decimal(precision=None, scale=8) - assert out5.dtype == pl.Decimal(precision=None, scale=6) - assert out6.dtype == pl.Decimal(precision=None, scale=2) + assert out1.dtype == pl.Decimal(scale=2) + assert out2.dtype == pl.Decimal(scale=2) + assert out3.dtype == pl.Decimal(scale=4) + assert out4.dtype == pl.Decimal(scale=2) + assert out5.dtype == pl.Decimal(scale=2) + assert out6.dtype == pl.Decimal(scale=2) assert out1.to_list() == [D("10.1"), D("20.1"), D("110.01")] assert out2.to_list() == [D("10.1"), D("20.1"), D("110.01")] assert out3.to_list() == [D("10.1001"), D("20.1001"), D("110.0101")] assert out4.to_list() == [ - D("0.06666666"), - D("6.73333333"), - D("66.67333333"), + D("0.07"), + D("6.73"), + D("66.67"), ] # TODO: do we want floor instead of round? - assert out5.to_list() == [D("0.066666"), D("6.733333"), D("66.673333")] + assert out5.to_list() == [D("0.07"), D("6.73"), D("66.67")] assert out6.to_list() == [D("-4.9"), D("5.1"), D("95.01")] @@ -411,7 +402,7 @@ def test_decimal_in_filter() -> None: "bar": ["6", "7", "8"], } ) - df = df.with_columns(pl.col("bar").cast(pl.Decimal)) + df = df.with_columns(pl.col("bar").cast(pl.Decimal(scale=0))) assert df.filter(pl.col("foo") > 1).to_dict(as_series=False) == { "foo": [2, 3], "bar": [D("7"), D("8")], @@ -479,7 +470,6 @@ def test_decimal_write_parquet_12375() -> None: def test_decimal_list_get_13847() -> None: df = pl.DataFrame({"a": [[D("1.1"), D("1.2")], [D("2.1")]]}) out = df.select(pl.col("a").list.get(0)) - print(out) expected = pl.DataFrame({"a": [D("1.1"), D("2.1")]}) assert_frame_equal(out, expected) @@ -588,26 +578,16 @@ def test_decimal_arithmetic_schema() -> None: def test_decimal_arithmetic_schema_float_20369() -> None: - s = pl.Series("x", [1.0], dtype=pl.Decimal(15, 2)) - assert_series_equal((s - 1.0), pl.Series("x", [0.0], dtype=pl.Decimal(None, 2))) - assert_series_equal( - (3.0 - s), pl.Series("literal", [2.0], dtype=pl.Decimal(None, 2)) - ) - assert_series_equal( - (3.0 / s), pl.Series("literal", [3.0], dtype=pl.Decimal(None, 6)) - ) - assert_series_equal( - (s / 3.0), pl.Series("x", [0.333333], dtype=pl.Decimal(None, 6)) - ) + s = pl.Series("x", [1.0], dtype=pl.Decimal(15, 6)) + assert_series_equal((s - 1.0), pl.Series("x", [0.0], dtype=pl.Decimal(38, 6))) + assert_series_equal((3.0 - s), pl.Series("literal", [2.0], dtype=pl.Decimal(38, 6))) + assert_series_equal((3.0 / s), pl.Series("literal", [3.0], dtype=pl.Decimal(38, 6))) + assert_series_equal((s / 3.0), pl.Series("x", [0.333333], dtype=pl.Decimal(38, 6))) - assert_series_equal((s + 1.0), pl.Series("x", [2.0], dtype=pl.Decimal(None, 2))) - assert_series_equal( - (1.0 + s), pl.Series("literal", [2.0], dtype=pl.Decimal(None, 2)) - ) - assert_series_equal((s * 1.0), pl.Series("x", [1.0], dtype=pl.Decimal(None, 4))) - assert_series_equal( - (1.0 * s), pl.Series("literal", [1.0], dtype=pl.Decimal(None, 4)) - ) + assert_series_equal((s + 1.0), pl.Series("x", [2.0], dtype=pl.Decimal(38, 6))) + assert_series_equal((1.0 + s), pl.Series("literal", [2.0], dtype=pl.Decimal(38, 6))) + assert_series_equal((s * 1.0), pl.Series("x", [1.0], dtype=pl.Decimal(38, 6))) + assert_series_equal((1.0 * s), pl.Series("literal", [1.0], dtype=pl.Decimal(38, 6))) def test_decimal_horizontal_20482() -> None: @@ -715,14 +695,13 @@ def test_groupby_agg_single_element_11232() -> None: def test_decimal_from_large_ints_9084() -> None: numbers = [2963091539321097135000000000, 25658709114149718824803874] - s = pl.Series(numbers, dtype=pl.Decimal) + s = pl.Series(numbers, dtype=pl.Decimal(38, 0)) assert s.to_list() == [D(n) for n in numbers] def test_cast_float_to_decimal_12775() -> None: s = pl.Series([1.5]) - # default scale = 0 - assert s.cast(pl.Decimal).to_list() == [D("1")] + assert s.cast(pl.Decimal(scale=0)).to_list() == [D("1")] assert s.cast(pl.Decimal(scale=1)).to_list() == [D("1.5")] diff --git a/py-polars/tests/unit/expr/test_serde.py b/py-polars/tests/unit/expr/test_serde.py index 415abb5db415..52fdc6feb6ac 100644 --- a/py-polars/tests/unit/expr/test_serde.py +++ b/py-polars/tests/unit/expr/test_serde.py @@ -52,7 +52,7 @@ def test_expr_deserialize_invalid_json() -> None: def test_expression_json_13991() -> None: - expr = pl.col("foo").cast(pl.Decimal) + expr = pl.col("foo").cast(pl.Decimal(38, 10)) json = expr.meta.serialize(format="json") round_tripped = pl.Expr.deserialize(io.StringIO(json), format="json") diff --git a/py-polars/tests/unit/functions/test_when_then.py b/py-polars/tests/unit/functions/test_when_then.py index 77bce001c58f..43e646d09b0c 100644 --- a/py-polars/tests/unit/functions/test_when_then.py +++ b/py-polars/tests/unit/functions/test_when_then.py @@ -760,7 +760,7 @@ def test_when_then_to_decimal_18375() -> None: "b": ["1.23", "4.56"], "c": ["1.23", "4.56"], }, - schema={"a": pl.String, "b": pl.Decimal, "c": pl.Decimal}, + schema={"a": pl.String, "b": pl.Decimal(scale=2), "c": pl.Decimal(scale=2)}, ) assert_frame_equal(result, expected) diff --git a/py-polars/tests/unit/operations/test_cast.py b/py-polars/tests/unit/operations/test_cast.py index 2d02c4ad115d..a354572bcdbd 100644 --- a/py-polars/tests/unit/operations/test_cast.py +++ b/py-polars/tests/unit/operations/test_cast.py @@ -623,9 +623,9 @@ def test_invalid_cast_float_to_decimal(value: float) -> None: s = pl.Series([value], dtype=pl.Float64) with pytest.raises( InvalidOperationError, - match=r"conversion from `f64` to `decimal\[\*,0\]` failed", + match=r"conversion from `f64` to `decimal\[10,2\]` failed", ): - s.cast(pl.Decimal) + s.cast(pl.Decimal(10, 2)) def test_err_on_time_datetime_cast() -> None: diff --git a/py-polars/tests/unit/operations/test_fill_null.py b/py-polars/tests/unit/operations/test_fill_null.py index 0e19606a2ebc..bc3099ef1a2a 100644 --- a/py-polars/tests/unit/operations/test_fill_null.py +++ b/py-polars/tests/unit/operations/test_fill_null.py @@ -41,7 +41,7 @@ def test_fill_null_non_lit() -> None: "a": pl.Series([1, None], dtype=pl.Int32), "b": pl.Series([None, 2], dtype=pl.UInt32), "c": pl.Series([None, 2], dtype=pl.Int64), - "d": pl.Series([None, 2], dtype=pl.Decimal), + "d": pl.Series([None, 2], dtype=pl.Decimal(10, 2)), } ) assert df.fill_null(0).select(pl.all().null_count()).transpose().sum().item() == 0 diff --git a/py-polars/tests/unit/operations/test_rolling.py b/py-polars/tests/unit/operations/test_rolling.py index 0cb254cf9ee1..c3184ff68187 100644 --- a/py-polars/tests/unit/operations/test_rolling.py +++ b/py-polars/tests/unit/operations/test_rolling.py @@ -639,7 +639,7 @@ def test_rolling_unsupported_22065() -> None: with pytest.raises(pl.exceptions.InvalidOperationError): pl.Series("a", [[]]).rolling_sum(10) with pytest.raises(pl.exceptions.InvalidOperationError): - pl.Series("a", ["1.0"], pl.Decimal).rolling_min(1) + pl.Series("a", ["1.0"], pl.Decimal(10, 2)).rolling_min(1) with pytest.raises(pl.exceptions.InvalidOperationError): pl.Series("a", [None]).rolling_sum(10) with pytest.raises(pl.exceptions.InvalidOperationError): diff --git a/py-polars/tests/unit/test_datatype_exprs.py b/py-polars/tests/unit/test_datatype_exprs.py index da6c60ca2391..d27bb51cb2b3 100644 --- a/py-polars/tests/unit/test_datatype_exprs.py +++ b/py-polars/tests/unit/test_datatype_exprs.py @@ -74,8 +74,9 @@ def test_self_dtype_in_wrong_context() -> None: (pl.UInt128(), "uint", "u128", 128), (pl.Float32(), "float", "f32", 32), (pl.Float64(), "float", "f64", 64), - (pl.Decimal(scale=4), "decimal", "decimal[*,4]", 128), - (pl.Decimal(scale=12), "decimal", "decimal[*,12]", 128), + (pl.Decimal(scale=4), "decimal", "decimal[38,4]", 128), + (pl.Decimal(scale=12), "decimal", "decimal[38,12]", 128), + (pl.Decimal(precision=20, scale=12), "decimal", "decimal[20,12]", 128), (pl.Categorical(), "categorical", "cat", 32), (pl.Enum([]), "enum", "enum", 32), (pl.Enum(["a", "b"]), "enum", "enum", 32), diff --git a/py-polars/tests/unit/test_format.py b/py-polars/tests/unit/test_format.py index 98593f9fd948..e0247a847dc3 100644 --- a/py-polars/tests/unit/test_format.py +++ b/py-polars/tests/unit/test_format.py @@ -363,14 +363,13 @@ def test_format_numeric_locale_options() -> None: thousands_separator=",", float_precision=3, ): - print(df) assert ( str(df) == """shape: (2, 4) ┌─────┬──────────────┬────────────────┬─────────────────┐ │ a ┆ b ┆ c ┆ d │ │ --- ┆ --- ┆ --- ┆ --- │ -│ str ┆ f64 ┆ i64 ┆ decimal[*,4] │ +│ str ┆ f64 ┆ i64 ┆ decimal[38,4] │ ╞═════╪══════════════╪════════════════╪═════════════════╡ │ xx ┆ 100,000.988 ┆ -11,111,111 ┆ 12,345.6789 │ │ yy ┆ -234,567.890 ┆ 44,444,444,444 ┆ -9,999,999.9900 │ @@ -388,7 +387,7 @@ def test_format_numeric_locale_options() -> None: ┌─────┬────────────────┬────────────────┬─────────────────┐ │ a ┆ b ┆ c ┆ d │ │ --- ┆ --- ┆ --- ┆ --- │ -│ str ┆ f64 ┆ i64 ┆ decimal[*,4] │ +│ str ┆ f64 ┆ i64 ┆ decimal[38,4] │ ╞═════╪════════════════╪════════════════╪═════════════════╡ │ xx ┆ 100.000,987654 ┆ -11.111.111 ┆ 12.345,6789 │ │ yy ┆ -234.567,89 ┆ 44.444.444.444 ┆ -9.999.999,9900 │ @@ -402,7 +401,7 @@ def test_format_numeric_locale_options() -> None: ┌─────┬───────────────┬─────────────┬───────────────┐ │ a ┆ b ┆ c ┆ d │ │ --- ┆ --- ┆ --- ┆ --- │ -│ str ┆ f64 ┆ i64 ┆ decimal[*,4] │ +│ str ┆ f64 ┆ i64 ┆ decimal[38,4] │ ╞═════╪═══════════════╪═════════════╪═══════════════╡ │ xx ┆ 100000.987654 ┆ -11111111 ┆ 12345.6789 │ │ yy ┆ -234567.89 ┆ 44444444444 ┆ -9999999.9900 │ diff --git a/py-polars/tests/unit/test_selectors.py b/py-polars/tests/unit/test_selectors.py index d36bc0a7a93c..3d82631a0801 100644 --- a/py-polars/tests/unit/test_selectors.py +++ b/py-polars/tests/unit/test_selectors.py @@ -375,7 +375,7 @@ def test_select_decimal(df: pl.DataFrame) -> None: df = pl.DataFrame( schema={ "zz0": pl.Float64, - "zz1": pl.Decimal, + "zz1": pl.Decimal(38, 5), "zz2": pl.Decimal(10, 10), } ) diff --git a/pyo3-polars/pyo3-polars/src/types.rs b/pyo3-polars/pyo3-polars/src/types.rs index 0502f326d1ef..34a58995e7d8 100644 --- a/pyo3-polars/pyo3-polars/src/types.rs +++ b/pyo3-polars/pyo3-polars/src/types.rs @@ -465,7 +465,7 @@ impl<'py> IntoPyObject<'py> for PyDataType { class.call0() }, #[cfg(feature = "dtype-decimal")] - DataType::Decimal(precision, scale) => { + DataType::NewDecimal(precision, scale) => { let class = pl.getattr(intern!(py, "Decimal")).unwrap(); let args = (*precision, *scale); class.call1(args) @@ -618,7 +618,9 @@ impl<'py> FromPyObject<'py> for PyDataType { "Datetime" => DataType::Datetime(TimeUnit::Microseconds, None), "Duration" => DataType::Duration(TimeUnit::Microseconds), #[cfg(feature = "dtype-decimal")] - "Decimal" => DataType::Decimal(None, None), // "none" scale => "infer" + "Decimal" => { + return Err(PyTypeError::new_err("Decimal without specifying precision and scale is not a valid Polars data type".to_string())); + }, "List" => DataType::List(Box::new(DataType::Null)), #[cfg(feature = "dtype-array")] "Array" => DataType::Array(Box::new(DataType::Null), 0), @@ -681,7 +683,7 @@ impl<'py> FromPyObject<'py> for PyDataType { "Decimal" => { let precision = ob.getattr(intern!(py, "precision"))?.extract()?; let scale = ob.getattr(intern!(py, "scale"))?.extract()?; - DataType::Decimal(precision, Some(scale)) + DataType::NewDecimal(precision, scale) }, "List" => { let inner = ob.getattr(intern!(py, "inner")).unwrap(); From 1a0829e581fee26b583be5a5e3c4bb9d1ce399bc Mon Sep 17 00:00:00 2001 From: Orson Peters Date: Thu, 18 Sep 2025 17:41:56 +0200 Subject: [PATCH 02/18] Rename NewDecimal back to Decimal --- .../src/chunked_array/builder/list/mod.rs | 2 +- crates/polars-core/src/chunked_array/cast.rs | 6 ++-- .../src/chunked_array/logical/decimal.rs | 14 ++++---- .../src/chunked_array/ops/any_value.rs | 4 +-- .../src/chunked_array/ops/decimal.rs | 12 +++---- .../src/chunked_array/ops/row_encode.rs | 2 +- crates/polars-core/src/datatypes/_serde.rs | 6 ++-- crates/polars-core/src/datatypes/any_value.rs | 36 +++++++++---------- crates/polars-core/src/datatypes/dtype.rs | 18 +++++----- crates/polars-core/src/datatypes/field.rs | 2 +- crates/polars-core/src/datatypes/proptest.rs | 2 +- crates/polars-core/src/fmt.rs | 4 +-- .../frame/group_by/aggregations/dispatch.rs | 4 +-- crates/polars-core/src/frame/row/av_buffer.rs | 2 +- crates/polars-core/src/scalar/new.rs | 4 +-- crates/polars-core/src/scalar/serde.rs | 2 +- crates/polars-core/src/series/any_value.rs | 16 ++++----- crates/polars-core/src/series/from.rs | 2 +- .../src/series/implementations/decimal.rs | 14 ++++---- crates/polars-core/src/series/into.rs | 2 +- crates/polars-core/src/series/mod.rs | 14 ++++---- crates/polars-core/src/series/ops/downcast.rs | 2 +- crates/polars-core/src/series/ops/null.rs | 2 +- crates/polars-core/src/utils/supertype.rs | 12 +++---- crates/polars-expr/src/expressions/binary.rs | 2 +- crates/polars-expr/src/groups/mod.rs | 2 +- crates/polars-expr/src/hash_keys.rs | 4 +-- crates/polars-expr/src/hot_groups/mod.rs | 2 +- crates/polars-expr/src/idx_table/mod.rs | 2 +- crates/polars-expr/src/reduce/mean.rs | 4 +-- crates/polars-expr/src/reduce/min_max.rs | 4 +-- crates/polars-expr/src/reduce/sum.rs | 2 +- crates/polars-expr/src/reduce/var_std.rs | 2 +- crates/polars-io/src/catalog/unity/schema.rs | 8 ++--- crates/polars-io/src/csv/read/reader.rs | 2 +- .../src/csv/write/write_impl/serializer.rs | 2 +- .../src/chunked_array/gather/chunked.rs | 4 +-- crates/polars-ops/src/series/ops/abs.rs | 2 +- crates/polars-ops/src/series/ops/clip.rs | 6 ++-- crates/polars-ops/src/series/ops/cum_agg.rs | 6 ++-- crates/polars-ops/src/series/ops/index_of.rs | 2 +- .../series/ops/interpolation/interpolate.rs | 4 +-- crates/polars-ops/src/series/ops/is_in.rs | 4 +-- crates/polars-ops/src/series/ops/negate.rs | 2 +- .../src/plans/aexpr/function_expr/schema.rs | 4 +-- .../src/plans/aexpr/function_expr/strings.rs | 2 +- .../src/plans/aexpr/predicates/column_expr.rs | 2 +- crates/polars-plan/src/plans/aexpr/schema.rs | 16 ++++----- .../plans/conversion/type_coercion/is_in.rs | 4 +-- .../polars-python/src/conversion/any_value.rs | 4 +-- crates/polars-python/src/conversion/mod.rs | 4 +-- .../src/interop/numpy/to_numpy_series.rs | 2 +- crates/polars-python/src/series/comparison.rs | 2 +- crates/polars-python/src/series/export.rs | 2 +- crates/polars-python/src/series/import.rs | 2 +- crates/polars-python/src/series/map.rs | 2 +- crates/polars-sql/src/types.rs | 6 ++-- crates/polars-testing/src/asserts/series.rs | 8 ++--- crates/polars/tests/it/lazy/group_by.rs | 2 +- pyo3-polars/pyo3-polars/src/types.rs | 4 +-- 60 files changed, 158 insertions(+), 158 deletions(-) diff --git a/crates/polars-core/src/chunked_array/builder/list/mod.rs b/crates/polars-core/src/chunked_array/builder/list/mod.rs index 92163a78f27b..8bc8c62b93d0 100644 --- a/crates/polars-core/src/chunked_array/builder/list/mod.rs +++ b/crates/polars-core/src/chunked_array/builder/list/mod.rs @@ -113,7 +113,7 @@ pub fn get_list_builder( Some(inner_type_logical.clone()), )), #[cfg(feature = "dtype-decimal")] - DataType::NewDecimal(_, _) => Box::new( + DataType::Decimal(_, _) => Box::new( ListPrimitiveChunkedBuilder::::new_with_values_type( name, list_capacity, diff --git a/crates/polars-core/src/chunked_array/cast.rs b/crates/polars-core/src/chunked_array/cast.rs index 1b595babb2ee..40f8119ab149 100644 --- a/crates/polars-core/src/chunked_array/cast.rs +++ b/crates/polars-core/src/chunked_array/cast.rs @@ -79,7 +79,7 @@ fn cast_impl_inner( ) -> PolarsResult { let chunks = match dtype { #[cfg(feature = "dtype-decimal")] - DataType::NewDecimal(_, _) => { + DataType::Decimal(_, _) => { let mut chunks = cast_chunks(chunks, dtype, options)?; // @NOTE: We cannot cast here as that will lower the scale. for chunk in chunks.iter_mut() { @@ -113,7 +113,7 @@ fn cast_impl_inner( #[cfg(feature = "dtype-time")] Time => out.into_time(), #[cfg(feature = "dtype-decimal")] - NewDecimal(precision, scale) => out.into_decimal(*precision, *scale)?, + Decimal(precision, scale) => out.into_decimal(*precision, *scale)?, _ => out, }; @@ -295,7 +295,7 @@ impl ChunkCast for StringChunked { cast_single_to_struct(self.name().clone(), &self.chunks, fields, options) }, #[cfg(feature = "dtype-decimal")] - DataType::NewDecimal(precision, scale) => { + DataType::Decimal(precision, scale) => { let chunks = self.downcast_iter().map(|arr| { polars_compute::cast::binview_to_decimal(&arr.to_binview(), *precision, *scale) .to(ArrowDataType::Int128) diff --git a/crates/polars-core/src/chunked_array/logical/decimal.rs b/crates/polars-core/src/chunked_array/logical/decimal.rs index 15366644deb1..8b4a89467a29 100644 --- a/crates/polars-core/src/chunked_array/logical/decimal.rs +++ b/crates/polars-core/src/chunked_array/logical/decimal.rs @@ -14,7 +14,7 @@ impl Int128Chunked { #[inline] pub fn into_decimal_unchecked(self, precision: usize, scale: usize) -> DecimalChunked { // SAFETY: no invalid states (from a safety perspective). - unsafe { DecimalChunked::new_logical(self, DataType::NewDecimal(precision, scale)) } + unsafe { DecimalChunked::new_logical(self, DataType::Decimal(precision, scale)) } } pub fn into_decimal(self, precision: usize, scale: usize) -> PolarsResult { @@ -46,7 +46,7 @@ impl LogicalType for DecimalChunked { #[inline] unsafe fn get_any_value_unchecked(&self, i: usize) -> AnyValue<'_> { match self.phys.get_unchecked(i) { - Some(v) => AnyValue::NewDecimal(v, self.precision(), self.scale()), + Some(v) => AnyValue::Decimal(v, self.precision(), self.scale()), None => AnyValue::Null, } } @@ -56,7 +56,7 @@ impl LogicalType for DecimalChunked { dtype: &DataType, cast_options: CastOptions, ) -> PolarsResult { - if let DataType::NewDecimal(to_prec, to_scale) = dtype { + if let DataType::Decimal(to_prec, to_scale) = dtype { return Ok(self .with_prec_scale(*to_prec, *to_scale, cast_options.is_strict())? .into_owned() @@ -64,7 +64,7 @@ impl LogicalType for DecimalChunked { } match dtype { - DataType::NewDecimal(to_prec, to_scale) => { + DataType::Decimal(to_prec, to_scale) => { return Ok(self .with_prec_scale(*to_prec, *to_scale, cast_options.is_strict())? .into_owned() @@ -108,14 +108,14 @@ impl LogicalType for DecimalChunked { impl DecimalChunked { pub fn precision(&self) -> usize { match &self.dtype { - DataType::NewDecimal(precision, _) => *precision, + DataType::Decimal(precision, _) => *precision, _ => unreachable!(), } } pub fn scale(&self) -> usize { match &self.dtype { - DataType::NewDecimal(_, scale) => *scale, + DataType::Decimal(_, scale) => *scale, _ => unreachable!(), } } @@ -166,7 +166,7 @@ impl DecimalChunked { unary_elementwise(&self.phys, |x| dec128_rescale(x?, old_s, prec, scale)) }; - let ca = unsafe { DecimalChunked::new_logical(phys, DataType::NewDecimal(prec, scale)) }; + let ca = unsafe { DecimalChunked::new_logical(phys, DataType::Decimal(prec, scale)) }; Ok(Cow::Owned(ca)) } diff --git a/crates/polars-core/src/chunked_array/ops/any_value.rs b/crates/polars-core/src/chunked_array/ops/any_value.rs index e7f1f17930b4..6bcd7a6e03b6 100644 --- a/crates/polars-core/src/chunked_array/ops/any_value.rs +++ b/crates/polars-core/src/chunked_array/ops/any_value.rs @@ -128,10 +128,10 @@ pub(crate) unsafe fn arr_to_any_value<'a>( AnyValue::Time(v) }, #[cfg(feature = "dtype-decimal")] - DataType::NewDecimal(precision, scale) => { + DataType::Decimal(precision, scale) => { let arr = &*(arr as *const dyn Array as *const Int128Array); let v = arr.value_unchecked(idx); - AnyValue::NewDecimal(v, *precision, *scale) + AnyValue::Decimal(v, *precision, *scale) }, #[cfg(feature = "object")] DataType::Object(_) => { diff --git a/crates/polars-core/src/chunked_array/ops/decimal.rs b/crates/polars-core/src/chunked_array/ops/decimal.rs index e0b373606a34..a6997efead4f 100644 --- a/crates/polars-core/src/chunked_array/ops/decimal.rs +++ b/crates/polars-core/src/chunked_array/ops/decimal.rs @@ -36,7 +36,7 @@ impl StringChunked { } pub fn to_decimal(&self, prec: usize, scale: usize) -> PolarsResult { - self.cast_with_options(&DataType::NewDecimal(prec, scale), CastOptions::NonStrict) + self.cast_with_options(&DataType::Decimal(prec, scale), CastOptions::NonStrict) } } @@ -56,12 +56,12 @@ mod test { ]; let s = StringChunked::from_slice(PlSmallStr::from_str("test"), &vals); let s = s.to_decimal_infer(6).unwrap(); - assert_eq!(s.dtype(), &DataType::NewDecimal(12, 5)); + assert_eq!(s.dtype(), &DataType::Decimal(12, 5)); assert_eq!(s.len(), 7); - assert_eq!(s.get(0).unwrap(), AnyValue::NewDecimal(100000, 12, 5)); + assert_eq!(s.get(0).unwrap(), AnyValue::Decimal(100000, 12, 5)); assert_eq!(s.get(1).unwrap(), AnyValue::Null); - assert_eq!(s.get(3).unwrap(), AnyValue::NewDecimal(300045, 12, 5)); - assert_eq!(s.get(4).unwrap(), AnyValue::NewDecimal(-400000, 12, 5)); - assert_eq!(s.get(6).unwrap(), AnyValue::NewDecimal(525251, 12, 5)); + assert_eq!(s.get(3).unwrap(), AnyValue::Decimal(300045, 12, 5)); + assert_eq!(s.get(4).unwrap(), AnyValue::Decimal(-400000, 12, 5)); + assert_eq!(s.get(6).unwrap(), AnyValue::Decimal(525251, 12, 5)); } } diff --git a/crates/polars-core/src/chunked_array/ops/row_encode.rs b/crates/polars-core/src/chunked_array/ops/row_encode.rs index 413f3a66b7c6..201bc8ea61af 100644 --- a/crates/polars-core/src/chunked_array/ops/row_encode.rs +++ b/crates/polars-core/src/chunked_array/ops/row_encode.rs @@ -114,7 +114,7 @@ pub fn get_row_encoding_context(dtype: &DataType) -> Option DataType::Object(_) => panic!("Unsupported in row encoding"), #[cfg(feature = "dtype-decimal")] - DataType::NewDecimal(precision, _) => Some(RowEncodingContext::Decimal(*precision)), + DataType::Decimal(precision, _) => Some(RowEncodingContext::Decimal(*precision)), #[cfg(feature = "dtype-array")] DataType::Array(dtype, _) => get_row_encoding_context(dtype), diff --git a/crates/polars-core/src/datatypes/_serde.rs b/crates/polars-core/src/datatypes/_serde.rs index e0f472e799a9..0dc8b452f44e 100644 --- a/crates/polars-core/src/datatypes/_serde.rs +++ b/crates/polars-core/src/datatypes/_serde.rs @@ -92,7 +92,7 @@ enum SerializableDataType { strings: Series, }, #[cfg(feature = "dtype-decimal")] - NewDecimal(usize, usize), + Decimal(usize, usize), #[cfg(feature = "object")] Object(String), } @@ -143,7 +143,7 @@ impl From<&DataType> for SerializableDataType { .into_series(), }, #[cfg(feature = "dtype-decimal")] - NewDecimal(precision, scale) => Self::NewDecimal(*precision, *scale), + Decimal(precision, scale) => Self::Decimal(*precision, *scale), #[cfg(feature = "object")] Object(name) => Self::Object(name.to_string()), } @@ -202,7 +202,7 @@ impl From for DataType { Self::Enum(fcats, mapping) }, #[cfg(feature = "dtype-decimal")] - NewDecimal(precision, scale) => Self::NewDecimal(precision, scale), + Decimal(precision, scale) => Self::Decimal(precision, scale), #[cfg(feature = "object")] Object(_) => Self::Object("unknown"), } diff --git a/crates/polars-core/src/datatypes/any_value.rs b/crates/polars-core/src/datatypes/any_value.rs index 92529a0313c9..111f9f7f5109 100644 --- a/crates/polars-core/src/datatypes/any_value.rs +++ b/crates/polars-core/src/datatypes/any_value.rs @@ -109,7 +109,7 @@ pub enum AnyValue<'a> { BinaryOwned(Vec), /// A 128-bit fixed point decimal number with a precision and scale. #[cfg(feature = "dtype-decimal")] - NewDecimal(i128, usize, usize), + Decimal(i128, usize, usize), } impl AnyValue<'static> { @@ -127,7 +127,7 @@ impl AnyValue<'static> { #[cfg(feature = "dtype-duration")] DataType::Duration(unit) => AnyValue::Duration(0, *unit), #[cfg(feature = "dtype-decimal")] - DataType::NewDecimal(p, s) => AnyValue::NewDecimal(0, *p, *s), + DataType::Decimal(p, s) => AnyValue::Decimal(0, *p, *s), _ => AnyValue::Null, } } @@ -159,7 +159,7 @@ impl AnyValue<'static> { DT::Float32 => AV::Float32(numeric_to_one.into()), DT::Float64 => AV::Float64(numeric_to_one.into()), #[cfg(feature = "dtype-decimal")] - DT::NewDecimal(p, s) => AV::NewDecimal(0, *p, *s), + DT::Decimal(p, s) => AV::Decimal(0, *p, *s), DT::String => AV::String(""), DT::Binary => AV::Binary(&[]), DT::BinaryOffset => AV::Binary(&[]), @@ -266,7 +266,7 @@ impl<'a> AnyValue<'a> { #[cfg(feature = "dtype-struct")] StructOwned(payload) => DataType::Struct(payload.1.clone()), #[cfg(feature = "dtype-decimal")] - NewDecimal(_, p, s) => DataType::NewDecimal(*p, *s), + Decimal(_, p, s) => DataType::Decimal(*p, *s), #[cfg(feature = "object")] Object(o) => DataType::Object(o.type_name()), #[cfg(feature = "object")] @@ -301,7 +301,7 @@ impl<'a> AnyValue<'a> { #[cfg(feature = "dtype-duration")] Duration(v, _) => NumCast::from(*v), #[cfg(feature = "dtype-decimal")] - NewDecimal(v, _p, s) => { + Decimal(v, _p, s) => { if T::is_float() { NumCast::from(dec128_to_f64(*v, *s)) } else { @@ -599,23 +599,23 @@ impl<'a> AnyValue<'a> { ), #[cfg(feature = "dtype-decimal")] - (av, DataType::NewDecimal(p, s)) if av.is_integer() => { + (av, DataType::Decimal(p, s)) if av.is_integer() => { let int = av.try_extract::().ok()?; let dec = i128_to_dec128(int, *p, *s)?; - AnyValue::NewDecimal(dec, *p, *s) + AnyValue::Decimal(dec, *p, *s) }, #[cfg(feature = "dtype-decimal")] - (av, DataType::NewDecimal(p, s)) if av.is_float() => { + (av, DataType::Decimal(p, s)) if av.is_float() => { let f = av.try_extract::().unwrap(); let dec = f64_to_dec128(f, *p, *s)?; - AnyValue::NewDecimal(dec, *p, *s) + AnyValue::Decimal(dec, *p, *s) }, #[cfg(feature = "dtype-decimal")] - (AnyValue::NewDecimal(value, _old_p, old_s), DataType::NewDecimal(p, s)) => { + (AnyValue::Decimal(value, _old_p, old_s), DataType::Decimal(p, s)) => { let converted = dec128_rescale(*value, *old_s, *p, *s)?; - AnyValue::NewDecimal(converted, *p, *s) + AnyValue::Decimal(converted, *p, *s) }, // to self @@ -730,7 +730,7 @@ impl<'a> AnyValue<'a> { ))), #[cfg(feature = "dtype-decimal")] - Self::NewDecimal(v, _, _) => Self::Int128(v), + Self::Decimal(v, _, _) => Self::Int128(v), } } @@ -847,7 +847,7 @@ impl AnyValue<'_> { #[cfg(feature = "dtype-struct")] StructOwned(v) => v.0.hash(state), #[cfg(feature = "dtype-decimal")] - NewDecimal(v, s, p) => { + Decimal(v, s, p) => { v.hash(state); s.hash(state); p.hash(state); @@ -959,14 +959,14 @@ impl<'a> AnyValue<'a> { Duration(l + r, *lu) }, #[cfg(feature = "dtype-decimal")] - (NewDecimal(l, lp, ls), NewDecimal(r, rp, rs)) => { + (Decimal(l, lp, ls), Decimal(r, rp, rs)) => { if (lp, ls) != (rp, rs) { unimplemented!( "adding decimals with different precisions/scales is not supported here" ); } - NewDecimal(l + r, *lp, *ls) + Decimal(l + r, *lp, *ls) }, _ => unimplemented!(), } @@ -1046,7 +1046,7 @@ impl<'a> AnyValue<'a> { unsafe { std::mem::transmute::, AnyValue<'static>>(av) } }, #[cfg(feature = "dtype-decimal")] - NewDecimal(val, s, p) => NewDecimal(val, s, p), + Decimal(val, s, p) => Decimal(val, s, p), #[cfg(feature = "dtype-categorical")] Categorical(cat, map) => CategoricalOwned(cat, map.clone()), #[cfg(feature = "dtype-categorical")] @@ -1233,7 +1233,7 @@ impl AnyValue<'_> { null_equal, ), #[cfg(feature = "dtype-decimal")] - (NewDecimal(lv, _lp, ls), NewDecimal(rv, _rp, rs)) => dec128_eq(*lv, *ls, *rv, *rs), + (Decimal(lv, _lp, ls), Decimal(rv, _rp, rs)) => dec128_eq(*lv, *ls, *rv, *rs), #[cfg(feature = "object")] (Object(l), Object(r)) => l == r, #[cfg(feature = "dtype-array")] @@ -1382,7 +1382,7 @@ impl PartialOrd for AnyValue<'_> { unimplemented!("ordering for Struct dtype is not supported") }, #[cfg(feature = "dtype-decimal")] - (NewDecimal(lv, _lp, ls), NewDecimal(rv, _rp, rs)) => { + (Decimal(lv, _lp, ls), Decimal(rv, _rp, rs)) => { Some(dec128_cmp(*lv, *ls, *rv, *rs)) }, diff --git a/crates/polars-core/src/datatypes/dtype.rs b/crates/polars-core/src/datatypes/dtype.rs index 81db20ca2801..0a73b83cf4ae 100644 --- a/crates/polars-core/src/datatypes/dtype.rs +++ b/crates/polars-core/src/datatypes/dtype.rs @@ -103,7 +103,7 @@ pub enum DataType { /// This is backed by a signed 128-bit integer which allows for up to 38 significant digits. /// Meaning max precision is 38. #[cfg(feature = "dtype-decimal")] - NewDecimal(usize, usize), // (precision, scale), invariant: 1 <= precision <= 38. + Decimal(usize, usize), // (precision, scale), invariant: 1 <= precision <= 38. /// String data String, Binary, @@ -169,7 +169,7 @@ impl PartialEq for DataType { #[cfg(feature = "dtype-duration")] (Duration(tu_l), Duration(tu_r)) => tu_l == tu_r, #[cfg(feature = "dtype-decimal")] - (NewDecimal(p1, s1), NewDecimal(p2, s2)) => (p1, s1) == (p2, s2), + (Decimal(p1, s1), Decimal(p2, s2)) => (p1, s1) == (p2, s2), #[cfg(feature = "object")] (Object(lhs), Object(rhs)) => lhs == rhs, #[cfg(feature = "dtype-struct")] @@ -403,7 +403,7 @@ impl DataType { (D::Boolean, dt) | (dt, D::Boolean) => match dt { dt if dt.is_primitive_numeric() => true, #[cfg(feature = "dtype-decimal")] - D::NewDecimal(_, _) => true, + D::Decimal(_, _) => true, D::String | D::Binary => true, _ => false, }, @@ -451,7 +451,7 @@ impl DataType { Duration(_) => Int64, Time => Int64, #[cfg(feature = "dtype-decimal")] - NewDecimal(_, _) => Int128, + Decimal(_, _) => Int128, #[cfg(feature = "dtype-categorical")] Categorical(cats, _) => cats.physical().dtype(), #[cfg(feature = "dtype-categorical")] @@ -654,7 +654,7 @@ impl DataType { pub fn is_decimal(&self) -> bool { match self { #[cfg(feature = "dtype-decimal")] - DataType::NewDecimal(_, _) => true, + DataType::Decimal(_, _) => true, _ => false, } } @@ -855,7 +855,7 @@ impl DataType { Float32 => Ok(ArrowDataType::Float32), Float64 => Ok(ArrowDataType::Float64), #[cfg(feature = "dtype-decimal")] - NewDecimal(precision, scale) => { + Decimal(precision, scale) => { assert!(*precision >= 1 && *precision <= 38); Ok(ArrowDataType::Decimal(*precision, *scale)) }, @@ -974,7 +974,7 @@ impl DataType { }, (DataType::Null, DataType::Null) => Ok(false), #[cfg(feature = "dtype-decimal")] - (DataType::NewDecimal(p1, s1), DataType::NewDecimal(p2, s2)) => { + (DataType::Decimal(p1, s1), DataType::Decimal(p2, s2)) => { Ok((p1, s1) != (p2, s2)) }, // We don't allow the other way around, only if our current type is @@ -1071,7 +1071,7 @@ impl Display for DataType { DataType::Float32 => "f32", DataType::Float64 => "f64", #[cfg(feature = "dtype-decimal")] - DataType::NewDecimal(p, s) => return write!(f, "decimal[{p},{s}]"), + DataType::Decimal(p, s) => return write!(f, "decimal[{p},{s}]"), DataType::String => "str", DataType::Binary => "binary", DataType::Date => "date", @@ -1144,7 +1144,7 @@ impl std::fmt::Debug for DataType { } }, #[cfg(feature = "dtype-decimal")] - NewDecimal(p, s) => write!(f, "Decimal({p}, {s})"), + Decimal(p, s) => write!(f, "Decimal({p}, {s})"), #[cfg(feature = "dtype-array")] Array(inner, size) => write!(f, "Array({inner:?}, {size})"), List(inner) => write!(f, "List({inner:?})"), diff --git a/crates/polars-core/src/datatypes/field.rs b/crates/polars-core/src/datatypes/field.rs index accd8f36b98c..73b574fbca6d 100644 --- a/crates/polars-core/src/datatypes/field.rs +++ b/crates/polars-core/src/datatypes/field.rs @@ -261,7 +261,7 @@ impl DataType { } }, #[cfg(feature = "dtype-decimal")] - ArrowDataType::Decimal(precision, scale) => DataType::NewDecimal(*precision, *scale), + ArrowDataType::Decimal(precision, scale) => DataType::Decimal(*precision, *scale), ArrowDataType::Utf8View | ArrowDataType::LargeUtf8 | ArrowDataType::Utf8 => { DataType::String }, diff --git a/crates/polars-core/src/datatypes/proptest.rs b/crates/polars-core/src/datatypes/proptest.rs index 15c5e495d01e..96e831514cb3 100644 --- a/crates/polars-core/src/datatypes/proptest.rs +++ b/crates/polars-core/src/datatypes/proptest.rs @@ -156,7 +156,7 @@ fn decimal_strategy( let scale_strategy = (0_usize..=precision); (Just(precision), scale_strategy) }) - .prop_map(|(precision, scale)| DataType::NewDecimal(precision, scale)) + .prop_map(|(precision, scale)| DataType::Decimal(precision, scale)) } fn datetime_strategy() -> impl Strategy { diff --git a/crates/polars-core/src/fmt.rs b/crates/polars-core/src/fmt.rs index 5b733e2712aa..c606a1cb6ea2 100644 --- a/crates/polars-core/src/fmt.rs +++ b/crates/polars-core/src/fmt.rs @@ -392,7 +392,7 @@ impl Debug for Series { format_array!(f, self.duration().unwrap(), &dt, self.name(), "Series") }, #[cfg(feature = "dtype-decimal")] - DataType::NewDecimal(_, _) => { + DataType::Decimal(_, _) => { let dt = format!("{}", self.dtype()); format_array!(f, self.decimal().unwrap(), &dt, self.name(), "Series") }, @@ -1212,7 +1212,7 @@ impl Display for AnyValue<'_> { #[cfg(feature = "dtype-struct")] AnyValue::StructOwned(payload) => fmt_struct(f, &payload.0), #[cfg(feature = "dtype-decimal")] - AnyValue::NewDecimal(v, _prec, scale) => fmt_decimal(f, *v, *scale), + AnyValue::Decimal(v, _prec, scale) => fmt_decimal(f, *v, *scale), } } } diff --git a/crates/polars-core/src/frame/group_by/aggregations/dispatch.rs b/crates/polars-core/src/frame/group_by/aggregations/dispatch.rs index 58a4e70dd197..fedff4357f18 100644 --- a/crates/polars-core/src/frame/group_by/aggregations/dispatch.rs +++ b/crates/polars-core/src/frame/group_by/aggregations/dispatch.rs @@ -137,7 +137,7 @@ impl Series { Float64 => SeriesWrap(s.f64().unwrap().clone()).agg_mean(groups), dt if dt.is_primitive_numeric() => apply_method_physical_integer!(s, agg_mean, groups), #[cfg(feature = "dtype-decimal")] - NewDecimal(_, _) => self.cast(&Float64).unwrap().agg_mean(groups), + Decimal(_, _) => self.cast(&Float64).unwrap().agg_mean(groups), #[cfg(feature = "dtype-datetime")] dt @ Datetime(_, _) => self .to_physical_repr() @@ -193,7 +193,7 @@ impl Series { apply_method_physical_integer!(s, agg_median, groups) }, #[cfg(feature = "dtype-decimal")] - NewDecimal(_, _) => self.cast(&Float64).unwrap().agg_median(groups), + Decimal(_, _) => self.cast(&Float64).unwrap().agg_median(groups), #[cfg(feature = "dtype-datetime")] dt @ Datetime(_, _) => self .to_physical_repr() diff --git a/crates/polars-core/src/frame/row/av_buffer.rs b/crates/polars-core/src/frame/row/av_buffer.rs index c20b1921dfad..3e52101c9456 100644 --- a/crates/polars-core/src/frame/row/av_buffer.rs +++ b/crates/polars-core/src/frame/row/av_buffer.rs @@ -141,7 +141,7 @@ impl<'a> AnyValueBuffer<'a> { AnyValue::Boolean(true) => builder.append_value("true"), AnyValue::Boolean(false) => builder.append_value("false"), #[cfg(feature = "dtype-decimal")] - AnyValue::NewDecimal(v, _p, s) => { + AnyValue::Decimal(v, _p, s) => { let mut fmt = DecimalFmtBuffer::new(); builder.append_value(fmt.format_dec128(v, s, false)); }, diff --git a/crates/polars-core/src/scalar/new.rs b/crates/polars-core/src/scalar/new.rs index 567b770ed11b..d64222fa6e1c 100644 --- a/crates/polars-core/src/scalar/new.rs +++ b/crates/polars-core/src/scalar/new.rs @@ -53,8 +53,8 @@ impl Scalar { #[cfg(feature = "dtype-decimal")] pub fn new_decimal(value: i128, precision: usize, scale: usize) -> Self { Scalar::new( - DataType::NewDecimal(precision, scale), - AnyValue::NewDecimal(value, precision, scale), + DataType::Decimal(precision, scale), + AnyValue::Decimal(value, precision, scale), ) } diff --git a/crates/polars-core/src/scalar/serde.rs b/crates/polars-core/src/scalar/serde.rs index 0d8124ec0cf9..a2ca225cbfae 100644 --- a/crates/polars-core/src/scalar/serde.rs +++ b/crates/polars-core/src/scalar/serde.rs @@ -257,7 +257,7 @@ impl TryFrom for SerializableScalar { }, #[cfg(feature = "dtype-decimal")] - AnyValue::NewDecimal(v, prec, scale) => Self::Decimal(v, prec, scale), + AnyValue::Decimal(v, prec, scale) => Self::Decimal(v, prec, scale), }; Ok(out) } diff --git a/crates/polars-core/src/series/any_value.rs b/crates/polars-core/src/series/any_value.rs index 6e470d7c5a4e..f648d005e15c 100644 --- a/crates/polars-core/src/series/any_value.rs +++ b/crates/polars-core/src/series/any_value.rs @@ -65,14 +65,14 @@ impl Series { } let dtype = if strict { match get_first_non_null_dtype(values) { - DataType::NewDecimal(mut prec, mut scale) => { + DataType::Decimal(mut prec, mut scale) => { for v in values { - if let DataType::NewDecimal(p, s) = v.dtype() { + if let DataType::Decimal(p, s) = v.dtype() { prec = prec.max(p); scale = scale.max(s); } } - DataType::NewDecimal(prec, scale) + DataType::Decimal(prec, scale) }, dt => dt, } @@ -138,7 +138,7 @@ impl Series { any_values_to_categorical(values, dt, strict)? }, #[cfg(feature = "dtype-decimal")] - DataType::NewDecimal(precision, scale) => { + DataType::Decimal(precision, scale) => { any_values_to_decimal(values, *precision, *scale, strict)?.into_series() }, DataType::List(inner) => any_values_to_list(values, inner, strict)?.into_series(), @@ -515,18 +515,18 @@ fn any_values_to_decimal( scale: usize, strict: bool, ) -> PolarsResult { - let target_dtype = DataType::NewDecimal(precision, scale); + let target_dtype = DataType::Decimal(precision, scale); let mut builder = PrimitiveChunkedBuilder::::new(PlSmallStr::EMPTY, values.len()); for av in values { match av { // Allow equal or less scale. We do want to support different scales even in 'strict' mode. - AnyValue::NewDecimal(v, p, s) if *s <= scale => { + AnyValue::Decimal(v, p, s) if *s <= scale => { if *p <= precision && *s == scale { builder.append_value(*v) } else { match av.strict_cast(&target_dtype) { - Some(AnyValue::NewDecimal(i, _, _)) => builder.append_value(i), + Some(AnyValue::Decimal(i, _, _)) => builder.append_value(i), _ => builder.append_null(), } } @@ -537,7 +537,7 @@ fn any_values_to_decimal( return Err(invalid_value_error(&target_dtype, av)); } match av.strict_cast(&target_dtype) { - Some(AnyValue::NewDecimal(i, _, _)) => builder.append_value(i), + Some(AnyValue::Decimal(i, _, _)) => builder.append_value(i), _ => builder.append_null(), } }, diff --git a/crates/polars-core/src/series/from.rs b/crates/polars-core/src/series/from.rs index 5f8fd3bfb352..9a29b6aba24e 100644 --- a/crates/polars-core/src/series/from.rs +++ b/crates/polars-core/src/series/from.rs @@ -92,7 +92,7 @@ impl Series { .into_datetime(*tu, tz.clone()) .into_series(), #[cfg(feature = "dtype-decimal")] - NewDecimal(precision, scale) => Int128Chunked::from_chunks(name, chunks) + Decimal(precision, scale) => Int128Chunked::from_chunks(name, chunks) .into_decimal_unchecked(*precision, *scale) .into_series(), #[cfg(feature = "dtype-array")] diff --git a/crates/polars-core/src/series/implementations/decimal.rs b/crates/polars-core/src/series/implementations/decimal.rs index 14db9d3556bf..ab7f70d0bc87 100644 --- a/crates/polars-core/src/series/implementations/decimal.rs +++ b/crates/polars-core/src/series/implementations/decimal.rs @@ -84,7 +84,7 @@ impl SeriesWrap { ListChunked::from_chunks_and_dtype_unchecked( agg_s.name().clone(), vec![Box::new(new_arr)], - DataType::List(Box::new(DataType::NewDecimal(precision, scale))), + DataType::List(Box::new(DataType::Decimal(precision, scale))), ) .into_series() } @@ -393,21 +393,21 @@ impl SeriesTrait for SeriesWrap { fn sum_reduce(&self) -> PolarsResult { Ok(self.apply_physical(|ca| { let sum = ca.sum(); - let DataType::NewDecimal(prec, scale) = self.dtype() else { + let DataType::Decimal(prec, scale) = self.dtype() else { unreachable!() }; - let av = AnyValue::NewDecimal(sum.unwrap(), *prec, *scale); + let av = AnyValue::Decimal(sum.unwrap(), *prec, *scale); Scalar::new(self.dtype().clone(), av) })) } fn min_reduce(&self) -> PolarsResult { Ok(self.apply_physical(|ca| { let min = ca.min(); - let DataType::NewDecimal(prec, scale) = self.dtype() else { + let DataType::Decimal(prec, scale) = self.dtype() else { unreachable!() }; let av = if let Some(min) = min { - AnyValue::NewDecimal(min, *prec, *scale) + AnyValue::Decimal(min, *prec, *scale) } else { AnyValue::Null }; @@ -417,11 +417,11 @@ impl SeriesTrait for SeriesWrap { fn max_reduce(&self) -> PolarsResult { Ok(self.apply_physical(|ca| { let max = ca.max(); - let DataType::NewDecimal(prec, scale) = self.dtype() else { + let DataType::Decimal(prec, scale) = self.dtype() else { unreachable!() }; let av = if let Some(m) = max { - AnyValue::NewDecimal(m, *prec, *scale) + AnyValue::Decimal(m, *prec, *scale) } else { AnyValue::Null }; diff --git a/crates/polars-core/src/series/into.rs b/crates/polars-core/src/series/into.rs index 53d212d2dec5..5b7d24609815 100644 --- a/crates/polars-core/src/series/into.rs +++ b/crates/polars-core/src/series/into.rs @@ -150,7 +150,7 @@ impl Series { ) .unwrap(), #[cfg(feature = "dtype-decimal")] - DataType::NewDecimal(_, _) => self.decimal().unwrap().physical().chunks()[chunk_idx] + DataType::Decimal(_, _) => self.decimal().unwrap().physical().chunks()[chunk_idx] .as_any() .downcast_ref::>() .unwrap() diff --git a/crates/polars-core/src/series/mod.rs b/crates/polars-core/src/series/mod.rs index f78e89965370..d5e93fce81fc 100644 --- a/crates/polars-core/src/series/mod.rs +++ b/crates/polars-core/src/series/mod.rs @@ -506,7 +506,7 @@ impl Series { use DataType as D; match (self.dtype(), dtype) { #[cfg(feature = "dtype-decimal")] - (D::Int128, D::NewDecimal(precision, scale)) => { + (D::Int128, D::Decimal(precision, scale)) => { let ca = self.i128().unwrap(); Ok(ca .clone() @@ -730,7 +730,7 @@ impl Series { }) }, #[cfg(feature = "dtype-decimal")] - NewDecimal(_, _) => Cow::Owned(self.decimal().unwrap().phys.clone().into_series()), + Decimal(_, _) => Cow::Owned(self.decimal().unwrap().phys.clone().into_series()), List(_) => match self.list().unwrap().to_physical_repr() { Cow::Borrowed(_) => Cow::Borrowed(self), Cow::Owned(ca) => Cow::Owned(ca.into_series()), @@ -824,7 +824,7 @@ impl Series { .clone() .into_decimal(precision, scale)? .into_series()), - DataType::NewDecimal(cur_prec, cur_scale) + DataType::Decimal(cur_prec, cur_scale) if scale == *cur_scale && precision >= *cur_prec => { Ok(self) @@ -1171,23 +1171,23 @@ mod test { #[cfg(feature = "dtype-decimal")] fn series_append_decimal() { let s1 = Series::new("a".into(), &[1.1, 2.3]) - .cast(&DataType::NewDecimal(38, 2)) + .cast(&DataType::Decimal(38, 2)) .unwrap(); let s2 = Series::new("b".into(), &[3]) - .cast(&DataType::NewDecimal(38, 0)) + .cast(&DataType::Decimal(38, 0)) .unwrap(); { let mut s1 = s1.clone(); s1.append(&s2).unwrap(); assert_eq!(s1.len(), 3); - assert_eq!(s1.get(2).unwrap(), AnyValue::NewDecimal(300, 38, 2)); + assert_eq!(s1.get(2).unwrap(), AnyValue::Decimal(300, 38, 2)); } { let mut s2 = s2; s2.extend(&s1).unwrap(); - assert_eq!(s2.get(2).unwrap(), AnyValue::NewDecimal(2, 38, 0)); + assert_eq!(s2.get(2).unwrap(), AnyValue::Decimal(2, 38, 0)); } } diff --git a/crates/polars-core/src/series/ops/downcast.rs b/crates/polars-core/src/series/ops/downcast.rs index d44d80e0b43e..8afe3c14d827 100644 --- a/crates/polars-core/src/series/ops/downcast.rs +++ b/crates/polars-core/src/series/ops/downcast.rs @@ -151,7 +151,7 @@ impl Series { /// Unpack to [`ChunkedArray`] of dtype [`DataType::Decimal`] #[cfg(feature = "dtype-decimal")] pub fn try_decimal(&self) -> Option<&DecimalChunked> { - try_unpack_chunked!(self, DataType::NewDecimal(_, _) => DecimalChunked) + try_unpack_chunked!(self, DataType::Decimal(_, _) => DecimalChunked) } /// Unpack to [`ChunkedArray`] of dtype list diff --git a/crates/polars-core/src/series/ops/null.rs b/crates/polars-core/src/series/ops/null.rs index 8c430a7f0568..9fa4c9272846 100644 --- a/crates/polars-core/src/series/ops/null.rs +++ b/crates/polars-core/src/series/ops/null.rs @@ -45,7 +45,7 @@ impl Series { .into_time() .into_series(), #[cfg(feature = "dtype-decimal")] - DataType::NewDecimal(precision, scale) => Int128Chunked::full_null(name, size) + DataType::Decimal(precision, scale) => Int128Chunked::full_null(name, size) .into_decimal_unchecked(*precision, *scale) .into_series(), #[cfg(feature = "dtype-struct")] diff --git a/crates/polars-core/src/utils/supertype.rs b/crates/polars-core/src/utils/supertype.rs index 4bf92bea7cda..c3957200cd1a 100644 --- a/crates/polars-core/src/utils/supertype.rs +++ b/crates/polars-core/src/utils/supertype.rs @@ -468,13 +468,13 @@ pub fn get_supertype_with_options( Some(Struct(new_fields)) } #[cfg(feature = "dtype-decimal")] - (NewDecimal(p1, s1), NewDecimal(p2, s2)) => { - Some(NewDecimal((*p1).max(*p2), (*s1).max(*s2))) + (Decimal(p1, s1), Decimal(p2, s2)) => { + Some(Decimal((*p1).max(*p2), (*s1).max(*s2))) }, #[cfg(feature = "dtype-decimal")] - (NewDecimal(_, _), Float32 | Float64) => Some(Float64), + (Decimal(_, _), Float32 | Float64) => Some(Float64), #[cfg(feature = "dtype-decimal")] - (NewDecimal(prec, scale), dt) if dt.is_signed_integer() || dt.is_unsigned_integer() => { + (Decimal(prec, scale), dt) if dt.is_signed_integer() || dt.is_unsigned_integer() => { use polars_compute::decimal::{i128_to_dec128, DEC128_MAX_PREC}; let fits = |v| { i128_to_dec128(v, *prec, *scale).is_some() }; let fits_orig_prec_scale = match dt { @@ -491,9 +491,9 @@ pub fn get_supertype_with_options( _ => unreachable!(), }; if fits_orig_prec_scale { - Some(NewDecimal(*prec, *scale)) + Some(Decimal(*prec, *scale)) } else { - Some(NewDecimal(DEC128_MAX_PREC, *scale)) + Some(Decimal(DEC128_MAX_PREC, *scale)) } } _ => None, diff --git a/crates/polars-expr/src/expressions/binary.rs b/crates/polars-expr/src/expressions/binary.rs index 5021f6e25cd5..639efbaa17f6 100644 --- a/crates/polars-expr/src/expressions/binary.rs +++ b/crates/polars-expr/src/expressions/binary.rs @@ -74,7 +74,7 @@ pub fn apply_operator(left: &Column, right: &Column, op: Operator) -> PolarsResu Operator::Divide => left / right, Operator::TrueDivide => match left.dtype() { #[cfg(feature = "dtype-decimal")] - NewDecimal(_, _) => left / right, + Decimal(_, _) => left / right, Duration(_) | Date | Datetime(_, _) | Float32 | Float64 => left / right, #[cfg(feature = "dtype-array")] Array(..) => left / right, diff --git a/crates/polars-expr/src/groups/mod.rs b/crates/polars-expr/src/groups/mod.rs index 5778e5c44f05..c78c61c00940 100644 --- a/crates/polars-expr/src/groups/mod.rs +++ b/crates/polars-expr/src/groups/mod.rs @@ -83,7 +83,7 @@ pub fn new_hash_grouper(key_schema: Arc) -> Box { }, #[cfg(feature = "dtype-decimal")] - DataType::NewDecimal(_, _) => { + DataType::Decimal(_, _) => { Box::new(single_key::SingleKeyHashGrouper::::new()) }, #[cfg(feature = "dtype-categorical")] diff --git a/crates/polars-expr/src/hash_keys.rs b/crates/polars-expr/src/hash_keys.rs index cadff93f9bbe..84c28ea00bdd 100644 --- a/crates/polars-expr/src/hash_keys.rs +++ b/crates/polars-expr/src/hash_keys.rs @@ -27,7 +27,7 @@ pub fn hash_keys_variant_for_dtype(dt: &DataType) -> HashKeysVariant { dt if dt.is_primitive_numeric() | dt.is_temporal() => HashKeysVariant::Single, #[cfg(feature = "dtype-decimal")] - DataType::NewDecimal(_, _) => HashKeysVariant::Single, + DataType::Decimal(_, _) => HashKeysVariant::Single, #[cfg(feature = "dtype-categorical")] DataType::Enum(_, _) | DataType::Categorical(_, _) => HashKeysVariant::Single, @@ -76,7 +76,7 @@ macro_rules! downcast_single_key_ca { DataType::Duration(..) => { let $ca = $self.duration().unwrap().physical(); $($body)* }, #[cfg(feature = "dtype-decimal")] - DataType::NewDecimal(..) => { let $ca = $self.decimal().unwrap().physical(); $($body)* }, + DataType::Decimal(..) => { let $ca = $self.decimal().unwrap().physical(); $($body)* }, #[cfg(feature = "dtype-categorical")] dt @ (DataType::Enum(_, _) | DataType::Categorical(_, _)) => { match dt.cat_physical().unwrap() { diff --git a/crates/polars-expr/src/hot_groups/mod.rs b/crates/polars-expr/src/hot_groups/mod.rs index b527f9b88205..a7a19be0c2de 100644 --- a/crates/polars-expr/src/hot_groups/mod.rs +++ b/crates/polars-expr/src/hot_groups/mod.rs @@ -83,7 +83,7 @@ pub fn new_hash_hot_grouper(key_schema: Arc, num_groups: usize) -> Box Box::new(SK::::new(dt, ng)), #[cfg(feature = "dtype-decimal")] - DataType::NewDecimal(_, _) => Box::new(SK::::new(dt, ng)), + DataType::Decimal(_, _) => Box::new(SK::::new(dt, ng)), #[cfg(feature = "dtype-categorical")] dt @ (DataType::Enum(_, _) | DataType::Categorical(_, _)) => { with_match_categorical_physical_type!(dt.cat_physical().unwrap(), |$C| { diff --git a/crates/polars-expr/src/idx_table/mod.rs b/crates/polars-expr/src/idx_table/mod.rs index 5e8daeb0e992..b61e38402d7c 100644 --- a/crates/polars-expr/src/idx_table/mod.rs +++ b/crates/polars-expr/src/idx_table/mod.rs @@ -104,7 +104,7 @@ pub fn new_idx_table(key_schema: Arc) -> Box { DataType::Time => Box::new(SKIT::::new()), #[cfg(feature = "dtype-decimal")] - DataType::NewDecimal(_, _) => Box::new(SKIT::::new()), + DataType::Decimal(_, _) => Box::new(SKIT::::new()), #[cfg(feature = "dtype-categorical")] dt @ (DataType::Enum(_, _) | DataType::Categorical(_, _)) => { with_match_categorical_physical_type!(dt.cat_physical().unwrap(), |$C| { diff --git a/crates/polars-expr/src/reduce/mean.rs b/crates/polars-expr/src/reduce/mean.rs index 34879e57b5a0..ba5c738e83da 100644 --- a/crates/polars-expr/src/reduce/mean.rs +++ b/crates/polars-expr/src/reduce/mean.rs @@ -17,7 +17,7 @@ pub fn new_mean_reduction(dtype: DataType) -> Box { }) }, #[cfg(feature = "dtype-decimal")] - NewDecimal(_, _) => Box::new(VGR::new(dtype, NumMeanReducer::(PhantomData))), + Decimal(_, _) => Box::new(VGR::new(dtype, NumMeanReducer::(PhantomData))), // For compatibility with the current engine, should probably be an error. String | Binary => Box::new(super::NullGroupedReduction::new(dtype)), @@ -43,7 +43,7 @@ fn finish_output(values: Vec<(f64, usize)>, dtype: &DataType) -> Series { ca.into_series() }, #[cfg(feature = "dtype-decimal")] - DataType::NewDecimal(_prec, scale) => { + DataType::Decimal(_prec, scale) => { let inv_scale_factor = 1.0 / 10u128.pow(*scale as u32) as f64; let ca: Float64Chunked = values .into_iter() diff --git a/crates/polars-expr/src/reduce/min_max.rs b/crates/polars-expr/src/reduce/min_max.rs index 44a6991c2c09..a199230888a6 100644 --- a/crates/polars-expr/src/reduce/min_max.rs +++ b/crates/polars-expr/src/reduce/min_max.rs @@ -36,7 +36,7 @@ pub fn new_min_reduction(dtype: DataType, propagate_nans: bool) -> Box Box::new(VMGR::new(dtype, NumReducer::>::new())), + Decimal(_, _) => Box::new(VMGR::new(dtype, NumReducer::>::new())), #[cfg(feature = "dtype-categorical")] Categorical(cats, map) => with_match_categorical_physical_type!(cats.physical(), |$C| { Box::new(VMGR::new(dtype.clone(), CatMinReducer::<$C>(map.clone(), PhantomData))) @@ -68,7 +68,7 @@ pub fn new_max_reduction(dtype: DataType, propagate_nans: bool) -> Box Box::new(VMGR::new(dtype, NumReducer::>::new())), + Decimal(_, _) => Box::new(VMGR::new(dtype, NumReducer::>::new())), #[cfg(feature = "dtype-categorical")] Categorical(cats, map) => with_match_categorical_physical_type!(cats.physical(), |$C| { Box::new(VMGR::new(dtype.clone(), CatMaxReducer::<$C>(map.clone(), PhantomData))) diff --git a/crates/polars-expr/src/reduce/sum.rs b/crates/polars-expr/src/reduce/sum.rs index 57214d8eabcd..9d2fe4cf8e8d 100644 --- a/crates/polars-expr/src/reduce/sum.rs +++ b/crates/polars-expr/src/reduce/sum.rs @@ -53,7 +53,7 @@ pub fn new_sum_reduction(dtype: DataType) -> Box { }) }, #[cfg(feature = "dtype-decimal")] - NewDecimal(_, _) => Box::new(VGR::new(dtype, NumSumReducer::(PhantomData))), + Decimal(_, _) => Box::new(VGR::new(dtype, NumSumReducer::(PhantomData))), Duration(_) => Box::new(VGR::new(dtype, NumSumReducer::(PhantomData))), // For compatibility with the current engine, should probably be an error. String | Binary => Box::new(super::NullGroupedReduction::new(dtype)), diff --git a/crates/polars-expr/src/reduce/var_std.rs b/crates/polars-expr/src/reduce/var_std.rs index 977ccaad86b5..f48508e2f86b 100644 --- a/crates/polars-expr/src/reduce/var_std.rs +++ b/crates/polars-expr/src/reduce/var_std.rs @@ -22,7 +22,7 @@ pub fn new_var_std_reduction(dtype: DataType, is_std: bool, ddof: u8) -> Box Box::new(VGR::new( + Decimal(_, _) => Box::new(VGR::new( dtype, VarStdReducer:: { is_std, diff --git a/crates/polars-io/src/catalog/unity/schema.rs b/crates/polars-io/src/catalog/unity/schema.rs index a81492140df9..f73d600273bf 100644 --- a/crates/polars-io/src/catalog/unity/schema.rs +++ b/crates/polars-io/src/catalog/unity/schema.rs @@ -219,7 +219,7 @@ fn parse_type_text(type_text: &str) -> PolarsResult { let precision: usize = precision.parse().ok()?; let scale: usize = scale.parse().ok()?; - Some(DataType::NewDecimal(precision, scale)) + Some(DataType::Decimal(precision, scale)) })() .ok_or_else(|| { polars_err!( @@ -297,7 +297,7 @@ fn dtype_to_type_text(dtype: &DataType) -> PolarsResult { Null => S!("null"), - NewDecimal(precision, scale) => { + Decimal(precision, scale) => { format_pl_smallstr!("decimal({},{})", precision, scale) }, @@ -373,7 +373,7 @@ fn dtype_to_type_name(dtype: &DataType) -> PolarsResult { Null => S!("NULL"), - NewDecimal(..) => S!("DECIMAL"), + Decimal(..) => S!("DECIMAL"), List(inner) => { if get_list_map_type(inner).is_some() { @@ -437,7 +437,7 @@ fn dtype_to_type_json(dtype: &DataType) -> PolarsResult { Null => S!("null"), - NewDecimal(..) => ColumnTypeJsonType::TypeName(dtype_to_type_text(dtype)?), + Decimal(..) => ColumnTypeJsonType::TypeName(dtype_to_type_text(dtype)?), List(inner) => { let out = if let Some((key_type, value_type)) = get_list_map_type(inner) { diff --git a/crates/polars-io/src/csv/read/reader.rs b/crates/polars-io/src/csv/read/reader.rs index c978c063e928..635e15c904c6 100644 --- a/crates/polars-io/src/csv/read/reader.rs +++ b/crates/polars-io/src/csv/read/reader.rs @@ -219,7 +219,7 @@ pub fn prepare_csv_schema( PolarsResult::Ok(fld) }, #[cfg(feature = "dtype-decimal")] - NewDecimal(_, _) => { + Decimal(_, _) => { fields_to_cast.push(fld.clone()); fld.coerce(String); PolarsResult::Ok(fld) diff --git a/crates/polars-io/src/csv/write/write_impl/serializer.rs b/crates/polars-io/src/csv/write/write_impl/serializer.rs index fd0d74019436..5936d3485eb2 100644 --- a/crates/polars-io/src/csv/write/write_impl/serializer.rs +++ b/crates/polars-io/src/csv/write/write_impl/serializer.rs @@ -906,7 +906,7 @@ pub(super) fn serializer_for<'a>( }) }, #[cfg(feature = "dtype-decimal")] - DataType::NewDecimal(_, scale) => { + DataType::Decimal(_, scale) => { quote_wrapper!(decimal_serializer, *scale) }, _ => { diff --git a/crates/polars-ops/src/chunked_array/gather/chunked.rs b/crates/polars-ops/src/chunked_array/gather/chunked.rs index b2cb878710b3..2321ad455283 100644 --- a/crates/polars-ops/src/chunked_array/gather/chunked.rs +++ b/crates/polars-ops/src/chunked_array/gather/chunked.rs @@ -180,7 +180,7 @@ impl TakeChunked for Series { #[cfg(feature = "object")] Object(_) => take_unchecked_object(self, by, sorted), #[cfg(feature = "dtype-decimal")] - NewDecimal(_, _) => { + Decimal(_, _) => { let ca = self.decimal().unwrap(); let out = ca.phys.take_chunked_unchecked(by, sorted, avoid_sharing); out.into_decimal_unchecked(ca.precision(), ca.scale()) @@ -280,7 +280,7 @@ impl TakeChunked for Series { #[cfg(feature = "object")] Object(_) => take_opt_unchecked_object(self, by, avoid_sharing), #[cfg(feature = "dtype-decimal")] - NewDecimal(_, _) => { + Decimal(_, _) => { let ca = self.decimal().unwrap(); let out = ca.phys.take_opt_chunked_unchecked(by, avoid_sharing); out.into_decimal_unchecked(ca.precision(), ca.scale()) diff --git a/crates/polars-ops/src/series/ops/abs.rs b/crates/polars-ops/src/series/ops/abs.rs index 19c87cc3ef1d..0046b8031fcc 100644 --- a/crates/polars-ops/src/series/ops/abs.rs +++ b/crates/polars-ops/src/series/ops/abs.rs @@ -15,7 +15,7 @@ pub fn abs(s: &Series) -> PolarsResult { Float32 => s.f32().unwrap().wrapping_abs().into_series(), Float64 => s.f64().unwrap().wrapping_abs().into_series(), #[cfg(feature = "dtype-decimal")] - NewDecimal(_, _) => { + Decimal(_, _) => { let ca = s.decimal().unwrap(); let precision = ca.precision(); let scale = ca.scale(); diff --git a/crates/polars-ops/src/series/ops/clip.rs b/crates/polars-ops/src/series/ops/clip.rs index 85cafe80050a..e3c400f4e423 100644 --- a/crates/polars-ops/src/series/ops/clip.rs +++ b/crates/polars-ops/src/series/ops/clip.rs @@ -43,7 +43,7 @@ pub fn clip(s: &Series, min: &Series, max: &Series) -> PolarsResult { let out = clip_helper_both_bounds(ca, min, max).into_series(); match original_type { #[cfg(feature = "dtype-decimal")] - DataType::NewDecimal(precision, scale) => { + DataType::Decimal(precision, scale) => { let phys = out.i128()?.as_ref().clone(); Ok(phys.into_decimal_unchecked(*precision, *scale).into_series()) }, @@ -77,7 +77,7 @@ pub fn clip_max(s: &Series, max: &Series) -> PolarsResult { let out = clip_helper_single_bound(ca, max, num_traits::clamp_max).into_series(); match original_type { #[cfg(feature = "dtype-decimal")] - DataType::NewDecimal(precision, scale) => { + DataType::Decimal(precision, scale) => { let phys = out.i128()?.as_ref().clone(); Ok(phys.into_decimal_unchecked(*precision, *scale).into_series()) }, @@ -111,7 +111,7 @@ pub fn clip_min(s: &Series, min: &Series) -> PolarsResult { let out = clip_helper_single_bound(ca, min, num_traits::clamp_min).into_series(); match original_type { #[cfg(feature = "dtype-decimal")] - DataType::NewDecimal(precision, scale) => { + DataType::Decimal(precision, scale) => { let phys = out.i128()?.as_ref().clone(); Ok(phys.into_decimal_unchecked(*precision, *scale).into_series()) }, diff --git a/crates/polars-ops/src/series/ops/cum_agg.rs b/crates/polars-ops/src/series/ops/cum_agg.rs index 014bec6dc825..42b56a2db195 100644 --- a/crates/polars-ops/src/series/ops/cum_agg.rs +++ b/crates/polars-ops/src/series/ops/cum_agg.rs @@ -311,7 +311,7 @@ pub fn cum_sum_with_init( Float32 => cum_sum_numeric(s.f32()?, reverse, init.extract()).into_series(), Float64 => cum_sum_numeric(s.f64()?, reverse, init.extract()).into_series(), #[cfg(feature = "dtype-decimal")] - NewDecimal(_precision, scale) => { + Decimal(_precision, scale) => { use polars_compute::decimal::DEC128_MAX_PREC; let ca = s.decimal().unwrap().physical(); cum_sum_decimal(ca, reverse, init.clone().to_physical().extract())? @@ -347,7 +347,7 @@ pub fn cum_min_with_init( Ok(cum_min_bool(s.bool()?, reverse, init.extract_bool()).into_series()) }, #[cfg(feature = "dtype-decimal")] - DataType::NewDecimal(precision, scale) => { + DataType::Decimal(precision, scale) => { let ca = s.decimal().unwrap().physical(); let out = cum_min_numeric(ca, reverse, init.clone().to_physical().extract()) .into_decimal_unchecked(*precision, *scale) @@ -385,7 +385,7 @@ pub fn cum_max_with_init( Ok(cum_max_bool(s.bool()?, reverse, init.extract_bool()).into_series()) }, #[cfg(feature = "dtype-decimal")] - DataType::NewDecimal(precision, scale) => { + DataType::Decimal(precision, scale) => { let ca = s.decimal().unwrap().physical(); let out = cum_max_numeric(ca, reverse, init.clone().to_physical().extract()) .into_decimal_unchecked(*precision, *scale) diff --git a/crates/polars-ops/src/series/ops/index_of.rs b/crates/polars-ops/src/series/ops/index_of.rs index 9fea4e9c5c43..e1580db70a21 100644 --- a/crates/polars-ops/src/series/ops/index_of.rs +++ b/crates/polars-ops/src/series/ops/index_of.rs @@ -151,7 +151,7 @@ pub fn index_of(series: &Series, needle: Scalar) -> PolarsResult> // to_physical #[cfg(feature = "dtype-decimal")] - DT::NewDecimal(..) => unreachable!(), + DT::Decimal(..) => unreachable!(), #[cfg(feature = "dtype-categorical")] DT::Categorical(..) | DT::Enum(..) => unreachable!(), DT::Date | DT::Datetime(..) | DT::Duration(..) | DT::Time => unreachable!(), diff --git a/crates/polars-ops/src/series/ops/interpolation/interpolate.rs b/crates/polars-ops/src/series/ops/interpolation/interpolate.rs index 53fd15fbc9f8..175016615360 100644 --- a/crates/polars-ops/src/series/ops/interpolation/interpolate.rs +++ b/crates/polars-ops/src/series/ops/interpolation/interpolate.rs @@ -127,7 +127,7 @@ fn interpolate_nearest(s: &Series) -> Series { let out = downcast_as_macro_arg_physical!(s, dispatch); match logical { #[cfg(feature = "dtype-decimal")] - DataType::NewDecimal(_, _) => unsafe { + DataType::Decimal(_, _) => unsafe { out.from_physical_unchecked(logical).unwrap() }, _ => out.cast(logical).unwrap(), @@ -151,7 +151,7 @@ fn interpolate_linear(s: &Series) -> Series { #[cfg(feature = "dtype-decimal")] { - if matches!(logical, DataType::NewDecimal(_, _)) { + if matches!(logical, DataType::Decimal(_, _)) { let out = linear_interp_signed(s.i128().unwrap()); return unsafe { out.from_physical_unchecked(logical).unwrap() }; } diff --git a/crates/polars-ops/src/series/ops/is_in.rs b/crates/polars-ops/src/series/ops/is_in.rs index cf7f110f4022..6041ef942574 100644 --- a/crates/polars-ops/src/series/ops/is_in.rs +++ b/crates/polars-ops/src/series/ops/is_in.rs @@ -509,7 +509,7 @@ fn is_in_decimal( other: &Series, nulls_equal: bool, ) -> PolarsResult { - let Some(DataType::NewDecimal(other_precision, other_scale)) = other.dtype().inner_dtype() + let Some(DataType::Decimal(other_precision, other_scale)) = other.dtype().inner_dtype() else { polars_bail!(opq = is_in, ca_in.dtype(), other.dtype()); }; @@ -651,7 +651,7 @@ pub fn is_in(s: &Series, other: &Series, nulls_equal: bool) -> PolarsResult is_in_null(s, other, nulls_equal), #[cfg(feature = "dtype-decimal")] - DataType::NewDecimal(_, _) => { + DataType::Decimal(_, _) => { let ca_in = s.decimal()?; is_in_decimal(ca_in, other, nulls_equal) }, diff --git a/crates/polars-ops/src/series/ops/negate.rs b/crates/polars-ops/src/series/ops/negate.rs index 95c2b26b143d..d47698e09573 100644 --- a/crates/polars-ops/src/series/ops/negate.rs +++ b/crates/polars-ops/src/series/ops/negate.rs @@ -12,7 +12,7 @@ pub fn negate(s: &Series) -> PolarsResult { Float32 => s.f32().unwrap().wrapping_neg().into_series(), Float64 => s.f64().unwrap().wrapping_neg().into_series(), #[cfg(feature = "dtype-decimal")] - NewDecimal(_, _) => { + Decimal(_, _) => { let ca = s.decimal().unwrap(); let precision = ca.precision(); let scale = ca.scale(); diff --git a/crates/polars-plan/src/plans/aexpr/function_expr/schema.rs b/crates/polars-plan/src/plans/aexpr/function_expr/schema.rs index 6eb14d7d070c..09cd0b8b3625 100644 --- a/crates/polars-plan/src/plans/aexpr/function_expr/schema.rs +++ b/crates/polars-plan/src/plans/aexpr/function_expr/schema.rs @@ -502,7 +502,7 @@ impl<'a> FieldsMapper<'a> { #[cfg(feature = "dtype-time")] dt @ DataType::Time => dt.clone(), #[cfg(feature = "dtype-decimal")] - DataType::NewDecimal(..) => DataType::Float64, + DataType::Decimal(..) => DataType::Float64, // All other types get mapped to a single `null` of the same type. dt => dt.clone(), @@ -530,7 +530,7 @@ impl<'a> FieldsMapper<'a> { let should_coerce = match dt { DataType::Float32 => false, #[cfg(feature = "dtype-decimal")] - DataType::NewDecimal(..) => coerce_decimal, + DataType::Decimal(..) => coerce_decimal, DataType::Boolean => true, dt => dt.is_primitive_numeric(), }; diff --git a/crates/polars-plan/src/plans/aexpr/function_expr/strings.rs b/crates/polars-plan/src/plans/aexpr/function_expr/strings.rs index be683e809ee9..389b1df193d4 100644 --- a/crates/polars-plan/src/plans/aexpr/function_expr/strings.rs +++ b/crates/polars-plan/src/plans/aexpr/function_expr/strings.rs @@ -193,7 +193,7 @@ impl IRStringFunction { #[cfg(feature = "nightly")] Titlecase => mapper.with_same_dtype(), #[cfg(feature = "dtype-decimal")] - ToDecimal { scale } => mapper.with_dtype(DataType::NewDecimal(DEC128_MAX_PREC, *scale)), + ToDecimal { scale } => mapper.with_dtype(DataType::Decimal(DEC128_MAX_PREC, *scale)), #[cfg(feature = "string_encoding")] HexEncode => mapper.with_same_dtype(), #[cfg(feature = "binary_encoding")] diff --git a/crates/polars-plan/src/plans/aexpr/predicates/column_expr.rs b/crates/polars-plan/src/plans/aexpr/predicates/column_expr.rs index e0b431a0b41f..072b25498638 100644 --- a/crates/polars-plan/src/plans/aexpr/predicates/column_expr.rs +++ b/crates/polars-plan/src/plans/aexpr/predicates/column_expr.rs @@ -60,7 +60,7 @@ pub fn aexpr_to_column_predicates( continue; }, #[cfg(feature = "dtype-decimal")] - D::NewDecimal(_, _) => { + D::Decimal(_, _) => { is_sumwise_complete = false; continue; }, diff --git a/crates/polars-plan/src/plans/aexpr/schema.rs b/crates/polars-plan/src/plans/aexpr/schema.rs index bd4cd3e8bb5e..19f52396af39 100644 --- a/crates/polars-plan/src/plans/aexpr/schema.rs +++ b/crates/polars-plan/src/plans/aexpr/schema.rs @@ -490,8 +490,8 @@ fn get_arithmetic_field( )?) }, #[cfg(feature = "dtype-decimal")] - (NewDecimal(_, scale_left), NewDecimal(_, scale_right)) => { - NewDecimal(DEC128_MAX_PREC, *scale_left.max(scale_right)) + (Decimal(_, scale_left), Decimal(_, scale_right)) => { + Decimal(DEC128_MAX_PREC, *scale_left.max(scale_right)) }, (left, right) => try_get_supertype(left, right)?, } @@ -552,8 +552,8 @@ fn get_arithmetic_field( )?) }, #[cfg(feature = "dtype-decimal")] - (NewDecimal(_, scale_left), NewDecimal(_, scale_right)) => { - NewDecimal(DEC128_MAX_PREC, *scale_left.max(scale_right)) + (Decimal(_, scale_left), Decimal(_, scale_right)) => { + Decimal(DEC128_MAX_PREC, *scale_left.max(scale_right)) }, (left, right) => try_get_supertype(left, right)?, } @@ -601,8 +601,8 @@ fn get_arithmetic_field( }, }, #[cfg(feature = "dtype-decimal")] - (NewDecimal(_, scale_left), NewDecimal(_, scale_right)) => { - let dtype = NewDecimal(DEC128_MAX_PREC, *scale_left.max(scale_right)); + (Decimal(_, scale_left), Decimal(_, scale_right)) => { + let dtype = Decimal(DEC128_MAX_PREC, *scale_left.max(scale_right)); left_field.coerce(dtype); return Ok(left_field); }, @@ -756,8 +756,8 @@ fn get_truediv_dtype(left_dtype: &DataType, right_dtype: &DataType) -> PolarsRes InvalidOperation: "division with 'String' datatypes is not allowed" ), #[cfg(feature = "dtype-decimal")] - (NewDecimal(_, scale_left), NewDecimal(_, scale_right)) => { - NewDecimal(DEC128_MAX_PREC, *scale_left.max(scale_right)) + (Decimal(_, scale_left), Decimal(_, scale_right)) => { + Decimal(DEC128_MAX_PREC, *scale_left.max(scale_right)) }, #[cfg(feature = "dtype-u8")] (UInt8 | Int8, Float32) => Float32, diff --git a/crates/polars-plan/src/plans/conversion/type_coercion/is_in.rs b/crates/polars-plan/src/plans/conversion/type_coercion/is_in.rs index f72b3d0af075..d84f276debb6 100644 --- a/crates/polars-plan/src/plans/conversion/type_coercion/is_in.rs +++ b/crates/polars-plan/src/plans/conversion/type_coercion/is_in.rs @@ -94,14 +94,14 @@ See https://github.com/pola-rs/polars/issues/22149 for more information." }, #[cfg(feature = "dtype-decimal")] - (DataType::NewDecimal(_, _), dt) if dt.is_primitive_numeric() => { + (DataType::Decimal(_, _), dt) if dt.is_primitive_numeric() => { IsInTypeCoercionResult::OtherCast { dtype: cast_type, strict: false, } }, #[cfg(feature = "dtype-decimal")] - (DataType::NewDecimal(_, _), _) | (_, DataType::NewDecimal(_, _)) => { + (DataType::Decimal(_, _), _) | (_, DataType::Decimal(_, _)) => { polars_bail!(InvalidOperation: "'{op}' cannot check for {:?} values in {:?} data", &type_other, &type_left) }, // can't check for more granular time_unit in less-granular time_unit data, diff --git a/crates/polars-python/src/conversion/any_value.rs b/crates/polars-python/src/conversion/any_value.rs index d49b55e6bf39..518c03234ae1 100644 --- a/crates/polars-python/src/conversion/any_value.rs +++ b/crates/polars-python/src/conversion/any_value.rs @@ -121,7 +121,7 @@ pub(crate) fn any_value_into_py_object<'py>( }, AnyValue::Binary(v) => PyBytes::new(py, v).into_bound_py_any(py), AnyValue::BinaryOwned(v) => PyBytes::new(py, &v).into_bound_py_any(py), - AnyValue::NewDecimal(v, prec, scale) => { + AnyValue::Decimal(v, prec, scale) => { let convert = utils.getattr(intern!(py, "to_py_decimal"))?; let mut buf = DecimalFmtBuffer::new(); let s = buf.format_dec128(v, scale, false); @@ -351,7 +351,7 @@ pub(crate) fn py_object_to_any_value( if sign > 0 { v = -v; // Won't overflow since -i128::MAX > i128::MIN } - Ok(AnyValue::NewDecimal(v, DEC128_MAX_PREC, scale)) + Ok(AnyValue::Decimal(v, DEC128_MAX_PREC, scale)) } fn get_list(ob: &Bound<'_, PyAny>, strict: bool) -> PyResult> { diff --git a/crates/polars-python/src/conversion/mod.rs b/crates/polars-python/src/conversion/mod.rs index aebbec3d8e60..7525438a091a 100644 --- a/crates/polars-python/src/conversion/mod.rs +++ b/crates/polars-python/src/conversion/mod.rs @@ -226,7 +226,7 @@ impl<'py> IntoPyObject<'py> for &Wrap { let class = pl.getattr(intern!(py, "Float64"))?; class.call0() }, - DataType::NewDecimal(precision, scale) => { + DataType::Decimal(precision, scale) => { let class = pl.getattr(intern!(py, "Decimal"))?; let args = (*precision, *scale); class.call1(args) @@ -439,7 +439,7 @@ impl<'py> FromPyObject<'py> for Wrap { let precision = ob.getattr(intern!(py, "precision"))?.extract()?; let scale = ob.getattr(intern!(py, "scale"))?.extract()?; dec128_verify_prec_scale(precision, scale).map_err(to_py_err)?; - DataType::NewDecimal(precision, scale) + DataType::Decimal(precision, scale) }, "List" => { let inner = ob.getattr(intern!(py, "inner")).unwrap(); diff --git a/crates/polars-python/src/interop/numpy/to_numpy_series.rs b/crates/polars-python/src/interop/numpy/to_numpy_series.rs index 9bd40eb4349d..a40d6a738410 100644 --- a/crates/polars-python/src/interop/numpy/to_numpy_series.rs +++ b/crates/polars-python/src/interop/numpy/to_numpy_series.rs @@ -257,7 +257,7 @@ fn series_to_numpy_with_copy(py: Python<'_>, s: &Series, writable: bool) -> PyOb PyArray1::from_iter(py, values).into_py_any(py).unwrap() }) }, - NewDecimal(_, _) => { + Decimal(_, _) => { let ca = s.decimal().unwrap(); let values = decimal_to_pyobject_iter(py, ca) .unwrap() diff --git a/crates/polars-python/src/series/comparison.rs b/crates/polars-python/src/series/comparison.rs index f6874e5829da..91896275765c 100644 --- a/crates/polars-python/src/series/comparison.rs +++ b/crates/polars-python/src/series/comparison.rs @@ -173,7 +173,7 @@ macro_rules! impl_decimal { fn $name(&self, py: Python<'_>, rhs: PyDecimal) -> PyResult { let rhs = Series::new( PlSmallStr::from_static("decimal"), - &[AnyValue::NewDecimal(rhs.0, rhs.1, rhs.2)], + &[AnyValue::Decimal(rhs.0, rhs.1, rhs.2)], ); py.enter_polars_series(|| self.series.read().$method(&rhs)) } diff --git a/crates/polars-python/src/series/export.rs b/crates/polars-python/src/series/export.rs index 4f93d37d3286..015a567c156c 100644 --- a/crates/polars-python/src/series/export.rs +++ b/crates/polars-python/src/series/export.rs @@ -90,7 +90,7 @@ impl PySeries { let ca = series.datetime().map_err(PyPolarsErr::from)?; return Wrap(ca).into_bound_py_any(py); }, - DataType::NewDecimal(_, _) => { + DataType::Decimal(_, _) => { let ca = series.decimal().map_err(PyPolarsErr::from)?; return Wrap(ca).into_bound_py_any(py); }, diff --git a/crates/polars-python/src/series/import.rs b/crates/polars-python/src/series/import.rs index c012cd473ec8..4b82e41dff65 100644 --- a/crates/polars-python/src/series/import.rs +++ b/crates/polars-python/src/series/import.rs @@ -213,7 +213,7 @@ impl PySeries { Series::from_chunks_and_dtype_unchecked( PlSmallStr::EMPTY, vec![PrimitiveArray::::from_vec(out).boxed()], - &DataType::Decimal(Some(precision), Some(scale)), + &DataType::NewDecimal(precision, scale), ) })) } diff --git a/crates/polars-python/src/series/map.rs b/crates/polars-python/src/series/map.rs index d283ca4ece4d..14adc5368ee5 100644 --- a/crates/polars-python/src/series/map.rs +++ b/crates/polars-python/src/series/map.rs @@ -67,7 +67,7 @@ impl PySeries { | DataType::Binary | DataType::Array(_, _) | DataType::Time - | DataType::NewDecimal(_, _) + | DataType::Decimal(_, _) ) || !skip_nulls { let mut avs = Vec::with_capacity(series.len()); diff --git a/crates/polars-sql/src/types.rs b/crates/polars-sql/src/types.rs index 319b8e19c2fb..1ec23e05af4e 100644 --- a/crates/polars-sql/src/types.rs +++ b/crates/polars-sql/src/types.rs @@ -132,10 +132,10 @@ pub(crate) fn map_sql_dtype_to_polars(dtype: &SQLDataType) -> PolarsResult match *info { ExactNumberInfo::PrecisionAndScale(p, s) => { - DataType::NewDecimal(p as usize, s as usize) + DataType::Decimal(p as usize, s as usize) }, - ExactNumberInfo::Precision(p) => DataType::NewDecimal(p as usize, 0), - ExactNumberInfo::None => DataType::NewDecimal(38, 9), + ExactNumberInfo::Precision(p) => DataType::Decimal(p as usize, 0), + ExactNumberInfo::None => DataType::Decimal(38, 9), }, // --------------------------------- diff --git a/crates/polars-testing/src/asserts/series.rs b/crates/polars-testing/src/asserts/series.rs index a0698eeec674..9c4cbaf2d537 100644 --- a/crates/polars-testing/src/asserts/series.rs +++ b/crates/polars-testing/src/asserts/series.rs @@ -601,10 +601,10 @@ mod tests { #[should_panic(expected = "exact value mismatch")] fn test_series_decimal_values_mismatch() { let s1 = Series::new("".into(), &[1, 2]) - .cast(&DataType::NewDecimal(10, 2)) + .cast(&DataType::Decimal(10, 2)) .unwrap(); let s2 = Series::new("".into(), &[1, 3]) - .cast(&DataType::NewDecimal(10, 2)) + .cast(&DataType::Decimal(10, 2)) .unwrap(); assert_series_equal!(&s1, &s2); @@ -613,10 +613,10 @@ mod tests { #[test] fn test_series_decimal_values_match() { let s1 = Series::new("".into(), &[1, 2]) - .cast(&DataType::NewDecimal(10, 2)) + .cast(&DataType::Decimal(10, 2)) .unwrap(); let s2 = Series::new("".into(), &[1, 2]) - .cast(&DataType::NewDecimal(10, 2)) + .cast(&DataType::Decimal(10, 2)) .unwrap(); assert_series_equal!(&s1, &s2); diff --git a/crates/polars/tests/it/lazy/group_by.rs b/crates/polars/tests/it/lazy/group_by.rs index 98b07b335b5d..31d33100615a 100644 --- a/crates/polars/tests/it/lazy/group_by.rs +++ b/crates/polars/tests/it/lazy/group_by.rs @@ -141,7 +141,7 @@ fn test_logical_mean_partitioned_group_by_block() -> PolarsResult<()> { let out = df .lazy() - .with_column(col("decimal").cast(DataType::NewDecimal(38, 2))) + .with_column(col("decimal").cast(DataType::Decimal(38, 2))) .with_column(col("duration").cast(DataType::Duration(TimeUnit::Microseconds))) .group_by([col("decimal")]) .agg([col("duration").mean()]) diff --git a/pyo3-polars/pyo3-polars/src/types.rs b/pyo3-polars/pyo3-polars/src/types.rs index 34a58995e7d8..5aa04f60c32c 100644 --- a/pyo3-polars/pyo3-polars/src/types.rs +++ b/pyo3-polars/pyo3-polars/src/types.rs @@ -465,7 +465,7 @@ impl<'py> IntoPyObject<'py> for PyDataType { class.call0() }, #[cfg(feature = "dtype-decimal")] - DataType::NewDecimal(precision, scale) => { + DataType::Decimal(precision, scale) => { let class = pl.getattr(intern!(py, "Decimal")).unwrap(); let args = (*precision, *scale); class.call1(args) @@ -683,7 +683,7 @@ impl<'py> FromPyObject<'py> for PyDataType { "Decimal" => { let precision = ob.getattr(intern!(py, "precision"))?.extract()?; let scale = ob.getattr(intern!(py, "scale"))?.extract()?; - DataType::NewDecimal(precision, scale) + DataType::Decimal(precision, scale) }, "List" => { let inner = ob.getattr(intern!(py, "inner")).unwrap(); From 7b5f95bf5e135eab76ac7295d2777bef9360d802 Mon Sep 17 00:00:00 2001 From: Orson Peters Date: Thu, 18 Sep 2025 17:42:34 +0200 Subject: [PATCH 03/18] Fmt --- crates/polars-core/src/datatypes/any_value.rs | 4 +--- crates/polars-core/src/datatypes/dtype.rs | 4 +--- crates/polars-ops/src/series/ops/interpolation/interpolate.rs | 4 +--- crates/polars-ops/src/series/ops/is_in.rs | 3 +-- crates/polars-sql/src/types.rs | 4 +--- 5 files changed, 5 insertions(+), 14 deletions(-) diff --git a/crates/polars-core/src/datatypes/any_value.rs b/crates/polars-core/src/datatypes/any_value.rs index 111f9f7f5109..d0bf63a4cdd6 100644 --- a/crates/polars-core/src/datatypes/any_value.rs +++ b/crates/polars-core/src/datatypes/any_value.rs @@ -1382,9 +1382,7 @@ impl PartialOrd for AnyValue<'_> { unimplemented!("ordering for Struct dtype is not supported") }, #[cfg(feature = "dtype-decimal")] - (Decimal(lv, _lp, ls), Decimal(rv, _rp, rs)) => { - Some(dec128_cmp(*lv, *ls, *rv, *rs)) - }, + (Decimal(lv, _lp, ls), Decimal(rv, _rp, rs)) => Some(dec128_cmp(*lv, *ls, *rv, *rs)), (_, _) => { unimplemented!( diff --git a/crates/polars-core/src/datatypes/dtype.rs b/crates/polars-core/src/datatypes/dtype.rs index 0a73b83cf4ae..ad9b65396341 100644 --- a/crates/polars-core/src/datatypes/dtype.rs +++ b/crates/polars-core/src/datatypes/dtype.rs @@ -974,9 +974,7 @@ impl DataType { }, (DataType::Null, DataType::Null) => Ok(false), #[cfg(feature = "dtype-decimal")] - (DataType::Decimal(p1, s1), DataType::Decimal(p2, s2)) => { - Ok((p1, s1) != (p2, s2)) - }, + (DataType::Decimal(p1, s1), DataType::Decimal(p2, s2)) => Ok((p1, s1) != (p2, s2)), // We don't allow the other way around, only if our current type is // null and the schema isn't we allow it. (DataType::Null, _) => Ok(true), diff --git a/crates/polars-ops/src/series/ops/interpolation/interpolate.rs b/crates/polars-ops/src/series/ops/interpolation/interpolate.rs index 175016615360..f3bc1f3af9f1 100644 --- a/crates/polars-ops/src/series/ops/interpolation/interpolate.rs +++ b/crates/polars-ops/src/series/ops/interpolation/interpolate.rs @@ -127,9 +127,7 @@ fn interpolate_nearest(s: &Series) -> Series { let out = downcast_as_macro_arg_physical!(s, dispatch); match logical { #[cfg(feature = "dtype-decimal")] - DataType::Decimal(_, _) => unsafe { - out.from_physical_unchecked(logical).unwrap() - }, + DataType::Decimal(_, _) => unsafe { out.from_physical_unchecked(logical).unwrap() }, _ => out.cast(logical).unwrap(), } }, diff --git a/crates/polars-ops/src/series/ops/is_in.rs b/crates/polars-ops/src/series/ops/is_in.rs index 6041ef942574..a538bb10651f 100644 --- a/crates/polars-ops/src/series/ops/is_in.rs +++ b/crates/polars-ops/src/series/ops/is_in.rs @@ -509,8 +509,7 @@ fn is_in_decimal( other: &Series, nulls_equal: bool, ) -> PolarsResult { - let Some(DataType::Decimal(other_precision, other_scale)) = other.dtype().inner_dtype() - else { + let Some(DataType::Decimal(other_precision, other_scale)) = other.dtype().inner_dtype() else { polars_bail!(opq = is_in, ca_in.dtype(), other.dtype()); }; let prec = ca_in.precision().max(*other_precision); diff --git a/crates/polars-sql/src/types.rs b/crates/polars-sql/src/types.rs index 1ec23e05af4e..f5b38798dd70 100644 --- a/crates/polars-sql/src/types.rs +++ b/crates/polars-sql/src/types.rs @@ -131,9 +131,7 @@ pub(crate) fn map_sql_dtype_to_polars(dtype: &SQLDataType) -> PolarsResult match *info { - ExactNumberInfo::PrecisionAndScale(p, s) => { - DataType::Decimal(p as usize, s as usize) - }, + ExactNumberInfo::PrecisionAndScale(p, s) => DataType::Decimal(p as usize, s as usize), ExactNumberInfo::Precision(p) => DataType::Decimal(p as usize, 0), ExactNumberInfo::None => DataType::Decimal(38, 9), }, From 723a4f091858d15ffc2c0af75c3728388e9f615e Mon Sep 17 00:00:00 2001 From: Orson Peters Date: Thu, 18 Sep 2025 17:50:35 +0200 Subject: [PATCH 04/18] Stray NewDecimal --- crates/polars-python/src/series/import.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/polars-python/src/series/import.rs b/crates/polars-python/src/series/import.rs index 4b82e41dff65..725743636959 100644 --- a/crates/polars-python/src/series/import.rs +++ b/crates/polars-python/src/series/import.rs @@ -213,7 +213,7 @@ impl PySeries { Series::from_chunks_and_dtype_unchecked( PlSmallStr::EMPTY, vec![PrimitiveArray::::from_vec(out).boxed()], - &DataType::NewDecimal(precision, scale), + &DataType::Decimal(precision, scale), ) })) } From 758a91a166166570a0c1dda239d1dcaa77971ff7 Mon Sep 17 00:00:00 2001 From: Orson Peters Date: Fri, 19 Sep 2025 09:33:36 +0200 Subject: [PATCH 05/18] Clipp/feature flag --- crates/polars-compute/src/cast/mod.rs | 32 +++++++++++++++++--- crates/polars-core/src/datatypes/proptest.rs | 2 +- 2 files changed, 29 insertions(+), 5 deletions(-) diff --git a/crates/polars-compute/src/cast/mod.rs b/crates/polars-compute/src/cast/mod.rs index 13140de8cce7..10438e62c2a9 100644 --- a/crates/polars-compute/src/cast/mod.rs +++ b/crates/polars-compute/src/cast/mod.rs @@ -3,6 +3,7 @@ mod binary_to; mod binview_to; mod boolean_to; +#[cfg(feature = "dtype-decimal")] mod decimal_to; mod dictionary_to; mod primitive_to; @@ -15,6 +16,7 @@ pub use binview_to::binview_to_decimal; use binview_to::utf8view_to_primitive_dyn; pub use binview_to::utf8view_to_utf8; pub use boolean_to::*; +#[cfg(feature = "dtype-decimal")] pub use decimal_to::*; pub mod temporal; use arrow::array::*; @@ -575,6 +577,7 @@ pub fn cast( Int128 => primitive_to_boolean_dyn::(array, to_type.clone()), Float32 => primitive_to_boolean_dyn::(array, to_type.clone()), Float64 => primitive_to_boolean_dyn::(array, to_type.clone()), + #[cfg(feature = "dtype-decimal")] Decimal(_, _) => primitive_to_boolean_dyn::(array, to_type.clone()), _ => polars_bail!(InvalidOperation: "casting from {from_type:?} to {to_type:?} not supported", @@ -710,6 +713,7 @@ pub fn cast( (UInt8, Int128) => primitive_to_primitive_dyn::(array, to_type, options), (UInt8, Float32) => primitive_to_primitive_dyn::(array, to_type, as_options), (UInt8, Float64) => primitive_to_primitive_dyn::(array, to_type, as_options), + #[cfg(feature = "dtype-decimal")] (UInt8, Decimal(p, s)) => integer_to_decimal_dyn::(array, *p, *s), (UInt16, UInt8) => primitive_to_primitive_dyn::(array, to_type, options), @@ -725,6 +729,7 @@ pub fn cast( (UInt16, Int128) => primitive_to_primitive_dyn::(array, to_type, options), (UInt16, Float32) => primitive_to_primitive_dyn::(array, to_type, as_options), (UInt16, Float64) => primitive_to_primitive_dyn::(array, to_type, as_options), + #[cfg(feature = "dtype-decimal")] (UInt16, Decimal(p, s)) => integer_to_decimal_dyn::(array, *p, *s), (UInt32, UInt8) => primitive_to_primitive_dyn::(array, to_type, options), @@ -740,6 +745,7 @@ pub fn cast( (UInt32, Int128) => primitive_to_primitive_dyn::(array, to_type, options), (UInt32, Float32) => primitive_to_primitive_dyn::(array, to_type, as_options), (UInt32, Float64) => primitive_to_primitive_dyn::(array, to_type, as_options), + #[cfg(feature = "dtype-decimal")] (UInt32, Decimal(p, s)) => integer_to_decimal_dyn::(array, *p, *s), (UInt64, UInt8) => primitive_to_primitive_dyn::(array, to_type, options), @@ -755,6 +761,7 @@ pub fn cast( (UInt64, Int128) => primitive_to_primitive_dyn::(array, to_type, options), (UInt64, Float32) => primitive_to_primitive_dyn::(array, to_type, as_options), (UInt64, Float64) => primitive_to_primitive_dyn::(array, to_type, as_options), + #[cfg(feature = "dtype-decimal")] (UInt64, Decimal(p, s)) => integer_to_decimal_dyn::(array, *p, *s), #[cfg(feature = "dtype-u128")] @@ -779,7 +786,7 @@ pub fn cast( (UInt128, Float32) => primitive_to_primitive_dyn::(array, to_type, as_options), #[cfg(feature = "dtype-u128")] (UInt128, Float64) => primitive_to_primitive_dyn::(array, to_type, as_options), - #[cfg(feature = "dtype-u128")] + #[cfg(all(feature = "dtype-u128", feature = "dtype-decimal"))] (UInt128, Decimal(p, s)) => integer_to_decimal_dyn::(array, *p, *s), (Int8, UInt8) => primitive_to_primitive_dyn::(array, to_type, options), @@ -795,6 +802,7 @@ pub fn cast( (Int8, Int128) => primitive_to_primitive_dyn::(array, to_type, as_options), (Int8, Float32) => primitive_to_primitive_dyn::(array, to_type, as_options), (Int8, Float64) => primitive_to_primitive_dyn::(array, to_type, as_options), + #[cfg(feature = "dtype-decimal")] (Int8, Decimal(p, s)) => integer_to_decimal_dyn::(array, *p, *s), (Int16, UInt8) => primitive_to_primitive_dyn::(array, to_type, options), @@ -810,6 +818,7 @@ pub fn cast( (Int16, Int128) => primitive_to_primitive_dyn::(array, to_type, as_options), (Int16, Float32) => primitive_to_primitive_dyn::(array, to_type, as_options), (Int16, Float64) => primitive_to_primitive_dyn::(array, to_type, as_options), + #[cfg(feature = "dtype-decimal")] (Int16, Decimal(p, s)) => integer_to_decimal_dyn::(array, *p, *s), (Int32, UInt8) => primitive_to_primitive_dyn::(array, to_type, options), @@ -825,6 +834,7 @@ pub fn cast( (Int32, Int128) => primitive_to_primitive_dyn::(array, to_type, as_options), (Int32, Float32) => primitive_to_primitive_dyn::(array, to_type, as_options), (Int32, Float64) => primitive_to_primitive_dyn::(array, to_type, as_options), + #[cfg(feature = "dtype-decimal")] (Int32, Decimal(p, s)) => integer_to_decimal_dyn::(array, *p, *s), (Int64, UInt8) => primitive_to_primitive_dyn::(array, to_type, options), @@ -840,6 +850,7 @@ pub fn cast( (Int64, Int128) => primitive_to_primitive_dyn::(array, to_type, options), (Int64, Float32) => primitive_to_primitive_dyn::(array, to_type, options), (Int64, Float64) => primitive_to_primitive_dyn::(array, to_type, as_options), + #[cfg(feature = "dtype-decimal")] (Int64, Decimal(p, s)) => integer_to_decimal_dyn::(array, *p, *s), #[cfg(feature = "dtype-i128")] @@ -864,7 +875,7 @@ pub fn cast( (Int128, Float32) => primitive_to_primitive_dyn::(array, to_type, options), #[cfg(feature = "dtype-i128")] (Int128, Float64) => primitive_to_primitive_dyn::(array, to_type, as_options), - #[cfg(feature = "dtype-i128")] + #[cfg(all(feature = "dtype-i128", feature="dtype-decimal"))] (Int128, Decimal(p, s)) => integer_to_decimal_dyn::(array, *p, *s), (Float16, Float32) => { @@ -885,6 +896,7 @@ pub fn cast( #[cfg(feature = "dtype-i128")] (Float32, Int128) => primitive_to_primitive_dyn::(array, to_type, options), (Float32, Float64) => primitive_to_primitive_dyn::(array, to_type, as_options), + #[cfg(feature = "dtype-decimal")] (Float32, Decimal(p, s)) => float_to_decimal_dyn::(array, *p, *s), (Float64, UInt8) => primitive_to_primitive_dyn::(array, to_type, options), @@ -900,22 +912,34 @@ pub fn cast( #[cfg(feature = "dtype-i128")] (Float64, Int128) => primitive_to_primitive_dyn::(array, to_type, options), (Float64, Float32) => primitive_to_primitive_dyn::(array, to_type, options), + #[cfg(feature = "dtype-decimal")] (Float64, Decimal(p, s)) => float_to_decimal_dyn::(array, *p, *s), + #[cfg(feature = "dtype-decimal")] (Decimal(_, _), UInt8) => decimal_to_integer_dyn::(array), + #[cfg(feature = "dtype-decimal")] (Decimal(_, _), UInt16) => decimal_to_integer_dyn::(array), + #[cfg(feature = "dtype-decimal")] (Decimal(_, _), UInt32) => decimal_to_integer_dyn::(array), + #[cfg(feature = "dtype-decimal")] (Decimal(_, _), UInt64) => decimal_to_integer_dyn::(array), - #[cfg(feature = "dtype-u128")] + #[cfg(all(feature = "dtype-decimal", feature = "dtype-u128"))] (Decimal(_, _), UInt128) => decimal_to_integer_dyn::(array), + #[cfg(feature = "dtype-decimal")] (Decimal(_, _), Int8) => decimal_to_integer_dyn::(array), + #[cfg(feature = "dtype-decimal")] (Decimal(_, _), Int16) => decimal_to_integer_dyn::(array), + #[cfg(feature = "dtype-decimal")] (Decimal(_, _), Int32) => decimal_to_integer_dyn::(array), + #[cfg(feature = "dtype-decimal")] (Decimal(_, _), Int64) => decimal_to_integer_dyn::(array), - #[cfg(feature = "dtype-i128")] + #[cfg(all(feature = "dtype-decimal", feature = "dtype-i128"))] (Decimal(_, _), Int128) => decimal_to_integer_dyn::(array), + #[cfg(feature = "dtype-decimal")] (Decimal(_, _), Float32) => decimal_to_float_dyn::(array), + #[cfg(feature = "dtype-decimal")] (Decimal(_, _), Float64) => decimal_to_float_dyn::(array), + #[cfg(feature = "dtype-decimal")] (Decimal(_, _), Decimal(to_p, to_s)) => decimal_to_decimal_dyn(array, *to_p, *to_s), // end numeric casts diff --git a/crates/polars-core/src/datatypes/proptest.rs b/crates/polars-core/src/datatypes/proptest.rs index 96e831514cb3..e484143b5eee 100644 --- a/crates/polars-core/src/datatypes/proptest.rs +++ b/crates/polars-core/src/datatypes/proptest.rs @@ -153,7 +153,7 @@ fn decimal_strategy( decimal_precision_range .clone() .prop_flat_map(move |precision| { - let scale_strategy = (0_usize..=precision); + let scale_strategy = 0_usize..=precision; (Just(precision), scale_strategy) }) .prop_map(|(precision, scale)| DataType::Decimal(precision, scale)) From e605be5be3252e4b22d7a9898643e38ef553034c Mon Sep 17 00:00:00 2001 From: Orson Peters Date: Fri, 19 Sep 2025 09:44:51 +0200 Subject: [PATCH 06/18] More feature flags / test fixes --- crates/polars-compute/src/cast/mod.rs | 2 +- .../src/chunked_array/ops/decimal.rs | 17 +++--- crates/polars-core/src/series/any_value.rs | 1 + crates/polars-core/src/series/from.rs | 59 ++++++++++--------- 4 files changed, 43 insertions(+), 36 deletions(-) diff --git a/crates/polars-compute/src/cast/mod.rs b/crates/polars-compute/src/cast/mod.rs index 10438e62c2a9..59f3bde1b027 100644 --- a/crates/polars-compute/src/cast/mod.rs +++ b/crates/polars-compute/src/cast/mod.rs @@ -875,7 +875,7 @@ pub fn cast( (Int128, Float32) => primitive_to_primitive_dyn::(array, to_type, options), #[cfg(feature = "dtype-i128")] (Int128, Float64) => primitive_to_primitive_dyn::(array, to_type, as_options), - #[cfg(all(feature = "dtype-i128", feature="dtype-decimal"))] + #[cfg(all(feature = "dtype-i128", feature = "dtype-decimal"))] (Int128, Decimal(p, s)) => integer_to_decimal_dyn::(array, *p, *s), (Float16, Float32) => { diff --git a/crates/polars-core/src/chunked_array/ops/decimal.rs b/crates/polars-core/src/chunked_array/ops/decimal.rs index a6997efead4f..697320ae974e 100644 --- a/crates/polars-core/src/chunked_array/ops/decimal.rs +++ b/crates/polars-core/src/chunked_array/ops/decimal.rs @@ -1,3 +1,5 @@ +use polars_compute::decimal::dec128_verify_prec_scale; + use crate::chunked_array::cast::CastOptions; use crate::prelude::*; @@ -16,7 +18,7 @@ impl StringChunked { let mut valid_count = 0; while let Some(Some(v)) = iter.next() { let mut bytes = v.as_bytes(); - if bytes.first() == Some(&b'-') { + if bytes.first() == Some(&b'-') || bytes.first() == Some(&b'+') { bytes = &bytes[1..]; } if let Some(separator) = bytes.iter().position(|b| *b == b'.') { @@ -36,6 +38,7 @@ impl StringChunked { } pub fn to_decimal(&self, prec: usize, scale: usize) -> PolarsResult { + dec128_verify_prec_scale(prec, scale)?; self.cast_with_options(&DataType::Decimal(prec, scale), CastOptions::NonStrict) } } @@ -47,7 +50,7 @@ mod test { use super::*; let vals = [ "1.0", - "invalid", + "wrong", "225.0", "3.00045", "-4.0", @@ -56,12 +59,12 @@ mod test { ]; let s = StringChunked::from_slice(PlSmallStr::from_str("test"), &vals); let s = s.to_decimal_infer(6).unwrap(); - assert_eq!(s.dtype(), &DataType::Decimal(12, 5)); + assert_eq!(s.dtype(), &DataType::Decimal(6, 5)); assert_eq!(s.len(), 7); - assert_eq!(s.get(0).unwrap(), AnyValue::Decimal(100000, 12, 5)); + assert_eq!(s.get(0).unwrap(), AnyValue::Decimal(100000, 6, 5)); assert_eq!(s.get(1).unwrap(), AnyValue::Null); - assert_eq!(s.get(3).unwrap(), AnyValue::Decimal(300045, 12, 5)); - assert_eq!(s.get(4).unwrap(), AnyValue::Decimal(-400000, 12, 5)); - assert_eq!(s.get(6).unwrap(), AnyValue::Decimal(525251, 12, 5)); + assert_eq!(s.get(3).unwrap(), AnyValue::Decimal(300045, 6, 5)); + assert_eq!(s.get(4).unwrap(), AnyValue::Decimal(-400000, 6, 5)); + assert_eq!(s.get(6).unwrap(), AnyValue::Decimal(525251, 6, 5)); } } diff --git a/crates/polars-core/src/series/any_value.rs b/crates/polars-core/src/series/any_value.rs index f648d005e15c..5ec1890d6b9d 100644 --- a/crates/polars-core/src/series/any_value.rs +++ b/crates/polars-core/src/series/any_value.rs @@ -65,6 +65,7 @@ impl Series { } let dtype = if strict { match get_first_non_null_dtype(values) { + #[cfg(feature = "dtype-decimal")] DataType::Decimal(mut prec, mut scale) => { for v in values { if let DataType::Decimal(p, s) = v.dtype() { diff --git a/crates/polars-core/src/series/from.rs b/crates/polars-core/src/series/from.rs index 9a29b6aba24e..92b254cb7893 100644 --- a/crates/polars-core/src/series/from.rs +++ b/crates/polars-core/src/series/from.rs @@ -9,6 +9,7 @@ use arrow::offset::OffsetsBuffer; use arrow::temporal_conversions::*; use arrow::types::months_days_ns; use polars_compute::cast::cast_unchecked as cast; +#[cfg(feature = "dtype-decimal")] use polars_compute::decimal::dec128_fits; use polars_error::feature_gated; use polars_utils::check_allow_importing_interval_as_struct; @@ -379,37 +380,39 @@ impl Series { }) }, ArrowDataType::Decimal256(precision, scale) => { - use arrow::types::i256; - - polars_compute::decimal::dec128_verify_prec_scale(*precision, *scale)?; + feature_gated!("dtype-decimal", { + use arrow::types::i256; - let mut chunks = chunks; - for chunk in chunks.iter_mut() { - let arr = std::mem::take( - chunk - .as_any_mut() - .downcast_mut::>() - .unwrap(), - ); - let arr_128: PrimitiveArray = arr.iter().map(|opt_v| { - if let Some(v) = opt_v { - let smaller: Option = (*v).try_into().ok(); - let smaller = smaller.filter(|v| dec128_fits(*v, *precision)); - smaller.ok_or_else(|| { - polars_err!(ComputeError: "Decimal256 to Decimal128 conversion overflowed, Decimal256 is not (yet) supported in Polars") - }).map(Some) - } else { - Ok(None) - } - }).try_collect_arr_trusted()?; + polars_compute::decimal::dec128_verify_prec_scale(*precision, *scale)?; - *chunk = arr_128.to(ArrowDataType::Int128).to_boxed(); - } + let mut chunks = chunks; + for chunk in chunks.iter_mut() { + let arr = std::mem::take( + chunk + .as_any_mut() + .downcast_mut::>() + .unwrap(), + ); + let arr_128: PrimitiveArray = arr.iter().map(|opt_v| { + if let Some(v) = opt_v { + let smaller: Option = (*v).try_into().ok(); + let smaller = smaller.filter(|v| dec128_fits(*v, *precision)); + smaller.ok_or_else(|| { + polars_err!(ComputeError: "Decimal256 to Decimal128 conversion overflowed, Decimal256 is not (yet) supported in Polars") + }).map(Some) + } else { + Ok(None) + } + }).try_collect_arr_trusted()?; + + *chunk = arr_128.to(ArrowDataType::Int128).to_boxed(); + } - let s = Int128Chunked::from_chunks(name, chunks) - .into_decimal_unchecked(*precision, *scale) - .into_series(); - Ok(s) + let s = Int128Chunked::from_chunks(name, chunks) + .into_decimal_unchecked(*precision, *scale) + .into_series(); + Ok(s) + }) }, ArrowDataType::Null => Ok(new_null(name, &chunks)), #[cfg(not(feature = "dtype-categorical"))] From 72ee19cca95ed3a7ff1128057160ddf97e99e479 Mon Sep 17 00:00:00 2001 From: Orson Peters Date: Fri, 19 Sep 2025 10:36:21 +0200 Subject: [PATCH 07/18] Fix string to decimal bug --- crates/polars-compute/src/decimal.rs | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/crates/polars-compute/src/decimal.rs b/crates/polars-compute/src/decimal.rs index 19aca1a72de7..63a9f30cf5e6 100644 --- a/crates/polars-compute/src/decimal.rs +++ b/crates/polars-compute/src/decimal.rs @@ -652,8 +652,7 @@ pub fn dec128_cmp(mut lv: i128, ls: usize, mut rv: i128, rs: usize) -> Ordering /// Only b'.' is allowed as a decimal separator (issue #6698). #[inline] pub fn str_to_dec128(bytes: &[u8], p: usize, s: usize) -> Option { - // TODO - // assert!(dec128_verify_prec_scale(p, s).is_ok()); + assert!(dec128_verify_prec_scale(p, s).is_ok()); let separator = bytes.iter().position(|b| *b == b'.').unwrap_or(bytes.len()); let (mut int, mut frac) = bytes.split_at(separator); @@ -663,7 +662,7 @@ pub fn str_to_dec128(bytes: &[u8], p: usize, s: usize) -> Option { frac = rest; } - if frac.len() <= 1 || s == 0 { + if frac.len() <= 1 { // Only integer fast path. let n: i128 = atoi_simd::parse(int).ok()?; return i128_to_dec128(n, p, s); @@ -682,21 +681,23 @@ pub fn str_to_dec128(bytes: &[u8], p: usize, s: usize) -> Option { }; // Round if digits extend beyond the scale. - let next_digit; + let (next_digit, all_zero_after); let frac_scale = if frac.len() > s { if !frac[s..].iter().all(|b| b.is_ascii_digit()) { return None; } next_digit = frac[s]; + all_zero_after = frac[s + 1..].iter().all(|b| *b == b'0'); frac = &frac[..s]; 0 } else { next_digit = b'0'; + all_zero_after = true; s - frac.len() }; // Parse and combine parts. - let pint: i128 = if int.is_empty() { + let mut pint: i128 = if int.is_empty() { 0 } else { atoi_simd::parse_pos(int).ok()? @@ -709,10 +710,14 @@ pub fn str_to_dec128(bytes: &[u8], p: usize, s: usize) -> Option { }; // Round-to-even. - if next_digit > b'5' { + if next_digit > b'5' || next_digit == b'5' && !all_zero_after { pfrac += 1; } else if next_digit == b'5' { - pfrac += pfrac % 2; + if s == 0 { + pint += pint % 2; + } else { + pfrac += pfrac % 2; + } } let ret = mul_128_pow10(pint, s)? + mul_128_pow10(pfrac, frac_scale)?; From ffb39c38144cdf625f1bdae5a5eacaf024c8b1de Mon Sep 17 00:00:00 2001 From: Orson Peters Date: Fri, 19 Sep 2025 10:48:42 +0200 Subject: [PATCH 08/18] Fix dataframe mean returning null for decimal --- crates/polars-plan/src/plans/conversion/dsl_to_ir/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/polars-plan/src/plans/conversion/dsl_to_ir/mod.rs b/crates/polars-plan/src/plans/conversion/dsl_to_ir/mod.rs index 25870932d07b..59dc6b67c31c 100644 --- a/crates/polars-plan/src/plans/conversion/dsl_to_ir/mod.rs +++ b/crates/polars-plan/src/plans/conversion/dsl_to_ir/mod.rs @@ -791,7 +791,7 @@ pub fn to_alp_impl(lp: DslPlan, ctxt: &mut DslConversionContext) -> PolarsResult |dt| { dt.is_primitive_numeric() || dt.is_temporal() - || dt == &DataType::Boolean + || matches!(dt, DataType::Boolean | DataType::Decimal(_, _)) }, |name| col(name.clone()).mean(), &input_schema, From 1e951121c8df5be3971789dd4151aa6db7b580e8 Mon Sep 17 00:00:00 2001 From: Orson Peters Date: Fri, 19 Sep 2025 11:43:13 +0200 Subject: [PATCH 09/18] Fix list of decimal --- crates/polars-arrow/src/datatypes/mod.rs | 7 +++++-- crates/polars-expr/src/expressions/eval.rs | 3 ++- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/crates/polars-arrow/src/datatypes/mod.rs b/crates/polars-arrow/src/datatypes/mod.rs index eb50dae249cf..e90cf8707b28 100644 --- a/crates/polars-arrow/src/datatypes/mod.rs +++ b/crates/polars-arrow/src/datatypes/mod.rs @@ -334,14 +334,17 @@ impl ArrowDataType { pub fn underlying_physical_type(&self) -> ArrowDataType { use ArrowDataType::*; match self { - Date32 | Time32(_) | Interval(IntervalUnit::YearMonth) => Int32, - Date64 + Decimal32(_, _) | Date32 | Time32(_) | Interval(IntervalUnit::YearMonth) => Int32, + Decimal64(_, _) + | Date64 | Timestamp(_, _) | Time64(_) | Duration(_) | Interval(IntervalUnit::DayTime) => Int64, Interval(IntervalUnit::MonthDayNano) => unimplemented!(), Binary => Binary, + Decimal(_, _) => Int128, + Decimal256(_, _) => unimplemented!(), List(field) => List(Box::new(Field { dtype: field.dtype.underlying_physical_type(), ..*field.clone() diff --git a/crates/polars-expr/src/expressions/eval.rs b/crates/polars-expr/src/expressions/eval.rs index fc27fbd028b9..56858f311450 100644 --- a/crates/polars-expr/src/expressions/eval.rs +++ b/crates/polars-expr/src/expressions/eval.rs @@ -155,9 +155,10 @@ impl EvalExpr { .collect::>>>()? }; + let out_inner_dt = self.non_aggregated_output_dtype.inner_dtype().unwrap(); Ok(unsafe { ListChunked::from_chunks(self.output_field_with_ctx.name.clone(), chunks) - .cast_unchecked(&self.non_aggregated_output_dtype) + .from_physical_unchecked(out_inner_dt.clone()) .unwrap() } .into_column()) From ac60fc53327fe4967f9335a915efdf8d1ee6b5b4 Mon Sep 17 00:00:00 2001 From: Orson Peters Date: Fri, 19 Sep 2025 13:06:05 +0200 Subject: [PATCH 10/18] Disallow decimal.Decimal in dataclasses for now --- .../unit/constructors/test_constructors.py | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/py-polars/tests/unit/constructors/test_constructors.py b/py-polars/tests/unit/constructors/test_constructors.py index 3c30ff7e557a..624ac457da44 100644 --- a/py-polars/tests/unit/constructors/test_constructors.py +++ b/py-polars/tests/unit/constructors/test_constructors.py @@ -199,25 +199,25 @@ def test_init_structured_objects() -> None: class TradeDC: timestamp: datetime ticker: str - price: Decimal + price: float size: int | None = None class TradePD(pydantic.BaseModel): timestamp: datetime ticker: str - price: Decimal + price: float size: int class TradeNT(NamedTuple): timestamp: datetime ticker: str - price: Decimal + price: float size: int | None = None raw_data = [ - (datetime(2022, 9, 8, 14, 30, 45), "AAPL", Decimal("157.5"), 125), - (datetime(2022, 9, 9, 10, 15, 12), "FLSY", Decimal("10.0"), 1500), - (datetime(2022, 9, 7, 15, 30), "MU", Decimal("55.5"), 400), + (datetime(2022, 9, 8, 14, 30, 45), "AAPL", 157.5, 125), + (datetime(2022, 9, 9, 10, 15, 12), "FLSY", 10.0, 1500), + (datetime(2022, 9, 7, 15, 30), "MU", 55.5, 400), ] columns = ["timestamp", "ticker", "price", "size"] @@ -229,7 +229,7 @@ class TradeNT(NamedTuple): assert df.schema == { "timestamp": pl.Datetime("us"), "ticker": pl.String, - "price": pl.Decimal(scale=1), + "price": pl.Float64, "size": pl.Int64, } assert df.rows() == raw_data @@ -242,7 +242,7 @@ class TradeNT(NamedTuple): assert df.schema == { "timestamp": pl.Datetime("ms"), "ticker": pl.String, - "price": pl.Decimal(scale=1), + "price": pl.Float64, "size": pl.Int32, } @@ -252,14 +252,14 @@ class TradeNT(NamedTuple): schema=[ ("ts", pl.Datetime("ms")), ("tk", pl.Categorical), - ("pc", pl.Decimal(scale=1)), + ("pc", pl.Float64), ("sz", pl.UInt16), ], ) assert df.schema == { "ts": pl.Datetime("ms"), "tk": pl.Categorical(ordering="lexical"), - "pc": pl.Decimal(scale=1), + "pc": pl.Float64, "sz": pl.UInt16, } assert df.rows() == raw_data From 725ae05a068958aaaf5cf27206ed59a9e2eb829d Mon Sep 17 00:00:00 2001 From: Orson Peters Date: Fri, 19 Sep 2025 13:06:59 +0200 Subject: [PATCH 11/18] Fix parquet test --- py-polars/tests/unit/io/test_parquet.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/py-polars/tests/unit/io/test_parquet.py b/py-polars/tests/unit/io/test_parquet.py index ebf779da8bb2..eb634b0b26a5 100644 --- a/py-polars/tests/unit/io/test_parquet.py +++ b/py-polars/tests/unit/io/test_parquet.py @@ -603,7 +603,7 @@ def test_decimal_parquet(tmp_path: Path) -> None: } ) - df = df.with_columns(pl.col("bar").cast(pl.Decimal)) + df = df.with_columns(pl.col("bar").cast(pl.Decimal(scale=3))) df.write_parquet(path, statistics=True) out = pl.scan_parquet(path).filter(foo=2).collect().to_dict(as_series=False) From 5abc0de3c69038aaa6a7e1e1a29f6127753b9314 Mon Sep 17 00:00:00 2001 From: Orson Peters Date: Fri, 19 Sep 2025 13:44:17 +0200 Subject: [PATCH 12/18] Fix var/std/quantile for decimal --- .../frame/group_by/aggregations/dispatch.rs | 2 ++ .../src/series/implementations/decimal.rs | 29 +++++++++++++++---- .../src/plans/conversion/dsl_to_ir/mod.rs | 9 +++--- .../tests/unit/datatypes/test_decimal.py | 29 +++++++++++++++++++ 4 files changed, 60 insertions(+), 9 deletions(-) diff --git a/crates/polars-core/src/frame/group_by/aggregations/dispatch.rs b/crates/polars-core/src/frame/group_by/aggregations/dispatch.rs index fedff4357f18..4d9565999ab4 100644 --- a/crates/polars-core/src/frame/group_by/aggregations/dispatch.rs +++ b/crates/polars-core/src/frame/group_by/aggregations/dispatch.rs @@ -249,6 +249,8 @@ impl Series { match s.dtype() { Float32 => s.f32().unwrap().agg_quantile(groups, quantile, method), Float64 => s.f64().unwrap().agg_quantile(groups, quantile, method), + #[cfg(feature = "dtype-decimal")] + Decimal(_, _) => s.cast(&DataType::Float64).unwrap().agg_quantile(groups, quantile, method), dt if dt.is_primitive_numeric() || dt.is_temporal() => { let ca = s.to_physical_repr(); let physical_type = ca.dtype(); diff --git a/crates/polars-core/src/series/implementations/decimal.rs b/crates/polars-core/src/series/implementations/decimal.rs index ab7f70d0bc87..79ab1249e5d8 100644 --- a/crates/polars-core/src/series/implementations/decimal.rs +++ b/crates/polars-core/src/series/implementations/decimal.rs @@ -169,6 +169,16 @@ impl private::PrivateSeries for SeriesWrap { self.agg_helper(|ca| ca.agg_list(groups)) } + #[cfg(feature = "algorithm_group_by")] + unsafe fn agg_var(&self, groups: &GroupsType, ddof: u8) -> Series { + self.0.cast(&DataType::Float64).unwrap().agg_var(groups, ddof) + } + + #[cfg(feature = "algorithm_group_by")] + unsafe fn agg_std(&self, groups: &GroupsType, ddof: u8) -> Series { + self.0.cast(&DataType::Float64).unwrap().agg_std(groups, ddof) + } + fn subtract(&self, rhs: &Series) -> PolarsResult { let rhs = rhs.decimal()?; ((&self.0) - rhs).map(|ca| ca.into_series()) @@ -400,6 +410,7 @@ impl SeriesTrait for SeriesWrap { Scalar::new(self.dtype().clone(), av) })) } + fn min_reduce(&self) -> PolarsResult { Ok(self.apply_physical(|ca| { let min = ca.min(); @@ -414,6 +425,7 @@ impl SeriesTrait for SeriesWrap { Scalar::new(self.dtype().clone(), av) })) } + fn max_reduce(&self) -> PolarsResult { Ok(self.apply_physical(|ca| { let max = ca.max(); @@ -446,18 +458,25 @@ impl SeriesTrait for SeriesWrap { .median() .map(|v| v / self.scale_factor() as f64) } + fn median_reduce(&self) -> PolarsResult { Ok(self.apply_scale(self.0.physical().median_reduce())) } fn std(&self, ddof: u8) -> Option { - self.0 - .physical() - .std(ddof) - .map(|v| v / self.scale_factor() as f64) + self.0.cast(&DataType::Float64).ok()?.std(ddof) } + fn std_reduce(&self, ddof: u8) -> PolarsResult { - Ok(self.apply_scale(self.0.physical().std_reduce(ddof))) + self.0.cast(&DataType::Float64)?.std_reduce(ddof) + } + + fn var(&self, ddof: u8) -> Option { + self.0.cast(&DataType::Float64).ok()?.var(ddof) + } + + fn var_reduce(&self, ddof: u8) -> PolarsResult { + self.0.cast(&DataType::Float64)?.var_reduce(ddof) } fn quantile_reduce(&self, quantile: f64, method: QuantileMethod) -> PolarsResult { diff --git a/crates/polars-plan/src/plans/conversion/dsl_to_ir/mod.rs b/crates/polars-plan/src/plans/conversion/dsl_to_ir/mod.rs index 59dc6b67c31c..649cb7a3fd21 100644 --- a/crates/polars-plan/src/plans/conversion/dsl_to_ir/mod.rs +++ b/crates/polars-plan/src/plans/conversion/dsl_to_ir/mod.rs @@ -773,17 +773,17 @@ pub fn to_alp_impl(lp: DslPlan, ctxt: &mut DslConversionContext) -> PolarsResult DslFunction::Stats(sf) => { let exprs = match sf { StatsFunction::Var { ddof } => stats_helper( - |dt| dt.is_primitive_numeric() || dt.is_bool(), + |dt| dt.is_primitive_numeric() || dt.is_bool() || dt.is_decimal(), |name| col(name.clone()).var(ddof), &input_schema, ), StatsFunction::Std { ddof } => stats_helper( - |dt| dt.is_primitive_numeric() || dt.is_bool(), + |dt| dt.is_primitive_numeric() || dt.is_bool() || dt.is_decimal(), |name| col(name.clone()).std(ddof), &input_schema, ), StatsFunction::Quantile { quantile, method } => stats_helper( - |dt| dt.is_primitive_numeric(), + |dt| dt.is_primitive_numeric() || dt.is_decimal(), |name| col(name.clone()).quantile(quantile.clone(), method), &input_schema, ), @@ -791,7 +791,8 @@ pub fn to_alp_impl(lp: DslPlan, ctxt: &mut DslConversionContext) -> PolarsResult |dt| { dt.is_primitive_numeric() || dt.is_temporal() - || matches!(dt, DataType::Boolean | DataType::Decimal(_, _)) + || dt.is_bool() + || dt.is_decimal() }, |name| col(name.clone()).mean(), &input_schema, diff --git a/py-polars/tests/unit/datatypes/test_decimal.py b/py-polars/tests/unit/datatypes/test_decimal.py index db3c4443f843..85ccaaf672db 100644 --- a/py-polars/tests/unit/datatypes/test_decimal.py +++ b/py-polars/tests/unit/datatypes/test_decimal.py @@ -753,3 +753,32 @@ def test_decimal_cast_limit() -> None: too_large1.cast(pl.Decimal(38, 0)) with pytest.raises(InvalidOperationError): too_large2.cast(pl.Decimal(38, 0)) + + +def test_decimal_agg() -> None: + df = pl.DataFrame( + { + "g": [1, 1, 2, 2], + "x": [1, 10, 100, 1000], + } + ) + ddf = df.with_columns(x = pl.col.x.cast(pl.Decimal(scale=3))) + + agg_exprs = { + "min": pl.col.x.min(), + "max": pl.col.x.max(), + "mean": pl.col.x.mean(), + "quantile": pl.col.x.quantile(0.4), + "median": pl.col.x.median(), + "sum": pl.col.x.sum(), + "var": pl.col.x.var(), + "std": pl.col.x.std(), + } + + assert_frame_equal( + df.select(**agg_exprs).cast(pl.Float64), ddf.select(**agg_exprs).cast(pl.Float64) + ) + assert_frame_equal( + df.group_by("g").agg(**agg_exprs).cast(pl.Float64), ddf.group_by("g").agg(**agg_exprs).cast(pl.Float64) + ) + From 85a1b1d549977b262a084edee02044061b9898ef Mon Sep 17 00:00:00 2001 From: Orson Peters Date: Fri, 19 Sep 2025 13:45:34 +0200 Subject: [PATCH 13/18] Fix rust test --- crates/polars-compute/src/decimal.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/crates/polars-compute/src/decimal.rs b/crates/polars-compute/src/decimal.rs index 63a9f30cf5e6..580426b949a9 100644 --- a/crates/polars-compute/src/decimal.rs +++ b/crates/polars-compute/src/decimal.rs @@ -904,7 +904,7 @@ mod test { let mut buf = DecimalFmtBuffer::new(); for &p in &INTERESTING_SCALE_PREC { for &s in &INTERESTING_SCALE_PREC { - if s > p { + if s > p || p == 0 { continue; } for x in INTERESTING_VALUES.iter() { @@ -924,7 +924,7 @@ mod test { fn test_mul() { for &p in &INTERESTING_SCALE_PREC { for &s in &INTERESTING_SCALE_PREC { - if s > p { + if s > p || p == 0 { continue; } let values: Vec<_> = INTERESTING_VALUES @@ -949,7 +949,7 @@ mod test { fn test_div() { for &p in &INTERESTING_SCALE_PREC { for &s in &INTERESTING_SCALE_PREC { - if s > p { + if s > p || p == 0 { continue; } let values: Vec<_> = INTERESTING_VALUES From 8a30471106ac40287bc9998b413d8c7c517928e5 Mon Sep 17 00:00:00 2001 From: Orson Peters Date: Fri, 19 Sep 2025 13:49:37 +0200 Subject: [PATCH 14/18] Fix doctests --- .../frame/group_by/aggregations/dispatch.rs | 5 ++- .../src/series/implementations/decimal.rs | 10 ++++-- py-polars/polars/config.py | 32 +++++++++---------- py-polars/polars/expr/expr.py | 30 ++++++++--------- py-polars/polars/expr/string.py | 2 +- py-polars/polars/selectors.py | 16 +++++----- .../tests/unit/datatypes/test_decimal.py | 9 +++--- 7 files changed, 57 insertions(+), 47 deletions(-) diff --git a/crates/polars-core/src/frame/group_by/aggregations/dispatch.rs b/crates/polars-core/src/frame/group_by/aggregations/dispatch.rs index 4d9565999ab4..6fc075b0b146 100644 --- a/crates/polars-core/src/frame/group_by/aggregations/dispatch.rs +++ b/crates/polars-core/src/frame/group_by/aggregations/dispatch.rs @@ -250,7 +250,10 @@ impl Series { Float32 => s.f32().unwrap().agg_quantile(groups, quantile, method), Float64 => s.f64().unwrap().agg_quantile(groups, quantile, method), #[cfg(feature = "dtype-decimal")] - Decimal(_, _) => s.cast(&DataType::Float64).unwrap().agg_quantile(groups, quantile, method), + Decimal(_, _) => s + .cast(&DataType::Float64) + .unwrap() + .agg_quantile(groups, quantile, method), dt if dt.is_primitive_numeric() || dt.is_temporal() => { let ca = s.to_physical_repr(); let physical_type = ca.dtype(); diff --git a/crates/polars-core/src/series/implementations/decimal.rs b/crates/polars-core/src/series/implementations/decimal.rs index 79ab1249e5d8..871bbef1a06f 100644 --- a/crates/polars-core/src/series/implementations/decimal.rs +++ b/crates/polars-core/src/series/implementations/decimal.rs @@ -171,12 +171,18 @@ impl private::PrivateSeries for SeriesWrap { #[cfg(feature = "algorithm_group_by")] unsafe fn agg_var(&self, groups: &GroupsType, ddof: u8) -> Series { - self.0.cast(&DataType::Float64).unwrap().agg_var(groups, ddof) + self.0 + .cast(&DataType::Float64) + .unwrap() + .agg_var(groups, ddof) } #[cfg(feature = "algorithm_group_by")] unsafe fn agg_std(&self, groups: &GroupsType, ddof: u8) -> Series { - self.0.cast(&DataType::Float64).unwrap().agg_std(groups, ddof) + self.0 + .cast(&DataType::Float64) + .unwrap() + .agg_std(groups, ddof) } fn subtract(&self, rhs: &Series) -> PolarsResult { diff --git a/py-polars/polars/config.py b/py-polars/polars/config.py index df7490953d45..9f7d28b7ab4e 100644 --- a/py-polars/polars/config.py +++ b/py-polars/polars/config.py @@ -1393,25 +1393,25 @@ def set_trim_decimal_zeros(cls, active: bool | None = True) -> type[Config]: >>> with pl.Config(trim_decimal_zeros=False): ... print(df) shape: (2, 1) - ┌──────────────┐ - │ d │ - │ --- │ - │ decimal[*,5] │ - ╞══════════════╡ - │ 1.01000 │ - │ -5.67890 │ - └──────────────┘ + ┌───────────────┐ + │ d │ + │ --- │ + │ decimal[38,5] │ + ╞═══════════════╡ + │ 1.01000 │ + │ -5.67890 │ + └───────────────┘ >>> with pl.Config(trim_decimal_zeros=True): ... print(df) shape: (2, 1) - ┌──────────────┐ - │ d │ - │ --- │ - │ decimal[*,5] │ - ╞══════════════╡ - │ 1.01 │ - │ -5.6789 │ - └──────────────┘ + ┌───────────────┐ + │ d │ + │ --- │ + │ decimal[38,5] │ + ╞═══════════════╡ + │ 1.01 │ + │ -5.6789 │ + └───────────────┘ """ plr.set_trim_decimal_zeros(active) return cls diff --git a/py-polars/polars/expr/expr.py b/py-polars/polars/expr/expr.py index 8040cf604ce4..75511a2cbba1 100644 --- a/py-polars/polars/expr/expr.py +++ b/py-polars/polars/expr/expr.py @@ -1684,21 +1684,21 @@ def round(self, decimals: int = 0, mode: RoundMode = "half_to_even") -> Expr: ... pl.all().round(mode="half_to_even").name.suffix("_to_even"), ... ) shape: (8, 6) - ┌──────┬──────────────┬──────────┬──────────────┬─────────────┬──────────────┐ - │ f64 ┆ d ┆ f64_away ┆ d_away ┆ f64_to_even ┆ d_to_even │ - │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │ - │ f64 ┆ decimal[*,1] ┆ f64 ┆ decimal[*,1] ┆ f64 ┆ decimal[*,1] │ - ╞══════╪══════════════╪══════════╪══════════════╪═════════════╪══════════════╡ - │ -3.5 ┆ -3.5 ┆ -4.0 ┆ -4.0 ┆ -4.0 ┆ -4.0 │ - │ -2.5 ┆ -2.5 ┆ -3.0 ┆ -3.0 ┆ -2.0 ┆ -2.0 │ - │ -1.5 ┆ -1.5 ┆ -2.0 ┆ -2.0 ┆ -2.0 ┆ -2.0 │ - │ -0.5 ┆ -0.5 ┆ -1.0 ┆ -1.0 ┆ -0.0 ┆ 0.0 │ - │ 0.5 ┆ 0.5 ┆ 1.0 ┆ 1.0 ┆ 0.0 ┆ 0.0 │ - │ 1.5 ┆ 1.5 ┆ 2.0 ┆ 2.0 ┆ 2.0 ┆ 2.0 │ - │ 2.5 ┆ 2.5 ┆ 3.0 ┆ 3.0 ┆ 2.0 ┆ 2.0 │ - │ 3.5 ┆ 3.5 ┆ 4.0 ┆ 4.0 ┆ 4.0 ┆ 4.0 │ - └──────┴──────────────┴──────────┴──────────────┴─────────────┴──────────────┘ - """ + ┌──────┬───────────────┬──────────┬───────────────┬─────────────┬───────────────┐ + │ f64 ┆ d ┆ f64_away ┆ d_away ┆ f64_to_even ┆ d_to_even │ + │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │ + │ f64 ┆ decimal[38,1] ┆ f64 ┆ decimal[38,1] ┆ f64 ┆ decimal[38,1] │ + ╞══════╪═══════════════╪══════════╪═══════════════╪═════════════╪═══════════════╡ + │ -3.5 ┆ -3.5 ┆ -4.0 ┆ -4.0 ┆ -4.0 ┆ -4.0 │ + │ -2.5 ┆ -2.5 ┆ -3.0 ┆ -3.0 ┆ -2.0 ┆ -2.0 │ + │ -1.5 ┆ -1.5 ┆ -2.0 ┆ -2.0 ┆ -2.0 ┆ -2.0 │ + │ -0.5 ┆ -0.5 ┆ -1.0 ┆ -1.0 ┆ -0.0 ┆ 0.0 │ + │ 0.5 ┆ 0.5 ┆ 1.0 ┆ 1.0 ┆ 0.0 ┆ 0.0 │ + │ 1.5 ┆ 1.5 ┆ 2.0 ┆ 2.0 ┆ 2.0 ┆ 2.0 │ + │ 2.5 ┆ 2.5 ┆ 3.0 ┆ 3.0 ┆ 2.0 ┆ 2.0 │ + │ 3.5 ┆ 3.5 ┆ 4.0 ┆ 4.0 ┆ 4.0 ┆ 4.0 │ + └──────┴───────────────┴──────────┴───────────────┴─────────────┴───────────────┘ + """ # noqa: W505 return wrap_expr(self._pyexpr.round(decimals, mode)) def round_sig_figs(self, digits: int) -> Expr: diff --git a/py-polars/polars/expr/string.py b/py-polars/polars/expr/string.py index 9535f0818515..452c62e24109 100644 --- a/py-polars/polars/expr/string.py +++ b/py-polars/polars/expr/string.py @@ -364,7 +364,7 @@ def to_decimal(self, *, scale: int) -> Expr: ┌───────────┬─────────────────┐ │ numbers ┆ numbers_decimal │ │ --- ┆ --- │ - │ str ┆ decimal[*,2] │ + │ str ┆ decimal[38,2] │ ╞═══════════╪═════════════════╡ │ 40.12 ┆ 40.12 │ │ 3420.13 ┆ 3420.13 │ diff --git a/py-polars/polars/selectors.py b/py-polars/polars/selectors.py index 8b6051df4698..3212b5f70ba8 100644 --- a/py-polars/polars/selectors.py +++ b/py-polars/polars/selectors.py @@ -1986,14 +1986,14 @@ def decimal() -> Selector: >>> df.select(cs.decimal()) shape: (2, 2) - ┌──────────────┬───────────────┐ - │ bar ┆ baz │ - │ --- ┆ --- │ - │ decimal[*,0] ┆ decimal[10,5] │ - ╞══════════════╪═══════════════╡ - │ 123 ┆ 2.00050 │ - │ 456 ┆ -50.55550 │ - └──────────────┴───────────────┘ + ┌───────────────┬───────────────┐ + │ bar ┆ baz │ + │ --- ┆ --- │ + │ decimal[38,0] ┆ decimal[10,5] │ + ╞═══════════════╪═══════════════╡ + │ 123 ┆ 2.00050 │ + │ 456 ┆ -50.55550 │ + └───────────────┴───────────────┘ Select all columns *except* the decimal ones: diff --git a/py-polars/tests/unit/datatypes/test_decimal.py b/py-polars/tests/unit/datatypes/test_decimal.py index 85ccaaf672db..1411db8f2956 100644 --- a/py-polars/tests/unit/datatypes/test_decimal.py +++ b/py-polars/tests/unit/datatypes/test_decimal.py @@ -762,7 +762,7 @@ def test_decimal_agg() -> None: "x": [1, 10, 100, 1000], } ) - ddf = df.with_columns(x = pl.col.x.cast(pl.Decimal(scale=3))) + ddf = df.with_columns(x=pl.col.x.cast(pl.Decimal(scale=3))) agg_exprs = { "min": pl.col.x.min(), @@ -776,9 +776,10 @@ def test_decimal_agg() -> None: } assert_frame_equal( - df.select(**agg_exprs).cast(pl.Float64), ddf.select(**agg_exprs).cast(pl.Float64) + df.select(**agg_exprs).cast(pl.Float64), + ddf.select(**agg_exprs).cast(pl.Float64), ) assert_frame_equal( - df.group_by("g").agg(**agg_exprs).cast(pl.Float64), ddf.group_by("g").agg(**agg_exprs).cast(pl.Float64) + df.group_by("g").agg(**agg_exprs).cast(pl.Float64), + ddf.group_by("g").agg(**agg_exprs).cast(pl.Float64), ) - From a8c9a73ada81d12b6cd6276db4c4738c9866eb4c Mon Sep 17 00:00:00 2001 From: Orson Peters Date: Fri, 19 Sep 2025 14:16:03 +0200 Subject: [PATCH 15/18] Fix float -> decimal cast to round to nearest even --- .../polars-compute/src/cast/primitive_to.rs | 69 +++++-------------- crates/polars-compute/src/decimal.rs | 6 +- .../tests/unit/datatypes/test_decimal.py | 3 +- py-polars/tests/unit/sql/test_numeric.py | 4 +- 4 files changed, 26 insertions(+), 56 deletions(-) diff --git a/crates/polars-compute/src/cast/primitive_to.rs b/crates/polars-compute/src/cast/primitive_to.rs index 41c2041531a3..d4a3a4014987 100644 --- a/crates/polars-compute/src/cast/primitive_to.rs +++ b/crates/polars-compute/src/cast/primitive_to.rs @@ -6,11 +6,16 @@ use arrow::compute::arity::unary; use arrow::datatypes::{ArrowDataType, TimeUnit}; use arrow::offset::{Offset, Offsets}; use arrow::types::{NativeType, f16}; -use num_traits::{AsPrimitive, Float, ToPrimitive}; +use num_traits::AsPrimitive; +#[cfg(feature = "dtype-decimal")] +use num_traits::Float; use polars_error::PolarsResult; use polars_utils::pl_str::PlSmallStr; use polars_utils::vec::PushUnchecked; +#[cfg(feature = "dtype-decimal")] +use crate::decimal::{dec128_verify_prec_scale, i128_to_dec128, f64_to_dec128}; + use super::CastOptionsImpl; use super::temporal::*; @@ -225,34 +230,19 @@ where } /// Returns a [`PrimitiveArray`] with the cast values. Values are `None` on overflow +#[cfg(feature = "dtype-decimal")] pub fn integer_to_decimal>( from: &PrimitiveArray, to_precision: usize, to_scale: usize, ) -> PrimitiveArray { - assert!(to_precision <= 38); - assert!(to_scale <= 38); - - let multiplier = 10_i128.pow(to_scale as u32); - let max_for_precision = 10_i128.pow(to_precision as u32) - 1; - let min_for_precision = -max_for_precision; - - let values = from.iter().map(|x| { - x.and_then(|x| { - x.as_().checked_mul(multiplier).and_then(|x| { - if x > max_for_precision || x < min_for_precision { - None - } else { - Some(x) - } - }) - }) - }); - + assert!(dec128_verify_prec_scale(to_precision, to_scale).is_ok()); + let values = from.iter().map(|x| i128_to_dec128(x?.as_(), to_precision, to_scale)); PrimitiveArray::::from_trusted_len_iter(values) .to(ArrowDataType::Decimal(to_precision, to_scale)) } +#[cfg(feature = "dtype-decimal")] pub(super) fn integer_to_decimal_dyn( from: &dyn Array, precision: usize, @@ -266,47 +256,24 @@ where } /// Returns a [`PrimitiveArray`] with the cast values. Values are `None` on overflow -pub fn float_to_decimal( +#[cfg(feature = "dtype-decimal")] +pub fn float_to_decimal>( from: &PrimitiveArray, to_precision: usize, to_scale: usize, -) -> PrimitiveArray -where - T: NativeType + Float + ToPrimitive, - f64: AsPrimitive, -{ - assert!(to_precision <= 38); - assert!(to_scale <= 38); - - // 1.2 => 12 - let multiplier: T = (10_f64).powi(to_scale as i32).as_(); - let max_for_precision = 10_i128.pow(to_precision as u32) - 1; - let min_for_precision = -max_for_precision; - - let values = from.iter().map(|x| { - x.and_then(|x| { - let x = (*x * multiplier).to_i128()?; - if x > max_for_precision || x < min_for_precision { - None - } else { - Some(x) - } - }) - }); - +) -> PrimitiveArray { + assert!(dec128_verify_prec_scale(to_precision, to_scale).is_ok()); + let values = from.iter().map(|x| f64_to_dec128(x?.as_(), to_precision, to_scale)); PrimitiveArray::::from_trusted_len_iter(values) .to(ArrowDataType::Decimal(to_precision, to_scale)) } -pub(super) fn float_to_decimal_dyn( +#[cfg(feature = "dtype-decimal")] +pub(super) fn float_to_decimal_dyn>( from: &dyn Array, precision: usize, scale: usize, -) -> PolarsResult> -where - T: NativeType + Float + ToPrimitive, - f64: AsPrimitive, -{ +) -> PolarsResult> { let from = from.as_any().downcast_ref().unwrap(); Ok(Box::new(float_to_decimal::(from, precision, scale))) } diff --git a/crates/polars-compute/src/decimal.rs b/crates/polars-compute/src/decimal.rs index 580426b949a9..7066c2d70601 100644 --- a/crates/polars-compute/src/decimal.rs +++ b/crates/polars-compute/src/decimal.rs @@ -485,8 +485,10 @@ pub fn dec128_to_f64(x: i128, s: usize) -> f64 { #[inline] pub fn f64_to_dec128(x: f64, p: usize, s: usize) -> Option { // TODO: correctly rounded result. This rounds multiple times. - let n = (x * POW10_F64[s]) as i128; - dec128_fits(n, p).then_some(n) + if !(x.abs() < POW10_F64[p]) { + return None; + } + unsafe { Some((x * POW10_F64[s]).round_ties_even().to_int_unchecked()) } } /// Converts between two Decimal128s, with a new precision and scale, returning diff --git a/py-polars/tests/unit/datatypes/test_decimal.py b/py-polars/tests/unit/datatypes/test_decimal.py index 1411db8f2956..15fc976afbe2 100644 --- a/py-polars/tests/unit/datatypes/test_decimal.py +++ b/py-polars/tests/unit/datatypes/test_decimal.py @@ -701,7 +701,7 @@ def test_decimal_from_large_ints_9084() -> None: def test_cast_float_to_decimal_12775() -> None: s = pl.Series([1.5]) - assert s.cast(pl.Decimal(scale=0)).to_list() == [D("1")] + assert s.cast(pl.Decimal(scale=0)).to_list() == [D("2")] assert s.cast(pl.Decimal(scale=1)).to_list() == [D("1.5")] @@ -782,4 +782,5 @@ def test_decimal_agg() -> None: assert_frame_equal( df.group_by("g").agg(**agg_exprs).cast(pl.Float64), ddf.group_by("g").agg(**agg_exprs).cast(pl.Float64), + check_row_order=False ) diff --git a/py-polars/tests/unit/sql/test_numeric.py b/py-polars/tests/unit/sql/test_numeric.py index fed1d97a948e..5b5cfec2afef 100644 --- a/py-polars/tests/unit/sql/test_numeric.py +++ b/py-polars/tests/unit/sql/test_numeric.py @@ -73,8 +73,8 @@ def test_modulo() -> None: (64.5, "numeric", "(3,1)", D("64.5"), pl.Decimal(3, 1)), (512.5, "decimal", "(4,1)", D("512.5"), pl.Decimal(4, 1)), (512.5, "numeric", "(4,0)", D("512"), pl.Decimal(4, 0)), - (-1024.75, "decimal", "(10,0)", D("-1024"), pl.Decimal(10, 0)), - (-1024.75, "numeric", "(10)", D("-1024"), pl.Decimal(10, 0)), + (-1024.75, "decimal", "(10,0)", D("-1025"), pl.Decimal(10, 0)), + (-1024.75, "numeric", "(10)", D("-1025"), pl.Decimal(10, 0)), (-1024.75, "dec", "", D("-1024.75"), pl.Decimal(38, 9)), ], ) From 5635b62d6eba38ef6fe7c43855fe7c7ff67798e4 Mon Sep 17 00:00:00 2001 From: Orson Peters Date: Fri, 19 Sep 2025 14:23:32 +0200 Subject: [PATCH 16/18] Fix decimal arithmetic error messages --- .../polars-compute/src/cast/primitive_to.rs | 13 +++--- .../src/chunked_array/arithmetic/decimal.rs | 43 ++++++++++--------- .../tests/unit/datatypes/test_decimal.py | 2 +- 3 files changed, 32 insertions(+), 26 deletions(-) diff --git a/crates/polars-compute/src/cast/primitive_to.rs b/crates/polars-compute/src/cast/primitive_to.rs index d4a3a4014987..cccd760d7509 100644 --- a/crates/polars-compute/src/cast/primitive_to.rs +++ b/crates/polars-compute/src/cast/primitive_to.rs @@ -13,11 +13,10 @@ use polars_error::PolarsResult; use polars_utils::pl_str::PlSmallStr; use polars_utils::vec::PushUnchecked; -#[cfg(feature = "dtype-decimal")] -use crate::decimal::{dec128_verify_prec_scale, i128_to_dec128, f64_to_dec128}; - use super::CastOptionsImpl; use super::temporal::*; +#[cfg(feature = "dtype-decimal")] +use crate::decimal::{dec128_verify_prec_scale, f64_to_dec128, i128_to_dec128}; pub trait SerPrimitive { fn write(f: &mut Vec, val: Self) -> usize @@ -237,7 +236,9 @@ pub fn integer_to_decimal>( to_scale: usize, ) -> PrimitiveArray { assert!(dec128_verify_prec_scale(to_precision, to_scale).is_ok()); - let values = from.iter().map(|x| i128_to_dec128(x?.as_(), to_precision, to_scale)); + let values = from + .iter() + .map(|x| i128_to_dec128(x?.as_(), to_precision, to_scale)); PrimitiveArray::::from_trusted_len_iter(values) .to(ArrowDataType::Decimal(to_precision, to_scale)) } @@ -263,7 +264,9 @@ pub fn float_to_decimal>( to_scale: usize, ) -> PrimitiveArray { assert!(dec128_verify_prec_scale(to_precision, to_scale).is_ok()); - let values = from.iter().map(|x| f64_to_dec128(x?.as_(), to_precision, to_scale)); + let values = from + .iter() + .map(|x| f64_to_dec128(x?.as_(), to_precision, to_scale)); PrimitiveArray::::from_trusted_len_iter(values) .to(ArrowDataType::Decimal(to_precision, to_scale)) } diff --git a/crates/polars-core/src/chunked_array/arithmetic/decimal.rs b/crates/polars-core/src/chunked_array/arithmetic/decimal.rs index debb847c02a0..9052fd62f91c 100644 --- a/crates/polars-core/src/chunked_array/arithmetic/decimal.rs +++ b/crates/polars-core/src/chunked_array/arithmetic/decimal.rs @@ -21,11 +21,11 @@ impl Add for &DecimalChunked { return PolarsResult::Ok(None); }; let ls = dec128_rescale(l, left_s, prec, scale).ok_or_else(|| { - polars_err!(ComputeError: "overflow in Decimal cast for {l} from scale {left_s} to {scale}") - })?; + polars_err!(ComputeError: "overflow in Decimal cast for {l} from scale {left_s} to {scale}") + })?; let rs = dec128_rescale(r, right_s, prec, scale).ok_or_else(|| { - polars_err!(ComputeError: "overflow in Decimal cast for {r} from scale {right_s} to {scale}") - })?; + polars_err!(ComputeError: "overflow in Decimal cast for {r} from scale {right_s} to {scale}") + })?; let ret = dec128_add(ls, rs, prec).ok_or_else( || polars_err!(ComputeError: "overflow in decimal addition for {ls} + {rs}"), )?; @@ -52,13 +52,13 @@ impl Sub for &DecimalChunked { return PolarsResult::Ok(None); }; let ls = dec128_rescale(l, left_s, prec, scale).ok_or_else(|| { - polars_err!(ComputeError: "overflow in Decimal cast for {l} from scale {left_s} to {scale}") - })?; + polars_err!(ComputeError: "overflow in Decimal cast for {l} from scale {left_s} to {scale}") + })?; let rs = dec128_rescale(r, right_s, prec, scale).ok_or_else(|| { - polars_err!(ComputeError: "overflow in Decimal cast for {r} from scale {right_s} to {scale}") - })?; + polars_err!(ComputeError: "overflow in Decimal cast for {r} from scale {right_s} to {scale}") + })?; let ret = dec128_sub(ls, rs, prec).ok_or_else( - || polars_err!(ComputeError: "overflow in decimal subtraction for {ls} + {rs}"), + || polars_err!(ComputeError: "overflow in decimal subtraction for {ls} - {rs}"), )?; Ok(Some(ret)) }, @@ -83,14 +83,14 @@ impl Mul for &DecimalChunked { return PolarsResult::Ok(None); }; let ls = dec128_rescale(l, left_s, prec, scale).ok_or_else(|| { - polars_err!(ComputeError: "overflow in Decimal cast for {l} from scale {left_s} to {scale}") - })?; + polars_err!(ComputeError: "overflow in Decimal cast for {l} from scale {left_s} to {scale}") + })?; let rs = dec128_rescale(r, right_s, prec, scale).ok_or_else(|| { - polars_err!(ComputeError: "overflow in Decimal cast for {r} from scale {right_s} to {scale}") - })?; + polars_err!(ComputeError: "overflow in Decimal cast for {r} from scale {right_s} to {scale}") + })?; let ret = dec128_mul(ls, rs, prec, scale).ok_or_else(|| { - polars_err!(ComputeError: "overflow in decimal multiplication for {ls} * {rs}") - })?; + polars_err!(ComputeError: "overflow in decimal multiplication for {ls} * {rs}") + })?; Ok(Some(ret)) }, ); @@ -113,14 +113,17 @@ impl Div for &DecimalChunked { let (Some(l), Some(r)) = (opt_l, opt_r) else { return PolarsResult::Ok(None); }; + if r == 0 { + polars_bail!(ComputeError: "division by zero Decimal"); + } let ls = dec128_rescale(l, left_s, prec, scale).ok_or_else(|| { - polars_err!(ComputeError: "overflow in Decimal cast for {l} from scale {left_s} to {scale}") - })?; + polars_err!(ComputeError: "overflow in Decimal cast for {l} from scale {left_s} to {scale}") + })?; let rs = dec128_rescale(r, right_s, prec, scale).ok_or_else(|| { - polars_err!(ComputeError: "overflow in Decimal cast for {r} from scale {right_s} to {scale}") - })?; + polars_err!(ComputeError: "overflow in Decimal cast for {r} from scale {right_s} to {scale}") + })?; let ret = dec128_div(ls, rs, prec, scale).ok_or_else( - || polars_err!(ComputeError: "overflow in decimal division for {ls} * {rs}"), + || polars_err!(ComputeError: "overflow in decimal division for {ls} / {rs}"), )?; Ok(Some(ret)) }, diff --git a/py-polars/tests/unit/datatypes/test_decimal.py b/py-polars/tests/unit/datatypes/test_decimal.py index 15fc976afbe2..1c38a54b2d95 100644 --- a/py-polars/tests/unit/datatypes/test_decimal.py +++ b/py-polars/tests/unit/datatypes/test_decimal.py @@ -782,5 +782,5 @@ def test_decimal_agg() -> None: assert_frame_equal( df.group_by("g").agg(**agg_exprs).cast(pl.Float64), ddf.group_by("g").agg(**agg_exprs).cast(pl.Float64), - check_row_order=False + check_row_order=False, ) From 5010f49df1114f97bf12e72bef32c2c895752b32 Mon Sep 17 00:00:00 2001 From: Orson Peters Date: Fri, 19 Sep 2025 14:55:49 +0200 Subject: [PATCH 17/18] Fix some more tests/clippy warnings --- crates/polars-compute/src/decimal.rs | 2 ++ crates/polars-core/src/chunked_array/ops/decimal.rs | 2 +- py-polars/polars/series/string.py | 2 +- 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/crates/polars-compute/src/decimal.rs b/crates/polars-compute/src/decimal.rs index 7066c2d70601..da7fff199e30 100644 --- a/crates/polars-compute/src/decimal.rs +++ b/crates/polars-compute/src/decimal.rs @@ -485,7 +485,9 @@ pub fn dec128_to_f64(x: i128, s: usize) -> f64 { #[inline] pub fn f64_to_dec128(x: f64, p: usize, s: usize) -> Option { // TODO: correctly rounded result. This rounds multiple times. + #[allow(clippy::neg_cmp_op_on_partial_ord)] if !(x.abs() < POW10_F64[p]) { + // Comparison will fail for NaN, making us return None. return None; } unsafe { Some((x * POW10_F64[s]).round_ties_even().to_int_unchecked()) } diff --git a/crates/polars-core/src/chunked_array/ops/decimal.rs b/crates/polars-core/src/chunked_array/ops/decimal.rs index 697320ae974e..fe1b5564a7c1 100644 --- a/crates/polars-core/src/chunked_array/ops/decimal.rs +++ b/crates/polars-core/src/chunked_array/ops/decimal.rs @@ -65,6 +65,6 @@ mod test { assert_eq!(s.get(1).unwrap(), AnyValue::Null); assert_eq!(s.get(3).unwrap(), AnyValue::Decimal(300045, 6, 5)); assert_eq!(s.get(4).unwrap(), AnyValue::Decimal(-400000, 6, 5)); - assert_eq!(s.get(6).unwrap(), AnyValue::Decimal(525251, 6, 5)); + assert_eq!(s.get(6).unwrap(), AnyValue::Decimal(525252, 6, 5)); } } diff --git a/py-polars/polars/series/string.py b/py-polars/polars/series/string.py index d55ff8f6f0d2..e23da34df99d 100644 --- a/py-polars/polars/series/string.py +++ b/py-polars/polars/series/string.py @@ -377,7 +377,7 @@ def to_decimal( ... ) >>> s.str.to_decimal() shape: (7,) - Series: '' [decimal[*,2]] + Series: '' [decimal[8,2]] [ 40.12 3420.13 From 89356e5cb7076fd7c9f8998f908d83fbf5567707 Mon Sep 17 00:00:00 2001 From: Orson Peters Date: Fri, 19 Sep 2025 15:04:12 +0200 Subject: [PATCH 18/18] Update DSL schema --- crates/polars-plan/dsl-schema-hashes.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/crates/polars-plan/dsl-schema-hashes.json b/crates/polars-plan/dsl-schema-hashes.json index 9bef6f81a6fb..f9664bba9fb8 100644 --- a/crates/polars-plan/dsl-schema-hashes.json +++ b/crates/polars-plan/dsl-schema-hashes.json @@ -1,7 +1,7 @@ { "AggExpr": "5398ac46a31d511fa6c645556c45b3ebeba6544df2629cabac079230822b1130", "AnonymousColumnsUdf": "04e8b658fac4f09f7f9607c73be6fd3fe258064dd33468710f2c3e188c281a69", - "AnyValue": "42e6852521da0b317d9239dcbb778de2b393e15fd0c4bf224c274c2a3a4134d5", + "AnyValue": "ef2b7f7588918138f192b3545a8474915a90d211b7c786e642427b5cd565d4ef", "ArrayDataTypeFunction": "f6606e9a91efce34563b32adb32473cd19d8c1e9b184b102be72268d14306136", "ArrayFunction": "4ad69231f749063041ee719306227a20579f1a645994d2d284137eb9c0f0e857", "AsOfOptions": "f20cf1b14073828bd45951ee857b0cf65d0325aca4bdc1c00b9a2863b3b130c4", @@ -31,7 +31,7 @@ "CsvReadOptions": "041a17f31ec3bc2a8aab49a7f16519a07666379e1571ac6e3562ed4b07c28906", "CsvWriterOptions": "189261470cc62d3af1cca63a241e96465a60bec617bc6420c27e36fe90916eba", "DataFrame": "04e8b658fac4f09f7f9607c73be6fd3fe258064dd33468710f2c3e188c281a69", - "DataType": "60c5f9067047a466fd5be795530c52b4f5b70310de5bf6258da7ebed01893460", + "DataType": "ef1194f5e6ec62e475ab499cff6b0e1357c40ebf67a4f7382b12c31e293f9457", "DataTypeExpr": "3304a33a01090cd946ec1444fd8a7527a2576c80b2ea643363b1f4961f480f4d", "DataTypeFunction": "55c708d2ec752d5ec3cc0d1efcb3ed823a389858c59955140a9f4abdaa7c6acd", "DataTypeSelector": "2cf166ffa145c2bb96c06e4974aa7e9c779444d55f2aaa13a5ae4a9a34e639cc",