diff --git a/arrow-array/src/array/primitive_array.rs b/arrow-array/src/array/primitive_array.rs index 42594e7a129d..3bee0c9b318c 100644 --- a/arrow-array/src/array/primitive_array.rs +++ b/arrow-array/src/array/primitive_array.rs @@ -1600,10 +1600,16 @@ impl PrimitiveArray { /// Validates values in this array can be properly interpreted /// with the specified precision. pub fn validate_decimal_precision(&self, precision: u8) -> Result<(), ArrowError> { + if precision < self.scale() as u8 { + return Err(ArrowError::InvalidArgumentError(format!( + "Decimal precision {precision} is less than scale {}", + self.scale() + ))); + } (0..self.len()).try_for_each(|idx| { if self.is_valid(idx) { let decimal = unsafe { self.value_unchecked(idx) }; - T::validate_decimal_precision(decimal, precision) + T::validate_decimal_precision(decimal, precision, self.scale()) } else { Ok(()) } @@ -2436,7 +2442,7 @@ mod tests { let result = arr.validate_decimal_precision(5); let error = result.unwrap_err(); assert_eq!( - "Invalid argument error: 123456 is too large to store in a Decimal128 of precision 5. Max is 99999", + "Invalid argument error: 123.456 is too large to store in a Decimal128 of precision 5. Max is 99.999", error.to_string() ); @@ -2455,7 +2461,7 @@ mod tests { let result = arr.validate_decimal_precision(2); let error = result.unwrap_err(); assert_eq!( - "Invalid argument error: 100 is too large to store in a Decimal128 of precision 2. Max is 99", + "Invalid argument error: 10.0 is too large to store in a Decimal128 of precision 2. Max is 9.9", error.to_string() ); } @@ -2541,7 +2547,7 @@ mod tests { #[test] #[should_panic( - expected = "-123223423432432 is too small to store in a Decimal128 of precision 5. Min is -99999" + expected = "-1232234234324.32 is too small to store in a Decimal128 of precision 5. 
Min is -999.99" )] fn test_decimal_array_with_precision_and_scale_out_of_range() { let arr = Decimal128Array::from_iter_values([12345, 456, 7890, -123223423432432]) diff --git a/arrow-array/src/types.rs b/arrow-array/src/types.rs index 144de8dbecbd..4032e6a75e0c 100644 --- a/arrow-array/src/types.rs +++ b/arrow-array/src/types.rs @@ -25,7 +25,7 @@ use crate::timezone::Tz; use crate::{ArrowNativeTypeOp, OffsetSizeTrait}; use arrow_buffer::{i256, Buffer, OffsetBuffer}; use arrow_data::decimal::{ - is_validate_decimal256_precision, is_validate_decimal32_precision, + format_decimal_str, is_validate_decimal256_precision, is_validate_decimal32_precision, is_validate_decimal64_precision, is_validate_decimal_precision, validate_decimal256_precision, validate_decimal32_precision, validate_decimal64_precision, validate_decimal_precision, }; @@ -1335,7 +1335,11 @@ pub trait DecimalType: fn format_decimal(value: Self::Native, precision: u8, scale: i8) -> String; /// Validates that `value` contains no more than `precision` decimal digits - fn validate_decimal_precision(value: Self::Native, precision: u8) -> Result<(), ArrowError>; + fn validate_decimal_precision( + value: Self::Native, + precision: u8, + scale: i8, + ) -> Result<(), ArrowError>; /// Determines whether `value` contains no more than `precision` decimal digits fn is_valid_decimal_precision(value: Self::Native, precision: u8) -> bool; @@ -1398,8 +1402,8 @@ impl DecimalType for Decimal32Type { format_decimal_str(&value.to_string(), precision as usize, scale) } - fn validate_decimal_precision(num: i32, precision: u8) -> Result<(), ArrowError> { - validate_decimal32_precision(num, precision) + fn validate_decimal_precision(num: i32, precision: u8, scale: i8) -> Result<(), ArrowError> { + validate_decimal32_precision(num, precision, scale) } fn is_valid_decimal_precision(value: Self::Native, precision: u8) -> bool { @@ -1432,8 +1436,8 @@ impl DecimalType for Decimal64Type { format_decimal_str(&value.to_string(), 
precision as usize, scale) } - fn validate_decimal_precision(num: i64, precision: u8) -> Result<(), ArrowError> { - validate_decimal64_precision(num, precision) + fn validate_decimal_precision(num: i64, precision: u8, scale: i8) -> Result<(), ArrowError> { + validate_decimal64_precision(num, precision, scale) } fn is_valid_decimal_precision(value: Self::Native, precision: u8) -> bool { @@ -1466,8 +1470,8 @@ impl DecimalType for Decimal128Type { format_decimal_str(&value.to_string(), precision as usize, scale) } - fn validate_decimal_precision(num: i128, precision: u8) -> Result<(), ArrowError> { - validate_decimal_precision(num, precision) + fn validate_decimal_precision(num: i128, precision: u8, scale: i8) -> Result<(), ArrowError> { + validate_decimal_precision(num, precision, scale) } fn is_valid_decimal_precision(value: Self::Native, precision: u8) -> bool { @@ -1500,8 +1504,8 @@ impl DecimalType for Decimal256Type { format_decimal_str(&value.to_string(), precision as usize, scale) } - fn validate_decimal_precision(num: i256, precision: u8) -> Result<(), ArrowError> { - validate_decimal256_precision(num, precision) + fn validate_decimal_precision(num: i256, precision: u8, scale: i8) -> Result<(), ArrowError> { + validate_decimal256_precision(num, precision, scale) } fn is_valid_decimal_precision(value: Self::Native, precision: u8) -> bool { @@ -1517,29 +1521,6 @@ impl ArrowPrimitiveType for Decimal256Type { impl primitive::PrimitiveTypeSealed for Decimal256Type {} -fn format_decimal_str(value_str: &str, precision: usize, scale: i8) -> String { - let (sign, rest) = match value_str.strip_prefix('-') { - Some(stripped) => ("-", stripped), - None => ("", value_str), - }; - let bound = precision.min(rest.len()) + sign.len(); - let value_str = &value_str[0..bound]; - - if scale == 0 { - value_str.to_string() - } else if scale < 0 { - let padding = value_str.len() + scale.unsigned_abs() as usize; - format!("{value_str:0<padding$}") - } else if rest.len() > scale as usize { - // Decimal separator is in 
the middle of the string - let (whole, decimal) = value_str.split_at(value_str.len() - scale as usize); - format!("{whole}.{decimal}") - } else { - // String has to be padded - format!("{}0.{:0>width$}", sign, rest, width = scale as usize) - } -} - /// Crate private types for Byte Arrays /// /// Not intended to be used outside this crate diff --git a/arrow-cast/src/cast/decimal.rs b/arrow-cast/src/cast/decimal.rs index 095e31274887..6c2b6f388e6d 100644 --- a/arrow-cast/src/cast/decimal.rs +++ b/arrow-cast/src/cast/decimal.rs @@ -219,8 +219,9 @@ where array.unary_opt(|x| f(x).filter(|v| O::is_valid_decimal_precision(*v, output_precision))) } else { array.try_unary(|x| { - f(x).ok_or_else(|| error(x)) - .and_then(|v| O::validate_decimal_precision(v, output_precision).map(|_| v)) + f(x).ok_or_else(|| error(x)).and_then(|v| { + O::validate_decimal_precision(v, output_precision, output_scale).map(|_| v) + }) })? }) } @@ -264,8 +265,9 @@ where array.unary_opt(|x| f(x).filter(|v| O::is_valid_decimal_precision(*v, output_precision))) } else { array.try_unary(|x| { - f(x).ok_or_else(|| error(x)) - .and_then(|v| O::validate_decimal_precision(v, output_precision).map(|_| v)) + f(x).ok_or_else(|| error(x)).and_then(|v| { + O::validate_decimal_precision(v, output_precision, output_scale).map(|_| v) + }) })? }) } @@ -491,7 +493,7 @@ where T::DATA_TYPE, )) }) - .and_then(|v| T::validate_decimal_precision(v, precision).map(|_| v)) + .and_then(|v| T::validate_decimal_precision(v, precision, scale).map(|_| v)) }) .transpose() }) @@ -621,7 +623,7 @@ where v )) }) - .and_then(|v| D::validate_decimal_precision(v, precision).map(|_| v)) + .and_then(|v| D::validate_decimal_precision(v, precision, scale).map(|_| v)) })? 
.with_precision_and_scale(precision, scale) .map(|a| Arc::new(a) as ArrayRef) diff --git a/arrow-cast/src/cast/mod.rs b/arrow-cast/src/cast/mod.rs index fc241bea48da..f604b6ea4820 100644 --- a/arrow-cast/src/cast/mod.rs +++ b/arrow-cast/src/cast/mod.rs @@ -375,7 +375,7 @@ where false => array.try_unary::<_, D, _>(|v| { v.as_() .div_checked(scale_factor) - .and_then(|v| D::validate_decimal_precision(v, precision).map(|_| v)) + .and_then(|v| D::validate_decimal_precision(v, precision, scale).map(|_| v)) })?, } } else { @@ -389,7 +389,7 @@ where false => array.try_unary::<_, D, _>(|v| { v.as_() .mul_checked(scale_factor) - .and_then(|v| D::validate_decimal_precision(v, precision).map(|_| v)) + .and_then(|v| D::validate_decimal_precision(v, precision, scale).map(|_| v)) })?, } }; @@ -2921,7 +2921,7 @@ mod tests { }; let result_unsafe = cast_with_options(&array, &DataType::Decimal32(2, 2), &options); - assert_eq!("Invalid argument error: 12345600 is too large to store in a Decimal32 of precision 2. Max is 99", + assert_eq!("Invalid argument error: 123456.00 is too large to store in a Decimal32 of precision 2. Max is 0.99", result_unsafe.unwrap_err().to_string()); } @@ -2955,7 +2955,7 @@ mod tests { }; let result_unsafe = cast_with_options(&array, &DataType::Decimal64(2, 2), &options); - assert_eq!("Invalid argument error: 12345600 is too large to store in a Decimal64 of precision 2. Max is 99", + assert_eq!("Invalid argument error: 123456.00 is too large to store in a Decimal64 of precision 2. Max is 0.99", result_unsafe.unwrap_err().to_string()); } @@ -2989,7 +2989,7 @@ mod tests { }; let result_unsafe = cast_with_options(&array, &DataType::Decimal128(2, 2), &options); - assert_eq!("Invalid argument error: 12345600 is too large to store in a Decimal128 of precision 2. Max is 99", + assert_eq!("Invalid argument error: 123456.00 is too large to store in a Decimal128 of precision 2. 
Max is 0.99", result_unsafe.unwrap_err().to_string()); } @@ -9045,7 +9045,7 @@ mod tests { }, ); let err = casted_array.unwrap_err().to_string(); - let expected_error = "Invalid argument error: 110 is too large to store in a Decimal128 of precision 2. Max is 99"; + let expected_error = "Invalid argument error: 1.10 is too large to store in a Decimal128 of precision 2. Max is 0.99"; assert!( err.contains(expected_error), "did not find expected error '{expected_error}' in actual error '{err}'" @@ -9076,11 +9076,8 @@ mod tests { }, ); let err = casted_array.unwrap_err().to_string(); - let expected_error = "Invalid argument error: 110 is too large to store in a Decimal256 of precision 2. Max is 99"; - assert!( - err.contains(expected_error), - "did not find expected error '{expected_error}' in actual error '{err}'" - ); + let expected_error = "Invalid argument error: 1.10 is too large to store in a Decimal256 of precision 2. Max is 0.99"; + assert_eq!(err, expected_error); } #[test] @@ -9685,7 +9682,7 @@ mod tests { format_options: FormatOptions::default(), }, ); - assert_eq!("Invalid argument error: 100000000000 is too large to store in a Decimal128 of precision 10. Max is 9999999999", err.unwrap_err().to_string()); + assert_eq!("Invalid argument error: 1000.00000000 is too large to store in a Decimal128 of precision 10. Max is 99.99999999", err.unwrap_err().to_string()); } #[test] @@ -9768,7 +9765,7 @@ mod tests { format_options: FormatOptions::default(), }, ); - assert_eq!("Invalid argument error: 100000000000 is too large to store in a Decimal256 of precision 10. Max is 9999999999", err.unwrap_err().to_string()); + assert_eq!("Invalid argument error: 1000.00000000 is too large to store in a Decimal256 of precision 10. Max is 99.99999999", err.unwrap_err().to_string()); } #[test] @@ -10173,7 +10170,7 @@ mod tests { format_options: FormatOptions::default(), }, ); - assert_eq!("Invalid argument error: 1234567000 is too large to store in a Decimal128 of precision 7. 
Max is 9999999", err.unwrap_err().to_string()); + assert_eq!("Invalid argument error: 1234567.000 is too large to store in a Decimal128 of precision 7. Max is 9999.999", err.unwrap_err().to_string()); } #[test] @@ -10199,7 +10196,7 @@ mod tests { format_options: FormatOptions::default(), }, ); - assert_eq!("Invalid argument error: 1234567000 is too large to store in a Decimal256 of precision 7. Max is 9999999", err.unwrap_err().to_string()); + assert_eq!("Invalid argument error: 1234567.000 is too large to store in a Decimal256 of precision 7. Max is 9999.999", err.unwrap_err().to_string()); } /// helper function to test casting from duration to interval @@ -10839,7 +10836,7 @@ mod tests { input_repr: 99999, // 9999.9 output_prec: 7, output_scale: 6, - expected_output_repr: Err("Invalid argument error: 9999900000 is too large to store in a {} of precision 7. Max is 9999999".to_string()) // max is 9.999999 + expected_output_repr: Err("Invalid argument error: 9999.900000 is too large to store in a {} of precision 7. Max is 9.999999".to_string()) // max is 9.999999 }, // increase precision, decrease scale, always infallible DecimalCastTestConfig { @@ -10884,7 +10881,7 @@ mod tests { input_repr: 9999999, // 99.99999 output_prec: 8, output_scale: 7, - expected_output_repr: Err("Invalid argument error: 999999900 is too large to store in a {} of precision 8. Max is 99999999".to_string()) // max is 9.9999999 + expected_output_repr: Err("Invalid argument error: 99.9999900 is too large to store in a {} of precision 8. Max is 9.9999999".to_string()) // max is 9.9999999 }, // decrease precision, decrease scale, safe, infallible DecimalCastTestConfig { @@ -10911,7 +10908,7 @@ mod tests { input_repr: 9999999, // 99.99999 output_prec: 4, output_scale: 3, - expected_output_repr: Err("Invalid argument error: 100000 is too large to store in a {} of precision 4. 
Max is 9999".to_string()) // max is 9.999 + expected_output_repr: Err("Invalid argument error: 100.000 is too large to store in a {} of precision 4. Max is 9.999".to_string()) // max is 9.999 }, // decrease precision, same scale, safe DecimalCastTestConfig { @@ -10929,7 +10926,7 @@ mod tests { input_repr: 9999999, // 99.99999 output_prec: 6, output_scale: 5, - expected_output_repr: Err("Invalid argument error: 9999999 is too large to store in a {} of precision 6. Max is 999999".to_string()) // max is 9.99999 + expected_output_repr: Err("Invalid argument error: 99.99999 is too large to store in a {} of precision 6. Max is 9.99999".to_string()) // max is 9.99999 }, // same precision, increase scale, safe DecimalCastTestConfig { @@ -10947,7 +10944,7 @@ mod tests { input_repr: 123456, // 12.3456 output_prec: 7, output_scale: 6, - expected_output_repr: Err("Invalid argument error: 12345600 is too large to store in a {} of precision 7. Max is 9999999".to_string()) // max is 9.99999 + expected_output_repr: Err("Invalid argument error: 12.345600 is too large to store in a {} of precision 7. Max is 9.999999".to_string()) // max is 9.99999 }, // same precision, decrease scale, infallible DecimalCastTestConfig { @@ -11042,7 +11039,7 @@ mod tests { input_repr: -12345, output_prec: 6, output_scale: 5, - expected_output_repr: Err("Invalid argument error: -1234500 is too small to store in a {} of precision 6. Min is -999999".to_string()) + expected_output_repr: Err("Invalid argument error: -12.34500 is too small to store in a {} of precision 6. Min is -9.99999".to_string()) }, ]; @@ -11093,7 +11090,7 @@ mod tests { output_prec: 6, output_scale: 3, expected_output_repr: - Err("Invalid argument error: 1000000 is too large to store in a {} of precision 6. Max is 999999".to_string()), + Err("Invalid argument error: 1000.000 is too large to store in a {} of precision 6. 
Max is 999.999".to_string()), }, ]; for t in test_cases { @@ -11115,7 +11112,7 @@ mod tests { }; let result = cast_with_options(&array, &output_type, &options); assert_eq!(result.unwrap_err().to_string(), - "Invalid argument error: 123456789 is too large to store in a Decimal128 of precision 6. Max is 999999"); + "Invalid argument error: 1234567.89 is too large to store in a Decimal128 of precision 6. Max is 9999.99"); } #[test] @@ -11161,7 +11158,7 @@ mod tests { }; let result = cast_with_options(&array, &output_type, &options); assert_eq!(result.unwrap_err().to_string(), - "Invalid argument error: 1234568 is too large to store in a Decimal128 of precision 6. Max is 999999"); + "Invalid argument error: 12345.68 is too large to store in a Decimal128 of precision 6. Max is 9999.99"); } #[test] @@ -11178,7 +11175,7 @@ mod tests { }; let result = cast_with_options(&array, &output_type, &options); assert_eq!(result.unwrap_err().to_string(), - "Invalid argument error: 1234567890 is too large to store in a Decimal128 of precision 6. Max is 999999"); + "Invalid argument error: 1234567.890 is too large to store in a Decimal128 of precision 6. Max is 999.999"); } #[test] @@ -11193,9 +11190,9 @@ mod tests { safe: false, ..Default::default() }; - let result = cast_with_options(&array, &output_type, &options); - assert_eq!(result.unwrap_err().to_string(), - "Invalid argument error: 123456789 is too large to store in a Decimal256 of precision 6. Max is 999999"); + let result = cast_with_options(&array, &output_type, &options).unwrap_err(); + assert_eq!(result.to_string(), + "Invalid argument error: 1234567.89 is too large to store in a Decimal256 of precision 6. 
Max is 9999.99"); } #[test] @@ -11234,4 +11231,31 @@ mod tests { )) as ArrayRef; assert_eq!(*fixed_array, *r); } + + #[test] + fn test_cast_decimal_error_output() { + let array = Int64Array::from(vec![1]); + let error = cast_with_options( + &array, + &DataType::Decimal32(1, 1), + &CastOptions { + safe: false, + format_options: FormatOptions::default(), + }, + ) + .unwrap_err(); + assert_eq!(error.to_string(), "Invalid argument error: 1.0 is too large to store in a Decimal32 of precision 1. Max is 0.9"); + + let array = Int64Array::from(vec![-1]); + let error = cast_with_options( + &array, + &DataType::Decimal32(1, 1), + &CastOptions { + safe: false, + format_options: FormatOptions::default(), + }, + ) + .unwrap_err(); + assert_eq!(error.to_string(), "Invalid argument error: -1.0 is too small to store in a Decimal32 of precision 1. Min is -0.9"); + } } diff --git a/arrow-data/src/decimal.rs b/arrow-data/src/decimal.rs index 35a7c08d8e47..2c26ca42196b 100644 --- a/arrow-data/src/decimal.rs +++ b/arrow-data/src/decimal.rs @@ -1021,21 +1021,39 @@ pub const MIN_DECIMAL32_FOR_EACH_PRECISION: [i32; 10] = [ /// /// [`Decimal32`]: arrow_schema::DataType::Decimal32 #[inline] -pub fn validate_decimal32_precision(value: i32, precision: u8) -> Result<(), ArrowError> { +pub fn validate_decimal32_precision( + value: i32, + precision: u8, + scale: i8, +) -> Result<(), ArrowError> { if precision > DECIMAL32_MAX_PRECISION { return Err(ArrowError::InvalidArgumentError(format!( "Max precision of a Decimal32 is {DECIMAL32_MAX_PRECISION}, but got {precision}", ))); } if value > MAX_DECIMAL32_FOR_EACH_PRECISION[precision as usize] { + let unscaled_value = + format_decimal_str_internal(&value.to_string(), precision.into(), scale, false); + let unscale_max_value = format_decimal_str( + &MAX_DECIMAL32_FOR_EACH_PRECISION[precision as usize].to_string(), + precision.into(), + scale, + ); Err(ArrowError::InvalidArgumentError(format!( - "{value} is too large to store in a Decimal32 of precision 
{precision}. Max is {}", - MAX_DECIMAL32_FOR_EACH_PRECISION[precision as usize] + "{unscaled_value} is too large to store in a Decimal32 of precision {precision}. Max is {}", + unscale_max_value ))) } else if value < MIN_DECIMAL32_FOR_EACH_PRECISION[precision as usize] { + let unscaled_value = + format_decimal_str_internal(&value.to_string(), precision.into(), scale, false); + let unscale_min_value = format_decimal_str( + &MIN_DECIMAL32_FOR_EACH_PRECISION[precision as usize].to_string(), + precision.into(), + scale, + ); Err(ArrowError::InvalidArgumentError(format!( - "{value} is too small to store in a Decimal32 of precision {precision}. Min is {}", - MIN_DECIMAL32_FOR_EACH_PRECISION[precision as usize] + "{unscaled_value} is too small to store in a Decimal32 of precision {precision}. Min is {}", + unscale_min_value ))) } else { Ok(()) @@ -1058,21 +1076,39 @@ pub fn is_validate_decimal32_precision(value: i32, precision: u8) -> bool { /// /// [`Decimal64`]: arrow_schema::DataType::Decimal64 #[inline] -pub fn validate_decimal64_precision(value: i64, precision: u8) -> Result<(), ArrowError> { +pub fn validate_decimal64_precision( + value: i64, + precision: u8, + scale: i8, +) -> Result<(), ArrowError> { if precision > DECIMAL64_MAX_PRECISION { return Err(ArrowError::InvalidArgumentError(format!( "Max precision of a Decimal64 is {DECIMAL64_MAX_PRECISION}, but got {precision}", ))); } if value > MAX_DECIMAL64_FOR_EACH_PRECISION[precision as usize] { + let unscaled_value = + format_decimal_str_internal(&value.to_string(), precision.into(), scale, false); + let unscaled_max_value = format_decimal_str( + &MAX_DECIMAL64_FOR_EACH_PRECISION[precision as usize].to_string(), + precision.into(), + scale, + ); Err(ArrowError::InvalidArgumentError(format!( - "{value} is too large to store in a Decimal64 of precision {precision}. Max is {}", - MAX_DECIMAL64_FOR_EACH_PRECISION[precision as usize] + "{unscaled_value} is too large to store in a Decimal64 of precision {precision}. 
Max is {}", + unscaled_max_value ))) } else if value < MIN_DECIMAL64_FOR_EACH_PRECISION[precision as usize] { + let unscaled_value = + format_decimal_str_internal(&value.to_string(), precision.into(), scale, false); + let unscaled_min_value = format_decimal_str( + &MIN_DECIMAL64_FOR_EACH_PRECISION[precision as usize].to_string(), + precision.into(), + scale, + ); Err(ArrowError::InvalidArgumentError(format!( - "{value} is too small to store in a Decimal64 of precision {precision}. Min is {}", - MIN_DECIMAL64_FOR_EACH_PRECISION[precision as usize] + "{unscaled_value} is too small to store in a Decimal64 of precision {precision}. Min is {}", + unscaled_min_value ))) } else { Ok(()) @@ -1095,21 +1131,35 @@ pub fn is_validate_decimal64_precision(value: i64, precision: u8) -> bool { /// /// [`Decimal128`]: arrow_schema::DataType::Decimal128 #[inline] -pub fn validate_decimal_precision(value: i128, precision: u8) -> Result<(), ArrowError> { +pub fn validate_decimal_precision(value: i128, precision: u8, scale: i8) -> Result<(), ArrowError> { if precision > DECIMAL128_MAX_PRECISION { return Err(ArrowError::InvalidArgumentError(format!( "Max precision of a Decimal128 is {DECIMAL128_MAX_PRECISION}, but got {precision}", ))); } if value > MAX_DECIMAL128_FOR_EACH_PRECISION[precision as usize] { + let unscaled_value = + format_decimal_str_internal(&value.to_string(), precision.into(), scale, false); + let unscaled_max_value = format_decimal_str( + &MAX_DECIMAL128_FOR_EACH_PRECISION[precision as usize].to_string(), + precision.into(), + scale, + ); Err(ArrowError::InvalidArgumentError(format!( - "{value} is too large to store in a Decimal128 of precision {precision}. Max is {}", - MAX_DECIMAL128_FOR_EACH_PRECISION[precision as usize] + "{unscaled_value} is too large to store in a Decimal128 of precision {precision}. 
Max is {}", + unscaled_max_value ))) } else if value < MIN_DECIMAL128_FOR_EACH_PRECISION[precision as usize] { + let unscaled_value = + format_decimal_str_internal(&value.to_string(), precision.into(), scale, false); + let unscaled_min_value = format_decimal_str( + &MIN_DECIMAL128_FOR_EACH_PRECISION[precision as usize].to_string(), + precision.into(), + scale, + ); Err(ArrowError::InvalidArgumentError(format!( - "{value} is too small to store in a Decimal128 of precision {precision}. Min is {}", - MIN_DECIMAL128_FOR_EACH_PRECISION[precision as usize] + "{unscaled_value} is too small to store in a Decimal128 of precision {precision}. Min is {}", + unscaled_min_value ))) } else { Ok(()) @@ -1132,21 +1182,40 @@ pub fn is_validate_decimal_precision(value: i128, precision: u8) -> bool { /// /// [`Decimal256`]: arrow_schema::DataType::Decimal256 #[inline] -pub fn validate_decimal256_precision(value: i256, precision: u8) -> Result<(), ArrowError> { +pub fn validate_decimal256_precision( + value: i256, + precision: u8, + scale: i8, +) -> Result<(), ArrowError> { if precision > DECIMAL256_MAX_PRECISION { return Err(ArrowError::InvalidArgumentError(format!( "Max precision of a Decimal256 is {DECIMAL256_MAX_PRECISION}, but got {precision}", ))); } + if value > MAX_DECIMAL256_FOR_EACH_PRECISION[precision as usize] { + let unscaled_value = + format_decimal_str_internal(&value.to_string(), precision.into(), scale, false); + let unscaled_max_value = format_decimal_str( + &MAX_DECIMAL256_FOR_EACH_PRECISION[precision as usize].to_string(), + precision.into(), + scale, + ); Err(ArrowError::InvalidArgumentError(format!( - "{value:?} is too large to store in a Decimal256 of precision {precision}. Max is {:?}", - MAX_DECIMAL256_FOR_EACH_PRECISION[precision as usize] + "{unscaled_value} is too large to store in a Decimal256 of precision {precision}. 
Max is {}", + unscaled_max_value ))) } else if value < MIN_DECIMAL256_FOR_EACH_PRECISION[precision as usize] { + let unscaled_value = + format_decimal_str_internal(&value.to_string(), precision.into(), scale, false); + let unscaled_min_value = format_decimal_str( + &MIN_DECIMAL256_FOR_EACH_PRECISION[precision as usize].to_string(), + precision.into(), + scale, + ); Err(ArrowError::InvalidArgumentError(format!( - "{value:?} is too small to store in a Decimal256 of precision {precision}. Min is {:?}", - MIN_DECIMAL256_FOR_EACH_PRECISION[precision as usize] + "{unscaled_value} is too small to store in a Decimal256 of precision {precision}. Min is {}", + unscaled_min_value ))) } else { Ok(()) @@ -1163,3 +1232,44 @@ pub fn is_validate_decimal256_precision(value: i256, precision: u8) -> bool { && value >= MIN_DECIMAL256_FOR_EACH_PRECISION[precision as usize] && value <= MAX_DECIMAL256_FOR_EACH_PRECISION[precision as usize] } + +#[inline] +/// Formats a decimal string given the precision and scale. +pub fn format_decimal_str(value_str: &str, precision: usize, scale: i8) -> String { + format_decimal_str_internal(value_str, precision, scale, true) +} + +// Format a decimal string given the precision and scale. +// If `safe_decimal` is true, the function will ensure that the output string +// does not exceed the specified precision. 
+fn format_decimal_str_internal( + value_str: &str, + precision: usize, + scale: i8, + safe_decimal: bool, +) -> String { + let (sign, rest) = match value_str.strip_prefix('-') { + Some(stripped) => ("-", stripped), + None => ("", value_str), + }; + let bound = if safe_decimal { + precision.min(rest.len()) + sign.len() + } else { + value_str.len() + }; + let value_str = &value_str[0..bound]; + + if scale == 0 { + value_str.to_string() + } else if scale < 0 { + let padding = value_str.len() + scale.unsigned_abs() as usize; + format!("{value_str:0<padding$}") + } else if rest.len() > scale as usize { + // Decimal separator is in the middle of the string + let (whole, decimal) = value_str.split_at(value_str.len() - scale as usize); + format!("{whole}.{decimal}") + } else { + // String has to be padded + format!("{}0.{:0>width$}", sign, rest, width = scale as usize) + } +} diff --git a/arrow/tests/array_validation.rs b/arrow/tests/array_validation.rs index 62cda6b8ec79..e1f6944a93bb 100644 --- a/arrow/tests/array_validation.rs +++ b/arrow/tests/array_validation.rs @@ -1056,10 +1056,19 @@ fn test_string_data_from_foreign() { #[test] fn test_decimal_full_validation() { + let array = Decimal128Array::from(vec![123456_i128]) + .with_precision_and_scale(5, 2) + .unwrap(); + let error = array.validate_decimal_precision(5).unwrap_err(); + assert_eq!( + "Invalid argument error: 1234.56 is too large to store in a Decimal128 of precision 5. Max is 999.99", + error.to_string() + ); + let array = Decimal128Array::from(vec![123456_i128]); let error = array.validate_decimal_precision(5).unwrap_err(); assert_eq!( - "Invalid argument error: 123456 is too large to store in a Decimal128 of precision 5. Max is 99999", + "Invalid argument error: Decimal precision 5 is less than scale 10", error.to_string() ); }