From 327530bf9af8576003c59cf16e906af75e924399 Mon Sep 17 00:00:00 2001 From: Chongchen Chen Date: Tue, 24 Jun 2025 23:21:16 +0800 Subject: [PATCH 1/5] fix: The inconsistency between scalar and array on the cast of timestamp --- datafusion/common/src/scalar/mod.rs | 2 +- datafusion/sqllogictest/test_files/timestamps.slt | 13 +++++++++---- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/datafusion/common/src/scalar/mod.rs b/datafusion/common/src/scalar/mod.rs index f774f46b424d5..74a3f160bf5de 100644 --- a/datafusion/common/src/scalar/mod.rs +++ b/datafusion/common/src/scalar/mod.rs @@ -3069,7 +3069,7 @@ impl ScalarValue { ScalarValue::Decimal128(Some(decimal_value), _, scale), DataType::Timestamp(time_unit, None), ) => { - let scale_factor = 10_i128.pow(*scale as u32); + let scale_factor = 10_i128.pow(*scale as u32 + 3); let seconds = decimal_value / scale_factor; let fraction = decimal_value % scale_factor; diff --git a/datafusion/sqllogictest/test_files/timestamps.slt b/datafusion/sqllogictest/test_files/timestamps.slt index 44d0f1f97d4d5..bb37664a7228a 100644 --- a/datafusion/sqllogictest/test_files/timestamps.slt +++ b/datafusion/sqllogictest/test_files/timestamps.slt @@ -421,12 +421,12 @@ SELECT to_timestamp(123456789.123456789) as c1, cast(123456789.123456789 as time query PPP SELECT to_timestamp(arrow_cast(1.1, 'Decimal128(2,1)')) as c1, cast(arrow_cast(1.1, 'Decimal128(2,1)') as timestamp) as c2, arrow_cast(1.1, 'Decimal128(2,1)')::timestamp as c3; ---- -1970-01-01T00:00:01.100 1970-01-01T00:00:01.100 1970-01-01T00:00:01.100 +1970-01-01T00:00:01.100 1970-01-01T00:00:00.001100 1970-01-01T00:00:00.001100 query PPP SELECT to_timestamp(arrow_cast(-1.1, 'Decimal128(2,1)')) as c1, cast(arrow_cast(-1.1, 'Decimal128(2,1)') as timestamp) as c2, arrow_cast(-1.1, 'Decimal128(2,1)')::timestamp as c3; ---- -1969-12-31T23:59:58.900 1969-12-31T23:59:58.900 1969-12-31T23:59:58.900 +1969-12-31T23:59:58.900 1969-12-31T23:59:59.998900 1969-12-31T23:59:59.998900 query PPP SELECT to_timestamp(arrow_cast(0.0, 'Decimal128(2,1)')) as c1, cast(arrow_cast(0.0, 'Decimal128(2,1)') as timestamp) as c2, arrow_cast(0.0, 'Decimal128(2,1)')::timestamp as c3; @@ -436,12 +436,12 @@ SELECT to_timestamp(arrow_cast(0.0, 'Decimal128(2,1)')) as c1, cast(arrow_cast(0 query PPP SELECT to_timestamp(arrow_cast(1.23456789, 'Decimal128(9,8)')) as c1, cast(arrow_cast(1.23456789, 'Decimal128(9,8)') as timestamp) as c2, arrow_cast(1.23456789, 'Decimal128(9,8)')::timestamp as c3; ---- -1970-01-01T00:00:01.234567890 1970-01-01T00:00:01.234567890 1970-01-01T00:00:01.234567890 +1970-01-01T00:00:01.234567890 1970-01-01T00:00:00.001234567 1970-01-01T00:00:00.001234567 query PPP SELECT to_timestamp(arrow_cast(123456789.123456789, 'Decimal128(18,9)')) as c1, cast(arrow_cast(123456789.123456789, 'Decimal128(18,9)') as timestamp) as c2, arrow_cast(123456789.123456789, 'Decimal128(18,9)')::timestamp as c3; ---- -1973-11-29T21:33:09.123456784 1973-11-29T21:33:09.123456784 1973-11-29T21:33:09.123456784 +1973-11-29T21:33:09.123456784 1970-01-02T10:17:36.789123456 1970-01-02T10:17:36.789123456 # from_unixtime @@ -3420,3 +3420,8 @@ select to_timestamp('-1'); query error DataFusion error: Arrow error: Parser error: Error parsing timestamp from '\-1': timestamp must contain at least 10 characters select to_timestamp(arrow_cast('-1', 'Utf8')); + +query B +SELECT CAST(CAST(x AS decimal(17,2)) AS timestamp(3)) = CAST(CAST(1 AS decimal(17,2)) AS timestamp(3)) from (values (1)) t(x); +---- +true From 1810f7dcba99daab6299963d0a20834eecfb2ec4 Mon Sep 17 00:00:00 2001 From: Chongchen Chen Date: Wed, 25 Jun 2025 08:26:28 +0800 Subject: [PATCH 2/5] fix for timeunit --- datafusion/common/src/scalar/mod.rs | 8 +++- .../sqllogictest/test_files/timestamps.slt | 44 ++++++++++++++++--- 2 files changed, 44 insertions(+), 8 deletions(-) diff --git a/datafusion/common/src/scalar/mod.rs b/datafusion/common/src/scalar/mod.rs index 74a3f160bf5de..33c312978d09b 100644 --- a/datafusion/common/src/scalar/mod.rs +++ b/datafusion/common/src/scalar/mod.rs @@ -3069,7 +3069,13 @@ impl ScalarValue { ScalarValue::Decimal128(Some(decimal_value), _, scale), DataType::Timestamp(time_unit, None), ) => { - let scale_factor = 10_i128.pow(*scale as u32 + 3); + let scale = (*scale as u32) + match time_unit { + TimeUnit::Second => 0, + TimeUnit::Millisecond => 3, + TimeUnit::Microsecond => 6, + TimeUnit::Nanosecond => 9, + }; + let scale_factor = 10_i128.pow(scale); let seconds = decimal_value / scale_factor; let fraction = decimal_value % scale_factor; diff --git a/datafusion/sqllogictest/test_files/timestamps.slt b/datafusion/sqllogictest/test_files/timestamps.slt index bb37664a7228a..1c76745dfac27 100644 --- a/datafusion/sqllogictest/test_files/timestamps.slt +++ b/datafusion/sqllogictest/test_files/timestamps.slt @@ -421,12 +421,12 @@ SELECT to_timestamp(123456789.123456789) as c1, cast(123456789.123456789 as time query PPP SELECT to_timestamp(arrow_cast(1.1, 'Decimal128(2,1)')) as c1, cast(arrow_cast(1.1, 'Decimal128(2,1)') as timestamp) as c2, arrow_cast(1.1, 'Decimal128(2,1)')::timestamp as c3; ---- -1970-01-01T00:00:01.100 1970-01-01T00:00:00.001100 1970-01-01T00:00:00.001100 +1970-01-01T00:00:01.100 1970-01-01T00:00:00.000000001 1970-01-01T00:00:00.000000001 query PPP SELECT to_timestamp(arrow_cast(-1.1, 'Decimal128(2,1)')) as c1, cast(arrow_cast(-1.1, 'Decimal128(2,1)') as timestamp) as c2, arrow_cast(-1.1, 'Decimal128(2,1)')::timestamp as c3; ---- -1969-12-31T23:59:58.900 1969-12-31T23:59:59.998900 1969-12-31T23:59:59.998900 +1969-12-31T23:59:58.900 1969-12-31T23:59:59.999999999 1969-12-31T23:59:59.999999999 query PPP SELECT to_timestamp(arrow_cast(0.0, 'Decimal128(2,1)')) as c1, cast(arrow_cast(0.0, 'Decimal128(2,1)') as timestamp) as c2, arrow_cast(0.0, 'Decimal128(2,1)')::timestamp as c3; @@ -436,12 +436,12 @@ SELECT to_timestamp(arrow_cast(0.0, 'Decimal128(2,1)')) as c1, cast(arrow_cast(0 query PPP SELECT to_timestamp(arrow_cast(1.23456789, 'Decimal128(9,8)')) as c1, cast(arrow_cast(1.23456789, 'Decimal128(9,8)') as timestamp) as c2, arrow_cast(1.23456789, 'Decimal128(9,8)')::timestamp as c3; ---- -1970-01-01T00:00:01.234567890 1970-01-01T00:00:00.001234567 1970-01-01T00:00:00.001234567 +1970-01-01T00:00:01.234567890 1970-01-01T00:00:00.000000001 1970-01-01T00:00:00.000000001 query PPP SELECT to_timestamp(arrow_cast(123456789.123456789, 'Decimal128(18,9)')) as c1, cast(arrow_cast(123456789.123456789, 'Decimal128(18,9)') as timestamp) as c2, arrow_cast(123456789.123456789, 'Decimal128(18,9)')::timestamp as c3; ---- -1973-11-29T21:33:09.123456784 1970-01-02T10:17:36.789123456 1970-01-02T10:17:36.789123456 +1973-11-29T21:33:09.123456784 1970-01-01T00:00:00.123456789 1970-01-01T00:00:00.123456789 # from_unixtime @@ -3421,7 +3421,37 @@ select to_timestamp('-1'); query error DataFusion error: Arrow error: Parser error: Error parsing timestamp from '\-1': timestamp must contain at least 10 characters select to_timestamp(arrow_cast('-1', 'Utf8')); -query B -SELECT CAST(CAST(x AS decimal(17,2)) AS timestamp(3)) = CAST(CAST(1 AS decimal(17,2)) AS timestamp(3)) from (values (1)) t(x); +query P +SELECT CAST(CAST(1 AS decimal(17,2)) AS timestamp(3)) AS a UNION ALL +SELECT CAST(CAST(one AS decimal(17,2)) AS timestamp(3)) AS a FROM (VALUES (1)) t(one); ---- -true +1970-01-01T00:00:00.001 +1970-01-01T00:00:00.001 + +query P +SELECT arrow_cast(CAST(1 AS decimal(17,2)), 'Timestamp(Nanosecond, None)') AS a UNION ALL +SELECT arrow_cast(CAST(one AS decimal(17,2)), 'Timestamp(Nanosecond, None)') AS a FROM (VALUES (1)) t(one); +---- +1970-01-01T00:00:00.000000001 +1970-01-01T00:00:00.000000001 + +query P +SELECT arrow_cast(CAST(1 AS decimal(17,2)), 'Timestamp(Microsecond, None)') AS a UNION ALL +SELECT arrow_cast(CAST(one AS decimal(17,2)), 'Timestamp(Microsecond, None)') AS a FROM (VALUES (1)) t(one); +---- +1970-01-01T00:00:00.000001 +1970-01-01T00:00:00.000001 + +query P +SELECT arrow_cast(CAST(1 AS decimal(17,2)), 'Timestamp(Millisecond, None)') AS a UNION ALL +SELECT arrow_cast(CAST(one AS decimal(17,2)), 'Timestamp(Millisecond, None)') AS a FROM (VALUES (1)) t(one); +---- +1970-01-01T00:00:00.001 +1970-01-01T00:00:00.001 + +query P +SELECT arrow_cast(CAST(1 AS decimal(17,2)), 'Timestamp(Second, None)') AS a UNION ALL +SELECT arrow_cast(CAST(one AS decimal(17,2)), 'Timestamp(Second, None)') AS a FROM (VALUES (1)) t(one); +---- +1970-01-01T00:00:01 +1970-01-01T00:00:01 From 92f26f1c7c618fa0cc4c438fa39ea7c1f3569043 Mon Sep 17 00:00:00 2001 From: Chongchen Chen Date: Wed, 25 Jun 2025 08:28:29 +0800 Subject: [PATCH 3/5] fmt --- datafusion/common/src/scalar/mod.rs | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/datafusion/common/src/scalar/mod.rs b/datafusion/common/src/scalar/mod.rs index 33c312978d09b..307dea2666b8d 100644 --- a/datafusion/common/src/scalar/mod.rs +++ b/datafusion/common/src/scalar/mod.rs @@ -3069,12 +3069,13 @@ impl ScalarValue { ScalarValue::Decimal128(Some(decimal_value), _, scale), DataType::Timestamp(time_unit, None), ) => { - let scale = (*scale as u32) + match time_unit { - TimeUnit::Second => 0, - TimeUnit::Millisecond => 3, - TimeUnit::Microsecond => 6, - TimeUnit::Nanosecond => 9, - }; + let scale = (*scale as u32) + + match time_unit { + TimeUnit::Second => 0, + TimeUnit::Millisecond => 3, + TimeUnit::Microsecond => 6, + TimeUnit::Nanosecond => 9, + }; let scale_factor = 10_i128.pow(scale); let seconds = decimal_value / scale_factor; let fraction = decimal_value % scale_factor; From d89f0f974cf6db3cd730de14a720df7f6c91a333 Mon Sep 17 00:00:00 2001 From: Chongchen Chen Date: Thu, 26 Jun 2025 22:12:55 +0800 Subject: [PATCH 4/5] add slt --- .../sqllogictest/test_files/timestamps.slt | 29 +++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/datafusion/sqllogictest/test_files/timestamps.slt b/datafusion/sqllogictest/test_files/timestamps.slt index 1c76745dfac27..ce8c094c01065 100644 --- a/datafusion/sqllogictest/test_files/timestamps.slt +++ b/datafusion/sqllogictest/test_files/timestamps.slt @@ -3455,3 +3455,32 @@ SELECT arrow_cast(CAST(one AS decimal(17,2)), 'Timestamp(Second, None)') AS a FR ---- 1970-01-01T00:00:01 1970-01-01T00:00:01 + + +query P +SELECT arrow_cast(CAST(1.123 AS decimal(17,3)), 'Timestamp(Nanosecond, None)') AS a UNION ALL +SELECT arrow_cast(CAST(one AS decimal(17,3)), 'Timestamp(Nanosecond, None)') AS a FROM (VALUES (1.123)) t(one); +---- +1970-01-01T00:00:00.000000001 +1970-01-01T00:00:00.000000001 + +query P +SELECT arrow_cast(CAST(1.123 AS decimal(17,3)), 'Timestamp(Microsecond, None)') AS a UNION ALL +SELECT arrow_cast(CAST(one AS decimal(17,3)), 'Timestamp(Microsecond, None)') AS a FROM (VALUES (1.123)) t(one); +---- +1970-01-01T00:00:00.000001 +1970-01-01T00:00:00.000001 + +query P +SELECT arrow_cast(CAST(1.123 AS decimal(17,3)), 'Timestamp(Millisecond, None)') AS a UNION ALL +SELECT arrow_cast(CAST(one AS decimal(17,3)), 'Timestamp(Millisecond, None)') AS a FROM (VALUES (1.123)) t(one); +---- +1970-01-01T00:00:00.001 +1970-01-01T00:00:00.001 + +query P +SELECT arrow_cast(CAST(1.123 AS decimal(17,3)), 'Timestamp(Second, None)') AS a UNION ALL +SELECT arrow_cast(CAST(one AS decimal(17,3)), 'Timestamp(Second, None)') AS a FROM (VALUES (1.123)) t(one); +---- +1970-01-01T00:00:01 +1970-01-01T00:00:01 From 0b084f3f64a1b30c8f26a2d8a574c82b7ff25ba2 Mon Sep 17 00:00:00 2001 From: Chongchen Chen Date: Fri, 18 Jul 2025 08:55:56 +0800 Subject: [PATCH 5/5] update based on findepi's suggestion --- datafusion/common/src/scalar/mod.rs | 45 +---------------------------- 1 file changed, 1 insertion(+), 44 deletions(-) diff --git a/datafusion/common/src/scalar/mod.rs b/datafusion/common/src/scalar/mod.rs index 307dea2666b8d..084e2b0ad83e3 100644 --- a/datafusion/common/src/scalar/mod.rs +++ b/datafusion/common/src/scalar/mod.rs @@ -3059,50 +3059,7 @@ impl ScalarValue { target_type: &DataType, cast_options: &CastOptions<'static>, ) -> Result { - let scalar_array = match (self, target_type) { - ( - ScalarValue::Float64(Some(float_ts)), - DataType::Timestamp(TimeUnit::Nanosecond, None), - ) => ScalarValue::Int64(Some((float_ts * 1_000_000_000_f64).trunc() as i64)) - .to_array()?, - ( - ScalarValue::Decimal128(Some(decimal_value), _, scale), - DataType::Timestamp(time_unit, None), - ) => { - let scale = (*scale as u32) - + match time_unit { - TimeUnit::Second => 0, - TimeUnit::Millisecond => 3, - TimeUnit::Microsecond => 6, - TimeUnit::Nanosecond => 9, - }; - let scale_factor = 10_i128.pow(scale); - let seconds = decimal_value / scale_factor; - let fraction = decimal_value % scale_factor; - - let timestamp_value = match time_unit { - TimeUnit::Second => ScalarValue::Int64(Some(seconds as i64)), - TimeUnit::Millisecond => { - let millis = seconds * 1_000 + (fraction * 1_000) / scale_factor; - ScalarValue::Int64(Some(millis as i64)) - } - TimeUnit::Microsecond => { - let micros = - seconds * 1_000_000 + (fraction * 1_000_000) / scale_factor; - ScalarValue::Int64(Some(micros as i64)) - } - TimeUnit::Nanosecond => { - let nanos = seconds * 1_000_000_000 - + (fraction * 1_000_000_000) / scale_factor; - ScalarValue::Int64(Some(nanos as i64)) - } - }; - - timestamp_value.to_array()? - } - _ => self.to_array()?, - }; - + let scalar_array = self.to_array()?; let cast_arr = cast_with_options(&scalar_array, target_type, cast_options)?; ScalarValue::try_from_array(&cast_arr, 0) }