diff --git a/datafusion/common/src/config.rs b/datafusion/common/src/config.rs index bc321b227ee52..f4afdf7002078 100644 --- a/datafusion/common/src/config.rs +++ b/datafusion/common/src/config.rs @@ -467,9 +467,8 @@ config_namespace! { /// The default time zone /// - /// Some functions, e.g. `EXTRACT(HOUR from SOME_TIME)`, shift the underlying datetime - /// according to this time zone, and then extract the hour - pub time_zone: String, default = "+00:00".into() + /// Some functions, e.g. `now` return timestamps in this time zone + pub time_zone: Option, default = None /// Parquet options pub parquet: ParquetOptions, default = Default::default() diff --git a/datafusion/core/tests/expr_api/simplification.rs b/datafusion/core/tests/expr_api/simplification.rs index 572a7e2b335c4..46c36c6abdacc 100644 --- a/datafusion/core/tests/expr_api/simplification.rs +++ b/datafusion/core/tests/expr_api/simplification.rs @@ -514,7 +514,7 @@ fn multiple_now() -> Result<()> { // expect the same timestamp appears in both exprs let actual = get_optimized_plan_formatted(plan, &time); let expected = format!( - "Projection: TimestampNanosecond({}, Some(\"+00:00\")) AS now(), TimestampNanosecond({}, Some(\"+00:00\")) AS t2\n TableScan: test", + "Projection: TimestampNanosecond({}, None) AS now(), TimestampNanosecond({}, None) AS t2\n TableScan: test", time.timestamp_nanos_opt().unwrap(), time.timestamp_nanos_opt().unwrap() ); diff --git a/datafusion/core/tests/user_defined/user_defined_scalar_functions.rs b/datafusion/core/tests/user_defined/user_defined_scalar_functions.rs index fb1371da6ceb3..3ca8f846aa5e5 100644 --- a/datafusion/core/tests/user_defined/user_defined_scalar_functions.rs +++ b/datafusion/core/tests/user_defined/user_defined_scalar_functions.rs @@ -1812,7 +1812,7 @@ async fn test_config_options_work_for_scalar_func() -> Result<()> { }); let mut config = SessionConfig::new(); - config.options_mut().execution.time_zone = "AEST".into(); + 
config.options_mut().execution.time_zone = Some("AEST".into()); let ctx = SessionContext::new_with_config(config); diff --git a/datafusion/functions/src/datetime/current_date.rs b/datafusion/functions/src/datetime/current_date.rs index 18b99bca8638e..da690b4e6be18 100644 --- a/datafusion/functions/src/datetime/current_date.rs +++ b/datafusion/functions/src/datetime/current_date.rs @@ -108,7 +108,14 @@ impl ScalarUDFImpl for CurrentDateFunc { let days = info .execution_props() .config_options() - .and_then(|config| config.execution.time_zone.parse::().ok()) + .and_then(|config| { + config + .execution + .time_zone + .as_ref() + .map(|tz| tz.parse::().ok()) + }) + .flatten() .map_or_else( || datetime_to_days(&now_ts), |tz| { diff --git a/datafusion/functions/src/datetime/current_time.rs b/datafusion/functions/src/datetime/current_time.rs index 4f5b199cce41e..9f3456b8777f0 100644 --- a/datafusion/functions/src/datetime/current_time.rs +++ b/datafusion/functions/src/datetime/current_time.rs @@ -104,7 +104,14 @@ impl ScalarUDFImpl for CurrentTimeFunc { let nano = info .execution_props() .config_options() - .and_then(|config| config.execution.time_zone.parse::().ok()) + .and_then(|config| { + config + .execution + .time_zone + .as_ref() + .map(|tz| tz.parse::().ok()) + }) + .flatten() .map_or_else( || datetime_to_time_nanos(&now_ts), |tz| { @@ -167,7 +174,11 @@ mod tests { fn set_session_timezone_env(tz: &str, start_time: DateTime) -> MockSimplifyInfo { let mut config = datafusion_common::config::ConfigOptions::default(); - config.execution.time_zone = tz.to_string(); + config.execution.time_zone = if tz.is_empty() { + None + } else { + Some(tz.to_string()) + }; let mut execution_props = ExecutionProps::new().with_query_execution_start_time(start_time); execution_props.config_options = Some(Arc::new(config)); diff --git a/datafusion/functions/src/datetime/now.rs b/datafusion/functions/src/datetime/now.rs index fe317d0a16f95..4723548a45584 100644 --- 
a/datafusion/functions/src/datetime/now.rs +++ b/datafusion/functions/src/datetime/now.rs @@ -33,7 +33,7 @@ use datafusion_macros::user_doc; #[user_doc( doc_section(label = "Time and Date Functions"), description = r#" -Returns the current UTC timestamp. +Returns the current timestamp in the system configured timezone (None by default). The `now()` return value is determined at query time and will return the same timestamp, no matter when in the query plan the function executes. "#, @@ -58,8 +58,7 @@ impl NowFunc { /// /// Prefer [`NowFunc::new_with_config`] which allows specifying the /// timezone via [`ConfigOptions`]. This helper now mirrors the - /// canonical default offset (`"+00:00"`) provided by - /// `ConfigOptions::default()`. + /// canonical default offset (None) provided by `ConfigOptions::default()`. pub fn new() -> Self { Self::new_with_config(&ConfigOptions::default()) } @@ -68,7 +67,11 @@ impl NowFunc { Self { signature: Signature::nullary(Volatility::Stable), aliases: vec!["current_timestamp".to_string()], - timezone: Some(Arc::from(config.execution.time_zone.as_str())), + timezone: config + .execution + .time_zone + .as_ref() + .map(|tz| Arc::from(tz.as_str())), } } } @@ -178,6 +181,6 @@ mod tests { ScalarValue::TimestampNanosecond(None, configured_now.timezone.clone()); assert_eq!(legacy_scalar, configured_scalar); - assert_eq!(Some("+00:00"), legacy_now.timezone.as_deref()); + assert_eq!(None, legacy_now.timezone.as_deref()); } } diff --git a/datafusion/sql/src/planner.rs b/datafusion/sql/src/planner.rs index 99138e1b00162..7bac0337672dc 100644 --- a/datafusion/sql/src/planner.rs +++ b/datafusion/sql/src/planner.rs @@ -693,7 +693,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { // Timestamp With Time Zone // INPUT : [SQLDataType] TimestampTz + [Config] Time Zone // OUTPUT: [ArrowDataType] Timestamp - Some(self.context_provider.options().execution.time_zone.clone()) + self.context_provider.options().execution.time_zone.clone() } else { // 
Timestamp Without Time zone None diff --git a/datafusion/sqllogictest/test_files/arrow_typeof.slt b/datafusion/sqllogictest/test_files/arrow_typeof.slt index cbb20acb2d910..5ba62be6873c3 100644 --- a/datafusion/sqllogictest/test_files/arrow_typeof.slt +++ b/datafusion/sqllogictest/test_files/arrow_typeof.slt @@ -67,7 +67,7 @@ Timestamp(ns) query T SELECT arrow_typeof(now()) ---- -Timestamp(ns, "+00:00") +Timestamp(ns) # arrow_typeof_timestamp_date32( query T diff --git a/datafusion/sqllogictest/test_files/dates.slt b/datafusion/sqllogictest/test_files/dates.slt index 32315eec20e63..abf64675e9039 100644 --- a/datafusion/sqllogictest/test_files/dates.slt +++ b/datafusion/sqllogictest/test_files/dates.slt @@ -91,7 +91,7 @@ where d3_date > now() + '5 days'; ---- DataFusion error: type_coercion caused by -Error during planning: Cannot coerce arithmetic expression Timestamp(ns, "+00:00") + Utf8 to valid types +Error during planning: Cannot coerce arithmetic expression Timestamp(ns) + Utf8 to valid types # DATE minus DATE diff --git a/datafusion/sqllogictest/test_files/ddl.slt b/datafusion/sqllogictest/test_files/ddl.slt index 64c78284594f6..0579659832feb 100644 --- a/datafusion/sqllogictest/test_files/ddl.slt +++ b/datafusion/sqllogictest/test_files/ddl.slt @@ -867,7 +867,7 @@ query TTTTTT show columns FROM table_with_pk; ---- datafusion public table_with_pk sn Int32 NO -datafusion public table_with_pk ts Timestamp(ns, "+00:00") NO +datafusion public table_with_pk ts Timestamp(ns) NO datafusion public table_with_pk currency Utf8View NO datafusion public table_with_pk amount Float32 YES diff --git a/datafusion/sqllogictest/test_files/group_by.slt b/datafusion/sqllogictest/test_files/group_by.slt index 08636b482e38d..fe7871c22b4c3 100644 --- a/datafusion/sqllogictest/test_files/group_by.slt +++ b/datafusion/sqllogictest/test_files/group_by.slt @@ -5556,7 +5556,7 @@ SELECT arrow_cast('2024-01-01T00:00:00Z'::timestamptz, 'Timestamp(Second, Some("+08:00"))') AS ts GROUP BY 
ts, text ---- -foo 2024-01-01T08:00:00+08:00 +foo 2024-01-01T00:00:00+08:00 # Test multi group by int + Decimal128 statement ok diff --git a/datafusion/sqllogictest/test_files/information_schema.slt b/datafusion/sqllogictest/test_files/information_schema.slt index c674057151492..7009d976d646f 100644 --- a/datafusion/sqllogictest/test_files/information_schema.slt +++ b/datafusion/sqllogictest/test_files/information_schema.slt @@ -267,7 +267,7 @@ datafusion.execution.sort_spill_reservation_bytes 10485760 datafusion.execution.spill_compression uncompressed datafusion.execution.split_file_groups_by_statistics false datafusion.execution.target_partitions 7 -datafusion.execution.time_zone +00:00 +datafusion.execution.time_zone NULL datafusion.execution.use_row_number_estimates_to_optimize_partitioning false datafusion.explain.analyze_level dev datafusion.explain.format indent @@ -387,7 +387,7 @@ datafusion.execution.sort_spill_reservation_bytes 10485760 Specifies the reserve datafusion.execution.spill_compression uncompressed Sets the compression codec used when spilling data to disk. Since datafusion writes spill files using the Arrow IPC Stream format, only codecs supported by the Arrow IPC Stream Writer are allowed. Valid values are: uncompressed, lz4_frame, zstd. Note: lz4_frame offers faster (de)compression, but typically results in larger spill files. In contrast, zstd achieves higher compression ratios at the cost of slower (de)compression speed. datafusion.execution.split_file_groups_by_statistics false Attempt to eliminate sorts by packing & sorting files with non-overlapping statistics into the same file groups. Currently experimental datafusion.execution.target_partitions 7 Number of partitions for query execution. Increasing partitions can increase concurrency. Defaults to the number of CPU cores on the system -datafusion.execution.time_zone +00:00 The default time zone Some functions, e.g. 
`EXTRACT(HOUR from SOME_TIME)`, shift the underlying datetime according to this time zone, and then extract the hour +datafusion.execution.time_zone NULL The default time zone Some functions, e.g. `now` return timestamps in this time zone datafusion.execution.use_row_number_estimates_to_optimize_partitioning false Should DataFusion use row number estimates at the input to decide whether increasing parallelism is beneficial or not. By default, only exact row numbers (not estimates) are used for this decision. Setting this flag to `true` will likely produce better plans. if the source of statistics is accurate. We plan to make this the default in the future. datafusion.explain.analyze_level dev Verbosity level for "EXPLAIN ANALYZE". Default is "dev" "summary" shows common metrics for high-level insights. "dev" provides deep operator-level introspection for developers. datafusion.explain.format indent Display format of explain. Default is "indent". When set to "tree", it will print the plan in a tree-rendered format. @@ -459,14 +459,14 @@ datafusion.execution.batch_size 8192 Default batch size while creating new batch query TT SHOW TIME ZONE ---- -datafusion.execution.time_zone +00:00 +datafusion.execution.time_zone NULL # show_timezone_default_utc # https://github.com/apache/datafusion/issues/3255 query TT SHOW TIMEZONE ---- -datafusion.execution.time_zone +00:00 +datafusion.execution.time_zone NULL # show_time_zone_default_utc_verbose @@ -474,14 +474,14 @@ datafusion.execution.time_zone +00:00 query TTT SHOW TIME ZONE VERBOSE ---- -datafusion.execution.time_zone +00:00 The default time zone Some functions, e.g. `EXTRACT(HOUR from SOME_TIME)`, shift the underlying datetime according to this time zone, and then extract the hour +datafusion.execution.time_zone NULL The default time zone Some functions, e.g. 
`now` return timestamps in this time zone # show_timezone_default_utc # https://github.com/apache/datafusion/issues/3255 query TTT SHOW TIMEZONE VERBOSE ---- -datafusion.execution.time_zone +00:00 The default time zone Some functions, e.g. `EXTRACT(HOUR from SOME_TIME)`, shift the underlying datetime according to this time zone, and then extract the hour +datafusion.execution.time_zone NULL The default time zone Some functions, e.g. `now` return timestamps in this time zone # show empty verbose diff --git a/datafusion/sqllogictest/test_files/metadata.slt b/datafusion/sqllogictest/test_files/metadata.slt index 7252c84caf141..8753d39cb7ef7 100644 --- a/datafusion/sqllogictest/test_files/metadata.slt +++ b/datafusion/sqllogictest/test_files/metadata.slt @@ -165,7 +165,7 @@ GROUP BY ts ORDER BY ts LIMIT 1; ---- -2020-09-08T13:42:29.190855123Z +2020-09-08T13:42:29.190855123 diff --git a/datafusion/sqllogictest/test_files/parquet_filter_pushdown.slt b/datafusion/sqllogictest/test_files/parquet_filter_pushdown.slt index e4676ae5332dd..0166cd2572ce6 100644 --- a/datafusion/sqllogictest/test_files/parquet_filter_pushdown.slt +++ b/datafusion/sqllogictest/test_files/parquet_filter_pushdown.slt @@ -570,4 +570,4 @@ WHERE trace_id = '00000000000000000000000000000002' AND deployment_environment = ORDER BY start_timestamp, trace_id LIMIT 1; ---- -2024-10-01T00:00:00Z +2024-10-01T00:00:00 diff --git a/datafusion/sqllogictest/test_files/table_functions.slt b/datafusion/sqllogictest/test_files/table_functions.slt index 1bffbc3b3a646..c843400efc2b2 100644 --- a/datafusion/sqllogictest/test_files/table_functions.slt +++ b/datafusion/sqllogictest/test_files/table_functions.slt @@ -414,7 +414,7 @@ SELECT * FROM range(TIMESTAMP '2023-01-01T00:00:00', TIMESTAMP '2023-01-03T00:00 query P SELECT * FROM range(TIMESTAMPTZ '2023-02-01T00:00:00-07:00', TIMESTAMPTZ '2023-02-01T09:00:00+01:00', INTERVAL '1' HOUR); ---- -2023-02-01T07:00:00Z +2023-02-01T07:00:00 # Basic date range with hour 
interval query P diff --git a/datafusion/sqllogictest/test_files/window.slt b/datafusion/sqllogictest/test_files/window.slt index d9b4a818f99e4..c2fabb5e6eff4 100644 --- a/datafusion/sqllogictest/test_files/window.slt +++ b/datafusion/sqllogictest/test_files/window.slt @@ -942,22 +942,22 @@ CREATE TABLE table1 ( statement ok INSERT INTO table1 (bar, foo, time) VALUES -(200.0, 'me', '1970-01-01T00:00:00.000000010Z'), -(1.0, 'me', '1970-01-01T00:00:00.000000030Z'), -(1.0, 'me', '1970-01-01T00:00:00.000000040Z'), -(2.0, 'you', '1970-01-01T00:00:00.000000020Z'); +(200.0, 'me', '1970-01-01T00:00:00.000000010'), +(1.0, 'me', '1970-01-01T00:00:00.000000030'), +(1.0, 'me', '1970-01-01T00:00:00.000000040'), +(2.0, 'you', '1970-01-01T00:00:00.000000020'); query TP SELECT foo, first_value(time ORDER BY time DESC NULLS LAST) AS time FROM table1 GROUP BY foo ORDER BY foo; ---- -me 1970-01-01T00:00:00.000000040Z -you 1970-01-01T00:00:00.000000020Z +me 1970-01-01T00:00:00.000000040 +you 1970-01-01T00:00:00.000000020 query TP SELECT foo, last_value(time ORDER BY time DESC NULLS LAST) AS time FROM table1 GROUP BY foo ORDER BY foo; ---- -me 1970-01-01T00:00:00.000000010Z -you 1970-01-01T00:00:00.000000020Z +me 1970-01-01T00:00:00.000000010 +you 1970-01-01T00:00:00.000000020 statement ok drop table table1; @@ -5766,15 +5766,15 @@ CREATE TABLE table_test_distinct_count ( statement ok INSERT INTO table_test_distinct_count (k, v, time) VALUES - ('a', 1, '1970-01-01T00:01:00.00Z'), - ('a', 1, '1970-01-01T00:02:00.00Z'), - ('a', 1, '1970-01-01T00:03:00.00Z'), - ('a', 2, '1970-01-01T00:03:00.00Z'), - ('a', 1, '1970-01-01T00:04:00.00Z'), - ('b', 3, '1970-01-01T00:01:00.00Z'), - ('b', 3, '1970-01-01T00:02:00.00Z'), - ('b', 4, '1970-01-01T00:03:00.00Z'), - ('b', 4, '1970-01-01T00:03:00.00Z'); + ('a', 1, '1970-01-01T00:01:00.00'), + ('a', 1, '1970-01-01T00:02:00.00'), + ('a', 1, '1970-01-01T00:03:00.00'), + ('a', 2, '1970-01-01T00:03:00.00'), + ('a', 1, '1970-01-01T00:04:00.00'), + ('b', 3, 
'1970-01-01T00:01:00.00'), + ('b', 3, '1970-01-01T00:02:00.00'), + ('b', 4, '1970-01-01T00:03:00.00'), + ('b', 4, '1970-01-01T00:03:00.00'); query TPII SELECT @@ -5793,15 +5793,15 @@ SELECT FROM table_test_distinct_count ORDER BY k, time; ---- -a 1970-01-01T00:01:00Z 1 1 -a 1970-01-01T00:02:00Z 2 1 -a 1970-01-01T00:03:00Z 4 2 -a 1970-01-01T00:03:00Z 4 2 -a 1970-01-01T00:04:00Z 4 2 -b 1970-01-01T00:01:00Z 1 1 -b 1970-01-01T00:02:00Z 2 1 -b 1970-01-01T00:03:00Z 4 2 -b 1970-01-01T00:03:00Z 4 2 +a 1970-01-01T00:01:00 1 1 +a 1970-01-01T00:02:00 2 1 +a 1970-01-01T00:03:00 4 2 +a 1970-01-01T00:03:00 4 2 +a 1970-01-01T00:04:00 4 2 +b 1970-01-01T00:01:00 1 1 +b 1970-01-01T00:02:00 2 1 +b 1970-01-01T00:03:00 4 2 +b 1970-01-01T00:03:00 4 2 query TT @@ -5854,15 +5854,15 @@ SELECT FROM table_test_distinct_count ORDER BY k, time; ---- -a 1970-01-01T00:01:00Z 1 1 -a 1970-01-01T00:02:00Z 2 1 -a 1970-01-01T00:03:00Z 5 3 -a 1970-01-01T00:03:00Z 5 3 -a 1970-01-01T00:04:00Z 5 3 -b 1970-01-01T00:01:00Z 3 3 -b 1970-01-01T00:02:00Z 6 3 -b 1970-01-01T00:03:00Z 14 7 -b 1970-01-01T00:03:00Z 14 7 +a 1970-01-01T00:01:00 1 1 +a 1970-01-01T00:02:00 2 1 +a 1970-01-01T00:03:00 5 3 +a 1970-01-01T00:03:00 5 3 +a 1970-01-01T00:04:00 5 3 +b 1970-01-01T00:01:00 3 3 +b 1970-01-01T00:02:00 6 3 +b 1970-01-01T00:03:00 14 7 +b 1970-01-01T00:03:00 14 7 diff --git a/docs/source/library-user-guide/upgrading.md b/docs/source/library-user-guide/upgrading.md index af5c603404554..0b227000f73d9 100644 --- a/docs/source/library-user-guide/upgrading.md +++ b/docs/source/library-user-guide/upgrading.md @@ -176,6 +176,20 @@ let indices = projection_exprs.column_indices(); _execution plan_ of the query. With this release, `DESCRIBE query` now outputs the computed _schema_ of the query, consistent with the behavior of `DESCRIBE table_name`. 
+### `datafusion.execution.time_zone` default configuration changed + +The default value for `datafusion.execution.time_zone` was previously the string `+00:00` (UTC). +It is now an `Option<String>` with a default of `None`. If you want to change the timezone back +to the previous value, you can execute the following SQL: + +```sql +SET +TIMEZONE = '+00:00'; +``` + +This change was made to better support using the default timezone in scalar functions such as +`now`, `current_date`, `current_time`, and `to_timestamp`, among others. + ### Introduction of `TableSchema` and changes to `FileSource::with_schema()` method A new `TableSchema` struct has been introduced in the `datafusion-datasource` crate to better manage table schemas with partition columns. This struct helps distinguish between: diff --git a/docs/source/user-guide/configs.md b/docs/source/user-guide/configs.md index 7ca5eb8f7be45..5950a4fa9a6a9 100644 --- a/docs/source/user-guide/configs.md +++ b/docs/source/user-guide/configs.md @@ -77,7 +77,7 @@ The following configuration settings are available: | datafusion.execution.coalesce_batches | true | When set to true, record batches will be examined between each operator and small batches will be coalesced into larger batches. This is helpful when there are highly selective filters or joins that could produce tiny output batches. The target batch size is determined by the configuration setting | | datafusion.execution.collect_statistics | true | Should DataFusion collect statistics when first creating a table. Has no effect after the table is created. Applies to the default `ListingTableProvider` in DataFusion. Defaults to true. | | datafusion.execution.target_partitions | 0 | Number of partitions for query execution. Increasing partitions can increase concurrency. Defaults to the number of CPU cores on the system | -| datafusion.execution.time_zone | +00:00 | The default time zone Some functions, e.g. 
`EXTRACT(HOUR from SOME_TIME)`, shift the underlying datetime according to this time zone, and then extract the hour | +| datafusion.execution.time_zone | NULL | The default time zone Some functions, e.g. `now` return timestamps in this time zone | | datafusion.execution.parquet.enable_page_index | true | (reading) If true, reads the Parquet data page level metadata (the Page Index), if present, to reduce the I/O and number of rows decoded. | | datafusion.execution.parquet.pruning | true | (reading) If true, the parquet reader attempts to skip entire row groups based on the predicate in the query and the metadata (min/max values) stored in the parquet file | | datafusion.execution.parquet.skip_metadata | true | (reading) If true, the parquet reader skip the optional embedded metadata that may be in the file Schema. This setting can help avoid schema conflicts when querying multiple parquet files with schemas containing compatible types but different metadata | diff --git a/docs/source/user-guide/sql/scalar_functions.md b/docs/source/user-guide/sql/scalar_functions.md index d090b5b70cda9..7c88d1fd9c3eb 100644 --- a/docs/source/user-guide/sql/scalar_functions.md +++ b/docs/source/user-guide/sql/scalar_functions.md @@ -2629,7 +2629,7 @@ Additional examples can be found [here](https://github.com/apache/datafusion/blo ### `now` -Returns the current UTC timestamp. +Returns the current timestamp in the system configured timezone (None by default). The `now()` return value is determined at query time and will return the same timestamp, no matter when in the query plan the function executes.