From 0e120461203f5d8b6dfcde0cb29d76c776461344 Mon Sep 17 00:00:00 2001 From: TCeason Date: Tue, 11 Jun 2024 19:47:10 +0800 Subject: [PATCH] feat(query): support try_to_timestamp(string, string), try_to_date(string, string) If parsing a timestamp or date with the given format fails, the try_* functions return NULL instead of raising an error. --- src/query/functions/src/scalars/datetime.rs | 174 ++++++++++++------ .../it/scalars/testdata/function_list.txt | 24 ++- .../functions/02_0012_function_datetimes.test | 16 ++ 3 files changed, 146 insertions(+), 68 deletions(-) diff --git a/src/query/functions/src/scalars/datetime.rs b/src/query/functions/src/scalars/datetime.rs index 3a69978f3f18b..5138ad94527dc 100644 --- a/src/query/functions/src/scalars/datetime.rs +++ b/src/query/functions/src/scalars/datetime.rs @@ -20,6 +20,7 @@ use chrono::format::StrftimeItems; use chrono::prelude::*; use chrono::Datelike; use chrono::Days; +use chrono::ParseError; use chrono::Utc; use chrono_tz::Tz; use databend_common_arrow::arrow::bitmap::Bitmap; @@ -168,69 +169,40 @@ fn register_string_to_timestamp(registry: &mut FunctionRegistry) { "to_timestamp", |_, _, _| FunctionDomain::MayThrow, vectorize_with_builder_2_arg::>( - |timestamp, format, output, ctx| { - if format.is_empty() { + |timestamp, format, output, ctx| match string_to_format_timestmap( + timestamp, format, ctx, + ) { + Ok((ts, need_null)) => { + if need_null { + output.push_null(); + } else { + output.push(ts); + } + } + Err(e) => { + ctx.set_error(output.len(), e.to_string()); output.push_null(); - return; } - // Parse with extra checks for timezone - // %Z ACST Local time zone name. Skips all non-whitespace characters during parsing. Identical to %:z when formatting. 6 - // %z +0930 Offset from the local time to UTC (with UTC being +0000). - // %:z +09:30 Same as %z but with a colon. - // %::z +09:30:00 Offset from the local time to UTC with seconds. - // %:::z +09 Offset from the local time to UTC without minutes. 
- // %#z +09 Parsing only: Same as %z but allows minutes to be missing or present. - let timezone_strftime = ["%Z", "%z", "%:z", "%::z", "%:::z", "%#z"]; - let parse_tz = timezone_strftime - .iter() - .any(|&pattern| format.contains(pattern)); - let res = if ctx.func_ctx.parse_datetime_ignore_remainder { - let mut parsed = Parsed::new(); - if parse_and_remainder(&mut parsed, timestamp, StrftimeItems::new(format)) - .is_err() - { + }, + ), + ); + + registry.register_combine_nullable_2_arg::( + "try_to_timestamp", + |_, _, _| FunctionDomain::MayThrow, + vectorize_with_builder_2_arg::>( + |timestamp, format, output, ctx| match string_to_format_timestmap( + timestamp, format, ctx, + ) { + Ok((ts, need_null)) => { + if need_null { output.push_null(); - return; - } - // Additional checks and adjustments for parsed timestamp - if parsed.month.is_none() { - parsed.month = Some(1); - } - if parsed.day.is_none() { - parsed.day = Some(1); - } - if parsed.hour_div_12.is_none() && parsed.hour_mod_12.is_none() { - parsed.hour_div_12 = Some(0); - parsed.hour_mod_12 = Some(0); - } - if parsed.minute.is_none() { - parsed.minute = Some(0); - } - if parsed.second.is_none() { - parsed.second = Some(0); - } - // Convert parsed timestamp to datetime or naive datetime based on parse_tz - if parse_tz { - parsed.offset.get_or_insert(0); - parsed - .to_datetime() - .map(|res| res.with_timezone(&ctx.func_ctx.tz.tz).timestamp_micros()) } else { - parsed - .to_naive_datetime_with_offset(0) - .map(|res| res.and_utc().timestamp_micros()) + output.push(ts); } - } else if parse_tz { - DateTime::parse_from_str(timestamp, format) - .map(|res| res.with_timezone(&ctx.func_ctx.tz.tz).timestamp_micros()) - } else { - NaiveDateTime::parse_from_str(timestamp, format) - .map(|res| res.and_utc().timestamp_micros()) - }; - if let Ok(res) = res { - output.push(res); - } else { - output.push_null() + } + Err(_) => { + output.push_null(); } }, ), @@ -257,6 +229,92 @@ fn register_string_to_timestamp(registry: 
&mut FunctionRegistry) { }, ), ); + + registry.register_combine_nullable_2_arg::( + "try_to_date", + |_, _, _| FunctionDomain::MayThrow, + vectorize_with_builder_2_arg::>( + |date, format, output, _| { + if format.is_empty() { + output.push_null(); + } else { + match NaiveDate::parse_from_str(date, format) { + Ok(res) => { + output.push(res.num_days_from_ce() - EPOCH_DAYS_FROM_CE); + } + Err(_) => { + output.push_null(); + } + } + } + }, + ), + ); +} + +fn string_to_format_timestmap( + timestamp: &str, + format: &str, + ctx: &mut EvalContext, +) -> Result<(i64, bool), ParseError> { + if format.is_empty() { + return Ok((0, true)); + } + // Parse with extra checks for timezone + // %Z ACST Local time zone name. Skips all non-whitespace characters during parsing. Identical to %:z when formatting. 6 + // %z +0930 Offset from the local time to UTC (with UTC being +0000). + // %:z +09:30 Same as %z but with a colon. + // %::z +09:30:00 Offset from the local time to UTC with seconds. + // %:::z +09 Offset from the local time to UTC without minutes. + // %#z +09 Parsing only: Same as %z but allows minutes to be missing or present. 
+ let timezone_strftime = ["%Z", "%z", "%:z", "%::z", "%:::z", "%#z"]; + let parse_tz = timezone_strftime + .iter() + .any(|&pattern| format.contains(pattern)); + let res = if ctx.func_ctx.parse_datetime_ignore_remainder { + let mut parsed = Parsed::new(); + if parse_and_remainder(&mut parsed, timestamp, StrftimeItems::new(format)).is_err() { + return Ok((0, true)); + } + // Additional checks and adjustments for parsed timestamp + if parsed.month.is_none() { + parsed.month = Some(1); + } + if parsed.day.is_none() { + parsed.day = Some(1); + } + if parsed.hour_div_12.is_none() && parsed.hour_mod_12.is_none() { + parsed.hour_div_12 = Some(0); + parsed.hour_mod_12 = Some(0); + } + if parsed.minute.is_none() { + parsed.minute = Some(0); + } + if parsed.second.is_none() { + parsed.second = Some(0); + } + // Convert parsed timestamp to datetime or naive datetime based on parse_tz + if parse_tz { + parsed.offset.get_or_insert(0); + parsed + .to_datetime() + .map(|res| res.with_timezone(&ctx.func_ctx.tz.tz).timestamp_micros()) + } else { + parsed + .to_naive_datetime_with_offset(0) + .map(|res| res.and_utc().timestamp_micros()) + } + } else if parse_tz { + DateTime::parse_from_str(timestamp, format) + .map(|res| res.with_timezone(&ctx.func_ctx.tz.tz).timestamp_micros()) + } else { + NaiveDateTime::parse_from_str(timestamp, format).map(|res| res.and_utc().timestamp_micros()) + }; + + match res { + Ok(res) => Ok((res, false)), + Err(err) => Err(err), + } } fn register_date_to_timestamp(registry: &mut FunctionRegistry) { diff --git a/src/query/functions/tests/it/scalars/testdata/function_list.txt b/src/query/functions/tests/it/scalars/testdata/function_list.txt index 952d380d2c615..6104daee12e91 100644 --- a/src/query/functions/tests/it/scalars/testdata/function_list.txt +++ b/src/query/functions/tests/it/scalars/testdata/function_list.txt @@ -4184,12 +4184,14 @@ Functions overloads: 23 try_to_boolean(Float64 NULL) :: Boolean NULL 0 try_to_date(Variant) :: Date NULL 1 
try_to_date(Variant NULL) :: Date NULL -2 try_to_date(String) :: Date NULL -3 try_to_date(String NULL) :: Date NULL -4 try_to_date(Timestamp) :: Date NULL -5 try_to_date(Timestamp NULL) :: Date NULL -6 try_to_date(Int64) :: Date NULL -7 try_to_date(Int64 NULL) :: Date NULL +2 try_to_date(String, String) :: Date NULL +3 try_to_date(String NULL, String NULL) :: Date NULL +4 try_to_date(String) :: Date NULL +5 try_to_date(String NULL) :: Date NULL +6 try_to_date(Timestamp) :: Date NULL +7 try_to_date(Timestamp NULL) :: Date NULL +8 try_to_date(Int64) :: Date NULL +9 try_to_date(Int64 NULL) :: Date NULL 0 try_to_decimal FACTORY 1 try_to_decimal FACTORY 0 try_to_float32(Variant) :: Float32 NULL @@ -4398,10 +4400,12 @@ Functions overloads: 1 try_to_timestamp(Variant NULL) :: Timestamp NULL 2 try_to_timestamp(String) :: Timestamp NULL 3 try_to_timestamp(String NULL) :: Timestamp NULL -4 try_to_timestamp(Date) :: Timestamp NULL -5 try_to_timestamp(Date NULL) :: Timestamp NULL -6 try_to_timestamp(Int64) :: Timestamp NULL -7 try_to_timestamp(Int64 NULL) :: Timestamp NULL +4 try_to_timestamp(String, String) :: Timestamp NULL +5 try_to_timestamp(String NULL, String NULL) :: Timestamp NULL +6 try_to_timestamp(Date) :: Timestamp NULL +7 try_to_timestamp(Date NULL) :: Timestamp NULL +8 try_to_timestamp(Int64) :: Timestamp NULL +9 try_to_timestamp(Int64 NULL) :: Timestamp NULL 0 try_to_uint16(Variant) :: UInt16 NULL 1 try_to_uint16(Variant NULL) :: UInt16 NULL 2 try_to_uint16(String) :: UInt16 NULL diff --git a/tests/sqllogictests/suites/query/functions/02_0012_function_datetimes.test b/tests/sqllogictests/suites/query/functions/02_0012_function_datetimes.test index a7693317edef4..80d8db0dd41a4 100644 --- a/tests/sqllogictests/suites/query/functions/02_0012_function_datetimes.test +++ b/tests/sqllogictests/suites/query/functions/02_0012_function_datetimes.test @@ -1089,6 +1089,22 @@ select to_timestamp('10000-09-09 01:46:39', '%Y-%m-%d %H:%M:%S'); ---- NULL +statement error 1006 
+select to_timestamp('10000-09-09 01:46:39', '%s-%m-%d %H:%M:%S'); + +query T +select try_to_timestamp('10000-09-09 01:46:39', '%s-%m-%d %H:%M:%S'); +---- +NULL + +statement error 1006 +select to_date('2022-02-02', '%m'); + +query T +select try_to_date('2022-02-02', '%m'); +---- +NULL + query I select week('2017-01-01'); ----