Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(query): support try_to_timestamp(string, string), try_to_date(string, string) #15775

Merged
merged 1 commit into from
Jun 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
174 changes: 116 additions & 58 deletions src/query/functions/src/scalars/datetime.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ use chrono::format::StrftimeItems;
use chrono::prelude::*;
use chrono::Datelike;
use chrono::Days;
use chrono::ParseError;
use chrono::Utc;
use chrono_tz::Tz;
use databend_common_arrow::arrow::bitmap::Bitmap;
Expand Down Expand Up @@ -168,69 +169,40 @@ fn register_string_to_timestamp(registry: &mut FunctionRegistry) {
"to_timestamp",
|_, _, _| FunctionDomain::MayThrow,
vectorize_with_builder_2_arg::<StringType, StringType, NullableType<TimestampType>>(
|timestamp, format, output, ctx| {
if format.is_empty() {
|timestamp, format, output, ctx| match string_to_format_timestmap(
timestamp, format, ctx,
) {
Ok((ts, need_null)) => {
if need_null {
output.push_null();
} else {
output.push(ts);
}
}
Err(e) => {
ctx.set_error(output.len(), e.to_string());
output.push_null();
return;
}
// Parse with extra checks for timezone
// %Z ACST Local time zone name. Skips all non-whitespace characters during parsing. Identical to %:z when formatting. 6
// %z +0930 Offset from the local time to UTC (with UTC being +0000).
// %:z +09:30 Same as %z but with a colon.
// %::z +09:30:00 Offset from the local time to UTC with seconds.
// %:::z +09 Offset from the local time to UTC without minutes.
// %#z +09 Parsing only: Same as %z but allows minutes to be missing or present.
let timezone_strftime = ["%Z", "%z", "%:z", "%::z", "%:::z", "%#z"];
let parse_tz = timezone_strftime
.iter()
.any(|&pattern| format.contains(pattern));
let res = if ctx.func_ctx.parse_datetime_ignore_remainder {
let mut parsed = Parsed::new();
if parse_and_remainder(&mut parsed, timestamp, StrftimeItems::new(format))
.is_err()
{
},
),
);

registry.register_combine_nullable_2_arg::<StringType, StringType, TimestampType, _, _>(
"try_to_timestamp",
|_, _, _| FunctionDomain::MayThrow,
vectorize_with_builder_2_arg::<StringType, StringType, NullableType<TimestampType>>(
|timestamp, format, output, ctx| match string_to_format_timestmap(
timestamp, format, ctx,
) {
Ok((ts, need_null)) => {
if need_null {
output.push_null();
return;
}
// Additional checks and adjustments for parsed timestamp
if parsed.month.is_none() {
parsed.month = Some(1);
}
if parsed.day.is_none() {
parsed.day = Some(1);
}
if parsed.hour_div_12.is_none() && parsed.hour_mod_12.is_none() {
parsed.hour_div_12 = Some(0);
parsed.hour_mod_12 = Some(0);
}
if parsed.minute.is_none() {
parsed.minute = Some(0);
}
if parsed.second.is_none() {
parsed.second = Some(0);
}
// Convert parsed timestamp to datetime or naive datetime based on parse_tz
if parse_tz {
parsed.offset.get_or_insert(0);
parsed
.to_datetime()
.map(|res| res.with_timezone(&ctx.func_ctx.tz.tz).timestamp_micros())
} else {
parsed
.to_naive_datetime_with_offset(0)
.map(|res| res.and_utc().timestamp_micros())
output.push(ts);
}
} else if parse_tz {
DateTime::parse_from_str(timestamp, format)
.map(|res| res.with_timezone(&ctx.func_ctx.tz.tz).timestamp_micros())
} else {
NaiveDateTime::parse_from_str(timestamp, format)
.map(|res| res.and_utc().timestamp_micros())
};
if let Ok(res) = res {
output.push(res);
} else {
output.push_null()
}
Err(_) => {
output.push_null();
}
},
),
Expand All @@ -257,6 +229,92 @@ fn register_string_to_timestamp(registry: &mut FunctionRegistry) {
},
),
);

registry.register_combine_nullable_2_arg::<StringType, StringType, DateType, _, _>(
"try_to_date",
|_, _, _| FunctionDomain::MayThrow,
vectorize_with_builder_2_arg::<StringType, StringType, NullableType<DateType>>(
|date, format, output, _| {
if format.is_empty() {
output.push_null();
} else {
match NaiveDate::parse_from_str(date, format) {
Ok(res) => {
output.push(res.num_days_from_ce() - EPOCH_DAYS_FROM_CE);
}
Err(_) => {
output.push_null();
}
}
}
},
),
);
}

/// Parses `timestamp` with the strftime-style `format` and returns the result
/// as a microsecond timestamp.
///
/// The returned tuple is `(micros, need_null)`:
/// * `Ok((micros, false))` - parsing succeeded and `micros` holds the value.
/// * `Ok((0, true))` - no value could be produced but no error is reported
///   either: `format` is empty, or the remainder-ignoring parse failed.
///
/// # Errors
///
/// Returns the chrono [`ParseError`] when the strict parse fails, or when the
/// fields collected by the remainder-ignoring parse cannot be assembled into a
/// date-time.
fn string_to_format_timestmap(
    timestamp: &str,
    format: &str,
    ctx: &mut EvalContext,
) -> Result<(i64, bool), ParseError> {
    // Timezone-related strftime specifiers:
    //   %Z    ACST       Local time zone name. Skips all non-whitespace characters
    //                    during parsing. Identical to %:z when formatting.
    //   %z    +0930      Offset from the local time to UTC (with UTC being +0000).
    //   %:z   +09:30     Same as %z but with a colon.
    //   %::z  +09:30:00  Offset from the local time to UTC with seconds.
    //   %:::z +09        Offset from the local time to UTC without minutes.
    //   %#z   +09        Parsing only: same as %z but allows minutes to be
    //                    missing or present.
    const TZ_SPECIFIERS: [&str; 6] = ["%Z", "%z", "%:z", "%::z", "%:::z", "%#z"];

    if format.is_empty() {
        return Ok((0, true));
    }

    let has_tz = TZ_SPECIFIERS.iter().any(|&spec| format.contains(spec));

    // Strict mode: the whole input has to match the format.
    if !ctx.func_ctx.parse_datetime_ignore_remainder {
        let micros = if has_tz {
            DateTime::parse_from_str(timestamp, format)
                .map(|dt| dt.with_timezone(&ctx.func_ctx.tz.tz).timestamp_micros())
        } else {
            NaiveDateTime::parse_from_str(timestamp, format)
                .map(|dt| dt.and_utc().timestamp_micros())
        };
        return micros.map(|ts| (ts, false));
    }

    // Lenient mode: parse what the format describes and ignore trailing input.
    let mut fields = Parsed::new();
    if parse_and_remainder(&mut fields, timestamp, StrftimeItems::new(format)).is_err() {
        return Ok((0, true));
    }

    // Fill in every component the format did not provide with its lowest value
    // so that a partial format still yields a full date-time.
    fields.month.get_or_insert(1);
    fields.day.get_or_insert(1);
    if fields.hour_div_12.is_none() && fields.hour_mod_12.is_none() {
        fields.hour_div_12 = Some(0);
        fields.hour_mod_12 = Some(0);
    }
    fields.minute.get_or_insert(0);
    fields.second.get_or_insert(0);

    let micros = if has_tz {
        // A zone specifier may leave the offset unset; default it to 0 then.
        fields.offset.get_or_insert(0);
        fields
            .to_datetime()
            .map(|dt| dt.with_timezone(&ctx.func_ctx.tz.tz).timestamp_micros())
    } else {
        fields
            .to_naive_datetime_with_offset(0)
            .map(|dt| dt.and_utc().timestamp_micros())
    };

    micros.map(|ts| (ts, false))
}

fn register_date_to_timestamp(registry: &mut FunctionRegistry) {
Expand Down
24 changes: 14 additions & 10 deletions src/query/functions/tests/it/scalars/testdata/function_list.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4184,12 +4184,14 @@ Functions overloads:
23 try_to_boolean(Float64 NULL) :: Boolean NULL
0 try_to_date(Variant) :: Date NULL
1 try_to_date(Variant NULL) :: Date NULL
2 try_to_date(String) :: Date NULL
3 try_to_date(String NULL) :: Date NULL
4 try_to_date(Timestamp) :: Date NULL
5 try_to_date(Timestamp NULL) :: Date NULL
6 try_to_date(Int64) :: Date NULL
7 try_to_date(Int64 NULL) :: Date NULL
2 try_to_date(String, String) :: Date NULL
3 try_to_date(String NULL, String NULL) :: Date NULL
4 try_to_date(String) :: Date NULL
5 try_to_date(String NULL) :: Date NULL
6 try_to_date(Timestamp) :: Date NULL
7 try_to_date(Timestamp NULL) :: Date NULL
8 try_to_date(Int64) :: Date NULL
9 try_to_date(Int64 NULL) :: Date NULL
0 try_to_decimal FACTORY
1 try_to_decimal FACTORY
0 try_to_float32(Variant) :: Float32 NULL
Expand Down Expand Up @@ -4398,10 +4400,12 @@ Functions overloads:
1 try_to_timestamp(Variant NULL) :: Timestamp NULL
2 try_to_timestamp(String) :: Timestamp NULL
3 try_to_timestamp(String NULL) :: Timestamp NULL
4 try_to_timestamp(Date) :: Timestamp NULL
5 try_to_timestamp(Date NULL) :: Timestamp NULL
6 try_to_timestamp(Int64) :: Timestamp NULL
7 try_to_timestamp(Int64 NULL) :: Timestamp NULL
4 try_to_timestamp(String, String) :: Timestamp NULL
5 try_to_timestamp(String NULL, String NULL) :: Timestamp NULL
6 try_to_timestamp(Date) :: Timestamp NULL
7 try_to_timestamp(Date NULL) :: Timestamp NULL
8 try_to_timestamp(Int64) :: Timestamp NULL
9 try_to_timestamp(Int64 NULL) :: Timestamp NULL
0 try_to_uint16(Variant) :: UInt16 NULL
1 try_to_uint16(Variant NULL) :: UInt16 NULL
2 try_to_uint16(String) :: UInt16 NULL
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1089,6 +1089,22 @@ select to_timestamp('10000-09-09 01:46:39', '%Y-%m-%d %H:%M:%S');
----
NULL

statement error 1006
select to_timestamp('10000-09-09 01:46:39', '%s-%m-%d %H:%M:%S');

query T
select try_to_timestamp('10000-09-09 01:46:39', '%s-%m-%d %H:%M:%S');
TCeason marked this conversation as resolved.
Show resolved Hide resolved
----
NULL

statement error 1006
select to_date('2022-02-02', '%m');

query T
select try_to_date('2022-02-02', '%m');
----
NULL

query I
select week('2017-01-01');
----
Expand Down
Loading