Skip to content

Commit

Permalink
feat(query): support try_to_timestmap(string, string), try_to_date(st…
Browse files Browse the repository at this point in the history
…ring, string)

If format ts or date with format err, the try function will retrun Null
  • Loading branch information
TCeason committed Jun 11, 2024
1 parent e749619 commit 41bac38
Show file tree
Hide file tree
Showing 2 changed files with 132 additions and 58 deletions.
174 changes: 116 additions & 58 deletions src/query/functions/src/scalars/datetime.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ use chrono::format::StrftimeItems;
use chrono::prelude::*;
use chrono::Datelike;
use chrono::Days;
use chrono::ParseError;
use chrono::Utc;
use chrono_tz::Tz;
use databend_common_arrow::arrow::bitmap::Bitmap;
Expand Down Expand Up @@ -168,69 +169,40 @@ fn register_string_to_timestamp(registry: &mut FunctionRegistry) {
"to_timestamp",
|_, _, _| FunctionDomain::MayThrow,
vectorize_with_builder_2_arg::<StringType, StringType, NullableType<TimestampType>>(
|timestamp, format, output, ctx| {
if format.is_empty() {
|timestamp, format, output, ctx| match string_to_format_timestmap(
timestamp, format, ctx,
) {
Ok((ts, need_null)) => {
if need_null {
output.push_null();
} else {
output.push(ts);
}
}
Err(e) => {
ctx.set_error(output.len(), e.to_string());
output.push_null();
return;
}
// Parse with extra checks for timezone
// %Z ACST Local time zone name. Skips all non-whitespace characters during parsing. Identical to %:z when formatting. 6
// %z +0930 Offset from the local time to UTC (with UTC being +0000).
// %:z +09:30 Same as %z but with a colon.
// %::z +09:30:00 Offset from the local time to UTC with seconds.
// %:::z +09 Offset from the local time to UTC without minutes.
// %#z +09 Parsing only: Same as %z but allows minutes to be missing or present.
let timezone_strftime = ["%Z", "%z", "%:z", "%::z", "%:::z", "%#z"];
let parse_tz = timezone_strftime
.iter()
.any(|&pattern| format.contains(pattern));
let res = if ctx.func_ctx.parse_datetime_ignore_remainder {
let mut parsed = Parsed::new();
if parse_and_remainder(&mut parsed, timestamp, StrftimeItems::new(format))
.is_err()
{
},
),
);

registry.register_combine_nullable_2_arg::<StringType, StringType, TimestampType, _, _>(
"try_to_timestamp",
|_, _, _| FunctionDomain::MayThrow,
vectorize_with_builder_2_arg::<StringType, StringType, NullableType<TimestampType>>(
|timestamp, format, output, ctx| match string_to_format_timestmap(
timestamp, format, ctx,
) {
Ok((ts, need_null)) => {
if need_null {
output.push_null();
return;
}
// Additional checks and adjustments for parsed timestamp
if parsed.month.is_none() {
parsed.month = Some(1);
}
if parsed.day.is_none() {
parsed.day = Some(1);
}
if parsed.hour_div_12.is_none() && parsed.hour_mod_12.is_none() {
parsed.hour_div_12 = Some(0);
parsed.hour_mod_12 = Some(0);
}
if parsed.minute.is_none() {
parsed.minute = Some(0);
}
if parsed.second.is_none() {
parsed.second = Some(0);
}
// Convert parsed timestamp to datetime or naive datetime based on parse_tz
if parse_tz {
parsed.offset.get_or_insert(0);
parsed
.to_datetime()
.map(|res| res.with_timezone(&ctx.func_ctx.tz.tz).timestamp_micros())
} else {
parsed
.to_naive_datetime_with_offset(0)
.map(|res| res.and_utc().timestamp_micros())
output.push(ts);
}
} else if parse_tz {
DateTime::parse_from_str(timestamp, format)
.map(|res| res.with_timezone(&ctx.func_ctx.tz.tz).timestamp_micros())
} else {
NaiveDateTime::parse_from_str(timestamp, format)
.map(|res| res.and_utc().timestamp_micros())
};
if let Ok(res) = res {
output.push(res);
} else {
output.push_null()
}
Err(_) => {
output.push_null();
}
},
),
Expand All @@ -257,6 +229,92 @@ fn register_string_to_timestamp(registry: &mut FunctionRegistry) {
},
),
);

registry.register_combine_nullable_2_arg::<StringType, StringType, DateType, _, _>(
"try_to_date",
|_, _, _| FunctionDomain::MayThrow,
vectorize_with_builder_2_arg::<StringType, StringType, NullableType<DateType>>(
|date, format, output, _| {
if format.is_empty() {
output.push_null();
} else {
match NaiveDate::parse_from_str(date, format) {
Ok(res) => {
output.push(res.num_days_from_ce() - EPOCH_DAYS_FROM_CE);
}
Err(_) => {
output.push_null();
}
}
}
},
),
);
}

fn string_to_format_timestmap(
timestamp: &str,
format: &str,
ctx: &mut EvalContext,
) -> Result<(i64, bool), ParseError> {
if format.is_empty() {
return Ok((0, true));
}
// Parse with extra checks for timezone
// %Z ACST Local time zone name. Skips all non-whitespace characters during parsing. Identical to %:z when formatting. 6
// %z +0930 Offset from the local time to UTC (with UTC being +0000).
// %:z +09:30 Same as %z but with a colon.
// %::z +09:30:00 Offset from the local time to UTC with seconds.
// %:::z +09 Offset from the local time to UTC without minutes.
// %#z +09 Parsing only: Same as %z but allows minutes to be missing or present.
let timezone_strftime = ["%Z", "%z", "%:z", "%::z", "%:::z", "%#z"];
let parse_tz = timezone_strftime
.iter()
.any(|&pattern| format.contains(pattern));
let res = if ctx.func_ctx.parse_datetime_ignore_remainder {
let mut parsed = Parsed::new();
if parse_and_remainder(&mut parsed, timestamp, StrftimeItems::new(format)).is_err() {
return Ok((0, true));
}
// Additional checks and adjustments for parsed timestamp
if parsed.month.is_none() {
parsed.month = Some(1);
}
if parsed.day.is_none() {
parsed.day = Some(1);
}
if parsed.hour_div_12.is_none() && parsed.hour_mod_12.is_none() {
parsed.hour_div_12 = Some(0);
parsed.hour_mod_12 = Some(0);
}
if parsed.minute.is_none() {
parsed.minute = Some(0);
}
if parsed.second.is_none() {
parsed.second = Some(0);
}
// Convert parsed timestamp to datetime or naive datetime based on parse_tz
if parse_tz {
parsed.offset.get_or_insert(0);
parsed
.to_datetime()
.map(|res| res.with_timezone(&ctx.func_ctx.tz.tz).timestamp_micros())
} else {
parsed
.to_naive_datetime_with_offset(0)
.map(|res| res.and_utc().timestamp_micros())
}
} else if parse_tz {
DateTime::parse_from_str(timestamp, format)
.map(|res| res.with_timezone(&ctx.func_ctx.tz.tz).timestamp_micros())
} else {
NaiveDateTime::parse_from_str(timestamp, format).map(|res| res.and_utc().timestamp_micros())
};

match res {
Ok(res) => Ok((res, false)),
Err(err) => Err(err),
}
}

fn register_date_to_timestamp(registry: &mut FunctionRegistry) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1089,6 +1089,22 @@ select to_timestamp('10000-09-09 01:46:39', '%Y-%m-%d %H:%M:%S');
----
NULL

statement error 1006
select to_timestamp('10000-09-09 01:46:39', '%s-%m-%d %H:%M:%S');

query T
select try_to_timestamp('10000-09-09 01:46:39', '%s-%m-%d %H:%M:%S');
----
NULL

statement error 1006
select to_date('2022-02-02', '%m');

query T
select try_to_date('2022-02-02', '%m');
----
NULL

query I
select week('2017-01-01');
----
Expand Down

0 comments on commit 41bac38

Please sign in to comment.