Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions velox/functions/lib/DateTimeFormatter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2020,9 +2020,6 @@ Expected<std::shared_ptr<DateTimeFormatter>> buildSimpleDateTimeFormatter(
case 'D':
builder.appendDayOfYear(count);
break;
case 'u':
builder.appendDayOfWeek1Based(count);
break;
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Keep alphabetical order

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please don't move it, they are all appendDayOfWeekxxx

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It wasn't removed, it was just moved to the bottom.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe it was placed here because the next case is appendDayOfWeekText. Either way is acceptable: grouping it with the other appendDayOfWeekxxx cases or keeping alphabetical order.

case 'E':
builder.appendDayOfWeekText(count);
break;
Expand Down Expand Up @@ -2057,6 +2054,9 @@ Expected<std::shared_ptr<DateTimeFormatter>> buildSimpleDateTimeFormatter(
case 'S':
builder.appendFractionOfSecond(count);
break;
case 'u':
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure why this function is in functions/lib, looks like this function is only used in Spark. CC @rui-mo

And this behavior is not supported in Presto because it always uses joda time.
If we move it to the Spark folder, we can safely change it to Spark-specific behavior.

presto:default> SELECT format_datetime(TIMESTAMP '1970-01-01', 'u');
Query 20251118_104605_00011_2tw34 failed: Illegal pattern component: u

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The historical reasons are unclear; it may have been for the purpose of centralizing all formatters.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure why this function is in functions/lib, looks like this function is only used in Spark

Hi @NEUpanning, do you have any input on this? I only found #10354 but it doesn’t explain why the simple formatter cannot be Spark-specific.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I thought formatters that could be reused should be in functions/lib, since I saw that the MySQL formatter is only used by Presto yet remains in functions/lib.

builder.appendDayOfWeek1Based(count);
break;
case 'w':
builder.appendWeekOfWeekYear(count);
break;
Expand Down
65 changes: 65 additions & 0 deletions velox/functions/sparksql/DateTimeFunctions.h
Original file line number Diff line number Diff line change
Expand Up @@ -1143,4 +1143,69 @@ struct MonthsBetweenFunction {
const tz::TimeZone* sessionTimeZone_ = nullptr;
};

/// Simple function that renders a Timestamp as a Varchar according to a format
/// pattern. Registered elsewhere in this change as Spark's `date_format`.
///
/// The formatter is built once in initialize() when the format argument is
/// constant; otherwise it is rebuilt for every row in call().
template <typename T>
struct DateFormatFunction {
  VELOX_DEFINE_FUNCTION_TYPES(T);

  /// Reads the legacy-formatter flag and the session time zone from the query
  /// config and, if the format argument is constant, pre-builds the formatter.
  ///
  /// @param config Query config supplying `sparkLegacyDateFormatter()` and the
  /// session time zone.
  /// @param formatString Constant format pattern, or nullptr when the pattern
  /// is not constant.
  FOLLY_ALWAYS_INLINE void initialize(
      const std::vector<TypePtr>& /*inputTypes*/,
      const core::QueryConfig& config,
      const arg_type<Timestamp>* /*timestamp*/,
      const arg_type<Varchar>* formatString) {
    legacyFormatter_ = config.sparkLegacyDateFormatter();
    sessionTimeZone_ = getTimeZoneFromConfig(config);
    if (formatString == nullptr) {
      // Non-constant pattern: call() builds the formatter per row.
      return;
    }

    // Assign straight into the member to avoid an extra shared_ptr copy.
    formatter_ = detail::initializeFormatter(
        std::string_view(*formatString), legacyFormatter_);
    if (formatter_) {
      maxResultSize_ = formatter_->maxResultSize(sessionTimeZone_);
    } else {
      // A null formatter marks the constant pattern as unusable; every call()
      // then reports null without retrying.
      // NOTE(review): presumably initializeFormatter only returns null (rather
      // than throwing) in legacy mode -- confirm against its definition.
      invalidFormat_ = true;
    }
    isConstFormat_ = true;
  }

  /// Formats `timestamp` using `formatString` in the session time zone.
  ///
  /// @param result Output string; sized to the formatted length on success.
  /// @param timestamp Value to format.
  /// @param formatString Format pattern; only parsed here when it was not
  /// constant at initialize() time.
  /// @return true when `result` was written, false to produce a null result
  /// (no formatter could be built for the pattern).
  FOLLY_ALWAYS_INLINE bool call(
      out_type<Varchar>& result,
      const arg_type<Timestamp>& timestamp,
      const arg_type<Varchar>& formatString) {
    if (invalidFormat_) {
      return false;
    }
    if (!isConstFormat_) {
      // Rebuild for this row's pattern. formatter_ is only read after a
      // successful build, so leaving it null on failure is harmless.
      formatter_ = detail::initializeFormatter(
          std::string_view(formatString), legacyFormatter_);
      if (!formatter_) {
        return false;
      }
      maxResultSize_ = formatter_->maxResultSize(sessionTimeZone_);
    }

    format(timestamp, sessionTimeZone_, maxResultSize_, result);
    return true;
  }

 private:
  /// Writes the formatted timestamp into `result`: reserves `maxResultSize`
  /// bytes up front, then shrinks to the size reported by the formatter.
  FOLLY_ALWAYS_INLINE void format(
      const Timestamp& timestamp,
      const tz::TimeZone* timeZone,
      uint32_t maxResultSize,
      out_type<Varchar>& result) const {
    result.reserve(maxResultSize);
    const auto resultSize =
        formatter_->format(timestamp, timeZone, maxResultSize, result.data());
    result.resize(resultSize);
  }

  // Time zone obtained from the query config in initialize().
  const tz::TimeZone* sessionTimeZone_{nullptr};
  // Formatter for the constant pattern, or for the most recent row otherwise.
  std::shared_ptr<DateTimeFormatter> formatter_;
  // True when the pattern was available (constant) at initialize() time.
  bool isConstFormat_{false};
  // Value of QueryConfig::sparkLegacyDateFormatter().
  bool legacyFormatter_{false};
  // True when no formatter could be built for the constant pattern.
  bool invalidFormat_{false};
  // Upper bound on the output size for formatter_ in sessionTimeZone_.
  // Zero-initialized so the member never holds an indeterminate value.
  uint32_t maxResultSize_{0};
};

} // namespace facebook::velox::functions::sparksql
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ void registerDatetimeFunctions(const std::string& prefix) {
registerFunction<DateAddFunction, Date, Date, int8_t>({prefix + "date_add"});
registerFunction<DateAddFunction, Date, Date, int16_t>({prefix + "date_add"});
registerFunction<DateAddFunction, Date, Date, int32_t>({prefix + "date_add"});
registerFunction<FormatDateTimeFunction, Varchar, Timestamp, Varchar>(
registerFunction<DateFormatFunction, Varchar, Timestamp, Varchar>(
{prefix + "date_format"});
registerFunction<DateFromUnixDateFunction, Date, int32_t>(
{prefix + "date_from_unix_date"});
Expand Down
25 changes: 25 additions & 0 deletions velox/functions/sparksql/tests/DateTimeFunctionsTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1876,5 +1876,30 @@ TEST_F(DateTimeFunctionsTest, monthsBetween) {
parseTimestamp("1996-03-21 11:00:00"),
true));
}
TEST_F(DateTimeFunctionsTest, dateFormat) {
  // Evaluates date_format(c0, c1) on a single row.
  const auto formatTimestamp = [this](
                                   std::optional<Timestamp> input,
                                   std::optional<std::string> pattern) {
    return evaluateOnce<std::string>("date_format(c0, c1)", input, pattern);
  };

  // Timestamps shared by several assertions below.
  const auto epochDay = parseTimestamp("1970-01-01");
  const auto newYear2022 = parseTimestamp("2022-01-01");

  // With the default (non-legacy) formatter, 'u' and an unterminated quote are
  // rejected with a user error.
  EXPECT_THROW(formatTimestamp(epochDay, "u"), VeloxUserError);
  EXPECT_THROW(formatTimestamp(epochDay, "'abcd"), VeloxUserError);

  // Single-letter patterns.
  EXPECT_EQ("AD", formatTimestamp(epochDay, "G"));
  EXPECT_EQ("19", formatTimestamp(parseTimestamp("1900-01-01"), "C"));
  EXPECT_EQ("2020", formatTimestamp(parseTimestamp("2020-01-01"), "Y"));
  EXPECT_EQ("1", formatTimestamp(newYear2022, "D"));
  EXPECT_EQ("1", formatTimestamp(newYear2022, "d"));
  EXPECT_EQ(
      "AM", formatTimestamp(parseTimestamp("2022-01-01 00:00:00"), "a"));

  // Composite pattern.
  EXPECT_EQ(
      "2022-01-01 00:00:00",
      formatTimestamp(newYear2022, "yyyy-MM-dd HH:mm:ss"));

  // Once the legacy formatter is enabled, 'u' is accepted; 1970-01-01 yields
  // "4".
  enableLegacyFormatter();
  EXPECT_EQ("4", formatTimestamp(epochDay, "u"));
}
} // namespace
} // namespace facebook::velox::functions::sparksql::test
Loading