Skip to content

Commit 2ec613e

Browse files
committed
fix: Support legacy date formatter for Spark date_format function
1 parent 1b276a1 commit 2ec613e

File tree

4 files changed

+95
-1
lines changed

4 files changed

+95
-1
lines changed

velox/functions/lib/DateTimeFormatter.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2057,6 +2057,9 @@ Expected<std::shared_ptr<DateTimeFormatter>> buildSimpleDateTimeFormatter(
20572057
case 'S':
20582058
builder.appendFractionOfSecond(count);
20592059
break;
2060+
case 'u':
2061+
builder.appendDayOfWeek1Based(count);
2062+
break;
20602063
case 'w':
20612064
builder.appendWeekOfWeekYear(count);
20622065
break;

velox/functions/sparksql/DateTimeFunctions.h

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1141,4 +1141,70 @@ struct MonthsBetweenFunction {
11411141
const tz::TimeZone* sessionTimeZone_ = nullptr;
11421142
};
11431143

1144+
template <typename T>
1145+
struct DateFormatFunction {
1146+
VELOX_DEFINE_FUNCTION_TYPES(T);
1147+
1148+
FOLLY_ALWAYS_INLINE void initialize(
1149+
const std::vector<TypePtr>& /*inputTypes*/,
1150+
const core::QueryConfig& config,
1151+
const arg_type<Timestamp>* /*timestamp*/,
1152+
const arg_type<Varchar>* formatString) {
1153+
legacyFormatter_ = config.sparkLegacyDateFormatter();
1154+
sessionTimeZone_ = getTimeZoneFromConfig(config);
1155+
if (formatString != nullptr) {
1156+
auto formatter = detail::initializeFormatter(
1157+
std::string_view(*formatString), legacyFormatter_);
1158+
if (formatter) {
1159+
formatter_ = formatter;
1160+
maxResultSize_ = formatter_->maxResultSize(sessionTimeZone_);
1161+
} else {
1162+
invalidFormat_ = true;
1163+
}
1164+
isConstFormat_ = true;
1165+
}
1166+
}
1167+
1168+
FOLLY_ALWAYS_INLINE bool call(
1169+
out_type<Varchar>& result,
1170+
const arg_type<Timestamp>& timestamp,
1171+
const arg_type<Varchar>& formatString) {
1172+
if (invalidFormat_) {
1173+
return false;
1174+
}
1175+
if (!isConstFormat_) {
1176+
auto formatter = detail::initializeFormatter(
1177+
std::string_view(formatString), legacyFormatter_);
1178+
if (formatter) {
1179+
formatter_ = formatter;
1180+
maxResultSize_ = formatter_->maxResultSize(sessionTimeZone_);
1181+
} else {
1182+
return false;
1183+
}
1184+
}
1185+
1186+
format(timestamp, sessionTimeZone_, maxResultSize_, result);
1187+
return true;
1188+
}
1189+
1190+
private:
1191+
FOLLY_ALWAYS_INLINE void format(
1192+
const Timestamp& timestamp,
1193+
const tz::TimeZone* timeZone,
1194+
uint32_t maxResultSize,
1195+
out_type<Varchar>& result) const {
1196+
result.reserve(maxResultSize);
1197+
const auto resultSize =
1198+
formatter_->format(timestamp, timeZone, maxResultSize, result.data());
1199+
result.resize(resultSize);
1200+
}
1201+
1202+
const tz::TimeZone* sessionTimeZone_{nullptr};
1203+
std::shared_ptr<DateTimeFormatter> formatter_;
1204+
bool isConstFormat_{false};
1205+
bool legacyFormatter_{false};
1206+
bool invalidFormat_{false};
1207+
uint32_t maxResultSize_;
1208+
};
1209+
11441210
} // namespace facebook::velox::functions::sparksql

velox/functions/sparksql/registration/RegisterDatetime.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ void registerDatetimeFunctions(const std::string& prefix) {
5656
registerFunction<DateAddFunction, Date, Date, int8_t>({prefix + "date_add"});
5757
registerFunction<DateAddFunction, Date, Date, int16_t>({prefix + "date_add"});
5858
registerFunction<DateAddFunction, Date, Date, int32_t>({prefix + "date_add"});
59-
registerFunction<FormatDateTimeFunction, Varchar, Timestamp, Varchar>(
59+
registerFunction<DateFormatFunction, Varchar, Timestamp, Varchar>(
6060
{prefix + "date_format"});
6161
registerFunction<DateFromUnixDateFunction, Date, int32_t>(
6262
{prefix + "date_from_unix_date"});

velox/functions/sparksql/tests/DateTimeFunctionsTest.cpp

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1858,5 +1858,30 @@ TEST_F(DateTimeFunctionsTest, monthsBetween) {
18581858
parseTimestamp("1996-03-21 11:00:00"),
18591859
true));
18601860
}
1861+
TEST_F(DateTimeFunctionsTest, dateFormat) {
  // Evaluates date_format(c0, c1) once for a single timestamp/pattern pair.
  const auto formatTimestamp = [this](
                                   std::optional<Timestamp> input,
                                   std::optional<std::string> pattern) {
    return evaluateOnce<std::string>("date_format(c0, c1)", input, pattern);
  };

  // Patterns that are rejected before the legacy formatter is enabled: the
  // 'u' specifier and an unterminated quoted literal.
  EXPECT_THROW(
      formatTimestamp(parseTimestamp("1970-01-01"), "u"), VeloxUserError);
  EXPECT_THROW(
      formatTimestamp(parseTimestamp("1970-01-01"), "'abcd"), VeloxUserError);

  // Single-specifier patterns.
  EXPECT_EQ("AD", formatTimestamp(parseTimestamp("1970-01-01"), "G"));
  EXPECT_EQ("19", formatTimestamp(parseTimestamp("1900-01-01"), "C"));
  EXPECT_EQ("2020", formatTimestamp(parseTimestamp("2020-01-01"), "Y"));
  EXPECT_EQ("1", formatTimestamp(parseTimestamp("2022-01-01"), "D"));
  EXPECT_EQ("1", formatTimestamp(parseTimestamp("2022-01-01"), "d"));
  EXPECT_EQ("AM", formatTimestamp(parseTimestamp("2022-01-01 00:00:00"), "a"));

  // A full date-time pattern.
  EXPECT_EQ(
      "2022-01-01 00:00:00",
      formatTimestamp(parseTimestamp("2022-01-01"), "yyyy-MM-dd HH:mm:ss"));

  // With the legacy formatter enabled, 'u' is accepted and yields "4" for
  // 1970-01-01.
  enableLegacyFormatter();
  EXPECT_EQ("4", formatTimestamp(parseTimestamp("1970-01-01"), "u"));
}
18611886
} // namespace
18621887
} // namespace facebook::velox::functions::sparksql::test

0 commit comments

Comments
 (0)