Skip to content

Commit d956509

Browse files
committed
SQL: Implement DATE_TRUNC function (#46473)
DATE_TRUNC(<truncate field>, <date/datetime>) is a function that allows the user to truncate a timestamp to the specified field by zeroing out the rest of the fields. The function is implemented according to the spec from PostgreSQL: https://www.postgresql.org/docs/current/functions-datetime.html#FUNCTIONS-DATETIME-TRUNC Closes: #46319 (cherry picked from commit b37e967)
1 parent fa54de7 commit d956509

File tree

22 files changed

+1106
-29
lines changed

22 files changed

+1106
-29
lines changed

docs/reference/sql/functions/date-time.asciidoc

Lines changed: 74 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
[[sql-functions-datetime-interval]]
99
==== Intervals
1010

11-
A common requirement when dealing with date/time in general revolves around
11+
A common requirement when dealing with date/time in general revolves around
1212
the notion of `interval`, a topic that is worth exploring in the context of {es} and {es-sql}.
1313

1414
{es} has comprehensive support for <<date-math, date math>> both inside <<date-math-index-names, index names>> and <<mapping-date-format, queries>>.
@@ -248,6 +248,79 @@ include-tagged::{sql-specs}/docs/docs.csv-spec[filterNow]
248248
Currently, using a _precision_ greater than 3 doesn't make any difference to the output of the
249249
function as the maximum number of second fractional digits returned is 3 (milliseconds).
250250

251+
[[sql-functions-datetime-trunc]]
252+
==== `DATE_TRUNC`
253+
254+
.Synopsis:
255+
[source, sql]
256+
--------------------------------------------------
257+
DATE_TRUNC(
258+
string_exp, <1>
259+
datetime_exp) <2>
260+
--------------------------------------------------
261+
262+
*Input*:
263+
264+
<1> string expression denoting the unit to which the date/datetime should be truncated
265+
<2> date/datetime expression
266+
267+
*Output*: datetime
268+
269+
.Description:
270+
271+
Truncate the date/datetime to the specified unit by setting all fields that are less significant than the specified
272+
one to zero (or one, for day, day of week and month).
273+
274+
[cols="^,^"]
275+
|===
276+
2+h|Datetime truncation units
277+
278+
s|unit
279+
s|abbreviations
280+
281+
| millennium | millennia
282+
| century | centuries
283+
| decade | decades
284+
| year | years, yy, yyyy
285+
| quarter | quarters, qq, q
286+
| month | months, mm, m
287+
| week | weeks, wk, ww
288+
| day | days, dd, d
289+
| hour | hours, hh
290+
| minute | minutes, mi, n
291+
| second | seconds, ss, s
292+
| millisecond | milliseconds, ms
293+
| microsecond | microseconds, mcs
294+
| nanosecond | nanoseconds, ns
295+
|===
296+
297+
298+
299+
[source, sql]
300+
--------------------------------------------------
301+
include-tagged::{sql-specs}/docs/docs.csv-spec[truncateDateTimeMillennium]
302+
--------------------------------------------------
303+
304+
[source, sql]
305+
--------------------------------------------------
306+
include-tagged::{sql-specs}/docs/docs.csv-spec[truncateDateTimeWeek]
307+
--------------------------------------------------
308+
309+
[source, sql]
310+
--------------------------------------------------
311+
include-tagged::{sql-specs}/docs/docs.csv-spec[truncateDateTimeMinutes]
312+
--------------------------------------------------
313+
314+
[source, sql]
315+
--------------------------------------------------
316+
include-tagged::{sql-specs}/docs/docs.csv-spec[truncateDateDecades]
317+
--------------------------------------------------
318+
319+
[source, sql]
320+
--------------------------------------------------
321+
include-tagged::{sql-specs}/docs/docs.csv-spec[truncateDateQuarter]
322+
--------------------------------------------------
323+
251324
[[sql-functions-datetime-day]]
252325
==== `DAY_OF_MONTH/DOM/DAY`
253326

docs/reference/sql/functions/index.asciidoc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@
5151
** <<sql-functions-current-date>>
5252
** <<sql-functions-current-time>>
5353
** <<sql-functions-current-timestamp>>
54+
** <<sql-functions-datetime-trunc>>
5455
** <<sql-functions-datetime-day>>
5556
** <<sql-functions-datetime-dow>>
5657
** <<sql-functions-datetime-doy>>

x-pack/plugin/sql/qa/src/main/java/org/elasticsearch/xpack/sql/qa/jdbc/JdbcAssert.java

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
import org.relique.jdbc.csv.CsvResultSet;
1818

1919
import java.io.IOException;
20+
import java.sql.Date;
2021
import java.sql.ResultSet;
2122
import java.sql.ResultSetMetaData;
2223
import java.sql.SQLException;
@@ -37,6 +38,9 @@
3738
import static java.sql.Types.REAL;
3839
import static java.sql.Types.SMALLINT;
3940
import static java.sql.Types.TINYINT;
41+
import static java.time.ZoneOffset.UTC;
42+
import static org.elasticsearch.xpack.sql.qa.jdbc.JdbcTestUtils.logResultSetMetadata;
43+
import static org.elasticsearch.xpack.sql.qa.jdbc.JdbcTestUtils.resultSetCurrentData;
4044
import static org.hamcrest.MatcherAssert.assertThat;
4145
import static org.hamcrest.Matchers.instanceOf;
4246
import static org.junit.Assert.assertEquals;
@@ -107,7 +111,7 @@ public static void assertResultSetMetadata(ResultSet expected, ResultSet actual,
107111
ResultSetMetaData actualMeta = actual.getMetaData();
108112

109113
if (logger != null) {
110-
JdbcTestUtils.logResultSetMetadata(actual, logger);
114+
logResultSetMetadata(actual, logger);
111115
}
112116

113117
if (expectedMeta.getColumnCount() != actualMeta.getColumnCount()) {
@@ -210,7 +214,7 @@ private static void doAssertResultSetData(ResultSet expected, ResultSet actual,
210214
assertTrue("Expected more data but no more entries found after [" + count + "]", actual.next());
211215

212216
if (logger != null) {
213-
logger.info(JdbcTestUtils.resultSetCurrentData(actual));
217+
logger.info(resultSetCurrentData(actual));
214218
}
215219

216220
for (int column = 1; column <= columns; column++) {
@@ -264,6 +268,10 @@ private static void doAssertResultSetData(ResultSet expected, ResultSet actual,
264268
else if (type == Types.TIMESTAMP || type == Types.TIMESTAMP_WITH_TIMEZONE) {
265269
assertEquals(msg, expected.getTimestamp(column), actual.getTimestamp(column));
266270
}
271+
// then date
272+
else if (type == Types.DATE) {
273+
assertEquals(msg, convertDateToSystemTimezone(expected.getDate(column)), actual.getDate(column));
274+
}
267275
// and floats/doubles
268276
else if (type == Types.DOUBLE) {
269277
assertEquals(msg, (double) expectedObject, (double) actualObject, lenientFloatingNumbers ? 1d : 0.0d);
@@ -301,14 +309,14 @@ else if (type == Types.VARCHAR && actualObject instanceof TemporalAmount) {
301309
} catch (AssertionError ae) {
302310
if (logger != null && actual.next()) {
303311
logger.info("^^^ Assertion failure ^^^");
304-
logger.info(JdbcTestUtils.resultSetCurrentData(actual));
312+
logger.info(resultSetCurrentData(actual));
305313
}
306314
throw ae;
307315
}
308316

309317
if (actual.next()) {
310318
fail("Elasticsearch [" + actual + "] still has data after [" + count + "] entries:\n"
311-
+ JdbcTestUtils.resultSetCurrentData(actual));
319+
+ resultSetCurrentData(actual));
312320
}
313321
}
314322

@@ -328,4 +336,9 @@ private static int typeOf(int columnType, boolean lenientDataType) {
328336

329337
return columnType;
330338
}
339+
340+
// Normalizes the DATE read from the CSV file before it is compared with the driver's value:
// takes the calendar day of the given java.sql.Date via toLocalDate() and returns a new
// java.sql.Date positioned at the start of that day in UTC.
// NOTE(review): toLocalDate() presumably resolves the calendar day in the JVM default
// timezone (-Dtests.timezone=XXXX), which is why the re-anchoring to UTC is needed - confirm.
341+
private static Date convertDateToSystemTimezone(Date date) {
342+
return new Date(date.toLocalDate().atStartOfDay(UTC).toInstant().toEpochMilli());
343+
}
331344
}

x-pack/plugin/sql/qa/src/main/resources/command.csv-spec

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -39,16 +39,17 @@ CURRENT_DATE |SCALAR
3939
CURRENT_TIME |SCALAR
4040
CURRENT_TIMESTAMP|SCALAR
4141
CURTIME |SCALAR
42+
DATE_TRUNC |SCALAR
4243
DAY |SCALAR
4344
DAYNAME |SCALAR
44-
DAYOFMONTH |SCALAR
45-
DAYOFWEEK |SCALAR
46-
DAYOFYEAR |SCALAR
47-
DAY_NAME |SCALAR
48-
DAY_OF_MONTH |SCALAR
49-
DAY_OF_WEEK |SCALAR
50-
DAY_OF_YEAR |SCALAR
51-
DOM |SCALAR
45+
DAYOFMONTH |SCALAR
46+
DAYOFWEEK |SCALAR
47+
DAYOFYEAR |SCALAR
48+
DAY_NAME |SCALAR
49+
DAY_OF_MONTH |SCALAR
50+
DAY_OF_WEEK |SCALAR
51+
DAY_OF_YEAR |SCALAR
52+
DOM |SCALAR
5253
DOW |SCALAR
5354
DOY |SCALAR
5455
HOUR |SCALAR

x-pack/plugin/sql/qa/src/main/resources/datetime.csv-spec

Lines changed: 146 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,151 @@ SELECT WEEK(birth_date) week, birth_date FROM test_emp WHERE WEEK(birth_date) >
121121
2 |1953-01-07T00:00:00.000Z
122122
;
123123

124+
selectDateTruncWithDateTime
125+
schema::dt_hour:ts|dt_min:ts|dt_sec:ts|dt_millis:s|dt_micro:s|dt_nano:s
126+
SELECT DATE_TRUNC('hour', '2019-09-04T11:22:33.123Z'::datetime) as dt_hour, DATE_TRUNC('minute', '2019-09-04T11:22:33.123Z'::datetime) as dt_min,
127+
DATE_TRUNC('seconds', '2019-09-04T11:22:33.123Z'::datetime) as dt_sec, DATE_TRUNC('ms', '2019-09-04T11:22:33.123Z'::datetime)::string as dt_millis,
128+
DATE_TRUNC('mcs', '2019-09-04T11:22:33.123Z'::datetime)::string as dt_micro, DATE_TRUNC('nanoseconds', '2019-09-04T11:22:33.123Z'::datetime)::string as dt_nano;
129+
130+
dt_hour | dt_min | dt_sec | dt_millis | dt_micro | dt_nano
131+
-------------------------+---------------------------+--------------------------+--------------------------+--------------------------+-------------------------
132+
2019-09-04T11:00:00.000Z | 2019-09-04T11:22:00.000Z | 2019-09-04T11:22:33.000Z | 2019-09-04T11:22:33.123Z | 2019-09-04T11:22:33.123Z | 2019-09-04T11:22:33.123Z
133+
;
134+
135+
selectDateTruncWithDate
136+
schema::dt_mil:ts|dt_cent:ts|dt_dec:ts|dt_year:ts|dt_quarter:ts|dt_month:ts|dt_week:ts|dt_day:ts
137+
SELECT DATE_TRUNC('millennia', '2019-09-04'::date) as dt_mil, DATE_TRUNC('century', '2019-09-04'::date) as dt_cent,
138+
DATE_TRUNC('decades', '2019-09-04'::date) as dt_dec, DATE_TRUNC('year', '2019-09-04'::date) as dt_year,
139+
DATE_TRUNC('quarter', '2019-09-04'::date) as dt_quarter, DATE_TRUNC('month', '2019-09-04'::date) as dt_month,
140+
DATE_TRUNC('week', '2019-09-04'::date) as dt_week, DATE_TRUNC('day', '2019-09-04'::date) as dt_day;
141+
142+
dt_mil | dt_cent | dt_dec | dt_year | dt_quarter | dt_month | dt_week | dt_day
143+
-------------------------+--------------------------+--------------------------+--------------------------+--------------------------+--------------------------+--------------------------+-------------------------
144+
2000-01-01T00:00:00.000Z | 2000-01-01T00:00:00.000Z | 2010-01-01T00:00:00.000Z | 2019-01-01T00:00:00.000Z | 2019-07-01T00:00:00.000Z | 2019-09-01T00:00:00.000Z | 2019-09-02T00:00:00.000Z | 2019-09-04T00:00:00.000Z
145+
;
146+
147+
selectDateTruncWithField
148+
schema::emp_no:i|birth_date:ts|dt_mil:ts|dt_cent:ts|dt_dec:ts|dt_year:ts|dt_quarter:ts|dt_month:ts|dt_week:ts|dt_day:ts
149+
SELECT emp_no, birth_date, DATE_TRUNC('millennium', birth_date) as dt_mil, DATE_TRUNC('centuries', birth_date) as dt_cent,
150+
DATE_TRUNC('decades', birth_date) as dt_dec, DATE_TRUNC('year', birth_date) as dt_year, DATE_TRUNC('quarter', birth_date) as dt_quarter,
151+
DATE_TRUNC('month', birth_date) as dt_month, DATE_TRUNC('week', birth_date) as dt_week, DATE_TRUNC('day', birth_date) as dt_day
152+
FROM test_emp WHERE emp_no >= 10032 AND emp_no <= 10042 ORDER BY 1;
153+
154+
emp_no | birth_date | dt_mil | dt_cent | dt_dec | dt_year | dt_quarter | dt_month | dt_week | dt_day
155+
--------+-------------------------+--------------------------+--------------------------+--------------------------+--------------------------+--------------------------+--------------------------+--------------------------+-------------------------
156+
10032 |1960-08-09 00:00:00.000Z | 0999-12-27 00:00:00.000Z | 1900-01-01 00:00:00.000Z | 1960-01-01 00:00:00.000Z | 1960-01-01 00:00:00.000Z | 1960-07-01 00:00:00.000Z | 1960-08-01 00:00:00.000Z | 1960-08-08 00:00:00.000Z | 1960-08-09 00:00:00.000Z
157+
10033 |1956-11-14 00:00:00.000Z | 0999-12-27 00:00:00.000Z | 1900-01-01 00:00:00.000Z | 1950-01-01 00:00:00.000Z | 1956-01-01 00:00:00.000Z | 1956-10-01 00:00:00.000Z | 1956-11-01 00:00:00.000Z | 1956-11-12 00:00:00.000Z | 1956-11-14 00:00:00.000Z
158+
10034 |1962-12-29 00:00:00.000Z | 0999-12-27 00:00:00.000Z | 1900-01-01 00:00:00.000Z | 1960-01-01 00:00:00.000Z | 1962-01-01 00:00:00.000Z | 1962-10-01 00:00:00.000Z | 1962-12-01 00:00:00.000Z | 1962-12-24 00:00:00.000Z | 1962-12-29 00:00:00.000Z
159+
10035 |1953-02-08 00:00:00.000Z | 0999-12-27 00:00:00.000Z | 1900-01-01 00:00:00.000Z | 1950-01-01 00:00:00.000Z | 1953-01-01 00:00:00.000Z | 1953-01-01 00:00:00.000Z | 1953-02-01 00:00:00.000Z | 1953-02-02 00:00:00.000Z | 1953-02-08 00:00:00.000Z
160+
10036 |1959-08-10 00:00:00.000Z | 0999-12-27 00:00:00.000Z | 1900-01-01 00:00:00.000Z | 1950-01-01 00:00:00.000Z | 1959-01-01 00:00:00.000Z | 1959-07-01 00:00:00.000Z | 1959-08-01 00:00:00.000Z | 1959-08-10 00:00:00.000Z | 1959-08-10 00:00:00.000Z
161+
10037 |1963-07-22 00:00:00.000Z | 0999-12-27 00:00:00.000Z | 1900-01-01 00:00:00.000Z | 1960-01-01 00:00:00.000Z | 1963-01-01 00:00:00.000Z | 1963-07-01 00:00:00.000Z | 1963-07-01 00:00:00.000Z | 1963-07-22 00:00:00.000Z | 1963-07-22 00:00:00.000Z
162+
10038 |1960-07-20 00:00:00.000Z | 0999-12-27 00:00:00.000Z | 1900-01-01 00:00:00.000Z | 1960-01-01 00:00:00.000Z | 1960-01-01 00:00:00.000Z | 1960-07-01 00:00:00.000Z | 1960-07-01 00:00:00.000Z | 1960-07-18 00:00:00.000Z | 1960-07-20 00:00:00.000Z
163+
10039 |1959-10-01 00:00:00.000Z | 0999-12-27 00:00:00.000Z | 1900-01-01 00:00:00.000Z | 1950-01-01 00:00:00.000Z | 1959-01-01 00:00:00.000Z | 1959-10-01 00:00:00.000Z | 1959-10-01 00:00:00.000Z | 1959-09-28 00:00:00.000Z | 1959-10-01 00:00:00.000Z
164+
10040 | null | null | null | null | null | null | null | null | null
165+
10041 | null | null | null | null | null | null | null | null | null
166+
10042 | null | null | null | null | null | null | null | null | null
167+
;
168+
169+
selectDateTruncWithNullTruncateField
170+
SELECT DATE_TRUNC(null, birth_date) AS dt FROM test_emp LIMIT 5;
171+
172+
dt:ts
173+
------
174+
null
175+
null
176+
null
177+
null
178+
null
179+
;
180+
181+
selectDateTruncWithScalars
182+
SELECT birth_date, DATE_TRUNC(CAST(CHAR(109) AS VARCHAR), birth_date + INTERVAL 12 YEAR) AS dt FROM test_emp ORDER BY 1 DESC NULLS LAST LIMIT 5;
183+
184+
birth_date:ts | dt:ts
185+
-------------------------+---------------------
186+
1965-01-03 00:00:00.000Z | 1977-01-01 00:00:00.000Z
187+
1964-10-18 00:00:00.000Z | 1976-10-01 00:00:00.000Z
188+
1964-06-11 00:00:00.000Z | 1976-06-01 00:00:00.000Z
189+
1964-06-02 00:00:00.000Z | 1976-06-01 00:00:00.000Z
190+
1964-04-18 00:00:00.000Z | 1976-04-01 00:00:00.000Z
191+
;
192+
193+
selectDateTruncWithTruncArgFromField
194+
SELECT DATE_TRUNC(CONCAT(gender, 'illennium'), birth_date) AS dt FROM test_emp WHERE gender='M' ORDER BY 1 DESC LIMIT 2;
195+
196+
dt:ts
197+
------------------------
198+
0999-12-27 00:00:00.000Z
199+
0999-12-27 00:00:00.000Z
200+
;
201+
202+
selectDateTruncWithComplexExpressions
203+
SELECT gender, birth_date, DATE_TRUNC(CASE WHEN gender = 'M' THEN CONCAT(gender, 'onths') WHEN gender = 'F' THEN 'decade' ELSE 'quarter' END,
204+
birth_date + INTERVAL 10 month) AS dt FROM test_emp WHERE dt > '1954-07-01'::date ORDER BY emp_no LIMIT 10;
205+
206+
gender:s | birth_date:ts | dt:ts
207+
------------+--------------------------+---------------------
208+
F | 1964-06-02 00:00:00.000Z | 1960-01-01 00:00:00.000Z
209+
M | 1959-12-03 00:00:00.000Z | 1960-10-01 00:00:00.000Z
210+
M | 1954-05-01 00:00:00.000Z | 1955-03-01 00:00:00.000Z
211+
M | 1955-01-21 00:00:00.000Z | 1955-11-01 00:00:00.000Z
212+
M | 1958-02-19 00:00:00.000Z | 1958-12-01 00:00:00.000Z
213+
null | 1963-06-01 00:00:00.000Z | 1964-04-01 00:00:00.000Z
214+
null | 1960-10-04 00:00:00.000Z | 1961-07-01 00:00:00.000Z
215+
null | 1963-06-07 00:00:00.000Z | 1964-04-01 00:00:00.000Z
216+
null | 1956-02-12 00:00:00.000Z | 1956-10-01 00:00:00.000Z
217+
null | 1959-08-19 00:00:00.000Z | 1960-04-01 00:00:00.000Z
218+
;
219+
220+
dateTruncOrderBy
221+
schema::emp_no:i|hire_date:ts|dt:ts
222+
SELECT emp_no, hire_date, DATE_TRUNC('quarter', hire_date) as dt FROM test_emp ORDER BY dt NULLS LAST, emp_no LIMIT 5;
223+
224+
emp_no | hire_date | dt
225+
--------+--------------------------+-------------------------
226+
10009 | 1985-02-18 00:00:00.000Z | 1985-01-01 00:00:00.000Z
227+
10048 | 1985-02-24 00:00:00.000Z | 1985-01-01 00:00:00.000Z
228+
10098 | 1985-05-13 00:00:00.000Z | 1985-04-01 00:00:00.000Z
229+
10061 | 1985-09-17 00:00:00.000Z | 1985-07-01 00:00:00.000Z
230+
10076 | 1985-07-09 00:00:00.000Z | 1985-07-01 00:00:00.000Z
231+
;
232+
233+
dateTruncFilter
234+
schema::emp_no:i|hire_date:ts|dt:ts
235+
SELECT emp_no, hire_date, DATE_TRUNC('quarter', hire_date) as dt FROM test_emp WHERE DATE_TRUNC('quarter', hire_date) > '1994-07-01T00:00:00.000Z'::timestamp ORDER BY emp_no;
236+
237+
emp_no | hire_date | dt
238+
--------+--------------------------+-------------------------
239+
10016 | 1995-01-27 00:00:00.000Z | 1995-01-01 00:00:00.000Z
240+
10019 | 1999-04-30 00:00:00.000Z | 1999-04-01 00:00:00.000Z
241+
10022 | 1995-08-22 00:00:00.000Z | 1995-07-01 00:00:00.000Z
242+
10024 | 1997-05-19 00:00:00.000Z | 1997-04-01 00:00:00.000Z
243+
10026 | 1995-03-20 00:00:00.000Z | 1995-01-01 00:00:00.000Z
244+
10054 | 1995-03-13 00:00:00.000Z | 1995-01-01 00:00:00.000Z
245+
10084 | 1995-12-15 00:00:00.000Z | 1995-10-01 00:00:00.000Z
246+
10093 | 1996-11-05 00:00:00.000Z | 1996-10-01 00:00:00.000Z
247+
;
248+
249+
dateTruncGroupBy
250+
schema::count:l|dt:ts
251+
SELECT count(*) as count, DATE_TRUNC('decade', hire_date) dt FROM test_emp GROUP BY dt ORDER BY 2;
252+
253+
count | dt
254+
--------+-------------------------
255+
59 | 1980-01-01 00:00:00.000Z
256+
41 | 1990-01-01 00:00:00.000Z
257+
;
258+
259+
dateTruncHaving
260+
schema::gender:s|dt:ts
261+
SELECT gender, max(hire_date) dt FROM test_emp GROUP BY gender HAVING DATE_TRUNC('year', max(hire_date)) >= '1997-01-01T00:00:00.000Z'::timestamp ORDER BY 1;
262+
263+
gender | dt
264+
--------+-------------------------
265+
null | 1999-04-30 00:00:00.000Z
266+
F | 1997-05-19 00:00:00.000Z
267+
;
268+
124269
//
125270
// Aggregate
126271
//
@@ -404,4 +549,4 @@ SELECT CAST (CAST (birth_date AS VARCHAR) AS TIMESTAMP) a FROM test_emp WHERE YE
404549
a:ts
405550
---------------
406551
1965-01-03T00:00:00Z
407-
;
552+
;

0 commit comments

Comments
 (0)