1 change: 1 addition & 0 deletions docs/sql-ref-ansi-compliance.md
@@ -573,6 +573,7 @@ Below is a list of all the keywords in Spark SQL.
|THEN|reserved|non-reserved|reserved|
|TIME|reserved|non-reserved|reserved|
|TIMESTAMP|non-reserved|non-reserved|non-reserved|
|TIMESTAMPADD|non-reserved|non-reserved|non-reserved|


Do you want to add DATEADD as an alias for the same function in this PR?

Member Author

Let's discuss overloading of DATEADD in a separate JIRA. This is arguable, and we need to reach a consensus, from my point of view.

|TO|reserved|non-reserved|reserved|
|TOUCH|non-reserved|non-reserved|non-reserved|
|TRAILING|reserved|non-reserved|reserved|
@@ -860,6 +860,7 @@ primaryExpression

primaryExpression
: name=(CURRENT_DATE | CURRENT_TIMESTAMP | CURRENT_USER) #currentLike
| TIMESTAMPADD '(' unit=identifier ',' unitsAmount=valueExpression ',' timestamp=valueExpression ')' #timestampadd
| CASE whenClause+ (ELSE elseExpression=expression)? END #searchedCase
| CASE value=expression whenClause+ (ELSE elseExpression=expression)? END #simpleCase
| name=(CAST | TRY_CAST) '(' expression AS dataType ')' #cast
@@ -1267,6 +1268,7 @@ ansiNonReserved
| TEMPORARY
| TERMINATED
| TIMESTAMP
| TIMESTAMPADD
| TOUCH
| TRANSACTION
| TRANSACTIONS
@@ -1544,6 +1546,7 @@ nonReserved
| THEN
| TIME
| TIMESTAMP
| TIMESTAMPADD
| TO
| TOUCH
| TRAILING
@@ -1821,6 +1824,7 @@ TERMINATED: 'TERMINATED';
THEN: 'THEN';
TIME: 'TIME';
TIMESTAMP: 'TIMESTAMP';
TIMESTAMPADD: 'TIMESTAMPADD';
TO: 'TO';
TOUCH: 'TOUCH';
TRAILING: 'TRAILING';
@@ -594,6 +594,7 @@ object FunctionRegistry {
expression[UnixMillis]("unix_millis"),
expression[UnixMicros]("unix_micros"),
expression[ConvertTimezone]("convert_timezone"),
expression[TimestampAdd]("timestampadd"),

// collection functions
expression[CreateArray]("array"),
@@ -3057,3 +3057,87 @@ case class ConvertTimezone(
copy(sourceTz = newFirst, targetTz = newSecond, sourceTs = newThird)
}
}

// scalastyle:off line.size.limit
@ExpressionDescription(
usage = "_FUNC_(unit, quantity, timestamp) - Adds the specified number of units to the given timestamp.",
arguments = """
Arguments:
* unit - this indicates the units of datetime that you want to add.
Supported string values of `unit` are (case insensitive):
- "YEAR"
- "QUARTER" - 3 months
- "MONTH"
- "WEEK" - 7 days
- "DAY", "DAYOFYEAR"
- "HOUR"
- "MINUTE"
- "SECOND"
- "MILLISECOND" - milliseconds
- "MICROSECOND"
* quantity - this is the number of units of time that you want to add.
* timestamp - this is a timestamp (w/ or w/o timezone) to which you want to add.
""",
examples = """
Examples:
> SELECT _FUNC_('HOUR', 8, timestamp_ntz'2022-02-11 20:30:00');
2022-02-12 04:30:00
> SELECT _FUNC_('MONTH', 1, timestamp_ltz'2022-01-31 00:00:00');
2022-02-28 00:00:00
> SELECT _FUNC_(SECOND, -10, date'2022-01-01');
2021-12-31 23:59:50
> SELECT _FUNC_(YEAR, 10, timestamp'2000-01-01 01:02:03.123456');
2010-01-01 01:02:03.123456
""",
group = "datetime_funcs",
since = "3.3.0")
// scalastyle:on line.size.limit
case class TimestampAdd(
unit: Expression,
quantity: Expression,
timestamp: Expression,
timeZoneId: Option[String] = None)
extends TernaryExpression
with ImplicitCastInputTypes
with NullIntolerant
with TimeZoneAwareExpression {

def this(unit: Expression, quantity: Expression, timestamp: Expression) =
this(unit, quantity, timestamp, None)

override def first: Expression = unit
override def second: Expression = quantity
override def third: Expression = timestamp

override def inputTypes: Seq[AbstractDataType] = Seq(StringType, IntegerType, AnyTimestampType)
override def dataType: DataType = timestamp.dataType

override def withTimeZone(timeZoneId: String): TimeZoneAwareExpression =
copy(timeZoneId = Option(timeZoneId))

@transient private lazy val zoneIdInEval: ZoneId = zoneIdForType(timestamp.dataType)

override def nullSafeEval(u: Any, q: Any, micros: Any): Any = {
DateTimeUtils.timestampAdd(
u.asInstanceOf[UTF8String].toString,
q.asInstanceOf[Int],
micros.asInstanceOf[Long],
zoneIdInEval)
}

override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
val dtu = DateTimeUtils.getClass.getName.stripSuffix("$")
val zid = ctx.addReferenceObj("zoneId", zoneIdInEval, classOf[ZoneId].getName)
defineCodeGen(ctx, ev, (u, q, micros) =>
s"""$dtu.timestampAdd($u.toString(), $q, $micros, $zid)""")
}

override def prettyName: String = "timestampadd"

override protected def withNewChildrenInternal(
newFirst: Expression,
newSecond: Expression,
newThird: Expression): TimestampAdd = {
copy(unit = newFirst, quantity = newSecond, timestamp = newThird)
}
}
@@ -4510,4 +4510,15 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg
private def alterViewTypeMismatchHint: Option[String] = Some("Please use ALTER TABLE instead.")

private def alterTableTypeMismatchHint: Option[String] = Some("Please use ALTER VIEW instead.")

/**
* Create a TimestampAdd expression.
*/
override def visitTimestampadd(ctx: TimestampaddContext): Expression = withOrigin(ctx) {
val arguments = Seq(
Literal(ctx.unit.getText),
Contributor

@MaxGekk I think this indicates the unit parameter must be foldable?

Member Author (@MaxGekk, Feb 21, 2022)

This is not the only entry point for timestampadd, so the word "must" is not applicable. See the example above (#35502 (comment)) of how it should work if the unit parameter must be foldable. BTW, I will open a JIRA to implement the optimization for when unit is foldable.

Contributor

Ah I see, then I think the current implementation is fine.

expression(ctx.unitsAmount),
expression(ctx.timestamp))
UnresolvedFunction("timestampadd", arguments, isDistinct = false)
}
}
@@ -1163,4 +1163,40 @@ object DateTimeUtils {
val localStartTs = getLocalDateTime(startMicros, zoneId)
ChronoUnit.MICROS.between(localStartTs, localEndTs)
}

/**
* Adds the specified number of units to a timestamp.
*
* @param unit A keyword that specifies the interval units to add to the input timestamp.
* @param quantity The amount of `unit`s to add. It can be positive or negative.
* @param micros The input timestamp value, expressed in microseconds since 1970-01-01 00:00:00Z.
* @param zoneId The time zone ID at which the operation is performed.
* @return A timestamp value, expressed in microseconds since 1970-01-01 00:00:00Z.
*/
def timestampAdd(unit: String, quantity: Int, micros: Long, zoneId: ZoneId): Long = {
unit.toUpperCase(Locale.ROOT) match {
case "MICROSECOND" =>
timestampAddDayTime(micros, quantity, zoneId)
case "MILLISECOND" =>
timestampAddDayTime(micros, quantity * MICROS_PER_MILLIS, zoneId)
case "SECOND" =>
timestampAddDayTime(micros, quantity * MICROS_PER_SECOND, zoneId)
case "MINUTE" =>
timestampAddDayTime(micros, quantity * MICROS_PER_MINUTE, zoneId)
case "HOUR" =>
timestampAddDayTime(micros, quantity * MICROS_PER_HOUR, zoneId)
case "DAY" | "DAYOFYEAR" =>
timestampAddDayTime(micros, quantity * MICROS_PER_DAY, zoneId)
case "WEEK" =>
timestampAddDayTime(micros, quantity * MICROS_PER_DAY * DAYS_PER_WEEK, zoneId)
case "MONTH" =>
timestampAddMonths(micros, quantity, zoneId)
case "QUARTER" =>
timestampAddMonths(micros, quantity * 3, zoneId)
case "YEAR" =>
timestampAddMonths(micros, quantity * MONTHS_PER_YEAR, zoneId)
case _ =>
throw QueryExecutionErrors.invalidUnitInTimestampAdd(unit)
Contributor

Shall we check the unit names in the parser? To fail earlier.

Member Author

We can, but I just wonder what kind of problem the earlier check would solve. The parser and compiler already do a lot of work; adding more unnecessary things should be motivated somehow, from my point of view.

Contributor

Failing earlier is a pretty strong reason, right? It's a waste of resources if we submit a Spark job that fails with a wrong unit name.

Member Author

It's a waste of resources if we submit a Spark job that fails with a wrong unit name.

Not sure. Can you imagine a cluster of 1000 executors waiting for the driver, which is still processing a query because we eagerly want to check everything even when users' queries and data don't have any issues? That is a real waste of users' resources.

Member Author (@MaxGekk, Feb 18, 2022)

Also, I would like to add that what you are talking about is actually a mistake in a query, like:

SELECT timestampadd(YEER, 1, timestampColumn);

Such mistakes are not permanent, and users usually fix them during the debug stage. There is not much reason to double-check such mistakes both at parsing and at runtime (and we must check at runtime, since unit can be non-foldable).

Member Author

I don't understand why we suddenly want to stop doing it from this PR.

  1. The unit param can be non-foldable. I made it generic intentionally. If you wonder why, I will answer that separately.
  2. As unit can be non-foldable, we need the runtime check.
  3. If we add checks in the parser, we will do the checks twice (at parsing and at execution), which is not necessary because
  4. We can handle a foldable unit in codegen as an optimization, where we (of course) have to check unit values at the optimization phase.

In summary, taking into account that we will optimize the foldable-unit case in codegen in the near future, where we will validate the correctness of unit, there is no need to do that in the parser as you proposed.

Example: EXTRACT, TO_BINARY, TO_NUMBER

Those expressions require one of their parameters (format, field, etc.) to always be foldable. In the case of TIMESTAMPADD(), it is an unnecessary restriction, I believe. I have faced the situation a few times when some code was deployed in production after testing and we needed to increase the precision of intervals. Let's say we had:

select timestampadd(SECOND, tbl.quantity, tbl.ts1) 

and we want to bump the precision of tbl.quantity to milliseconds. Since quantity is a column in a table, we can just multiply it by 1000 during a maintenance window, but what should we do with SECOND? We would have to re-deploy the code, including going through the whole release cycle, only because a Spark dev forced us to hard-code SECOND in our code, for some unclear reasons.
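
To illustrate the alternative that a non-foldable unit enables (a hypothetical sketch, not part of this PR; the unit_conf table and its column are made up), the unit can live next to the data, so a precision bump only updates stored values:

create table unit_conf as select 'SECOND' as u;
select timestampadd(c.u, t.quantity, t.ts1) from tbl t cross join unit_conf c;

Bumping to milliseconds then means updating unit_conf.u to 'MILLISECOND' and multiplying tbl.quantity by 1000 during the same maintenance window, with no code redeploy.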

Contributor

I'm totally OK with your decision if the unit parameter can be non-foldable, but it seems that's not the case? We even added a special parse rule for this function so that the unit parameter is an identifier.

Member Author (@MaxGekk, Feb 18, 2022)

so that the unit parameter is an identifier.

It can be an identifier or a string column, see:

spark-sql> create table tbl as select 'SECOND' as u, 1 as q, timestamp'now' as t;
spark-sql> select * from tbl;
SECOND	1	2022-02-18 18:33:34.939
spark-sql> select timestampadd(tbl.u, q, t) from tbl;
2022-02-18 18:33:35.939

or

spark-sql> select timestampadd('HOUR', 1, timestamp'now');
2022-02-18 19:38:54.817


Technically speaking, the first argument, unit, should be a datetime interval type, as in what's used with EXTRACT. It isn't meant to be a string, if that makes things any simpler.

Member Author

the first argument, unit, should be a datetime interval type

I didn't get your point. How could it be the interval type?

... as in what's used with EXTRACT

Just wondering why you linked TIMESTAMPADD to EXTRACT but not to TIMESTAMPDIFF, for example. Anyway, technically speaking, the type of the first argument is the same: string type.

... makes things any simpler.

This PR achieves that goal, I believe. It makes the migration process to Spark SQL simpler, and gives additional benefits when using Spark SQL in real production (see my comment above).

}
}
}
@@ -1963,4 +1963,10 @@ object QueryExecutionErrors {
def unsupportedDropNamespaceRestrictError(): Throwable = {
new SQLFeatureNotSupportedException("Drop namespace restrict is not supported")
}

def invalidUnitInTimestampAdd(unit: String): Throwable = {
new SparkIllegalArgumentException(
errorClass = "INVALID_PARAMETER_VALUE",
messageParameters = Array("unit", "timestampadd", unit))
}
}
@@ -1885,4 +1885,66 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
}
}
}

test("SPARK-38195: add a quantity of interval units to a timestamp") {
// Check case-insensitivity
checkEvaluation(
TimestampAdd(Literal("Hour"), Literal(1), Literal(LocalDateTime.of(2022, 2, 15, 12, 57, 0))),
LocalDateTime.of(2022, 2, 15, 13, 57, 0))
// Check nulls as input values
checkEvaluation(
TimestampAdd(
Literal.create(null, StringType),
Literal(1),
Literal(LocalDateTime.of(2022, 2, 15, 12, 57, 0))),
null)
checkEvaluation(
TimestampAdd(
Literal("MINUTE"),
Literal.create(null, IntegerType),
Literal(LocalDateTime.of(2022, 2, 15, 12, 57, 0))),
null)
checkEvaluation(
TimestampAdd(
Literal("MINUTE"),
Literal(1),
Literal.create(null, TimestampType)),
null)
// Check crossing the daylight saving time
checkEvaluation(
TimestampAdd(
Literal("HOUR"),
Literal(6),
Literal(Instant.parse("2022-03-12T23:30:00Z")),
Some("America/Los_Angeles")),
Instant.parse("2022-03-13T05:30:00Z"))
// Check the leap year
checkEvaluation(
TimestampAdd(
Literal("DAY"),
Literal(2),
Literal(LocalDateTime.of(2020, 2, 28, 10, 11, 12)),
Some("America/Los_Angeles")),
LocalDateTime.of(2020, 3, 1, 10, 11, 12))

Seq(
"YEAR", "QUARTER", "MONTH",
"WEEK", "DAY",
"HOUR", "MINUTE", "SECOND",
"MILLISECOND", "MICROSECOND"
).foreach { unit =>
outstandingTimezonesIds.foreach { tz =>
Seq(TimestampNTZType, TimestampType).foreach { tsType =>
checkConsistencyBetweenInterpretedAndCodegenAllowingException(
(quantity: Expression, timestamp: Expression) =>
TimestampAdd(
Literal(unit),
quantity,
timestamp,
Some(tz)),
IntegerType, tsType)
}
}
}
}
}
@@ -26,7 +26,7 @@ import java.util.concurrent.TimeUnit
import org.scalatest.matchers.must.Matchers
import org.scalatest.matchers.should.Matchers._

import org.apache.spark.SparkFunSuite
import org.apache.spark.{SparkFunSuite, SparkIllegalArgumentException}
import org.apache.spark.sql.catalyst.plans.SQLHelper
import org.apache.spark.sql.catalyst.util.DateTimeConstants._
import org.apache.spark.sql.catalyst.util.DateTimeTestUtils._
@@ -955,4 +955,38 @@ class DateTimeUtilsSuite extends SparkFunSuite with Matchers with SQLHelper {
s"The difference is ${(result - expectedMicros) / MICROS_PER_HOUR} hours")
}
}

test("SPARK-38195: add a quantity of interval units to a timestamp") {
outstandingZoneIds.foreach { zid =>
assert(timestampAdd("MICROSECOND", 1, date(2022, 2, 14, 11, 27, 0, 0, zid), zid) ===
date(2022, 2, 14, 11, 27, 0, 1, zid))
assert(timestampAdd("MILLISECOND", -1, date(2022, 2, 14, 11, 27, 0, 1000, zid), zid) ===
date(2022, 2, 14, 11, 27, 0, 0, zid))
assert(timestampAdd("SECOND", 0, date(2022, 2, 14, 11, 27, 0, 1001, zid), zid) ===
date(2022, 2, 14, 11, 27, 0, 1001, zid))
assert(timestampAdd("MINUTE", -90, date(2022, 2, 14, 11, 0, 1, 1, zid), zid) ===
date(2022, 2, 14, 9, 30, 1, 1, zid))
assert(timestampAdd("HOUR", 24, date(2022, 2, 14, 11, 0, 1, 0, zid), zid) ===
date(2022, 2, 15, 11, 0, 1, 0, zid))
assert(timestampAdd("DAY", 1, date(2022, 2, 28, 11, 1, 0, 0, zid), zid) ===
date(2022, 3, 1, 11, 1, 0, 0, zid))
assert(timestampAdd("DAYOFYEAR", 364, date(2022, 1, 1, 0, 0, 0, 0, zid), zid) ===
date(2022, 12, 31, 0, 0, 0, 0, zid))
assert(timestampAdd("WEEK", 1, date(2022, 2, 14, 11, 43, 0, 1, zid), zid) ===
date(2022, 2, 21, 11, 43, 0, 1, zid))
assert(timestampAdd("MONTH", 10, date(2022, 2, 14, 11, 43, 0, 1, zid), zid) ===
date(2022, 12, 14, 11, 43, 0, 1, zid))
assert(timestampAdd("QUARTER", 1, date(1900, 2, 1, 0, 0, 0, 1, zid), zid) ===
date(1900, 5, 1, 0, 0, 0, 1, zid))
assert(timestampAdd("YEAR", 1, date(9998, 1, 1, 0, 0, 0, 1, zid), zid) ===
date(9999, 1, 1, 0, 0, 0, 1, zid))
assert(timestampAdd("YEAR", -9998, date(9999, 1, 1, 0, 0, 0, 1, zid), zid) ===
date(1, 1, 1, 0, 0, 0, 1, zid))
}

val e = intercept[SparkIllegalArgumentException] {
timestampAdd("SECS", 1, date(1969, 1, 1, 0, 0, 0, 1, getZoneId("UTC")), getZoneId("UTC"))
}
assert(e.getMessage.contains("invalid: SECS"))
}
}
@@ -1,6 +1,6 @@
<!-- Automatically generated by ExpressionsSchemaSuite -->
## Summary
- Number of queries: 379
- Number of queries: 380
- Number of expressions that missing example: 12
- Expressions missing examples: bigint,binary,boolean,date,decimal,double,float,int,smallint,string,timestamp,tinyint
## Schema of Built-in Functions
@@ -299,6 +299,7 @@
| org.apache.spark.sql.catalyst.expressions.Tan | tan | SELECT tan(0) | struct<TAN(0):double> |
| org.apache.spark.sql.catalyst.expressions.Tanh | tanh | SELECT tanh(0) | struct<TANH(0):double> |
| org.apache.spark.sql.catalyst.expressions.TimeWindow | window | SELECT a, window.start, window.end, count(*) as cnt FROM VALUES ('A1', '2021-01-01 00:00:00'), ('A1', '2021-01-01 00:04:30'), ('A1', '2021-01-01 00:06:00'), ('A2', '2021-01-01 00:01:00') AS tab(a, b) GROUP by a, window(b, '5 minutes') ORDER BY a, start | struct<a:string,start:timestamp,end:timestamp,cnt:bigint> |
| org.apache.spark.sql.catalyst.expressions.TimestampAdd | timestampadd | SELECT timestampadd('HOUR', 8, timestamp_ntz'2022-02-11 20:30:00') | struct<timestampadd(HOUR, 8, TIMESTAMP_NTZ '2022-02-11 20:30:00'):timestamp_ntz> |
| org.apache.spark.sql.catalyst.expressions.ToBinary | to_binary | SELECT to_binary('abc', 'utf-8') | struct<to_binary(abc, utf-8):binary> |
| org.apache.spark.sql.catalyst.expressions.ToDegrees | degrees | SELECT degrees(3.141592653589793) | struct<DEGREES(3.141592653589793):double> |
| org.apache.spark.sql.catalyst.expressions.ToNumber | to_number | SELECT to_number('454', '999') | struct<to_number(454, 999):decimal(3,0)> |
6 changes: 6 additions & 0 deletions sql/core/src/test/resources/sql-tests/inputs/timestamp.sql
@@ -142,3 +142,9 @@ select to_timestamp('22 05 2020 Friday', 'dd MM yyyy EEEEE');
select unix_timestamp('22 05 2020 Friday', 'dd MM yyyy EEEEE');
select from_json('{"t":"26/October/2015"}', 't Timestamp', map('timestampFormat', 'dd/MMMMM/yyyy'));
select from_csv('26/October/2015', 't Timestamp', map('timestampFormat', 'dd/MMMMM/yyyy'));

-- Add a number of units to a timestamp or a date
select timestampadd('MONTH', -1, timestamp'2022-02-14 01:02:03');
select timestampadd(MINUTE, 58, timestamp'2022-02-14 01:02:03');
select timestampadd(YEAR, 1, date'2022-02-15');
select timestampadd('SECOND', -1, date'2022-02-15');
Contributor

Can we have some negative tests? E.g., invalid unit name, overflow, etc.

Member Author

The test for an invalid unit name is in this PR; see the test for the error class.
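
For reference, a minimal SQL-level sketch of such a negative case (a hypothetical query, not one of the queries added by this PR; the exact error output is not reproduced here):

select timestampadd('SECS', 1, timestamp'2022-02-15 00:00:00');
-- expected to fail at runtime with the INVALID_PARAMETER_VALUE error class, since 'SECS' is not a supported unit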

Member Author

Regarding overflow, I actually reused the methods for adding ANSI intervals to timestamps. I think we should test overflow for both ANSI intervals and timestampadd(). I will open a JIRA for that.
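
As an illustration, a hypothetical overflow case that such a test could cover (the exact behavior is not verified here; it depends on the shared ANSI-interval addition paths):

select timestampadd(YEAR, 100000000, timestamp'2022-02-15 00:00:00');
-- a candidate overflow scenario: the result lies far beyond the representable timestamp range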

@@ -1,5 +1,5 @@
-- Automatically generated by SQLQueryTestSuite
-- Number of queries: 89
-- Number of queries: 93


-- !query
@@ -771,3 +771,35 @@ struct<>
-- !query output
org.apache.spark.SparkUpgradeException
You may get a different result due to the upgrading of Spark 3.0: Fail to recognize 'dd/MMMMM/yyyy' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html


-- !query
select timestampadd('MONTH', -1, timestamp'2022-02-14 01:02:03')
-- !query schema
struct<timestampadd(MONTH, -1, TIMESTAMP '2022-02-14 01:02:03'):timestamp>
-- !query output
2022-01-14 01:02:03


-- !query
select timestampadd(MINUTE, 58, timestamp'2022-02-14 01:02:03')
-- !query schema
struct<timestampadd(MINUTE, 58, TIMESTAMP '2022-02-14 01:02:03'):timestamp>
-- !query output
2022-02-14 02:00:03


-- !query
select timestampadd(YEAR, 1, date'2022-02-15')
-- !query schema
struct<timestampadd(YEAR, 1, DATE '2022-02-15'):timestamp>
-- !query output
2023-02-15 00:00:00


-- !query
select timestampadd('SECOND', -1, date'2022-02-15')
-- !query schema
struct<timestampadd(SECOND, -1, DATE '2022-02-15'):timestamp>
-- !query output
2022-02-14 23:59:59