Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions python/pyspark/errors/error-conditions.json
Original file line number Diff line number Diff line change
Expand Up @@ -1374,6 +1374,11 @@
"Value for `<arg_name>` must be between <lower_bound> and <upper_bound> (inclusive), got <actual>"
]
},
"WRONG_NUM_ARGS": {
"message": [
"Function `<func_name>` expects <expected> but got <actual>."
]
},
"WRONG_NUM_ARGS_FOR_HIGHER_ORDER_FUNCTION": {
"message": [
"Function `<func_name>` should take between 1 and 3 arguments, but the provided function takes <num_args>."
Expand Down
33 changes: 30 additions & 3 deletions python/pyspark/sql/connect/functions/builtin.py
Original file line number Diff line number Diff line change
Expand Up @@ -4019,6 +4019,7 @@ def try_make_timestamp_ltz(
try_make_timestamp_ltz.__doc__ = pysparkfuncs.try_make_timestamp_ltz.__doc__


@overload
def make_timestamp_ntz(
years: "ColumnOrName",
months: "ColumnOrName",
Expand All @@ -4027,9 +4028,35 @@ def make_timestamp_ntz(
mins: "ColumnOrName",
secs: "ColumnOrName",
) -> Column:
return _invoke_function_over_columns(
"make_timestamp_ntz", years, months, days, hours, mins, secs
)
...


@overload
def make_timestamp_ntz(date: "ColumnOrName", time: "ColumnOrName") -> Column:
    # Typing-only overload: build a timestamp_ntz from a date column and a time column.
    ...


def make_timestamp_ntz(*cols: "ColumnOrName") -> Column:
    """Create a local date-time column.

    Accepts either six component columns (years, months, days, hours, mins,
    secs) or two columns (date, time); any other arity raises
    ``PySparkValueError`` with error class ``WRONG_NUM_ARGS``.
    """
    num_cols = len(cols)
    # Both supported arities forward the columns unchanged to the same
    # server-side function, so a single dispatch point suffices.
    if num_cols in (6, 2):
        return _invoke_function_over_columns("make_timestamp_ntz", *cols)
    raise PySparkValueError(
        errorClass="WRONG_NUM_ARGS",
        messageParameters={
            "func_name": "make_timestamp_ntz",
            "expected": "either (years, months, days, hours, mins, secs) or (date, time)",
            "actual": f"{num_cols} columns",
        },
    )


make_timestamp_ntz.__doc__ = pysparkfuncs.make_timestamp_ntz.__doc__
Expand Down
101 changes: 81 additions & 20 deletions python/pyspark/sql/functions/builtin.py
Original file line number Diff line number Diff line change
Expand Up @@ -25126,7 +25126,7 @@ def try_make_timestamp_ltz(
)


@_try_remote_functions
@overload
def make_timestamp_ntz(
years: "ColumnOrName",
months: "ColumnOrName",
Expand All @@ -25135,30 +25135,59 @@ def make_timestamp_ntz(
mins: "ColumnOrName",
secs: "ColumnOrName",
) -> Column:
...


@overload
def make_timestamp_ntz(date: "ColumnOrName", time: "ColumnOrName") -> Column:
    # Typing-only overload: build a timestamp_ntz from a date column and a time column.
    ...


@_try_remote_functions
def make_timestamp_ntz(*cols: "ColumnOrName") -> Column:
"""
Create local date-time from years, months, days, hours, mins, secs fields.
Create local date-time from years, months, days, hours, mins, secs fields, or from
date and time fields.
If there are 6 cols, then this creates a timestamp from individual time components.
If there are 2 cols, then this creates a timestamp from date and time.
If the configuration `spark.sql.ansi.enabled` is false, the function returns NULL
on invalid inputs. Otherwise, it will throw an error instead.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Shall we add a .. versionchanged::?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure! Which version should I use? (4.1?)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

4.1.0

.. versionadded:: 3.5.0

.. versionchanged:: 3.4.0
Supports Spark Connect.

Parameters
----------
years : :class:`~pyspark.sql.Column` or column name
The year to represent, from 1 to 9999
months : :class:`~pyspark.sql.Column` or column name
The month-of-year to represent, from 1 (January) to 12 (December)
days : :class:`~pyspark.sql.Column` or column name
The day-of-month to represent, from 1 to 31
hours : :class:`~pyspark.sql.Column` or column name
The hour-of-day to represent, from 0 to 23
mins : :class:`~pyspark.sql.Column` or column name
The minute-of-hour to represent, from 0 to 59
secs : :class:`~pyspark.sql.Column` or column name
The second-of-minute and its micro-fraction to represent, from 0 to 60.
The value can be either an integer like 13 , or a fraction like 13.123.
If the sec argument equals to 60, the seconds field is set
to 0 and 1 minute is added to the final timestamp.
cols : :class:`~pyspark.sql.Column` or column name
Either 6 columns (years, months, days, hours, mins, secs)
Or 2 columns (date, time)

years : :class:`~pyspark.sql.Column` or column name
The year to represent, from 1 to 9999
months : :class:`~pyspark.sql.Column` or column name
The month-of-year to represent, from 1 (January) to 12 (December)
days : :class:`~pyspark.sql.Column` or column name
The day-of-month to represent, from 1 to 31
hours : :class:`~pyspark.sql.Column` or column name
The hour-of-day to represent, from 0 to 23
mins : :class:`~pyspark.sql.Column` or column name
The minute-of-hour to represent, from 0 to 59
secs : :class:`~pyspark.sql.Column` or column name
The second-of-minute and its micro-fraction to represent, from 0 to 60.
The value can be either an integer like 13, or a fraction like 13.123.
If the sec argument equals to 60, the seconds field is set
to 0 and 1 minute is added to the final timestamp.
date : :class:`~pyspark.sql.Column` or column name
A date to represent, from 0001-01-01 to 9999-12-31
time : :class:`~pyspark.sql.Column` or column name
A local time to represent, from 00:00:00 to 23:59:59.999999

Notes
-----
This function accepts either 6 arguments (years, months, days, hours, mins, secs)
or 2 arguments (date, time).

Returns
-------
Expand All @@ -25179,6 +25208,8 @@ def make_timestamp_ntz(
--------
>>> spark.conf.set("spark.sql.session.timeZone", "America/Los_Angeles")

Example 1: Create local date-time from year, month, day, hour, min, sec fields.

>>> import pyspark.sql.functions as sf
>>> df = spark.createDataFrame([[2014, 12, 28, 6, 30, 45.887]],
... ['year', 'month', 'day', 'hour', 'min', 'sec'])
Expand All @@ -25191,11 +25222,41 @@ def make_timestamp_ntz(
|2014-12-28 06:30:45.887 |
+----------------------------------------------------+

Example 2: Create local date-time from date and time fields.

>>> import pyspark.sql.functions as sf
>>> df = spark.createDataFrame([['2014-12-28', '06:30:45.887']],
... ['date_col', 'time_col'])
>>> df.select(
... sf.make_timestamp_ntz(sf.to_date(df.date_col), sf.to_time(df.time_col))
... ).show(truncate=False)
+--------------------------------------------------------+
|make_timestamp_ntz(to_date(date_col), to_time(time_col))|
+--------------------------------------------------------+
|2014-12-28 06:30:45.887 |
+--------------------------------------------------------+

>>> spark.conf.unset("spark.sql.session.timeZone")
"""
return _invoke_function_over_columns(
"make_timestamp_ntz", years, months, days, hours, mins, secs
)
if len(cols) == 2:
# make_timestamp_ntz(date, time)
date, time = cols
return _invoke_function_over_columns("make_timestamp_ntz", date, time)
elif len(cols) == 6:
years, months, days, hours, mins, secs = cols
return _invoke_function_over_columns(
"make_timestamp_ntz", years, months, days, hours, mins, secs
)
else:
# Invalid number of arguments
raise PySparkValueError(
errorClass="WRONG_NUM_ARGS",
messageParameters={
"func_name": "make_timestamp_ntz",
"expected": "either (years, months, days, hours, mins, secs) or (date, time)",
"actual": f"{len(cols)} columns",
},
)


@_try_remote_functions
Expand Down
58 changes: 58 additions & 0 deletions python/pyspark/sql/tests/test_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -485,6 +485,64 @@ def test_try_make_timestamp_ntz(self):
)
assertDataFrameEqual(actual, [Row(None)])

def test_make_timestamp_ntz_with_date_time(self):
    """Verify the two-argument (date, time) overload of make_timestamp_ntz."""
    from datetime import date, time

    # Case 1: native Python date/time objects as column values.
    rows = [(date(2024, 5, 22), time(10, 30, 45))]
    frame = self.spark.createDataFrame(rows, ["date_col", "time_col"])
    result = frame.select(F.make_timestamp_ntz(frame.date_col, frame.time_col))
    assertDataFrameEqual(result, [Row(datetime.datetime(2024, 5, 22, 10, 30, 45))])

    # Case 2: string columns converted through to_date / to_time.
    rows = [("2024-05-22", "10:30:45.123")]
    frame = self.spark.createDataFrame(rows, ["date_str", "time_str"])
    result = frame.select(
        F.make_timestamp_ntz(F.to_date(frame.date_str), F.to_time(frame.time_str))
    )
    assertDataFrameEqual(result, [Row(datetime.datetime(2024, 5, 22, 10, 30, 45, 123000))])

def test_make_timestamp_ntz_error_handling(self):
    """make_timestamp_ntz must reject every arity other than 2 or 6 columns."""
    # Each entry is an invalid argument list; only the reported count differs.
    invalid_arg_lists = [
        [],  # no arguments
        [F.lit(2024)],  # one argument
        [F.lit(2024), F.lit(1), F.lit(1)],  # three arguments
    ]
    for args in invalid_arg_lists:
        with self.assertRaises(PySparkValueError) as pe:
            F.make_timestamp_ntz(*args)

        self.check_error(
            exception=pe.exception,
            errorClass="WRONG_NUM_ARGS",
            messageParameters={
                "func_name": "make_timestamp_ntz",
                "expected": "either (years, months, days, hours, mins, secs) or (date, time)",
                "actual": f"{len(args)} columns",
            },
        )

def test_string_functions(self):
string_functions = [
"upper",
Expand Down