Skip to content
Closed
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 40 additions & 3 deletions python/pyspark/sql/connect/functions/builtin.py
Original file line number Diff line number Diff line change
Expand Up @@ -4019,6 +4019,7 @@ def try_make_timestamp_ltz(
try_make_timestamp_ltz.__doc__ = pysparkfuncs.try_make_timestamp_ltz.__doc__


@overload
def make_timestamp_ntz(
    years: "ColumnOrName",
    months: "ColumnOrName",
    days: "ColumnOrName",
    hours: "ColumnOrName",
    mins: "ColumnOrName",
    secs: "ColumnOrName",
) -> Column:
    # Typing overload only: six positional date-time component columns.
    # The actual implementation is the non-overloaded definition below.
    ...


@overload
def make_timestamp_ntz(*, date: "ColumnOrName", time: "ColumnOrName") -> Column:
    # Typing overload only: keyword-only `date` and `time` columns.
    # The actual implementation is the non-overloaded definition below.
    ...


def make_timestamp_ntz(
    years: Optional["ColumnOrName"] = None,
    months: Optional["ColumnOrName"] = None,
    days: Optional["ColumnOrName"] = None,
    hours: Optional["ColumnOrName"] = None,
    mins: Optional["ColumnOrName"] = None,
    secs: Optional["ColumnOrName"] = None,
    *,
    date: Optional["ColumnOrName"] = None,
    time: Optional["ColumnOrName"] = None,
) -> Column:
    # Exactly one of two calling conventions is accepted:
    #   1) all six positional date-time component columns, or
    #   2) keyword-only `date` and `time` columns.
    # (Docstring is copied from pyspark.sql.functions below.)
    components = [years, months, days, hours, mins, secs]
    has_components = any(arg is not None for arg in components)
    has_date_time = date is not None or time is not None

    # Mixing the two conventions is ambiguous and therefore rejected.
    if has_components and has_date_time:
        raise PySparkValueError(
            errorClass="WRONG_NUM_ARGS",
            messageParameters={
                "func_name": "make_timestamp_ntz",
                "expected": "either (years, months, days, hours, mins, secs) or (date, time)",
                "actual": "cannot mix both approaches",
            },
        )

    if date is not None and time is not None:
        # make_timestamp_ntz(date=..., time=...)
        return _invoke_function_over_columns("make_timestamp_ntz", date, time)
    if all(arg is not None for arg in components):
        # make_timestamp_ntz(years, months, days, hours, mins, secs)
        return _invoke_function_over_columns("make_timestamp_ntz", *components)

    # Anything else is a partial argument set in one of the conventions
    # (e.g. only `date`, or three of the six components).
    raise PySparkValueError(
        errorClass="WRONG_NUM_ARGS",
        messageParameters={
            "func_name": "make_timestamp_ntz",
            "expected": "either all 6 components (years, months, days, hours, mins, secs)"
            " or both date and time",
            "actual": "incomplete arguments",
        },
    )


make_timestamp_ntz.__doc__ = pysparkfuncs.make_timestamp_ntz.__doc__
Expand Down
69 changes: 63 additions & 6 deletions python/pyspark/sql/functions/builtin.py
Original file line number Diff line number Diff line change
Expand Up @@ -25126,17 +25126,37 @@ def try_make_timestamp_ltz(
)


@overload
def make_timestamp_ntz(
    years: "ColumnOrName",
    months: "ColumnOrName",
    days: "ColumnOrName",
    hours: "ColumnOrName",
    mins: "ColumnOrName",
    secs: "ColumnOrName",
) -> Column:
    # Typing overload only: six positional date-time component columns.
    # No decorator here — @_try_remote_functions belongs on the actual
    # (non-overloaded) implementation, not on a typing stub.
    ...


@overload
def make_timestamp_ntz(*, date: "ColumnOrName", time: "ColumnOrName") -> Column:
    # Typing overload only: keyword-only `date` and `time` columns.
    # The actual implementation is the non-overloaded definition below.
    ...


@_try_remote_functions
def make_timestamp_ntz(
years: Optional["ColumnOrName"] = None,
months: Optional["ColumnOrName"] = None,
days: Optional["ColumnOrName"] = None,
hours: Optional["ColumnOrName"] = None,
mins: Optional["ColumnOrName"] = None,
secs: Optional["ColumnOrName"] = None,
*,
date: Optional["ColumnOrName"] = None,
time: Optional["ColumnOrName"] = None,
) -> Column:
"""
Create local date-time from years, months, days, hours, mins, secs fields.
Create local date-time from years, months, days, hours, mins, secs fields, or from date and time fields.
If the configuration `spark.sql.ansi.enabled` is false, the function returns NULL
on invalid inputs. Otherwise, it will throw an error instead.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Shall we add a .. versionchanged::?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure! Which version should I use? (4.1?)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

4.1.0

Expand All @@ -25155,10 +25175,14 @@ def make_timestamp_ntz(
mins : :class:`~pyspark.sql.Column` or column name
The minute-of-hour to represent, from 0 to 59
secs : :class:`~pyspark.sql.Column` or column name
The second-of-minute and its micro-fraction to represent, from 0 to 60.
The second-of-minute and its micro-fraction to represent, from 0 to 60.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: is that intentional?

The value can be either an integer like 13 , or a fraction like 13.123.
If the sec argument equals to 60, the seconds field is set
to 0 and 1 minute is added to the final timestamp.
date : :class:`~pyspark.sql.Column` or column name
A date to represent, from 0001-01-01 to 9999-12-31
time : :class:`~pyspark.sql.Column` or column name
A local time to represent, from 00:00:00 to 23:59:59.999999

Returns
-------
Expand All @@ -25179,6 +25203,8 @@ def make_timestamp_ntz(
--------
>>> spark.conf.set("spark.sql.session.timeZone", "America/Los_Angeles")

Example 1: Create local date-time from year, month, day, hour, min, sec fields.

>>> import pyspark.sql.functions as sf
>>> df = spark.createDataFrame([[2014, 12, 28, 6, 30, 45.887]],
... ['year', 'month', 'day', 'hour', 'min', 'sec'])
Expand All @@ -25191,11 +25217,42 @@ def make_timestamp_ntz(
|2014-12-28 06:30:45.887 |
+----------------------------------------------------+

Example 2: Create local date-time from date and time fields.

>>> import pyspark.sql.functions as sf
>>> df = spark.createDataFrame([['2014-12-28', '06:30:45.887']],
... ['date_col', 'time_col'])
>>> df.select(
... sf.make_timestamp_ntz(date=sf.to_date(df.date_col), time=sf.to_time(df.time_col))
... ).show(truncate=False)
+------------------------------------------------------------------+
|make_timestamp_ntz(date=to_date(date_col), time=to_time(time_col))|
+------------------------------------------------------------------+
|2014-12-28 06:30:45.887 |
+------------------------------------------------------------------+

>>> spark.conf.unset("spark.sql.session.timeZone")
"""
return _invoke_function_over_columns(
"make_timestamp_ntz", years, months, days, hours, mins, secs
)
# Check for mixed arguments (invalid)
if any(arg is not None for arg in [years, months, days, hours, mins, secs]) and (date is not None or time is not None):
raise PySparkValueError(
errorClass="WRONG_NUM_ARGS",
messageParameters={"func_name": "make_timestamp_ntz", "expected": "either (years, months, days, hours, mins, secs) or (date, time)", "actual": "cannot mix both approaches"}
)

# Handle valid cases
if date is not None and time is not None:
# make_timestamp_ntz(date=..., time=...)
return _invoke_function_over_columns("make_timestamp_ntz", date, time)
elif all(arg is not None for arg in [years, months, days, hours, mins, secs]):
# make_timestamp_ntz(years, months, days, hours, mins, secs)
return _invoke_function_over_columns("make_timestamp_ntz", years, months, days, hours, mins, secs)
else:
# Invalid argument combination (partial arguments)
raise PySparkValueError(
errorClass="WRONG_NUM_ARGS",
messageParameters={"func_name": "make_timestamp_ntz", "expected": "either all 6 components (years, months, days, hours, mins, secs) or both date and time", "actual": "incomplete arguments"}
)


@_try_remote_functions
Expand Down
50 changes: 49 additions & 1 deletion python/pyspark/sql/tests/test_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -485,6 +485,54 @@ def test_try_make_timestamp_ntz(self):
)
assertDataFrameEqual(actual, [Row(None)])

def test_make_timestamp_ntz_with_date_time(self):
    # Exercise the make_timestamp_ntz(date=..., time=...) keyword-only overload.
    from datetime import date, time

    # DATE/TIME typed columns passed directly.
    rows = [(date(2024, 5, 22), time(10, 30, 45))]
    df = self.spark.createDataFrame(rows, ["date_col", "time_col"])
    result = df.select(F.make_timestamp_ntz(date=df.date_col, time=df.time_col))
    assertDataFrameEqual(result, [Row(datetime.datetime(2024, 5, 22, 10, 30, 45))])

    # String columns converted through to_date/to_time expressions.
    rows = [("2024-05-22", "10:30:45.123")]
    df = self.spark.createDataFrame(rows, ["date_str", "time_str"])
    result = df.select(
        F.make_timestamp_ntz(date=F.to_date(df.date_str), time=F.to_time(df.time_str))
    )
    assertDataFrameEqual(result, [Row(datetime.datetime(2024, 5, 22, 10, 30, 45, 123000))])

def test_make_timestamp_ntz_error_handling(self):
    # make_timestamp_ntz must reject incomplete or mixed argument sets.
    incomplete_params = {
        "func_name": "make_timestamp_ntz",
        "expected": "either all 6 components (years, months, days, hours, mins, secs)"
        " or both date and time",
        "actual": "incomplete arguments",
    }

    # No arguments at all.
    with self.assertRaises(PySparkValueError) as pe:
        F.make_timestamp_ntz()
    self.check_error(
        exception=pe.exception,
        errorClass="WRONG_NUM_ARGS",
        messageParameters=incomplete_params,
    )

    # Only one of the six component columns.
    with self.assertRaises(PySparkValueError) as pe:
        F.make_timestamp_ntz(F.lit(2024))
    self.check_error(
        exception=pe.exception,
        errorClass="WRONG_NUM_ARGS",
        messageParameters=incomplete_params,
    )

    # Mixing the six-component form with the date/time keyword form.
    with self.assertRaises(PySparkValueError) as pe:
        F.make_timestamp_ntz(
            F.lit(2024),
            F.lit(1),
            F.lit(1),
            F.lit(12),
            F.lit(0),
            F.lit(0),
            date=F.lit("2024-01-01"),
        )
    self.check_error(
        exception=pe.exception,
        errorClass="WRONG_NUM_ARGS",
        messageParameters={
            "func_name": "make_timestamp_ntz",
            "expected": "either (years, months, days, hours, mins, secs) or (date, time)",
            "actual": "cannot mix both approaches",
        },
    )

def test_string_functions(self):
string_functions = [
"upper",
Expand Down Expand Up @@ -2016,7 +2064,7 @@ class FunctionsTests(ReusedSQLTestCase, FunctionsTestsMixin):
if __name__ == "__main__":
import unittest
from pyspark.sql.tests.test_functions import * # noqa: F401

try:
import xmlrunner

Expand Down