Skip to content
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
ff9c6d6
Initial commit
uros-db Aug 4, 2025
4e79dd8
Cleanup
uros-db Aug 18, 2025
a6bf88d
Merge branch 'apache:master' into python-try_make_timestamp_ntz
uros-db Aug 27, 2025
c707a1e
Python linter
uros-db Aug 27, 2025
53b30e9
Update tests
uros-db Aug 27, 2025
87c2327
Python lint fixes
uros-db Aug 29, 2025
3a6e03a
Python lint fixes
uros-db Aug 29, 2025
8180018
fix: doc-string
Yicong-Huang Sep 17, 2025
853b9c8
fix: doc-string
Yicong-Huang Sep 17, 2025
46f6b10
fix: doc-string
Yicong-Huang Sep 19, 2025
eadad20
wip: use keyword arguments only
Yicong-Huang Sep 19, 2025
ce643c2
use keyword arguments for date and time
Yicong-Huang Sep 22, 2025
b5936cf
refactor: clean up docstrings for make_timestamp_ntz function
Yicong-Huang Sep 22, 2025
91740e6
refactor: improve code readability in make_timestamp_ntz function by …
Yicong-Huang Sep 22, 2025
da60214
refactor: replace __builtins__ sum calls with built-in sum for improv…
Yicong-Huang Sep 22, 2025
338a23d
refactor: enhance try_make_timestamp_ntz function with improved argum…
Yicong-Huang Sep 23, 2025
d581527
refactor: enhance make_timestamp_ntz and try_make_timestamp_ntz funct…
Yicong-Huang Sep 23, 2025
a868cf2
refactor: update comments in make_timestamp_ntz and try_make_timestam…
Yicong-Huang Sep 23, 2025
83f701f
refactor: add type casting for arguments in make_timestamp_ntz and tr…
Yicong-Huang Sep 23, 2025
0d1f37b
refactor: enforce keyword-only arguments in make_timestamp_ntz functi…
Yicong-Huang Sep 23, 2025
860fc53
refactor: replace keyword-only arguments with *args in make_timestamp…
Yicong-Huang Sep 23, 2025
260df29
refactor: swap error message keys for clarity in error-conditions.json
Yicong-Huang Sep 23, 2025
93c99bd
refactor: simplify make_timestamp_ntz usage by removing mixed argumen…
Yicong-Huang Sep 24, 2025
873cf4a
refactor: streamline make_timestamp_ntz and try_make_timestamp_ntz fu…
Yicong-Huang Sep 24, 2025
66e1685
refactor: update error handling in make_timestamp_ntz to use CANNOT_S…
Yicong-Huang Sep 24, 2025
bb42aeb
refactor: improve argument validation and error handling in make_time…
Yicong-Huang Sep 24, 2025
31f33a9
refactor: enhance try_make_timestamp_ntz function to handle None para…
Yicong-Huang Sep 24, 2025
0696390
test: expand make_timestamp_ntz test cases to cover various argument …
Yicong-Huang Sep 24, 2025
6fb47d8
fix: shorten doc string
Yicong-Huang Sep 24, 2025
a704c54
doc: add more examples in doc-string
Yicong-Huang Sep 24, 2025
d028c14
test: update exception handling in make_timestamp_ntz tests to raise …
Yicong-Huang Sep 24, 2025
107ac4f
test: remove unused imports in make_timestamp_ntz test cases for clea…
Yicong-Huang Sep 24, 2025
5acd80c
doc: format output examples in make_timestamp_ntz documentation for i…
Yicong-Huang Sep 24, 2025
a3be61c
refactor: update try_make_timestamp_ntz function to use casting for p…
Yicong-Huang Sep 25, 2025
b824f78
refactor: enhance try_make_timestamp_ntz function to raise PySparkVal…
Yicong-Huang Sep 25, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
95 changes: 89 additions & 6 deletions python/pyspark/sql/connect/functions/builtin.py
Original file line number Diff line number Diff line change
Expand Up @@ -3643,6 +3643,13 @@ def timestamp_seconds(col: "ColumnOrName") -> Column:
timestamp_seconds.__doc__ = pysparkfuncs.timestamp_seconds.__doc__


def time_diff(unit: str, start: "ColumnOrName", end: "ColumnOrName") -> Column:
    # The unit is a plain Python string, so lift it into a literal column
    # before delegating to the server-side "time_diff" function.
    unit_column = lit(unit)
    return _invoke_function_over_columns("time_diff", unit_column, start, end)


time_diff.__doc__ = pysparkfuncs.time_diff.__doc__


def timestamp_millis(col: "ColumnOrName") -> Column:
return _invoke_function_over_columns("timestamp_millis", col)

Expand Down Expand Up @@ -4012,6 +4019,12 @@ def try_make_timestamp_ltz(
try_make_timestamp_ltz.__doc__ = pysparkfuncs.try_make_timestamp_ltz.__doc__


@overload
def make_timestamp_ntz(date: "ColumnOrName", time: "ColumnOrName") -> Column:
    # Typing overload only: construct a local timestamp from a DATE column
    # and a TIME column. The real implementation dispatches on argument count.
    ...


@overload
def make_timestamp_ntz(
years: "ColumnOrName",
months: "ColumnOrName",
Expand All @@ -4020,14 +4033,52 @@ def make_timestamp_ntz(
mins: "ColumnOrName",
secs: "ColumnOrName",
) -> Column:
return _invoke_function_over_columns(
"make_timestamp_ntz", years, months, days, hours, mins, secs
)
...


def make_timestamp_ntz(
    yearsOrDate: "ColumnOrName",
    monthsOrTime: "ColumnOrName",
    days: Optional["ColumnOrName"] = None,
    hours: Optional["ColumnOrName"] = None,
    mins: Optional["ColumnOrName"] = None,
    secs: Optional["ColumnOrName"] = None,
) -> Column:
    # Dispatch on how many optional arguments were supplied: either all four
    # (the years/months/days/hours/mins/secs form) or none (the date/time form).
    extras = (days, hours, mins, secs)
    if all(arg is not None for arg in extras):
        # Six-argument form: years, months, days, hours, mins, secs.
        return _invoke_function_over_columns(
            "make_timestamp_ntz", yearsOrDate, monthsOrTime, days, hours, mins, secs
        )
    if all(arg is None for arg in extras):
        # Two-argument form: date, time.
        return _invoke_function_over_columns(
            "make_timestamp_ntz", yearsOrDate, monthsOrTime
        )
    # A partial mix of optional arguments matches neither overload.
    raise PySparkValueError(
        errorClass="INVALID_NUM_ARGS",
        messageParameters={
            "arg_name": "make_timestamp_ntz",
            "num_args": "2 or 6",
        },
    )


make_timestamp_ntz.__doc__ = pysparkfuncs.make_timestamp_ntz.__doc__


@overload
def try_make_timestamp_ntz(date: "ColumnOrName", time: "ColumnOrName") -> Column:
    # Typing overload only: construct a local timestamp from a DATE column
    # and a TIME column, returning NULL on invalid inputs.
    ...


@overload
def try_make_timestamp_ntz(
years: "ColumnOrName",
months: "ColumnOrName",
Expand All @@ -4036,9 +4087,41 @@ def try_make_timestamp_ntz(
mins: "ColumnOrName",
secs: "ColumnOrName",
) -> Column:
return _invoke_function_over_columns(
"try_make_timestamp_ntz", years, months, days, hours, mins, secs
)
...


def try_make_timestamp_ntz(
    yearsOrDate: "ColumnOrName",
    monthsOrTime: "ColumnOrName",
    days: Optional["ColumnOrName"] = None,
    hours: Optional["ColumnOrName"] = None,
    mins: Optional["ColumnOrName"] = None,
    secs: Optional["ColumnOrName"] = None,
) -> Column:
    # Dispatch on how many optional arguments were supplied: either all four
    # (the years/months/days/hours/mins/secs form) or none (the date/time form).
    extras = (days, hours, mins, secs)
    if all(arg is not None for arg in extras):
        # Six-argument form: years, months, days, hours, mins, secs.
        return _invoke_function_over_columns(
            "try_make_timestamp_ntz", yearsOrDate, monthsOrTime, days, hours, mins, secs
        )
    if all(arg is None for arg in extras):
        # Two-argument form: date, time.
        return _invoke_function_over_columns(
            "try_make_timestamp_ntz", yearsOrDate, monthsOrTime
        )
    # A partial mix of optional arguments matches neither overload.
    raise PySparkValueError(
        errorClass="INVALID_NUM_ARGS",
        messageParameters={
            "arg_name": "try_make_timestamp_ntz",
            "num_args": "2 or 6",
        },
    )


try_make_timestamp_ntz.__doc__ = pysparkfuncs.try_make_timestamp_ntz.__doc__
Expand Down
189 changes: 148 additions & 41 deletions python/pyspark/sql/functions/builtin.py
Original file line number Diff line number Diff line change
Expand Up @@ -25086,39 +25086,71 @@ def try_make_timestamp_ltz(
)


@_try_remote_functions
@overload
def make_timestamp_ntz(date: "ColumnOrName", time: "ColumnOrName") -> Column:
    # Typing overload only: construct a local timestamp from a DATE column
    # and a TIME column. The real implementation dispatches on argument count.
    ...


@overload
def make_timestamp_ntz(
    years: "ColumnOrName",
    months: "ColumnOrName",
    days: "ColumnOrName",
    hours: "ColumnOrName",
    mins: "ColumnOrName",
    secs: "ColumnOrName",
) -> Column:
    # Typing overload only: construct a local timestamp from the six
    # year/month/day/hour/minute/second component columns.
    ...


@_try_remote_functions
def make_timestamp_ntz(
yearsOrDate: "ColumnOrName",
monthsOrTime: "ColumnOrName",
days: Optional["ColumnOrName"] = None,
hours: Optional["ColumnOrName"] = None,
mins: Optional["ColumnOrName"] = None,
secs: Optional["ColumnOrName"] = None,
) -> Column:
"""
Create local date-time from years, months, days, hours, mins, secs fields.
If the configuration `spark.sql.ansi.enabled` is false, the function returns NULL
on invalid inputs. Otherwise, it will throw an error instead.
Create local date-time from years, months, days, hours, mins, secs fields. Alternatively,
create local date-time from date and time fields. If the configuration `spark.sql.ansi.enabled`
is false, the function returns NULL on invalid inputs. Otherwise, it will throw an error.

.. versionadded:: 3.5.0

Parameters
----------
years : :class:`~pyspark.sql.Column` or column name
The year to represent, from 1 to 9999
months : :class:`~pyspark.sql.Column` or column name
The month-of-year to represent, from 1 (January) to 12 (December)
days : :class:`~pyspark.sql.Column` or column name
The day-of-month to represent, from 1 to 31
hours : :class:`~pyspark.sql.Column` or column name
The hour-of-day to represent, from 0 to 23
mins : :class:`~pyspark.sql.Column` or column name
The minute-of-hour to represent, from 0 to 59
secs : :class:`~pyspark.sql.Column` or column name
.. versionchanged:: 4.1.0
Added support for creating timestamps from date and time.

Parameters
----------
yearsOrDate : :class:`~pyspark.sql.Column` or column name
Either:
- The year to represent, from 1 to 9999;
for timestamp creation based on year, month, day, hour, minute and second.
- The date to represent, in valid DATE format;
for timestamp creation based on date and time.
monthsOrTime : :class:`~pyspark.sql.Column` or column name
Either:
- The month-of-year to represent, from 1 (January) to 12 (December);
for timestamp creation based on year, month, day, hour, minute and second.
- The time to represent, in valid TIME format;
for timestamp creation based on date and time.
days : :class:`~pyspark.sql.Column` or column name, optional
The day-of-month to represent, from 1 to 31;
only used for timestamp creation based on year, month, day, hour, minute and second.
hours : :class:`~pyspark.sql.Column` or column name, optional
The hour-of-day to represent, from 0 to 23;
only used for timestamp creation based on year, month, day, hour, minute and second.
mins : :class:`~pyspark.sql.Column` or column name, optional
The minute-of-hour to represent, from 0 to 59;
only used for timestamp creation based on year, month, day, hour, minute and second.
secs : :class:`~pyspark.sql.Column` or column name, optional
The second-of-minute and its micro-fraction to represent, from 0 to 60.
The value can be either an integer like 13, or a fraction like 13.123.
If the sec argument equals to 60, the seconds field is set
to 0 and 1 minute is added to the final timestamp;
only used for timestamp creation based on year, month, day, hour, minute and second.

Returns
-------
Expand Down Expand Up @@ -25153,43 +25185,97 @@ def make_timestamp_ntz(

>>> spark.conf.unset("spark.sql.session.timeZone")
"""
return _invoke_function_over_columns(
"make_timestamp_ntz", years, months, days, hours, mins, secs
)
# Probe input arguments.
hasDays: bool = days is not None
hasHours: bool = hours is not None
hasMins: bool = mins is not None
hasSecs: bool = secs is not None
# Branch execution based on the number of input arguments.
if hasDays and hasHours and hasMins and hasSecs:
# Overload with inputs: years, months, days, hours, mins, secs.
return _invoke_function_over_columns(
"make_timestamp_ntz", yearsOrDate, monthsOrTime, days, hours, mins, secs
)
elif not hasDays and not hasHours and not hasMins and not hasSecs:
# Overload with inputs: date, time.
return _invoke_function_over_columns(
"make_timestamp_ntz", yearsOrDate, monthsOrTime
)
else:
raise PySparkValueError(
errorClass="INVALID_NUM_ARGS",
messageParameters={
"arg_name": "make_timestamp_ntz",
"num_args": "2 or 6",
},
)


@_try_remote_functions
@overload
def try_make_timestamp_ntz(date: "ColumnOrName", time: "ColumnOrName") -> Column:
    # Typing overload only: construct a local timestamp from a DATE column
    # and a TIME column, returning NULL on invalid inputs.
    ...


@overload
def try_make_timestamp_ntz(
    years: "ColumnOrName",
    months: "ColumnOrName",
    days: "ColumnOrName",
    hours: "ColumnOrName",
    mins: "ColumnOrName",
    secs: "ColumnOrName",
) -> Column:
    # Typing overload only: construct a local timestamp from the six
    # year/month/day/hour/minute/second component columns, returning NULL
    # on invalid inputs.
    ...


@_try_remote_functions
def try_make_timestamp_ntz(
yearsOrDate: "ColumnOrName",
monthsOrTime: "ColumnOrName",
days: Optional["ColumnOrName"] = None,
hours: Optional["ColumnOrName"] = None,
mins: Optional["ColumnOrName"] = None,
secs: Optional["ColumnOrName"] = None,
) -> Column:
"""
Try to create local date-time from years, months, days, hours, mins, secs fields.
The function returns NULL on invalid inputs.
Try to create local date-time from years, months, days, hours, mins, secs fields. Alternatively,
try to create local date-time from date and time fields. The function returns NULL on invalid
inputs.

.. versionadded:: 4.0.0

Parameters
----------
years : :class:`~pyspark.sql.Column` or column name
The year to represent, from 1 to 9999
months : :class:`~pyspark.sql.Column` or column name
The month-of-year to represent, from 1 (January) to 12 (December)
days : :class:`~pyspark.sql.Column` or column name
The day-of-month to represent, from 1 to 31
hours : :class:`~pyspark.sql.Column` or column name
The hour-of-day to represent, from 0 to 23
mins : :class:`~pyspark.sql.Column` or column name
The minute-of-hour to represent, from 0 to 59
secs : :class:`~pyspark.sql.Column` or column name
.. versionchanged:: 4.1.0
Added support for creating timestamps from date and time.

Parameters
----------
yearsOrDate : :class:`~pyspark.sql.Column` or column name
Either:
- The year to represent, from 1 to 9999;
for timestamp creation based on year, month, day, hour, minute and second.
- The date to represent, in valid DATE format;
for timestamp creation based on date and time.
monthsOrTime : :class:`~pyspark.sql.Column` or column name
Either:
- The month-of-year to represent, from 1 (January) to 12 (December);
for timestamp creation based on year, month, day, hour, minute and second.
- The time to represent, in valid TIME format;
for timestamp creation based on date and time.
days : :class:`~pyspark.sql.Column` or column name, optional
The day-of-month to represent, from 1 to 31;
only used for timestamp creation based on year, month, day, hour, minute and second.
hours : :class:`~pyspark.sql.Column` or column name, optional
The hour-of-day to represent, from 0 to 23;
only used for timestamp creation based on year, month, day, hour, minute and second.
mins : :class:`~pyspark.sql.Column` or column name, optional
The minute-of-hour to represent, from 0 to 59;
only used for timestamp creation based on year, month, day, hour, minute and second.
secs : :class:`~pyspark.sql.Column` or column name, optional
The second-of-minute and its micro-fraction to represent, from 0 to 60.
The value can be either an integer like 13, or a fraction like 13.123.
If the sec argument equals to 60, the seconds field is set
to 0 and 1 minute is added to the final timestamp;
only used for timestamp creation based on year, month, day, hour, minute and second.

Returns
-------
Expand Down Expand Up @@ -25240,9 +25326,30 @@ def try_make_timestamp_ntz(

>>> spark.conf.unset("spark.sql.session.timeZone")
"""
return _invoke_function_over_columns(
"try_make_timestamp_ntz", years, months, days, hours, mins, secs
)
# Probe input arguments.
hasDays: bool = days is not None
hasHours: bool = hours is not None
hasMins: bool = mins is not None
hasSecs: bool = secs is not None
# Branch execution based on the number of input arguments.
if hasDays and hasHours and hasMins and hasSecs:
# Overload with inputs: years, months, days, hours, mins, secs.
return _invoke_function_over_columns(
"try_make_timestamp_ntz", yearsOrDate, monthsOrTime, days, hours, mins, secs
)
elif not hasDays and not hasHours and not hasMins and not hasSecs:
# Overload with inputs: date, time.
return _invoke_function_over_columns(
"try_make_timestamp_ntz", yearsOrDate, monthsOrTime
)
else:
raise PySparkValueError(
errorClass="INVALID_NUM_ARGS",
messageParameters={
"arg_name": "try_make_timestamp_ntz",
"num_args": "2 or 6",
},
)


@_try_remote_functions
Expand Down
Loading