From 8a7bca3f741a57ac2760621cc6af7602d38278b6 Mon Sep 17 00:00:00 2001 From: Yicong Huang <17627829+Yicong-Huang@users.noreply.github.com> Date: Fri, 12 Sep 2025 13:33:28 -0700 Subject: [PATCH 01/13] feat: add make_timestamp_ntz overload with date/time parameters Add support for make_timestamp_ntz(date=..., time=...) overload in PySpark. This implements SPARK-51162 to provide an alternative way to create local date-time values using separate date and time columns. --- .../pyspark/sql/connect/functions/builtin.py | 49 ++++++++++++- python/pyspark/sql/functions/builtin.py | 73 +++++++++++++++++-- python/pyspark/sql/tests/test_functions.py | 49 ++++++++++++- 3 files changed, 162 insertions(+), 9 deletions(-) diff --git a/python/pyspark/sql/connect/functions/builtin.py b/python/pyspark/sql/connect/functions/builtin.py index 0380b517e6e5..51fc690ff4d8 100644 --- a/python/pyspark/sql/connect/functions/builtin.py +++ b/python/pyspark/sql/connect/functions/builtin.py @@ -4019,6 +4019,7 @@ def try_make_timestamp_ltz( try_make_timestamp_ltz.__doc__ = pysparkfuncs.try_make_timestamp_ltz.__doc__ +@overload def make_timestamp_ntz( years: "ColumnOrName", months: "ColumnOrName", @@ -4027,9 +4028,51 @@ def make_timestamp_ntz( mins: "ColumnOrName", secs: "ColumnOrName", ) -> Column: - return _invoke_function_over_columns( - "make_timestamp_ntz", years, months, days, hours, mins, secs - ) + ... + + +@overload +def make_timestamp_ntz(*, date: "ColumnOrName", time: "ColumnOrName") -> Column: + ... 
+ + +def make_timestamp_ntz( + years: Optional["ColumnOrName"] = None, + months: Optional["ColumnOrName"] = None, + days: Optional["ColumnOrName"] = None, + hours: Optional["ColumnOrName"] = None, + mins: Optional["ColumnOrName"] = None, + secs: Optional["ColumnOrName"] = None, + *, + date: Optional["ColumnOrName"] = None, + time: Optional["ColumnOrName"] = None, +) -> Column: + # Check for date/time keyword arguments (2-parameter version) + if date is not None and time is not None: + # make_timestamp_ntz(date=..., time=...) + if any(arg is not None for arg in [years, months, days, hours, mins, secs]): + raise PySparkValueError( + errorClass="WRONG_NUM_ARGS", + messageParameters={"func_name": "make_timestamp_ntz", "expected": "either 6 positional args or date/time keywords", "actual": "mixed"} + ) + return _invoke_function_over_columns("make_timestamp_ntz", date, time) + + # Check for 6-parameter positional version + elif all(arg is not None for arg in [years, months, days, hours, mins, secs]): + # make_timestamp_ntz(years, months, days, hours, mins, secs) + if date is not None or time is not None: + raise PySparkValueError( + errorClass="WRONG_NUM_ARGS", + messageParameters={"func_name": "make_timestamp_ntz", "expected": "either 6 positional args or date/time keywords", "actual": "mixed"} + ) + return _invoke_function_over_columns("make_timestamp_ntz", years, months, days, hours, mins, secs) + + else: + # Invalid argument combination + raise PySparkValueError( + errorClass="WRONG_NUM_ARGS", + messageParameters={"func_name": "make_timestamp_ntz", "expected": "either all 6 positional args (years, months, days, hours, mins, secs) or date/time keywords", "actual": "partial"} + ) make_timestamp_ntz.__doc__ = pysparkfuncs.make_timestamp_ntz.__doc__ diff --git a/python/pyspark/sql/functions/builtin.py b/python/pyspark/sql/functions/builtin.py index b09713e0c289..bec2498ac5f2 100644 --- a/python/pyspark/sql/functions/builtin.py +++ 
b/python/pyspark/sql/functions/builtin.py @@ -25126,7 +25126,7 @@ def try_make_timestamp_ltz( ) -@_try_remote_functions +@overload def make_timestamp_ntz( years: "ColumnOrName", months: "ColumnOrName", @@ -25134,9 +25134,29 @@ def make_timestamp_ntz( hours: "ColumnOrName", mins: "ColumnOrName", secs: "ColumnOrName", +) -> Column: + ... + + +@overload +def make_timestamp_ntz(*, date: "ColumnOrName", time: "ColumnOrName") -> Column: + ... + + +@_try_remote_functions +def make_timestamp_ntz( + years: Optional["ColumnOrName"] = None, + months: Optional["ColumnOrName"] = None, + days: Optional["ColumnOrName"] = None, + hours: Optional["ColumnOrName"] = None, + mins: Optional["ColumnOrName"] = None, + secs: Optional["ColumnOrName"] = None, + *, + date: Optional["ColumnOrName"] = None, + time: Optional["ColumnOrName"] = None, ) -> Column: """ - Create local date-time from years, months, days, hours, mins, secs fields. + Create local date-time from years, months, days, hours, mins, secs fields, or from date and time fields. If the configuration `spark.sql.ansi.enabled` is false, the function returns NULL on invalid inputs. Otherwise, it will throw an error instead. @@ -25159,6 +25179,10 @@ def make_timestamp_ntz( The value can be either an integer like 13 , or a fraction like 13.123. If the sec argument equals to 60, the seconds field is set to 0 and 1 minute is added to the final timestamp. + date : :class:`~pyspark.sql.Column` or column name + A date to represent, from 0001-01-01 to 9999-12-31 + time : :class:`~pyspark.sql.Column` or column name + A local time to represent, from 00:00:00 to 23:59:59.999999 Returns ------- @@ -25179,6 +25203,8 @@ def make_timestamp_ntz( -------- >>> spark.conf.set("spark.sql.session.timeZone", "America/Los_Angeles") + Example 1: Create local date-time from year, month, day, hour, min, sec fields. + >>> import pyspark.sql.functions as sf >>> df = spark.createDataFrame([[2014, 12, 28, 6, 30, 45.887]], ... 
['year', 'month', 'day', 'hour', 'min', 'sec']) @@ -25191,11 +25217,48 @@ def make_timestamp_ntz( |2014-12-28 06:30:45.887 | +----------------------------------------------------+ + Example 2: Create local date-time from date and time fields. + + >>> import pyspark.sql.functions as sf + >>> df = spark.createDataFrame([['2014-12-28', '06:30:45.887']], + ... ['date_col', 'time_col']) + >>> df.select( + ... sf.make_timestamp_ntz(date=sf.to_date(df.date_col), time=sf.to_time(df.time_col)) + ... ).show(truncate=False) + +------------------------------------------------------------------+ + |make_timestamp_ntz(date=to_date(date_col), time=to_time(time_col))| + +------------------------------------------------------------------+ + |2014-12-28 06:30:45.887 | + +------------------------------------------------------------------+ + >>> spark.conf.unset("spark.sql.session.timeZone") """ - return _invoke_function_over_columns( - "make_timestamp_ntz", years, months, days, hours, mins, secs - ) + # Check for date/time keyword arguments (2-parameter version) + if date is not None and time is not None: + # make_timestamp_ntz(date=..., time=...) 
+ if any(arg is not None for arg in [years, months, days, hours, mins, secs]): + raise PySparkValueError( + errorClass="WRONG_NUM_ARGS", + messageParameters={"func_name": "make_timestamp_ntz", "expected": "either 6 positional args or date/time keywords", "actual": "mixed"} + ) + return _invoke_function_over_columns("make_timestamp_ntz", date, time) + + # Check for 6-parameter positional version + elif all(arg is not None for arg in [years, months, days, hours, mins, secs]): + # make_timestamp_ntz(years, months, days, hours, mins, secs) + if date is not None or time is not None: + raise PySparkValueError( + errorClass="WRONG_NUM_ARGS", + messageParameters={"func_name": "make_timestamp_ntz", "expected": "either 6 positional args or date/time keywords", "actual": "mixed"} + ) + return _invoke_function_over_columns("make_timestamp_ntz", years, months, days, hours, mins, secs) + + else: + # Invalid argument combination + raise PySparkValueError( + errorClass="WRONG_NUM_ARGS", + messageParameters={"func_name": "make_timestamp_ntz", "expected": "either all 6 positional args (years, months, days, hours, mins, secs) or date/time keywords", "actual": "partial"} + ) @_try_remote_functions diff --git a/python/pyspark/sql/tests/test_functions.py b/python/pyspark/sql/tests/test_functions.py index 2fde3edc2486..eeffe4003453 100644 --- a/python/pyspark/sql/tests/test_functions.py +++ b/python/pyspark/sql/tests/test_functions.py @@ -485,6 +485,54 @@ def test_try_make_timestamp_ntz(self): ) assertDataFrameEqual(actual, [Row(None)]) + def test_make_timestamp_ntz_with_date_time(self): + # Test make_timestamp_ntz(date=..., time=...) 
overload + from datetime import date, time + + # Test with date and time columns + data = [(date(2024, 5, 22), time(10, 30, 45))] + df = self.spark.createDataFrame(data, ["date_col", "time_col"]) + actual = df.select(F.make_timestamp_ntz(date=df.date_col, time=df.time_col)) + assertDataFrameEqual(actual, [Row(datetime.datetime(2024, 5, 22, 10, 30, 45))]) + + # Test with to_date and to_time functions + data = [("2024-05-22", "10:30:45.123")] + df = self.spark.createDataFrame(data, ["date_str", "time_str"]) + actual = df.select( + F.make_timestamp_ntz(date=F.to_date(df.date_str), time=F.to_time(df.time_str)) + ) + assertDataFrameEqual(actual, [Row(datetime.datetime(2024, 5, 22, 10, 30, 45, 123000))]) + + def test_make_timestamp_ntz_error_handling(self): + # Test error handling for wrong number of arguments + with self.assertRaises(PySparkValueError) as pe: + F.make_timestamp_ntz() # No arguments + + self.check_error( + exception=pe.exception, + errorClass="WRONG_NUM_ARGS", + messageParameters={"func_name": "make_timestamp_ntz", "expected": "either all 6 positional args (years, months, days, hours, mins, secs) or date/time keywords", "actual": "partial"} + ) + + with self.assertRaises(PySparkValueError) as pe: + F.make_timestamp_ntz(F.lit(2024)) # Only 1 argument + + self.check_error( + exception=pe.exception, + errorClass="WRONG_NUM_ARGS", + messageParameters={"func_name": "make_timestamp_ntz", "expected": "either all 6 positional args (years, months, days, hours, mins, secs) or date/time keywords", "actual": "partial"} + ) + + # Test mixed argument error + with self.assertRaises(PySparkValueError) as pe: + F.make_timestamp_ntz(F.lit(2024), F.lit(1), F.lit(1), F.lit(12), F.lit(0), F.lit(0), date=F.lit("2024-01-01")) + + self.check_error( + exception=pe.exception, + errorClass="WRONG_NUM_ARGS", + messageParameters={"func_name": "make_timestamp_ntz", "expected": "either 6 positional args or date/time keywords", "actual": "mixed"} + ) + def test_string_functions(self): 
string_functions = [ "upper", @@ -2015,7 +2063,6 @@ class FunctionsTests(ReusedSQLTestCase, FunctionsTestsMixin): if __name__ == "__main__": import unittest - from pyspark.sql.tests.test_functions import * # noqa: F401 try: import xmlrunner From 4bbadd33476939239e8655f939b21c0f54d7918d Mon Sep 17 00:00:00 2001 From: Yicong-Huang <17627829+Yicong-Huang@users.noreply.github.com> Date: Fri, 12 Sep 2025 14:11:34 -0700 Subject: [PATCH 02/13] revert: bring back the import --- python/pyspark/sql/tests/test_functions.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/python/pyspark/sql/tests/test_functions.py b/python/pyspark/sql/tests/test_functions.py index eeffe4003453..b14132ddb0a7 100644 --- a/python/pyspark/sql/tests/test_functions.py +++ b/python/pyspark/sql/tests/test_functions.py @@ -2063,7 +2063,8 @@ class FunctionsTests(ReusedSQLTestCase, FunctionsTestsMixin): if __name__ == "__main__": import unittest - + from pyspark.sql.tests.test_functions import * # noqa: F401 + try: import xmlrunner From 171212761fd3873ca8a40d26e088ebb6b70e0876 Mon Sep 17 00:00:00 2001 From: Yicong-Huang <17627829+Yicong-Huang@users.noreply.github.com> Date: Fri, 12 Sep 2025 15:00:27 -0700 Subject: [PATCH 03/13] refactor: simplify error handling and improve error messages in make_timestamp_ntz --- .../pyspark/sql/connect/functions/builtin.py | 26 +++++++---------- python/pyspark/sql/functions/builtin.py | 28 ++++++++----------- python/pyspark/sql/tests/test_functions.py | 6 ++-- 3 files changed, 24 insertions(+), 36 deletions(-) diff --git a/python/pyspark/sql/connect/functions/builtin.py b/python/pyspark/sql/connect/functions/builtin.py index 51fc690ff4d8..0fe43b3f0734 100644 --- a/python/pyspark/sql/connect/functions/builtin.py +++ b/python/pyspark/sql/connect/functions/builtin.py @@ -4047,31 +4047,25 @@ def make_timestamp_ntz( date: Optional["ColumnOrName"] = None, time: Optional["ColumnOrName"] = None, ) -> Column: - # Check for date/time keyword arguments 
(2-parameter version) + # Check for mixed arguments (invalid) + if any(arg is not None for arg in [years, months, days, hours, mins, secs]) and (date is not None or time is not None): + raise PySparkValueError( + errorClass="WRONG_NUM_ARGS", + messageParameters={"func_name": "make_timestamp_ntz", "expected": "either (years, months, days, hours, mins, secs) or (date, time)", "actual": "cannot mix both approaches"} + ) + + # Handle valid cases if date is not None and time is not None: # make_timestamp_ntz(date=..., time=...) - if any(arg is not None for arg in [years, months, days, hours, mins, secs]): - raise PySparkValueError( - errorClass="WRONG_NUM_ARGS", - messageParameters={"func_name": "make_timestamp_ntz", "expected": "either 6 positional args or date/time keywords", "actual": "mixed"} - ) return _invoke_function_over_columns("make_timestamp_ntz", date, time) - - # Check for 6-parameter positional version elif all(arg is not None for arg in [years, months, days, hours, mins, secs]): # make_timestamp_ntz(years, months, days, hours, mins, secs) - if date is not None or time is not None: - raise PySparkValueError( - errorClass="WRONG_NUM_ARGS", - messageParameters={"func_name": "make_timestamp_ntz", "expected": "either 6 positional args or date/time keywords", "actual": "mixed"} - ) return _invoke_function_over_columns("make_timestamp_ntz", years, months, days, hours, mins, secs) - else: - # Invalid argument combination + # Invalid argument combination (partial arguments) raise PySparkValueError( errorClass="WRONG_NUM_ARGS", - messageParameters={"func_name": "make_timestamp_ntz", "expected": "either all 6 positional args (years, months, days, hours, mins, secs) or date/time keywords", "actual": "partial"} + messageParameters={"func_name": "make_timestamp_ntz", "expected": "either all 6 components (years, months, days, hours, mins, secs) or both date and time", "actual": "incomplete arguments"} ) diff --git a/python/pyspark/sql/functions/builtin.py 
b/python/pyspark/sql/functions/builtin.py index bec2498ac5f2..7de16a81318b 100644 --- a/python/pyspark/sql/functions/builtin.py +++ b/python/pyspark/sql/functions/builtin.py @@ -25175,7 +25175,7 @@ def make_timestamp_ntz( mins : :class:`~pyspark.sql.Column` or column name The minute-of-hour to represent, from 0 to 59 secs : :class:`~pyspark.sql.Column` or column name - The second-of-minute and its micro-fraction to represent, from 0 to 60. + The second-of-minute and its micro-fraction to represent, from 0 to 60.. The value can be either an integer like 13 , or a fraction like 13.123. If the sec argument equals to 60, the seconds field is set to 0 and 1 minute is added to the final timestamp. @@ -25233,31 +25233,25 @@ def make_timestamp_ntz( >>> spark.conf.unset("spark.sql.session.timeZone") """ - # Check for date/time keyword arguments (2-parameter version) + # Check for mixed arguments (invalid) + if any(arg is not None for arg in [years, months, days, hours, mins, secs]) and (date is not None or time is not None): + raise PySparkValueError( + errorClass="WRONG_NUM_ARGS", + messageParameters={"func_name": "make_timestamp_ntz", "expected": "either (years, months, days, hours, mins, secs) or (date, time)", "actual": "cannot mix both approaches"} + ) + + # Handle valid cases if date is not None and time is not None: # make_timestamp_ntz(date=..., time=...) 
- if any(arg is not None for arg in [years, months, days, hours, mins, secs]): - raise PySparkValueError( - errorClass="WRONG_NUM_ARGS", - messageParameters={"func_name": "make_timestamp_ntz", "expected": "either 6 positional args or date/time keywords", "actual": "mixed"} - ) return _invoke_function_over_columns("make_timestamp_ntz", date, time) - - # Check for 6-parameter positional version elif all(arg is not None for arg in [years, months, days, hours, mins, secs]): # make_timestamp_ntz(years, months, days, hours, mins, secs) - if date is not None or time is not None: - raise PySparkValueError( - errorClass="WRONG_NUM_ARGS", - messageParameters={"func_name": "make_timestamp_ntz", "expected": "either 6 positional args or date/time keywords", "actual": "mixed"} - ) return _invoke_function_over_columns("make_timestamp_ntz", years, months, days, hours, mins, secs) - else: - # Invalid argument combination + # Invalid argument combination (partial arguments) raise PySparkValueError( errorClass="WRONG_NUM_ARGS", - messageParameters={"func_name": "make_timestamp_ntz", "expected": "either all 6 positional args (years, months, days, hours, mins, secs) or date/time keywords", "actual": "partial"} + messageParameters={"func_name": "make_timestamp_ntz", "expected": "either all 6 components (years, months, days, hours, mins, secs) or both date and time", "actual": "incomplete arguments"} ) diff --git a/python/pyspark/sql/tests/test_functions.py b/python/pyspark/sql/tests/test_functions.py index b14132ddb0a7..d732e77867ce 100644 --- a/python/pyspark/sql/tests/test_functions.py +++ b/python/pyspark/sql/tests/test_functions.py @@ -511,7 +511,7 @@ def test_make_timestamp_ntz_error_handling(self): self.check_error( exception=pe.exception, errorClass="WRONG_NUM_ARGS", - messageParameters={"func_name": "make_timestamp_ntz", "expected": "either all 6 positional args (years, months, days, hours, mins, secs) or date/time keywords", "actual": "partial"} + 
messageParameters={"func_name": "make_timestamp_ntz", "expected": "either all 6 components (years, months, days, hours, mins, secs) or both date and time", "actual": "incomplete arguments"} ) with self.assertRaises(PySparkValueError) as pe: @@ -520,7 +520,7 @@ def test_make_timestamp_ntz_error_handling(self): self.check_error( exception=pe.exception, errorClass="WRONG_NUM_ARGS", - messageParameters={"func_name": "make_timestamp_ntz", "expected": "either all 6 positional args (years, months, days, hours, mins, secs) or date/time keywords", "actual": "partial"} + messageParameters={"func_name": "make_timestamp_ntz", "expected": "either all 6 components (years, months, days, hours, mins, secs) or both date and time", "actual": "incomplete arguments"} ) # Test mixed argument error @@ -530,7 +530,7 @@ def test_make_timestamp_ntz_error_handling(self): self.check_error( exception=pe.exception, errorClass="WRONG_NUM_ARGS", - messageParameters={"func_name": "make_timestamp_ntz", "expected": "either 6 positional args or date/time keywords", "actual": "mixed"} + messageParameters={"func_name": "make_timestamp_ntz", "expected": "either (years, months, days, hours, mins, secs) or (date, time)", "actual": "cannot mix both approaches"} ) def test_string_functions(self): From 579c45a6c9d839601080df607cb93098dffc1789 Mon Sep 17 00:00:00 2001 From: Yicong-Huang <17627829+Yicong-Huang@users.noreply.github.com> Date: Fri, 12 Sep 2025 15:07:44 -0700 Subject: [PATCH 04/13] test --- python/pyspark/sql/tests/test_functions.py | 1 + 1 file changed, 1 insertion(+) diff --git a/python/pyspark/sql/tests/test_functions.py b/python/pyspark/sql/tests/test_functions.py index d732e77867ce..4a34119895ce 100644 --- a/python/pyspark/sql/tests/test_functions.py +++ b/python/pyspark/sql/tests/test_functions.py @@ -513,6 +513,7 @@ def test_make_timestamp_ntz_error_handling(self): errorClass="WRONG_NUM_ARGS", messageParameters={"func_name": "make_timestamp_ntz", "expected": "either all 6 components 
(years, months, days, hours, mins, secs) or both date and time", "actual": "incomplete arguments"} ) + with self.assertRaises(PySparkValueError) as pe: F.make_timestamp_ntz(F.lit(2024)) # Only 1 argument From b52c2d5aff5c28c6b3f560a8034e90e0d85464f9 Mon Sep 17 00:00:00 2001 From: Yicong-Huang <17627829+Yicong-Huang@users.noreply.github.com> Date: Fri, 12 Sep 2025 15:07:58 -0700 Subject: [PATCH 05/13] Revert "test" This reverts commit 579c45a6c9d839601080df607cb93098dffc1789. --- python/pyspark/sql/tests/test_functions.py | 1 - 1 file changed, 1 deletion(-) diff --git a/python/pyspark/sql/tests/test_functions.py b/python/pyspark/sql/tests/test_functions.py index 4a34119895ce..d732e77867ce 100644 --- a/python/pyspark/sql/tests/test_functions.py +++ b/python/pyspark/sql/tests/test_functions.py @@ -513,7 +513,6 @@ def test_make_timestamp_ntz_error_handling(self): errorClass="WRONG_NUM_ARGS", messageParameters={"func_name": "make_timestamp_ntz", "expected": "either all 6 components (years, months, days, hours, mins, secs) or both date and time", "actual": "incomplete arguments"} ) - with self.assertRaises(PySparkValueError) as pe: F.make_timestamp_ntz(F.lit(2024)) # Only 1 argument From 747aaef604f78df1002fa48bfff7cb59f2437208 Mon Sep 17 00:00:00 2001 From: Yicong-Huang <17627829+Yicong-Huang@users.noreply.github.com> Date: Fri, 12 Sep 2025 22:21:20 -0700 Subject: [PATCH 06/13] feat: add WRONG_NUM_ARGS error class and update make_timestamp_ntz error handling - Add WRONG_NUM_ARGS error class to error-conditions.json - Update make_timestamp_ntz in builtin.py to use PySparkValueError with WRONG_NUM_ARGS - Update make_timestamp_ntz in connect/builtin.py to use PySparkValueError with WRONG_NUM_ARGS - Update test cases to expect PySparkValueError with proper error class and message parameters --- python/pyspark/errors/error-conditions.json | 5 +++ .../pyspark/sql/connect/functions/builtin.py | 24 +++++++++--- python/pyspark/sql/functions/builtin.py | 26 +++++++++---- 
python/pyspark/sql/tests/test_functions.py | 38 ++++++++++++++----- 4 files changed, 71 insertions(+), 22 deletions(-) diff --git a/python/pyspark/errors/error-conditions.json b/python/pyspark/errors/error-conditions.json index ffe3c61737e6..24bccc8d38e7 100644 --- a/python/pyspark/errors/error-conditions.json +++ b/python/pyspark/errors/error-conditions.json @@ -1384,6 +1384,11 @@ "Function `` should take at least columns." ] }, + "WRONG_NUM_ARGS": { + "message": [ + "Function `` expects but got ." + ] + }, "ZERO_INDEX": { "message": [ "Index must be non-zero." diff --git a/python/pyspark/sql/connect/functions/builtin.py b/python/pyspark/sql/connect/functions/builtin.py index 0fe43b3f0734..a5f8de736ac8 100644 --- a/python/pyspark/sql/connect/functions/builtin.py +++ b/python/pyspark/sql/connect/functions/builtin.py @@ -4047,25 +4047,37 @@ def make_timestamp_ntz( date: Optional["ColumnOrName"] = None, time: Optional["ColumnOrName"] = None, ) -> Column: - # Check for mixed arguments (invalid) - if any(arg is not None for arg in [years, months, days, hours, mins, secs]) and (date is not None or time is not None): + # Check for mixed arguments (invalid) + if any(arg is not None for arg in [years, months, days, hours, mins, secs]) and ( + date is not None or time is not None + ): raise PySparkValueError( errorClass="WRONG_NUM_ARGS", - messageParameters={"func_name": "make_timestamp_ntz", "expected": "either (years, months, days, hours, mins, secs) or (date, time)", "actual": "cannot mix both approaches"} + messageParameters={ + "func_name": "make_timestamp_ntz", + "expected": "either (years, months, days, hours, mins, secs) or (date, time)", + "actual": "mixed arguments from both approaches", + }, ) - + # Handle valid cases if date is not None and time is not None: # make_timestamp_ntz(date=..., time=...) 
return _invoke_function_over_columns("make_timestamp_ntz", date, time) elif all(arg is not None for arg in [years, months, days, hours, mins, secs]): # make_timestamp_ntz(years, months, days, hours, mins, secs) - return _invoke_function_over_columns("make_timestamp_ntz", years, months, days, hours, mins, secs) + return _invoke_function_over_columns( + "make_timestamp_ntz", years, months, days, hours, mins, secs + ) else: # Invalid argument combination (partial arguments) raise PySparkValueError( errorClass="WRONG_NUM_ARGS", - messageParameters={"func_name": "make_timestamp_ntz", "expected": "either all 6 components (years, months, days, hours, mins, secs) or both date and time", "actual": "incomplete arguments"} + messageParameters={ + "func_name": "make_timestamp_ntz", + "expected": "either all 6 components (years, months, days, hours, mins, secs) or both date and time", + "actual": "incomplete arguments", + }, ) diff --git a/python/pyspark/sql/functions/builtin.py b/python/pyspark/sql/functions/builtin.py index 7de16a81318b..e0b4b460f19c 100644 --- a/python/pyspark/sql/functions/builtin.py +++ b/python/pyspark/sql/functions/builtin.py @@ -25175,7 +25175,7 @@ def make_timestamp_ntz( mins : :class:`~pyspark.sql.Column` or column name The minute-of-hour to represent, from 0 to 59 secs : :class:`~pyspark.sql.Column` or column name - The second-of-minute and its micro-fraction to represent, from 0 to 60.. + The second-of-minute and its micro-fraction to represent, from 0 to 60.. The value can be either an integer like 13 , or a fraction like 13.123. If the sec argument equals to 60, the seconds field is set to 0 and 1 minute is added to the final timestamp. 
@@ -25233,25 +25233,37 @@ def make_timestamp_ntz( >>> spark.conf.unset("spark.sql.session.timeZone") """ - # Check for mixed arguments (invalid) - if any(arg is not None for arg in [years, months, days, hours, mins, secs]) and (date is not None or time is not None): + # Check for mixed arguments (invalid) + if any(arg is not None for arg in [years, months, days, hours, mins, secs]) and ( + date is not None or time is not None + ): raise PySparkValueError( errorClass="WRONG_NUM_ARGS", - messageParameters={"func_name": "make_timestamp_ntz", "expected": "either (years, months, days, hours, mins, secs) or (date, time)", "actual": "cannot mix both approaches"} + messageParameters={ + "func_name": "make_timestamp_ntz", + "expected": "either (years, months, days, hours, mins, secs) or (date, time)", + "actual": "mixed arguments from both approaches", + }, ) - + # Handle valid cases if date is not None and time is not None: # make_timestamp_ntz(date=..., time=...) return _invoke_function_over_columns("make_timestamp_ntz", date, time) elif all(arg is not None for arg in [years, months, days, hours, mins, secs]): # make_timestamp_ntz(years, months, days, hours, mins, secs) - return _invoke_function_over_columns("make_timestamp_ntz", years, months, days, hours, mins, secs) + return _invoke_function_over_columns( + "make_timestamp_ntz", years, months, days, hours, mins, secs + ) else: # Invalid argument combination (partial arguments) raise PySparkValueError( errorClass="WRONG_NUM_ARGS", - messageParameters={"func_name": "make_timestamp_ntz", "expected": "either all 6 components (years, months, days, hours, mins, secs) or both date and time", "actual": "incomplete arguments"} + messageParameters={ + "func_name": "make_timestamp_ntz", + "expected": "either all 6 components (years, months, days, hours, mins, secs) or both date and time", + "actual": "incomplete arguments", + }, ) diff --git a/python/pyspark/sql/tests/test_functions.py b/python/pyspark/sql/tests/test_functions.py 
index d732e77867ce..98e0c42519be 100644 --- a/python/pyspark/sql/tests/test_functions.py +++ b/python/pyspark/sql/tests/test_functions.py @@ -507,30 +507,50 @@ def test_make_timestamp_ntz_error_handling(self): # Test error handling for wrong number of arguments with self.assertRaises(PySparkValueError) as pe: F.make_timestamp_ntz() # No arguments - + self.check_error( exception=pe.exception, errorClass="WRONG_NUM_ARGS", - messageParameters={"func_name": "make_timestamp_ntz", "expected": "either all 6 components (years, months, days, hours, mins, secs) or both date and time", "actual": "incomplete arguments"} + messageParameters={ + "func_name": "make_timestamp_ntz", + "expected": "either all 6 components (years, months, days, hours, mins, secs) or both date and time", + "actual": "incomplete arguments", + }, ) with self.assertRaises(PySparkValueError) as pe: F.make_timestamp_ntz(F.lit(2024)) # Only 1 argument - + self.check_error( exception=pe.exception, errorClass="WRONG_NUM_ARGS", - messageParameters={"func_name": "make_timestamp_ntz", "expected": "either all 6 components (years, months, days, hours, mins, secs) or both date and time", "actual": "incomplete arguments"} + messageParameters={ + "func_name": "make_timestamp_ntz", + "expected": "either all 6 components (years, months, days, hours, mins, secs) or both date and time", + "actual": "incomplete arguments", + }, ) - + # Test mixed argument error with self.assertRaises(PySparkValueError) as pe: - F.make_timestamp_ntz(F.lit(2024), F.lit(1), F.lit(1), F.lit(12), F.lit(0), F.lit(0), date=F.lit("2024-01-01")) - + F.make_timestamp_ntz( + F.lit(2024), + F.lit(1), + F.lit(1), + F.lit(12), + F.lit(0), + F.lit(0), + date=F.lit("2024-01-01"), + ) + self.check_error( exception=pe.exception, errorClass="WRONG_NUM_ARGS", - messageParameters={"func_name": "make_timestamp_ntz", "expected": "either (years, months, days, hours, mins, secs) or (date, time)", "actual": "cannot mix both approaches"} + messageParameters={ + 
"func_name": "make_timestamp_ntz", + "expected": "either (years, months, days, hours, mins, secs) or (date, time)", + "actual": "mixed arguments from both approaches", + }, ) def test_string_functions(self): @@ -2064,7 +2084,7 @@ class FunctionsTests(ReusedSQLTestCase, FunctionsTestsMixin): if __name__ == "__main__": import unittest from pyspark.sql.tests.test_functions import * # noqa: F401 - + try: import xmlrunner From 3124668ad58a91f73da2da3cf658fe12b2d2b023 Mon Sep 17 00:00:00 2001 From: Yicong-Huang <17627829+Yicong-Huang@users.noreply.github.com> Date: Fri, 12 Sep 2025 23:58:39 -0700 Subject: [PATCH 07/13] refactor: make error messages more specific with parameter names - Change error message from 'either all 6 components or both date and time' to '(years, months, days, hours, mins, secs) or (date, time)' - Update consistently across builtin.py, connect/builtin.py, and test_functions.py - Provides clearer guidance to users about expected parameter names --- python/pyspark/sql/connect/functions/builtin.py | 2 +- python/pyspark/sql/functions/builtin.py | 2 +- python/pyspark/sql/tests/test_functions.py | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/python/pyspark/sql/connect/functions/builtin.py b/python/pyspark/sql/connect/functions/builtin.py index a5f8de736ac8..69cd48ed2b5c 100644 --- a/python/pyspark/sql/connect/functions/builtin.py +++ b/python/pyspark/sql/connect/functions/builtin.py @@ -4075,7 +4075,7 @@ def make_timestamp_ntz( errorClass="WRONG_NUM_ARGS", messageParameters={ "func_name": "make_timestamp_ntz", - "expected": "either all 6 components (years, months, days, hours, mins, secs) or both date and time", + "expected": "(years, months, days, hours, mins, secs) or (date, time)", "actual": "incomplete arguments", }, ) diff --git a/python/pyspark/sql/functions/builtin.py b/python/pyspark/sql/functions/builtin.py index e0b4b460f19c..d48530d07e70 100644 --- a/python/pyspark/sql/functions/builtin.py +++ 
b/python/pyspark/sql/functions/builtin.py @@ -25261,7 +25261,7 @@ def make_timestamp_ntz( errorClass="WRONG_NUM_ARGS", messageParameters={ "func_name": "make_timestamp_ntz", - "expected": "either all 6 components (years, months, days, hours, mins, secs) or both date and time", + "expected": "(years, months, days, hours, mins, secs) or (date, time)", "actual": "incomplete arguments", }, ) diff --git a/python/pyspark/sql/tests/test_functions.py b/python/pyspark/sql/tests/test_functions.py index 98e0c42519be..b35a2576ce41 100644 --- a/python/pyspark/sql/tests/test_functions.py +++ b/python/pyspark/sql/tests/test_functions.py @@ -513,7 +513,7 @@ def test_make_timestamp_ntz_error_handling(self): errorClass="WRONG_NUM_ARGS", messageParameters={ "func_name": "make_timestamp_ntz", - "expected": "either all 6 components (years, months, days, hours, mins, secs) or both date and time", + "expected": "(years, months, days, hours, mins, secs) or (date, time)", "actual": "incomplete arguments", }, ) @@ -526,7 +526,7 @@ def test_make_timestamp_ntz_error_handling(self): errorClass="WRONG_NUM_ARGS", messageParameters={ "func_name": "make_timestamp_ntz", - "expected": "either all 6 components (years, months, days, hours, mins, secs) or both date and time", + "expected": "(years, months, days, hours, mins, secs) or (date, time)", "actual": "incomplete arguments", }, ) From 96059d8ee0f71468bde0aa4b485be23366f676e7 Mon Sep 17 00:00:00 2001 From: Yicong Huang <17627829+Yicong-Huang@users.noreply.github.com> Date: Sat, 13 Sep 2025 00:50:09 -0700 Subject: [PATCH 08/13] change order --- python/pyspark/errors/error-conditions.json | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/python/pyspark/errors/error-conditions.json b/python/pyspark/errors/error-conditions.json index 24bccc8d38e7..edaebd7420e0 100644 --- a/python/pyspark/errors/error-conditions.json +++ b/python/pyspark/errors/error-conditions.json @@ -1374,6 +1374,11 @@ "Value for `` must be between and 
(inclusive), got " ] }, + "WRONG_NUM_ARGS": { + "message": [ + "Function `` expects but got ." + ] + }, "WRONG_NUM_ARGS_FOR_HIGHER_ORDER_FUNCTION": { "message": [ "Function `` should take between 1 and 3 arguments, but the provided function takes ." @@ -1384,11 +1389,6 @@ "Function `` should take at least columns." ] }, - "WRONG_NUM_ARGS": { - "message": [ - "Function `` expects but got ." - ] - }, "ZERO_INDEX": { "message": [ "Index must be non-zero." From 33821c87e70cc3a8907ec47b9faa371162a11dd9 Mon Sep 17 00:00:00 2001 From: Yicong-Huang <17627829+Yicong-Huang@users.noreply.github.com> Date: Sat, 13 Sep 2025 11:14:04 -0700 Subject: [PATCH 09/13] feat: finalize make_timestamp_ntz overload implementation --- .../pyspark/sql/connect/functions/builtin.py | 44 +++--------- python/pyspark/sql/functions/builtin.py | 71 ++++++++----------- python/pyspark/sql/tests/test_functions.py | 28 +++----- 3 files changed, 49 insertions(+), 94 deletions(-) diff --git a/python/pyspark/sql/connect/functions/builtin.py b/python/pyspark/sql/connect/functions/builtin.py index 69cd48ed2b5c..2e0a697f3a21 100644 --- a/python/pyspark/sql/connect/functions/builtin.py +++ b/python/pyspark/sql/connect/functions/builtin.py @@ -4019,7 +4019,7 @@ def try_make_timestamp_ltz( try_make_timestamp_ltz.__doc__ = pysparkfuncs.try_make_timestamp_ltz.__doc__ -@overload +@overload # type: ignore[no-redef] def make_timestamp_ntz( years: "ColumnOrName", months: "ColumnOrName", @@ -4032,51 +4032,29 @@ def make_timestamp_ntz( @overload -def make_timestamp_ntz(*, date: "ColumnOrName", time: "ColumnOrName") -> Column: +def make_timestamp_ntz(date: "ColumnOrName", time: "ColumnOrName") -> Column: ... 
-def make_timestamp_ntz( - years: Optional["ColumnOrName"] = None, - months: Optional["ColumnOrName"] = None, - days: Optional["ColumnOrName"] = None, - hours: Optional["ColumnOrName"] = None, - mins: Optional["ColumnOrName"] = None, - secs: Optional["ColumnOrName"] = None, - *, - date: Optional["ColumnOrName"] = None, - time: Optional["ColumnOrName"] = None, -) -> Column: - # Check for mixed arguments (invalid) - if any(arg is not None for arg in [years, months, days, hours, mins, secs]) and ( - date is not None or time is not None - ): - raise PySparkValueError( - errorClass="WRONG_NUM_ARGS", - messageParameters={ - "func_name": "make_timestamp_ntz", - "expected": "either (years, months, days, hours, mins, secs) or (date, time)", - "actual": "mixed arguments from both approaches", - }, - ) - - # Handle valid cases - if date is not None and time is not None: - # make_timestamp_ntz(date=..., time=...) +def make_timestamp_ntz(*args: "ColumnOrName") -> Column: + if len(args) == 2: + # make_timestamp_ntz(date, time) + date, time = args return _invoke_function_over_columns("make_timestamp_ntz", date, time) - elif all(arg is not None for arg in [years, months, days, hours, mins, secs]): + elif len(args) == 6: # make_timestamp_ntz(years, months, days, hours, mins, secs) + years, months, days, hours, mins, secs = args return _invoke_function_over_columns( "make_timestamp_ntz", years, months, days, hours, mins, secs ) else: - # Invalid argument combination (partial arguments) + # Invalid number of arguments raise PySparkValueError( errorClass="WRONG_NUM_ARGS", messageParameters={ "func_name": "make_timestamp_ntz", - "expected": "(years, months, days, hours, mins, secs) or (date, time)", - "actual": "incomplete arguments", + "expected": "either 6 arguments (years, months, days, hours, mins, secs) or 2 arguments (date, time)", + "actual": f"{len(args)} arguments", }, ) diff --git a/python/pyspark/sql/functions/builtin.py b/python/pyspark/sql/functions/builtin.py index 
d48530d07e70..723525615f34 100644 --- a/python/pyspark/sql/functions/builtin.py +++ b/python/pyspark/sql/functions/builtin.py @@ -25125,7 +25125,6 @@ def try_make_timestamp_ltz( "try_make_timestamp_ltz", years, months, days, hours, mins, secs ) - @overload def make_timestamp_ntz( years: "ColumnOrName", @@ -25137,30 +25136,24 @@ def make_timestamp_ntz( ) -> Column: ... - @overload -def make_timestamp_ntz(*, date: "ColumnOrName", time: "ColumnOrName") -> Column: +def make_timestamp_ntz(date: "ColumnOrName", time: "ColumnOrName") -> Column: ... @_try_remote_functions -def make_timestamp_ntz( - years: Optional["ColumnOrName"] = None, - months: Optional["ColumnOrName"] = None, - days: Optional["ColumnOrName"] = None, - hours: Optional["ColumnOrName"] = None, - mins: Optional["ColumnOrName"] = None, - secs: Optional["ColumnOrName"] = None, - *, - date: Optional["ColumnOrName"] = None, - time: Optional["ColumnOrName"] = None, -) -> Column: +def make_timestamp_ntz(*cols: "ColumnOrName") -> Column: """ Create local date-time from years, months, days, hours, mins, secs fields, or from date and time fields. + If there are 6 cols, then this creates a timestamp from individual time components. + If there are 2 cols, then this creates a timestamp from date and time. If the configuration `spark.sql.ansi.enabled` is false, the function returns NULL on invalid inputs. Otherwise, it will throw an error instead. .. versionadded:: 3.5.0 + + .. versionchanged:: 4.1.0 + Supports the (date, time) overload. Parameters ---------- @@ -25175,8 +25168,8 @@ def make_timestamp_ntz( mins : :class:`~pyspark.sql.Column` or column name The minute-of-hour to represent, from 0 to 59 secs : :class:`~pyspark.sql.Column` or column name - The second-of-minute and its micro-fraction to represent, from 0 to 60.. - The value can be either an integer like 13 , or a fraction like 13.123. If the sec argument equals to 60, the seconds field is set to 0 and 1 minute is added to the final timestamp. date : :class:`~pyspark.sql.Column` or column name
+ The value can be either an integer like 13, or a fraction like 13.123. If the sec argument equals to 60, the seconds field is set to 0 and 1 minute is added to the final timestamp. date : :class:`~pyspark.sql.Column` or column name @@ -25184,6 +25177,11 @@ def make_timestamp_ntz( time : :class:`~pyspark.sql.Column` or column name A local time to represent, from 00:00:00 to 23:59:59.999999 + Notes + ----- + This function accepts either 6 arguments (years, months, days, hours, mins, secs) + or 2 arguments (date, time). + Returns ------- :class:`~pyspark.sql.Column` @@ -25223,46 +25221,33 @@ def make_timestamp_ntz( >>> df = spark.createDataFrame([['2014-12-28', '06:30:45.887']], ... ['date_col', 'time_col']) >>> df.select( - ... sf.make_timestamp_ntz(date=sf.to_date(df.date_col), time=sf.to_time(df.time_col)) + ... sf.make_timestamp_ntz(sf.to_date(df.date_col), sf.to_time(df.time_col)) ... ).show(truncate=False) - +------------------------------------------------------------------+ - |make_timestamp_ntz(date=to_date(date_col), time=to_time(time_col))| - +------------------------------------------------------------------+ - |2014-12-28 06:30:45.887 | - +------------------------------------------------------------------+ + +--------------------------------------------------------+ + |make_timestamp_ntz(to_date(date_col), to_time(time_col))| + +--------------------------------------------------------+ + |2014-12-28 06:30:45.887 | + +--------------------------------------------------------+ >>> spark.conf.unset("spark.sql.session.timeZone") """ - # Check for mixed arguments (invalid) - if any(arg is not None for arg in [years, months, days, hours, mins, secs]) and ( - date is not None or time is not None - ): - raise PySparkValueError( - errorClass="WRONG_NUM_ARGS", - messageParameters={ - "func_name": "make_timestamp_ntz", - "expected": "either (years, months, days, hours, mins, secs) or (date, time)", - "actual": "mixed arguments from both approaches", - }, - ) - - # 
Handle valid cases - if date is not None and time is not None: - # make_timestamp_ntz(date=..., time=...) + if len(cols) == 2: + # make_timestamp_ntz(date, time) + date, time = cols return _invoke_function_over_columns("make_timestamp_ntz", date, time) - elif all(arg is not None for arg in [years, months, days, hours, mins, secs]): - # make_timestamp_ntz(years, months, days, hours, mins, secs) + elif len(cols) == 6: + years, months, days, hours, mins, secs = cols return _invoke_function_over_columns( "make_timestamp_ntz", years, months, days, hours, mins, secs ) else: - # Invalid argument combination (partial arguments) + # Invalid number of arguments raise PySparkValueError( errorClass="WRONG_NUM_ARGS", messageParameters={ "func_name": "make_timestamp_ntz", - "expected": "(years, months, days, hours, mins, secs) or (date, time)", - "actual": "incomplete arguments", + "expected": "either 6 arguments (years, months, days, hours, mins, secs) or 2 arguments (date, time)", + "actual": f"{len(cols)} arguments", }, ) diff --git a/python/pyspark/sql/tests/test_functions.py b/python/pyspark/sql/tests/test_functions.py index b35a2576ce41..95dbcbe8e825 100644 --- a/python/pyspark/sql/tests/test_functions.py +++ b/python/pyspark/sql/tests/test_functions.py @@ -492,14 +492,14 @@ def test_make_timestamp_ntz_with_date_time(self): # Test with date and time columns data = [(date(2024, 5, 22), time(10, 30, 45))] df = self.spark.createDataFrame(data, ["date_col", "time_col"]) - actual = df.select(F.make_timestamp_ntz(date=df.date_col, time=df.time_col)) + actual = df.select(F.make_timestamp_ntz(df.date_col, df.time_col)) assertDataFrameEqual(actual, [Row(datetime.datetime(2024, 5, 22, 10, 30, 45))]) # Test with to_date and to_time functions data = [("2024-05-22", "10:30:45.123")] df = self.spark.createDataFrame(data, ["date_str", "time_str"]) actual = df.select( - F.make_timestamp_ntz(date=F.to_date(df.date_str), time=F.to_time(df.time_str)) + 
F.make_timestamp_ntz(F.to_date(df.date_str), F.to_time(df.time_str)) ) assertDataFrameEqual(actual, [Row(datetime.datetime(2024, 5, 22, 10, 30, 45, 123000))]) @@ -513,8 +513,8 @@ def test_make_timestamp_ntz_error_handling(self): errorClass="WRONG_NUM_ARGS", messageParameters={ "func_name": "make_timestamp_ntz", - "expected": "(years, months, days, hours, mins, secs) or (date, time)", - "actual": "incomplete arguments", + "expected": "either 6 arguments (years, months, days, hours, mins, secs) or 2 arguments (date, time)", + "actual": "0 arguments", }, ) @@ -526,30 +526,22 @@ def test_make_timestamp_ntz_error_handling(self): errorClass="WRONG_NUM_ARGS", messageParameters={ "func_name": "make_timestamp_ntz", - "expected": "(years, months, days, hours, mins, secs) or (date, time)", - "actual": "incomplete arguments", + "expected": "either 6 arguments (years, months, days, hours, mins, secs) or 2 arguments (date, time)", + "actual": "1 arguments", }, ) - # Test mixed argument error + # Test invalid number of arguments (3 arguments) with self.assertRaises(PySparkValueError) as pe: - F.make_timestamp_ntz( - F.lit(2024), - F.lit(1), - F.lit(1), - F.lit(12), - F.lit(0), - F.lit(0), - date=F.lit("2024-01-01"), - ) + F.make_timestamp_ntz(F.lit(2024), F.lit(1), F.lit(1)) self.check_error( exception=pe.exception, errorClass="WRONG_NUM_ARGS", messageParameters={ "func_name": "make_timestamp_ntz", - "expected": "either (years, months, days, hours, mins, secs) or (date, time)", - "actual": "mixed arguments from both approaches", + "expected": "either 6 arguments (years, months, days, hours, mins, secs) or 2 arguments (date, time)", + "actual": "3 arguments", }, ) From bf2d24a73a512f79198775e5554d948d9bb1536c Mon Sep 17 00:00:00 2001 From: Yicong-Huang <17627829+Yicong-Huang@users.noreply.github.com> Date: Sat, 13 Sep 2025 11:40:09 -0700 Subject: [PATCH 10/13] docs: restructure make_timestamp_ntz parameter documentation --- .../pyspark/sql/connect/functions/builtin.py | 6 +-- 
python/pyspark/sql/functions/builtin.py | 46 ++++++++++--------- python/pyspark/sql/tests/test_functions.py | 12 ++--- 3 files changed, 34 insertions(+), 30 deletions(-) diff --git a/python/pyspark/sql/connect/functions/builtin.py b/python/pyspark/sql/connect/functions/builtin.py index 2e0a697f3a21..fcd42611f21d 100644 --- a/python/pyspark/sql/connect/functions/builtin.py +++ b/python/pyspark/sql/connect/functions/builtin.py @@ -4019,7 +4019,7 @@ def try_make_timestamp_ltz( try_make_timestamp_ltz.__doc__ = pysparkfuncs.try_make_timestamp_ltz.__doc__ -@overload # type: ignore[no-redef] +@overload def make_timestamp_ntz( years: "ColumnOrName", months: "ColumnOrName", @@ -4053,8 +4053,8 @@ def make_timestamp_ntz(*args: "ColumnOrName") -> Column: errorClass="WRONG_NUM_ARGS", messageParameters={ "func_name": "make_timestamp_ntz", - "expected": "either 6 arguments (years, months, days, hours, mins, secs) or 2 arguments (date, time)", - "actual": f"{len(args)} arguments", + "expected": "either 6 columns (years, months, days, hours, mins, secs) or 2 columns (date, time)", + "actual": f"{len(args)} columns", }, ) diff --git a/python/pyspark/sql/functions/builtin.py b/python/pyspark/sql/functions/builtin.py index 723525615f34..9477668f329e 100644 --- a/python/pyspark/sql/functions/builtin.py +++ b/python/pyspark/sql/functions/builtin.py @@ -25157,25 +25157,29 @@ def make_timestamp_ntz(*cols: "ColumnOrName") -> Column: Parameters ---------- - years : :class:`~pyspark.sql.Column` or column name - The year to represent, from 1 to 9999 - months : :class:`~pyspark.sql.Column` or column name - The month-of-year to represent, from 1 (January) to 12 (December) - days : :class:`~pyspark.sql.Column` or column name - The day-of-month to represent, from 1 to 31 - hours : :class:`~pyspark.sql.Column` or column name - The hour-of-day to represent, from 0 to 23 - mins : :class:`~pyspark.sql.Column` or column name - The minute-of-hour to represent, from 0 to 59 - secs : 
:class:`~pyspark.sql.Column` or column name - The second-of-minute and its micro-fraction to represent, from 0 to 60. - The value can be either an integer like 13, or a fraction like 13.123. - If the sec argument equals to 60, the seconds field is set - to 0 and 1 minute is added to the final timestamp. - date : :class:`~pyspark.sql.Column` or column name - A date to represent, from 0001-01-01 to 9999-12-31 - time : :class:`~pyspark.sql.Column` or column name - A local time to represent, from 00:00:00 to 23:59:59.999999 + cols : :class:`~pyspark.sql.Column` or column name + Either 6 columns (years, months, days, hours, mins, secs) + Or 2 columns (date, time) + + years : :class:`~pyspark.sql.Column` or column name + The year to represent, from 1 to 9999 + months : :class:`~pyspark.sql.Column` or column name + The month-of-year to represent, from 1 (January) to 12 (December) + days : :class:`~pyspark.sql.Column` or column name + The day-of-month to represent, from 1 to 31 + hours : :class:`~pyspark.sql.Column` or column name + The hour-of-day to represent, from 0 to 23 + mins : :class:`~pyspark.sql.Column` or column name + The minute-of-hour to represent, from 0 to 59 + secs : :class:`~pyspark.sql.Column` or column name + The second-of-minute and its micro-fraction to represent, from 0 to 60. + The value can be either an integer like 13, or a fraction like 13.123. + If the sec argument equals to 60, the seconds field is set + to 0 and 1 minute is added to the final timestamp. 
+ date : :class:`~pyspark.sql.Column` or column name + A date to represent, from 0001-01-01 to 9999-12-31 + time : :class:`~pyspark.sql.Column` or column name + A local time to represent, from 00:00:00 to 23:59:59.999999 Notes ----- @@ -25246,8 +25250,8 @@ def make_timestamp_ntz(*cols: "ColumnOrName") -> Column: errorClass="WRONG_NUM_ARGS", messageParameters={ "func_name": "make_timestamp_ntz", - "expected": "either 6 arguments (years, months, days, hours, mins, secs) or 2 arguments (date, time)", - "actual": f"{len(cols)} arguments", + "expected": "either 6 columns (years, months, days, hours, mins, secs) or 2 columns (date, time)", + "actual": f"{len(cols)} columns", }, ) diff --git a/python/pyspark/sql/tests/test_functions.py b/python/pyspark/sql/tests/test_functions.py index 95dbcbe8e825..5eb306960105 100644 --- a/python/pyspark/sql/tests/test_functions.py +++ b/python/pyspark/sql/tests/test_functions.py @@ -513,8 +513,8 @@ def test_make_timestamp_ntz_error_handling(self): errorClass="WRONG_NUM_ARGS", messageParameters={ "func_name": "make_timestamp_ntz", - "expected": "either 6 arguments (years, months, days, hours, mins, secs) or 2 arguments (date, time)", - "actual": "0 arguments", + "expected": "either 6 columns (years, months, days, hours, mins, secs) or 2 columns (date, time)", + "actual": "0 columns", }, ) @@ -526,8 +526,8 @@ def test_make_timestamp_ntz_error_handling(self): errorClass="WRONG_NUM_ARGS", messageParameters={ "func_name": "make_timestamp_ntz", - "expected": "either 6 arguments (years, months, days, hours, mins, secs) or 2 arguments (date, time)", - "actual": "1 arguments", + "expected": "either 6 columns (years, months, days, hours, mins, secs) or 2 columns (date, time)", + "actual": "1 columns", }, ) @@ -540,8 +540,8 @@ def test_make_timestamp_ntz_error_handling(self): errorClass="WRONG_NUM_ARGS", messageParameters={ "func_name": "make_timestamp_ntz", - "expected": "either 6 arguments (years, months, days, hours, mins, secs) or 2 arguments 
(date, time)", - "actual": "3 arguments", + "expected": "either 6 columns (years, months, days, hours, mins, secs) or 2 columns (date, time)", + "actual": "3 columns", }, ) From 3f92037550ee71b805eda019a6707e174e5eb8d9 Mon Sep 17 00:00:00 2001 From: Yicong-Huang <17627829+Yicong-Huang@users.noreply.github.com> Date: Sat, 13 Sep 2025 11:40:49 -0700 Subject: [PATCH 11/13] reformat --- python/pyspark/sql/functions/builtin.py | 4 +++- python/pyspark/sql/tests/test_functions.py | 4 +--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/python/pyspark/sql/functions/builtin.py b/python/pyspark/sql/functions/builtin.py index 9477668f329e..b86da455fdce 100644 --- a/python/pyspark/sql/functions/builtin.py +++ b/python/pyspark/sql/functions/builtin.py @@ -25125,6 +25125,7 @@ def try_make_timestamp_ltz( "try_make_timestamp_ltz", years, months, days, hours, mins, secs ) + @overload def make_timestamp_ntz( years: "ColumnOrName", @@ -25136,6 +25137,7 @@ def make_timestamp_ntz( ) -> Column: ... + @overload def make_timestamp_ntz(date: "ColumnOrName", time: "ColumnOrName") -> Column: ... @@ -25151,7 +25153,7 @@ def make_timestamp_ntz(*cols: "ColumnOrName") -> Column: on invalid inputs. Otherwise, it will throw an error instead. .. versionadded:: 3.5.0 - + .. versionchanged:: 4.1.0 Supports the (date, time) overload.
diff --git a/python/pyspark/sql/tests/test_functions.py b/python/pyspark/sql/tests/test_functions.py index 5eb306960105..d701e83576a7 100644 --- a/python/pyspark/sql/tests/test_functions.py +++ b/python/pyspark/sql/tests/test_functions.py @@ -498,9 +498,7 @@ def test_make_timestamp_ntz_with_date_time(self): # Test with to_date and to_time functions data = [("2024-05-22", "10:30:45.123")] df = self.spark.createDataFrame(data, ["date_str", "time_str"]) - actual = df.select( - F.make_timestamp_ntz(F.to_date(df.date_str), F.to_time(df.time_str)) - ) + actual = df.select(F.make_timestamp_ntz(F.to_date(df.date_str), F.to_time(df.time_str))) assertDataFrameEqual(actual, [Row(datetime.datetime(2024, 5, 22, 10, 30, 45, 123000))]) def test_make_timestamp_ntz_error_handling(self): From 101a78c8c84e6deb02bdbbd5b2ea36a1742e1171 Mon Sep 17 00:00:00 2001 From: Yicong-Huang <17627829+Yicong-Huang@users.noreply.github.com> Date: Sat, 13 Sep 2025 13:54:39 -0700 Subject: [PATCH 12/13] fix test --- python/pyspark/sql/connect/functions/builtin.py | 14 +++++++------- python/pyspark/sql/functions/builtin.py | 2 +- python/pyspark/sql/tests/test_functions.py | 6 +++--- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/python/pyspark/sql/connect/functions/builtin.py b/python/pyspark/sql/connect/functions/builtin.py index fcd42611f21d..1650a19a740e 100644 --- a/python/pyspark/sql/connect/functions/builtin.py +++ b/python/pyspark/sql/connect/functions/builtin.py @@ -4036,14 +4036,14 @@ def make_timestamp_ntz(date: "ColumnOrName", time: "ColumnOrName") -> Column: ... 
-def make_timestamp_ntz(*args: "ColumnOrName") -> Column: - if len(args) == 2: +def make_timestamp_ntz(*cols: "ColumnOrName") -> Column: + if len(cols) == 2: # make_timestamp_ntz(date, time) - date, time = args + date, time = cols return _invoke_function_over_columns("make_timestamp_ntz", date, time) - elif len(args) == 6: + elif len(cols) == 6: # make_timestamp_ntz(years, months, days, hours, mins, secs) - years, months, days, hours, mins, secs = args + years, months, days, hours, mins, secs = cols return _invoke_function_over_columns( "make_timestamp_ntz", years, months, days, hours, mins, secs ) @@ -4053,8 +4053,8 @@ def make_timestamp_ntz(*args: "ColumnOrName") -> Column: errorClass="WRONG_NUM_ARGS", messageParameters={ "func_name": "make_timestamp_ntz", - "expected": "either 6 columns (years, months, days, hours, mins, secs) or 2 columns (date, time)", - "actual": f"{len(args)} columns", + "expected": "either (years, months, days, hours, mins, secs) or (date, time)", + "actual": f"{len(cols)} columns", }, ) diff --git a/python/pyspark/sql/functions/builtin.py b/python/pyspark/sql/functions/builtin.py index b86da455fdce..0bf697bf6516 100644 --- a/python/pyspark/sql/functions/builtin.py +++ b/python/pyspark/sql/functions/builtin.py @@ -25252,7 +25252,7 @@ def make_timestamp_ntz(*cols: "ColumnOrName") -> Column: errorClass="WRONG_NUM_ARGS", messageParameters={ "func_name": "make_timestamp_ntz", - "expected": "either 6 columns (years, months, days, hours, mins, secs) or 2 columns (date, time)", + "expected": "either (years, months, days, hours, mins, secs) or (date, time)", "actual": f"{len(cols)} columns", }, ) diff --git a/python/pyspark/sql/tests/test_functions.py b/python/pyspark/sql/tests/test_functions.py index d701e83576a7..7ef90a6b17ec 100644 --- a/python/pyspark/sql/tests/test_functions.py +++ b/python/pyspark/sql/tests/test_functions.py @@ -511,7 +511,7 @@ def test_make_timestamp_ntz_error_handling(self): errorClass="WRONG_NUM_ARGS", messageParameters={ 
"func_name": "make_timestamp_ntz", - "expected": "either 6 columns (years, months, days, hours, mins, secs) or 2 columns (date, time)", + "expected": "either (years, months, days, hours, mins, secs) or (date, time)", "actual": "0 columns", }, ) @@ -524,7 +524,7 @@ def test_make_timestamp_ntz_error_handling(self): errorClass="WRONG_NUM_ARGS", messageParameters={ "func_name": "make_timestamp_ntz", - "expected": "either 6 columns (years, months, days, hours, mins, secs) or 2 columns (date, time)", + "expected": "either (years, months, days, hours, mins, secs) or (date, time)", "actual": "1 columns", }, ) @@ -538,7 +538,7 @@ def test_make_timestamp_ntz_error_handling(self): errorClass="WRONG_NUM_ARGS", messageParameters={ "func_name": "make_timestamp_ntz", - "expected": "either 6 columns (years, months, days, hours, mins, secs) or 2 columns (date, time)", + "expected": "either (years, months, days, hours, mins, secs) or (date, time)", "actual": "3 columns", }, ) From a7ddbe538c62dfaee55d15debd19e0b6dbaa4689 Mon Sep 17 00:00:00 2001 From: Yicong Huang <17627829+Yicong-Huang@users.noreply.github.com> Date: Sat, 13 Sep 2025 19:51:20 -0700 Subject: [PATCH 13/13] Refactor docstring for make_timestamp_ntz function Updated docstring for make_timestamp_ntz function to improve readability. --- python/pyspark/sql/functions/builtin.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/python/pyspark/sql/functions/builtin.py b/python/pyspark/sql/functions/builtin.py index 0bf697bf6516..455d2568c5eb 100644 --- a/python/pyspark/sql/functions/builtin.py +++ b/python/pyspark/sql/functions/builtin.py @@ -25146,7 +25146,8 @@ def make_timestamp_ntz(date: "ColumnOrName", time: "ColumnOrName") -> Column: @_try_remote_functions def make_timestamp_ntz(*cols: "ColumnOrName") -> Column: """ - Create local date-time from years, months, days, hours, mins, secs fields, or from date and time fields. 
+ Create local date-time from years, months, days, hours, mins, secs fields, or from + date and time fields. If there are 6 cols, then this creates a timestamp from individual time components. If there are 2 cols, then this creates a timestamp from date and time. If the configuration `spark.sql.ansi.enabled` is false, the function returns NULL