Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions python/pyspark/errors/error-conditions.json
Original file line number Diff line number Diff line change
Expand Up @@ -1374,6 +1374,11 @@
"Value for `<arg_name>` must be between <lower_bound> and <upper_bound> (inclusive), got <actual>"
]
},
"WRONG_NUM_ARGS": {
"message": [
"Function `<func_name>` expects <expected> but got <actual>."
]
},
"WRONG_NUM_ARGS_FOR_HIGHER_ORDER_FUNCTION": {
"message": [
"Function `<func_name>` should take between 1 and 3 arguments, but the provided function takes <num_args>."
Expand Down
33 changes: 30 additions & 3 deletions python/pyspark/sql/connect/functions/builtin.py
Original file line number Diff line number Diff line change
Expand Up @@ -4019,6 +4019,7 @@ def try_make_timestamp_ltz(
try_make_timestamp_ltz.__doc__ = pysparkfuncs.try_make_timestamp_ltz.__doc__


@overload
def make_timestamp_ntz(
years: "ColumnOrName",
months: "ColumnOrName",
Expand All @@ -4027,9 +4028,35 @@ def make_timestamp_ntz(
mins: "ColumnOrName",
secs: "ColumnOrName",
) -> Column:
return _invoke_function_over_columns(
"make_timestamp_ntz", years, months, days, hours, mins, secs
)
...


@overload
def make_timestamp_ntz(date: "ColumnOrName", time: "ColumnOrName") -> Column:
    # Typing-only overload: build a timestamp_ntz from a date column and a time column.
    ...


def make_timestamp_ntz(*cols: "ColumnOrName") -> Column:
    """Create a local date-time column.

    Accepts either six component columns (years, months, days, hours, mins,
    secs) or two columns (date, time); any other arity raises
    ``PySparkValueError`` with error class ``WRONG_NUM_ARGS``.
    """
    num_cols = len(cols)
    # Both supported arities forward the columns unchanged to the same
    # server-side function, so a single dispatch point suffices.
    if num_cols in (6, 2):
        return _invoke_function_over_columns("make_timestamp_ntz", *cols)
    raise PySparkValueError(
        errorClass="WRONG_NUM_ARGS",
        messageParameters={
            "func_name": "make_timestamp_ntz",
            "expected": "either (years, months, days, hours, mins, secs) or (date, time)",
            "actual": f"{num_cols} columns",
        },
    )


make_timestamp_ntz.__doc__ = pysparkfuncs.make_timestamp_ntz.__doc__
Expand Down
101 changes: 81 additions & 20 deletions python/pyspark/sql/functions/builtin.py
Original file line number Diff line number Diff line change
Expand Up @@ -25126,7 +25126,7 @@ def try_make_timestamp_ltz(
)


@_try_remote_functions
@overload
def make_timestamp_ntz(
years: "ColumnOrName",
months: "ColumnOrName",
Expand All @@ -25135,30 +25135,59 @@ def make_timestamp_ntz(
mins: "ColumnOrName",
secs: "ColumnOrName",
) -> Column:
...


@overload
def make_timestamp_ntz(date: "ColumnOrName", time: "ColumnOrName") -> Column:
    # Typing-only overload: build a timestamp_ntz from a date column and a time column.
    ...


@_try_remote_functions
def make_timestamp_ntz(*cols: "ColumnOrName") -> Column:
"""
Create local date-time from years, months, days, hours, mins, secs fields.
Create local date-time from years, months, days, hours, mins, secs fields, or from
date and time fields.
If there are 6 cols, then this creates a timestamp from individual time components.
If there are 2 cols, then this creates a timestamp from date and time.
If the configuration `spark.sql.ansi.enabled` is false, the function returns NULL
on invalid inputs. Otherwise, it will throw an error instead.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Shall we add a .. versionchanged::?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure! Which version should I use? (4.1?)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

4.1.0

.. versionadded:: 3.5.0

.. versionchanged:: 3.4.0
Supports Spark Connect.

Parameters
----------
years : :class:`~pyspark.sql.Column` or column name
The year to represent, from 1 to 9999
months : :class:`~pyspark.sql.Column` or column name
The month-of-year to represent, from 1 (January) to 12 (December)
days : :class:`~pyspark.sql.Column` or column name
The day-of-month to represent, from 1 to 31
hours : :class:`~pyspark.sql.Column` or column name
The hour-of-day to represent, from 0 to 23
mins : :class:`~pyspark.sql.Column` or column name
The minute-of-hour to represent, from 0 to 59
secs : :class:`~pyspark.sql.Column` or column name
The second-of-minute and its micro-fraction to represent, from 0 to 60.
The value can be either an integer like 13 , or a fraction like 13.123.
If the sec argument equals to 60, the seconds field is set
to 0 and 1 minute is added to the final timestamp.
cols : :class:`~pyspark.sql.Column` or column name
Either 6 columns (years, months, days, hours, mins, secs)
Or 2 columns (date, time)

years : :class:`~pyspark.sql.Column` or column name
The year to represent, from 1 to 9999
months : :class:`~pyspark.sql.Column` or column name
The month-of-year to represent, from 1 (January) to 12 (December)
days : :class:`~pyspark.sql.Column` or column name
The day-of-month to represent, from 1 to 31
hours : :class:`~pyspark.sql.Column` or column name
The hour-of-day to represent, from 0 to 23
mins : :class:`~pyspark.sql.Column` or column name
The minute-of-hour to represent, from 0 to 59
secs : :class:`~pyspark.sql.Column` or column name
The second-of-minute and its micro-fraction to represent, from 0 to 60.
The value can be either an integer like 13, or a fraction like 13.123.
If the sec argument equals to 60, the seconds field is set
to 0 and 1 minute is added to the final timestamp.
date : :class:`~pyspark.sql.Column` or column name
A date to represent, from 0001-01-01 to 9999-12-31
time : :class:`~pyspark.sql.Column` or column name
A local time to represent, from 00:00:00 to 23:59:59.999999

Notes
-----
This function accepts either 6 arguments (years, months, days, hours, mins, secs)
or 2 arguments (date, time).

Returns
-------
Expand All @@ -25179,6 +25208,8 @@ def make_timestamp_ntz(
--------
>>> spark.conf.set("spark.sql.session.timeZone", "America/Los_Angeles")

Example 1: Create local date-time from year, month, day, hour, min, sec fields.

>>> import pyspark.sql.functions as sf
>>> df = spark.createDataFrame([[2014, 12, 28, 6, 30, 45.887]],
... ['year', 'month', 'day', 'hour', 'min', 'sec'])
Expand All @@ -25191,11 +25222,41 @@ def make_timestamp_ntz(
|2014-12-28 06:30:45.887 |
+----------------------------------------------------+

Example 2: Create local date-time from date and time fields.

>>> import pyspark.sql.functions as sf
>>> df = spark.createDataFrame([['2014-12-28', '06:30:45.887']],
... ['date_col', 'time_col'])
>>> df.select(
... sf.make_timestamp_ntz(sf.to_date(df.date_col), sf.to_time(df.time_col))
... ).show(truncate=False)
+--------------------------------------------------------+
|make_timestamp_ntz(to_date(date_col), to_time(time_col))|
+--------------------------------------------------------+
|2014-12-28 06:30:45.887 |
+--------------------------------------------------------+

>>> spark.conf.unset("spark.sql.session.timeZone")
"""
return _invoke_function_over_columns(
"make_timestamp_ntz", years, months, days, hours, mins, secs
)
if len(cols) == 2:
# make_timestamp_ntz(date, time)
date, time = cols
return _invoke_function_over_columns("make_timestamp_ntz", date, time)
elif len(cols) == 6:
years, months, days, hours, mins, secs = cols
return _invoke_function_over_columns(
"make_timestamp_ntz", years, months, days, hours, mins, secs
)
else:
# Invalid number of arguments
raise PySparkValueError(
errorClass="WRONG_NUM_ARGS",
messageParameters={
"func_name": "make_timestamp_ntz",
"expected": "either (years, months, days, hours, mins, secs) or (date, time)",
"actual": f"{len(cols)} columns",
},
)


@_try_remote_functions
Expand Down
58 changes: 58 additions & 0 deletions python/pyspark/sql/tests/test_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -485,6 +485,64 @@ def test_try_make_timestamp_ntz(self):
)
assertDataFrameEqual(actual, [Row(None)])

def test_make_timestamp_ntz_with_date_time(self):
    """Verify the two-argument (date, time) overload of make_timestamp_ntz."""
    from datetime import date, time

    # Case 1: native Python date/time objects as column values.
    rows = [(date(2024, 5, 22), time(10, 30, 45))]
    frame = self.spark.createDataFrame(rows, ["date_col", "time_col"])
    result = frame.select(F.make_timestamp_ntz(frame.date_col, frame.time_col))
    assertDataFrameEqual(result, [Row(datetime.datetime(2024, 5, 22, 10, 30, 45))])

    # Case 2: string columns converted through to_date / to_time.
    rows = [("2024-05-22", "10:30:45.123")]
    frame = self.spark.createDataFrame(rows, ["date_str", "time_str"])
    result = frame.select(
        F.make_timestamp_ntz(F.to_date(frame.date_str), F.to_time(frame.time_str))
    )
    assertDataFrameEqual(result, [Row(datetime.datetime(2024, 5, 22, 10, 30, 45, 123000))])

def test_make_timestamp_ntz_error_handling(self):
    """make_timestamp_ntz must reject every arity other than 2 or 6 columns."""
    # Each entry is an invalid argument list; only the reported count differs.
    invalid_arg_lists = [
        [],  # no arguments
        [F.lit(2024)],  # one argument
        [F.lit(2024), F.lit(1), F.lit(1)],  # three arguments
    ]
    for args in invalid_arg_lists:
        with self.assertRaises(PySparkValueError) as pe:
            F.make_timestamp_ntz(*args)

        self.check_error(
            exception=pe.exception,
            errorClass="WRONG_NUM_ARGS",
            messageParameters={
                "func_name": "make_timestamp_ntz",
                "expected": "either (years, months, days, hours, mins, secs) or (date, time)",
                "actual": f"{len(args)} columns",
            },
        )

def test_string_functions(self):
string_functions = [
"upper",
Expand Down