From 7c49184c411230fe3c263570d6c2d24bb6700bf6 Mon Sep 17 00:00:00 2001 From: Khalid Mammadov Date: Sun, 4 Sep 2022 19:15:15 +0100 Subject: [PATCH 1/6] Docstring improvements --- python/pyspark/sql/functions.py | 574 +++++++++++++++++++++++++++++++- 1 file changed, 567 insertions(+), 7 deletions(-) diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py index 89e86414fbf46..76b590829010d 100644 --- a/python/pyspark/sql/functions.py +++ b/python/pyspark/sql/functions.py @@ -4486,6 +4486,11 @@ def window( start 15 minutes past the hour, e.g. 12:15-13:15, 13:15-14:15... provide `startTime` as `15 minutes`. + Returns + ------- + :class:`~pyspark.sql.Column` + the column for computed results. + Examples -------- >>> import datetime @@ -4548,6 +4553,11 @@ def session_window(timeColumn: "ColumnOrName", gapDuration: Union[Column, str]) static value, e.g. `10 minutes`, `1 second`, or an expression/UDF that specifies gap duration dynamically based on the input row. + Returns + ------- + :class:`~pyspark.sql.Column` + the column for computed results. + Examples -------- >>> df = spark.createDataFrame([("2016-03-11 09:00:07", 1)]).toDF("date", "val") @@ -4579,6 +4589,16 @@ def crc32(col: "ColumnOrName") -> Column: Calculates the cyclic redundancy check value (CRC32) of a binary column and returns the value as a bigint. + Parameters + ---------- + col : :class:`~pyspark.sql.Column` or str + target column to compute on. + + Returns + ------- + :class:`~pyspark.sql.Column` + the column for computed results. + .. versionadded:: 1.5.0 Examples @@ -4594,6 +4614,16 @@ def md5(col: "ColumnOrName") -> Column: .. versionadded:: 1.5.0 + Parameters + ---------- + col : :class:`~pyspark.sql.Column` or str + target column to compute on. + + Returns + ------- + :class:`~pyspark.sql.Column` + the column for computed results. + Examples -------- >>> spark.createDataFrame([('ABC',)], ['a']).select(md5('a').alias('hash')).collect() @@ -4607,6 +4637,16 @@ def sha1(col: "ColumnOrName") -> Column: .. versionadded:: 1.5.0 + Parameters + ---------- + col : :class:`~pyspark.sql.Column` or str + target column to compute on. + + Returns + ------- + :class:`~pyspark.sql.Column` + the column for computed results. + Examples -------- >>> spark.createDataFrame([('ABC',)], ['a']).select(sha1('a').alias('hash')).collect() @@ -4622,6 +4662,19 @@ def sha2(col: "ColumnOrName", numBits: int) -> Column: .. versionadded:: 1.5.0 + Parameters + ---------- + col : :class:`~pyspark.sql.Column` or str + target column to compute on. + numBits : int + the desired bit length of the result, which must have a + value of 224, 256, 384, 512, or 0 (which is equivalent to 256). + + Returns + ------- + :class:`~pyspark.sql.Column` + the column for computed results. + Examples -------- >>> digests = df.select(sha2(df.name, 256).alias('s')).collect() @@ -4638,6 +4691,16 @@ def hash(*cols: "ColumnOrName") -> Column: .. versionadded:: 2.0.0 + Parameters + ---------- + cols : :class:`~pyspark.sql.Column` or str + list of columns to work on. + + Returns + ------- + :class:`~pyspark.sql.Column` + hash value as int column. + Examples -------- >>> spark.createDataFrame([('ABC',)], ['a']).select(hash('a').alias('hash')).collect() @@ -4652,6 +4715,16 @@ def xxhash64(*cols: "ColumnOrName") -> Column: .. versionadded:: 3.0.0 + Parameters + ---------- + cols : :class:`~pyspark.sql.Column` or str + list of columns to work on. + + Returns + ------- + :class:`~pyspark.sql.Column` + hash value as long column. 
+ Examples -------- >>> spark.createDataFrame([('ABC',)], ['a']).select(xxhash64('a').alias('hash')).collect() @@ -4662,8 +4735,8 @@ def xxhash64(*cols: "ColumnOrName") -> Column: def assert_true(col: "ColumnOrName", errMsg: Optional[Union[Column, str]] = None) -> Column: """ - Returns null if the input column is true; throws an exception with the provided error message - otherwise. + Returns `null` if the input column is `true`; throws an exception + with the provided error message otherwise. .. versionadded:: 3.1.0 @@ -4671,20 +4744,27 @@ def assert_true(col: "ColumnOrName", errMsg: Optional[Union[Column, str]] = None ---------- col : :class:`~pyspark.sql.Column` or str column name or column that represents the input column to test - errMsg : :class:`~pyspark.sql.Column` or str + errMsg : :class:`~pyspark.sql.Column` or str, optional A Python string literal or column containing the error message + Returns + ------- + :class:`~pyspark.sql.Column` + `null` if the input column is `true` otherwise throws an error with specified message. + Examples -------- >>> df = spark.createDataFrame([(0,1)], ['a', 'b']) >>> df.select(assert_true(df.a < df.b).alias('r')).collect() [Row(r=None)] - >>> df = spark.createDataFrame([(0,1)], ['a', 'b']) >>> df.select(assert_true(df.a < df.b, df.a).alias('r')).collect() [Row(r=None)] - >>> df = spark.createDataFrame([(0,1)], ['a', 'b']) >>> df.select(assert_true(df.a < df.b, 'error').alias('r')).collect() [Row(r=None)] + >>> df.select(assert_true(df.a > df.b, 'My error msg').alias('r')).collect() # doctest: +SKIP + 22/09/03 20:18:15 ERROR Executor: Exception in task 15.0 in stage 45.0 (TID 383) + java.lang.RuntimeException: My error msg + ... """ if errMsg is None: return _invoke_function_over_columns("assert_true", col) @@ -4707,6 +4787,19 @@ def raise_error(errMsg: Union[Column, str]) -> Column: ---------- errMsg : :class:`~pyspark.sql.Column` or str A Python string literal or column containing the error message + + Returns + ------- + :class:`~pyspark.sql.Column` + throws an error with specified message. + + Examples + -------- + >>> df = spark.range(1) + >>> df.select(raise_error("My error message")).show() # doctest: +SKIP + 22/09/03 20:26:49 ERROR Executor: Exception in task 15.0 in stage 46.0 (TID 399) + java.lang.RuntimeException: My error message + ... """ if not isinstance(errMsg, (str, Column)): raise TypeError("errMsg should be a Column or a str, got {}".format(type(errMsg))) @@ -4725,6 +4818,29 @@ def upper(col: "ColumnOrName") -> Column: Converts a string expression to upper case. .. versionadded:: 1.5.0 + + Parameters + ---------- + col : :class:`~pyspark.sql.Column` or str + target column to work on. + + Returns + ------- + :class:`~pyspark.sql.Column` + upper case values. + + Examples + -------- + >>> from pyspark.sql import types + >>> df = spark.createDataFrame(["Spark", "PySpark", "Pandas API"], types.StringType()) + >>> df.select(upper("value")).show() + +------------+ + |upper(value)| + +------------+ + | SPARK| + | PYSPARK| + | PANDAS API| + +------------+ """ return _invoke_function_over_columns("upper", col) @@ -4734,6 +4850,29 @@ def lower(col: "ColumnOrName") -> Column: Converts a string expression to lower case. .. versionadded:: 1.5.0 + + Parameters + ---------- + col : :class:`~pyspark.sql.Column` or str + target column to work on. + + Returns + ------- + :class:`~pyspark.sql.Column` + lower case values. 
+ + Examples + -------- + >>> from pyspark.sql import types + >>> df = spark.createDataFrame(["Spark", "PySpark", "Pandas API"], types.StringType()) + >>> df.select(lower("value")).show() + +------------+ + |lower(value)| + +------------+ + | spark| + | pyspark| + | pandas api| + +------------+ """ return _invoke_function_over_columns("lower", col) @@ -4743,6 +4882,29 @@ def ascii(col: "ColumnOrName") -> Column: Computes the numeric value of the first character of the string column. .. versionadded:: 1.5.0 + + Parameters + ---------- + col : :class:`~pyspark.sql.Column` or str + target column to work on. + + Returns + ------- + :class:`~pyspark.sql.Column` + numeric value. + + Examples + -------- + >>> from pyspark.sql import types + >>> df = spark.createDataFrame(["Spark", "PySpark", "Pandas API"], types.StringType()) + >>> df.select(ascii("value")).show() + +------------+ + |ascii(value)| + +------------+ + | 83| + | 80| + | 80| + +------------+ """ return _invoke_function_over_columns("ascii", col) @@ -4752,6 +4914,29 @@ def base64(col: "ColumnOrName") -> Column: Computes the BASE64 encoding of a binary column and returns it as a string column. .. versionadded:: 1.5.0 + + Parameters + ---------- + col : :class:`~pyspark.sql.Column` or str + target column to work on. + + Returns + ------- + :class:`~pyspark.sql.Column` + BASE64 encoding of string value. + + Examples + -------- + >>> from pyspark.sql import types + >>> df = spark.createDataFrame(["Spark", "PySpark", "Pandas API"], types.StringType()) + >>> df.select(base64("value")).show() + +----------------+ + | base64(value)| + +----------------+ + | U3Bhcms=| + | UHlTcGFyaw==| + |UGFuZGFzIEFQSQ==| + +----------------+ """ return _invoke_function_over_columns("base64", col) @@ -4761,6 +4946,31 @@ def unbase64(col: "ColumnOrName") -> Column: Decodes a BASE64 encoded string column and returns it as a binary column. .. versionadded:: 1.5.0 + + Parameters + ---------- + col : :class:`~pyspark.sql.Column` or str + target column to work on. + + Returns + ------- + :class:`~pyspark.sql.Column` + encoded string value. + + Examples + -------- + >>> from pyspark.sql import types + >>> df = spark.createDataFrame(["U3Bhcms=", + ... "UHlTcGFyaw==", + ... "UGFuZGFzIEFQSQ=="], types.StringType()) + >>> df.select(unbase64("value")).show() + +--------------------+ + | unbase64(value)| + +--------------------+ + | [53 70 61 72 6B]| + |[50 79 53 70 61 7...| + |[50 61 6E 64 61 7...| + +--------------------+ """ return _invoke_function_over_columns("unbase64", col) @@ -4770,6 +4980,29 @@ def ltrim(col: "ColumnOrName") -> Column: Trim the spaces from left end for the specified string value. .. versionadded:: 1.5.0 + + Parameters + ---------- + col : :class:`~pyspark.sql.Column` or str + target column to work on. + + Returns + ------- + :class:`~pyspark.sql.Column` + left trimmed values. + + Examples + -------- + >>> from pyspark.sql import types + >>> df = spark.createDataFrame([" Spark", "Spark ", " Spark"], types.StringType()) + >>> df.select(ltrim("value").alias("r")).withColumn("length", length("r")).show() + +-------+------+ + | r|length| + +-------+------+ + | Spark| 5| + |Spark | 7| + | Spark| 5| + +-------+------+ """ return _invoke_function_over_columns("ltrim", col) @@ -4779,6 +5012,29 @@ def rtrim(col: "ColumnOrName") -> Column: Trim the spaces from right end for the specified string value. .. versionadded:: 1.5.0 + + Parameters + ---------- + col : :class:`~pyspark.sql.Column` or str + target column to work on. 
+ + Returns + ------- + :class:`~pyspark.sql.Column` + right trimmed values. + + Examples + -------- + >>> from pyspark.sql import types + >>> df = spark.createDataFrame([" Spark", "Spark ", " Spark"], types.StringType()) + >>> df.select(rtrim("value").alias("r")).withColumn("length", length("r")).show() + +--------+------+ + | r|length| + +--------+------+ + | Spark| 8| + | Spark| 5| + | Spark| 6| + +--------+------+ """ return _invoke_function_over_columns("rtrim", col) @@ -4788,6 +5044,29 @@ def trim(col: "ColumnOrName") -> Column: Trim the spaces from both ends for the specified string column. .. versionadded:: 1.5.0 + + Parameters + ---------- + col : :class:`~pyspark.sql.Column` or str + target column to work on. + + Returns + ------- + :class:`~pyspark.sql.Column` + trimmed values from both sides. + + Examples + -------- + >>> from pyspark.sql import types + >>> df = spark.createDataFrame([" Spark", "Spark ", " Spark"], types.StringType()) + >>> df.select(trim("value").alias("r")).withColumn("length", length("r")).show() + +-----+------+ + | r|length| + +-----+------+ + |Spark| 5| + |Spark| 5| + |Spark| 5| + +-----+------+ """ return _invoke_function_over_columns("trim", col) @@ -4799,6 +5078,18 @@ def concat_ws(sep: str, *cols: "ColumnOrName") -> Column: .. versionadded:: 1.5.0 + Parameters + ---------- + sep : str + words separator. + cols : :class:`~pyspark.sql.Column` or str + list of columns to work on. + + Returns + ------- + :class:`~pyspark.sql.Column` + string of concatenated words. + Examples -------- >>> df = spark.createDataFrame([('abcd','123')], ['s', 'd']) @@ -4816,6 +5107,28 @@ def decode(col: "ColumnOrName", charset: str) -> Column: (one of 'US-ASCII', 'ISO-8859-1', 'UTF-8', 'UTF-16BE', 'UTF-16LE', 'UTF-16'). .. versionadded:: 1.5.0 + + Parameters + ---------- + col : :class:`~pyspark.sql.Column` or str + target column to work on. + charset : str + charset to use to decode. + + Returns + ------- + :class:`~pyspark.sql.Column` + the column for computed results. + + Examples + -------- + >>> df = spark.createDataFrame([('abcd',)], ['a']) + >>> df.select(decode("a", "UTF-8")).show() + +----------------------+ + |stringdecode(a, UTF-8)| + +----------------------+ + | abcd| + +----------------------+ """ return _invoke_function("decode", _to_java_column(col), charset) @@ -4826,6 +5139,28 @@ def encode(col: "ColumnOrName", charset: str) -> Column: (one of 'US-ASCII', 'ISO-8859-1', 'UTF-8', 'UTF-16BE', 'UTF-16LE', 'UTF-16'). .. versionadded:: 1.5.0 + + Parameters + ---------- + col : :class:`~pyspark.sql.Column` or str + target column to work on. + charset : str + charset to use to encode. + + Returns + ------- + :class:`~pyspark.sql.Column` + the column for computed results. + + Examples + -------- + >>> df = spark.createDataFrame([('abcd',)], ['c']) + >>> df.select(encode("c", "UTF-8")).show() + +----------------+ + |encode(c, UTF-8)| + +----------------+ + | [61 62 63 64]| + +----------------+ """ return _invoke_function("encode", _to_java_column(col), charset) @@ -4844,6 +5179,11 @@ def format_number(col: "ColumnOrName", d: int) -> Column: d : int the N decimal places + Returns + ------- + :class:`~pyspark.sql.Column` + the column of formatted results.
+ >>> spark.createDataFrame([(5,)], ['a']).select(format_number('a', 4).alias('v')).collect() [Row(v='5.0000')] """ return _invoke_function("format_number", _to_java_column(col), d) @@ -4863,6 +5203,11 @@ def format_string(format: str, *cols: "ColumnOrName") -> Column: cols : :class:`~pyspark.sql.Column` or str column names or :class:`~pyspark.sql.Column`\\s to be used in formatting + Returns + ------- + :class:`~pyspark.sql.Column` + the column of formatted results. + Examples -------- >>> df = spark.createDataFrame([(5, "hello")], ['a', 'b']) @@ -4886,6 +5231,20 @@ def instr(str: "ColumnOrName", substr: str) -> Column: The position is not zero based, but 1 based index. Returns 0 if substr could not be found in str. + Parameters + ---------- + str : :class:`~pyspark.sql.Column` or str + target column to work on. + substr : str + substring to look for. + + Returns + ------- + :class:`~pyspark.sql.Column` + location of the first occurrence of the substring as integer. + + Examples + -------- >>> df = spark.createDataFrame([('abcd',)], ['s',]) >>> df.select(instr(df.s, 'b').alias('s')).collect() [Row(s=2)] @@ -4913,10 +5272,15 @@ def overlay( column name or column containing the substitution string pos : :class:`~pyspark.sql.Column` or str or int column name, column, or int containing the starting position in src - len : :class:`~pyspark.sql.Column` or str or int + len : :class:`~pyspark.sql.Column` or str or int, optional column name, column, or int containing the number of bytes to replace in src string by 'replace' defaults to -1, which represents the length of the 'replace' string + Returns + ------- + :class:`~pyspark.sql.Column` + string with replaced values. + Examples -------- >>> df = spark.createDataFrame([("SPARK_SQL", "CORE")], ("x", "y")) @@ -4962,6 +5326,11 @@ def sentences( country : :class:`~pyspark.sql.Column` or str, optional a country of the locale + Returns + ------- + :class:`~pyspark.sql.Column` + arrays of split sentences. + Examples -------- >>> df = spark.createDataFrame([["This is an example sentence."]], ["string"]) >>> sentences(df.string, lit("en"), lit("US")).show(truncate=False) # doctest: +SKIP +-----------------------------------+ |[[This, is, an, example, sentence]]| +-----------------------------------+ + >>> df = spark.createDataFrame([["Hello world. How are you?"]], ["s"]) + >>> df.select(sentences("s")).show(truncate=False) + +---------------------------------+ + |sentences(s, , ) | + +---------------------------------+ + |[[Hello, world], [How, are, you]]| + +---------------------------------+ """ if language is None: language = lit("") @@ -4992,6 +5368,20 @@ def substring(str: "ColumnOrName", pos: int, len: int) -> Column: ----- The position is not zero based, but 1 based index. + Parameters + ---------- + str : :class:`~pyspark.sql.Column` or str + target column to work on. + pos : int + starting position in str. + len : int + length of chars. + + Returns + ------- + :class:`~pyspark.sql.Column` + substring of given value. + Examples -------- >>> df = spark.createDataFrame([('abcd',)], ['s',]) @@ -5010,6 +5400,20 @@ def substring_index(str: "ColumnOrName", delim: str, count: int) -> Column: .. versionadded:: 1.5.0 + Parameters + ---------- + str : :class:`~pyspark.sql.Column` or str + target column to work on. + delim : str + delimiter of values. + count : int + number of occurrences. + + Returns + ------- + :class:`~pyspark.sql.Column` + substring of given value. + Examples -------- >>> df = spark.createDataFrame([('a.b.c.d',)], ['s']) @@ -5026,6 +5430,18 @@ def levenshtein(left: "ColumnOrName", right: "ColumnOrName") -> Column: ..
versionadded:: 1.5.0 + Parameters + ---------- + left : :class:`~pyspark.sql.Column` or str + first column value. + right : :class:`~pyspark.sql.Column` or str + second column value. + + Returns + ------- + :class:`~pyspark.sql.Column` + Levenshtein distance as integer value. + Examples -------- >>> df0 = spark.createDataFrame([('kitten', 'sitting',)], ['l', 'r']) @@ -5050,6 +5466,11 @@ def locate(substr: str, str: "ColumnOrName", pos: int = 1) -> Column: pos : int, optional start position (zero based) + Returns + ------- + :class:`~pyspark.sql.Column` + position of the substring. + Notes ----- The position is not zero based, but 1 based index. Returns 0 if substr @@ -5070,6 +5491,20 @@ def lpad(col: "ColumnOrName", len: int, pad: str) -> Column: .. versionadded:: 1.5.0 + Parameters + ---------- + col : :class:`~pyspark.sql.Column` or str + target column to work on. + len : int + length of the final string. + pad : str + chars to prepend. + + Returns + ------- + :class:`~pyspark.sql.Column` + left padded result. + Examples -------- >>> df = spark.createDataFrame([('abcd',)], ['s',]) @@ -5085,6 +5520,20 @@ def rpad(col: "ColumnOrName", len: int, pad: str) -> Column: .. versionadded:: 1.5.0 + Parameters + ---------- + col : :class:`~pyspark.sql.Column` or str + target column to work on. + len : int + length of the final string. + pad : str + chars to append. + + Returns + ------- + :class:`~pyspark.sql.Column` + right padded result. + Examples -------- >>> df = spark.createDataFrame([('abcd',)], ['s',]) @@ -5100,6 +5549,18 @@ def repeat(col: "ColumnOrName", n: int) -> Column: .. versionadded:: 1.5.0 + Parameters + ---------- + col : :class:`~pyspark.sql.Column` or str + target column to work on. + n : int + number of times to repeat value. + + Returns + ------- + :class:`~pyspark.sql.Column` + string with repeated values. + Examples -------- >>> df = spark.createDataFrame([('ab',)], ['s',]) @@ -5134,6 +5595,11 @@ def split(str: "ColumnOrName", pattern: str, limit: int = -1) -> Column: .. versionchanged:: 3.0 `split` now takes an optional `limit` field. If not provided, default limit value is -1. + Returns + ------- + :class:`~pyspark.sql.Column` + array of separated strings. + Examples -------- >>> df = spark.createDataFrame([('oneAtwoBthreeC',)], ['s',]) @@ -5151,6 +5617,20 @@ def regexp_extract(str: "ColumnOrName", pattern: str, idx: int) -> Column: .. versionadded:: 1.5.0 + Parameters + ---------- + str : :class:`~pyspark.sql.Column` or str + target column to work on. + pattern : str + regex pattern to apply. + idx : int + matched group id. + + Returns + ------- + :class:`~pyspark.sql.Column` + matched value specified by `idx` group id. + Examples -------- >>> df = spark.createDataFrame([('100-200',)], ['str']) @@ -5182,6 +5662,11 @@ def regexp_replace( replacement : :class:`~pyspark.sql.Column` or str column object or str containing the replacement + Returns + ------- + :class:`~pyspark.sql.Column` + string with all substrings replaced. + Examples -------- >>> df = spark.createDataFrame([("100-200", r"(\d+)", "--")], ["str", "pattern", "replacement"]) @@ -5206,6 +5691,16 @@ def initcap(col: "ColumnOrName") -> Column: .. versionadded:: 1.5.0 + Parameters + ---------- + col : :class:`~pyspark.sql.Column` or str + target column to work on. + + Returns + ------- + :class:`~pyspark.sql.Column` + string with all first letters are uppercase in each word. 
+ Examples -------- >>> spark.createDataFrame([('ab cd',)], ['a']).select(initcap("a").alias('v')).collect() @@ -5220,6 +5715,16 @@ def soundex(col: "ColumnOrName") -> Column: .. versionadded:: 1.5.0 + Parameters + ---------- + col : :class:`~pyspark.sql.Column` or str + target column to work on. + + Returns + ------- + :class:`~pyspark.sql.Column` + SoundEx encoded string. + Examples -------- >>> df = spark.createDataFrame([("Peters",),("Uhrbach",)], ['name']) @@ -5234,6 +5739,16 @@ def bin(col: "ColumnOrName") -> Column: .. versionadded:: 1.5.0 + Parameters + ---------- + col : :class:`~pyspark.sql.Column` or str + target column to work on. + + Returns + ------- + :class:`~pyspark.sql.Column` + binary representation of given value as string. + Examples -------- >>> df.select(bin(df.age).alias('c')).collect() @@ -5249,6 +5764,16 @@ def hex(col: "ColumnOrName") -> Column: .. versionadded:: 1.5.0 + Parameters + ---------- + col : :class:`~pyspark.sql.Column` or str + target column to work on. + + Returns + ------- + :class:`~pyspark.sql.Column` + hexadecimal representation of given value as string. + Examples -------- >>> spark.createDataFrame([('ABC', 3)], ['a', 'b']).select(hex('a'), hex('b')).collect() @@ -5263,6 +5788,16 @@ def unhex(col: "ColumnOrName") -> Column: .. versionadded:: 1.5.0 + Parameters + ---------- + col : :class:`~pyspark.sql.Column` or str + target column to work on. + + Returns + ------- + :class:`~pyspark.sql.Column` + string representation of given hexadecimal value. + Examples -------- >>> spark.createDataFrame([('414243',)], ['a']).select(unhex('a')).collect() @@ -5278,6 +5813,16 @@ def length(col: "ColumnOrName") -> Column: .. versionadded:: 1.5.0 + Parameters + ---------- + col : :class:`~pyspark.sql.Column` or str + target column to work on. + + Returns + ------- + :class:`~pyspark.sql.Column` + length of the value. + Examples -------- >>> spark.createDataFrame([('ABC ',)], ['a']).select(length('a').alias('length')).collect() @@ -5341,11 +5886,26 @@ def bit_length(col: "ColumnOrName") -> Column: def translate(srcCol: "ColumnOrName", matching: str, replace: str) -> Column: """A function translate any character in the `srcCol` by a character in `matching`. The characters in `replace` is corresponding to the characters in `matching`. - The translate will happen when any character in the string matching with the character + Translation will happen whenever any character in the string is matching with the character in the `matching`. .. versionadded:: 1.5.0 + Parameters + ---------- + srcCol : :class:`~pyspark.sql.Column` or str + Source column or strings + matching : str + matching characters. + replace : str + characters for replacement. If this is shorter than `matching` string then + those chars that don't have replacement will be dropped. + + Returns + ------- + :class:`~pyspark.sql.Column` + replaced value. 
+ Examples -------- >>> spark.createDataFrame([('translate',)], ['a']).select(translate('a', "rnlt", "123") \\ From 2c898e7b62ef45bdeae05588cfe24e8bc5be88d0 Mon Sep 17 00:00:00 2001 From: Khalid Mammadov Date: Tue, 6 Sep 2022 08:48:32 +0100 Subject: [PATCH 2/6] Review corrections --- python/pyspark/sql/functions.py | 65 ++++++++++++++++++++++++--------- 1 file changed, 47 insertions(+), 18 deletions(-) diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py index 76b590829010d..133f3df49f6fc 100644 --- a/python/pyspark/sql/functions.py +++ b/python/pyspark/sql/functions.py @@ -4694,7 +4694,7 @@ def hash(*cols: "ColumnOrName") -> Column: Parameters ---------- cols : :class:`~pyspark.sql.Column` or str - list of columns to work on. + one or more columns to compute on. Returns ------- @@ -4703,8 +4703,25 @@ def hash(*cols: "ColumnOrName") -> Column: Examples -------- - >>> spark.createDataFrame([('ABC',)], ['a']).select(hash('a').alias('hash')).collect() - [Row(hash=-757602832)] + >>> df = spark.createDataFrame([('ABC', 'DEF')], ['c1', 'c2']) + + Hash for one column + + >>> df.select(hash('c1').alias('hash')).show() + +----------+ + | hash| + +----------+ + |-757602832| + +----------+ + + Two or more columns + + >>> df.select(hash('c1', 'c2').alias('hash')).show() + +---------+ + | hash| + +---------+ + |599895104| + +---------+ """ return _invoke_function_over_seq_of_columns("hash", cols) @@ -4718,7 +4735,7 @@ def xxhash64(*cols: "ColumnOrName") -> Column: Parameters ---------- cols : :class:`~pyspark.sql.Column` or str - list of columns to work on. + one or more columns to compute on. Returns ------- @@ -4727,8 +4744,25 @@ def xxhash64(*cols: "ColumnOrName") -> Column: Examples -------- - >>> spark.createDataFrame([('ABC',)], ['a']).select(xxhash64('a').alias('hash')).collect() - [Row(hash=4105715581806190027)] + >>> df = spark.createDataFrame([('ABC', 'DEF')], ['c1', 'c2']) + + Hash for one column + + >>> df.select(xxhash64('c1').alias('hash')).show() + +-------------------+ + | hash| + +-------------------+ + |4105715581806190027| + +-------------------+ + + Two or more columns + + >>> df.select(xxhash64('c1', 'c2').alias('hash')).show() + +-------------------+ + | hash| + +-------------------+ + |3233247871021311208| + +-------------------+ """ return _invoke_function_over_seq_of_columns("xxhash64", cols) @@ -4762,7 +4796,7 @@ def assert_true(col: "ColumnOrName", errMsg: Optional[Union[Column, str]] = None >>> df.select(assert_true(df.a < df.b, 'error').alias('r')).collect() [Row(r=None)] >>> df.select(assert_true(df.a > df.b, 'My error msg').alias('r')).collect() # doctest: +SKIP - 22/09/03 20:18:15 ERROR Executor: Exception in task 15.0 in stage 45.0 (TID 383) + ... java.lang.RuntimeException: My error msg ... """ @@ -4797,7 +4831,7 @@ def raise_error(errMsg: Union[Column, str]) -> Column: -------- >>> df = spark.range(1) >>> df.select(raise_error("My error message")).show() # doctest: +SKIP - 22/09/03 20:26:49 ERROR Executor: Exception in task 15.0 in stage 46.0 (TID 399) + ... java.lang.RuntimeException: My error message ... 
""" @@ -4831,8 +4865,7 @@ def upper(col: "ColumnOrName") -> Column: Examples -------- - >>> from pyspark.sql import types - >>> df = spark.createDataFrame(["Spark", "PySpark", "Pandas API"], types.StringType()) + >>> df = spark.createDataFrame(["Spark", "PySpark", "Pandas API"], "STRING") >>> df.select(upper("value")).show() +------------+ |upper(value)| @@ -4863,8 +4896,7 @@ def lower(col: "ColumnOrName") -> Column: Examples -------- - >>> from pyspark.sql import types - >>> df = spark.createDataFrame(["Spark", "PySpark", "Pandas API"], types.StringType()) + >>> df = spark.createDataFrame(["Spark", "PySpark", "Pandas API"], "STRING") >>> df.select(lower("value")).show() +------------+ |lower(value)| @@ -4895,8 +4927,7 @@ def ascii(col: "ColumnOrName") -> Column: Examples -------- - >>> from pyspark.sql import types - >>> df = spark.createDataFrame(["Spark", "PySpark", "Pandas API"], types.StringType()) + >>> df = spark.createDataFrame(["Spark", "PySpark", "Pandas API"], "STRING") >>> df.select(ascii("value")).show() +------------+ |ascii(value)| @@ -4927,8 +4958,7 @@ def base64(col: "ColumnOrName") -> Column: Examples -------- - >>> from pyspark.sql import types - >>> df = spark.createDataFrame(["Spark", "PySpark", "Pandas API"], types.StringType()) + >>> df = spark.createDataFrame(["Spark", "PySpark", "Pandas API"], "STRING") >>> df.select(base64("value")).show() +----------------+ | base64(value)| @@ -4959,10 +4989,9 @@ def unbase64(col: "ColumnOrName") -> Column: Examples -------- - >>> from pyspark.sql import types >>> df = spark.createDataFrame(["U3Bhcms=", ... "UHlTcGFyaw==", - ... "UGFuZGFzIEFQSQ=="], types.StringType()) + ... "UGFuZGFzIEFQSQ=="], "STRING") >>> df.select(unbase64("value")).show() +--------------------+ | unbase64(value)| From b1ba4321d6176367a79adbee8404da53b505b8ea Mon Sep 17 00:00:00 2001 From: Khalid Mammadov Date: Tue, 6 Sep 2022 11:49:38 +0100 Subject: [PATCH 3/6] Simplify df creation Co-authored-by: Hyukjin Kwon --- python/pyspark/sql/functions.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py index 133f3df49f6fc..e42fdac11cefc 100644 --- a/python/pyspark/sql/functions.py +++ b/python/pyspark/sql/functions.py @@ -5054,8 +5054,7 @@ def rtrim(col: "ColumnOrName") -> Column: Examples -------- - >>> from pyspark.sql import types - >>> df = spark.createDataFrame([" Spark", "Spark ", " Spark"], types.StringType()) + >>> df = spark.createDataFrame([" Spark", "Spark ", " Spark"], "STRING") >>> df.select(rtrim("value").alias("r")).withColumn("length", length("r")).show() +--------+------+ | r|length| From 303f240b4121ddf612a070dd6f737e75d4077e11 Mon Sep 17 00:00:00 2001 From: Khalid Mammadov Date: Tue, 6 Sep 2022 11:49:48 +0100 Subject: [PATCH 4/6] Simplify df creation Co-authored-by: Hyukjin Kwon --- python/pyspark/sql/functions.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py index e42fdac11cefc..0a60b3ce2ce79 100644 --- a/python/pyspark/sql/functions.py +++ b/python/pyspark/sql/functions.py @@ -5085,8 +5085,7 @@ def trim(col: "ColumnOrName") -> Column: Examples -------- - >>> from pyspark.sql import types - >>> df = spark.createDataFrame([" Spark", "Spark ", " Spark"], types.StringType()) + >>> df = spark.createDataFrame([" Spark", "Spark ", " Spark"], "STRING") >>> df.select(trim("value").alias("r")).withColumn("length", length("r")).show() +-----+------+ | r|length| From 
e180740500dc818bee4060428fbc3f54196b17e4 Mon Sep 17 00:00:00 2001 From: Khalid Mammadov Date: Tue, 6 Sep 2022 11:50:08 +0100 Subject: [PATCH 5/6] Simplify df creation Co-authored-by: Hyukjin Kwon --- python/pyspark/sql/functions.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py index 0a60b3ce2ce79..e7e2628e71412 100644 --- a/python/pyspark/sql/functions.py +++ b/python/pyspark/sql/functions.py @@ -5022,8 +5022,7 @@ def ltrim(col: "ColumnOrName") -> Column: Examples -------- - >>> from pyspark.sql import types - >>> df = spark.createDataFrame([" Spark", "Spark ", " Spark"], types.StringType()) + >>> df = spark.createDataFrame([" Spark", "Spark ", " Spark"], "STRING") >>> df.select(ltrim("value").alias("r")).withColumn("length", length("r")).show() +-------+------+ | r|length| From edf862484041cccbb66f0ebb00ad56a460f7cc0e Mon Sep 17 00:00:00 2001 From: Khalid Mammadov Date: Tue, 6 Sep 2022 21:46:47 +0100 Subject: [PATCH 6/6] Trigger pipeline
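The doctest examples added in this series assume the PySpark doctest environment, where a SparkSession is already bound to the name `spark`. As a rough, self-contained sketch (not part of any patch above, assuming only a local `pyspark` installation), the documented behaviour can be reproduced in a plain Python shell along these lines; the expected outputs are copied from the examples added in the series:

>>> from pyspark.sql import SparkSession
>>> from pyspark.sql.functions import upper, xxhash64
>>> spark = SparkSession.builder.getOrCreate()  # the doctests assume this session already exists
>>> df = spark.createDataFrame(["Spark", "PySpark", "Pandas API"], "STRING")  # one STRING column named 'value'
>>> df.select(upper("value")).show()
+------------+
|upper(value)|
+------------+
|       SPARK|
|     PYSPARK|
|  PANDAS API|
+------------+
>>> df2 = spark.createDataFrame([('ABC', 'DEF')], ['c1', 'c2'])
>>> df2.select(xxhash64('c1', 'c2').alias('hash')).show()  # two-column hash, as in the xxhash64 example
+-------------------+
|               hash|
+-------------------+
|3233247871021311208|
+-------------------+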