Skip to content
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions python/pyspark/sql/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -2551,6 +2551,28 @@ def map_concat(*cols):
return Column(jc)


@since(2.4)
def sequence(start, stop, step=None):
"""
Generate a sequence of integers from `start` to `stop`, incrementing by `step`.
If `step` is not set, incrementing by 1 if `start` is less than or equal to `stop`,
otherwise -1.

>>> df1 = spark.createDataFrame([(-2, 2)], ('C1', 'C2'))
>>> df1.select(sequence('C1', 'C2').alias('r')).collect()
[Row(r=[-2, -1, 0, 1, 2])]
>>> df2 = spark.createDataFrame([(4, -4, -2)], ('C1', 'C2', 'C3'))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What happens if the step is positive in this case?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It will throw java.lang.IllegalArgumentException.
There is a check in collectionOperations.scala

    require(
      (step > num.zero && start <= stop)
        || (step < num.zero && start >= stop)
        || (step == num.zero && start == stop),
      s"Illegal sequence boundaries: $start to $stop by $step")

>>> df2.select(sequence('C1', 'C2', 'C3').alias('r')).collect()
[Row(r=[4, 2, 0, -2, -4])]
"""
sc = SparkContext._active_spark_context
if step is None:
return Column(sc._jvm.functions.sequence(_to_java_column(start), _to_java_column(stop)))
else:
return Column(sc._jvm.functions.sequence(
_to_java_column(start), _to_java_column(stop), _to_java_column(step)))


# ---------------------------- User Defined Function ----------------------------------

class PandasUDFType(object):
Expand Down