Skip to content

Commit 2d41f04

Browse files
committed
[SPARK-23143][SS][PYTHON] Added python API for setting continuous trigger
## What changes were proposed in this pull request?

Self-explanatory.

## How was this patch tested?

New python tests.

Author: Tathagata Das <[email protected]>

Closes #20309 from tdas/SPARK-23143.
1 parent 9678941 commit 2d41f04

File tree

2 files changed

+25
-4
lines changed

2 files changed

+25
-4
lines changed

python/pyspark/sql/streaming.py

Lines changed: 19 additions & 4 deletions
Original file line number · Diff line number · Diff line change
@@ -786,7 +786,7 @@ def queryName(self, queryName):
786786

787787
@keyword_only
788788
@since(2.0)
789-
def trigger(self, processingTime=None, once=None):
789+
def trigger(self, processingTime=None, once=None, continuous=None):
790790
"""Set the trigger for the stream query. If this is not set it will run the query as fast
791791
as possible, which is equivalent to setting the trigger to ``processingTime='0 seconds'``.
792792
@@ -802,23 +802,38 @@ def trigger(self, processingTime=None, once=None):
802802
>>> writer = sdf.writeStream.trigger(processingTime='5 seconds')
803803
>>> # trigger the query for just once batch of data
804804
>>> writer = sdf.writeStream.trigger(once=True)
805+
>>> # trigger the query for execution every 5 seconds
806+
>>> writer = sdf.writeStream.trigger(continuous='5 seconds')
805807
"""
808+
params = [processingTime, once, continuous]
809+
810+
if params.count(None) == 3:
811+
raise ValueError('No trigger provided')
812+
elif params.count(None) < 2:
813+
raise ValueError('Multiple triggers not allowed.')
814+
806815
jTrigger = None
807816
if processingTime is not None:
808-
if once is not None:
809-
raise ValueError('Multiple triggers not allowed.')
810817
if type(processingTime) != str or len(processingTime.strip()) == 0:
811818
raise ValueError('Value for processingTime must be a non empty string. Got: %s' %
812819
processingTime)
813820
interval = processingTime.strip()
814821
jTrigger = self._spark._sc._jvm.org.apache.spark.sql.streaming.Trigger.ProcessingTime(
815822
interval)
823+
816824
elif once is not None:
817825
if once is not True:
818826
raise ValueError('Value for once must be True. Got: %s' % once)
819827
jTrigger = self._spark._sc._jvm.org.apache.spark.sql.streaming.Trigger.Once()
828+
820829
else:
821-
raise ValueError('No trigger provided')
830+
if type(continuous) != str or len(continuous.strip()) == 0:
831+
raise ValueError('Value for continuous must be a non empty string. Got: %s' %
832+
continuous)
833+
interval = continuous.strip()
834+
jTrigger = self._spark._sc._jvm.org.apache.spark.sql.streaming.Trigger.Continuous(
835+
interval)
836+
822837
self._jwrite = self._jwrite.trigger(jTrigger)
823838
return self
824839

python/pyspark/sql/tests.py

Lines changed: 6 additions & 0 deletions
Original file line number · Diff line number · Diff line change
@@ -1538,6 +1538,12 @@ def test_stream_trigger(self):
15381538
except ValueError:
15391539
pass
15401540

1541+
# Should not take multiple args
1542+
try:
1543+
df.writeStream.trigger(processingTime='5 seconds', continuous='1 second')
1544+
except ValueError:
1545+
pass
1546+
15411547
# Should take only keyword args
15421548
try:
15431549
df.writeStream.trigger('5 seconds')

0 commit comments

Comments (0)