@@ -1,9 +1,3 @@
-<<<<<<< HEAD
-=======
-__author__ = 'ktakagiw'
-
-
->>>>>>> initial commit for pySparkStreaming
 #
 # Licensed to the Apache Software Foundation (ASF) under one or more
 # contributor license agreements. See the NOTICE file distributed with
@@ -21,7 +15,6 @@
 # limitations under the License.
 #
 
-<<<<<<< HEAD
 import sys
 from signal import signal, SIGTERM, SIGINT
 
@@ -36,43 +29,12 @@ class StreamingContext(object):
3629 """
3730 Main entry point for Spark Streaming functionality. A StreamingContext represents the
3831 connection to a Spark cluster, and can be used to create L{DStream}s and
-=======
-import os
-import shutil
-import sys
-from threading import Lock
-from tempfile import NamedTemporaryFile
-
-from pyspark import accumulators
-from pyspark.accumulators import Accumulator
-from pyspark.broadcast import Broadcast
-from pyspark.conf import SparkConf
-from pyspark.files import SparkFiles
-from pyspark.java_gateway import launch_gateway
-from pyspark.serializers import PickleSerializer, BatchedSerializer, UTF8Deserializer
-from pyspark.storagelevel import StorageLevel
-from pyspark.rdd import RDD
-from pyspark.context import SparkContext
-
-from py4j.java_collections import ListConverter
-
-from pyspark.streaming.dstream import DStream
-
-class StreamingContext(object):
-    """
-    Main entry point for Spark functionality. A StreamingContext represents the
-    connection to a Spark cluster, and can be used to create L{RDD}s and
->>>>>>> initial commit for pySparkStreaming
     broadcast variables on that cluster.
     """
 
     def __init__(self, master=None, appName=None, sparkHome=None, pyFiles=None,
         environment=None, batchSize=1024, serializer=PickleSerializer(), conf=None,
-<<<<<<< HEAD
         gateway=None, sparkContext=None, duration=None):
-=======
-        gateway=None, duration=None):
->>>>>>> initial commit for pySparkStreaming
7638 """
7739 Create a new StreamingContext. At least the master and app name and duration
7840 should be set, either through the named parameters here or through C{conf}.
@@ -93,7 +55,6 @@ def __init__(self, master=None, appName=None, sparkHome=None, pyFiles=None,
         @param conf: A L{SparkConf} object setting Spark properties.
         @param gateway: Use an existing gateway and JVM, otherwise a new JVM
                will be instantiated.
-<<<<<<< HEAD
         @param sparkContext: L{SparkContext} object.
         @param duration: A L{Duration} object for SparkStreaming.
 
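For orientation, a minimal construction sketch against the signature kept above. `Seconds` is assumed to be a `Duration` helper from the streaming package; it does not appear in this diff:

```python
# Hypothetical usage sketch, not part of this commit.
from pyspark.streaming.context import StreamingContext
from pyspark.streaming.duration import Seconds  # assumed Duration helper

# Per the docstring, at least master, appName, and duration must be set.
ssc = StreamingContext(master="local[2]", appName="PyStreamingApp",
                       duration=Seconds(1))
```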
@@ -112,23 +73,13 @@ def __init__(self, master=None, appName=None, sparkHome=None, pyFiles=None,
         # is started in StreamingContext.
         SparkContext._gateway.restart_callback_server()
         self._clean_up_trigger()
-=======
-        @param duration: A L{Duration} Duration for SparkStreaming
-
-        """
-        # Create the Python SparkContext
-        self._sc = SparkContext(master=master, appName=appName, sparkHome=sparkHome,
-            pyFiles=pyFiles, environment=environment, batchSize=batchSize,
-            serializer=serializer, conf=conf, gateway=gateway)
->>>>>>> initial commit for pySparkStreaming
         self._jvm = self._sc._jvm
         self._jssc = self._initialize_context(self._sc._jsc, duration._jduration)
 
     # Initialize StreamingContext in function to allow subclass specific initialization
     def _initialize_context(self, jspark_context, jduration):
         return self._jvm.JavaStreamingContext(jspark_context, jduration)
 
-<<<<<<< HEAD
     def _clean_up_trigger(self):
         """Kill py4j callback server properly using signal lib"""
 
@@ -205,53 +156,3 @@ def _testInputStream(self, test_inputs, numSlices=None):
         jinput_stream = self._jvm.PythonTestInputStream(self._jssc, jtest_rdds).asJavaDStream()
 
         return DStream(jinput_stream, self, test_rdd_deserializers[0])
-=======
-    def actorStream(self, props, name, storageLevel, supervisorStrategy):
-        raise NotImplementedError
-
-    def addStreamingListener(self, streamingListener):
-        raise NotImplementedError
-
-    def awaitTermination(self, timeout=None):
-        if timeout:
-            self._jssc.awaitTermination(timeout)
-        else:
-            self._jssc.awaitTermination()
-
-    def checkpoint(self, directory):
-        raise NotImplementedError
-
-    def fileStream(self, directory, filter=None, newFilesOnly=None):
-        raise NotImplementedError
-
-    def networkStream(self, receiver):
-        raise NotImplementedError
-
-    def queueStream(self, queue, oneAtATime=True, defaultRDD=None):
-        raise NotImplementedError
-
-    def rawSocketStream(self, hostname, port, storagelevel):
-        raise NotImplementedError
-
-    def remember(self, duration):
-        raise NotImplementedError
-
-    def socketStream(self, hostname, port, converter, storageLevel):
-        raise NotImplementedError
-
-    def start(self):
-        self._jssc.start()
-
-    def stop(self, stopSparkContext=True):
-        raise NotImplementedError
-
-    def textFileStream(self, directory):
-        return DStream(self._jssc.textFileStream(directory), self, UTF8Deserializer())
-
-    def transform(self, seq):
-        raise NotImplementedError
-
-    def union(self, seq):
-        raise NotImplementedError
-
->>>>>>> initial commit for pySparkStreaming
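The duplicate block removed above also documents which parts of the streaming API were live at this point: only `awaitTermination`, `start`, and `textFileStream` have working bodies, while everything else still raises `NotImplementedError`. A minimal end-to-end sketch; the directory path and the `Seconds` helper are assumptions:

```python
# Hypothetical driver program against the API visible in this diff.
ssc = StreamingContext(master="local[2]", appName="FileTail",
                       duration=Seconds(1))

# Watch a directory for new text files; each batch arrives as a DStream of lines.
lines = ssc.textFileStream("/tmp/streaming-input")

ssc.start()             # begin receiving and processing batches
ssc.awaitTermination()  # block until stopped or interrupted
```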