+<<<<<<< HEAD
+=======
+__author__ = 'ktakagiw'
+
+
+>>>>>>> initial commit for pySparkStreaming
 #
 # Licensed to the Apache Software Foundation (ASF) under one or more
 # contributor license agreements.  See the NOTICE file distributed with
 # limitations under the License.
 #

+<<<<<<< HEAD
 import sys
 from signal import signal, SIGTERM, SIGINT

@@ -29,12 +36,43 @@ class StreamingContext(object):
2936 """
3037 Main entry point for Spark Streaming functionality. A StreamingContext represents the
3138 connection to a Spark cluster, and can be used to create L{DStream}s and
+=======
+import os
+import shutil
+import sys
+from threading import Lock
+from tempfile import NamedTemporaryFile
+
+from pyspark import accumulators
+from pyspark.accumulators import Accumulator
+from pyspark.broadcast import Broadcast
+from pyspark.conf import SparkConf
+from pyspark.files import SparkFiles
+from pyspark.java_gateway import launch_gateway
+from pyspark.serializers import PickleSerializer, BatchedSerializer, UTF8Deserializer
+from pyspark.storagelevel import StorageLevel
+from pyspark.rdd import RDD
+from pyspark.context import SparkContext
+
+from py4j.java_collections import ListConverter
+
+from pyspark.streaming.dstream import DStream
+
+class StreamingContext(object):
+    """
+    Main entry point for Spark functionality. A StreamingContext represents the
+    connection to a Spark cluster, and can be used to create L{RDD}s and
+>>>>>>> initial commit for pySparkStreaming
     broadcast variables on that cluster.
     """

     def __init__(self, master=None, appName=None, sparkHome=None, pyFiles=None,
                  environment=None, batchSize=1024, serializer=PickleSerializer(), conf=None,
+<<<<<<< HEAD
                  gateway=None, sparkContext=None, duration=None):
+=======
+                 gateway=None, duration=None):
+>>>>>>> initial commit for pySparkStreaming
3876 """
3977 Create a new StreamingContext . At least the master and app name and duration
4078 should be set , either through the named parameters here or through C {conf }.
@@ -55,6 +93,7 @@ def __init__(self, master=None, appName=None, sparkHome=None, pyFiles=None,
         @param conf: A L{SparkConf} object setting Spark properties.
         @param gateway: Use an existing gateway and JVM, otherwise a new JVM
             will be instantiated.
+<<<<<<< HEAD
         @param sparkContext: L{SparkContext} object.
         @param duration: A L{Duration} object for SparkStreaming.

@@ -73,13 +112,23 @@ def __init__(self, master=None, appName=None, sparkHome=None, pyFiles=None,
         # is started in StreamingContext.
         SparkContext._gateway.restart_callback_server()
         self._clean_up_trigger()
+=======
+        @param duration: A L{Duration} object for SparkStreaming.
+
+        """
+        # Create the Python SparkContext
+        self._sc = SparkContext(master=master, appName=appName, sparkHome=sparkHome,
+                                pyFiles=pyFiles, environment=environment, batchSize=batchSize,
+                                serializer=serializer, conf=conf, gateway=gateway)
+>>>>>>> initial commit for pySparkStreaming
         self._jvm = self._sc._jvm
         self._jssc = self._initialize_context(self._sc._jsc, duration._jduration)

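     # The object returned below is a py4j proxy for the JVM-side
     # org.apache.spark.streaming.api.java.JavaStreamingContext.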
     # Initialize StreamingContext in function to allow subclass specific initialization
     def _initialize_context(self, jspark_context, jduration):
         return self._jvm.JavaStreamingContext(jspark_context, jduration)

+<<<<<<< HEAD
     def _clean_up_trigger(self):
         """Kill py4j callback server properly using signal lib"""

@@ -156,3 +205,53 @@ def _testInputStream(self, test_inputs, numSlices=None):
         jinput_stream = self._jvm.PythonTestInputStream(self._jssc, jtest_rdds).asJavaDStream()

         return DStream(jinput_stream, self, test_rdd_deserializers[0])
+=======
+    def actorStream(self, props, name, storageLevel, supervisorStrategy):
+        raise NotImplementedError
+
+    def addStreamingListener(self, streamingListener):
+        raise NotImplementedError
+
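+    # Blocks until the streaming job stops. The optional timeout is passed
+    # through to the JVM as-is; the Java awaitTermination(long) overload
+    # interprets it as milliseconds.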
+    def awaitTermination(self, timeout=None):
+        if timeout:
+            self._jssc.awaitTermination(timeout)
+        else:
+            self._jssc.awaitTermination()
+
+    def checkpoint(self, directory):
+        raise NotImplementedError
+
+    def fileStream(self, directory, filter=None, newFilesOnly=None):
+        raise NotImplementedError
+
+    def networkStream(self, receiver):
+        raise NotImplementedError
+
+    def queueStream(self, queue, oneAtATime=True, defaultRDD=None):
+        raise NotImplementedError
+
+    def rawSocketStream(self, hostname, port, storagelevel):
+        raise NotImplementedError
+
+    def remember(self, duration):
+        raise NotImplementedError
+
+    def socketStream(self, hostname, port, converter, storageLevel):
+        raise NotImplementedError
+
+    def start(self):
+        self._jssc.start()
+
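+    # Stub below: the Scala stop(stopSparkContext) variant also shuts down the
+    # underlying SparkContext when requested; that wiring is not in this commit.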
+    def stop(self, stopSparkContext=True):
+        raise NotImplementedError
+
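+    # Watches a directory for newly written text files; each line of each new
+    # file becomes one element of the returned DStream, decoded as UTF-8.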
+    def textFileStream(self, directory):
+        return DStream(self._jssc.textFileStream(directory), self, UTF8Deserializer())
+
+    def transform(self, seq):
+        raise NotImplementedError
+
+    def union(self, seq):
+        raise NotImplementedError
+
257+ >> >> >> > initial commit for pySparkStreaming