diff --git a/core/src/main/scala/org/apache/spark/deploy/PythonRunner.scala b/core/src/main/scala/org/apache/spark/deploy/PythonRunner.scala index 0b1cec2df830..a8f732b11f6c 100644 --- a/core/src/main/scala/org/apache/spark/deploy/PythonRunner.scala +++ b/core/src/main/scala/org/apache/spark/deploy/PythonRunner.scala @@ -85,6 +85,7 @@ object PythonRunner { // pass conf spark.pyspark.python to python process, the only way to pass info to // python process is through environment variable. sparkConf.get(PYSPARK_PYTHON).foreach(env.put("PYSPARK_PYTHON", _)) + sys.env.get("PYTHONHASHSEED").foreach(env.put("PYTHONHASHSEED", _)) builder.redirectErrorStream(true) // Ugly but needed for stdout and stderr to synchronize try { val process = builder.start() diff --git a/python/pyspark/context.py b/python/pyspark/context.py index ac4b2b035f5c..2961cda553d6 100644 --- a/python/pyspark/context.py +++ b/python/pyspark/context.py @@ -173,10 +173,8 @@ def _do_init(self, master, appName, sparkHome, pyFiles, environment, batchSize, if k.startswith("spark.executorEnv."): varName = k[len("spark.executorEnv."):] self.environment[varName] = v - if sys.version >= '3.3' and 'PYTHONHASHSEED' not in os.environ: - # disable randomness of hash of string in worker, if this is not - # launched by spark-submit - self.environment["PYTHONHASHSEED"] = "0" + + self.environment["PYTHONHASHSEED"] = os.environ.get("PYTHONHASHSEED", "0") # Create the Java SparkContext through Py4J self._jsc = jsc or self._initialize_context(self._conf._jconf) diff --git a/python/pyspark/rdd.py b/python/pyspark/rdd.py index b384b2b50733..a5e6e2b05496 100644 --- a/python/pyspark/rdd.py +++ b/python/pyspark/rdd.py @@ -68,7 +68,8 @@ def portable_hash(x): >>> portable_hash((None, 1)) & 0xffffffff 219750521 """ - if sys.version >= '3.3' and 'PYTHONHASHSEED' not in os.environ: + + if sys.version_info >= (3, 2, 3) and 'PYTHONHASHSEED' not in os.environ: raise Exception("Randomness of hash of string should be disabled via PYTHONHASHSEED") if x is None: diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala index a00234c2b416..6f3e63a0a10f 100644 --- a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala +++ b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala @@ -813,6 +813,7 @@ private[spark] class Client( sys.env.get(envname).foreach(env(envname) = _) } } + sys.env.get("PYTHONHASHSEED").foreach(env.put("PYTHONHASHSEED", _)) } sys.env.get(ENV_DIST_CLASSPATH).foreach { dcp =>