From c3d9dea16e1a32f7a2c825d807e6b30d763d08c1 Mon Sep 17 00:00:00 2001
From: guojh
Date: Tue, 30 Oct 2018 15:34:15 +0800
Subject: [PATCH 1/2] User-set Hadoop conf should not be overwritten by SparkContext's conf

---
 .../main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala b/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala
index 4cc0063d010ef..b09fdd4869b50 100644
--- a/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala
@@ -468,7 +468,10 @@ object SparkHadoopUtil {
   private def appendSparkHadoopConfigs(conf: SparkConf, hadoopConf: Configuration): Unit = {
     // Copy any "spark.hadoop.foo=bar" spark properties into conf as "foo=bar"
     for ((key, value) <- conf.getAll if key.startsWith("spark.hadoop.")) {
-      hadoopConf.set(key.substring("spark.hadoop.".length), value)
+      val hadoopKey = key.substring("spark.hadoop.".length)
+      if (hadoopConf.get(hadoopKey) == null) {
+        hadoopConf.set(hadoopKey, value)
+      }
     }
   }
 }

From 58d3d0b85c2c0ef253268efcf5287e4bf125aaf1 Mon Sep 17 00:00:00 2001
From: guojh
Date: Tue, 27 Nov 2018 15:38:56 +0800
Subject: [PATCH 2/2] fixed the review issues

---
 .../scala/org/apache/spark/deploy/SparkHadoopUtil.scala    | 5 +----
 .../main/scala/org/apache/spark/sql/hive/TableReader.scala | 6 ++++++
 2 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala b/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala
index b09fdd4869b50..4cc0063d010ef 100644
--- a/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala
@@ -468,10 +468,7 @@ object SparkHadoopUtil {
   private def appendSparkHadoopConfigs(conf: SparkConf, hadoopConf: Configuration): Unit = {
     // Copy any "spark.hadoop.foo=bar" spark properties into conf as "foo=bar"
     for ((key, value) <- conf.getAll if key.startsWith("spark.hadoop.")) {
-      val hadoopKey = key.substring("spark.hadoop.".length)
-      if (hadoopConf.get(hadoopKey) == null) {
-        hadoopConf.set(hadoopKey, value)
-      }
+      hadoopConf.set(key.substring("spark.hadoop.".length), value)
     }
   }
 }
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/TableReader.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/TableReader.scala
index 9443fbb4330a5..8eb26fc1e1afd 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/TableReader.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/TableReader.scala
@@ -85,6 +85,12 @@ class HadoopTableReader(
   SparkHadoopUtil.get.appendS3AndSparkHadoopConfigurations(
     sparkSession.sparkContext.conf, hadoopConf)
 
+  // Configuration the user sets at runtime (e.g. via the SET command) should also
+  // update hadoopConf, but only keys that start with 'spark.hadoop.' are copied.
+  for ((key, value) <- sparkSession.conf.getAll if key.startsWith("spark.hadoop.")) {
+    hadoopConf.set(key.substring("spark.hadoop.".length), value)
+  }
+
   private val _broadcastedHadoopConf =
     sparkSession.sparkContext.broadcast(new SerializableConfiguration(hadoopConf))
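
For context, here is a minimal, self-contained sketch of the copy logic both patches touch: every "spark.hadoop.foo=bar" entry is stripped of its "spark.hadoop." prefix and written into the Hadoop Configuration. The object and method names below are illustrative only (not part of the patch), a plain Map stands in for SparkConf/session conf, and the example assumes hadoop-common is on the classpath.

import org.apache.hadoop.conf.Configuration

// Illustrative sketch mirroring the loop in appendSparkHadoopConfigs and the
// block added to HadoopTableReader; a plain Map stands in for the Spark conf.
object SparkHadoopPropsSketch {
  def copySparkHadoopProps(props: Map[String, String], hadoopConf: Configuration): Unit = {
    for ((key, value) <- props if key.startsWith("spark.hadoop.")) {
      // e.g. "spark.hadoop.fs.defaultFS" is stored as "fs.defaultFS" in hadoopConf
      hadoopConf.set(key.substring("spark.hadoop.".length), value)
    }
  }

  def main(args: Array[String]): Unit = {
    val hadoopConf = new Configuration()
    copySparkHadoopProps(Map("spark.hadoop.fs.defaultFS" -> "hdfs://namenode:8020"), hadoopConf)
    println(hadoopConf.get("fs.defaultFS"))  // prints hdfs://namenode:8020
  }
}

The difference between the two revisions is only whether an entry already present in hadoopConf is overwritten (PATCH 1 skips existing keys in SparkHadoopUtil; PATCH 2 reverts that and instead re-applies the session's "spark.hadoop.*" settings when HadoopTableReader builds its configuration).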