From d6cdde748fb25e51f7f100a9bbd48a1d17547ccf Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Tue, 5 Dec 2017 17:19:58 -0800 Subject: [PATCH 1/4] [SPARK-20728][SQL][FOLLOWUP] Use an actionable exception message --- .../org/apache/spark/sql/execution/datasources/DataSource.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala index 5f12d5f93a35..03ca760e527a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala @@ -602,7 +602,7 @@ object DataSource extends Logging { provider1.startsWith("org.apache.spark.sql.hive.orc")) { throw new AnalysisException( "Hive-based ORC data source must be used with Hive support enabled. " + - "Please use native ORC data source instead") + "Please use native ORC data source instead by `SET spark.sql.orc.impl=native`") } else if (provider1.toLowerCase(Locale.ROOT) == "avro" || provider1 == "com.databricks.spark.avro") { throw new AnalysisException( From ba1e3ae39116c53a8f0d1ed7c71f737fa2e9d0b6 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Tue, 5 Dec 2017 19:47:53 -0800 Subject: [PATCH 2/4] Address comments --- .../apache/spark/sql/execution/datasources/DataSource.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala index 03ca760e527a..9a04c2ef799e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala @@ -602,7 +602,8 @@ object DataSource extends Logging { 
provider1.startsWith("org.apache.spark.sql.hive.orc")) { throw new AnalysisException( "Hive-based ORC data source must be used with Hive support enabled. " + - "Please use native ORC data source instead by `SET spark.sql.orc.impl=native`") + "Please use native ORC data source instead by setting 'spark.sql.orc.impl' " + + "configuration to 'native'") } else if (provider1.toLowerCase(Locale.ROOT) == "avro" || provider1 == "com.databricks.spark.avro") { throw new AnalysisException( From d7d268d19e69b1477e2f3ca9cc8d3f1221d3663c Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Tue, 5 Dec 2017 20:43:32 -0800 Subject: [PATCH 3/4] Address comments --- .../apache/spark/sql/execution/datasources/DataSource.scala | 6 +++--- .../src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala index 9a04c2ef799e..c2c4d8067fb2 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala @@ -601,9 +601,9 @@ object DataSource extends Logging { if (provider1.toLowerCase(Locale.ROOT) == "orc" || provider1.startsWith("org.apache.spark.sql.hive.orc")) { throw new AnalysisException( - "Hive-based ORC data source must be used with Hive support enabled. " + - "Please use native ORC data source instead by setting 'spark.sql.orc.impl' " + - "configuration to 'native'") + "Hive built-in ORC data source must be used with Hive support enabled. 
" + + "Please use the native ORC data source by setting 'spark.sql.orc.impl' to " + + "'native'") } else if (provider1.toLowerCase(Locale.ROOT) == "avro" || provider1 == "com.databricks.spark.avro") { throw new AnalysisException( diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index 86bd9b95bca6..8ddddbeee598 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -1666,7 +1666,7 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext { e = intercept[AnalysisException] { sql(s"select id from `org.apache.spark.sql.hive.orc`.`file_path`") } - assert(e.message.contains("Hive-based ORC data source must be used with Hive support")) + assert(e.message.contains("Hive built-in ORC data source must be used with Hive support")) e = intercept[AnalysisException] { sql(s"select id from `com.databricks.spark.avro`.`file_path`") @@ -2790,7 +2790,7 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext { val e = intercept[AnalysisException] { sql("CREATE TABLE spark_20728(a INT) USING ORC") } - assert(e.message.contains("Hive-based ORC data source must be used with Hive support")) + assert(e.message.contains("Hive built-in ORC data source must be used with Hive support")) } withSQLConf(SQLConf.ORC_IMPLEMENTATION.key -> "native") { From 9809af87cca925bb2ecc90c6c71ab5337695bf28 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Wed, 6 Dec 2017 00:30:40 -0800 Subject: [PATCH 4/4] Remove invalid condition --- .../apache/spark/sql/execution/datasources/DataSource.scala | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala index c2c4d8067fb2..b676672b38cd 100644 --- 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala @@ -598,8 +598,7 @@ object DataSource extends Logging { // Found the data source using fully qualified path dataSource case Failure(error) => - if (provider1.toLowerCase(Locale.ROOT) == "orc" || - provider1.startsWith("org.apache.spark.sql.hive.orc")) { + if (provider1.startsWith("org.apache.spark.sql.hive.orc")) { throw new AnalysisException( "Hive built-in ORC data source must be used with Hive support enabled. " + "Please use the native ORC data source by setting 'spark.sql.orc.impl' to " +