[SPARK-18817] [SPARKR] [SQL] Set default warehouse dir to tempdir #16290
Changes from 4 commits
```diff
@@ -72,6 +72,20 @@ test_that("repeatedly starting and stopping SparkSession", {
   }
 })
 
+test_that("Default warehouse dir should be set to tempdir", {
+  sparkR.session.stop()
+  sparkR.session(enableHiveSupport = FALSE)
+
+  # Create a temporary table
+  sql("CREATE TABLE people_warehouse_test")
+  # spark-warehouse should be written only to tempdir() and not the current working directory
+  res <- list.files(path = ".", pattern = ".*spark-warehouse.*",
+                    recursive = TRUE, include.dirs = TRUE)
+  expect_equal(length(res), 0)
+  result <- sql("DROP TABLE people_warehouse_test")
+  sparkR.session.stop()
+})
+
 test_that("rdd GC across sparkR.stop", {
   sc <- sparkR.sparkContext() # sc should get id 0
   rdd1 <- parallelize(sc, 1:20, 2L) # rdd1 should get id 1
```
```diff
@@ -819,7 +819,13 @@ private[sql] class SQLConf extends Serializable with CatalystConf with Logging {
 
   def variableSubstituteDepth: Int = getConf(VARIABLE_SUBSTITUTE_DEPTH)
 
-  def warehousePath: String = new Path(getConf(StaticSQLConf.WAREHOUSE_PATH)).toString
+  def warehousePath: String = {
+    if (contains(StaticSQLConf.WAREHOUSE_PATH.key)) {
+      new Path(getConf(StaticSQLConf.WAREHOUSE_PATH).get).toString
+    } else {
+      new Path(getConf(StaticSQLConf.DEFAULT_WAREHOUSE_PATH)).toString
+    }
+  }
 
   def ignoreCorruptFiles: Boolean = getConf(IGNORE_CORRUPT_FILES)
 
@@ -964,11 +970,17 @@ object StaticSQLConf {
     }
   }
 
-  val WAREHOUSE_PATH = buildConf("spark.sql.warehouse.dir")
-    .doc("The default location for managed databases and tables.")
+  val DEFAULT_WAREHOUSE_PATH = buildConf("spark.sql.default.warehouse.dir")
+    .doc("The default location for managed databases and tables. " +
+      "Used if spark.sql.warehouse.dir is not set")
     .stringConf
     .createWithDefault(Utils.resolveURI("spark-warehouse").toString)
 
+  val WAREHOUSE_PATH = buildConf("spark.sql.warehouse.dir")
+    .doc("The location for managed databases and tables.")
+    .stringConf
+    .createOptional
+
   val CATALOG_IMPLEMENTATION = buildConf("spark.sql.catalogImplementation")
     .internal()
     .stringConf
```
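
To make the new fallback easier to follow, here is a minimal sketch of the resolution rule `warehousePath` now implements: an explicitly set `spark.sql.warehouse.dir` wins, otherwise `spark.sql.default.warehouse.dir` is used. This is illustrative only; `resolveWarehousePath` and the sample paths are hypothetical, not Spark APIs.

```scala
// Illustrative sketch of the fallback introduced in SQLConf.warehousePath above.
object WarehouseFallbackSketch {
  def resolveWarehousePath(
      userWarehouseDir: Option[String], // spark.sql.warehouse.dir (now an optional conf)
      defaultWarehouseDir: String       // spark.sql.default.warehouse.dir (has a built-in default)
  ): String = userWarehouseDir.getOrElse(defaultWarehouseDir)

  def main(args: Array[String]): Unit = {
    // Nothing set by the user: the new default conf is used.
    assert(resolveWarehousePath(None, "file:/tmp/spark-warehouse") == "file:/tmp/spark-warehouse")
    // An explicit spark.sql.warehouse.dir always takes precedence.
    assert(resolveWarehousePath(Some("file:/data/warehouse"), "file:/tmp/spark-warehouse")
      == "file:/data/warehouse")
  }
}
```
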
```diff
@@ -55,14 +55,19 @@ private[sql] class SharedState(val sparkContext: SparkContext) extends Logging {
         s"is set. Setting ${WAREHOUSE_PATH.key} to the value of " +
         s"hive.metastore.warehouse.dir ('$hiveWarehouseDir').")
       hiveWarehouseDir
-    } else {
+    } else if (sparkContext.conf.contains(WAREHOUSE_PATH.key) &&
+        sparkContext.conf.get(WAREHOUSE_PATH).isDefined) {
       // If spark.sql.warehouse.dir is set, we will override hive.metastore.warehouse.dir using
       // the value of spark.sql.warehouse.dir.
-      // When neither spark.sql.warehouse.dir nor hive.metastore.warehouse.dir is set,
-      // we will set hive.metastore.warehouse.dir to the default value of spark.sql.warehouse.dir.
-      val sparkWarehouseDir = sparkContext.conf.get(WAREHOUSE_PATH)
+      val sparkWarehouseDir = sparkContext.conf.get(WAREHOUSE_PATH).get
       sparkContext.conf.set("hive.metastore.warehouse.dir", sparkWarehouseDir)
       sparkWarehouseDir
+    } else {
+      // When neither spark.sql.warehouse.dir nor hive.metastore.warehouse.dir is set,
+      // we will set hive.metastore.warehouse.dir to the value of spark.sql.default.warehouse.dir.
+      val sparkDefaultWarehouseDir = sparkContext.conf.get(DEFAULT_WAREHOUSE_PATH)
+      sparkContext.conf.set("hive.metastore.warehouse.dir", sparkDefaultWarehouseDir)
+      sparkDefaultWarehouseDir
     }
 
   }
```
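
The `SharedState` resolution now has three branches. The sketch below restates that decision order; `chooseWarehouseDir` is not a Spark API, just a stand-in that mirrors the branches in the diff above.

```scala
// Sketch of the warehouse-dir precedence implemented in SharedState above.
// hiveDir models hive.metastore.warehouse.dir, sparkDir models
// spark.sql.warehouse.dir, defaultDir models spark.sql.default.warehouse.dir.
object WarehousePrecedenceSketch {
  def chooseWarehouseDir(
      hiveDir: Option[String],
      sparkDir: Option[String],
      defaultDir: String): String = (hiveDir, sparkDir) match {
    // Only the Hive setting is present: keep honouring hive.metastore.warehouse.dir.
    case (Some(hive), None) => hive
    // spark.sql.warehouse.dir is set: it overrides hive.metastore.warehouse.dir.
    case (_, Some(sparkWh)) => sparkWh
    // Neither is set: fall back to the new spark.sql.default.warehouse.dir.
    case (None, None) => defaultDir
  }

  def main(args: Array[String]): Unit = {
    assert(chooseWarehouseDir(Some("/hive/wh"), None, "/tmp/wh") == "/hive/wh")
    assert(chooseWarehouseDir(Some("/hive/wh"), Some("/spark/wh"), "/tmp/wh") == "/spark/wh")
    assert(chooseWarehouseDir(None, None, "/tmp/wh") == "/tmp/wh")
  }
}
```
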
```diff
@@ -221,6 +221,19 @@ class SQLConfSuite extends QueryTest with SharedSQLContext {
       .sessionState.conf.warehousePath.stripSuffix("/"))
   }
 
+  test("changing default value of warehouse path") {
+    try {
+      val newWarehouseDefault = "spark-warehouse2"
+      val newWarehouseDefaultPath = new Path(Utils.resolveURI(newWarehouseDefault)).toString
+      sparkContext.conf.set("spark.sql.default.warehouse.dir", newWarehouseDefaultPath)
+      val spark = new SparkSession(sparkContext)
+      assert(newWarehouseDefaultPath.stripSuffix("/") === spark
+        .sessionState.conf.warehousePath.stripSuffix("/"))
+    } finally {
+      sparkContext.conf.remove("spark.sql.default.warehouse.dir")
+    }
+  }
+
   test("MAX_CASES_BRANCHES") {
     withTable("tab1") {
       spark.range(10).write.saveAsTable("tab1")
```
After rethinking it, we might not need to add an extra SQL conf. We just need to know whether the value of `spark.sql.warehouse.dir` comes from the user or is the original default. If it is the default, R can simply change it. Maybe it would also be a good feature for users to be able to tell whether a SQLConf value was set by them or came from the default. cc @cloud-fan

Actually we can: `SessionState.conf.settings` contains all the user-set entries.

Ah I see - I will try to use `SessionState` and see if it can avoid having to create a new option.
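
For reference, a minimal sketch of the approach suggested above (assumptions only, not the final implementation): rather than introducing `spark.sql.default.warehouse.dir`, check whether `spark.sql.warehouse.dir` was explicitly set before substituting a different default. The sketch uses `SparkConf.contains` for illustration; the reviewers point at the user-set entries tracked by `SessionState.conf`.

```scala
// Illustrative only: detecting whether the warehouse dir was set by the user,
// so that SparkR could substitute tempdir() only when it was not.
import org.apache.spark.SparkConf

object WarehouseUserSetSketch {
  def warehouseDirSetByUser(conf: SparkConf): Boolean =
    conf.contains("spark.sql.warehouse.dir")

  def main(args: Array[String]): Unit = {
    val conf = new SparkConf(false)
    // Not set: a front end such as SparkR could safely point it at a temp dir.
    assert(!warehouseDirSetByUser(conf))
    conf.set("spark.sql.warehouse.dir", "/data/warehouse")
    // Explicitly set by the user: leave it untouched.
    assert(warehouseDirSetByUser(conf))
  }
}
```
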