Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -125,18 +125,23 @@ private[hive] class HiveMetastoreCatalog(sparkSession: SparkSession) extends Log
/**
 * Returns true when `key` belongs to the Parquet property namespace: either it
 * starts with the "parquet." prefix, or it contains ".parquet." nested inside
 * a longer key (e.g. "spark.sql.parquet.xxx").
 */
private def isParquetProperty(key: String) = {
  val hasPrefix = key.startsWith("parquet.")
  val hasNestedSegment = key.contains(".parquet.")
  hasPrefix || hasNestedSegment
}

/**
 * Returns true when `key` is the "recursiveFileLookup" table property,
 * matched case-insensitively.
 */
private def isRecursiveFileLookupProperty(key: String) = {
  val recursiveLookupKey = "recursiveFileLookup"
  recursiveLookupKey.equalsIgnoreCase(key)
}

def convert(relation: HiveTableRelation): LogicalRelation = {
val serde = relation.tableMeta.storage.serde.getOrElse("").toLowerCase(Locale.ROOT)

// Consider table and storage properties. For properties existing in both sides, storage
// properties will supersede table properties.
if (serde.contains("parquet")) {
val options = relation.tableMeta.properties.filterKeys(isParquetProperty) ++
val options = relation.tableMeta.properties.filterKeys(p =>
isParquetProperty(p) || isRecursiveFileLookupProperty(p)) ++
relation.tableMeta.storage.properties + (ParquetOptions.MERGE_SCHEMA ->
SQLConf.get.getConf(HiveUtils.CONVERT_METASTORE_PARQUET_WITH_SCHEMA_MERGING).toString)
convertToLogicalRelation(relation, options, classOf[ParquetFileFormat], "parquet")
} else {
val options = relation.tableMeta.properties.filterKeys(isOrcProperty) ++
val options = relation.tableMeta.properties.filterKeys(p =>
isOrcProperty(p) || isRecursiveFileLookupProperty(p)) ++
relation.tableMeta.storage.properties
if (SQLConf.get.getConf(SQLConf.ORC_IMPLEMENTATION) == "native") {
convertToLogicalRelation(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -358,4 +358,30 @@ class DataSourceWithHiveMetastoreCatalogSuite
Seq(table("src").count().toString))
}
}

// Verifies SPARK-29899: a table created with the 'recursiveFileLookup'='true'
// table property picks up data files in nested subdirectories below its
// location, while a table without the property does not.
test("SPARK-29899: Recursively load data in table via TBLPROPERTIES") {
  withTempPath { dir =>
    val baseDir = s"${dir.getCanonicalFile.toURI.toString}/path1"
    val innerDir = s"$baseDir/path2/path3"
    // Write three rows two directory levels below the table location.
    spark.range(3).selectExpr("id").write.parquet(innerDir)
    withTable("test1", "test2") {
      withSQLConf(HiveUtils.CONVERT_METASTORE_PARQUET.key -> "true") {
        // With recursive lookup enabled, the nested files are visible.
        spark.sql(
          s"""
             |CREATE TABLE test1 (id bigint)
             |STORED AS PARQUET LOCATION '$baseDir'
             |TBLPROPERTIES (
             | 'recursiveFileLookup'='true')
             |""".stripMargin)
        checkAnswer(spark.table("test1"), Seq(Row(0), Row(1), Row(2)))
        // Without the property, only files directly under the location
        // are scanned, so the table appears empty.
        spark.sql(
          s"""
             |CREATE TABLE test2 (id bigint)
             |STORED AS PARQUET LOCATION '$baseDir'
             |""".stripMargin)
        checkAnswer(spark.table("test2"), Seq())
      }
    }
  }
}
}