Skip to content

Commit 9d032d0

Browse files
windpiger authored and gatorsmile committed
[SPARK-19329][SQL][BRANCH-2.1] Reading from or writing to a datasource table with a non pre-existing location should succeed
## What changes were proposed in this pull request?

This is a backport PR of #16672 into branch-2.1.

## How was this patch tested?

Existing tests.

Author: windpiger <[email protected]>

Closes #17317 from windpiger/backport-insertnotexists.
1 parent 0622546 commit 9d032d0

File tree

2 files changed

+119
-1
lines changed

2 files changed

+119
-1
lines changed

sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -299,7 +299,7 @@ class FindDataSourceTable(sparkSession: SparkSession) extends Rule[LogicalPlan]
299299
options = table.storage.properties ++ pathOption)
300300

301301
LogicalRelation(
302-
dataSource.resolveRelation(),
302+
dataSource.resolveRelation(checkFilesExist = false),
303303
expectedOutputAttributes = Some(simpleCatalogRelation.output),
304304
catalogTable = Some(table))
305305
}

sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala

Lines changed: 118 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1760,4 +1760,122 @@ class DDLSuite extends QueryTest with SharedSQLContext with BeforeAndAfterEach {
17601760
val rows: Seq[Row] = df.toLocalIterator().asScala.toSeq
17611761
assert(rows.length > 0)
17621762
}
1763+
1764+
test("insert data to a data source table which has a not existed location should succeed") {
1765+
withTable("t") {
1766+
withTempDir { dir =>
1767+
spark.sql(
1768+
s"""
1769+
|CREATE TABLE t(a string, b int)
1770+
|USING parquet
1771+
|OPTIONS(path "$dir")
1772+
""".stripMargin)
1773+
val table = spark.sessionState.catalog.getTableMetadata(TableIdentifier("t"))
1774+
val expectedPath = dir.getAbsolutePath
1775+
assert(table.location == expectedPath)
1776+
1777+
dir.delete
1778+
val tableLocFile = new File(table.location.stripPrefix("file:"))
1779+
assert(!tableLocFile.exists)
1780+
spark.sql("INSERT INTO TABLE t SELECT 'c', 1")
1781+
assert(tableLocFile.exists)
1782+
checkAnswer(spark.table("t"), Row("c", 1) :: Nil)
1783+
1784+
Utils.deleteRecursively(dir)
1785+
assert(!tableLocFile.exists)
1786+
spark.sql("INSERT OVERWRITE TABLE t SELECT 'c', 1")
1787+
assert(tableLocFile.exists)
1788+
checkAnswer(spark.table("t"), Row("c", 1) :: Nil)
1789+
1790+
val newDir = new File(dir, "x")
1791+
spark.sql(s"ALTER TABLE t SET LOCATION '$newDir'")
1792+
spark.sessionState.catalog.refreshTable(TableIdentifier("t"))
1793+
1794+
val table1 = spark.sessionState.catalog.getTableMetadata(TableIdentifier("t"))
1795+
assert(table1.location == newDir.getAbsolutePath)
1796+
assert(!newDir.exists)
1797+
1798+
spark.sql("INSERT INTO TABLE t SELECT 'c', 1")
1799+
assert(newDir.exists)
1800+
checkAnswer(spark.table("t"), Row("c", 1) :: Nil)
1801+
}
1802+
}
1803+
}
1804+
1805+
test("insert into a data source table with no existed partition location should succeed") {
1806+
withTable("t") {
1807+
withTempDir { dir =>
1808+
spark.sql(
1809+
s"""
1810+
|CREATE TABLE t(a int, b int, c int, d int)
1811+
|USING parquet
1812+
|OPTIONS(path '$dir')
1813+
|PARTITIONED BY(a, b)
1814+
""".stripMargin)
1815+
val table = spark.sessionState.catalog.getTableMetadata(TableIdentifier("t"))
1816+
val expectedPath = dir.getAbsolutePath
1817+
assert(table.location == expectedPath)
1818+
1819+
spark.sql("INSERT INTO TABLE t PARTITION(a=1, b=2) SELECT 3, 4")
1820+
checkAnswer(spark.table("t"), Row(3, 4, 1, 2) :: Nil)
1821+
1822+
val partLoc = new File(dir, "a=1")
1823+
Utils.deleteRecursively(partLoc)
1824+
assert(!partLoc.exists())
1825+
// insert overwrite into a partition which location has been deleted.
1826+
spark.sql("INSERT OVERWRITE TABLE t PARTITION(a=1, b=2) SELECT 7, 8")
1827+
assert(partLoc.exists())
1828+
checkAnswer(spark.table("t"), Row(7, 8, 1, 2) :: Nil)
1829+
}
1830+
}
1831+
}
1832+
1833+
test("read data from a data source table which has a not existed location should succeed") {
1834+
withTable("t") {
1835+
withTempDir { dir =>
1836+
spark.sql(
1837+
s"""
1838+
|CREATE TABLE t(a string, b int)
1839+
|USING parquet
1840+
|OPTIONS(path "$dir")
1841+
""".stripMargin)
1842+
val table = spark.sessionState.catalog.getTableMetadata(TableIdentifier("t"))
1843+
val expectedPath = dir.getAbsolutePath
1844+
assert(table.location == expectedPath)
1845+
1846+
dir.delete()
1847+
checkAnswer(spark.table("t"), Nil)
1848+
1849+
val newDir = new File(dir, "x")
1850+
spark.sql(s"ALTER TABLE t SET LOCATION '$newDir'")
1851+
1852+
val table1 = spark.sessionState.catalog.getTableMetadata(TableIdentifier("t"))
1853+
assert(table1.location == newDir.getAbsolutePath)
1854+
assert(!newDir.exists())
1855+
checkAnswer(spark.table("t"), Nil)
1856+
}
1857+
}
1858+
}
1859+
1860+
test("read data from a data source table with no existed partition location should succeed") {
1861+
withTable("t") {
1862+
withTempDir { dir =>
1863+
spark.sql(
1864+
s"""
1865+
|CREATE TABLE t(a int, b int, c int, d int)
1866+
|USING parquet
1867+
|OPTIONS(path "$dir")
1868+
|PARTITIONED BY(a, b)
1869+
""".stripMargin)
1870+
spark.sql("INSERT INTO TABLE t PARTITION(a=1, b=2) SELECT 3, 4")
1871+
checkAnswer(spark.table("t"), Row(3, 4, 1, 2) :: Nil)
1872+
1873+
// select from a partition which location has been deleted.
1874+
Utils.deleteRecursively(dir)
1875+
assert(!dir.exists())
1876+
spark.sql("REFRESH TABLE t")
1877+
checkAnswer(spark.sql("select * from t where a=1 and b=2"), Nil)
1878+
}
1879+
}
1880+
}
17631881
}

0 commit comments

Comments (0)