diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
index c9f6e571ddab..c42e6c3257fa 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
@@ -340,7 +340,7 @@ case class LoadDataCommand(
         uri
       } else {
         val uri = new URI(path)
-        if (uri.getScheme() != null && uri.getAuthority() != null) {
+        val hdfsUri = if (uri.getScheme() != null && uri.getAuthority() != null) {
           uri
         } else {
           // Follow Hive's behavior:
@@ -380,6 +380,13 @@ case class LoadDataCommand(
           }
           new URI(scheme, authority, absolutePath, uri.getQuery(), uri.getFragment())
         }
+        val hadoopConf = sparkSession.sessionState.newHadoopConf()
+        val srcPath = new Path(hdfsUri)
+        val fs = srcPath.getFileSystem(hadoopConf)
+        if (!fs.exists(srcPath)) {
+          throw new AnalysisException(s"LOAD DATA input path does not exist: $path")
+        }
+        hdfsUri
       }
 
     if (partition.nonEmpty) {
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
index 9063ef066aa8..6c11905ba890 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
@@ -2141,4 +2141,13 @@ class HiveDDLSuite
       }
     }
   }
+
+  test("load command for non local invalid path validation") {
+    withTable("tbl") {
+      sql("CREATE TABLE tbl(i INT, j STRING)")
+      val e = intercept[AnalysisException](
+        sql("load data inpath '/doesnotexist.csv' into table tbl"))
+      assert(e.message.contains("LOAD DATA input path does not exist"))
+    }
+  }
 }
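For readers who want to try the same guard outside of Spark, here is a minimal standalone sketch of the Hadoop `FileSystem` existence check that the patch adds to `LoadDataCommand` (the object name and the `/doesnotexist.csv` path are hypothetical; Spark itself throws `AnalysisException` as shown in the diff above):

```scala
import java.net.URI
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.Path

// Minimal sketch of the validation added in LoadDataCommand: resolve the
// input URI to a Hadoop Path, look up the FileSystem that owns it, and
// fail fast when the source is missing. Object name and path are
// hypothetical, not Spark code.
object LoadPathCheckSketch {
  def main(args: Array[String]): Unit = {
    val hadoopConf = new Configuration()         // picks up fs.defaultFS if configured
    val srcPath = new Path(new URI("/doesnotexist.csv"))
    val fs = srcPath.getFileSystem(hadoopConf)   // filesystem owning this path
    if (!fs.exists(srcPath)) {
      // LoadDataCommand throws AnalysisException here; a plain error suffices in a sketch
      sys.error(s"LOAD DATA input path does not exist: $srcPath")
    }
  }
}
```

The check runs before any move or copy is attempted, so a bad `LOAD DATA INPATH` now fails at analysis time with a clear message instead of surfacing later from the filesystem layer, which is what the new `HiveDDLSuite` test asserts.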