Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 9 additions & 5 deletions R/pkg/tests/fulltests/test_sparkSQL.R
Original file line number Diff line number Diff line change
Expand Up @@ -3990,12 +3990,16 @@ test_that("Call DataFrameWriter.load() API in Java without path and check argume
expect_error(read.df(source = "json"),
paste("Error in load : analysis error - Unable to infer schema for JSON.",
"It must be specified manually"))
expect_error(read.df("arbitrary_path"), "Error in load : analysis error - Path does not exist")
expect_error(read.json("arbitrary_path"), "Error in json : analysis error - Path does not exist")
expect_error(read.text("arbitrary_path"), "Error in text : analysis error - Path does not exist")
expect_error(read.orc("arbitrary_path"), "Error in orc : analysis error - Path does not exist")
expect_error(read.df("arbitrary_path"),
"Error in load : analysis error - \\[PATH_NOT_FOUND\\].*")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@HyukjinKwon @srielau @cloud-fan Are you ok with such changes?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

FYI: the resolved location of arbitrary_path differs between test environments, so I use a regexp to match the path string.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

thanks man

expect_error(read.json("arbitrary_path"),
"Error in json : analysis error - \\[PATH_NOT_FOUND\\].*")
expect_error(read.text("arbitrary_path"),
"Error in text : analysis error - \\[PATH_NOT_FOUND\\].*")
expect_error(read.orc("arbitrary_path"),
"Error in orc : analysis error - \\[PATH_NOT_FOUND\\].*")
expect_error(read.parquet("arbitrary_path"),
"Error in parquet : analysis error - Path does not exist")
"Error in parquet : analysis error - \\[PATH_NOT_FOUND\\].*")

# Arguments checking in R side.
expect_error(read.df(path = c(3)),
Expand Down
10 changes: 5 additions & 5 deletions core/src/main/resources/error/error-classes.json
Original file line number Diff line number Diff line change
Expand Up @@ -801,6 +801,11 @@
],
"sqlState" : "42000"
},
"PATH_NOT_FOUND" : {
"message" : [
"Path does not exist: <path>."
]
},
"PIVOT_VALUE_DATA_TYPE_MISMATCH" : {
"message" : [
"Invalid pivot value '<value>': value data type <valueType> does not match pivot column data type <pivotType>"
Expand Down Expand Up @@ -2221,11 +2226,6 @@
"Unable to infer schema for <format>. It must be specified manually."
]
},
"_LEGACY_ERROR_TEMP_1130" : {
"message" : [
"Path does not exist: <path>."
]
},
"_LEGACY_ERROR_TEMP_1131" : {
"message" : [
"Data source <className> does not support <outputMode> output mode."
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1388,7 +1388,7 @@ private[sql] object QueryCompilationErrors extends QueryErrorsBase {

def dataPathNotExistError(path: String): Throwable = {
new AnalysisException(
errorClass = "_LEGACY_ERROR_TEMP_1130",
errorClass = "PATH_NOT_FOUND",
messageParameters = Map("path" -> path))
}

Expand Down
37 changes: 21 additions & 16 deletions sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
Original file line number Diff line number Diff line change
Expand Up @@ -2322,39 +2322,44 @@ class DataFrameSuite extends QueryTest
test("SPARK-13774: Check error message for non existent path without globbed paths") {
val uuid = UUID.randomUUID().toString
val baseDir = Utils.createTempDir()
try {
val e = intercept[AnalysisException] {
checkError(
exception = intercept[AnalysisException] {
spark.read.format("csv").load(
new File(baseDir, "file").getAbsolutePath,
new File(baseDir, "file2").getAbsolutePath,
new File(uuid, "file3").getAbsolutePath,
uuid).rdd
}
assert(e.getMessage.startsWith("Path does not exist"))
} finally {

}

},
errorClass = "PATH_NOT_FOUND",
parameters = Map("path" -> "file:.*"),
matchPVals = true
)
}

test("SPARK-13774: Check error message for not existent globbed paths") {
// Non-existent initial path component:
val nonExistentBasePath = "/" + UUID.randomUUID().toString
assert(!new File(nonExistentBasePath).exists())
val e = intercept[AnalysisException] {
spark.read.format("text").load(s"$nonExistentBasePath/*")
}
assert(e.getMessage.startsWith("Path does not exist"))
checkError(
exception = intercept[AnalysisException] {
spark.read.format("text").load(s"$nonExistentBasePath/*")
},
errorClass = "PATH_NOT_FOUND",
parameters = Map("path" -> s"file:$nonExistentBasePath/*")
)

// Existent initial path component, but no matching files:
val baseDir = Utils.createTempDir()
val childDir = Utils.createTempDir(baseDir.getAbsolutePath)
assert(childDir.exists())
try {
val e1 = intercept[AnalysisException] {
spark.read.json(s"${baseDir.getAbsolutePath}/*/*-xyz.json").rdd
}
assert(e1.getMessage.startsWith("Path does not exist"))
checkError(
exception = intercept[AnalysisException] {
spark.read.json(s"${baseDir.getAbsolutePath}/*/*-xyz.json").rdd
},
errorClass = "PATH_NOT_FOUND",
parameters = Map("path" -> s"file:${baseDir.getAbsolutePath}/*/*-xyz.json")
)
} finally {
Utils.deleteRecursively(baseDir)
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -109,18 +109,22 @@ class DataSourceSuite extends SharedSparkSession with PrivateMethodTester {
}

test("test non existent paths") {
assertThrows[AnalysisException](
DataSource.checkAndGlobPathIfNecessary(
Seq(
path1.toString,
path2.toString,
nonExistentPath.toString
),
hadoopConf,
checkEmptyGlobPath = true,
checkFilesExist = true,
enableGlobbing = true
)
checkError(
exception = intercept[AnalysisException](
DataSource.checkAndGlobPathIfNecessary(
Seq(
path1.toString,
path2.toString,
nonExistentPath.toString
),
hadoopConf,
checkEmptyGlobPath = true,
checkFilesExist = true,
enableGlobbing = true
)
),
errorClass = "PATH_NOT_FOUND",
parameters = Map("path" -> nonExistentPath.toString)
)
}

Expand Down