6 changes: 3 additions & 3 deletions R/pkg/R/DataFrame.R

@@ -2625,10 +2625,10 @@ setMethod("write.df",
           signature(df = "SparkDataFrame"),
           function(df, path = NULL, source = NULL, mode = "error", ...) {
             if (!is.null(path) && !is.character(path)) {
-              stop("path should be charactor, null or omitted.")
+              stop("path should be charactor, NULL or omitted.")
             }
             if (!is.null(source) && !is.character(source)) {
-              stop("source should be character, null or omitted. It is the datasource specified ",
+              stop("source should be character, NULL or omitted. It is the datasource specified ",
                    "in 'spark.sql.sources.default' configuration by default.")
             }
             if (!is.character(mode)) {
@@ -2646,7 +2646,7 @@ setMethod("write.df",
             write <- callJMethod(write, "format", source)
             write <- callJMethod(write, "mode", jmode)
             write <- callJMethod(write, "options", options)
-            write <- tryCatch(callJMethod(write, "save"), error = captureJVMException)
+            write <- handledCallJMethod(write, "save")
           })
 
 #' @rdname write.df
14 changes: 6 additions & 8 deletions R/pkg/R/SQLContext.R

@@ -772,10 +772,10 @@ dropTempView <- function(viewName) {
 #' @note read.df since 1.4.0
 read.df.default <- function(path = NULL, source = NULL, schema = NULL, na.strings = "NA", ...) {
   if (!is.null(path) && !is.character(path)) {
-    stop("path should be charactor, null or omitted.")
+    stop("path should be charactor, NULL or omitted.")
   }
   if (!is.null(source) && !is.character(source)) {
-    stop("source should be character, null or omitted. It is the datasource specified ",
+    stop("source should be character, NULL or omitted. It is the datasource specified ",
          "in 'spark.sql.sources.default' configuration by default.")
   }
   sparkSession <- getSparkSession()
@@ -791,13 +791,11 @@ read.df.default <- function(path = NULL, source = NULL, schema = NULL, na.string
   }
   if (!is.null(schema)) {
     stopifnot(class(schema) == "structType")
-    sdf <- tryCatch(callJStatic("org.apache.spark.sql.api.r.SQLUtils", "loadDF", sparkSession,
-                                source, schema$jobj, options),
-                    error = captureJVMException)
+    sdf <- handledCallJStatic("org.apache.spark.sql.api.r.SQLUtils", "loadDF", sparkSession,
+                              source, schema$jobj, options)
   } else {
-    sdf <- tryCatch(callJStatic("org.apache.spark.sql.api.r.SQLUtils", "loadDF", sparkSession,
-                                source, options),
-                    error = captureJVMException)
+    sdf <- handledCallJStatic("org.apache.spark.sql.api.r.SQLUtils", "loadDF", sparkSession,
+                              source, options)
   }
   dataFrame(sdf)
 }
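For context, the user-visible effect of routing these calls through handledCallJStatic: a bad or missing path now surfaces as a short, prefixed R error instead of a raw Java stack trace. The messages below are taken from the updated tests further down; the exact suffix (for example, the offending path) depends on the input.

# Illustrative only; the error text follows the expectations in test_sparkSQL.R.
# read.df(source = "json")
#   Error in loadDF : analysis error - Unable to infer schema for JSON at . It must be specified manually
# read.df("arbitrary_path")
#   Error in loadDF : analysis error - Path does not exist: ...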
55 changes: 46 additions & 9 deletions R/pkg/R/utils.R

@@ -698,18 +698,55 @@ isSparkRShell <- function() {
   grepl(".*shell\\.R$", Sys.getenv("R_PROFILE_USER"), perl = TRUE)
 }
 
-captureJVMException <- function(e) {
-  stacktrace <- as.character(e)
+# Works identically with `callJStatic(...)` but throws a pretty formatted exception.
+handledCallJStatic <- function(cls, method, ...) {
+  result <- tryCatch(callJStatic(cls, method, ...),
+                     error = function(e) {
+                       captureJVMException(e, method)
+                     })
+  result
+}
+
+# Works identically with `callJMethod(...)` but throws a pretty formatted exception.
+handledCallJMethod <- function(obj, method, ...) {
+  result <- tryCatch(callJMethod(obj, method, ...),
+                     error = function(e) {
+                       captureJVMException(e, method)
+                     })
+  result
+}
+
+captureJVMException <- function(e, method) {
+  rawmsg <- as.character(e)
+  if (any(grep("^Error in .*?: ", rawmsg))) {
+    # If the exception message starts with "Error in ...", this is possibly
+    # "Error in invokeJava(...)". Here, it replaces the characters to
+    # `paste("Error in", method, ":")` in order to identify which function
+    # was called in JVM side.
+    stacktrace <- strsplit(rawmsg, "Error in .*?: ")[[1]]
Review comment (Member): very minor nit: you could probably replace the double pass with grep above and strsplit with just the result from strsplit.
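A minimal sketch of the single-pass variant the reviewer is hinting at (hypothetical, not part of this diff): strsplit() returns more than one piece only when the pattern matched, so the separate grep() guard above becomes unnecessary.

# Hypothetical helper illustrating the suggestion; rawmsg and method mean the
# same things as in captureJVMException above.
rewriteErrorPrefix <- function(rawmsg, method) {
  parts <- strsplit(rawmsg, "Error in .*?: ")[[1]]
  if (length(parts) > 1) {
    # The pattern matched, so rebuild the prefix with the JVM-side method name.
    paste(paste("Error in", method, ":"), parts[2])
  } else {
    # No match; keep the raw message untouched.
    rawmsg
  }
}
# rewriteErrorPrefix("Error in invokeJava(...): boom", "save")
# returns "Error in save : boom"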

+    rmsg <- paste("Error in", method, ":")
+    stacktrace <- paste(rmsg[1], stacktrace[2])
+  } else {
+    # Otherwise, do not convert the error message just in case.
+    stacktrace <- rawmsg
+  }
+
   if (any(grep("java.lang.IllegalArgumentException: ", stacktrace))) {
Review thread on this check:

felixcheung (Member): are there cases where the IllegalArgument should be checked on the R side first to avoid the exception in the first place?

Author: Thanks! @felixcheung, I will address all the other comments above. For this one, though, it does not seem easy, because on the R side we cannot tell up front whether a given data source is valid. I might be able to do this only for internal data sources or known Databricks data sources such as "redshift" or "xml", for example by keeping a map of our internal data sources and checking whether a path is given. However, I am not sure it is a good idea to maintain another list of data sources.

felixcheung (Member, Sep 26, 2016): I agree, I don't think we should couple the R code to the underlying data source implementations, and was not suggesting that :) I am saying there are still many (other) cases where the parameters are unchecked, and it would be good to see whether this conversion of the JVM IllegalArgumentException is sufficient or whether more checks should be added on the R side.

Author: Ah, I see. This is a best-effort thing; I think I tried (if I am right) all combinations of missing/wrong parameters in these APIs. One exceptional case for both APIs is that they throw a ClassCastException when the extra options are wrongly typed, which I think we should check on the R side; that will be handled in #15239. I could open another PR for validating parameters across SparkR if you think it is okay.

felixcheung (Member): great, thanks - generally I'd prefer having parameter checks in R, though in this case we need to balance the added code complexity against reduced usability (by checking more, it might fail where it didn't before). So I'm not 100% sure we should add parameter checks across the board.

Author: Yeap, I understand and will investigate it with this in mind :)

-    msg <- strsplit(stacktrace, "java.lang.IllegalArgumentException: ", fixed = TRUE)[[1]][2]
-    first <- strsplit(msg, "\r?\n\tat")[[1]][1]
-    stop(first)
+    msg <- strsplit(stacktrace, "java.lang.IllegalArgumentException: ", fixed = TRUE)[[1]]
+    # Extract "Error in ..." message.
+    rmsg <- msg[1]
+    # Extract the first message of JVM exception.
+    first <- strsplit(msg[2], "\r?\n\tat")[[1]][1]
+    stop(paste0(rmsg, "illegal argument - ", first), call. = FALSE)
   } else if (any(grep("org.apache.spark.sql.AnalysisException: ", stacktrace))) {
-    msg <- strsplit(stacktrace, "org.apache.spark.sql.AnalysisException: ", fixed = TRUE)[[1]][2]
-    first <- strsplit(msg, "\r?\n\tat")[[1]][1]
-    stop(first)
+    msg <- strsplit(stacktrace, "org.apache.spark.sql.AnalysisException: ", fixed = TRUE)[[1]]
+    # Extract "Error in ..." message.
+    rmsg <- msg[1]
+    # Extract the first message of JVM exception.
+    first <- strsplit(msg[2], "\r?\n\tat")[[1]][1]
+    stop(paste0(rmsg, "analysis error - ", first), call. = FALSE)
   } else {
-    stop(stacktrace)
+    stop(stacktrace, call. = FALSE)
   }
 }
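The end-to-end effect of the rewritten helper, roughly (the raw JVM error below is abbreviated and illustrative; the rewritten message matches the updated test expectations):

# Before: a failing JVM call surfaced the raw error from invokeJava, e.g.
#   Error in invokeJava(...) :
#     java.lang.IllegalArgumentException: 'path' is not specified
#       at org.apache.spark...  (full Java stack trace follows)
#
# After: handledCallJMethod(write, "save") catches it, and captureJVMException(e, "save")
# rethrows only the first exception message with a short prefix:
#   Error in save : illegal argument - 'path' is not specified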

16 changes: 9 additions & 7 deletions R/pkg/inst/tests/testthat/test_sparkSQL.R

@@ -2549,14 +2549,15 @@ test_that("Call DataFrameWriter.save() API in Java without path and check argume
   # This tests if the exception is thrown from JVM not from SparkR side.
   # It makes sure that we can omit path argument in write.df API and then it calls
   # DataFrameWriter.save() without path.
-  expect_error(write.df(df, source = "csv"), "'path' is not specified")
+  expect_error(write.df(df, source = "csv"),
+               "Error in save : illegal argument - 'path' is not specified")
 
   # Arguments checking in R side.
   expect_error(write.df(df, "data.tmp", source = c(1, 2)),
-               paste("source should be character, null or omitted. It is the datasource specified",
+               paste("source should be character, NULL or omitted. It is the datasource specified",
                      "in 'spark.sql.sources.default' configuration by default."))
   expect_error(write.df(df, path = c(3)),
-               "path should be charactor, null or omitted.")
+               "path should be charactor, NULL or omitted.")
   expect_error(write.df(df, mode = TRUE),
                "mode should be charactor or omitted. It is 'error' by default.")
 })
@@ -2566,14 +2567,15 @@ test_that("Call DataFrameWriter.load() API in Java without path and check argume
   # It makes sure that we can omit path argument in read.df API and then it calls
   # DataFrameWriter.load() without path.
   expect_error(read.df(source = "json"),
-               "Unable to infer schema for JSON at . It must be specified manually")
-  expect_error(read.df("arbitrary_path"), "Path does not exist:")
+               paste("Error in loadDF : analysis error - Unable to infer schema for JSON at .",
+                     "It must be specified manually"))
+  expect_error(read.df("arbitrary_path"), "Error in loadDF : analysis error - Path does not exist")
 
   # Arguments checking in R side.
   expect_error(read.df(path = c(3)),
-               "path should be charactor, null or omitted.")
+               "path should be charactor, NULL or omitted.")
   expect_error(read.df(jsonPath, source = c(1, 2)),
-               paste("source should be character, null or omitted. It is the datasource specified",
+               paste("source should be character, NULL or omitted. It is the datasource specified",
                      "in 'spark.sql.sources.default' configuration by default."))
 })
9 changes: 6 additions & 3 deletions R/pkg/inst/tests/testthat/test_utils.R

@@ -167,10 +167,13 @@ test_that("convertToJSaveMode", {
 })
 
 test_that("captureJVMException", {
-  expect_error(tryCatch(callJStatic("org.apache.spark.sql.api.r.SQLUtils", "getSQLDataType",
+  method <- "getSQLDataType"
+  expect_error(tryCatch(callJStatic("org.apache.spark.sql.api.r.SQLUtils", method,
                                     "unknown"),
-                        error = captureJVMException),
-               "Invalid type unknown")
+                        error = function(e) {
+                          captureJVMException(e, method)
+                        }),
+               "Error in getSQLDataType : illegal argument - Invalid type unknown")
 })
 
 test_that("hashCode", {

Review comment on the callJStatic call above - felixcheung (Member, Oct 5, 2016): let's change this test to handledCallJStatic too?
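A sketch of what that follow-up might look like, using the handledCallJStatic wrapper added in utils.R above (hypothetical, not part of this diff; the expected message mirrors the test expectation already shown):

# Hypothetical revision of the test exercising the wrapper directly,
# instead of hand-rolling the tryCatch around callJStatic.
test_that("captureJVMException", {
  expect_error(handledCallJStatic("org.apache.spark.sql.api.r.SQLUtils", "getSQLDataType",
                                  "unknown"),
               "Error in getSQLDataType : illegal argument - Invalid type unknown")
})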