From 41611e594c4e5f8e6aedb2846abfda74dac84a80 Mon Sep 17 00:00:00 2001
From: hyukjinkwon
Date: Sat, 24 Sep 2016 19:36:16 +0900
Subject: [PATCH 01/12] Make path optional in write.df

---
 R/pkg/R/DataFrame.R | 8 ++++----
 R/pkg/R/generics.R  | 4 ++--
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R
index 40f1f0f4429e..d2a88b502af7 100644
--- a/R/pkg/R/DataFrame.R
+++ b/R/pkg/R/DataFrame.R
@@ -2622,21 +2622,21 @@ setMethod("except",
 #' }
 #' @note write.df since 1.4.0
 setMethod("write.df",
-          signature(df = "SparkDataFrame", path = "character"),
-          function(df, path, source = NULL, mode = "error", ...) {
+          signature(df = "SparkDataFrame"),
+          function(df, path = NULL, source = NULL, mode = "error", ...) {
             if (is.null(source)) {
               source <- getDefaultSqlSource()
             }
             jmode <- convertToJSaveMode(mode)
             options <- varargsToEnv(...)
             if (!is.null(path)) {
-              options[["path"]] <- path
+              options[["path"]] <- path
             }
             write <- callJMethod(df@sdf, "write")
             write <- callJMethod(write, "format", source)
             write <- callJMethod(write, "mode", jmode)
             write <- callJMethod(write, "options", options)
-            write <- callJMethod(write, "save", path)
+            write <- callJMethod(write, "save")
           })
 
 #' @rdname write.df
diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R
index 67a999da9bc2..90a02e277831 100644
--- a/R/pkg/R/generics.R
+++ b/R/pkg/R/generics.R
@@ -633,7 +633,7 @@ setGeneric("transform", function(`_data`, ...) {standardGeneric("transform") })
 
 #' @rdname write.df
 #' @export
-setGeneric("write.df", function(df, path, source = NULL, mode = "error", ...) {
+setGeneric("write.df", function(df, path = NULL, source = NULL, mode = "error", ...) {
   standardGeneric("write.df")
 })
 
@@ -732,7 +732,7 @@ setGeneric("withColumnRenamed",
 
 #' @rdname write.df
 #' @export
-setGeneric("write.df", function(df, path, ...) { standardGeneric("write.df") })
+setGeneric("write.df", function(df, path = NULL, ...) { standardGeneric("write.df") })
 
 #' @rdname randomSplit
 #' @export

From 2d76e7c54086dca703f4ffc180855c837bbc74e6 Mon Sep 17 00:00:00 2001
From: hyukjinkwon
Date: Sat, 24 Sep 2016 21:28:53 +0900
Subject: [PATCH 02/12] Fix documentation for CRAN check

---
 R/pkg/R/DataFrame.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R
index d2a88b502af7..f2baa2a82af4 100644
--- a/R/pkg/R/DataFrame.R
+++ b/R/pkg/R/DataFrame.R
@@ -2608,7 +2608,7 @@ setMethod("except",
 #' @param ... additional argument(s) passed to the method.
 #'
 #' @family SparkDataFrame functions
-#' @aliases write.df,SparkDataFrame,character-method
+#' @aliases write.df,SparkDataFrame-method
 #' @rdname write.df
 #' @name write.df
 #' @export

From c2a64dba42d68561c23583ca8bbb79aa4edfb707 Mon Sep 17 00:00:00 2001
From: hyukjinkwon
Date: Sun, 25 Sep 2016 20:03:25 +0900
Subject: [PATCH 03/12] Add a test for calling DataFrameWriter.save() API without a path

---
 R/pkg/inst/tests/testthat/test_sparkSQL.R | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/R/pkg/inst/tests/testthat/test_sparkSQL.R b/R/pkg/inst/tests/testthat/test_sparkSQL.R
index 9d874a098871..f20199e24c42 100644
--- a/R/pkg/inst/tests/testthat/test_sparkSQL.R
+++ b/R/pkg/inst/tests/testthat/test_sparkSQL.R
@@ -2544,6 +2544,15 @@ test_that("Spark version from SparkSession", {
   expect_equal(ver, version)
 })
 
+test_that("Call DataFrameWriter.save() API in Java without path", {
+  df <- read.df(jsonPath, "json")
+  # This tests if the exception is thrown from Spark side not from SparkR side.
+  # It makes sure that we can omit path argument in write.df API and then it calls
+  # DataFrameWriter.save() without path.
+  expect_error(write.df(df, source = "csv"),
+               "java.lang.IllegalArgumentException: 'path' is not specified")
+})
+
 unlink(parquetPath)
 unlink(orcPath)
 unlink(jsonPath)

From 5c3d2228aeddf3a43888c170e00729ecbfe6e4bd Mon Sep 17 00:00:00 2001
From: hyukjinkwon
Date: Mon, 26 Sep 2016 12:42:28 +0900
Subject: [PATCH 04/12] Support omitting path for read.df and make the error messages better

---
 R/pkg/R/DataFrame.R                       | 11 +++++++-
 R/pkg/R/SQLContext.R                      | 22 +++++++++++-----
 R/pkg/R/utils.R                           | 15 +++++++++++
 R/pkg/inst/tests/testthat/test_sparkSQL.R | 31 ++++++++++++++++++++---
 4 files changed, 68 insertions(+), 11 deletions(-)

diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R
index f2baa2a82af4..1f3d66ed2821 100644
--- a/R/pkg/R/DataFrame.R
+++ b/R/pkg/R/DataFrame.R
@@ -2624,6 +2624,15 @@ setMethod("except",
 setMethod("write.df",
           signature(df = "SparkDataFrame"),
           function(df, path = NULL, source = NULL, mode = "error", ...) {
+            if (!is.character(path) && !is.null(path)) {
+              stop("path should be charactor, null or omitted.")
+            }
+            if (!is.character(source) && !is.null(source)) {
+              stop("source should be charactor, null or omitted. It is 'parquet' by default.")
+            }
+            if (!is.character(mode)) {
+              stop("mode should be charactor or omitted. It is 'error' by default.")
+            }
             if (is.null(source)) {
               source <- getDefaultSqlSource()
             }
@@ -2636,7 +2645,7 @@ setMethod("write.df",
             write <- callJMethod(write, "format", source)
             write <- callJMethod(write, "mode", jmode)
             write <- callJMethod(write, "options", options)
-            write <- callJMethod(write, "save")
+            write <- tryCatch(callJMethod(write, "save"), error = captureJVMException)
           })
 
 #' @rdname write.df
diff --git a/R/pkg/R/SQLContext.R b/R/pkg/R/SQLContext.R
index ce531c3f8886..95edfe7aa304 100644
--- a/R/pkg/R/SQLContext.R
+++ b/R/pkg/R/SQLContext.R
@@ -771,6 +771,12 @@ dropTempView <- function(viewName) {
 #' @method read.df default
 #' @note read.df since 1.4.0
 read.df.default <- function(path = NULL, source = NULL, schema = NULL, na.strings = "NA", ...) {
+  if (!is.character(path) && !is.null(path)) {
+    stop("path should be charactor, null or omitted.")
+  }
+  if (!is.character(source) && !is.null(source)) {
+    stop("source should be charactor, null or omitted. It is 'parquet' by default.")
+  }
   sparkSession <- getSparkSession()
   options <- varargsToEnv(...)
   if (!is.null(path)) {
@@ -784,16 +790,20 @@ read.df.default <- function(path = NULL, source = NULL, schema = NULL, na.string
   }
   if (!is.null(schema)) {
     stopifnot(class(schema) == "structType")
-    sdf <- callJStatic("org.apache.spark.sql.api.r.SQLUtils", "loadDF", sparkSession, source,
-                       schema$jobj, options)
+    sdf <- tryCatch({
+      callJStatic("org.apache.spark.sql.api.r.SQLUtils", "loadDF", sparkSession, source,
+                  schema$jobj, options)
+    }, error = captureJVMException)
   } else {
-    sdf <- callJStatic("org.apache.spark.sql.api.r.SQLUtils",
-                       "loadDF", sparkSession, source, options)
+    sdf <- tryCatch({
+      callJStatic("org.apache.spark.sql.api.r.SQLUtils",
+                  "loadDF", sparkSession, source, options)
+    }, error = captureJVMException)
   }
   dataFrame(sdf)
 }
 
-read.df <- function(x, ...) {
+read.df <- function(x = NULL, ...) {
   dispatchFunc("read.df(path = NULL, source = NULL, schema = NULL, ...)", x, ...)
 }
 
@@ -805,7 +815,7 @@ loadDF.default <- function(path = NULL, source = NULL, schema = NULL, ...) {
   read.df(path, source, schema, ...)
 }
 
-loadDF <- function(x, ...) {
+loadDF <- function(x = NULL, ...) {
   dispatchFunc("loadDF(path = NULL, source = NULL, schema = NULL, ...)", x, ...)
 }
 
diff --git a/R/pkg/R/utils.R b/R/pkg/R/utils.R
index 248c57532b6c..0729032f9deb 100644
--- a/R/pkg/R/utils.R
+++ b/R/pkg/R/utils.R
@@ -698,6 +698,21 @@ isSparkRShell <- function() {
   grepl(".*shell\\.R$", Sys.getenv("R_PROFILE_USER"), perl = TRUE)
 }
 
+captureJVMException <- function(e) {
+  stacktrace <- as.character(e)
+  if (any(grep("java.lang.IllegalArgumentException: ", stacktrace))) {
+    msg <- strsplit(stacktrace, "java.lang.IllegalArgumentException: ", fixed = TRUE)[[1]][2]
+    first <- strsplit(msg, "\r?\n\tat")[[1]][1]
+    stop(first)
+  } else if (any(grep("org.apache.spark.sql.AnalysisException: ", stacktrace))) {
+    msg <- strsplit(stacktrace, "org.apache.spark.sql.AnalysisException: ", fixed = TRUE)[[1]][2]
+    first <- strsplit(msg, "\r?\n\tat")[[1]][1]
+    stop(first)
+  } else {
+    stop(stacktrace)
+  }
+}
+
 # rbind a list of rows with raw (binary) columns
 #
 # @param inputData a list of rows, with each row a list
diff --git a/R/pkg/inst/tests/testthat/test_sparkSQL.R b/R/pkg/inst/tests/testthat/test_sparkSQL.R
index f20199e24c42..2fcd21bd8e57 100644
--- a/R/pkg/inst/tests/testthat/test_sparkSQL.R
+++ b/R/pkg/inst/tests/testthat/test_sparkSQL.R
@@ -2544,13 +2544,36 @@ test_that("Spark version from SparkSession", {
   expect_equal(ver, version)
 })
 
-test_that("Call DataFrameWriter.save() API in Java without path", {
+test_that("Call DataFrameWriter.save() API in Java without path and check argument types", {
   df <- read.df(jsonPath, "json")
-  # This tests if the exception is thrown from Spark side not from SparkR side.
+  # This tests if the exception is thrown from JVM not from SparkR side.
   # It makes sure that we can omit path argument in write.df API and then it calls
   # DataFrameWriter.save() without path.
-  expect_error(write.df(df, source = "csv"),
-               "java.lang.IllegalArgumentException: 'path' is not specified")
+  expect_error(write.df(df, source = "csv"), "'path' is not specified")
+
+  # Arguments checking in R side.
+  expect_error(write.df(df, "data.tmp", source = c(1, 2)),
+               "source should be charactor, null or omitted. It is 'parquet' by default.")
+  expect_error(write.df(df, path = c(3)),
+               "path should be charactor, null or omitted.")
+  expect_error(write.df(df, mode = TRUE),
+               "mode should be charactor or omitted. It is 'error' by default.")
+})
+
+test_that("Call DataFrameWriter.load() API in Java without path and check argument types", {
+  df <- read.df(jsonPath, "json")
+  # This tests if the exception is thrown from JVM not from SparkR side.
+  # It makes sure that we can omit path argument in read.df API and then it calls
+  # DataFrameWriter.load() without path.
+  expect_error(read.df(source = "json"),
+               "Unable to infer schema for JSON at . It must be specified manually")
+  expect_error(read.df("arbitrary_path"), "Path does not exist:")
+
+  # Arguments checking in R side.
+  expect_error(read.df(path = c(3)),
+               "path should be charactor, null or omitted.")
+  expect_error(read.df(jsonPath, source = c(1, 2)),
+               "source should be charactor, null or omitted. It is 'parquet' by default.")
 })
 
 unlink(parquetPath)
 unlink(orcPath)
 unlink(jsonPath)

From 1440195f49b346bde24843bf5c97360dc0488daf Mon Sep 17 00:00:00 2001
From: hyukjinkwon
Date: Mon, 26 Sep 2016 13:09:59 +0900
Subject: [PATCH 05/12] Fix style

---
 R/pkg/R/SQLContext.R | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/R/pkg/R/SQLContext.R b/R/pkg/R/SQLContext.R
index 95edfe7aa304..582bad45e77b 100644
--- a/R/pkg/R/SQLContext.R
+++ b/R/pkg/R/SQLContext.R
@@ -790,15 +790,13 @@ read.df.default <- function(path = NULL, source = NULL, schema = NULL, na.string
   }
   if (!is.null(schema)) {
     stopifnot(class(schema) == "structType")
-    sdf <- tryCatch({
-      callJStatic("org.apache.spark.sql.api.r.SQLUtils", "loadDF", sparkSession, source,
-                  schema$jobj, options)
-    }, error = captureJVMException)
+    sdf <- tryCatch(callJStatic("org.apache.spark.sql.api.r.SQLUtils", "loadDF", sparkSession,
+                                source, schema$jobj, options),
+                    error = captureJVMException)
   } else {
-    sdf <- tryCatch({
-      callJStatic("org.apache.spark.sql.api.r.SQLUtils",
-                  "loadDF", sparkSession, source, options)
-    }, error = captureJVMException)
+    sdf <- tryCatch(callJStatic("org.apache.spark.sql.api.r.SQLUtils", "loadDF", sparkSession,
+                                source, options),
+                    error = captureJVMException)
   }
   dataFrame(sdf)
 }

From 07dca5d3d76b9e58dd1b2cdef8036c937a01f51b Mon Sep 17 00:00:00 2001
From: hyukjinkwon
Date: Mon, 26 Sep 2016 13:31:13 +0900
Subject: [PATCH 06/12] Remove unneeded dataframe

---
 R/pkg/inst/tests/testthat/test_sparkSQL.R | 1 -
 1 file changed, 1 deletion(-)

diff --git a/R/pkg/inst/tests/testthat/test_sparkSQL.R b/R/pkg/inst/tests/testthat/test_sparkSQL.R
index 2fcd21bd8e57..0b07381cd1b3 100644
--- a/R/pkg/inst/tests/testthat/test_sparkSQL.R
+++ b/R/pkg/inst/tests/testthat/test_sparkSQL.R
@@ -2561,7 +2561,6 @@ test_that("Call DataFrameWriter.save() API in Java without path and check argume
 })
 
 test_that("Call DataFrameWriter.load() API in Java without path and check argument types", {
-  df <- read.df(jsonPath, "json")
   # This tests if the exception is thrown from JVM not from SparkR side.
   # It makes sure that we can omit path argument in read.df API and then it calls
   # DataFrameWriter.load() without path.

From 11ae83276b01977283051d32233932909ce113dd Mon Sep 17 00:00:00 2001
From: hyukjinkwon
Date: Mon, 26 Sep 2016 18:36:29 +0900
Subject: [PATCH 07/12] Address some comments

---
 R/pkg/R/DataFrame.R                       | 3 ++-
 R/pkg/R/SQLContext.R                      | 3 ++-
 R/pkg/inst/tests/testthat/test_sparkSQL.R | 6 ++++--
 3 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R
index 1f3d66ed2821..da0052a2408e 100644
--- a/R/pkg/R/DataFrame.R
+++ b/R/pkg/R/DataFrame.R
@@ -2628,7 +2628,8 @@ setMethod("write.df",
               stop("path should be charactor, null or omitted.")
             }
             if (!is.character(source) && !is.null(source)) {
-              stop("source should be charactor, null or omitted. It is 'parquet' by default.")
+              stop("source should be character, null or omitted. It is the datasource specified ",
+                   "in 'spark.sql.sources.default' configuration by default.")
             }
             if (!is.character(mode)) {
               stop("mode should be charactor or omitted. It is 'error' by default.")
             }
diff --git a/R/pkg/R/SQLContext.R b/R/pkg/R/SQLContext.R
index 582bad45e77b..4c1b04eeb467 100644
--- a/R/pkg/R/SQLContext.R
+++ b/R/pkg/R/SQLContext.R
@@ -775,7 +775,8 @@ read.df.default <- function(path = NULL, source = NULL, schema = NULL, na.string
     stop("path should be charactor, null or omitted.")
   }
   if (!is.character(source) && !is.null(source)) {
-    stop("source should be charactor, null or omitted. It is 'parquet' by default.")
+    stop("source should be character, null or omitted. It is the datasource specified ",
+         "in 'spark.sql.sources.default' configuration by default.")
   }
   sparkSession <- getSparkSession()
diff --git a/R/pkg/inst/tests/testthat/test_sparkSQL.R b/R/pkg/inst/tests/testthat/test_sparkSQL.R
index 0b07381cd1b3..f443ec62e235 100644
--- a/R/pkg/inst/tests/testthat/test_sparkSQL.R
+++ b/R/pkg/inst/tests/testthat/test_sparkSQL.R
@@ -2553,7 +2553,8 @@ test_that("Call DataFrameWriter.save() API in Java without path and check argume
 
   # Arguments checking in R side.
   expect_error(write.df(df, "data.tmp", source = c(1, 2)),
-               "source should be charactor, null or omitted. It is 'parquet' by default.")
+               paste("source should be character, null or omitted. It is the datasource specified",
+                     "in 'spark.sql.sources.default' configuration by default."))
   expect_error(write.df(df, path = c(3)),
                "path should be charactor, null or omitted.")
   expect_error(write.df(df, mode = TRUE),
@@ -2572,7 +2573,8 @@ test_that("Call DataFrameWriter.load() API in Java without path and check argume
   expect_error(read.df(path = c(3)),
                "path should be charactor, null or omitted.")
   expect_error(read.df(jsonPath, source = c(1, 2)),
-               "source should be charactor, null or omitted. It is 'parquet' by default.")
+               paste("source should be character, null or omitted. It is the datasource specified",
+                     "in 'spark.sql.sources.default' configuration by default."))
 })

From 37480be0f76b7e4404c639e62137fd301989aaf3 Mon Sep 17 00:00:00 2001
From: hyukjinkwon
Date: Mon, 26 Sep 2016 19:43:08 +0900
Subject: [PATCH 08/12] Address comments more and add a unittest for captureJVMException

---
 R/pkg/R/DataFrame.R                    | 4 ++--
 R/pkg/R/SQLContext.R                   | 4 ++--
 R/pkg/inst/tests/testthat/test_utils.R | 7 +++++++
 3 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R
index da0052a2408e..820d297266d5 100644
--- a/R/pkg/R/DataFrame.R
+++ b/R/pkg/R/DataFrame.R
@@ -2624,10 +2624,10 @@ setMethod("except",
 setMethod("write.df",
           signature(df = "SparkDataFrame"),
           function(df, path = NULL, source = NULL, mode = "error", ...) {
-            if (!is.character(path) && !is.null(path)) {
+            if (!is.null(path) && !is.character(path)) {
              stop("path should be charactor, null or omitted.")
             }
-            if (!is.character(source) && !is.null(source)) {
+            if (!is.null(source) && !is.character(source)) {
              stop("source should be character, null or omitted. It is the datasource specified ",
                   "in 'spark.sql.sources.default' configuration by default.")
             }
diff --git a/R/pkg/R/SQLContext.R b/R/pkg/R/SQLContext.R
index 4c1b04eeb467..33a42dd0c75f 100644
--- a/R/pkg/R/SQLContext.R
+++ b/R/pkg/R/SQLContext.R
@@ -771,10 +771,10 @@ dropTempView <- function(viewName) {
 #' @method read.df default
 #' @note read.df since 1.4.0
 read.df.default <- function(path = NULL, source = NULL, schema = NULL, na.strings = "NA", ...) {
-  if (!is.character(path) && !is.null(path)) {
+  if (!is.null(path) && !is.character(path)) {
     stop("path should be charactor, null or omitted.")
   }
-  if (!is.character(source) && !is.null(source)) {
+  if (!is.null(source) && !is.character(source)) {
     stop("source should be character, null or omitted. It is the datasource specified ",
         "in 'spark.sql.sources.default' configuration by default.")
   }
diff --git a/R/pkg/inst/tests/testthat/test_utils.R b/R/pkg/inst/tests/testthat/test_utils.R
index 77f25292f3f2..8dfc857df149 100644
--- a/R/pkg/inst/tests/testthat/test_utils.R
+++ b/R/pkg/inst/tests/testthat/test_utils.R
@@ -166,6 +166,13 @@ test_that("convertToJSaveMode", {
                'mode should be one of "append", "overwrite", "error", "ignore"') #nolint
 })
 
+test_that("captureJVMException", {
+  expect_error(tryCatch(callJStatic("org.apache.spark.sql.api.r.SQLUtils", "getSQLDataType",
+                                    "unknown"),
+                        error = captureJVMException),
+               "Invalid type unknown")
+})
+
 test_that("hashCode", {
   expect_error(hashCode("bc53d3605e8a5b7de1e8e271c2317645"), NA)
 })

From 05cb5afa3a45017dae84409f60bb52c7f25bf5e0 Mon Sep 17 00:00:00 2001
From: hyukjinkwon
Date: Mon, 3 Oct 2016 18:36:22 +0900
Subject: [PATCH 09/12] Improve error message

---
 R/pkg/R/utils.R                        | 16 +++++++++-------
 R/pkg/inst/tests/testthat/test_utils.R |  2 +-
 2 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/R/pkg/R/utils.R b/R/pkg/R/utils.R
index 0729032f9deb..11935d2a1a09 100644
--- a/R/pkg/R/utils.R
+++ b/R/pkg/R/utils.R
@@ -701,15 +701,17 @@ captureJVMException <- function(e) {
   stacktrace <- as.character(e)
   if (any(grep("java.lang.IllegalArgumentException: ", stacktrace))) {
-    msg <- strsplit(stacktrace, "java.lang.IllegalArgumentException: ", fixed = TRUE)[[1]][2]
-    first <- strsplit(msg, "\r?\n\tat")[[1]][1]
-    stop(first)
+    msg <- strsplit(stacktrace, "java.lang.IllegalArgumentException: ", fixed = TRUE)[[1]]
+    rmsg <- msg[1]
+    first <- strsplit(msg[2], "\r?\n\tat")[[1]][1]
+    stop(paste0(rmsg, "illegal argument - ", first), call. = FALSE)
   } else if (any(grep("org.apache.spark.sql.AnalysisException: ", stacktrace))) {
-    msg <- strsplit(stacktrace, "org.apache.spark.sql.AnalysisException: ", fixed = TRUE)[[1]][2]
-    first <- strsplit(msg, "\r?\n\tat")[[1]][1]
-    stop(first)
+    msg <- strsplit(stacktrace, "org.apache.spark.sql.AnalysisException: ", fixed = TRUE)[[1]]
+    rmsg <- msg[1]
+    first <- strsplit(msg[2], "\r?\n\tat")[[1]][1]
+    stop(paste0(rmsg, "analysis error - ", first), call. = FALSE)
   } else {
-    stop(stacktrace)
+    stop(stacktrace, call. = FALSE)
   }
 }
diff --git a/R/pkg/inst/tests/testthat/test_utils.R b/R/pkg/inst/tests/testthat/test_utils.R
index 8dfc857df149..1e0149369063 100644
--- a/R/pkg/inst/tests/testthat/test_utils.R
+++ b/R/pkg/inst/tests/testthat/test_utils.R
@@ -170,7 +170,7 @@ test_that("captureJVMException", {
   expect_error(tryCatch(callJStatic("org.apache.spark.sql.api.r.SQLUtils", "getSQLDataType",
                                     "unknown"),
                         error = captureJVMException),
-               "Invalid type unknown")
+               "illegal argument - Invalid type unknown")
 })

From bcd5060ba8aa7271f823cf60d4cc8cfb36850cf3 Mon Sep 17 00:00:00 2001
From: hyukjinkwon
Date: Tue, 4 Oct 2016 19:53:26 +0900
Subject: [PATCH 10/12] Address comments about exception messages

---
 R/pkg/R/DataFrame.R                    |  2 +-
 R/pkg/R/SQLContext.R                   | 11 ++++----
 R/pkg/R/utils.R                        | 39 ++++++++++++++++++++++++--
 R/pkg/inst/tests/testthat/test_utils.R |  9 ++++--
 4 files changed, 49 insertions(+), 12 deletions(-)

diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R
index 820d297266d5..11725117f04e 100644
--- a/R/pkg/R/DataFrame.R
+++ b/R/pkg/R/DataFrame.R
@@ -2646,7 +2646,7 @@ setMethod("write.df",
             write <- callJMethod(write, "format", source)
             write <- callJMethod(write, "mode", jmode)
             write <- callJMethod(write, "options", options)
-            write <- tryCatch(callJMethod(write, "save"), error = captureJVMException)
+            write <- handledCallJMethod(write, "save")
           })
 
 #' @rdname write.df
diff --git a/R/pkg/R/SQLContext.R b/R/pkg/R/SQLContext.R
index 33a42dd0c75f..289d7f54b2fb 100644
--- a/R/pkg/R/SQLContext.R
+++ b/R/pkg/R/SQLContext.R
@@ -791,13 +791,12 @@ read.df.default <- function(path = NULL, source = NULL, schema = NULL, na.string
   }
   if (!is.null(schema)) {
     stopifnot(class(schema) == "structType")
-    sdf <- tryCatch(callJStatic("org.apache.spark.sql.api.r.SQLUtils", "loadDF", sparkSession,
-                                source, schema$jobj, options),
-                    error = captureJVMException)
+    method <- "loadDF"
+    sdf <- handledCallJStatic("org.apache.spark.sql.api.r.SQLUtils", method, sparkSession,
+                              source, schema$jobj, options)
   } else {
-    sdf <- tryCatch(callJStatic("org.apache.spark.sql.api.r.SQLUtils", "loadDF", sparkSession,
-                                source, options),
-                    error = captureJVMException)
+    sdf <- handledCallJStatic("org.apache.spark.sql.api.r.SQLUtils", "loadDF", sparkSession,
+                              source, options)
   }
   dataFrame(sdf)
 }
diff --git a/R/pkg/R/utils.R b/R/pkg/R/utils.R
index 11935d2a1a09..e69666453480 100644
--- a/R/pkg/R/utils.R
+++ b/R/pkg/R/utils.R
@@ -698,16 +698,51 @@ isSparkRShell <- function() {
   grepl(".*shell\\.R$", Sys.getenv("R_PROFILE_USER"), perl = TRUE)
 }
 
-captureJVMException <- function(e) {
-  stacktrace <- as.character(e)
+# Works identically with `callJStatic(...)` but throws a pretty formatted exception.
+handledCallJStatic <- function(cls, method, ...) {
+  result <- tryCatch(callJStatic(cls, method, ...),
+                     error = function(e) {
+                       captureJVMException(e, method)
+                     })
+  result
+}
+
+# Works identically with `callJMethod(...)` but throws a pretty formatted exception.
+handledCallJMethod <- function(obj, method, ...) {
+  result <- tryCatch(callJMethod(obj, method, ...),
+                     error = function(e) {
+                       captureJVMException(e, method)
+                     })
+  result
+}
+
+captureJVMException <- function(e, method) {
+  rawmsg <- as.character(e)
+  if (any(grep("^Error in .*?: ", rawmsg))) {
+    # If the exception message starts with "Error in ...", this is possibly
+    # "Error in invokeJava(...)". Here, it replaces the characters to
+    # `paste("Error in", method, ":")` in order to identify which function
+    # was called in JVM side.
+
+    stacktrace <- strsplit(rawmsg, "Error in .*?: ")[[1]]
+    rmsg <- paste("Error in", method, ":")
+    stacktrace <- paste(rmsg[1], stacktrace[2])
+  } else {
+    # Otherwise, do not convert the error message just in case.
+    stacktrace <- rawmsg
+  }
+
   if (any(grep("java.lang.IllegalArgumentException: ", stacktrace))) {
     msg <- strsplit(stacktrace, "java.lang.IllegalArgumentException: ", fixed = TRUE)[[1]]
+    # Extract "Error in ..." message.
     rmsg <- msg[1]
+    # Extract the first message of JVM exception.
     first <- strsplit(msg[2], "\r?\n\tat")[[1]][1]
     stop(paste0(rmsg, "illegal argument - ", first), call. = FALSE)
   } else if (any(grep("org.apache.spark.sql.AnalysisException: ", stacktrace))) {
     msg <- strsplit(stacktrace, "org.apache.spark.sql.AnalysisException: ", fixed = TRUE)[[1]]
+    # Extract "Error in ..." message.
     rmsg <- msg[1]
+    # Extract the first message of JVM exception.
     first <- strsplit(msg[2], "\r?\n\tat")[[1]][1]
     stop(paste0(rmsg, "analysis error - ", first), call. = FALSE)
   } else {
diff --git a/R/pkg/inst/tests/testthat/test_utils.R b/R/pkg/inst/tests/testthat/test_utils.R
index 1e0149369063..69ed5549168b 100644
--- a/R/pkg/inst/tests/testthat/test_utils.R
+++ b/R/pkg/inst/tests/testthat/test_utils.R
@@ -167,10 +167,13 @@ test_that("convertToJSaveMode", {
 })
 
 test_that("captureJVMException", {
-  expect_error(tryCatch(callJStatic("org.apache.spark.sql.api.r.SQLUtils", "getSQLDataType",
+  method <- "getSQLDataType"
+  expect_error(tryCatch(callJStatic("org.apache.spark.sql.api.r.SQLUtils", method,
                                     "unknown"),
-                        error = captureJVMException),
-               "illegal argument - Invalid type unknown")
+                        error = function(e) {
+                          captureJVMException(e, method)
+                        }),
+               "Error in getSQLDataType : illegal argument - Invalid type unknown")
 })
 
 test_that("hashCode", {
   expect_error(hashCode("bc53d3605e8a5b7de1e8e271c2317645"), NA)
 })

From 08bdc4d95c26c152279982075130fe5edaf3e456 Mon Sep 17 00:00:00 2001
From: hyukjinkwon
Date: Tue, 4 Oct 2016 20:01:51 +0900
Subject: [PATCH 11/12] Remove unused variable

---
 R/pkg/R/SQLContext.R | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/R/pkg/R/SQLContext.R b/R/pkg/R/SQLContext.R
index 289d7f54b2fb..19c43781c47a 100644
--- a/R/pkg/R/SQLContext.R
+++ b/R/pkg/R/SQLContext.R
@@ -791,8 +791,7 @@ read.df.default <- function(path = NULL, source = NULL, schema = NULL, na.string
   }
   if (!is.null(schema)) {
     stopifnot(class(schema) == "structType")
-    method <- "loadDF"
-    sdf <- handledCallJStatic("org.apache.spark.sql.api.r.SQLUtils", method, sparkSession,
+    sdf <- handledCallJStatic("org.apache.spark.sql.api.r.SQLUtils", "loadDF", sparkSession,
                               source, schema$jobj, options)
   } else {
     sdf <- handledCallJStatic("org.apache.spark.sql.api.r.SQLUtils", "loadDF", sparkSession,

From c8a433bf7b52b04451412adc5b7b67003a93de12 Mon Sep 17 00:00:00 2001
From: hyukjinkwon
Date: Tue, 4 Oct 2016 20:25:52 +0900
Subject: [PATCH 12/12] Add missed changes more

---
 R/pkg/R/DataFrame.R                       |  4 ++--
 R/pkg/R/SQLContext.R                      |  4 ++--
 R/pkg/inst/tests/testthat/test_sparkSQL.R | 16 +++++++++-------
 3 files changed, 13 insertions(+), 11 deletions(-)

diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R
index 11725117f04e..75861d5de709 100644
--- a/R/pkg/R/DataFrame.R
+++ b/R/pkg/R/DataFrame.R
@@ -2625,10 +2625,10 @@ setMethod("write.df",
           signature(df = "SparkDataFrame"),
           function(df, path = NULL, source = NULL, mode = "error", ...) {
             if (!is.null(path) && !is.character(path)) {
-              stop("path should be charactor, null or omitted.")
+              stop("path should be charactor, NULL or omitted.")
             }
             if (!is.null(source) && !is.character(source)) {
-              stop("source should be character, null or omitted. It is the datasource specified ",
+              stop("source should be character, NULL or omitted. It is the datasource specified ",
                    "in 'spark.sql.sources.default' configuration by default.")
             }
             if (!is.character(mode)) {
diff --git a/R/pkg/R/SQLContext.R b/R/pkg/R/SQLContext.R
index 19c43781c47a..baa87824beb9 100644
--- a/R/pkg/R/SQLContext.R
+++ b/R/pkg/R/SQLContext.R
@@ -772,10 +772,10 @@ dropTempView <- function(viewName) {
 #' @note read.df since 1.4.0
 read.df.default <- function(path = NULL, source = NULL, schema = NULL, na.strings = "NA", ...) {
   if (!is.null(path) && !is.character(path)) {
-    stop("path should be charactor, null or omitted.")
+    stop("path should be charactor, NULL or omitted.")
   }
   if (!is.null(source) && !is.character(source)) {
-    stop("source should be character, null or omitted. It is the datasource specified ",
+    stop("source should be character, NULL or omitted. It is the datasource specified ",
         "in 'spark.sql.sources.default' configuration by default.")
   }
   sparkSession <- getSparkSession()
diff --git a/R/pkg/inst/tests/testthat/test_sparkSQL.R b/R/pkg/inst/tests/testthat/test_sparkSQL.R
index f443ec62e235..f5ab601f274f 100644
--- a/R/pkg/inst/tests/testthat/test_sparkSQL.R
+++ b/R/pkg/inst/tests/testthat/test_sparkSQL.R
@@ -2549,14 +2549,15 @@ test_that("Call DataFrameWriter.save() API in Java without path and check argume
   # This tests if the exception is thrown from JVM not from SparkR side.
   # It makes sure that we can omit path argument in write.df API and then it calls
   # DataFrameWriter.save() without path.
-  expect_error(write.df(df, source = "csv"), "'path' is not specified")
+  expect_error(write.df(df, source = "csv"),
+               "Error in save : illegal argument - 'path' is not specified")
 
   # Arguments checking in R side.
   expect_error(write.df(df, "data.tmp", source = c(1, 2)),
-               paste("source should be character, null or omitted. It is the datasource specified",
+               paste("source should be character, NULL or omitted. It is the datasource specified",
                      "in 'spark.sql.sources.default' configuration by default."))
   expect_error(write.df(df, path = c(3)),
-               "path should be charactor, null or omitted.")
+               "path should be charactor, NULL or omitted.")
   expect_error(write.df(df, mode = TRUE),
                "mode should be charactor or omitted. It is 'error' by default.")
 })
@@ -2566,14 +2567,15 @@ test_that("Call DataFrameWriter.load() API in Java without path and check argume
   # It makes sure that we can omit path argument in read.df API and then it calls
   # DataFrameWriter.load() without path.
   expect_error(read.df(source = "json"),
-               "Unable to infer schema for JSON at . It must be specified manually")
-  expect_error(read.df("arbitrary_path"), "Path does not exist:")
+               paste("Error in loadDF : analysis error - Unable to infer schema for JSON at .",
+                     "It must be specified manually"))
+  expect_error(read.df("arbitrary_path"), "Error in loadDF : analysis error - Path does not exist")
 
   # Arguments checking in R side.
   expect_error(read.df(path = c(3)),
-               "path should be charactor, null or omitted.")
+               "path should be charactor, NULL or omitted.")
   expect_error(read.df(jsonPath, source = c(1, 2)),
-               paste("source should be character, null or omitted. It is the datasource specified",
+               paste("source should be character, NULL or omitted. It is the datasource specified",
                      "in 'spark.sql.sources.default' configuration by default."))
 })
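
The short sketch below is not part of the patch series; it is an illustrative summary, in R, of how the SparkR API behaves once the commits above are applied. It assumes a local Spark installation with an active session and a hypothetical input file "people.json"; the error texts echo the messages asserted in the tests above.

library(SparkR)
sparkR.session()

# A path can still be given explicitly, exactly as before these changes.
df <- read.df("people.json", source = "json")

# The path argument may now be omitted. For a source that still requires one,
# the JVM exception is re-raised as a short R error such as
# "Error in save : illegal argument - 'path' is not specified".
tryCatch(write.df(df, source = "csv"),
         error = function(e) message(conditionMessage(e)))

# Argument types are validated on the R side before the JVM is called, so a
# non-character path or source fails fast with a plain R error.
tryCatch(write.df(df, path = c(3)),
         error = function(e) message(conditionMessage(e)))
tryCatch(read.df(source = c(1, 2)),
         error = function(e) message(conditionMessage(e)))

sparkR.session.stop()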