From 69e295363ac623456695a553325b0e4e7a48f806 Mon Sep 17 00:00:00 2001 From: Huaxin Gao Date: Fri, 27 Mar 2020 16:54:52 -0700 Subject: [PATCH 1/3] [SPARK-31290][SQL][R] Add back the deprecated R APIs --- R/pkg/NAMESPACE | 10 +++ R/pkg/R/DataFrame.R | 38 +++++++++++ R/pkg/R/SQLContext.R | 50 ++++++++++++++ R/pkg/R/catalog.R | 52 ++++++++++++++ R/pkg/R/generics.R | 6 ++ R/pkg/R/sparkR.R | 98 +++++++++++++++++++++++++++ R/pkg/tests/fulltests/test_sparkSQL.R | 40 +++++++++-- docs/sparkr-migration-guide.md | 5 -- 8 files changed, 289 insertions(+), 10 deletions(-) diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE index 56eceb8343bf..3b3d0e0c740e 100644 --- a/R/pkg/NAMESPACE +++ b/R/pkg/NAMESPACE @@ -28,6 +28,7 @@ importFrom("utils", "download.file", "object.size", "packageVersion", "tail", "u # S3 methods exported export("sparkR.session") +export("sparkR.init") export("sparkR.session.stop") export("sparkR.stop") export("sparkR.conf") @@ -41,6 +42,9 @@ export("sparkR.callJStatic") export("install.spark") +export("sparkRSQL.init", + "sparkRHive.init") + # MLlib integration exportMethods("glm", "spark.glm", @@ -148,6 +152,7 @@ exportMethods("arrange", "printSchema", "randomSplit", "rbind", + "registerTempTable", "rename", "repartition", "repartitionByRange", @@ -155,6 +160,7 @@ exportMethods("arrange", "sample", "sample_frac", "sampleBy", + "saveAsParquetFile", "saveAsTable", "saveDF", "schema", @@ -431,14 +437,18 @@ export("as.DataFrame", "cacheTable", "clearCache", "createDataFrame", + "createExternalTable", "createTable", "currentDatabase", + "dropTempTable", "dropTempView", + "jsonFile", "listColumns", "listDatabases", "listFunctions", "listTables", "loadDF", + "parquetFile", "read.df", "read.jdbc", "read.json", diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index 593d3ca16220..867264a36338 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -521,6 +521,32 @@ setMethod("createOrReplaceTempView", invisible(callJMethod(x@sdf, "createOrReplaceTempView", viewName)) }) +#' (Deprecated) Register Temporary Table +#' +#' Registers a SparkDataFrame as a Temporary Table in the SparkSession +#' @param x A SparkDataFrame +#' @param tableName A character vector containing the name of the table +#' +#' @seealso \link{createOrReplaceTempView} +#' @rdname registerTempTable-deprecated +#' @name registerTempTable +#' @aliases registerTempTable,SparkDataFrame,character-method +#' @examples +#'\dontrun{ +#' sparkR.session() +#' path <- "path/to/file.json" +#' df <- read.json(path) +#' registerTempTable(df, "json_df") +#' new_df <- sql("SELECT * FROM json_df") +#'} +#' @note registerTempTable since 1.4.0 +setMethod("registerTempTable", + signature(x = "SparkDataFrame", tableName = "character"), + function(x, tableName) { + .Deprecated("createOrReplaceTempView") + invisible(callJMethod(x@sdf, "createOrReplaceTempView", tableName)) + }) + #' insertInto #' #' Insert the contents of a SparkDataFrame into a table registered in the current SparkSession. 
@@ -936,6 +962,7 @@ setMethod("write.orc",
 #' path <- "path/to/file.json"
 #' df <- read.json(path)
 #' write.parquet(df, "/tmp/sparkr-tmp1/")
+#' saveAsParquetFile(df, "/tmp/sparkr-tmp2/")
 #'}
 #' @note write.parquet since 1.6.0
 setMethod("write.parquet",
@@ -946,6 +973,17 @@ setMethod("write.parquet",
     invisible(handledCallJMethod(write, "parquet", path))
   })
 
+#' @rdname write.parquet
+#' @name saveAsParquetFile
+#' @aliases saveAsParquetFile,SparkDataFrame,character-method
+#' @note saveAsParquetFile since 1.4.0
+setMethod("saveAsParquetFile",
+          signature(x = "SparkDataFrame", path = "character"),
+          function(x, path) {
+            .Deprecated("write.parquet")
+            write.parquet(x, path)
+          })
+
 #' Save the content of SparkDataFrame in a text file at the specified path.
 #'
 #' Save the content of the SparkDataFrame in a text file at the specified path.
diff --git a/R/pkg/R/SQLContext.R b/R/pkg/R/SQLContext.R
index c6842912706a..7cd6c53375bb 100644
--- a/R/pkg/R/SQLContext.R
+++ b/R/pkg/R/SQLContext.R
@@ -389,6 +389,7 @@ setMethod("toDF", signature(x = "RDD"),
 #' path <- "path/to/file.json"
 #' df <- read.json(path)
 #' df <- read.json(path, multiLine = TRUE)
+#' df <- jsonFile(path)
 #' }
 #' @name read.json
 #' @note read.json since 1.6.0
@@ -403,6 +404,46 @@ read.json <- function(path, ...) {
   dataFrame(sdf)
 }
 
+#' @rdname read.json
+#' @name jsonFile
+#' @note jsonFile since 1.4.0
+jsonFile <- function(path) {
+  .Deprecated("read.json")
+  read.json(path)
+}
+
+#' JSON RDD
+#'
+#' Loads an RDD storing one JSON object per string as a SparkDataFrame.
+#'
+#' @param sqlContext SQLContext to use
+#' @param rdd An RDD of JSON strings
+#' @param schema A StructType object to use as schema
+#' @param samplingRatio The sampling ratio used to infer the schema
+#' @return A SparkDataFrame
+#' @noRd
+#' @examples
+#'\dontrun{
+#' sparkR.session()
+#' rdd <- textFile(sc, "path/to/json")
+#' df <- jsonRDD(sqlContext, rdd)
+#'}
+
+# TODO: remove - this method is no longer exported
+# TODO: support schema
+jsonRDD <- function(sqlContext, rdd, schema = NULL, samplingRatio = 1.0) {
+  .Deprecated("read.json")
+  rdd <- serializeToString(rdd)
+  if (is.null(schema)) {
+    read <- callJMethod(sqlContext, "read")
+    # samplingRatio is deprecated
+    sdf <- callJMethod(read, "json", callJMethod(getJRDD(rdd), "rdd"))
+    dataFrame(sdf)
+  } else {
+    stop("not implemented")
+  }
+}
+
 #' Create a SparkDataFrame from an ORC file.
 #'
 #' Loads an ORC file, returning the result as a SparkDataFrame.
@@ -445,6 +486,15 @@ read.parquet <- function(path, ...) {
   dataFrame(sdf)
 }
 
+#' @param ... argument(s) passed to the method.
+#' @rdname read.parquet
+#' @name parquetFile
+#' @note parquetFile since 1.4.0
+parquetFile <- function(...) {
+  .Deprecated("read.parquet")
+  read.parquet(unlist(list(...)))
+}
+
 #' Create a SparkDataFrame from a text file.
 #'
 #' Loads text files and returns a SparkDataFrame whose schema starts with
diff --git a/R/pkg/R/catalog.R b/R/pkg/R/catalog.R
index 7641f8a7a043..41ca108ee29f 100644
--- a/R/pkg/R/catalog.R
+++ b/R/pkg/R/catalog.R
@@ -17,6 +17,35 @@
 
 # catalog.R: SparkSession catalog functions
 
+#' (Deprecated) Create an external table
+#'
+#' Creates an external table based on the dataset in a data source,
+#' returning a SparkDataFrame associated with the external table.
+#'
+#' The data source is specified by the \code{source} and a set of options(...).
+#' If \code{source} is not specified, the default data source configured by
+#' "spark.sql.sources.default" will be used.
+#'
+#' @param tableName a name of the table.
+#' @param path the path of files to load. +#' @param source the name of external data source. +#' @param schema the schema of the data required for some data sources. +#' @param ... additional argument(s) passed to the method. +#' @return A SparkDataFrame. +#' @rdname createExternalTable-deprecated +#' @seealso \link{createTable} +#' @examples +#'\dontrun{ +#' sparkR.session() +#' df <- createExternalTable("myjson", path="path/to/json", source="json", schema) +#' } +#' @name createExternalTable +#' @note createExternalTable since 1.4.0 +createExternalTable <- function(tableName, path = NULL, source = NULL, schema = NULL, ...) { + .Deprecated("createTable", old = "createExternalTable") + createTable(tableName, path, source, schema, ...) +} + #' Creates a table based on the dataset in a data source #' #' Creates a table based on the dataset in a data source. Returns a SparkDataFrame associated with @@ -130,6 +159,29 @@ clearCache <- function() { invisible(callJMethod(catalog, "clearCache")) } +#' Drops the temporary table with the given table name in the catalog. +#' If the table has been cached/persisted before, it's also unpersisted. +#' +#' @param tableName The name of the SparkSQL table to be dropped. +#' @seealso \link{dropTempView} +#' @rdname dropTempTable-deprecated +#' @examples +#' \dontrun{ +#' sparkR.session() +#' df <- read.df(path, "parquet") +#' createOrReplaceTempView(df, "table") +#' dropTempTable("table") +#' } +#' @name dropTempTable +#' @note dropTempTable since 1.4.0 +dropTempTable <- function(tableName) { + .Deprecated("dropTempView", old = "dropTempTable") + if (class(tableName) != "character") { + stop("tableName must be a string.") + } + dropTempView(tableName) +} + #' Drops the temporary view with the given view name in the catalog. #' #' Drops the temporary view with the given view name in the catalog. diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R index 6f6ef6f363ea..44c00d19bc5d 100644 --- a/R/pkg/R/generics.R +++ b/R/pkg/R/generics.R @@ -528,6 +528,9 @@ setGeneric("persist", function(x, newLevel) { standardGeneric("persist") }) #' @rdname printSchema setGeneric("printSchema", function(x) { standardGeneric("printSchema") }) +#' @rdname registerTempTable-deprecated +setGeneric("registerTempTable", function(x, tableName) { standardGeneric("registerTempTable") }) + #' @rdname rename setGeneric("rename", function(x, ...) { standardGeneric("rename") }) @@ -592,6 +595,9 @@ setGeneric("write.parquet", function(x, path, ...) { standardGeneric("write.parquet") }) +#' @rdname write.parquet +setGeneric("saveAsParquetFile", function(x, path) { standardGeneric("saveAsParquetFile") }) + #' @rdname write.stream setGeneric("write.stream", function(df, source = NULL, outputMode = NULL, ...) { standardGeneric("write.stream") diff --git a/R/pkg/R/sparkR.R b/R/pkg/R/sparkR.R index cdb59093781f..cc8c92b8ab26 100644 --- a/R/pkg/R/sparkR.R +++ b/R/pkg/R/sparkR.R @@ -88,6 +88,49 @@ sparkR.stop <- function() { sparkR.session.stop() } +#' (Deprecated) Initialize a new Spark Context +#' +#' This function initializes a new SparkContext. 
+#'
+#' @param master The Spark master URL
+#' @param appName Application name to register with cluster manager
+#' @param sparkHome Spark Home directory
+#' @param sparkEnvir Named list of environment variables to set on worker nodes
+#' @param sparkExecutorEnv Named list of environment variables to be used when launching executors
+#' @param sparkJars Character vector of jar files to pass to the worker nodes
+#' @param sparkPackages Character vector of package coordinates
+#' @seealso \link{sparkR.session}
+#' @rdname sparkR.init-deprecated
+#' @examples
+#'\dontrun{
+#' sc <- sparkR.init("local[2]", "SparkR", "/home/spark")
+#' sc <- sparkR.init("local[2]", "SparkR", "/home/spark",
+#'                  list(spark.executor.memory="1g"))
+#' sc <- sparkR.init("yarn-client", "SparkR", "/home/spark",
+#'                  list(spark.executor.memory="4g"),
+#'                  list(LD_LIBRARY_PATH="/directory of JVM libraries (libjvm.so) on workers/"),
+#'                  c("one.jar", "two.jar", "three.jar"),
+#'                  c("com.databricks:spark-avro_2.11:2.0.1"))
+#'}
+#' @note sparkR.init since 1.4.0
+sparkR.init <- function(
+  master = "",
+  appName = "SparkR",
+  sparkHome = Sys.getenv("SPARK_HOME"),
+  sparkEnvir = list(),
+  sparkExecutorEnv = list(),
+  sparkJars = "",
+  sparkPackages = "") {
+  .Deprecated("sparkR.session")
+  sparkR.sparkContext(master,
+     appName,
+     sparkHome,
+     convertNamedListToEnv(sparkEnvir),
+     convertNamedListToEnv(sparkExecutorEnv),
+     sparkJars,
+     sparkPackages)
+}
+
 # Internal function to handle creating the SparkContext.
 sparkR.sparkContext <- function(
   master = "",
@@ -229,6 +272,61 @@ sparkR.sparkContext <- function(
   sc
 }
 
+#' (Deprecated) Initialize a new SQLContext
+#'
+#' This function creates a SparkContext from an existing JavaSparkContext and
+#' then uses it to initialize a new SQLContext.
+#'
+#' Starting with SparkR 2.0, a SparkSession is initialized and returned instead.
+#' This API is deprecated and kept for backward compatibility only.
+#'
+#' @param jsc The existing JavaSparkContext created with sparkR.init()
+#' @seealso \link{sparkR.session}
+#' @rdname sparkRSQL.init-deprecated
+#' @examples
+#'\dontrun{
+#' sc <- sparkR.init()
+#' sqlContext <- sparkRSQL.init(sc)
+#'}
+#' @note sparkRSQL.init since 1.4.0
+sparkRSQL.init <- function(jsc = NULL) {
+  .Deprecated("sparkR.session")
+
+  if (exists(".sparkRsession", envir = .sparkREnv)) {
+    return(get(".sparkRsession", envir = .sparkREnv))
+  }
+
+  # Default to without Hive support for backward compatibility.
+  sparkR.session(enableHiveSupport = FALSE)
+}
+
+#' (Deprecated) Initialize a new HiveContext
+#'
+#' This function creates a HiveContext from an existing JavaSparkContext.
+#'
+#' Starting with SparkR 2.0, a SparkSession is initialized and returned instead.
+#' This API is deprecated and kept for backward compatibility only.
+#'
+#' @param jsc The existing JavaSparkContext created with sparkR.init()
+#' @seealso \link{sparkR.session}
+#' @rdname sparkRHive.init-deprecated
+#' @examples
+#'\dontrun{
+#' sc <- sparkR.init()
+#' sqlContext <- sparkRHive.init(sc)
+#'}
+#' @note sparkRHive.init since 1.4.0
+sparkRHive.init <- function(jsc = NULL) {
+  .Deprecated("sparkR.session")
+
+  if (exists(".sparkRsession", envir = .sparkREnv)) {
+    return(get(".sparkRsession", envir = .sparkREnv))
+  }
+
+  # Enable Hive support for backward compatibility with the old HiveContext.
+  sparkR.session(enableHiveSupport = TRUE)
+}
+
 #' Get the existing SparkSession or initialize a new SparkSession.
 #'
 #' SparkSession is the entry point into SparkR.
\code{sparkR.session} gets the existing diff --git a/R/pkg/tests/fulltests/test_sparkSQL.R b/R/pkg/tests/fulltests/test_sparkSQL.R index a5527298b4d4..3afb50b024dc 100644 --- a/R/pkg/tests/fulltests/test_sparkSQL.R +++ b/R/pkg/tests/fulltests/test_sparkSQL.R @@ -106,6 +106,15 @@ if (is_windows()) { Sys.setenv(TZ = "GMT") } +test_that("calling sparkRSQL.init returns existing SQL context", { + sqlContext <- suppressWarnings(sparkRSQL.init(sc)) + expect_equal(suppressWarnings(sparkRSQL.init(sc)), sqlContext) +}) + +test_that("calling sparkRSQL.init returns existing SparkSession", { + expect_equal(suppressWarnings(sparkRSQL.init(sc)), sparkSession) +}) + test_that("calling sparkR.session returns existing SparkSession", { expect_equal(sparkR.session(), sparkSession) }) @@ -619,10 +628,14 @@ test_that("read/write json files", { jsonPath3 <- tempfile(pattern = "jsonPath3", fileext = ".json") write.json(df, jsonPath3) - # Test read.json() works with multiple input paths + # Test read.json()/jsonFile() works with multiple input paths jsonDF1 <- read.json(c(jsonPath2, jsonPath3)) expect_is(jsonDF1, "SparkDataFrame") expect_equal(count(jsonDF1), 6) + # Suppress warnings because jsonFile is deprecated + jsonDF2 <- suppressWarnings(jsonFile(c(jsonPath2, jsonPath3))) + expect_is(jsonDF2, "SparkDataFrame") + expect_equal(count(jsonDF2), 6) unlink(jsonPath2) unlink(jsonPath3) @@ -642,6 +655,20 @@ test_that("read/write json files - compression option", { unlink(jsonPath) }) +test_that("jsonRDD() on a RDD with json string", { + sqlContext <- suppressWarnings(sparkRSQL.init(sc)) + rdd <- parallelize(sc, mockLines) + expect_equal(countRDD(rdd), 3) + df <- suppressWarnings(jsonRDD(sqlContext, rdd)) + expect_is(df, "SparkDataFrame") + expect_equal(count(df), 3) + + rdd2 <- flatMap(rdd, function(x) c(x, x)) + df <- suppressWarnings(jsonRDD(sqlContext, rdd2)) + expect_is(df, "SparkDataFrame") + expect_equal(count(df), 6) +}) + test_that("test tableNames and tables", { count <- count(listTables()) @@ -656,10 +683,10 @@ test_that("test tableNames and tables", { expect_true("tableName" %in% colnames(tables())) expect_true(all(c("tableName", "database", "isTemporary") %in% colnames(tables()))) - createOrReplaceTempView(df, "table2") + suppressWarnings(registerTempTable(df, "table2")) tables <- listTables() expect_equal(count(tables), count + 2) - dropTempView("table1") + suppressWarnings(dropTempTable("table1")) expect_true(dropTempView("table2")) tables <- listTables() @@ -2845,14 +2872,17 @@ test_that("read/write Parquet files", { expect_is(df2, "SparkDataFrame") expect_equal(count(df2), 3) - # Test write.parquet and read.parquet + # Test write.parquet/saveAsParquetFile and read.parquet/parquetFile parquetPath2 <- tempfile(pattern = "parquetPath2", fileext = ".parquet") write.parquet(df, parquetPath2) parquetPath3 <- tempfile(pattern = "parquetPath3", fileext = ".parquet") - write.parquet(df, parquetPath3) + suppressWarnings(saveAsParquetFile(df, parquetPath3)) parquetDF <- read.parquet(c(parquetPath2, parquetPath3)) expect_is(parquetDF, "SparkDataFrame") expect_equal(count(parquetDF), count(df) * 2) + parquetDF2 <- suppressWarnings(parquetFile(parquetPath2, parquetPath3)) + expect_is(parquetDF2, "SparkDataFrame") + expect_equal(count(parquetDF2), count(df) * 2) # Test if varargs works with variables saveMode <- "overwrite" diff --git a/docs/sparkr-migration-guide.md b/docs/sparkr-migration-guide.md index 6fbc4c03aefc..fe5323604bc0 100644 --- a/docs/sparkr-migration-guide.md +++ b/docs/sparkr-migration-guide.md @@ 
-26,11 +26,6 @@ Note that this migration guide describes the items specific to SparkR. Many items of SQL migration can be applied when migrating SparkR to higher versions. Please refer [Migration Guide: SQL, Datasets and DataFrame](sql-migration-guide.html). -## Upgrading from SparkR 2.4 to 3.0 - - - The deprecated methods `sparkR.init`, `sparkRSQL.init`, `sparkRHive.init` have been removed. Use `sparkR.session` instead. - - The deprecated methods `parquetFile`, `saveAsParquetFile`, `jsonFile`, `registerTempTable`, `createExternalTable`, and `dropTempTable` have been removed. Use `read.parquet`, `write.parquet`, `read.json`, `createOrReplaceTempView`, `createTable`, `dropTempView`, `union` instead. - ## Upgrading from SparkR 2.3 to 2.4 - Previously, we don't check the validity of the size of the last layer in `spark.mlp`. For example, if the training data only has two labels, a `layers` param like `c(1, 3)` doesn't cause an error previously, now it does. From 754f93863bfec47d6c53a796ad70bdfb3a187441 Mon Sep 17 00:00:00 2001 From: Huaxin Gao Date: Sun, 29 Mar 2020 18:04:12 -0700 Subject: [PATCH 2/3] remove jsonFile, parquetFile, saveAsParquetFile and jsonRDD --- R/pkg/NAMESPACE | 3 -- R/pkg/R/DataFrame.R | 12 ------- R/pkg/R/SQLContext.R | 50 --------------------------- R/pkg/R/generics.R | 3 -- R/pkg/tests/fulltests/test_sparkSQL.R | 27 ++------------- docs/sparkr-migration-guide.md | 4 +++ 6 files changed, 7 insertions(+), 92 deletions(-) diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE index 3b3d0e0c740e..fb879e4885d8 100644 --- a/R/pkg/NAMESPACE +++ b/R/pkg/NAMESPACE @@ -160,7 +160,6 @@ exportMethods("arrange", "sample", "sample_frac", "sampleBy", - "saveAsParquetFile", "saveAsTable", "saveDF", "schema", @@ -442,13 +441,11 @@ export("as.DataFrame", "currentDatabase", "dropTempTable", "dropTempView", - "jsonFile", "listColumns", "listDatabases", "listFunctions", "listTables", "loadDF", - "parquetFile", "read.df", "read.jdbc", "read.json", diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index 867264a36338..14d2076e88ef 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -962,7 +962,6 @@ setMethod("write.orc", #' path <- "path/to/file.json" #' df <- read.json(path) #' write.parquet(df, "/tmp/sparkr-tmp1/") -#' saveAsParquetFile(df, "/tmp/sparkr-tmp2/") #'} #' @note write.parquet since 1.6.0 setMethod("write.parquet", @@ -973,17 +972,6 @@ setMethod("write.parquet", invisible(handledCallJMethod(write, "parquet", path)) }) -#' @rdname write.parquet -#' @name saveAsParquetFile -#' @aliases saveAsParquetFile,SparkDataFrame,character-method -#' @note saveAsParquetFile since 1.4.0 -setMethod("saveAsParquetFile", - signature(x = "SparkDataFrame", path = "character"), - function(x, path) { - .Deprecated("write.parquet") - write.parquet(x, path) - }) - #' Save the content of SparkDataFrame in a text file at the specified path. #' #' Save the content of the SparkDataFrame in a text file at the specified path. diff --git a/R/pkg/R/SQLContext.R b/R/pkg/R/SQLContext.R index 7cd6c53375bb..c6842912706a 100644 --- a/R/pkg/R/SQLContext.R +++ b/R/pkg/R/SQLContext.R @@ -389,7 +389,6 @@ setMethod("toDF", signature(x = "RDD"), #' path <- "path/to/file.json" #' df <- read.json(path) #' df <- read.json(path, multiLine = TRUE) -#' df <- jsonFile(path) #' } #' @name read.json #' @note read.json since 1.6.0 @@ -404,46 +403,6 @@ read.json <- function(path, ...) 
{
   dataFrame(sdf)
 }
 
-#' @rdname read.json
-#' @name jsonFile
-#' @note jsonFile since 1.4.0
-jsonFile <- function(path) {
-  .Deprecated("read.json")
-  read.json(path)
-}
-
-#' JSON RDD
-#'
-#' Loads an RDD storing one JSON object per string as a SparkDataFrame.
-#'
-#' @param sqlContext SQLContext to use
-#' @param rdd An RDD of JSON strings
-#' @param schema A StructType object to use as schema
-#' @param samplingRatio The sampling ratio used to infer the schema
-#' @return A SparkDataFrame
-#' @noRd
-#' @examples
-#'\dontrun{
-#' sparkR.session()
-#' rdd <- textFile(sc, "path/to/json")
-#' df <- jsonRDD(sqlContext, rdd)
-#'}
-
-# TODO: remove - this method is no longer exported
-# TODO: support schema
-jsonRDD <- function(sqlContext, rdd, schema = NULL, samplingRatio = 1.0) {
-  .Deprecated("read.json")
-  rdd <- serializeToString(rdd)
-  if (is.null(schema)) {
-    read <- callJMethod(sqlContext, "read")
-    # samplingRatio is deprecated
-    sdf <- callJMethod(read, "json", callJMethod(getJRDD(rdd), "rdd"))
-    dataFrame(sdf)
-  } else {
-    stop("not implemented")
-  }
-}
-
 #' Create a SparkDataFrame from an ORC file.
 #'
 #' Loads an ORC file, returning the result as a SparkDataFrame.
@@ -486,15 +445,6 @@ read.parquet <- function(path, ...) {
   dataFrame(sdf)
 }
 
-#' @param ... argument(s) passed to the method.
-#' @rdname read.parquet
-#' @name parquetFile
-#' @note parquetFile since 1.4.0
-parquetFile <- function(...) {
-  .Deprecated("read.parquet")
-  read.parquet(unlist(list(...)))
-}
-
 #' Create a SparkDataFrame from a text file.
 #'
 #' Loads text files and returns a SparkDataFrame whose schema starts with
diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R
index 44c00d19bc5d..d924b2af4ce1 100644
--- a/R/pkg/R/generics.R
+++ b/R/pkg/R/generics.R
@@ -595,9 +595,6 @@ setGeneric("write.parquet", function(x, path, ...) {
   standardGeneric("write.parquet")
 })
 
-#' @rdname write.parquet
-setGeneric("saveAsParquetFile", function(x, path) { standardGeneric("saveAsParquetFile") })
-
 #' @rdname write.stream
 setGeneric("write.stream", function(df, source = NULL, outputMode = NULL, ...)
{ standardGeneric("write.stream") diff --git a/R/pkg/tests/fulltests/test_sparkSQL.R b/R/pkg/tests/fulltests/test_sparkSQL.R index 3afb50b024dc..c892feb61da8 100644 --- a/R/pkg/tests/fulltests/test_sparkSQL.R +++ b/R/pkg/tests/fulltests/test_sparkSQL.R @@ -628,14 +628,10 @@ test_that("read/write json files", { jsonPath3 <- tempfile(pattern = "jsonPath3", fileext = ".json") write.json(df, jsonPath3) - # Test read.json()/jsonFile() works with multiple input paths + # Test read.json() works with multiple input paths jsonDF1 <- read.json(c(jsonPath2, jsonPath3)) expect_is(jsonDF1, "SparkDataFrame") expect_equal(count(jsonDF1), 6) - # Suppress warnings because jsonFile is deprecated - jsonDF2 <- suppressWarnings(jsonFile(c(jsonPath2, jsonPath3))) - expect_is(jsonDF2, "SparkDataFrame") - expect_equal(count(jsonDF2), 6) unlink(jsonPath2) unlink(jsonPath3) @@ -655,20 +651,6 @@ test_that("read/write json files - compression option", { unlink(jsonPath) }) -test_that("jsonRDD() on a RDD with json string", { - sqlContext <- suppressWarnings(sparkRSQL.init(sc)) - rdd <- parallelize(sc, mockLines) - expect_equal(countRDD(rdd), 3) - df <- suppressWarnings(jsonRDD(sqlContext, rdd)) - expect_is(df, "SparkDataFrame") - expect_equal(count(df), 3) - - rdd2 <- flatMap(rdd, function(x) c(x, x)) - df <- suppressWarnings(jsonRDD(sqlContext, rdd2)) - expect_is(df, "SparkDataFrame") - expect_equal(count(df), 6) -}) - test_that("test tableNames and tables", { count <- count(listTables()) @@ -2872,17 +2854,14 @@ test_that("read/write Parquet files", { expect_is(df2, "SparkDataFrame") expect_equal(count(df2), 3) - # Test write.parquet/saveAsParquetFile and read.parquet/parquetFile + # Test write.parquet and read.parquet parquetPath2 <- tempfile(pattern = "parquetPath2", fileext = ".parquet") write.parquet(df, parquetPath2) parquetPath3 <- tempfile(pattern = "parquetPath3", fileext = ".parquet") - suppressWarnings(saveAsParquetFile(df, parquetPath3)) + write.parquet(df, parquetPath3) parquetDF <- read.parquet(c(parquetPath2, parquetPath3)) expect_is(parquetDF, "SparkDataFrame") expect_equal(count(parquetDF), count(df) * 2) - parquetDF2 <- suppressWarnings(parquetFile(parquetPath2, parquetPath3)) - expect_is(parquetDF2, "SparkDataFrame") - expect_equal(count(parquetDF2), count(df) * 2) # Test if varargs works with variables saveMode <- "overwrite" diff --git a/docs/sparkr-migration-guide.md b/docs/sparkr-migration-guide.md index fe5323604bc0..32836cdac53d 100644 --- a/docs/sparkr-migration-guide.md +++ b/docs/sparkr-migration-guide.md @@ -26,6 +26,10 @@ Note that this migration guide describes the items specific to SparkR. Many items of SQL migration can be applied when migrating SparkR to higher versions. Please refer [Migration Guide: SQL, Datasets and DataFrame](sql-migration-guide.html). +## Upgrading from SparkR 2.4 to 3.0 + + - The deprecated methods `parquetFile`, `saveAsParquetFile`, `jsonFile`, `jsonRDD` have been removed. Use `read.parquet`, `write.parquet`, `read.json` instead. + ## Upgrading from SparkR 2.3 to 2.4 - Previously, we don't check the validity of the size of the last layer in `spark.mlp`. For example, if the training data only has two labels, a `layers` param like `c(1, 3)` doesn't cause an error previously, now it does. 
From ce5969b95aa7de4a09fb39ca58dc8f2a7dc2f0f0 Mon Sep 17 00:00:00 2001
From: Huaxin Gao
Date: Sun, 29 Mar 2020 20:24:32 -0700
Subject: [PATCH 3/3] mark Drop Temporary Table as deprecated

---
 R/pkg/R/catalog.R | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/R/pkg/R/catalog.R b/R/pkg/R/catalog.R
index 41ca108ee29f..275737f804bd 100644
--- a/R/pkg/R/catalog.R
+++ b/R/pkg/R/catalog.R
@@ -159,6 +159,8 @@ clearCache <- function() {
   invisible(callJMethod(catalog, "clearCache"))
 }
 
+#' (Deprecated) Drop Temporary Table
+#'
 #' Drops the temporary table with the given table name in the catalog.
 #' If the table has been cached/persisted before, it's also unpersisted.
 #'
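Reviewer note (not part of the patches above): a minimal sketch of how the restored wrappers map to their documented replacements after this series. It assumes a local SparkR session; the JSON path, Parquet output directory, and app name are placeholders, and jsonFile/parquetFile/saveAsParquetFile are referenced only in comments since PATCH 2/3 removes them.

    # Illustrative walkthrough of the deprecated-to-current API mapping.
    library(SparkR)
    sparkR.session(master = "local[2]", appName = "deprecation-check")

    df <- read.json("path/to/file.json")           # current replacement for jsonFile(path)

    # Restored deprecated wrapper: warns, then delegates to createOrReplaceTempView().
    suppressWarnings(registerTempTable(df, "people"))
    people <- sql("SELECT * FROM people")

    write.parquet(df, "/tmp/sparkr-example/")      # replacement for saveAsParquetFile(df, path)
    df2 <- read.parquet("/tmp/sparkr-example/")    # replacement for parquetFile(path)

    # Restored deprecated wrapper: warns, then delegates to dropTempView().
    suppressWarnings(dropTempTable("people"))
    sparkR.session.stop()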