7 changes: 7 additions & 0 deletions R/pkg/NAMESPACE
@@ -28,6 +28,7 @@ importFrom("utils", "download.file", "object.size", "packageVersion", "tail", "u

# S3 methods exported
export("sparkR.session")
export("sparkR.init")
export("sparkR.session.stop")
export("sparkR.stop")
export("sparkR.conf")
Expand All @@ -41,6 +42,9 @@ export("sparkR.callJStatic")

export("install.spark")

export("sparkRSQL.init",
"sparkRHive.init")

# MLlib integration
exportMethods("glm",
"spark.glm",
@@ -148,6 +152,7 @@ exportMethods("arrange",
"printSchema",
"randomSplit",
"rbind",
"registerTempTable",
"rename",
"repartition",
"repartitionByRange",
@@ -431,8 +436,10 @@ export("as.DataFrame",
"cacheTable",
"clearCache",
"createDataFrame",
"createExternalTable",
"createTable",
"currentDatabase",
"dropTempTable",
"dropTempView",
"listColumns",
"listDatabases",
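A quick way to verify the restored exports once this branch of SparkR is installed — a minimal sketch using only base R (the vector of names mirrors the additions above):

library(SparkR)
# The six deprecated entry points this diff re-exports.
deprecated <- c("sparkR.init", "sparkRSQL.init", "sparkRHive.init",
                "registerTempTable", "createExternalTable", "dropTempTable")
all(deprecated %in% getNamespaceExports("SparkR"))  # expected: TRUE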
26 changes: 26 additions & 0 deletions R/pkg/R/DataFrame.R
@@ -521,6 +521,32 @@ setMethod("createOrReplaceTempView",
            invisible(callJMethod(x@sdf, "createOrReplaceTempView", viewName))
          })

#' (Deprecated) Register Temporary Table
#'
#' Registers a SparkDataFrame as a temporary table in the SparkSession.
#'
#' @param x A SparkDataFrame.
#' @param tableName A character vector containing the name of the table.
#'
#' @seealso \link{createOrReplaceTempView}
#' @rdname registerTempTable-deprecated
#' @name registerTempTable
#' @aliases registerTempTable,SparkDataFrame,character-method
#' @examples
#'\dontrun{
#' sparkR.session()
#' path <- "path/to/file.json"
#' df <- read.json(path)
#' registerTempTable(df, "json_df")
#' new_df <- sql("SELECT * FROM json_df")
#'}
#' @note registerTempTable since 1.4.0
setMethod("registerTempTable",
signature(x = "SparkDataFrame", tableName = "character"),
function(x, tableName) {
.Deprecated("createOrReplaceTempView")
invisible(callJMethod(x@sdf, "createOrReplaceTempView", tableName))
})

#' insertInto
#'
#' Insert the contents of a SparkDataFrame into a table registered in the current SparkSession.
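A minimal usage sketch for the restored method (the JSON path is illustrative): registerTempTable emits a deprecation warning, then behaves exactly like createOrReplaceTempView.

library(SparkR)
sparkR.session()
df <- read.json("path/to/file.json")  # illustrative path
# Deprecated spelling: warns, then registers the temporary view.
suppressWarnings(registerTempTable(df, "people"))
head(sql("SELECT * FROM people"))
# Preferred replacement.
createOrReplaceTempView(df, "people")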
54 changes: 54 additions & 0 deletions R/pkg/R/catalog.R
@@ -17,6 +17,35 @@

# catalog.R: SparkSession catalog functions

#' (Deprecated) Create an external table
#'
#' Creates an external table based on the dataset in a data source and
#' returns a SparkDataFrame associated with the external table.
#'
#' The data source is specified by \code{source} and a set of options (...).
#' If \code{source} is not specified, the default data source configured by
#' "spark.sql.sources.default" will be used.
#'
#' @param tableName the name of the table.
#' @param path the path of the files to load.
#' @param source the name of the external data source.
#' @param schema the schema of the data, which is required by some data sources.
#' @param ... additional argument(s) passed to the method.
#' @return A SparkDataFrame.
#' @rdname createExternalTable-deprecated
#' @seealso \link{createTable}
#' @examples
#'\dontrun{
#' sparkR.session()
#' df <- createExternalTable("myjson", path = "path/to/json", source = "json", schema)
#' }
#' @name createExternalTable
#' @note createExternalTable since 1.4.0
createExternalTable <- function(tableName, path = NULL, source = NULL, schema = NULL, ...) {
  .Deprecated("createTable", old = "createExternalTable")
  createTable(tableName, path, source, schema, ...)
}

#' Creates a table based on the dataset in a data source
#'
#' Creates a table based on the dataset in a data source. Returns a SparkDataFrame associated with
@@ -130,6 +159,31 @@ clearCache <- function() {
  invisible(callJMethod(catalog, "clearCache"))
}

#' (Deprecated) Drop Temporary Table
#'
#' Drops the temporary table with the given table name in the catalog.
#' If the table has been cached/persisted before, it's also unpersisted.
#'
#' @param tableName The name of the SparkSQL table to be dropped.
#' @seealso \link{dropTempView}
#' @rdname dropTempTable-deprecated
#' @examples
#' \dontrun{
#' sparkR.session()
#' df <- read.df(path, "parquet")
#' createOrReplaceTempView(df, "table")
#' dropTempTable("table")
#' }
#' @name dropTempTable
#' @note dropTempTable since 1.4.0
dropTempTable <- function(tableName) {
  .Deprecated("dropTempView", old = "dropTempTable")
  if (!is.character(tableName)) {
    stop("tableName must be a string.")
  }
  dropTempView(tableName)
}

#' Drops the temporary view with the given view name in the catalog.
#'
#' Drops the temporary view with the given view name in the catalog.
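A sketch of how the two restored catalog helpers forward to their replacements (table names and paths are illustrative):

# createExternalTable warns, then delegates to createTable with the same arguments.
df <- suppressWarnings(
  createExternalTable("people_json", path = "path/to/json", source = "json"))
# dropTempTable warns, validates the name, then delegates to dropTempView.
createOrReplaceTempView(df, "tmp_people")
suppressWarnings(dropTempTable("tmp_people"))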
3 changes: 3 additions & 0 deletions R/pkg/R/generics.R
@@ -528,6 +528,9 @@ setGeneric("persist", function(x, newLevel) { standardGeneric("persist") })
#' @rdname printSchema
setGeneric("printSchema", function(x) { standardGeneric("printSchema") })

#' @rdname registerTempTable-deprecated
setGeneric("registerTempTable", function(x, tableName) { standardGeneric("registerTempTable") })

#' @rdname rename
setGeneric("rename", function(x, ...) { standardGeneric("rename") })

98 changes: 98 additions & 0 deletions R/pkg/R/sparkR.R
@@ -88,6 +88,49 @@ sparkR.stop <- function() {
  sparkR.session.stop()
}

#' (Deprecated) Initialize a new Spark Context
#'
#' This function initializes a new SparkContext.
#'
#' @param master The Spark master URL
#' @param appName Application name to register with cluster manager
#' @param sparkHome Spark Home directory
#' @param sparkEnvir Named list of environment variables to set on worker nodes
#' @param sparkExecutorEnv Named list of environment variables to be used when launching executors
#' @param sparkJars Character vector of jar files to pass to the worker nodes
#' @param sparkPackages Character vector of package coordinates
#' @seealso \link{sparkR.session}
#' @rdname sparkR.init-deprecated
#' @examples
#'\dontrun{
#' sc <- sparkR.init("local[2]", "SparkR", "/home/spark")
#' sc <- sparkR.init("local[2]", "SparkR", "/home/spark",
#'                   list(spark.executor.memory = "1g"))
#' sc <- sparkR.init("yarn-client", "SparkR", "/home/spark",
#'                   list(spark.executor.memory = "4g"),
#'                   list(LD_LIBRARY_PATH = "/directory of JVM libraries (libjvm.so) on workers/"),
#'                   c("one.jar", "two.jar", "three.jar"),
#'                   c("com.databricks:spark-avro_2.11:2.0.1"))
#'}
#' @note sparkR.init since 1.4.0
sparkR.init <- function(
  master = "",
  appName = "SparkR",
  sparkHome = Sys.getenv("SPARK_HOME"),
  sparkEnvir = list(),
  sparkExecutorEnv = list(),
  sparkJars = "",
  sparkPackages = "") {
  .Deprecated("sparkR.session")
  sparkR.sparkContext(master,
                      appName,
                      sparkHome,
                      convertNamedListToEnv(sparkEnvir),
                      convertNamedListToEnv(sparkExecutorEnv),
                      sparkJars,
                      sparkPackages)
}

# Internal function to handle creating the SparkContext.
sparkR.sparkContext <- function(
master = "",
@@ -229,6 +272,61 @@ sparkR.sparkContext <- function(
  sc
}

#' (Deprecated) Initialize a new SQLContext
#'
#' This function creates a SparkContext from an existing JavaSparkContext and
#' then uses it to initialize a new SQLContext.
#'
#' Starting with SparkR 2.0, a SparkSession is initialized and returned instead.
#' This API is deprecated and kept for backward compatibility only.
#'
#' @param jsc The existing JavaSparkContext created with sparkR.init()
#' @seealso \link{sparkR.session}
#' @rdname sparkRSQL.init-deprecated
#' @examples
#'\dontrun{
#' sc <- sparkR.init()
#' sqlContext <- sparkRSQL.init(sc)
#'}
#' @note sparkRSQL.init since 1.4.0
sparkRSQL.init <- function(jsc = NULL) {
  .Deprecated("sparkR.session")

  if (exists(".sparkRsession", envir = .sparkREnv)) {
    return(get(".sparkRsession", envir = .sparkREnv))
  }

  # Default to no Hive support for backward compatibility.
  sparkR.session(enableHiveSupport = FALSE)
}

#' (Deprecated) Initialize a new HiveContext
#'
#' This function creates a HiveContext from an existing JavaSparkContext.
#'
#' Starting with SparkR 2.0, a SparkSession is initialized and returned instead.
#' This API is deprecated and kept for backward compatibility only.
#'
#' @param jsc The existing JavaSparkContext created with sparkR.init()
#' @seealso \link{sparkR.session}
#' @rdname sparkRHive.init-deprecated
#' @examples
#'\dontrun{
#' sc <- sparkR.init()
#' sqlContext <- sparkRHive.init(sc)
#'}
#' @note sparkRHive.init since 1.4.0
sparkRHive.init <- function(jsc = NULL) {
  .Deprecated("sparkR.session")

  if (exists(".sparkRsession", envir = .sparkREnv)) {
    return(get(".sparkRsession", envir = .sparkREnv))
  }

  # Enable Hive support, matching the old HiveContext behavior.
  sparkR.session(enableHiveSupport = TRUE)
}

#' Get the existing SparkSession or initialize a new SparkSession.
#'
#' SparkSession is the entry point into SparkR. \code{sparkR.session} gets the existing
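How the three restored entry points line up against the current API — a sketch assuming a local master; each one warns and ultimately goes through sparkR.session:

# Deprecated constructors, each emitting a deprecation warning.
sc <- suppressWarnings(sparkR.init(master = "local[2]", appName = "SparkR"))
sqlContext <- suppressWarnings(sparkRSQL.init(sc))   # returns the active SparkSession
hiveContext <- suppressWarnings(sparkRHive.init(sc)) # same object when a session already exists
# Preferred single entry point.
sparkR.session(master = "local[2]", appName = "SparkR")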
13 changes: 11 additions & 2 deletions R/pkg/tests/fulltests/test_sparkSQL.R
@@ -106,6 +106,15 @@ if (is_windows()) {
  Sys.setenv(TZ = "GMT")
}

test_that("calling sparkRSQL.init returns existing SQL context", {
sqlContext <- suppressWarnings(sparkRSQL.init(sc))
expect_equal(suppressWarnings(sparkRSQL.init(sc)), sqlContext)
})

test_that("calling sparkRSQL.init returns existing SparkSession", {
expect_equal(suppressWarnings(sparkRSQL.init(sc)), sparkSession)
})

test_that("calling sparkR.session returns existing SparkSession", {
expect_equal(sparkR.session(), sparkSession)
})
@@ -656,10 +665,10 @@ test_that("test tableNames and tables", {
expect_true("tableName" %in% colnames(tables()))
expect_true(all(c("tableName", "database", "isTemporary") %in% colnames(tables())))

createOrReplaceTempView(df, "table2")
suppressWarnings(registerTempTable(df, "table2"))
tables <- listTables()
expect_equal(count(tables), count + 2)
dropTempView("table1")
suppressWarnings(dropTempTable("table1"))
expect_true(dropTempView("table2"))

tables <- listTables()
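A test that could complement the ones above by asserting the deprecation warning itself — hypothetical, not part of this diff:

test_that("registerTempTable warns and still registers the view", {
  df <- createDataFrame(data.frame(a = 1:3))
  expect_warning(registerTempTable(df, "dep_tbl"), "createOrReplaceTempView")
  expect_true(dropTempView("dep_tbl"))
})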
3 changes: 1 addition & 2 deletions docs/sparkr-migration-guide.md
@@ -28,8 +28,7 @@ Please refer [Migration Guide: SQL, Datasets and DataFrame](sql-migration-guide.

## Upgrading from SparkR 2.4 to 3.0

-- The deprecated methods `sparkR.init`, `sparkRSQL.init`, `sparkRHive.init` have been removed. Use `sparkR.session` instead.
-- The deprecated methods `parquetFile`, `saveAsParquetFile`, `jsonFile`, `registerTempTable`, `createExternalTable`, and `dropTempTable` have been removed. Use `read.parquet`, `write.parquet`, `read.json`, `createOrReplaceTempView`, `createTable`, `dropTempView`, `union` instead.
+- The deprecated methods `parquetFile`, `saveAsParquetFile`, `jsonFile`, `jsonRDD` have been removed. Use `read.parquet`, `write.parquet`, `read.json` instead.

## Upgrading from SparkR 2.3 to 2.4
