Skip to content

Commit a696dcc

Browse files
author
pgandhi
committed
2 parents 3111166 + bbbdaa8 commit a696dcc

File tree

441 files changed

+21178
-16782
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

441 files changed

+21178
-16782
lines changed

.gitignore

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,6 @@ target/
7777
unit-tests.log
7878
work/
7979
docs/.jekyll-metadata
80-
*.crc
8180

8281
# For Hive
8382
TempStatsStore/

R/WINDOWS.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
To build SparkR on Windows, the following steps are required
44

55
1. Install R (>= 3.1) and [Rtools](http://cran.r-project.org/bin/windows/Rtools/). Make sure to
6-
include Rtools and R in `PATH`.
6+
include Rtools and R in `PATH`. Note that support for R prior to version 3.4 is deprecated as of Spark 3.0.0.
77

88
2. Install
99
[JDK8](http://www.oracle.com/technetwork/java/javase/downloads/jdk8-downloads-2133151.html) and set

R/pkg/DESCRIPTION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ URL: http://www.apache.org/ http://spark.apache.org/
1515
BugReports: http://spark.apache.org/contributing.html
1616
SystemRequirements: Java (== 8)
1717
Depends:
18-
R (>= 3.0),
18+
R (>= 3.1),
1919
methods
2020
Suggests:
2121
knitr,

R/pkg/NAMESPACE

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -194,6 +194,7 @@ exportMethods("%<=>%",
194194
"acos",
195195
"add_months",
196196
"alias",
197+
"approx_count_distinct",
197198
"approxCountDistinct",
198199
"approxQuantile",
199200
"array_contains",
@@ -252,6 +253,7 @@ exportMethods("%<=>%",
252253
"dayofweek",
253254
"dayofyear",
254255
"decode",
256+
"degrees",
255257
"dense_rank",
256258
"desc",
257259
"element_at",
@@ -334,6 +336,7 @@ exportMethods("%<=>%",
334336
"posexplode",
335337
"posexplode_outer",
336338
"quarter",
339+
"radians",
337340
"rand",
338341
"randn",
339342
"rank",

R/pkg/R/functions.R

Lines changed: 64 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -112,7 +112,7 @@ NULL
112112
#' df <- createDataFrame(cbind(model = rownames(mtcars), mtcars))
113113
#' tmp <- mutate(df, v1 = log(df$mpg), v2 = cbrt(df$disp),
114114
#' v3 = bround(df$wt, 1), v4 = bin(df$cyl),
115-
#' v5 = hex(df$wt), v6 = toDegrees(df$gear),
115+
#' v5 = hex(df$wt), v6 = degrees(df$gear),
116116
#' v7 = atan2(df$cyl, df$am), v8 = hypot(df$cyl, df$am),
117117
#' v9 = pmod(df$hp, df$cyl), v10 = shiftLeft(df$disp, 1),
118118
#' v11 = conv(df$hp, 10, 16), v12 = sign(df$vs - 0.5),
@@ -320,23 +320,37 @@ setMethod("acos",
320320
})
321321

322322
#' @details
323-
#' \code{approxCountDistinct}: Returns the approximate number of distinct items in a group.
323+
#' \code{approx_count_distinct}: Returns the approximate number of distinct items in a group.
324324
#'
325325
#' @rdname column_aggregate_functions
326-
#' @aliases approxCountDistinct approxCountDistinct,Column-method
326+
#' @aliases approx_count_distinct approx_count_distinct,Column-method
327327
#' @examples
328328
#'
329329
#' \dontrun{
330-
#' head(select(df, approxCountDistinct(df$gear)))
331-
#' head(select(df, approxCountDistinct(df$gear, 0.02)))
330+
#' head(select(df, approx_count_distinct(df$gear)))
331+
#' head(select(df, approx_count_distinct(df$gear, 0.02)))
332332
#' head(select(df, countDistinct(df$gear, df$cyl)))
333333
#' head(select(df, n_distinct(df$gear)))
334334
#' head(distinct(select(df, "gear")))}
335+
#' @note approx_count_distinct(Column) since 3.0.0
336+
setMethod("approx_count_distinct",
337+
signature(x = "Column"),
338+
function(x) {
339+
jc <- callJStatic("org.apache.spark.sql.functions", "approx_count_distinct", x@jc)
340+
column(jc)
341+
})
342+
343+
#' @details
344+
#' \code{approxCountDistinct}: Returns the approximate number of distinct items in a group.
345+
#'
346+
#' @rdname column_aggregate_functions
347+
#' @aliases approxCountDistinct approxCountDistinct,Column-method
335348
#' @note approxCountDistinct(Column) since 1.4.0
336349
setMethod("approxCountDistinct",
337350
signature(x = "Column"),
338351
function(x) {
339-
jc <- callJStatic("org.apache.spark.sql.functions", "approxCountDistinct", x@jc)
352+
.Deprecated("approx_count_distinct")
353+
jc <- callJStatic("org.apache.spark.sql.functions", "approx_count_distinct", x@jc)
340354
column(jc)
341355
})
342356

@@ -1651,7 +1665,22 @@ setMethod("tanh",
16511665
setMethod("toDegrees",
16521666
signature(x = "Column"),
16531667
function(x) {
1654-
jc <- callJStatic("org.apache.spark.sql.functions", "toDegrees", x@jc)
1668+
.Deprecated("degrees")
1669+
jc <- callJStatic("org.apache.spark.sql.functions", "degrees", x@jc)
1670+
column(jc)
1671+
})
1672+
1673+
#' @details
1674+
#' \code{degrees}: Converts an angle measured in radians to an approximately equivalent angle
1675+
#' measured in degrees.
1676+
#'
1677+
#' @rdname column_math_functions
1678+
#' @aliases degrees degrees,Column-method
1679+
#' @note degrees since 3.0.0
1680+
setMethod("degrees",
1681+
signature(x = "Column"),
1682+
function(x) {
1683+
jc <- callJStatic("org.apache.spark.sql.functions", "degrees", x@jc)
16551684
column(jc)
16561685
})
16571686

@@ -1665,7 +1694,22 @@ setMethod("toDegrees",
16651694
setMethod("toRadians",
16661695
signature(x = "Column"),
16671696
function(x) {
1668-
jc <- callJStatic("org.apache.spark.sql.functions", "toRadians", x@jc)
1697+
.Deprecated("radians")
1698+
jc <- callJStatic("org.apache.spark.sql.functions", "radians", x@jc)
1699+
column(jc)
1700+
})
1701+
1702+
#' @details
1703+
#' \code{radians}: Converts an angle measured in degrees to an approximately equivalent angle
1704+
#' measured in radians.
1705+
#'
1706+
#' @rdname column_math_functions
1707+
#' @aliases radians radians,Column-method
1708+
#' @note radians since 3.0.0
1709+
setMethod("radians",
1710+
signature(x = "Column"),
1711+
function(x) {
1712+
jc <- callJStatic("org.apache.spark.sql.functions", "radians", x@jc)
16691713
column(jc)
16701714
})
16711715

@@ -2065,13 +2109,24 @@ setMethod("pmod", signature(y = "Column"),
20652109

20662110
#' @param rsd maximum estimation error allowed (default = 0.05).
20672111
#'
2112+
#' @rdname column_aggregate_functions
2113+
#' @aliases approx_count_distinct,Column-method
2114+
#' @note approx_count_distinct(Column, numeric) since 3.0.0
2115+
setMethod("approx_count_distinct",
2116+
signature(x = "Column"),
2117+
function(x, rsd = 0.05) {
2118+
jc <- callJStatic("org.apache.spark.sql.functions", "approx_count_distinct", x@jc, rsd)
2119+
column(jc)
2120+
})
2121+
20682122
#' @rdname column_aggregate_functions
20692123
#' @aliases approxCountDistinct,Column-method
20702124
#' @note approxCountDistinct(Column, numeric) since 1.4.0
20712125
setMethod("approxCountDistinct",
20722126
signature(x = "Column"),
20732127
function(x, rsd = 0.05) {
2074-
jc <- callJStatic("org.apache.spark.sql.functions", "approxCountDistinct", x@jc, rsd)
2128+
.Deprecated("approx_count_distinct")
2129+
jc <- callJStatic("org.apache.spark.sql.functions", "approx_count_distinct", x@jc, rsd)
20752130
column(jc)
20762131
})
20772132

R/pkg/R/generics.R

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -746,6 +746,10 @@ setGeneric("windowOrderBy", function(col, ...) { standardGeneric("windowOrderBy"
746746
#' @name NULL
747747
setGeneric("add_months", function(y, x) { standardGeneric("add_months") })
748748

749+
#' @rdname column_aggregate_functions
750+
#' @name NULL
751+
setGeneric("approx_count_distinct", function(x, ...) { standardGeneric("approx_count_distinct") })
752+
749753
#' @rdname column_aggregate_functions
750754
#' @name NULL
751755
setGeneric("approxCountDistinct", function(x, ...) { standardGeneric("approxCountDistinct") })
@@ -1287,10 +1291,18 @@ setGeneric("substring_index", function(x, delim, count) { standardGeneric("subst
12871291
#' @name NULL
12881292
setGeneric("sumDistinct", function(x) { standardGeneric("sumDistinct") })
12891293

1294+
#' @rdname column_math_functions
1295+
#' @name NULL
1296+
setGeneric("degrees", function(x) { standardGeneric("degrees") })
1297+
12901298
#' @rdname column_math_functions
12911299
#' @name NULL
12921300
setGeneric("toDegrees", function(x) { standardGeneric("toDegrees") })
12931301

1302+
#' @rdname column_math_functions
1303+
#' @name NULL
1304+
setGeneric("radians", function(x) { standardGeneric("radians") })
1305+
12941306
#' @rdname column_math_functions
12951307
#' @name NULL
12961308
setGeneric("toRadians", function(x) { standardGeneric("toRadians") })

R/pkg/inst/profile/general.R

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,10 @@
1616
#
1717

1818
.First <- function() {
19+
if (utils::compareVersion(paste0(R.version$major, ".", R.version$minor), "3.4.0") == -1) {
20+
warning("Support for R prior to version 3.4 is deprecated since Spark 3.0.0")
21+
}
22+
1923
packageDir <- Sys.getenv("SPARKR_PACKAGE_DIR")
2024
dirs <- strsplit(packageDir, ",")[[1]]
2125
.libPaths(c(dirs, .libPaths()))

R/pkg/inst/profile/shell.R

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,10 @@
1616
#
1717

1818
.First <- function() {
19+
if (utils::compareVersion(paste0(R.version$major, ".", R.version$minor), "3.4.0") == -1) {
20+
warning("Support for R prior to version 3.4 is deprecated since Spark 3.0.0")
21+
}
22+
1923
home <- Sys.getenv("SPARK_HOME")
2024
.libPaths(c(file.path(home, "R", "lib"), .libPaths()))
2125
Sys.setenv(NOAWT = 1)

R/pkg/tests/fulltests/test_sparkSQL.R

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1379,7 +1379,7 @@ test_that("column operators", {
13791379

13801380
test_that("column functions", {
13811381
c <- column("a")
1382-
c1 <- abs(c) + acos(c) + approxCountDistinct(c) + ascii(c) + asin(c) + atan(c)
1382+
c1 <- abs(c) + acos(c) + approx_count_distinct(c) + ascii(c) + asin(c) + atan(c)
13831383
c2 <- avg(c) + base64(c) + bin(c) + bitwiseNOT(c) + cbrt(c) + ceil(c) + cos(c)
13841384
c3 <- cosh(c) + count(c) + crc32(c) + hash(c) + exp(c)
13851385
c4 <- explode(c) + expm1(c) + factorial(c) + first(c) + floor(c) + hex(c)
@@ -1388,7 +1388,7 @@ test_that("column functions", {
13881388
c7 <- mean(c) + min(c) + month(c) + negate(c) + posexplode(c) + quarter(c)
13891389
c8 <- reverse(c) + rint(c) + round(c) + rtrim(c) + sha1(c) + monotonically_increasing_id()
13901390
c9 <- signum(c) + sin(c) + sinh(c) + size(c) + stddev(c) + soundex(c) + sqrt(c) + sum(c)
1391-
c10 <- sumDistinct(c) + tan(c) + tanh(c) + toDegrees(c) + toRadians(c)
1391+
c10 <- sumDistinct(c) + tan(c) + tanh(c) + degrees(c) + radians(c)
13921392
c11 <- to_date(c) + trim(c) + unbase64(c) + unhex(c) + upper(c)
13931393
c12 <- variance(c) + ltrim(c, "a") + rtrim(c, "b") + trim(c, "c")
13941394
c13 <- lead("col", 1) + lead(c, 1) + lag("col", 1) + lag(c, 1)

R/pkg/vignettes/sparkr-vignettes.Rmd

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,20 @@ First, let's load and attach the package.
5757
library(SparkR)
5858
```
5959

60+
```{r, include=FALSE}
61+
# disable eval if java version not supported
62+
override_eval <- tryCatch(!is.numeric(SparkR:::checkJavaVersion()),
63+
error = function(e) { TRUE },
64+
warning = function(e) { TRUE })
65+
66+
if (override_eval) {
67+
opts_hooks$set(eval = function(options) {
68+
options$eval = FALSE
69+
options
70+
})
71+
}
72+
```
73+
6074
`SparkSession` is the entry point into SparkR, which connects your R program to a Spark cluster. You can create a `SparkSession` using `sparkR.session` and pass in options such as the application name, any Spark packages it depends on, etc.
6175

6276
We use the default settings, in which it runs in local mode. It automatically downloads the Spark package in the background if no previous installation is found. For more details about setup, see [Spark Session](#SetupSparkSession).

0 commit comments

Comments
 (0)