Skip to content

Commit e6b4615

Browse files
authored
Merge branch 'master' into lir_instr
2 parents c8693d8 + 3ccabdf commit e6b4615

File tree

167 files changed

+6531
-4123
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

167 files changed

+6531
-4123
lines changed

R/pkg/DESCRIPTION

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
Package: SparkR
22
Type: Package
3-
Version: 2.1.0
3+
Version: 2.2.0
44
Title: R Frontend for Apache Spark
55
Description: The SparkR package provides an R Frontend for Apache Spark.
66
Authors@R: c(person("Shivaram", "Venkataraman", role = c("aut", "cre"),
@@ -41,7 +41,13 @@ Collate:
4141
'functions.R'
4242
'install.R'
4343
'jvm.R'
44-
'mllib.R'
44+
'mllib_classification.R'
45+
'mllib_clustering.R'
46+
'mllib_recommendation.R'
47+
'mllib_regression.R'
48+
'mllib_stat.R'
49+
'mllib_tree.R'
50+
'mllib_utils.R'
4551
'serialize.R'
4652
'sparkR.R'
4753
'stats.R'

R/pkg/R/DataFrame.R

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2313,9 +2313,9 @@ setMethod("dropDuplicates",
23132313
#' @param joinExpr (Optional) The expression used to perform the join. joinExpr must be a
23142314
#' Column expression. If joinExpr is omitted, the default inner join is attempted and an error is
23152315
#' thrown if it would be a Cartesian product. For a Cartesian join, use crossJoin instead.
2316-
#' @param joinType The type of join to perform. The following join types are available:
2317-
#' 'inner', 'outer', 'full', 'fullouter', leftouter', 'left_outer', 'left',
2318-
#' 'right_outer', 'rightouter', 'right', and 'leftsemi'. The default joinType is "inner".
2316+
#' @param joinType The type of join to perform, default 'inner'.
2317+
#' Must be one of: 'inner', 'cross', 'outer', 'full', 'full_outer',
2318+
#' 'left', 'left_outer', 'right', 'right_outer', 'left_semi', or 'left_anti'.
23192319
#' @return A SparkDataFrame containing the result of the join operation.
23202320
#' @family SparkDataFrame functions
23212321
#' @aliases join,SparkDataFrame,SparkDataFrame-method
@@ -2344,15 +2344,18 @@ setMethod("join",
23442344
if (is.null(joinType)) {
23452345
sdf <- callJMethod(x@sdf, "join", y@sdf, joinExpr@jc)
23462346
} else {
2347-
if (joinType %in% c("inner", "outer", "full", "fullouter",
2348-
"leftouter", "left_outer", "left",
2349-
"rightouter", "right_outer", "right", "leftsemi")) {
2347+
if (joinType %in% c("inner", "cross",
2348+
"outer", "full", "fullouter", "full_outer",
2349+
"left", "leftouter", "left_outer",
2350+
"right", "rightouter", "right_outer",
2351+
"left_semi", "leftsemi", "left_anti", "leftanti")) {
23502352
joinType <- gsub("_", "", joinType)
23512353
sdf <- callJMethod(x@sdf, "join", y@sdf, joinExpr@jc, joinType)
23522354
} else {
23532355
stop("joinType must be one of the following types: ",
2354-
"'inner', 'outer', 'full', 'fullouter', 'leftouter', 'left_outer', 'left',
2355-
'rightouter', 'right_outer', 'right', 'leftsemi'")
2356+
"'inner', 'cross', 'outer', 'full', 'full_outer',",
2357+
"'left', 'left_outer', 'right', 'right_outer',",
2358+
"'left_semi', or 'left_anti'.")
23562359
}
23572360
}
23582361
}

R/pkg/R/functions.R

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3150,7 +3150,8 @@ setMethod("cume_dist",
31503150
#' The difference between rank and dense_rank is that dense_rank leaves no gaps in ranking
31513151
#' sequence when there are ties. That is, if you were ranking a competition using dense_rank
31523152
#' and had three people tie for second place, you would say that all three were in second
3153-
#' place and that the next person came in third.
3153+
#' place and that the next person came in third. Rank would give sequential numbers, so
3154+
#' the person that came in third place (after the ties) would register as coming in fifth.
31543155
#'
31553156
#' This is equivalent to the \code{DENSE_RANK} function in SQL.
31563157
#'
@@ -3321,10 +3322,11 @@ setMethod("percent_rank",
33213322
#'
33223323
#' Window function: returns the rank of rows within a window partition.
33233324
#'
3324-
#' The difference between rank and denseRank is that denseRank leaves no gaps in ranking
3325-
#' sequence when there are ties. That is, if you were ranking a competition using denseRank
3325+
#' The difference between rank and dense_rank is that dense_rank leaves no gaps in ranking
3326+
#' sequence when there are ties. That is, if you were ranking a competition using dense_rank
33263327
#' and had three people tie for second place, you would say that all three were in second
3327-
#' place and that the next person came in third.
3328+
#' place and that the next person came in third. Rank would give sequential numbers, so
3329+
#' the person that came in third place (after the ties) would register as coming in fifth.
33283330
#'
33293331
#' This is equivalent to the RANK function in SQL.
33303332
#'

0 commit comments

Comments
 (0)