
Commit 1254953

Merge branch 'master' of https://github.com/apache/spark into sparkr-df-api

Conflicts:
	R/pkg/R/DataFrame.R

2 parents: 0521149 + 008a60d

File tree

191 files changed: +13060 -2836 lines


.rat-excludes

Lines changed: 7 additions & 0 deletions

@@ -74,5 +74,12 @@ logs
 .*scalastyle-output.xml
 .*dependency-reduced-pom.xml
 known_translations
+json_expectation
+local-1422981759269/*
+local-1422981780767/*
+local-1425081759269/*
+local-1426533911241/*
+local-1426633911242/*
+local-1430917381534/*
 DESCRIPTION
 NAMESPACE

R/pkg/DESCRIPTION

Lines changed: 1 addition & 1 deletion

@@ -15,11 +15,11 @@ Suggests:
 Description: R frontend for Spark
 License: Apache License (== 2.0)
 Collate:
+    'schema.R'
     'generics.R'
     'jobj.R'
     'RDD.R'
     'pairRDD.R'
-    'schema.R'
     'column.R'
     'group.R'
     'DataFrame.R'
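
A note on the change above (mine, not the commit's): R sources a package's files in Collate order, so definitions needed at load time by later files must appear first; 'schema.R' presumably moves ahead of 'generics.R' for that reason. A minimal sketch with hypothetical files a.R and b.R:

# a.R -- collated first: defines a class
setClass("Point", representation(x = "numeric", y = "numeric"))

# b.R -- collated later: extends the class during package load,
# which errors if "Point" is not yet defined
setClass("Point3D", contains = "Point", representation(z = "numeric"))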

R/pkg/NAMESPACE

Lines changed: 0 additions & 4 deletions

@@ -28,7 +28,6 @@ exportMethods("arrange",
               "intersect",
               "isLocal",
               "join",
-              "length",
               "limit",
               "orderBy",
               "mutate",
@@ -108,9 +107,6 @@ export("cacheTable",
        "tables",
        "uncacheTable")
 
-export("sparkRSQL.init",
-       "sparkRHive.init")
-
 export("structField",
        "structField.jobj",
        "structField.character",

R/pkg/R/DataFrame.R

Lines changed: 49 additions & 45 deletions

@@ -45,6 +45,9 @@ setMethod("initialize", "DataFrame", function(.Object, sdf, isCached) {
 
 #' @rdname DataFrame
 #' @export
+#'
+#' @param sdf A Java object reference to the backing Scala DataFrame
+#' @param isCached TRUE if the dataFrame is cached
 dataFrame <- function(sdf, isCached = FALSE) {
   new("DataFrame", sdf, isCached)
 }
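
To illustrate the newly documented parameters (my sketch, reusing example calls that appear elsewhere in this file): dataFrame() is normally invoked for you by reader functions, which pass in the Java-side reference.

sc <- sparkR.init()
sqlCtx <- sparkRSQL.init(sc)
df <- jsonFile(sqlCtx, "path/to/file.json")  # presumably wraps the Scala object via dataFrame()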
@@ -244,7 +247,7 @@ setMethod("columns",
           })
 
 #' @rdname columns
-#' @export
+#' @aliases names,DataFrame,function-method
 setMethod("names",
           signature(x = "DataFrame"),
           function(x) {
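
Since names() now shares a documentation page with columns() (the common @rdname), both accessors should report the same thing:

columns(df)  # the DataFrame's column names
names(df)    # base-R-style accessor, same result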
@@ -399,23 +402,23 @@ setMethod("repartition",
             dataFrame(sdf)
           })
 
-#' toJSON
-#'
-#' Convert the rows of a DataFrame into JSON objects and return an RDD where
-#' each element contains a JSON string.
-#'
-#' @param x A SparkSQL DataFrame
-#' @return A StringRRDD of JSON objects
-#' @rdname tojson
-#' @export
-#' @examples
-#'\dontrun{
-#' sc <- sparkR.init()
-#' sqlCtx <- sparkRSQL.init(sc)
-#' path <- "path/to/file.json"
-#' df <- jsonFile(sqlCtx, path)
-#' newRDD <- toJSON(df)
-#'}
+# toJSON
+#
+# Convert the rows of a DataFrame into JSON objects and return an RDD where
+# each element contains a JSON string.
+#
+# @param x A SparkSQL DataFrame
+# @return A StringRRDD of JSON objects
+# @rdname tojson
+# @export
+# @examples
+#\dontrun{
+# sc <- sparkR.init()
+# sqlCtx <- sparkRSQL.init(sc)
+# path <- "path/to/file.json"
+# df <- jsonFile(sqlCtx, path)
+# newRDD <- toJSON(df)
+#}
 setMethod("toJSON",
           signature(x = "DataFrame"),
           function(x) {
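
The method itself survives this hunk; only its roxygen docs were demoted to plain comments, so toJSON() drops out of the generated reference but, per the old example, still runs:

newRDD <- toJSON(df)  # RDD whose elements are JSON strings, one per row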
@@ -588,8 +591,8 @@ setMethod("limit",
             dataFrame(res)
           })
 
-# Take the first NUM rows of a DataFrame and return a the results as a data.frame
-
+#' Take the first NUM rows of a DataFrame and return a the results as a data.frame
+#'
 #' @rdname take
 #' @export
 #' @examples
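
Promoting this comment block back to roxygen (#') restores take() to the generated docs; per its description, the result is a local R data.frame (my sketch):

firstTwo <- take(df, 2)  # first 2 rows, collected locally
class(firstTwo)          # "data.frame"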
@@ -654,22 +657,22 @@ setMethod("first",
             take(x, 1)
           })
 
-#' toRDD()
-#'
-#' Converts a Spark DataFrame to an RDD while preserving column names.
-#'
-#' @param x A Spark DataFrame
-#'
-#' @rdname DataFrame
-#' @export
-#' @examples
-#'\dontrun{
-#' sc <- sparkR.init()
-#' sqlCtx <- sparkRSQL.init(sc)
-#' path <- "path/to/file.json"
-#' df <- jsonFile(sqlCtx, path)
-#' rdd <- toRDD(df)
-#' }
+# toRDD()
+#
+# Converts a Spark DataFrame to an RDD while preserving column names.
+#
+# @param x A Spark DataFrame
+#
+# @rdname DataFrame
+# @export
+# @examples
+#\dontrun{
+# sc <- sparkR.init()
+# sqlCtx <- sparkRSQL.init(sc)
+# path <- "path/to/file.json"
+# df <- jsonFile(sqlCtx, path)
+# rdd <- toRDD(df)
+# }
 setMethod("toRDD",
           signature(x = "DataFrame"),
           function(x) {
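
As with toJSON above, this is a docs-only demotion; the method stays callable:

rdd <- toRDD(df)  # RDD of rows, column names preserved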
@@ -725,6 +728,7 @@ setMethod("group_by",
 #'
 #' Compute aggregates by specifying a list of columns
 #'
+#' @param x a DataFrame
 #' @rdname DataFrame
 #' @alias summarize
 #' @export
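
A hypothetical call shape for the aggregate described above (the exact argument style varies across SparkR versions, so treat this as an assumption):

maxAge <- agg(df, age = "max")  # assumed column-name = function-name style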
@@ -749,53 +753,53 @@ setMethod("summarize",
 # the requested map function. #
 ###################################################################################
 
-#' @rdname lapply
+# @rdname lapply
 setMethod("lapply",
           signature(X = "DataFrame", FUN = "function"),
           function(X, FUN) {
             rdd <- toRDD(X)
             lapply(rdd, FUN)
           })
 
-#' @rdname lapply
+# @rdname lapply
 setMethod("map",
           signature(X = "DataFrame", FUN = "function"),
           function(X, FUN) {
             lapply(X, FUN)
           })
 
-#' @rdname flatMap
+# @rdname flatMap
 setMethod("flatMap",
           signature(X = "DataFrame", FUN = "function"),
           function(X, FUN) {
             rdd <- toRDD(X)
             flatMap(rdd, FUN)
           })
 
-#' @rdname lapplyPartition
+# @rdname lapplyPartition
 setMethod("lapplyPartition",
           signature(X = "DataFrame", FUN = "function"),
           function(X, FUN) {
             rdd <- toRDD(X)
             lapplyPartition(rdd, FUN)
           })
 
-#' @rdname lapplyPartition
+# @rdname lapplyPartition
 setMethod("mapPartitions",
           signature(X = "DataFrame", FUN = "function"),
           function(X, FUN) {
             lapplyPartition(X, FUN)
           })
 
-#' @rdname foreach
+# @rdname foreach
 setMethod("foreach",
           signature(x = "DataFrame", func = "function"),
           function(x, func) {
             rdd <- toRDD(x)
             foreach(rdd, func)
           })
 
-#' @rdname foreach
+# @rdname foreach
 setMethod("foreachPartition",
           signature(x = "DataFrame", func = "function"),
           function(x, func) {
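
Another docs-only change: each map-family method still dispatches on DataFrame and, as the bodies show, delegates to the RDD implementation via toRDD(). For instance:

rowsRdd <- lapply(df, function(row) row)  # same as lapply(toRDD(df), ...)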
@@ -816,6 +820,7 @@ setMethod("$", signature(x = "DataFrame"),
             getColumn(x, name)
           })
 
+#' @rdname select
 setMethod("$<-", signature(x = "DataFrame"),
           function(x, name, value) {
             stopifnot(class(value) == "Column" || is.null(value))
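
Per the stopifnot() in the body, $<- accepts either a Column or NULL (which presumably drops the column, as in base R):

df$ageDoubled <- df$age * 2  # assign a derived Column
df$ageDoubled <- NULL        # remove it again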
@@ -1112,7 +1117,6 @@ setMethod("arrange",
 
 #' @rdname arrange
 #' @aliases orderBy,DataFrame,function-method
-#' @export
 setMethod("orderBy",
           signature(x = "DataFrame", col = "characterOrColumn"),
           function(x, col) {
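
orderBy() is documented as an alias of arrange(), and its signature accepts a column name or a Column object:

sorted <- orderBy(df, "age")   # by name
sorted <- orderBy(df, df$age)  # by Column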
@@ -1149,7 +1153,7 @@ setMethod("filter",
           })
 
 #' @rdname filter
-#' @export
+#' @aliases where,DataFrame,function-method
 setMethod("where",
           signature(x = "DataFrame", condition = "characterOrColumn"),
           function(x, condition) {
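
where() likewise shares filter()'s documentation and takes a character or Column condition:

adults <- where(df, df$age > 21)  # Column condition
adults <- filter(df, "age > 21")  # SQL-string condition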
