Commit b66534d

Merge pull request apache#144 from shivaram/fix-rd-files
[SPARKR-176] Fix SparkR Rd files
2 parents 227ee42 + 32394de · commit b66534d

34 files changed (+249 −203 lines changed)

pkg/NAMESPACE

Lines changed: 1 addition & 1 deletion
@@ -1,6 +1,5 @@
 #exportPattern("^[[:alpha:]]+")
 exportClasses("RDD")
-exportClasses("PipelinedRDD")
 exportClasses("Broadcast")
 exportMethods(
   "cache",
@@ -67,3 +66,4 @@ export(
 export("sparkR.init")
 export("print.jobj")
 useDynLib(SparkR, stringHashCode)
+importFrom(methods, setGeneric, setMethod, setOldClass)
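
The new `importFrom` line matters because a package's code cannot assume the methods package is attached (Rscript, notably, does not attach it by default). A minimal sketch of what the three imported functions do; the `describe` generic is hypothetical, for illustration only:

```r
setOldClass("jobj")   # register the S3 class "jobj" so S4 can dispatch on it
setGeneric("describe", function(x) { standardGeneric("describe") })
setMethod("describe", signature(x = "jobj"),
          function(x) { cat("a JVM object reference\n") })
```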

pkg/R/RDD.R

Lines changed: 26 additions & 22 deletions
@@ -8,8 +8,8 @@ setOldClass("jobj")
 #' @rdname RDD
 #' @seealso parallelize, textFile
 #'
-#' @param env An R environment that stores bookkeeping states of the RDD
-#' @param jrdd Java object reference to the backing JavaRDD
+#' @slot env An R environment that stores bookkeeping states of the RDD
+#' @slot jrdd Java object reference to the backing JavaRDD
 #' @export
 setClass("RDD",
          slots = list(env = "environment",
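
The `@param` → `@slot` switch follows roxygen2 4.x conventions: `@param` documents function arguments, while `@slot` documents S4 slots and renders as the `\section{Slots}` block visible in pkg/man/RDD.Rd further down. A sketch with a hypothetical class:

```r
# "Tally" is a made-up class for illustration; the @slot line below
# becomes a "Slots" section in the generated Rd file.
#' A tiny tally class
#'
#' @slot count A numeric tally
setClass("Tally", slots = list(count = "numeric"))
```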
@@ -81,13 +81,16 @@ setMethod("initialize", "PipelinedRDD", function(.Object, prev, func, jrdd_val)

 #' @rdname RDD
 #' @export
+#'
+#' @param jrdd Java object reference to the backing JavaRDD
+#' @param serialized TRUE if the RDD stores data serialized in R
+#' @param isCached TRUE if the RDD is cached
+#' @param isCheckpointed TRUE if the RDD has been checkpointed
 RDD <- function(jrdd, serialized = TRUE, isCached = FALSE,
                 isCheckpointed = FALSE) {
   new("RDD", jrdd, serialized, isCached, isCheckpointed)
 }

-#' @rdname PipelinedRDD
-#' @export
 PipelinedRDD <- function(prev, func) {
   new("PipelinedRDD", prev, func, NULL)
 }
@@ -414,7 +417,7 @@ setMethod("lookup",

 #' Return the number of elements in the RDD.
 #'
-#' @param rdd The RDD to count
+#' @param x The RDD to count
 #' @return number of elements in the RDD.
 #' @rdname count
 #' @export
@@ -425,17 +428,17 @@ setMethod("lookup",
 #' count(rdd) # 10
 #' length(rdd) # Same as count
 #'}
-setGeneric("count", function(rdd) { standardGeneric("count") })
+setGeneric("count", function(x) { standardGeneric("count") })

 #' @rdname count
 #' @aliases count,RDD-method
 setMethod("count",
-          signature(rdd = "RDD"),
-          function(rdd) {
+          signature(x = "RDD"),
+          function(x) {
            countPartition <- function(part) {
              as.integer(length(part))
            }
-            valsRDD <- lapplyPartition(rdd, countPartition)
+            valsRDD <- lapplyPartition(x, countPartition)
            vals <- collect(valsRDD)
            sum(as.integer(vals))
          })
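
Renaming the first argument from `rdd` to `x` matters because S4 methods must use the same formal names as their generic, so the `setGeneric` and `setMethod` changes have to land together. Usage, per the examples above (assumes a SparkContext `sc` from `sparkR.init()`):

```r
rdd <- parallelize(sc, 1:10)
count(rdd)    # 10
length(rdd)   # same as count
```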
@@ -511,6 +514,7 @@ setMethod("countByKey",
 #' @param FUN the transformation to apply on each element
 #' @return a new RDD created by the transformation.
 #' @rdname lapply
+#' @aliases lapply
 #' @export
 #' @examples
 #'\dontrun{
@@ -666,8 +670,8 @@ setMethod("mapPartitionsWithIndex",
 #' a predicate (i.e. returning TRUE in a given logical function).
 #' The same as `filter()' in Spark.
 #'
-#' @param rdd The RDD to be filtered.
-#' @param filterFunc A unary predicate function.
+#' @param x The RDD to be filtered.
+#' @param f A unary predicate function.
 #' @rdname filterRDD
 #' @export
 #' @examples
@@ -677,21 +681,22 @@ setMethod("mapPartitionsWithIndex",
 #' unlist(collect(filterRDD(rdd, function (x) { x < 3 }))) # c(1, 2)
 #'}
 setGeneric("filterRDD",
-           function(rdd, filterFunc) { standardGeneric("filterRDD") })
+           function(x, f) { standardGeneric("filterRDD") })

 #' @rdname filterRDD
 #' @aliases filterRDD,RDD,function-method
 setMethod("filterRDD",
-          signature(rdd = "RDD", filterFunc = "function"),
-          function(rdd, filterFunc) {
+          signature(x = "RDD", f = "function"),
+          function(x, f) {
            filter.func <- function(part) {
-              Filter(filterFunc, part)
+              Filter(f, part)
            }
-            lapplyPartition(rdd, filter.func)
+            lapplyPartition(x, filter.func)
          })

 #' @rdname filterRDD
-#' @aliases Filter,function,RDD-method
+#' @export
+#' @aliases Filter
 setMethod("Filter",
           signature(f = "function", x = "RDD"),
           function(f, x) {
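
Note that `Filter` here is base R's `Filter(f, x)` being made generic over RDDs, which is why the SparkR generic's arguments were renamed to `x` and `f` to match. Both spellings from the docs above (again assuming a SparkContext `sc`):

```r
rdd <- parallelize(sc, 1:10)
unlist(collect(filterRDD(rdd, function(x) { x < 3 })))  # c(1, 2)
unlist(collect(Filter(function(x) { x < 3 }, rdd)))     # same result, base-R argument order
```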
@@ -802,9 +807,6 @@ setMethod("foreach",

 #' Applies a function to each partition in an RDD, and force evaluation.
 #'
-#' @param rdd The RDD to apply the function
-#' @param func The function to be applied to partitions.
-#' @return invisible NULL.
 #' @export
 #' @rdname foreach
 #' @examples
@@ -1652,7 +1654,8 @@ setMethod("join",
 #' sc <- sparkR.init()
 #' rdd1 <- parallelize(sc, list(list(1, 1), list(2, 4)))
 #' rdd2 <- parallelize(sc, list(list(1, 2), list(1, 3)))
-#' leftOuterJoin(rdd1, rdd2, 2L) # list(list(1, list(1, 2)), list(1, list(1, 3)), list(2, list(4, NULL)))
+#' leftOuterJoin(rdd1, rdd2, 2L)
+#' # list(list(1, list(1, 2)), list(1, list(1, 3)), list(2, list(4, NULL)))
 #'}
 setGeneric("leftOuterJoin", function(rdd1, rdd2, numPartitions) { standardGeneric("leftOuterJoin") })

@@ -1721,7 +1724,8 @@ setMethod("leftOuterJoin",
 #' sc <- sparkR.init()
 #' rdd1 <- parallelize(sc, list(list(1, 2), list(1, 3)))
 #' rdd2 <- parallelize(sc, list(list(1, 1), list(2, 4)))
-#' rightOuterJoin(rdd1, rdd2, 2L) # list(list(1, list(2, 1)), list(1, list(3, 1)), list(2, list(NULL, 4)))
+#' rightOuterJoin(rdd1, rdd2, 2L)
+#' # list(list(1, list(2, 1)), list(1, list(3, 1)), list(2, list(NULL, 4)))
 #'}
 setGeneric("rightOuterJoin", function(rdd1, rdd2, numPartitions) { standardGeneric("rightOuterJoin") })

pkg/R/jobj.R

Lines changed: 10 additions & 3 deletions
@@ -33,10 +33,17 @@ jobj <- function(objId) {
   obj
 }

-print.jobj <- function(jobj) {
-  cls <- callJMethod(jobj, "getClass")
+#' Print a JVM object reference.
+#'
+#' This function prints the type and id for an object stored
+#' in the SparkR JVM backend.
+#'
+#' @param x The JVM object reference
+#' @param ... further arguments passed to or from other methods
+print.jobj <- function(x, ...) {
+  cls <- callJMethod(x, "getClass")
   name <- callJMethod(cls, "getName")
-  cat("Java ref type", name, "id", jobj$id, "\n", sep = " ")
+  cat("Java ref type", name, "id", x$id, "\n", sep = " ")
 }

 cleanup.jobj <- function(jobj) {
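
The new signature is what R CMD check's S3 generic/method consistency check requires: a method for `print` must accept the generic's formals, and those are `(x, ...)`:

```r
args(print)
#> function (x, ...)
#> NULL
```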

pkg/R/serialize.R

Lines changed: 1 addition & 1 deletion
@@ -51,7 +51,7 @@ writeBoolean <- function(con, value) {
 }

 writeRawSerialize <- function(outputCon, batch) {
-  outputSer <- serialize(batch, ascii = FALSE, conn = NULL)
+  outputSer <- serialize(batch, ascii = FALSE, connection = NULL)
   writeRaw(outputCon, outputSer)
 }
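
`base::serialize` has no `conn` argument; its second formal is `connection`, so the old call worked only through R's partial argument matching. With `connection = NULL` the function returns the encoded object as a raw vector, which `writeRaw` then sends to the output connection. A standalone check in plain R:

```r
bytes <- serialize(list(1L, "a"), connection = NULL, ascii = FALSE)
is.raw(bytes)                                  # TRUE
identical(unserialize(bytes), list(1L, "a"))   # TRUE
```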

pkg/R/sparkR.R

Lines changed: 3 additions & 1 deletion
@@ -59,6 +59,8 @@ sparkR.stop <- function(env) {
 #' @param sparkEnvir Named list of environment variables to set on worker nodes.
 #' @param sparkExecutorEnv Named list of environment variables to be used when launching executors.
 #' @param sparkJars Character string vector of jar files to pass to the worker nodes.
+#' @param sparkRLibDir The path where R is installed on the worker nodes.
+#' @param sparkRBackendPort The port to use for SparkR JVM Backend.
 #' @export
 #' @examples
 #'\dontrun{
@@ -67,7 +69,7 @@ sparkR.stop <- function(env) {
 #'                  list(spark.executor.memory="1g"))
 #' sc <- sparkR.init("yarn-client", "SparkR", "/home/spark",
 #'                  list(spark.executor.memory="1g"),
-#'                  list(LD_LIBRARY_PATH="/directory of Java VM Library Files (libjvm.so) on worker nodes/"),
+#'                  list(LD_LIBRARY_PATH="/directory of JVM libraries (libjvm.so) on workers/"),
 #'                  c("jarfile1.jar","jarfile2.jar"))
 #'}

pkg/R/sparkRBackend.R

Lines changed: 2 additions & 2 deletions
@@ -4,8 +4,8 @@
 # Returns TRUE if object is an instance of given class
 isInstanceOf <- function(jobj, className) {
   stopifnot(class(jobj) == "jobj")
-  cls <- SparkR:::callJStatic("java.lang.Class", "forName", className)
-  SparkR:::callJMethod(cls, "isInstance", jobj)
+  cls <- callJStatic("java.lang.Class", "forName", className)
+  callJMethod(cls, "isInstance", jobj)
 }

 # Call a Java method named methodName on the object
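
Dropping the `SparkR:::` prefix is safe because this code already runs inside the SparkR namespace, where internal helpers are visible directly (and R CMD check flags a package calling itself via `:::`). Usage is unchanged; a sketch, where `obj` stands for some jobj handle returned by the backend:

```r
isInstanceOf(obj, "org.apache.spark.api.java.JavaRDD")  # TRUE or FALSE
```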

pkg/man/RDD.Rd

Lines changed: 17 additions & 6 deletions
@@ -1,21 +1,32 @@
+% Generated by roxygen2 (4.0.2): do not edit by hand
 \docType{class}
 \name{RDD-class}
 \alias{RDD}
 \alias{RDD-class}
 \title{S4 class that represents an RDD}
 \usage{
-RDD(jrdd, serialized = TRUE)
+RDD(jrdd, serialized = TRUE, isCached = FALSE, isCheckpointed = FALSE)
 }
 \arguments{
-  \item{jrdd}{Java object reference to the backing JavaRDD}
+\item{jrdd}{Java object reference to the backing JavaRDD}

-  \item{serialized}{TRUE if the JavaRDD contains serialized
-  R objects}
+\item{serialized}{TRUE if the RDD stores data serialized in R}
+
+\item{isCached}{TRUE if the RDD is cached}
+
+\item{isCheckpointed}{TRUE if the RDD has been checkpointed}
 }
 \description{
-RDD can be created using functions like \code{parallelize},
-\code{textFile} etc.
+RDD can be created using functions like
+\code{parallelize}, \code{textFile} etc.
 }
+\section{Slots}{
+
+\describe{
+\item{\code{env}}{An R environment that stores bookkeeping states of the RDD}
+
+\item{\code{jrdd}}{Java object reference to the backing JavaRDD}
+}}
 \seealso{
 parallelize, textFile
 }
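
This file and the remaining pkg/man/*.Rd diffs are mechanical: they are regenerated by roxygen2 4.0.2 from the updated `#'` comments, hence the new do-not-edit header on each. A sketch of the regeneration step, assuming it is run from the repository root:

```r
library(roxygen2)
roxygenize("pkg")   # rewrites pkg/man/*.Rd from the roxygen comments in pkg/R
```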

pkg/man/broadcast-class.Rd

Lines changed: 6 additions & 6 deletions
@@ -1,3 +1,4 @@
+% Generated by roxygen2 (4.0.2): do not edit by hand
 \docType{class}
 \name{Broadcast-class}
 \alias{Broadcast}
@@ -7,18 +8,17 @@
 Broadcast(id, value, jBroadcastRef, objName)
 }
 \arguments{
-  \item{id}{Id of the backing Spark broadcast variable}
+\item{id}{Id of the backing Spark broadcast variable}

-  \item{value}{Value of the broadcast variable}
+\item{value}{Value of the broadcast variable}

-  \item{jBroadcastRef}{reference to the backing Java
-  broadcast object}
+\item{jBroadcastRef}{reference to the backing Java broadcast object}

-  \item{objName}{name of broadcasted object}
+\item{objName}{name of broadcasted object}
 }
 \description{
 Broadcast variables can be created using the broadcast
-  function from a \code{SparkContext}.
+function from a \code{SparkContext}.
 }
 \seealso{
 broadcast

pkg/man/broadcast-internal.Rd

Lines changed: 5 additions & 5 deletions
@@ -1,18 +1,18 @@
+% Generated by roxygen2 (4.0.2): do not edit by hand
 \name{setBroadcastValue}
 \alias{setBroadcastValue}
 \title{Internal function to set values of a broadcast variable.}
 \usage{
 setBroadcastValue(bcastId, value)
 }
 \arguments{
-  \item{bcastId}{The id of broadcast variable to set}
+\item{bcastId}{The id of broadcast variable to set}

-  \item{value}{The value to be set}
+\item{value}{The value to be set}
 }
 \description{
-  This function is used internally by Spark to set the value
-  of a broadcast variable on workers. Not intended for use
-  outside the package.
+This function is used internally by Spark to set the value of a broadcast
+variable on workers. Not intended for use outside the package.
 }
 \seealso{
 broadcast, value

pkg/man/broadcast.Rd

Lines changed: 8 additions & 8 deletions
@@ -1,3 +1,4 @@
+% Generated by roxygen2 (4.0.2): do not edit by hand
 \docType{methods}
 \name{value}
 \alias{broadcast}
@@ -12,19 +13,18 @@ value(bcast)
 broadcast(sc, object)
 }
 \arguments{
-  \item{bcast}{The broadcast variable to get}
+\item{bcast}{The broadcast variable to get}

-  \item{sc}{Spark Context to use}
+\item{sc}{Spark Context to use}

-  \item{object}{Object to be broadcast}
+\item{object}{Object to be broadcast}
 }
 \description{
-  \code{value} can be used to get the value of a broadcast
-  variable inside a distributed function.
+\code{value} can be used to get the value of a broadcast variable inside
+a distributed function.

-  Broadcast a read-only variable to the cluster, returning a
-  \code{Broadcast} object for reading it in distributed
-  functions.
+Broadcast a read-only variable to the cluster, returning a \code{Broadcast}
+object for reading it in distributed functions.
 }
 \examples{
 \dontrun{
