Commit a8fde87

Initialize local master branch.
2 parents: 4dd1c8a + bb870e7

571 files changed: +15236 −7381 lines changed

.rat-excludes

Lines changed: 2 additions & 0 deletions
@@ -91,3 +91,5 @@ help/*
 html/*
 INDEX
 .lintr
+gen-java.*
+.*avpr

R/README.md

Lines changed: 1 addition & 1 deletion
@@ -6,7 +6,7 @@ SparkR is an R package that provides a light-weight frontend to use Spark from R
 
 #### Build Spark
 
-Build Spark with [Maven](http://spark.apache.org/docs/latest/building-spark.html#building-with-buildmvn) and include the `-PsparkR` profile to build the R package. For example to use the default Hadoop versions you can run
+Build Spark with [Maven](http://spark.apache.org/docs/latest/building-spark.html#building-with-buildmvn) and include the `-Psparkr` profile to build the R package. For example to use the default Hadoop versions you can run
 ```
 build/mvn -DskipTests -Psparkr package
 ```

R/install-dev.bat

Lines changed: 5 additions & 0 deletions
@@ -25,3 +25,8 @@ set SPARK_HOME=%~dp0..
 MKDIR %SPARK_HOME%\R\lib
 
 R.exe CMD INSTALL --library="%SPARK_HOME%\R\lib" %SPARK_HOME%\R\pkg\
+
+rem Zip the SparkR package so that it can be distributed to worker nodes on YARN
+pushd %SPARK_HOME%\R\lib
+%JAVA_HOME%\bin\jar.exe cfM "%SPARK_HOME%\R\lib\sparkr.zip" SparkR
+popd

R/install-dev.sh

Lines changed: 6 additions & 2 deletions
@@ -34,12 +34,16 @@ LIB_DIR="$FWDIR/lib"
 
 mkdir -p $LIB_DIR
 
-pushd $FWDIR
+pushd $FWDIR > /dev/null
 
 # Generate Rd files if devtools is installed
 Rscript -e ' if("devtools" %in% rownames(installed.packages())) { library(devtools); devtools::document(pkg="./pkg", roclets=c("rd")) }'
 
 # Install SparkR to $LIB_DIR
 R CMD INSTALL --library=$LIB_DIR $FWDIR/pkg/
 
-popd
+# Zip the SparkR package so that it can be distributed to worker nodes on YARN
+cd $LIB_DIR
+jar cfM "$LIB_DIR/sparkr.zip" SparkR
+
+popd > /dev/null
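
The zip built by both install scripts is what makes SparkR usable against a YARN cluster: the archive is distributed to executors so worker-side R processes can load the package. A minimal sketch under that assumption (the master string and app name are illustrative, and a configured YARN environment is assumed):

```
# Hypothetical launch once install-dev.sh has produced
# $SPARK_HOME/R/lib/sparkr.zip; that zip is shipped to the workers.
library(SparkR)
sc <- sparkR.init(master = "yarn-client", appName = "SparkROnYarn")
```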

R/pkg/DESCRIPTION

Lines changed: 0 additions & 1 deletion
@@ -32,4 +32,3 @@ Collate:
 'serialize.R'
 'sparkR.R'
 'utils.R'
-'zzz.R'

R/pkg/R/RDD.R

Lines changed: 0 additions & 2 deletions
@@ -165,7 +165,6 @@ setMethod("getJRDD", signature(rdd = "PipelinedRDD"),
 serializedFuncArr,
 rdd@env$prev_serializedMode,
 packageNamesArr,
-as.character(.sparkREnv[["libname"]]),
 broadcastArr,
 callJMethod(prev_jrdd, "classTag"))
 } else {
@@ -175,7 +174,6 @@ setMethod("getJRDD", signature(rdd = "PipelinedRDD"),
 rdd@env$prev_serializedMode,
 serializedMode,
 packageNamesArr,
-as.character(.sparkREnv[["libname"]]),
 broadcastArr,
 callJMethod(prev_jrdd, "classTag"))
 }

R/pkg/R/SQLContext.R

Lines changed: 3 additions & 1 deletion
@@ -86,7 +86,9 @@ infer_type <- function(x) {
 createDataFrame <- function(sqlContext, data, schema = NULL, samplingRatio = 1.0) {
   if (is.data.frame(data)) {
     # get the names of columns, they will be put into RDD
-    schema <- names(data)
+    if (is.null(schema)) {
+      schema <- names(data)
+    }
     n <- nrow(data)
     m <- ncol(data)
     # get rid of factor type
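
The effect of this hunk: a caller-supplied `schema` is no longer clobbered when `data` is a local data.frame. A hedged sketch of both paths (column names are illustrative; `faithful` is R's built-in two-column dataset):

```
# Before this change, the explicit schema below was silently replaced by
# names(faithful); after it, user-supplied column names are honored.
df1 <- createDataFrame(sqlContext, faithful)  # schema inferred from names(faithful)
df2 <- createDataFrame(sqlContext, faithful, schema = c("eruption_time", "waiting_time"))
```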

R/pkg/R/generics.R

Lines changed: 2 additions & 1 deletion
@@ -20,7 +20,8 @@
 # @rdname aggregateRDD
 # @seealso reduce
 # @export
-setGeneric("aggregateRDD", function(x, zeroValue, seqOp, combOp) { standardGeneric("aggregateRDD") })
+setGeneric("aggregateRDD",
+           function(x, zeroValue, seqOp, combOp) { standardGeneric("aggregateRDD") })
 
 # @rdname cache-methods
 # @export
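
For context, a hedged sketch of what the reflowed generic does in SparkR's internal RDD API: fold each partition with `seqOp` starting from `zeroValue`, then merge the per-partition results with `combOp` (the `parallelize` call and partition count are illustrative):

```
rdd <- parallelize(sc, 1:10, 2L)
# seqOp folds elements into the accumulator within a partition;
# combOp merges the per-partition accumulators. Result: 55.
total <- aggregateRDD(rdd, 0,
                      function(acc, v) { acc + v },
                      function(a, b) { a + b })
```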

R/pkg/R/pairRDD.R

Lines changed: 6 additions & 7 deletions
@@ -215,7 +215,6 @@ setMethod("partitionBy",
 serializedHashFuncBytes,
 getSerializedMode(x),
 packageNamesArr,
-as.character(.sparkREnv$libname),
 broadcastArr,
 callJMethod(jrdd, "classTag"))
 
@@ -560,8 +559,8 @@ setMethod("join",
 # Left outer join two RDDs
 #
 # @description
-# \code{leftouterjoin} This function left-outer-joins two RDDs where every element is of the form list(K, V).
-# The key types of the two RDDs should be the same.
+# \code{leftouterjoin} This function left-outer-joins two RDDs where every element is of
+# the form list(K, V). The key types of the two RDDs should be the same.
 #
 # @param x An RDD to be joined. Should be an RDD where each element is
 # list(K, V).
@@ -597,8 +596,8 @@ setMethod("leftOuterJoin",
 # Right outer join two RDDs
 #
 # @description
-# \code{rightouterjoin} This function right-outer-joins two RDDs where every element is of the form list(K, V).
-# The key types of the two RDDs should be the same.
+# \code{rightouterjoin} This function right-outer-joins two RDDs where every element is of
+# the form list(K, V). The key types of the two RDDs should be the same.
 #
 # @param x An RDD to be joined. Should be an RDD where each element is
 # list(K, V).
@@ -634,8 +633,8 @@ setMethod("rightOuterJoin",
 # Full outer join two RDDs
 #
 # @description
-# \code{fullouterjoin} This function full-outer-joins two RDDs where every element is of the form list(K, V).
-# The key types of the two RDDs should be the same.
+# \code{fullouterjoin} This function full-outer-joins two RDDs where every element is of
+# the form list(K, V). The key types of the two RDDs should be the same.
 #
 # @param x An RDD to be joined. Should be an RDD where each element is
 # list(K, V).
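
A hedged sketch of the join semantics these doc comments describe, using pair RDDs of the form list(K, V) (values and the partition count are illustrative):

```
rdd1 <- parallelize(sc, list(list(1, "a"), list(2, "b")))
rdd2 <- parallelize(sc, list(list(1, "x")))
# Left outer join keeps every key of rdd1; unmatched keys pair with NULL:
# list(list(1, list("a", "x")), list(2, list("b", NULL)))
joined <- collect(leftOuterJoin(rdd1, rdd2, 2L))
```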

R/pkg/R/sparkR.R

Lines changed: 6 additions & 13 deletions
@@ -17,10 +17,6 @@
 
 .sparkREnv <- new.env()
 
-sparkR.onLoad <- function(libname, pkgname) {
-  .sparkREnv$libname <- libname
-}
-
 # Utility function that returns TRUE if we have an active connection to the
 # backend and FALSE otherwise
 connExists <- function(env) {
@@ -80,7 +76,6 @@ sparkR.stop <- function() {
 #' @param sparkEnvir Named list of environment variables to set on worker nodes.
 #' @param sparkExecutorEnv Named list of environment variables to be used when launching executors.
 #' @param sparkJars Character string vector of jar files to pass to the worker nodes.
-#' @param sparkRLibDir The path where R is installed on the worker nodes.
 #' @param sparkPackages Character string vector of packages from spark-packages.org
 #' @export
 #' @examples
@@ -101,11 +96,11 @@ sparkR.init <- function(
 sparkEnvir = list(),
 sparkExecutorEnv = list(),
 sparkJars = "",
-sparkRLibDir = "",
 sparkPackages = "") {
 
 if (exists(".sparkRjsc", envir = .sparkREnv)) {
-  cat("Re-using existing Spark Context. Please stop SparkR with sparkR.stop() or restart R to create a new Spark Context\n")
+  cat(paste("Re-using existing Spark Context.",
+            "Please stop SparkR with sparkR.stop() or restart R to create a new Spark Context\n"))
 return(get(".sparkRjsc", envir = .sparkREnv))
 }
 
@@ -169,25 +164,23 @@ sparkR.init <- function(
 sparkHome <- normalizePath(sparkHome)
 }
 
-if (nchar(sparkRLibDir) != 0) {
-  .sparkREnv$libname <- sparkRLibDir
-}
-
 sparkEnvirMap <- new.env()
 for (varname in names(sparkEnvir)) {
   sparkEnvirMap[[varname]] <- sparkEnvir[[varname]]
 }
 
 sparkExecutorEnvMap <- new.env()
 if (!any(names(sparkExecutorEnv) == "LD_LIBRARY_PATH")) {
-  sparkExecutorEnvMap[["LD_LIBRARY_PATH"]] <- paste0("$LD_LIBRARY_PATH:",Sys.getenv("LD_LIBRARY_PATH"))
+  sparkExecutorEnvMap[["LD_LIBRARY_PATH"]] <-
+    paste0("$LD_LIBRARY_PATH:",Sys.getenv("LD_LIBRARY_PATH"))
 }
 for (varname in names(sparkExecutorEnv)) {
   sparkExecutorEnvMap[[varname]] <- sparkExecutorEnv[[varname]]
 }
 
 nonEmptyJars <- Filter(function(x) { x != "" }, jars)
-localJarPaths <- sapply(nonEmptyJars, function(j) { utils::URLencode(paste("file:", uriSep, j, sep = "")) })
+localJarPaths <- sapply(nonEmptyJars,
+                        function(j) { utils::URLencode(paste("file:", uriSep, j, sep = "")) })
 
 # Set the start time to identify jobjs
 # Seconds resolution is good enough for this purpose, so use ints
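
A hedged sketch of the behavior around the reflowed message: with `sparkRLibDir` removed, `sparkR.init()` takes no library-path hint, and a second call simply returns the existing context after printing the warning (the master string is illustrative):

```
library(SparkR)
sc  <- sparkR.init(master = "local[2]")
sc2 <- sparkR.init()   # prints the re-use message and returns the same context
sparkR.stop()          # stop first if a fresh context is needed
```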
