Commit 2a81517
Author: Hemant Bhanawat
Parents: 696cc71 + f4be094

    Merge branch 'master' into pluggableScheduler

    Conflicts:
        core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala

1,502 files changed (+65,132 / -32,864 lines)


LICENSE

Lines changed: 3 additions & 3 deletions
```diff
@@ -238,6 +238,7 @@ The text of each license is also included at licenses/LICENSE-[project].txt.
      (BSD 3 Clause) netlib core (com.github.fommil.netlib:core:1.1.2 - https://github.com/fommil/netlib-java/core)
      (BSD 3 Clause) JPMML-Model (org.jpmml:pmml-model:1.2.7 - https://github.com/jpmml/jpmml-model)
      (BSD License) AntLR Parser Generator (antlr:antlr:2.7.7 - http://www.antlr.org/)
+     (BSD License) ANTLR 4.5.2-1 (org.antlr:antlr4:4.5.2-1 - http://www.antlr.org/)
      (BSD licence) ANTLR ST4 4.0.4 (org.antlr:ST4:4.0.4 - http://www.stringtemplate.org)
      (BSD licence) ANTLR StringTemplate (org.antlr:stringtemplate:3.2.1 - http://www.stringtemplate.org)
      (BSD License) Javolution (javolution:javolution:5.5.1 - http://javolution.org)
@@ -256,9 +257,8 @@ The text of each license is also included at licenses/LICENSE-[project].txt.
      (BSD-style) scalacheck (org.scalacheck:scalacheck_2.11:1.10.0 - http://www.scalacheck.org)
      (BSD-style) spire (org.spire-math:spire_2.11:0.7.1 - http://spire-math.org)
      (BSD-style) spire-macros (org.spire-math:spire-macros_2.11:0.7.1 - http://spire-math.org)
-     (New BSD License) Kryo (com.esotericsoftware.kryo:kryo:2.21 - http://code.google.com/p/kryo/)
-     (New BSD License) MinLog (com.esotericsoftware.minlog:minlog:1.2 - http://code.google.com/p/minlog/)
-     (New BSD License) ReflectASM (com.esotericsoftware.reflectasm:reflectasm:1.07 - http://code.google.com/p/reflectasm/)
+     (New BSD License) Kryo (com.esotericsoftware:kryo:3.0.3 - https://github.com/EsotericSoftware/kryo)
+     (New BSD License) MinLog (com.esotericsoftware:minlog:1.3.0 - https://github.com/EsotericSoftware/minlog)
      (New BSD license) Protocol Buffer Java API (com.google.protobuf:protobuf-java:2.5.0 - http://code.google.com/p/protobuf)
      (New BSD license) Protocol Buffer Java API (org.spark-project.protobuf:protobuf-java:2.4.1-shaded - http://code.google.com/p/protobuf)
      (The BSD License) Fortran to Java ARPACK (net.sourceforge.f2j:arpack_combined_all:0.1 - http://f2j.sourceforge.net)
```

NOTICE

Lines changed: 0 additions & 1 deletion
```diff
@@ -48,7 +48,6 @@ Eclipse Public License 1.0

 The following components are provided under the Eclipse Public License 1.0. See project link for details.

-     (Eclipse Public License - Version 1.0) mqtt-client (org.eclipse.paho:mqtt-client:0.4.0 - http://www.eclipse.org/paho/mqtt-client)
      (Eclipse Public License v1.0) Eclipse JDT Core (org.eclipse.jdt:core:3.1.1 - http://www.eclipse.org/jdt/)

 ========================================================================
```

R/README.md

Lines changed: 5 additions & 5 deletions
````diff
@@ -40,7 +40,7 @@ To set other options like driver memory, executor memory etc. you can pass in the
 If you wish to use SparkR from RStudio or other R frontends you will need to set some environment variables which point SparkR to your Spark installation. For example
 ```
 # Set this to where Spark is installed
-Sys.setenv(SPARK_HOME="/Users/shivaram/spark")
+Sys.setenv(SPARK_HOME="/Users/username/spark")
 # This line loads SparkR from the installed directory
 .libPaths(c(file.path(Sys.getenv("SPARK_HOME"), "R", "lib"), .libPaths()))
 library(SparkR)
@@ -51,7 +51,7 @@ sc <- sparkR.init(master="local")

 The [instructions](https://cwiki.apache.org/confluence/display/SPARK/Contributing+to+Spark) for making contributions to Spark also apply to SparkR.
 If you only make R file changes (i.e. no Scala changes) then you can just re-install the R package using `R/install-dev.sh` and test your changes.
-Once you have made your changes, please include unit tests for them and run existing unit tests using the `run-tests.sh` script as described below.
+Once you have made your changes, please include unit tests for them and run existing unit tests using the `R/run-tests.sh` script as described below.

 #### Generating documentation

@@ -60,17 +60,17 @@ The SparkR documentation (Rd files and HTML files) are not a part of the source
 ### Examples, Unit tests

 SparkR comes with several sample programs in the `examples/src/main/r` directory.
-To run one of them, use `./bin/sparkR <filename> <args>`. For example:
+To run one of them, use `./bin/spark-submit <filename> <args>`. For example:

-    ./bin/sparkR examples/src/main/r/dataframe.R
+    ./bin/spark-submit examples/src/main/r/dataframe.R

 You can also run the unit-tests for SparkR by running (you need to install the [testthat](http://cran.r-project.org/web/packages/testthat/index.html) package first):

     R -e 'install.packages("testthat", repos="http://cran.us.r-project.org")'
     ./R/run-tests.sh

 ### Running on YARN
-The `./bin/spark-submit` and `./bin/sparkR` can also be used to submit jobs to YARN clusters. You will need to set YARN conf dir before doing so. For example on CDH you can run
+The `./bin/spark-submit` can also be used to submit jobs to YARN clusters. You will need to set YARN conf dir before doing so. For example on CDH you can run
 ```
 export YARN_CONF_DIR=/etc/hadoop/conf
 ./bin/spark-submit --master yarn examples/src/main/r/dataframe.R
````
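Pulled together, the RStudio setup described in the first hunk is the following sequence; a minimal sketch using the commit-era `sparkR.init` entry point visible in the hunk context, with the SPARK_HOME path as a placeholder:

```r
# Bootstrap SparkR from RStudio or another R frontend, per the README above.
# Set this to where Spark is installed (placeholder path).
Sys.setenv(SPARK_HOME = "/Users/username/spark")

# Make the bundled SparkR package visible to this R session.
.libPaths(c(file.path(Sys.getenv("SPARK_HOME"), "R", "lib"), .libPaths()))
library(SparkR)

# Start a local Spark context (pre-SparkSession API, as in the hunk context).
sc <- sparkR.init(master = "local")
```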

R/pkg/DESCRIPTION

Lines changed: 3 additions & 1 deletion
```diff
@@ -11,7 +11,9 @@ Depends:
     R (>= 3.0),
     methods,
 Suggests:
-    testthat
+    testthat,
+    e1071,
+    survival
 Description: R frontend for Spark
 License: Apache License (== 2.0)
 Collate:
```
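The two new Suggests entries line up with this commit's new ML wrappers: e1071 provides R's reference `naiveBayes` and survival provides `survreg`, the functions whose names the new SparkR generics (see R/pkg/R/generics.R below) take over.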

R/pkg/NAMESPACE

Lines changed: 6 additions & 2 deletions
```diff
@@ -15,7 +15,9 @@ exportMethods("glm",
               "predict",
               "summary",
               "kmeans",
-              "fitted")
+              "fitted",
+              "naiveBayes",
+              "survreg")

 # Job group lifecycle management methods
 export("setJobGroup",
@@ -263,6 +265,7 @@ exportMethods("%in%",
               "var_samp",
               "weekofyear",
               "when",
+              "window",
               "year")

 exportClasses("GroupedData")
@@ -289,7 +292,8 @@ export("as.DataFrame",
        "tableToDF",
        "tableNames",
        "tables",
-       "uncacheTable")
+       "uncacheTable",
+       "print.summary.GeneralizedLinearRegressionModel")

 export("structField",
        "structField.jobj",
```

R/pkg/R/functions.R

Lines changed: 63 additions & 0 deletions
```diff
@@ -2131,6 +2131,69 @@ setMethod("from_unixtime", signature(x = "Column"),
             column(jc)
           })

+#' window
+#'
+#' Bucketize rows into one or more time windows given a timestamp specifying column. Window
+#' starts are inclusive but the window ends are exclusive, e.g. 12:05 will be in the window
+#' [12:05,12:10) but not in [12:00,12:05). Windows can support microsecond precision. Windows in
+#' the order of months are not supported.
+#'
+#' The time column must be of TimestampType.
+#'
+#' Durations are provided as strings, e.g. '1 second', '1 day 12 hours', '2 minutes'. Valid
+#' interval strings are 'week', 'day', 'hour', 'minute', 'second', 'millisecond', 'microsecond'.
+#' If the `slideDuration` is not provided, the windows will be tumbling windows.
+#'
+#' The startTime is the offset with respect to 1970-01-01 00:00:00 UTC with which to start
+#' window intervals. For example, in order to have hourly tumbling windows that start 15 minutes
+#' past the hour, e.g. 12:15-13:15, 13:15-14:15... provide `startTime` as `15 minutes`.
+#'
+#' The output column will be a struct called 'window' by default with the nested columns 'start'
+#' and 'end'.
+#'
+#' @family datetime_funcs
+#' @rdname window
+#' @name window
+#' @export
+#' @examples
+#'\dontrun{
+#' # One minute windows every 15 seconds 10 seconds after the minute, e.g. 09:00:10-09:01:10,
+#' # 09:00:25-09:01:25, 09:00:40-09:01:40, ...
+#' window(df$time, "1 minute", "15 seconds", "10 seconds")
+#'
+#' # One minute tumbling windows 15 seconds after the minute, e.g. 09:00:15-09:01:15,
+#' # 09:01:15-09:02:15...
+#' window(df$time, "1 minute", startTime = "15 seconds")
+#'
+#' # Thirty second windows every 10 seconds, e.g. 09:00:00-09:00:30, 09:00:10-09:00:40, ...
+#' window(df$time, "30 seconds", "10 seconds")
+#'}
+setMethod("window", signature(x = "Column"),
+          function(x, windowDuration, slideDuration = NULL, startTime = NULL) {
+            stopifnot(is.character(windowDuration))
+            if (!is.null(slideDuration) && !is.null(startTime)) {
+              stopifnot(is.character(slideDuration) && is.character(startTime))
+              jc <- callJStatic("org.apache.spark.sql.functions",
+                                "window",
+                                x@jc, windowDuration, slideDuration, startTime)
+            } else if (!is.null(slideDuration)) {
+              stopifnot(is.character(slideDuration))
+              jc <- callJStatic("org.apache.spark.sql.functions",
+                                "window",
+                                x@jc, windowDuration, slideDuration)
+            } else if (!is.null(startTime)) {
+              stopifnot(is.character(startTime))
+              jc <- callJStatic("org.apache.spark.sql.functions",
+                                "window",
+                                x@jc, windowDuration, windowDuration, startTime)
+            } else {
+              jc <- callJStatic("org.apache.spark.sql.functions",
+                                "window",
+                                x@jc, windowDuration)
+            }
+            column(jc)
+          })
+
 #' locate
 #'
 #' Locate the position of the first occurrence of substr.
```
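The roxygen examples above show `window` in isolation; in queries it is typically used as a grouping column. A minimal usage sketch, not part of the commit, assuming an active SparkR session and a hypothetical DataFrame `df` with a TimestampType column `time` and a numeric column `value`:

```r
# Hypothetical illustration: count events and average `value` per
# 10-minute tumbling window. `df`, `time`, and `value` are assumed names;
# window() here is the SparkR Column function added in this diff.
library(SparkR)

grouped <- groupBy(df, window(df$time, "10 minutes"))
result <- agg(grouped, count(df$value), avg(df$value))

# The grouping column is a struct named 'window' with 'start' and 'end'
# fields, per the documentation above.
head(result)
```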

R/pkg/R/generics.R

Lines changed: 12 additions & 0 deletions
```diff
@@ -1152,6 +1152,10 @@ setGeneric("var_samp", function(x) { standardGeneric("var_samp") })
 #' @export
 setGeneric("weekofyear", function(x) { standardGeneric("weekofyear") })

+#' @rdname window
+#' @export
+setGeneric("window", function(x, ...) { standardGeneric("window") })
+
 #' @rdname year
 #' @export
 setGeneric("year", function(x) { standardGeneric("year") })
@@ -1175,3 +1179,11 @@ setGeneric("kmeans")
 #' @rdname fitted
 #' @export
 setGeneric("fitted")
+
+#' @rdname naiveBayes
+#' @export
+setGeneric("naiveBayes", function(formula, data, ...) { standardGeneric("naiveBayes") })
+
+#' @rdname survreg
+#' @export
+setGeneric("survreg", function(formula, data, ...) { standardGeneric("survreg") })
```
