From e7e471c58948c4d9829ea77d35318320e26fe411 Mon Sep 17 00:00:00 2001
From: Dongjoon Hyun <dongjoon@apache.org>
Date: Sat, 18 Jun 2016 23:59:07 -0700
Subject: [PATCH 1/3] Add `spark_partition_id` in SparkR

---
 R/pkg/NAMESPACE                           |  1 +
 R/pkg/R/functions.R                       | 21 +++++++++++++++++++++
 R/pkg/R/generics.R                        |  4 ++++
 R/pkg/inst/tests/testthat/test_sparkSQL.R |  1 +
 4 files changed, 27 insertions(+)

diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE
index 82e56ca43729..1bc27ba88fe0 100644
--- a/R/pkg/NAMESPACE
+++ b/R/pkg/NAMESPACE
@@ -258,6 +258,7 @@ exportMethods("%in%",
               "skewness",
               "sort_array",
               "soundex",
+              "spark_partition_id",
               "stddev",
               "stddev_pop",
               "stddev_samp",
diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R
index a779127b379a..5b34082289a6 100644
--- a/R/pkg/R/functions.R
+++ b/R/pkg/R/functions.R
@@ -1179,6 +1179,27 @@ setMethod("soundex",
             column(jc)
           })
 
+#' spark_partition_id
+#'
+#' Return the column for partition ID of the Spark task.
+#' Note that this is indeterministic because it depends on data partitioning and
+#' task scheduling.
+#'
+#' This is equivalent to the SPARK_PARTITION_ID function in SQL.
+#'
+#' @rdname spark_partition_id
+#' @name spark_partition_id
+#' @export
+#' @examples
+#' \dontrun{select(df, spark_partition_id())}
+#' @note spark_partition_id since 2.0.0
+setMethod("spark_partition_id",
+          signature(x = "missing"),
+          function() {
+            jc <- callJStatic("org.apache.spark.sql.functions", "spark_partition_id")
+            column(jc)
+          })
+
 #' @rdname sd
 #' @name stddev
 setMethod("stddev",
diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R
index 6e754afab6c6..8abd18d35e66 100644
--- a/R/pkg/R/generics.R
+++ b/R/pkg/R/generics.R
@@ -1126,6 +1126,10 @@ setGeneric("sort_array", function(x, asc = TRUE) { standardGeneric("sort_array")
 #' @export
 setGeneric("soundex", function(x) { standardGeneric("soundex") })
 
+#' @rdname spark_partition_id
+#' @export
+setGeneric("spark_partition_id", function(x) { standardGeneric("spark_partition_id") })
+
 #' @rdname sd
 #' @export
 setGeneric("stddev", function(x) { standardGeneric("stddev") })
diff --git a/R/pkg/inst/tests/testthat/test_sparkSQL.R b/R/pkg/inst/tests/testthat/test_sparkSQL.R
index fcc2ab3ed6a2..53097f34d7a3 100644
--- a/R/pkg/inst/tests/testthat/test_sparkSQL.R
+++ b/R/pkg/inst/tests/testthat/test_sparkSQL.R
@@ -1058,6 +1058,7 @@ test_that("column functions", {
   c16 <- is.nan(c) + isnan(c) + isNaN(c)
   c17 <- cov(c, c1) + cov("c", "c1") + covar_samp(c, c1) + covar_samp("c", "c1")
   c18 <- covar_pop(c, c1) + covar_pop("c", "c1")
+  c19 <- spark_partition_id()
 
   # Test if base::is.nan() is exposed
   expect_equal(is.nan(c("a", "b")), c(FALSE, FALSE))

From cbd54b29883a4a8a0bfa9e5e6db97e30e764d277 Mon Sep 17 00:00:00 2001
From: Dongjoon Hyun <dongjoon@apache.org>
Date: Mon, 20 Jun 2016 11:52:23 -0700
Subject: [PATCH 2/3] Use new title/description convention.

---
 R/pkg/R/functions.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R
index 5b34082289a6..5d352ace1aef 100644
--- a/R/pkg/R/functions.R
+++ b/R/pkg/R/functions.R
@@ -1179,7 +1179,7 @@ setMethod("soundex",
             column(jc)
           })
 
-#' spark_partition_id
+#' Return the partition ID as a column
 #'
 #' Return the column for partition ID of the Spark task.
 #' Note that this is indeterministic because it depends on data partitioning and

From ddb21023fcc998001fbf2cc152bd97bbb1a09c7b Mon Sep 17 00:00:00 2001
From: Dongjoon Hyun <dongjoon@apache.org>
Date: Mon, 20 Jun 2016 12:23:16 -0700
Subject: [PATCH 3/3] Update function description.

---
 R/pkg/R/functions.R | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R
index 5d352ace1aef..064edf2468dc 100644
--- a/R/pkg/R/functions.R
+++ b/R/pkg/R/functions.R
@@ -1181,8 +1181,8 @@ setMethod("soundex",
 
 #' Return the partition ID as a column
 #'
-#' Return the column for partition ID of the Spark task.
-#' Note that this is indeterministic because it depends on data partitioning and
+#' Return the partition ID of the Spark task as a SparkDataFrame column.
+#' Note that this is nondeterministic because it depends on data partitioning and
 #' task scheduling.
 #'
 #' This is equivalent to the SPARK_PARTITION_ID function in SQL.