From 3b03c732fcf9425d257a52bb14e1f02c2e1882e2 Mon Sep 17 00:00:00 2001 From: Sean Owen Date: Wed, 4 Sep 2019 19:21:00 -0500 Subject: [PATCH 1/2] Fix DataFrameReader.jdbc docs to document that partition column can be numeric, date or timestamp type --- .../src/main/scala/org/apache/spark/sql/DataFrameReader.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala index c3c0642c85c2d..f901005ad4fcf 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala @@ -265,7 +265,8 @@ class DataFrameReader private[sql](sparkSession: SparkSession) extends Logging { * * @param url JDBC database url of the form `jdbc:subprotocol:subname`. * @param table Name of the table in the external database. - * @param columnName the name of a column of integral type that will be used for partitioning. + * @param columnName the name of a column of numeric, date, or timestamp type + * that will be used for partitioning. * @param lowerBound the minimum value of `columnName` used to decide partition stride. * @param upperBound the maximum value of `columnName` used to decide partition stride. * @param numPartitions the number of partitions. This, along with `lowerBound` (inclusive), From 8935d4c2bc48225503e69bb152c5e26dd37617d3 Mon Sep 17 00:00:00 2001 From: Sean Owen Date: Wed, 4 Sep 2019 21:08:43 -0500 Subject: [PATCH 2/2] Additional related changes --- R/pkg/R/SQLContext.R | 3 ++- python/pyspark/sql/readwriter.py | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/R/pkg/R/SQLContext.R b/R/pkg/R/SQLContext.R index 69ddaab9eeb86..43ea27b359a9c 100644 --- a/R/pkg/R/SQLContext.R +++ b/R/pkg/R/SQLContext.R @@ -624,7 +624,8 @@ loadDF <- function(path = NULL, source = NULL, schema = NULL, ...) 
{ #' #' @param url JDBC database url of the form \code{jdbc:subprotocol:subname} #' @param tableName the name of the table in the external database -#' @param partitionColumn the name of a column of integral type that will be used for partitioning +#' @param partitionColumn the name of a column of numeric, date, or timestamp type +#' that will be used for partitioning. #' @param lowerBound the minimum value of \code{partitionColumn} used to decide partition stride #' @param upperBound the maximum value of \code{partitionColumn} used to decide partition stride #' @param numPartitions the number of partitions, This, along with \code{lowerBound} (inclusive), diff --git a/python/pyspark/sql/readwriter.py b/python/pyspark/sql/readwriter.py index 1bda5f32bc015..e51ff9bad0746 100644 --- a/python/pyspark/sql/readwriter.py +++ b/python/pyspark/sql/readwriter.py @@ -533,7 +533,8 @@ def jdbc(self, url, table, column=None, lowerBound=None, upperBound=None, numPar :param url: a JDBC URL of the form ``jdbc:subprotocol:subname`` :param table: the name of the table - :param column: the name of an integer column that will be used for partitioning; + :param column: the name of a column of numeric, date, or timestamp type + that will be used for partitioning; if this parameter is specified, then ``numPartitions``, ``lowerBound`` (inclusive), and ``upperBound`` (exclusive) will form partition strides for generated WHERE clause expressions used to split the column