Skip to content

Commit d053a31

Browse files
animesh authored and rxin committed
[SPARK-7980] [SQL] Support SQLContext.range(end)
1. range() overloaded in SQLContext.scala 2. range() modified in python sql context.py 3. Tests added accordingly in DataFrameSuite.scala and python sql tests.py Author: animesh <[email protected]> Closes #6609 from animeshbaranawal/SPARK-7980 and squashes the following commits: 935899c [animesh] SPARK-7980:python+scala changes
1 parent 2c4d550 commit d053a31

File tree

4 files changed

+31
-2
lines changed

4 files changed

+31
-2
lines changed

python/pyspark/sql/context.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -131,7 +131,7 @@ def udf(self):
131131
return UDFRegistration(self)
132132

133133
@since(1.4)
134-
def range(self, start, end, step=1, numPartitions=None):
134+
def range(self, start, end=None, step=1, numPartitions=None):
135135
"""
136136
Create a :class:`DataFrame` with single LongType column named `id`,
137137
containing elements in a range from `start` to `end` (exclusive) with
@@ -145,10 +145,18 @@ def range(self, start, end, step=1, numPartitions=None):
145145
146146
>>> sqlContext.range(1, 7, 2).collect()
147147
[Row(id=1), Row(id=3), Row(id=5)]
148+
149+
>>> sqlContext.range(3).collect()
150+
[Row(id=0), Row(id=1), Row(id=2)]
148151
"""
149152
if numPartitions is None:
150153
numPartitions = self._sc.defaultParallelism
151-
jdf = self._ssql_ctx.range(int(start), int(end), int(step), int(numPartitions))
154+
155+
if end is None:
156+
jdf = self._ssql_ctx.range(0, int(start), int(step), int(numPartitions))
157+
else:
158+
jdf = self._ssql_ctx.range(int(start), int(end), int(step), int(numPartitions))
159+
152160
return DataFrame(jdf, self)
153161

154162
@ignore_unicode_prefix

python/pyspark/sql/tests.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,8 @@ def test_range(self):
131131
self.assertEqual(self.sqlCtx.range(1, 1).count(), 0)
132132
self.assertEqual(self.sqlCtx.range(1, 0, -1).count(), 1)
133133
self.assertEqual(self.sqlCtx.range(0, 1 << 40, 1 << 39).count(), 2)
134+
self.assertEqual(self.sqlCtx.range(-2).count(), 0)
135+
self.assertEqual(self.sqlCtx.range(3).count(), 3)
134136

135137
def test_explode(self):
136138
from pyspark.sql.functions import explode

sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -717,6 +717,17 @@ class SQLContext(@transient val sparkContext: SparkContext)
717717
StructType(StructField("id", LongType, nullable = false) :: Nil))
718718
}
719719

720+
/**
721+
* :: Experimental ::
722+
* Creates a [[DataFrame]] with a single [[LongType]] column named `id`, containing elements
723+
* in a range from 0 to `end` (exclusive) with step value 1.
724+
*
725+
* @since 1.4.0
726+
* @group dataframe
727+
*/
728+
@Experimental
729+
def range(end: Long): DataFrame = range(0, end)
730+
720731
/**
721732
* :: Experimental ::
722733
* Creates a [[DataFrame]] with a single [[LongType]] column named `id`, containing elements

sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -576,5 +576,13 @@ class DataFrameSuite extends QueryTest {
576576
val res9 = TestSQLContext.range(Long.MaxValue, Long.MinValue, Long.MinValue, 100).select("id")
577577
assert(res9.count == 2)
578578
assert(res9.agg(sum("id")).as("sumid").collect() === Seq(Row(Long.MaxValue - 1)))
579+
580+
// only end provided as argument
581+
val res10 = TestSQLContext.range(10).select("id")
582+
assert(res10.count == 10)
583+
assert(res10.agg(sum("id")).as("sumid").collect() === Seq(Row(45)))
584+
585+
val res11 = TestSQLContext.range(-1).select("id")
586+
assert(res11.count == 0)
579587
}
580588
}

0 commit comments

Comments (0)