apache · wetneb · Apr 22, 2020 · Apr 22, 2020 · Apr 22, 2020 · Apr 22, 2020
diff --git a/core/src/main/scala/org/apache/spark/api/java/JavaPairRDD.scala b/core/src/main/scala/org/apache/spark/api/java/JavaPairRDD.scala
@@ -32,6 +32,7 @@ import org.apache.hadoop.mapreduce.{OutputFormat => NewOutputFormat}
 
 import org.apache.spark.{HashPartitioner, Partitioner}
 import org.apache.spark.Partitioner._
+import org.apache.spark.annotation.Since
 import org.apache.spark.api.java.JavaSparkContext.fakeClassTag
 import org.apache.spark.api.java.JavaUtils.mapAsSerializableJavaMap
 import org.apache.spark.api.java.function.{FlatMapFunction, Function => JFunction,
@@ -937,6 +938,34 @@ class JavaPairRDD[K, V](val rdd: RDD[(K, V)])
     fromRDD(new OrderedRDDFunctions[K, V, (K, V)](rdd).sortByKey(ascending, numPartitions))
   }
 
+  /**
+   * Return a RDD containing only the elements in the inclusive range `lower` to `upper`.
+   * If the RDD has been partitioned using a `RangePartitioner`, then this operation can be
+   * performed efficiently by only scanning the partitions that might containt matching elements.
+   * Otherwise, a standard `filter` is applied to all partitions.
+   *
+   * @since 3.1.0
+   */
+  @Since("3.1.0")
+  def filterByRange(lower: K, upper: K): JavaPairRDD[K, V] = {
+    val comp = com.google.common.collect.Ordering.natural().asInstanceOf[Comparator[K]]
+    filterByRange(comp, lower, upper)
+  }
+
+  /**
+   * Return a RDD containing only the elements in the inclusive range `lower` to `upper`.
+   * If the RDD has been partitioned using a `RangePartitioner`, then this operation can be
+   * performed efficiently by only scanning the partitions that might containt matching elements.
+   * Otherwise, a standard `filter` is applied to all partitions.
+   *
+   * @since 3.1.0
+   */
+  @Since("3.1.0")
+  def filterByRange(comp: Comparator[K], lower: K, upper: K): JavaPairRDD[K, V] = {
+    implicit val ordering = comp // Allow implicit conversion of Comparator to Ordering.
+    fromRDD(new OrderedRDDFunctions[K, V, (K, V)](rdd).filterByRange(lower, upper))
+  }
+
   /**
    * Return an RDD with the keys of each tuple.
    */

diff --git a/core/src/test/java/test/org/apache/spark/JavaAPISuite.java b/core/src/test/java/test/org/apache/spark/JavaAPISuite.java
@@ -247,6 +247,34 @@ public int getPartition(Object key) {
         Arrays.asList(new Tuple2<>(1, 3), new Tuple2<>(3, 8), new Tuple2<>(3, 8)));
   }
 
+  @Test
+  public void filterByRange() {
+    List<Tuple2<Integer, Integer>> pairs = new ArrayList<>();
+    pairs.add(new Tuple2<>(0, 5));
+    pairs.add(new Tuple2<>(1, 8));
+    pairs.add(new Tuple2<>(2, 6));
+    pairs.add(new Tuple2<>(3, 8));
+    pairs.add(new Tuple2<>(4, 8));
+
+    JavaPairRDD<Integer, Integer> rdd = sc.parallelizePairs(pairs).sortByKey();
+
+    // Default comparator
+    JavaPairRDD<Integer, Integer> filteredRDD = rdd.filterByRange(3, 11);
+    List<Tuple2<Integer, Integer>> filteredPairs = filteredRDD.collect();
+    assertEquals(filteredPairs.size(), 2);
+    assertEquals(filteredPairs.get(0), new Tuple2<>(3, 8));
+    assertEquals(filteredPairs.get(1), new Tuple2<>(4, 8));
+
+    // Custom comparator
+    filteredRDD = rdd.filterByRange(Collections.reverseOrder(), 3, -2);
+    filteredPairs = filteredRDD.collect();
+    assertEquals(filteredPairs.size(), 4);
+    assertEquals(filteredPairs.get(0), new Tuple2<>(0, 5));
+    assertEquals(filteredPairs.get(1), new Tuple2<>(1, 8));
+    assertEquals(filteredPairs.get(2), new Tuple2<>(2, 6));
+    assertEquals(filteredPairs.get(3), new Tuple2<>(3, 8));
+  }
+
   @Test
   public void emptyRDD() {
     JavaRDD<String> rdd = sc.emptyRDD();