Provide preferred location for each bucket-id in case of partitioned sample table. (#22)

Vivek Bhaskar · web-flow · commit 2f66ad148c8a · 2016-11-07T17:18:15.000+05:30
These changes are related to AQP-79. Provide a preferred location for each bucket-id in the case of a partitioned sample table.
diff --git a/core/src/main/scala/org/apache/spark/rdd/MapPartitionsRDD.scala b/core/src/main/scala/org/apache/spark/rdd/MapPartitionsRDD.scala
@@ -42,3 +42,12 @@ private[spark] class MapPartitionsRDD[U: ClassTag, T: ClassTag](
     prev = null
   }
 }
+
+private[spark] final class PreserveLocationsRDD[U: ClassTag, T: ClassTag](
+    prev: RDD[T],
+    f: (TaskContext, Int, Iterator[T]) => Iterator[U],  // (TaskContext, partition index, iterator)
+    preservesPartitioning: Boolean = false, p: (Int) => Seq[String])  // p: index => locations
+    extends MapPartitionsRDD[U, T](prev, f, preservesPartitioning) {
+  // Preferred locations come from the caller-supplied `p`, looked up by partition index.
+  override def getPreferredLocations(split: Partition): Seq[String] = p(split.index)
+}
diff --git a/core/src/main/scala/org/apache/spark/rdd/RDD.scala b/core/src/main/scala/org/apache/spark/rdd/RDD.scala
@@ -821,6 +821,17 @@ abstract class RDD[T: ClassTag](
       preservesPartitioning)
   }
 
+  def mapPartitionsWithIndexPreserveLocations[U: ClassTag](
+      f: (Int, Iterator[T]) => Iterator[U],  // (partition index, iterator) => mapped iterator
+      p: (Int) => Seq[String],  // partition index => preferred locations for that partition
+      preservesPartitioning: Boolean = false): RDD[U] = withScope {
+    val cleanedF = sc.clean(f)
+    new PreserveLocationsRDD(
+      this,
+      (context: TaskContext, index: Int, iter: Iterator[T]) => cleanedF(index, iter),
+      preservesPartitioning, sc.clean(p))  // `p` is cleaned like `f` before reaching the RDD
+  }
+
   /**
    * Zips this RDD with another one, returning key-value pairs with the first element in each RDD,
    * second element in each RDD, etc. Assumes that the two RDDs have the *same number of

Original file line number	Diff line number	Diff line change
`@@ -42,3 +42,12 @@ private[spark] class MapPartitionsRDD[U: ClassTag, T: ClassTag](`
`42`	`42`	`prev = null`
`43`	`43`	`}`
`44`	`44`	`}`
	`45`	`+`
	`46`	`+private[spark] final class PreserveLocationsRDD[U: ClassTag, T: ClassTag](`
	`47`	`+ prev: RDD[T],`
	`48`	`+ f: (TaskContext, Int, Iterator[T]) => Iterator[U], // (TaskContext, partition index, iterator)`
	`49`	`+ preservesPartitioning: Boolean = false, p: (Int) => Seq[String])`
	`50`	`+ extends MapPartitionsRDD[U, T](prev, f, preservesPartitioning) {`
	`51`	`+`
	`52`	`+ override def getPreferredLocations(split: Partition): Seq[String] = p(split.index)`
	`53`	`+}`