From e7e545fd8f7455a653c2bcee4e42a0e5249791f9 Mon Sep 17 00:00:00 2001
From: Eric Liang
Date: Sat, 30 Jul 2016 15:02:48 -0700
Subject: [PATCH 1/2] Sat Jul 30 15:02:48 PDT 2016

---
 .../datasources/FileSourceStrategy.scala      |  2 ++
 .../datasources/FileSourceStrategySuite.scala | 35 ++++++++++++++++++-
 2 files changed, 36 insertions(+), 1 deletion(-)

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileSourceStrategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileSourceStrategy.scala
index 32aa4713ebdbb..d914b854ca502 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileSourceStrategy.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileSourceStrategy.scala
@@ -130,7 +130,9 @@ private[sql] object FileSourceStrategy extends Strategy with Logging {
         createNonBucketedReadRDD(readFile, selectedPartitions, fsRelation)
       }
 
+      // These metadata values make scan plans uniquely identifiable.
       val meta = Map(
+        "PartitionFilters" -> partitionKeyFilters.mkString("[", ", ", "]"),
         "Format" -> fsRelation.fileFormat.toString,
         "ReadSchema" -> prunedDataSchema.simpleString,
         PUSHED_FILTERS -> pushedDownFilters.mkString("[", ", ", "]"),
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileSourceStrategySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileSourceStrategySuite.scala
index ddcc24a7f56b2..6ee8e494a8639 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileSourceStrategySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileSourceStrategySuite.scala
@@ -30,7 +30,7 @@ import org.apache.spark.sql.catalyst.InternalRow
 import org.apache.spark.sql.catalyst.catalog.BucketSpec
 import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionSet, PredicateHelper}
 import org.apache.spark.sql.catalyst.util
-import org.apache.spark.sql.execution.DataSourceScanExec
+import org.apache.spark.sql.execution.{DataSourceScanExec, SparkPlan}
 import org.apache.spark.sql.functions._
 import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.sources._
@@ -408,6 +408,39 @@ class FileSourceStrategySuite extends QueryTest with SharedSQLContext with Predi
     }
   }
 
+  test("[SPARK-16818] partition pruned file scans implement sameResult correctly") {
+    withTempPath { path =>
+      val tempDir = path.getCanonicalPath
+      spark.range(100)
+        .selectExpr("id", "id as b")
+        .write
+        .partitionBy("id")
+        .parquet(tempDir)
+      val df = spark.read.parquet(tempDir)
+      def getPlan(df: DataFrame): SparkPlan = {
+        df.queryExecution.executedPlan
+      }
+      assert(getPlan(df.where("id = 2")).sameResult(getPlan(df.where("id = 2"))))
+      assert(!getPlan(df.where("id = 2")).sameResult(getPlan(df.where("id = 3"))))
+    }
+  }
+
+  test("[SPARK-16818] exchange reuse respects differences in partition pruning") {
+    spark.conf.set("spark.sql.exchange.reuse", true)
+    withTempPath { path =>
+      val tempDir = path.getCanonicalPath
+      spark.range(10)
+        .selectExpr("id % 2 as a", "id % 3 as b", "id as c")
+        .write
+        .partitionBy("a")
+        .parquet(tempDir)
+      val df = spark.read.parquet(tempDir)
+      val df1 = df.where("a = 0").groupBy("b").agg("c" -> "sum")
+      val df2 = df.where("a = 1").groupBy("b").agg("c" -> "sum")
+      checkAnswer(df1.join(df2, "b"), Row(0, 6, 12) :: Row(1, 4, 8) :: Row(2, 10, 5) :: Nil)
+    }
+  }
+
   // Helpers for checking the arguments passed to the FileFormat.
   protected val checkPartitionSchema =

From a2545408d144c5ea87ce5696341fe52cd2d29d2c Mon Sep 17 00:00:00 2001
From: Eric Liang
Date: Sat, 30 Jul 2016 15:06:12 -0700
Subject: [PATCH 2/2] Sat Jul 30 15:06:12 PDT 2016

---
 .../spark/sql/execution/datasources/FileSourceStrategy.scala | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileSourceStrategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileSourceStrategy.scala
index d914b854ca502..67491302a9848 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileSourceStrategy.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileSourceStrategy.scala
@@ -130,7 +130,7 @@ private[sql] object FileSourceStrategy extends Strategy with Logging {
         createNonBucketedReadRDD(readFile, selectedPartitions, fsRelation)
       }
 
-      // These metadata values make scan plans uniquely identifiable.
+      // These metadata values make scan plans uniquely identifiable for equality checking.
       val meta = Map(
         "PartitionFilters" -> partitionKeyFilters.mkString("[", ", ", "]"),
         "Format" -> fsRelation.fileFormat.toString,
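
Note on the fix, for intuition: a minimal, self-contained Scala sketch of the failure mode these patches address, using hypothetical stand-in names (SameResultSketch, metaBefore, metaAfter, sameResult), not Spark's actual SparkPlan/DataSourceScanExec API. If a scan's metadata records only the format and read schema, two scans that prune different partitions compare equal, so the exchange-reuse rule could substitute one scan's output for the other's; including the partition filters in the metadata makes the plans distinguishable.

  // Stand-ins for scan-plan metadata comparison; not Spark's real classes.
  object SameResultSketch extends App {
    // Metadata as built before the patch: the pruning filter is not recorded.
    def metaBefore(filter: String): Map[String, String] = Map(
      "Format" -> "ParquetFormat",
      "ReadSchema" -> "struct<b:bigint>")

    // Metadata as built after the patch: the pruning filter is included.
    def metaAfter(filter: String): Map[String, String] = Map(
      "PartitionFilters" -> s"[$filter]",
      "Format" -> "ParquetFormat",
      "ReadSchema" -> "struct<b:bigint>")

    // Simplified stand-in for SparkPlan.sameResult on scans: plans with
    // equal metadata are treated as producing the same result.
    def sameResult(a: Map[String, String], b: Map[String, String]): Boolean =
      a == b

    // Before: scans pruned to id=2 and id=3 wrongly compare equal.
    assert(sameResult(metaBefore("(id = 2)"), metaBefore("(id = 3)")))

    // After: differing pruning predicates keep the plans distinct, while
    // identical pruning still compares equal (so reuse remains possible).
    assert(!sameResult(metaAfter("(id = 2)"), metaAfter("(id = 3)")))
    assert(sameResult(metaAfter("(id = 2)"), metaAfter("(id = 2)")))
  }

The "PartitionFilters" entry added to meta in FileSourceStrategy plays the same role: the partition pruning predicate becomes part of what scan plans are compared on, which is exactly what the two new [SPARK-16818] tests exercise.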