From 49d767d838691fc7d964be2c4349662f5500ff2b Mon Sep 17 00:00:00 2001
From: actuaryzhang
Date: Fri, 30 Jun 2017 20:02:15 +0800
Subject: [PATCH 001/125] [SPARK-18710][ML] Add offset in GLM

## What changes were proposed in this pull request?

Add support for offset in GLM. This is useful for at least two reasons:

1. Account for exposure: e.g., when modeling the number of accidents, we may need to use miles driven as an offset to assess factors affecting frequency.
2. Test incremental effects of new variables: we can use predictions from the existing model as an offset and fit a much smaller model on only the new variables. This avoids re-estimating the large model with all variables (old + new) and can be very important for efficient large-scale analysis.

## How was this patch tested?

New test.

yanboliang srowen felixcheung sethah

Author: actuaryzhang

Closes #16699 from actuaryzhang/offset.
---
 .../apache/spark/ml/feature/Instance.scala    |  21 +
 .../IterativelyReweightedLeastSquares.scala   |  14 +-
 .../spark/ml/optim/WeightedLeastSquares.scala |   2 +-
 .../GeneralizedLinearRegression.scala         | 184 +++--
 ...erativelyReweightedLeastSquaresSuite.scala |  40 +-
 .../GeneralizedLinearRegressionSuite.scala    | 634 ++++++++++--------
 6 files changed, 534 insertions(+), 361 deletions(-)

diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Instance.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Instance.scala
index cce3ca45ccd8..dd56fbbfa2b6 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/Instance.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Instance.scala
@@ -27,3 +27,24 @@ import org.apache.spark.ml.linalg.Vector
  * @param features The vector of features for this data point.
  */
 private[ml] case class Instance(label: Double, weight: Double, features: Vector)
+
+/**
+ * Case class that represents a data point with
+ * label, weight, offset and features.
+ * This is mainly used in GeneralizedLinearRegression currently.
+ *
+ * @param label Label for this data point.
+ * @param weight The weight of this instance.
+ * @param offset The offset used for this data point.
+ * @param features The vector of features for this data point.
+ */
+private[ml] case class OffsetInstance(
+    label: Double,
+    weight: Double,
+    offset: Double,
+    features: Vector) {
+
+  /** Converts to an [[Instance]] object by leaving out the offset. */
+  def toInstance: Instance = Instance(label, weight, features)
+
+}
diff --git a/mllib/src/main/scala/org/apache/spark/ml/optim/IterativelyReweightedLeastSquares.scala b/mllib/src/main/scala/org/apache/spark/ml/optim/IterativelyReweightedLeastSquares.scala
index 9c495512422b..6961b45f55e4 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/optim/IterativelyReweightedLeastSquares.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/optim/IterativelyReweightedLeastSquares.scala
@@ -18,7 +18,7 @@
 package org.apache.spark.ml.optim
 
 import org.apache.spark.internal.Logging
-import org.apache.spark.ml.feature.Instance
+import org.apache.spark.ml.feature.{Instance, OffsetInstance}
 import org.apache.spark.ml.linalg._
 import org.apache.spark.rdd.RDD
 
@@ -43,7 +43,7 @@ private[ml] class IterativelyReweightedLeastSquaresModel(
  * find M-estimator in robust regression and other optimization problems.
  *
  * @param initialModel the initial guess model.
- * @param reweightFunc the reweight function which is used to update offsets and weights
+ * @param reweightFunc the reweight function which is used to update working labels and weights
 *                      at each iteration.
* @param fitIntercept whether to fit intercept. * @param regParam L2 regularization parameter used by WLS. @@ -57,13 +57,13 @@ private[ml] class IterativelyReweightedLeastSquaresModel( */ private[ml] class IterativelyReweightedLeastSquares( val initialModel: WeightedLeastSquaresModel, - val reweightFunc: (Instance, WeightedLeastSquaresModel) => (Double, Double), + val reweightFunc: (OffsetInstance, WeightedLeastSquaresModel) => (Double, Double), val fitIntercept: Boolean, val regParam: Double, val maxIter: Int, val tol: Double) extends Logging with Serializable { - def fit(instances: RDD[Instance]): IterativelyReweightedLeastSquaresModel = { + def fit(instances: RDD[OffsetInstance]): IterativelyReweightedLeastSquaresModel = { var converged = false var iter = 0 @@ -75,10 +75,10 @@ private[ml] class IterativelyReweightedLeastSquares( oldModel = model - // Update offsets and weights using reweightFunc + // Update working labels and weights using reweightFunc val newInstances = instances.map { instance => - val (newOffset, newWeight) = reweightFunc(instance, oldModel) - Instance(newOffset, newWeight, instance.features) + val (newLabel, newWeight) = reweightFunc(instance, oldModel) + Instance(newLabel, newWeight, instance.features) } // Estimate new model diff --git a/mllib/src/main/scala/org/apache/spark/ml/optim/WeightedLeastSquares.scala b/mllib/src/main/scala/org/apache/spark/ml/optim/WeightedLeastSquares.scala index 56ab9675700a..32b0af72ba9b 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/optim/WeightedLeastSquares.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/optim/WeightedLeastSquares.scala @@ -18,7 +18,7 @@ package org.apache.spark.ml.optim import org.apache.spark.internal.Logging -import org.apache.spark.ml.feature.Instance +import org.apache.spark.ml.feature.{Instance, OffsetInstance} import org.apache.spark.ml.linalg._ import org.apache.spark.rdd.RDD diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/GeneralizedLinearRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/GeneralizedLinearRegression.scala index bff0d9bbb46f..ce3460ae4356 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/regression/GeneralizedLinearRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/regression/GeneralizedLinearRegression.scala @@ -26,8 +26,8 @@ import org.apache.spark.SparkException import org.apache.spark.annotation.{Experimental, Since} import org.apache.spark.internal.Logging import org.apache.spark.ml.PredictorParams -import org.apache.spark.ml.feature.Instance -import org.apache.spark.ml.linalg.{BLAS, Vector} +import org.apache.spark.ml.feature.{Instance, OffsetInstance} +import org.apache.spark.ml.linalg.{BLAS, Vector, Vectors} import org.apache.spark.ml.optim._ import org.apache.spark.ml.param._ import org.apache.spark.ml.param.shared._ @@ -138,6 +138,27 @@ private[regression] trait GeneralizedLinearRegressionBase extends PredictorParam @Since("2.0.0") def getLinkPredictionCol: String = $(linkPredictionCol) + /** + * Param for offset column name. If this is not set or empty, we treat all instance offsets + * as 0.0. The feature specified as offset has a constant coefficient of 1.0. + * @group param + */ + @Since("2.3.0") + final val offsetCol: Param[String] = new Param[String](this, "offsetCol", "The offset " + + "column name. If this is not set or empty, we treat all instance offsets as 0.0") + + /** @group getParam */ + @Since("2.3.0") + def getOffsetCol: String = $(offsetCol) + + /** Checks whether weight column is set and nonempty. 
*/ + private[regression] def hasWeightCol: Boolean = + isSet(weightCol) && $(weightCol).nonEmpty + + /** Checks whether offset column is set and nonempty. */ + private[regression] def hasOffsetCol: Boolean = + isSet(offsetCol) && $(offsetCol).nonEmpty + /** Checks whether we should output link prediction. */ private[regression] def hasLinkPredictionCol: Boolean = { isDefined(linkPredictionCol) && $(linkPredictionCol).nonEmpty @@ -172,6 +193,11 @@ private[regression] trait GeneralizedLinearRegressionBase extends PredictorParam } val newSchema = super.validateAndTransformSchema(schema, fitting, featuresDataType) + + if (hasOffsetCol) { + SchemaUtils.checkNumericType(schema, $(offsetCol)) + } + if (hasLinkPredictionCol) { SchemaUtils.appendColumn(newSchema, $(linkPredictionCol), DoubleType) } else { @@ -306,6 +332,16 @@ class GeneralizedLinearRegression @Since("2.0.0") (@Since("2.0.0") override val @Since("2.0.0") def setWeightCol(value: String): this.type = set(weightCol, value) + /** + * Sets the value of param [[offsetCol]]. + * If this is not set or empty, we treat all instance offsets as 0.0. + * Default is not set, so all instances have offset 0.0. + * + * @group setParam + */ + @Since("2.3.0") + def setOffsetCol(value: String): this.type = set(offsetCol, value) + /** * Sets the solver algorithm used for optimization. * Currently only supports "irls" which is also the default solver. @@ -329,7 +365,7 @@ class GeneralizedLinearRegression @Since("2.0.0") (@Since("2.0.0") override val val numFeatures = dataset.select(col($(featuresCol))).first().getAs[Vector](0).size val instr = Instrumentation.create(this, dataset) - instr.logParams(labelCol, featuresCol, weightCol, predictionCol, linkPredictionCol, + instr.logParams(labelCol, featuresCol, weightCol, offsetCol, predictionCol, linkPredictionCol, family, solver, fitIntercept, link, maxIter, regParam, tol) instr.logNumFeatures(numFeatures) @@ -343,15 +379,16 @@ class GeneralizedLinearRegression @Since("2.0.0") (@Since("2.0.0") override val "GeneralizedLinearRegression was given data with 0 features, and with Param fitIntercept " + "set to false. To fit a model with 0 features, fitIntercept must be set to true." ) - val w = if (!isDefined(weightCol) || $(weightCol).isEmpty) lit(1.0) else col($(weightCol)) - val instances: RDD[Instance] = - dataset.select(col($(labelCol)), w, col($(featuresCol))).rdd.map { - case Row(label: Double, weight: Double, features: Vector) => - Instance(label, weight, features) - } + val w = if (!hasWeightCol) lit(1.0) else col($(weightCol)) + val offset = if (!hasOffsetCol) lit(0.0) else col($(offsetCol)).cast(DoubleType) val model = if (familyAndLink.family == Gaussian && familyAndLink.link == Identity) { // TODO: Make standardizeFeatures and standardizeLabel configurable. 
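+      // In this special case of Gaussian family with identity link, eta = X * beta +
+      // intercept + offset equals the mean, so the offset can simply be subtracted from
+      // the label and the problem reduces to an ordinary weighted least squares fit.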
+ val instances: RDD[Instance] = + dataset.select(col($(labelCol)), w, offset, col($(featuresCol))).rdd.map { + case Row(label: Double, weight: Double, offset: Double, features: Vector) => + Instance(label - offset, weight, features) + } val optimizer = new WeightedLeastSquares($(fitIntercept), $(regParam), elasticNetParam = 0.0, standardizeFeatures = true, standardizeLabel = true) val wlsModel = optimizer.fit(instances) @@ -362,6 +399,11 @@ class GeneralizedLinearRegression @Since("2.0.0") (@Since("2.0.0") override val wlsModel.diagInvAtWA.toArray, 1, getSolver) model.setSummary(Some(trainingSummary)) } else { + val instances: RDD[OffsetInstance] = + dataset.select(col($(labelCol)), w, offset, col($(featuresCol))).rdd.map { + case Row(label: Double, weight: Double, offset: Double, features: Vector) => + OffsetInstance(label, weight, offset, features) + } // Fit Generalized Linear Model by iteratively reweighted least squares (IRLS). val initialModel = familyAndLink.initialize(instances, $(fitIntercept), $(regParam)) val optimizer = new IterativelyReweightedLeastSquares(initialModel, @@ -425,12 +467,12 @@ object GeneralizedLinearRegression extends DefaultParamsReadable[GeneralizedLine * Get the initial guess model for [[IterativelyReweightedLeastSquares]]. */ def initialize( - instances: RDD[Instance], + instances: RDD[OffsetInstance], fitIntercept: Boolean, regParam: Double): WeightedLeastSquaresModel = { val newInstances = instances.map { instance => val mu = family.initialize(instance.label, instance.weight) - val eta = predict(mu) + val eta = predict(mu) - instance.offset Instance(eta, instance.weight, instance.features) } // TODO: Make standardizeFeatures and standardizeLabel configurable. @@ -441,16 +483,16 @@ object GeneralizedLinearRegression extends DefaultParamsReadable[GeneralizedLine } /** - * The reweight function used to update offsets and weights + * The reweight function used to update working labels and weights * at each iteration of [[IterativelyReweightedLeastSquares]]. */ - val reweightFunc: (Instance, WeightedLeastSquaresModel) => (Double, Double) = { - (instance: Instance, model: WeightedLeastSquaresModel) => { - val eta = model.predict(instance.features) + val reweightFunc: (OffsetInstance, WeightedLeastSquaresModel) => (Double, Double) = { + (instance: OffsetInstance, model: WeightedLeastSquaresModel) => { + val eta = model.predict(instance.features) + instance.offset val mu = fitted(eta) - val offset = eta + (instance.label - mu) * link.deriv(mu) - val weight = instance.weight / (math.pow(this.link.deriv(mu), 2.0) * family.variance(mu)) - (offset, weight) + val newLabel = eta - instance.offset + (instance.label - mu) * link.deriv(mu) + val newWeight = instance.weight / (math.pow(this.link.deriv(mu), 2.0) * family.variance(mu)) + (newLabel, newWeight) } } } @@ -950,15 +992,22 @@ class GeneralizedLinearRegressionModel private[ml] ( private lazy val familyAndLink = FamilyAndLink(this) override protected def predict(features: Vector): Double = { - val eta = predictLink(features) + predict(features, 0.0) + } + + /** + * Calculates the predicted value when offset is set. + */ + private def predict(features: Vector, offset: Double): Double = { + val eta = predictLink(features, offset) familyAndLink.fitted(eta) } /** - * Calculate the link prediction (linear predictor) of the given instance. + * Calculates the link prediction (linear predictor) of the given instance. 
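+   * The offset, when present, enters the linear predictor with a fixed coefficient of 1.0:
+   * eta = BLAS.dot(features, coefficients) + intercept + offset.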
*/ - private def predictLink(features: Vector): Double = { - BLAS.dot(features, coefficients) + intercept + private def predictLink(features: Vector, offset: Double): Double = { + BLAS.dot(features, coefficients) + intercept + offset } override def transform(dataset: Dataset[_]): DataFrame = { @@ -967,14 +1016,16 @@ class GeneralizedLinearRegressionModel private[ml] ( } override protected def transformImpl(dataset: Dataset[_]): DataFrame = { - val predictUDF = udf { (features: Vector) => predict(features) } - val predictLinkUDF = udf { (features: Vector) => predictLink(features) } + val predictUDF = udf { (features: Vector, offset: Double) => predict(features, offset) } + val predictLinkUDF = udf { (features: Vector, offset: Double) => predictLink(features, offset) } + + val offset = if (!hasOffsetCol) lit(0.0) else col($(offsetCol)).cast(DoubleType) var output = dataset if ($(predictionCol).nonEmpty) { - output = output.withColumn($(predictionCol), predictUDF(col($(featuresCol)))) + output = output.withColumn($(predictionCol), predictUDF(col($(featuresCol)), offset)) } if (hasLinkPredictionCol) { - output = output.withColumn($(linkPredictionCol), predictLinkUDF(col($(featuresCol)))) + output = output.withColumn($(linkPredictionCol), predictLinkUDF(col($(featuresCol)), offset)) } output.toDF() } @@ -1146,9 +1197,7 @@ class GeneralizedLinearRegressionSummary private[regression] ( /** Degrees of freedom. */ @Since("2.0.0") - lazy val degreesOfFreedom: Long = { - numInstances - rank - } + lazy val degreesOfFreedom: Long = numInstances - rank /** The residual degrees of freedom. */ @Since("2.0.0") @@ -1156,18 +1205,20 @@ class GeneralizedLinearRegressionSummary private[regression] ( /** The residual degrees of freedom for the null model. */ @Since("2.0.0") - lazy val residualDegreeOfFreedomNull: Long = if (model.getFitIntercept) { - numInstances - 1 - } else { - numInstances + lazy val residualDegreeOfFreedomNull: Long = { + if (model.getFitIntercept) numInstances - 1 else numInstances } - private def weightCol: Column = { - if (!model.isDefined(model.weightCol) || model.getWeightCol.isEmpty) { - lit(1.0) - } else { - col(model.getWeightCol) - } + private def label: Column = col(model.getLabelCol).cast(DoubleType) + + private def prediction: Column = col(predictionCol) + + private def weight: Column = { + if (!model.hasWeightCol) lit(1.0) else col(model.getWeightCol) + } + + private def offset: Column = { + if (!model.hasOffsetCol) lit(0.0) else col(model.getOffsetCol).cast(DoubleType) } private[regression] lazy val devianceResiduals: DataFrame = { @@ -1175,25 +1226,23 @@ class GeneralizedLinearRegressionSummary private[regression] ( val r = math.sqrt(math.max(family.deviance(y, mu, weight), 0.0)) if (y > mu) r else -1.0 * r } - val w = weightCol predictions.select( - drUDF(col(model.getLabelCol), col(predictionCol), w).as("devianceResiduals")) + drUDF(label, prediction, weight).as("devianceResiduals")) } private[regression] lazy val pearsonResiduals: DataFrame = { val prUDF = udf { mu: Double => family.variance(mu) } - val w = weightCol - predictions.select(col(model.getLabelCol).minus(col(predictionCol)) - .multiply(sqrt(w)).divide(sqrt(prUDF(col(predictionCol)))).as("pearsonResiduals")) + predictions.select(label.minus(prediction) + .multiply(sqrt(weight)).divide(sqrt(prUDF(prediction))).as("pearsonResiduals")) } private[regression] lazy val workingResiduals: DataFrame = { val wrUDF = udf { (y: Double, mu: Double) => (y - mu) * link.deriv(mu) } - 
predictions.select(wrUDF(col(model.getLabelCol), col(predictionCol)).as("workingResiduals")) + predictions.select(wrUDF(label, prediction).as("workingResiduals")) } private[regression] lazy val responseResiduals: DataFrame = { - predictions.select(col(model.getLabelCol).minus(col(predictionCol)).as("responseResiduals")) + predictions.select(label.minus(prediction).as("responseResiduals")) } /** @@ -1225,16 +1274,35 @@ class GeneralizedLinearRegressionSummary private[regression] ( */ @Since("2.0.0") lazy val nullDeviance: Double = { - val w = weightCol - val wtdmu: Double = if (model.getFitIntercept) { - val agg = predictions.agg(sum(w.multiply(col(model.getLabelCol))), sum(w)).first() - agg.getDouble(0) / agg.getDouble(1) + val intercept: Double = if (!model.getFitIntercept) { + 0.0 } else { - link.unlink(0.0) + /* + Estimate intercept analytically when there is no offset, or when there is offset but + the model is Gaussian family with identity link. Otherwise, fit an intercept only model. + */ + if (!model.hasOffsetCol || + (model.hasOffsetCol && family == Gaussian && link == Identity)) { + val agg = predictions.agg(sum(weight.multiply( + label.minus(offset))), sum(weight)).first() + link.link(agg.getDouble(0) / agg.getDouble(1)) + } else { + // Create empty feature column and fit intercept only model using param setting from model + val featureNull = "feature_" + java.util.UUID.randomUUID.toString + val paramMap = model.extractParamMap() + paramMap.put(model.featuresCol, featureNull) + if (family.name != "tweedie") { + paramMap.remove(model.variancePower) + } + val emptyVectorUDF = udf{ () => Vectors.zeros(0) } + model.parent.fit( + dataset.withColumn(featureNull, emptyVectorUDF()), paramMap + ).intercept + } } - predictions.select(col(model.getLabelCol).cast(DoubleType), w).rdd.map { - case Row(y: Double, weight: Double) => - family.deviance(y, wtdmu, weight) + predictions.select(label, offset, weight).rdd.map { + case Row(y: Double, offset: Double, weight: Double) => + family.deviance(y, link.unlink(intercept + offset), weight) }.sum() } @@ -1243,8 +1311,7 @@ class GeneralizedLinearRegressionSummary private[regression] ( */ @Since("2.0.0") lazy val deviance: Double = { - val w = weightCol - predictions.select(col(model.getLabelCol).cast(DoubleType), col(predictionCol), w).rdd.map { + predictions.select(label, prediction, weight).rdd.map { case Row(label: Double, pred: Double, weight: Double) => family.deviance(label, pred, weight) }.sum() @@ -1269,10 +1336,9 @@ class GeneralizedLinearRegressionSummary private[regression] ( /** Akaike Information Criterion (AIC) for the fitted model. 
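   * Defined as -2 * log-likelihood + 2 * (number of estimated parameters); smaller values
   * indicate a better fit.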
*/ @Since("2.0.0") lazy val aic: Double = { - val w = weightCol - val weightSum = predictions.select(w).agg(sum(w)).first().getDouble(0) + val weightSum = predictions.select(weight).agg(sum(weight)).first().getDouble(0) val t = predictions.select( - col(model.getLabelCol).cast(DoubleType), col(predictionCol), w).rdd.map { + label, prediction, weight).rdd.map { case Row(label: Double, pred: Double, weight: Double) => (label, pred, weight) } diff --git a/mllib/src/test/scala/org/apache/spark/ml/optim/IterativelyReweightedLeastSquaresSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/optim/IterativelyReweightedLeastSquaresSuite.scala index 50260952ecb6..6d143504fcf5 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/optim/IterativelyReweightedLeastSquaresSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/optim/IterativelyReweightedLeastSquaresSuite.scala @@ -18,7 +18,7 @@ package org.apache.spark.ml.optim import org.apache.spark.SparkFunSuite -import org.apache.spark.ml.feature.Instance +import org.apache.spark.ml.feature.{Instance, OffsetInstance} import org.apache.spark.ml.linalg.Vectors import org.apache.spark.ml.util.TestingUtils._ import org.apache.spark.mllib.util.MLlibTestSparkContext @@ -26,8 +26,8 @@ import org.apache.spark.rdd.RDD class IterativelyReweightedLeastSquaresSuite extends SparkFunSuite with MLlibTestSparkContext { - private var instances1: RDD[Instance] = _ - private var instances2: RDD[Instance] = _ + private var instances1: RDD[OffsetInstance] = _ + private var instances2: RDD[OffsetInstance] = _ override def beforeAll(): Unit = { super.beforeAll() @@ -39,10 +39,10 @@ class IterativelyReweightedLeastSquaresSuite extends SparkFunSuite with MLlibTes w <- c(1, 2, 3, 4) */ instances1 = sc.parallelize(Seq( - Instance(1.0, 1.0, Vectors.dense(0.0, 5.0).toSparse), - Instance(0.0, 2.0, Vectors.dense(1.0, 2.0)), - Instance(1.0, 3.0, Vectors.dense(2.0, 1.0)), - Instance(0.0, 4.0, Vectors.dense(3.0, 3.0)) + OffsetInstance(1.0, 1.0, 0.0, Vectors.dense(0.0, 5.0).toSparse), + OffsetInstance(0.0, 2.0, 0.0, Vectors.dense(1.0, 2.0)), + OffsetInstance(1.0, 3.0, 0.0, Vectors.dense(2.0, 1.0)), + OffsetInstance(0.0, 4.0, 0.0, Vectors.dense(3.0, 3.0)) ), 2) /* R code: @@ -52,10 +52,10 @@ class IterativelyReweightedLeastSquaresSuite extends SparkFunSuite with MLlibTes w <- c(1, 2, 3, 4) */ instances2 = sc.parallelize(Seq( - Instance(2.0, 1.0, Vectors.dense(0.0, 5.0).toSparse), - Instance(8.0, 2.0, Vectors.dense(1.0, 7.0)), - Instance(3.0, 3.0, Vectors.dense(2.0, 11.0)), - Instance(9.0, 4.0, Vectors.dense(3.0, 13.0)) + OffsetInstance(2.0, 1.0, 0.0, Vectors.dense(0.0, 5.0).toSparse), + OffsetInstance(8.0, 2.0, 0.0, Vectors.dense(1.0, 7.0)), + OffsetInstance(3.0, 3.0, 0.0, Vectors.dense(2.0, 11.0)), + OffsetInstance(9.0, 4.0, 0.0, Vectors.dense(3.0, 13.0)) ), 2) } @@ -156,7 +156,7 @@ class IterativelyReweightedLeastSquaresSuite extends SparkFunSuite with MLlibTes var idx = 0 for (fitIntercept <- Seq(false, true)) { val initial = new WeightedLeastSquares(fitIntercept, regParam = 0.0, elasticNetParam = 0.0, - standardizeFeatures = false, standardizeLabel = false).fit(instances2) + standardizeFeatures = false, standardizeLabel = false).fit(instances2.map(_.toInstance)) val irls = new IterativelyReweightedLeastSquares(initial, L1RegressionReweightFunc, fitIntercept, regParam = 0.0, maxIter = 200, tol = 1e-7).fit(instances2) val actual = Vectors.dense(irls.intercept, irls.coefficients(0), irls.coefficients(1)) @@ -169,29 +169,29 @@ class IterativelyReweightedLeastSquaresSuite extends 
SparkFunSuite with MLlibTes object IterativelyReweightedLeastSquaresSuite { def BinomialReweightFunc( - instance: Instance, + instance: OffsetInstance, model: WeightedLeastSquaresModel): (Double, Double) = { - val eta = model.predict(instance.features) + val eta = model.predict(instance.features) + instance.offset val mu = 1.0 / (1.0 + math.exp(-1.0 * eta)) - val z = eta + (instance.label - mu) / (mu * (1.0 - mu)) + val z = eta - instance.offset + (instance.label - mu) / (mu * (1.0 - mu)) val w = mu * (1 - mu) * instance.weight (z, w) } def PoissonReweightFunc( - instance: Instance, + instance: OffsetInstance, model: WeightedLeastSquaresModel): (Double, Double) = { - val eta = model.predict(instance.features) + val eta = model.predict(instance.features) + instance.offset val mu = math.exp(eta) - val z = eta + (instance.label - mu) / mu + val z = eta - instance.offset + (instance.label - mu) / mu val w = mu * instance.weight (z, w) } def L1RegressionReweightFunc( - instance: Instance, + instance: OffsetInstance, model: WeightedLeastSquaresModel): (Double, Double) = { - val eta = model.predict(instance.features) + val eta = model.predict(instance.features) + instance.offset val e = math.max(math.abs(eta - instance.label), 1e-7) val w = 1 / e val y = instance.label diff --git a/mllib/src/test/scala/org/apache/spark/ml/regression/GeneralizedLinearRegressionSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/regression/GeneralizedLinearRegressionSuite.scala index f7c7c001a36a..cfaa57314bd6 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/regression/GeneralizedLinearRegressionSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/regression/GeneralizedLinearRegressionSuite.scala @@ -21,7 +21,7 @@ import scala.util.Random import org.apache.spark.SparkFunSuite import org.apache.spark.ml.classification.LogisticRegressionSuite._ -import org.apache.spark.ml.feature.Instance +import org.apache.spark.ml.feature.{Instance, OffsetInstance} import org.apache.spark.ml.feature.LabeledPoint import org.apache.spark.ml.linalg.{BLAS, DenseVector, Vector, Vectors} import org.apache.spark.ml.param.{ParamMap, ParamsSuite} @@ -797,77 +797,160 @@ class GeneralizedLinearRegressionSuite } } - test("glm summary: gaussian family with weight") { + test("generalized linear regression with weight and offset") { /* - R code: + R code: + library(statmod) - A <- matrix(c(0, 1, 2, 3, 5, 7, 11, 13), 4, 2) - b <- c(17, 19, 23, 29) - w <- c(1, 2, 3, 4) - df <- as.data.frame(cbind(A, b)) - */ - val datasetWithWeight = Seq( - Instance(17.0, 1.0, Vectors.dense(0.0, 5.0).toSparse), - Instance(19.0, 2.0, Vectors.dense(1.0, 7.0)), - Instance(23.0, 3.0, Vectors.dense(2.0, 11.0)), - Instance(29.0, 4.0, Vectors.dense(3.0, 13.0)) + df <- as.data.frame(matrix(c( + 0.2, 1.0, 2.0, 0.0, 5.0, + 0.5, 2.1, 0.5, 1.0, 2.0, + 0.9, 0.4, 1.0, 2.0, 1.0, + 0.7, 0.7, 0.0, 3.0, 3.0), 4, 5, byrow = TRUE)) + families <- list(gaussian, binomial, poisson, Gamma, tweedie(1.5)) + f1 <- V1 ~ -1 + V4 + V5 + f2 <- V1 ~ V4 + V5 + for (f in c(f1, f2)) { + for (fam in families) { + model <- glm(f, df, family = fam, weights = V2, offset = V3) + print(as.vector(coef(model))) + } + } + [1] 0.5169222 -0.3344444 + [1] 0.9419107 -0.6864404 + [1] 0.1812436 -0.6568422 + [1] -0.2869094 0.7857710 + [1] 0.1055254 0.2979113 + [1] -0.05990345 0.53188982 -0.32118415 + [1] -0.2147117 0.9911750 -0.6356096 + [1] -1.5616130 0.6646470 -0.3192581 + [1] 0.3390397 -0.3406099 0.6870259 + [1] 0.3665034 0.1039416 0.1484616 + */ + val dataset = Seq( + OffsetInstance(0.2, 1.0, 2.0, 
Vectors.dense(0.0, 5.0)), + OffsetInstance(0.5, 2.1, 0.5, Vectors.dense(1.0, 2.0)), + OffsetInstance(0.9, 0.4, 1.0, Vectors.dense(2.0, 1.0)), + OffsetInstance(0.7, 0.7, 0.0, Vectors.dense(3.0, 3.0)) ).toDF() + + val expected = Seq( + Vectors.dense(0, 0.5169222, -0.3344444), + Vectors.dense(0, 0.9419107, -0.6864404), + Vectors.dense(0, 0.1812436, -0.6568422), + Vectors.dense(0, -0.2869094, 0.785771), + Vectors.dense(0, 0.1055254, 0.2979113), + Vectors.dense(-0.05990345, 0.53188982, -0.32118415), + Vectors.dense(-0.2147117, 0.991175, -0.6356096), + Vectors.dense(-1.561613, 0.664647, -0.3192581), + Vectors.dense(0.3390397, -0.3406099, 0.6870259), + Vectors.dense(0.3665034, 0.1039416, 0.1484616)) + + import GeneralizedLinearRegression._ + + var idx = 0 + + for (fitIntercept <- Seq(false, true)) { + for (family <- Seq("gaussian", "binomial", "poisson", "gamma", "tweedie")) { + val trainer = new GeneralizedLinearRegression().setFamily(family) + .setFitIntercept(fitIntercept).setOffsetCol("offset") + .setWeightCol("weight").setLinkPredictionCol("linkPrediction") + if (family == "tweedie") trainer.setVariancePower(1.5) + val model = trainer.fit(dataset) + val actual = Vectors.dense(model.intercept, model.coefficients(0), model.coefficients(1)) + assert(actual ~= expected(idx) absTol 1e-4, s"Model mismatch: GLM with family = $family," + + s" and fitIntercept = $fitIntercept.") + + val familyLink = FamilyAndLink(trainer) + model.transform(dataset).select("features", "offset", "prediction", "linkPrediction") + .collect().foreach { + case Row(features: DenseVector, offset: Double, prediction1: Double, + linkPrediction1: Double) => + val eta = BLAS.dot(features, model.coefficients) + model.intercept + offset + val prediction2 = familyLink.fitted(eta) + val linkPrediction2 = eta + assert(prediction1 ~= prediction2 relTol 1E-5, "Prediction mismatch: GLM with " + + s"family = $family, and fitIntercept = $fitIntercept.") + assert(linkPrediction1 ~= linkPrediction2 relTol 1E-5, "Link Prediction mismatch: " + + s"GLM with family = $family, and fitIntercept = $fitIntercept.") + } + + idx += 1 + } + } + } + + test("glm summary: gaussian family with weight and offset") { /* - R code: + R code: - model <- glm(formula = "b ~ .", family="gaussian", data = df, weights = w) - summary(model) + A <- matrix(c(0, 1, 2, 3, 5, 7, 11, 13), 4, 2) + b <- c(17, 19, 23, 29) + w <- c(1, 2, 3, 4) + off <- c(2, 3, 1, 4) + df <- as.data.frame(cbind(A, b)) + */ + val dataset = Seq( + OffsetInstance(17.0, 1.0, 2.0, Vectors.dense(0.0, 5.0).toSparse), + OffsetInstance(19.0, 2.0, 3.0, Vectors.dense(1.0, 7.0)), + OffsetInstance(23.0, 3.0, 1.0, Vectors.dense(2.0, 11.0)), + OffsetInstance(29.0, 4.0, 4.0, Vectors.dense(3.0, 13.0)) + ).toDF() + /* + R code: - Deviance Residuals: - 1 2 3 4 - 1.920 -1.358 -1.109 0.960 + model <- glm(formula = "b ~ .", family = "gaussian", data = df, + weights = w, offset = off) + summary(model) - Coefficients: - Estimate Std. Error t value Pr(>|t|) - (Intercept) 18.080 9.608 1.882 0.311 - V1 6.080 5.556 1.094 0.471 - V2 -0.600 1.960 -0.306 0.811 + Deviance Residuals: + 1 2 3 4 + 0.9600 -0.6788 -0.5543 0.4800 - (Dispersion parameter for gaussian family taken to be 7.68) + Coefficients: + Estimate Std. 
Error t value Pr(>|t|) + (Intercept) 5.5400 4.8040 1.153 0.455 + V1 -0.9600 2.7782 -0.346 0.788 + V2 1.7000 0.9798 1.735 0.333 - Null deviance: 202.00 on 3 degrees of freedom - Residual deviance: 7.68 on 1 degrees of freedom - AIC: 18.783 + (Dispersion parameter for gaussian family taken to be 1.92) - Number of Fisher Scoring iterations: 2 + Null deviance: 152.10 on 3 degrees of freedom + Residual deviance: 1.92 on 1 degrees of freedom + AIC: 13.238 - residuals(model, type="pearson") - 1 2 3 4 - 1.920000 -1.357645 -1.108513 0.960000 + Number of Fisher Scoring iterations: 2 - residuals(model, type="working") + residuals(model, type = "pearson") + 1 2 3 4 + 0.9600000 -0.6788225 -0.5542563 0.4800000 + residuals(model, type = "working") 1 2 3 4 - 1.92 -0.96 -0.64 0.48 - - residuals(model, type="response") + 0.96 -0.48 -0.32 0.24 + residuals(model, type = "response") 1 2 3 4 - 1.92 -0.96 -0.64 0.48 + 0.96 -0.48 -0.32 0.24 */ val trainer = new GeneralizedLinearRegression() - .setWeightCol("weight") + .setWeightCol("weight").setOffsetCol("offset") + + val model = trainer.fit(dataset) - val model = trainer.fit(datasetWithWeight) - - val coefficientsR = Vectors.dense(Array(6.080, -0.600)) - val interceptR = 18.080 - val devianceResidualsR = Array(1.920, -1.358, -1.109, 0.960) - val pearsonResidualsR = Array(1.920000, -1.357645, -1.108513, 0.960000) - val workingResidualsR = Array(1.92, -0.96, -0.64, 0.48) - val responseResidualsR = Array(1.92, -0.96, -0.64, 0.48) - val seCoefR = Array(5.556, 1.960, 9.608) - val tValsR = Array(1.094, -0.306, 1.882) - val pValsR = Array(0.471, 0.811, 0.311) - val dispersionR = 7.68 - val nullDevianceR = 202.00 - val residualDevianceR = 7.68 + val coefficientsR = Vectors.dense(Array(-0.96, 1.7)) + val interceptR = 5.54 + val devianceResidualsR = Array(0.96, -0.67882, -0.55426, 0.48) + val pearsonResidualsR = Array(0.96, -0.67882, -0.55426, 0.48) + val workingResidualsR = Array(0.96, -0.48, -0.32, 0.24) + val responseResidualsR = Array(0.96, -0.48, -0.32, 0.24) + val seCoefR = Array(2.7782, 0.9798, 4.804) + val tValsR = Array(-0.34555, 1.73506, 1.15321) + val pValsR = Array(0.78819, 0.33286, 0.45478) + val dispersionR = 1.92 + val nullDevianceR = 152.1 + val residualDevianceR = 1.92 val residualDegreeOfFreedomNullR = 3 val residualDegreeOfFreedomR = 1 - val aicR = 18.783 + val aicR = 13.23758 assert(model.hasSummary) val summary = model.summary @@ -912,7 +995,7 @@ class GeneralizedLinearRegressionSuite assert(summary.aic ~== aicR absTol 1E-3) assert(summary.solver === "irls") - val summary2: GeneralizedLinearRegressionSummary = model.evaluate(datasetWithWeight) + val summary2: GeneralizedLinearRegressionSummary = model.evaluate(dataset) assert(summary.predictions.columns.toSet === summary2.predictions.columns.toSet) assert(summary.predictionCol === summary2.predictionCol) assert(summary.rank === summary2.rank) @@ -925,79 +1008,79 @@ class GeneralizedLinearRegressionSuite assert(summary.aic === summary2.aic) } - test("glm summary: binomial family with weight") { + test("glm summary: binomial family with weight and offset") { /* - R code: + R code: - A <- matrix(c(0, 1, 2, 3, 5, 2, 1, 3), 4, 2) - b <- c(1, 0.5, 1, 0) - w <- c(1, 2.0, 0.3, 4.7) - df <- as.data.frame(cbind(A, b)) + df <- as.data.frame(matrix(c( + 0.2, 1.0, 2.0, 0.0, 5.0, + 0.5, 2.1, 0.5, 1.0, 2.0, + 0.9, 0.4, 1.0, 2.0, 1.0, + 0.7, 0.7, 0.0, 3.0, 3.0), 4, 5, byrow = TRUE)) */ - val datasetWithWeight = Seq( - Instance(1.0, 1.0, Vectors.dense(0.0, 5.0).toSparse), - Instance(0.5, 2.0, Vectors.dense(1.0, 2.0)), 
- Instance(1.0, 0.3, Vectors.dense(2.0, 1.0)), - Instance(0.0, 4.7, Vectors.dense(3.0, 3.0)) + val dataset = Seq( + OffsetInstance(0.2, 1.0, 2.0, Vectors.dense(0.0, 5.0)), + OffsetInstance(0.5, 2.1, 0.5, Vectors.dense(1.0, 2.0)), + OffsetInstance(0.9, 0.4, 1.0, Vectors.dense(2.0, 1.0)), + OffsetInstance(0.7, 0.7, 0.0, Vectors.dense(3.0, 3.0)) ).toDF() - /* - R code: - - model <- glm(formula = "b ~ . -1", family="binomial", data = df, weights = w) - summary(model) - - Deviance Residuals: - 1 2 3 4 - 0.2404 0.1965 1.2824 -0.6916 + R code: - Coefficients: - Estimate Std. Error z value Pr(>|z|) - x1 -1.6901 1.2764 -1.324 0.185 - x2 0.7059 0.9449 0.747 0.455 + model <- glm(formula = "V1 ~ V4 + V5", family = "binomial", data = df, + weights = V2, offset = V3) + summary(model) - (Dispersion parameter for binomial family taken to be 1) + Deviance Residuals: + 1 2 3 4 + 0.002584 -0.003800 0.012478 -0.001796 - Null deviance: 8.3178 on 4 degrees of freedom - Residual deviance: 2.2193 on 2 degrees of freedom - AIC: 5.9915 + Coefficients: + Estimate Std. Error z value Pr(>|z|) + (Intercept) -0.2147 3.5687 -0.060 0.952 + V4 0.9912 1.2344 0.803 0.422 + V5 -0.6356 0.9669 -0.657 0.511 - Number of Fisher Scoring iterations: 5 + (Dispersion parameter for binomial family taken to be 1) - residuals(model, type="pearson") - 1 2 3 4 - 0.171217 0.197406 2.085864 -0.495332 + Null deviance: 2.17560881 on 3 degrees of freedom + Residual deviance: 0.00018005 on 1 degrees of freedom + AIC: 10.245 - residuals(model, type="working") - 1 2 3 4 - 1.029315 0.281881 15.502768 -1.052203 + Number of Fisher Scoring iterations: 4 - residuals(model, type="response") - 1 2 3 4 - 0.028480 0.069123 0.935495 -0.049613 + residuals(model, type = "pearson") + 1 2 3 4 + 0.002586113 -0.003799744 0.012372235 -0.001796892 + residuals(model, type = "working") + 1 2 3 4 + 0.006477857 -0.005244163 0.063541250 -0.004691064 + residuals(model, type = "response") + 1 2 3 4 + 0.0010324375 -0.0013110318 0.0060225522 -0.0009832738 */ val trainer = new GeneralizedLinearRegression() .setFamily("Binomial") .setWeightCol("weight") - .setFitIntercept(false) - - val model = trainer.fit(datasetWithWeight) - - val coefficientsR = Vectors.dense(Array(-1.690134, 0.705929)) - val interceptR = 0.0 - val devianceResidualsR = Array(0.2404, 0.1965, 1.2824, -0.6916) - val pearsonResidualsR = Array(0.171217, 0.197406, 2.085864, -0.495332) - val workingResidualsR = Array(1.029315, 0.281881, 15.502768, -1.052203) - val responseResidualsR = Array(0.02848, 0.069123, 0.935495, -0.049613) - val seCoefR = Array(1.276417, 0.944934) - val tValsR = Array(-1.324124, 0.747068) - val pValsR = Array(0.185462, 0.455023) - val dispersionR = 1.0 - val nullDevianceR = 8.3178 - val residualDevianceR = 2.2193 - val residualDegreeOfFreedomNullR = 4 - val residualDegreeOfFreedomR = 2 - val aicR = 5.991537 + .setOffsetCol("offset") + + val model = trainer.fit(dataset) + + val coefficientsR = Vectors.dense(Array(0.99117, -0.63561)) + val interceptR = -0.21471 + val devianceResidualsR = Array(0.00258, -0.0038, 0.01248, -0.0018) + val pearsonResidualsR = Array(0.00259, -0.0038, 0.01237, -0.0018) + val workingResidualsR = Array(0.00648, -0.00524, 0.06354, -0.00469) + val responseResidualsR = Array(0.00103, -0.00131, 0.00602, -0.00098) + val seCoefR = Array(1.23439, 0.9669, 3.56866) + val tValsR = Array(0.80297, -0.65737, -0.06017) + val pValsR = Array(0.42199, 0.51094, 0.95202) + val dispersionR = 1 + val nullDevianceR = 2.17561 + val residualDevianceR = 0.00018 + val 
residualDegreeOfFreedomNullR = 3 + val residualDegreeOfFreedomR = 1 + val aicR = 10.24453 val summary = model.summary val devianceResiduals = summary.residuals() @@ -1040,81 +1123,79 @@ class GeneralizedLinearRegressionSuite assert(summary.solver === "irls") } - test("glm summary: poisson family with weight") { + test("glm summary: poisson family with weight and offset") { /* - R code: + R code: - A <- matrix(c(0, 1, 2, 3, 5, 7, 11, 13), 4, 2) - b <- c(2, 8, 3, 9) - w <- c(1, 2, 3, 4) - df <- as.data.frame(cbind(A, b)) + A <- matrix(c(0, 1, 2, 3, 5, 7, 11, 13), 4, 2) + b <- c(2, 8, 3, 9) + w <- c(1, 2, 3, 4) + off <- c(2, 3, 1, 4) + df <- as.data.frame(cbind(A, b)) */ - val datasetWithWeight = Seq( - Instance(2.0, 1.0, Vectors.dense(0.0, 5.0).toSparse), - Instance(8.0, 2.0, Vectors.dense(1.0, 7.0)), - Instance(3.0, 3.0, Vectors.dense(2.0, 11.0)), - Instance(9.0, 4.0, Vectors.dense(3.0, 13.0)) + val dataset = Seq( + OffsetInstance(2.0, 1.0, 2.0, Vectors.dense(0.0, 5.0).toSparse), + OffsetInstance(8.0, 2.0, 3.0, Vectors.dense(1.0, 7.0)), + OffsetInstance(3.0, 3.0, 1.0, Vectors.dense(2.0, 11.0)), + OffsetInstance(9.0, 4.0, 4.0, Vectors.dense(3.0, 13.0)) ).toDF() /* - R code: - - model <- glm(formula = "b ~ .", family="poisson", data = df, weights = w) - summary(model) - - Deviance Residuals: - 1 2 3 4 - -0.28952 0.11048 0.14839 -0.07268 - - Coefficients: - Estimate Std. Error z value Pr(>|z|) - (Intercept) 6.2999 1.6086 3.916 8.99e-05 *** - V1 3.3241 1.0184 3.264 0.00110 ** - V2 -1.0818 0.3522 -3.071 0.00213 ** - --- - Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 - - (Dispersion parameter for poisson family taken to be 1) - - Null deviance: 15.38066 on 3 degrees of freedom - Residual deviance: 0.12333 on 1 degrees of freedom - AIC: 41.803 - - Number of Fisher Scoring iterations: 3 + R code: - residuals(model, type="pearson") - 1 2 3 4 - -0.28043145 0.11099310 0.14963714 -0.07253611 + model <- glm(formula = "b ~ .", family = "poisson", data = df, + weights = w, offset = off) + summary(model) - residuals(model, type="working") - 1 2 3 4 - -0.17960679 0.02813593 0.05113852 -0.01201650 + Deviance Residuals: + 1 2 3 4 + -2.0480 1.2315 1.8293 -0.7107 - residuals(model, type="response") - 1 2 3 4 - -0.4378554 0.2189277 0.1459518 -0.1094638 + Coefficients: + Estimate Std. 
Error z value Pr(>|z|) + (Intercept) -4.5678 1.9625 -2.328 0.0199 + V1 -2.8784 1.1683 -2.464 0.0137 + V2 0.8859 0.4170 2.124 0.0336 + + (Dispersion parameter for poisson family taken to be 1) + + Null deviance: 22.5585 on 3 degrees of freedom + Residual deviance: 9.5622 on 1 degrees of freedom + AIC: 51.242 + + Number of Fisher Scoring iterations: 5 + + residuals(model, type = "pearson") + 1 2 3 4 + -1.7480418 1.3037611 2.0750099 -0.6972966 + residuals(model, type = "working") + 1 2 3 4 + -0.6891489 0.3833588 0.9710682 -0.1096590 + residuals(model, type = "response") + 1 2 3 4 + -4.433948 2.216974 1.477983 -1.108487 */ val trainer = new GeneralizedLinearRegression() .setFamily("Poisson") .setWeightCol("weight") - .setFitIntercept(true) - - val model = trainer.fit(datasetWithWeight) - - val coefficientsR = Vectors.dense(Array(3.3241, -1.0818)) - val interceptR = 6.2999 - val devianceResidualsR = Array(-0.28952, 0.11048, 0.14839, -0.07268) - val pearsonResidualsR = Array(-0.28043145, 0.11099310, 0.14963714, -0.07253611) - val workingResidualsR = Array(-0.17960679, 0.02813593, 0.05113852, -0.01201650) - val responseResidualsR = Array(-0.4378554, 0.2189277, 0.1459518, -0.1094638) - val seCoefR = Array(1.0184, 0.3522, 1.6086) - val tValsR = Array(3.264, -3.071, 3.916) - val pValsR = Array(0.00110, 0.00213, 0.00009) - val dispersionR = 1.0 - val nullDevianceR = 15.38066 - val residualDevianceR = 0.12333 + .setOffsetCol("offset") + + val model = trainer.fit(dataset) + + val coefficientsR = Vectors.dense(Array(-2.87843, 0.88589)) + val interceptR = -4.56784 + val devianceResidualsR = Array(-2.04796, 1.23149, 1.82933, -0.71066) + val pearsonResidualsR = Array(-1.74804, 1.30376, 2.07501, -0.6973) + val workingResidualsR = Array(-0.68915, 0.38336, 0.97107, -0.10966) + val responseResidualsR = Array(-4.43395, 2.21697, 1.47798, -1.10849) + val seCoefR = Array(1.16826, 0.41703, 1.96249) + val tValsR = Array(-2.46387, 2.12428, -2.32757) + val pValsR = Array(0.01374, 0.03365, 0.01993) + val dispersionR = 1 + val nullDevianceR = 22.55853 + val residualDevianceR = 9.5622 val residualDegreeOfFreedomNullR = 3 val residualDegreeOfFreedomR = 1 - val aicR = 41.803 + val aicR = 51.24218 val summary = model.summary val devianceResiduals = summary.residuals() @@ -1157,78 +1238,79 @@ class GeneralizedLinearRegressionSuite assert(summary.solver === "irls") } - test("glm summary: gamma family with weight") { + test("glm summary: gamma family with weight and offset") { /* - R code: + R code: - A <- matrix(c(0, 1, 2, 3, 5, 7, 11, 13), 4, 2) - b <- c(2, 8, 3, 9) - w <- c(1, 2, 3, 4) - df <- as.data.frame(cbind(A, b)) + A <- matrix(c(0, 5, 1, 2, 2, 1, 3, 3), 4, 2, byrow = TRUE) + b <- c(1, 2, 1, 2) + w <- c(1, 2, 3, 4) + off <- c(0, 0.5, 1, 0) + df <- as.data.frame(cbind(A, b)) */ - val datasetWithWeight = Seq( - Instance(2.0, 1.0, Vectors.dense(0.0, 5.0).toSparse), - Instance(8.0, 2.0, Vectors.dense(1.0, 7.0)), - Instance(3.0, 3.0, Vectors.dense(2.0, 11.0)), - Instance(9.0, 4.0, Vectors.dense(3.0, 13.0)) + val dataset = Seq( + OffsetInstance(1.0, 1.0, 0.0, Vectors.dense(0.0, 5.0)), + OffsetInstance(2.0, 2.0, 0.5, Vectors.dense(1.0, 2.0)), + OffsetInstance(1.0, 3.0, 1.0, Vectors.dense(2.0, 1.0)), + OffsetInstance(2.0, 4.0, 0.0, Vectors.dense(3.0, 3.0)) ).toDF() /* - R code: - - model <- glm(formula = "b ~ .", family="Gamma", data = df, weights = w) - summary(model) + R code: - Deviance Residuals: - 1 2 3 4 - -0.26343 0.05761 0.12818 -0.03484 + model <- glm(formula = "b ~ .", family = "Gamma", data = df, + weights = w, 
offset = off) + summary(model) - Coefficients: - Estimate Std. Error t value Pr(>|t|) - (Intercept) -0.81511 0.23449 -3.476 0.178 - V1 -0.72730 0.16137 -4.507 0.139 - V2 0.23894 0.05481 4.359 0.144 + Deviance Residuals: + 1 2 3 4 + -0.17095 0.19867 -0.23604 0.03241 - (Dispersion parameter for Gamma family taken to be 0.07986091) + Coefficients: + Estimate Std. Error t value Pr(>|t|) + (Intercept) -0.56474 0.23866 -2.366 0.255 + V1 0.07695 0.06931 1.110 0.467 + V2 0.28068 0.07320 3.835 0.162 - Null deviance: 2.937462 on 3 degrees of freedom - Residual deviance: 0.090358 on 1 degrees of freedom - AIC: 23.202 + (Dispersion parameter for Gamma family taken to be 0.1212174) - Number of Fisher Scoring iterations: 4 + Null deviance: 2.02568 on 3 degrees of freedom + Residual deviance: 0.12546 on 1 degrees of freedom + AIC: 0.93388 - residuals(model, type="pearson") - 1 2 3 4 - -0.24082508 0.05839241 0.13135766 -0.03463621 + Number of Fisher Scoring iterations: 4 - residuals(model, type="working") + residuals(model, type = "pearson") + 1 2 3 4 + -0.16134949 0.20807694 -0.22544551 0.03258777 + residuals(model, type = "working") 1 2 3 4 - 0.091414181 -0.005374314 -0.027196998 0.001890910 - - residuals(model, type="response") - 1 2 3 4 - -0.6344390 0.3172195 0.2114797 -0.1586097 + 0.135315831 -0.084390309 0.113219135 -0.008279688 + residuals(model, type = "response") + 1 2 3 4 + -0.1923918 0.2565224 -0.1496381 0.0320653 */ val trainer = new GeneralizedLinearRegression() .setFamily("Gamma") .setWeightCol("weight") + .setOffsetCol("offset") + + val model = trainer.fit(dataset) - val model = trainer.fit(datasetWithWeight) - - val coefficientsR = Vectors.dense(Array(-0.72730, 0.23894)) - val interceptR = -0.81511 - val devianceResidualsR = Array(-0.26343, 0.05761, 0.12818, -0.03484) - val pearsonResidualsR = Array(-0.24082508, 0.05839241, 0.13135766, -0.03463621) - val workingResidualsR = Array(0.091414181, -0.005374314, -0.027196998, 0.001890910) - val responseResidualsR = Array(-0.6344390, 0.3172195, 0.2114797, -0.1586097) - val seCoefR = Array(0.16137, 0.05481, 0.23449) - val tValsR = Array(-4.507, 4.359, -3.476) - val pValsR = Array(0.139, 0.144, 0.178) - val dispersionR = 0.07986091 - val nullDevianceR = 2.937462 - val residualDevianceR = 0.090358 + val coefficientsR = Vectors.dense(Array(0.07695, 0.28068)) + val interceptR = -0.56474 + val devianceResidualsR = Array(-0.17095, 0.19867, -0.23604, 0.03241) + val pearsonResidualsR = Array(-0.16135, 0.20808, -0.22545, 0.03259) + val workingResidualsR = Array(0.13532, -0.08439, 0.11322, -0.00828) + val responseResidualsR = Array(-0.19239, 0.25652, -0.14964, 0.03207) + val seCoefR = Array(0.06931, 0.0732, 0.23866) + val tValsR = Array(1.11031, 3.83453, -2.3663) + val pValsR = Array(0.46675, 0.16241, 0.25454) + val dispersionR = 0.12122 + val nullDevianceR = 2.02568 + val residualDevianceR = 0.12546 val residualDegreeOfFreedomNullR = 3 val residualDegreeOfFreedomR = 1 - val aicR = 23.202 + val aicR = 0.93388 val summary = model.summary val devianceResiduals = summary.residuals() @@ -1271,77 +1353,81 @@ class GeneralizedLinearRegressionSuite assert(summary.solver === "irls") } - test("glm summary: tweedie family with weight") { + test("glm summary: tweedie family with weight and offset") { /* R code: - library(statmod) df <- as.data.frame(matrix(c( - 1.0, 1.0, 0.0, 5.0, - 0.5, 2.0, 1.0, 2.0, - 1.0, 3.0, 2.0, 1.0, - 0.0, 4.0, 3.0, 3.0), 4, 4, byrow = TRUE)) + 1.0, 1.0, 1.0, 0.0, 5.0, + 0.5, 2.0, 3.0, 1.0, 2.0, + 1.0, 3.0, 2.0, 2.0, 1.0, + 0.0, 4.0, 0.0, 3.0, 
3.0), 4, 5, byrow = TRUE)) + */ + val dataset = Seq( + OffsetInstance(1.0, 1.0, 1.0, Vectors.dense(0.0, 5.0)), + OffsetInstance(0.5, 2.0, 3.0, Vectors.dense(1.0, 2.0)), + OffsetInstance(1.0, 3.0, 2.0, Vectors.dense(2.0, 1.0)), + OffsetInstance(0.0, 4.0, 0.0, Vectors.dense(3.0, 3.0)) + ).toDF() + /* + R code: - model <- glm(V1 ~ -1 + V3 + V4, data = df, weights = V2, - family = tweedie(var.power = 1.6, link.power = 0)) + library(statmod) + model <- glm(V1 ~ V4 + V5, data = df, weights = V2, offset = V3, + family = tweedie(var.power = 1.6, link.power = 0.0)) summary(model) Deviance Residuals: 1 2 3 4 - 0.6210 -0.0515 1.6935 -3.2539 + 0.8917 -2.1396 1.2252 -1.7946 Coefficients: - Estimate Std. Error t value Pr(>|t|) - V3 -0.4087 0.5205 -0.785 0.515 - V4 -0.1212 0.4082 -0.297 0.794 + Estimate Std. Error t value Pr(>|t|) + (Intercept) -0.03047 3.65000 -0.008 0.995 + V4 -1.14577 1.41674 -0.809 0.567 + V5 -0.36585 0.97065 -0.377 0.771 - (Dispersion parameter for Tweedie family taken to be 3.830036) + (Dispersion parameter for Tweedie family taken to be 6.334961) - Null deviance: 20.702 on 4 degrees of freedom - Residual deviance: 13.844 on 2 degrees of freedom + Null deviance: 12.784 on 3 degrees of freedom + Residual deviance: 10.095 on 1 degrees of freedom AIC: NA - Number of Fisher Scoring iterations: 11 - - residuals(model, type="pearson") - 1 2 3 4 - 0.7383616 -0.0509458 2.2348337 -1.4552090 - residuals(model, type="working") - 1 2 3 4 - 0.83354150 -0.04103552 1.55676369 -1.00000000 - residuals(model, type="response") - 1 2 3 4 - 0.45460738 -0.02139574 0.60888055 -0.20392801 + Number of Fisher Scoring iterations: 18 + + residuals(model, type = "pearson") + 1 2 3 4 + 1.1472554 -1.4642569 1.4935199 -0.8025842 + residuals(model, type = "working") + 1 2 3 4 + 1.3624928 -0.8322375 0.9894580 -1.0000000 + residuals(model, type = "response") + 1 2 3 4 + 0.57671828 -2.48040354 0.49735052 -0.01040646 */ - val datasetWithWeight = Seq( - Instance(1.0, 1.0, Vectors.dense(0.0, 5.0)), - Instance(0.5, 2.0, Vectors.dense(1.0, 2.0)), - Instance(1.0, 3.0, Vectors.dense(2.0, 1.0)), - Instance(0.0, 4.0, Vectors.dense(3.0, 3.0)) - ).toDF() - val trainer = new GeneralizedLinearRegression() .setFamily("tweedie") .setVariancePower(1.6) .setLinkPower(0.0) .setWeightCol("weight") - .setFitIntercept(false) - - val model = trainer.fit(datasetWithWeight) - val coefficientsR = Vectors.dense(Array(-0.408746, -0.12125)) - val interceptR = 0.0 - val devianceResidualsR = Array(0.621047, -0.051515, 1.693473, -3.253946) - val pearsonResidualsR = Array(0.738362, -0.050946, 2.234834, -1.455209) - val workingResidualsR = Array(0.833541, -0.041036, 1.556764, -1.0) - val responseResidualsR = Array(0.454607, -0.021396, 0.608881, -0.203928) - val seCoefR = Array(0.520519, 0.408215) - val tValsR = Array(-0.785267, -0.297024) - val pValsR = Array(0.514549, 0.794457) - val dispersionR = 3.830036 - val nullDevianceR = 20.702 - val residualDevianceR = 13.844 - val residualDegreeOfFreedomNullR = 4 - val residualDegreeOfFreedomR = 2 + .setOffsetCol("offset") + + val model = trainer.fit(dataset) + + val coefficientsR = Vectors.dense(Array(-1.14577, -0.36585)) + val interceptR = -0.03047 + val devianceResidualsR = Array(0.89171, -2.13961, 1.2252, -1.79463) + val pearsonResidualsR = Array(1.14726, -1.46426, 1.49352, -0.80258) + val workingResidualsR = Array(1.36249, -0.83224, 0.98946, -1) + val responseResidualsR = Array(0.57672, -2.4804, 0.49735, -0.01041) + val seCoefR = Array(1.41674, 0.97065, 3.65) + val tValsR = Array(-0.80873, -0.37691, 
-0.00835)
+    val pValsR = Array(0.56707, 0.77053, 0.99468)
+    val dispersionR = 6.33496
+    val nullDevianceR = 12.78358
+    val residualDevianceR = 10.09488
+    val residualDegreeOfFreedomNullR = 3
+    val residualDegreeOfFreedomR = 1
 
     val summary = model.summary

From 3c2fc19d478256f8dc0ae7219fdd188030218c07 Mon Sep 17 00:00:00 2001
From: Xingbo Jiang
Date: Fri, 30 Jun 2017 20:30:26 +0800
Subject: [PATCH 002/125] [SPARK-18294][CORE] Implement commit protocol to support `mapred` package's committer

## What changes were proposed in this pull request?

This PR makes the following changes:

- Implement a new commit protocol `HadoopMapRedCommitProtocol` which supports the old `mapred` package's committer;
- Refactor SparkHadoopWriter and SparkHadoopMapReduceWriter into a single SparkHadoopWriter, so that writes through both the mapred and the mapreduce APIs are supported and a lot of duplicated code is removed.

After this change, it should be pretty easy for us to support committers from both the new and the old Hadoop APIs at a high level.

## How was this patch tested?

No major behavior change, passed the existing test cases.

Author: Xingbo Jiang

Closes #18438 from jiangxb1987/SparkHadoopWriter.
---
 .../io/HadoopMapRedCommitProtocol.scala       |  36 ++
 .../internal/io/HadoopWriteConfigUtil.scala   |  79 ++++
 .../io/SparkHadoopMapReduceWriter.scala       | 181 --------
 .../spark/internal/io/SparkHadoopWriter.scala | 393 ++++++++++++++----
 .../apache/spark/rdd/PairRDDFunctions.scala   |  72 +---
 .../spark/rdd/PairRDDFunctionsSuite.scala     |   2 +-
 .../OutputCommitCoordinatorSuite.scala        |  35 +-
 7 files changed, 461 insertions(+), 337 deletions(-)
 create mode 100644 core/src/main/scala/org/apache/spark/internal/io/HadoopMapRedCommitProtocol.scala
 create mode 100644 core/src/main/scala/org/apache/spark/internal/io/HadoopWriteConfigUtil.scala
 delete mode 100644 core/src/main/scala/org/apache/spark/internal/io/SparkHadoopMapReduceWriter.scala

diff --git a/core/src/main/scala/org/apache/spark/internal/io/HadoopMapRedCommitProtocol.scala b/core/src/main/scala/org/apache/spark/internal/io/HadoopMapRedCommitProtocol.scala
new file mode 100644
index 000000000000..ddbd624b380d
--- /dev/null
+++ b/core/src/main/scala/org/apache/spark/internal/io/HadoopMapRedCommitProtocol.scala
@@ -0,0 +1,36 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.internal.io
+
+import org.apache.hadoop.mapred._
+import org.apache.hadoop.mapreduce.{TaskAttemptContext => NewTaskAttemptContext}
+
+/**
+ * A [[FileCommitProtocol]] implementation backed by an underlying Hadoop OutputCommitter
+ * (from the old mapred API).
+ *
+ * Unlike Hadoop's OutputCommitter, this implementation is serializable.
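+ *
+ * @param jobId the job's or stage's id
+ * @param path the job's output path, or null if the committer acts as a noop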
+ */
+class HadoopMapRedCommitProtocol(jobId: String, path: String)
+  extends HadoopMapReduceCommitProtocol(jobId, path) {
+
+  override def setupCommitter(context: NewTaskAttemptContext): OutputCommitter = {
+    val config = context.getConfiguration.asInstanceOf[JobConf]
+    config.getOutputCommitter
+  }
+}
diff --git a/core/src/main/scala/org/apache/spark/internal/io/HadoopWriteConfigUtil.scala b/core/src/main/scala/org/apache/spark/internal/io/HadoopWriteConfigUtil.scala
new file mode 100644
index 000000000000..9b987e0e1bb6
--- /dev/null
+++ b/core/src/main/scala/org/apache/spark/internal/io/HadoopWriteConfigUtil.scala
@@ -0,0 +1,79 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.internal.io
+
+import scala.reflect.ClassTag
+
+import org.apache.hadoop.mapreduce._
+
+import org.apache.spark.SparkConf
+
+/**
+ * Interface for creating the output format/committer/writer used when saving an RDD using a
+ * Hadoop OutputFormat (both from the old mapred API and the new mapreduce API).
+ *
+ * Notes:
+ * 1. Implementations should throw [[IllegalArgumentException]] when the wrong Hadoop API is
+ *    referenced;
+ * 2. Implementations must be serializable, as the instance instantiated on the driver
+ *    will be used for tasks on executors;
+ * 3. Implementations should have a constructor with exactly one argument:
+ *    (conf: SerializableConfiguration) or (conf: SerializableJobConf).
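+ *
+ * Concrete implementations, one per Hadoop API, are provided as part of this patch's
+ * rework of SparkHadoopWriter.scala.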
+ */ +abstract class HadoopWriteConfigUtil[K, V: ClassTag] extends Serializable { + + // -------------------------------------------------------------------------- + // Create JobContext/TaskAttemptContext + // -------------------------------------------------------------------------- + + def createJobContext(jobTrackerId: String, jobId: Int): JobContext + + def createTaskAttemptContext( + jobTrackerId: String, + jobId: Int, + splitId: Int, + taskAttemptId: Int): TaskAttemptContext + + // -------------------------------------------------------------------------- + // Create committer + // -------------------------------------------------------------------------- + + def createCommitter(jobId: Int): HadoopMapReduceCommitProtocol + + // -------------------------------------------------------------------------- + // Create writer + // -------------------------------------------------------------------------- + + def initWriter(taskContext: TaskAttemptContext, splitId: Int): Unit + + def write(pair: (K, V)): Unit + + def closeWriter(taskContext: TaskAttemptContext): Unit + + // -------------------------------------------------------------------------- + // Create OutputFormat + // -------------------------------------------------------------------------- + + def initOutputFormat(jobContext: JobContext): Unit + + // -------------------------------------------------------------------------- + // Verify hadoop config + // -------------------------------------------------------------------------- + + def assertConf(jobContext: JobContext, conf: SparkConf): Unit +} diff --git a/core/src/main/scala/org/apache/spark/internal/io/SparkHadoopMapReduceWriter.scala b/core/src/main/scala/org/apache/spark/internal/io/SparkHadoopMapReduceWriter.scala deleted file mode 100644 index 376ff9bb19f7..000000000000 --- a/core/src/main/scala/org/apache/spark/internal/io/SparkHadoopMapReduceWriter.scala +++ /dev/null @@ -1,181 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.spark.internal.io - -import java.text.SimpleDateFormat -import java.util.{Date, Locale} - -import scala.reflect.ClassTag -import scala.util.DynamicVariable - -import org.apache.hadoop.conf.{Configurable, Configuration} -import org.apache.hadoop.fs.Path -import org.apache.hadoop.mapred.{JobConf, JobID} -import org.apache.hadoop.mapreduce._ -import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl - -import org.apache.spark.{SparkConf, SparkException, TaskContext} -import org.apache.spark.deploy.SparkHadoopUtil -import org.apache.spark.executor.OutputMetrics -import org.apache.spark.internal.Logging -import org.apache.spark.internal.io.FileCommitProtocol.TaskCommitMessage -import org.apache.spark.rdd.RDD -import org.apache.spark.util.{SerializableConfiguration, Utils} - -/** - * A helper object that saves an RDD using a Hadoop OutputFormat - * (from the newer mapreduce API, not the old mapred API). - */ -private[spark] -object SparkHadoopMapReduceWriter extends Logging { - - /** - * Basic work flow of this command is: - * 1. Driver side setup, prepare the data source and hadoop configuration for the write job to - * be issued. - * 2. Issues a write job consists of one or more executor side tasks, each of which writes all - * rows within an RDD partition. - * 3. If no exception is thrown in a task, commits that task, otherwise aborts that task; If any - * exception is thrown during task commitment, also aborts that task. - * 4. If all tasks are committed, commit the job, otherwise aborts the job; If any exception is - * thrown during job commitment, also aborts the job. - */ - def write[K, V: ClassTag]( - rdd: RDD[(K, V)], - hadoopConf: Configuration): Unit = { - // Extract context and configuration from RDD. - val sparkContext = rdd.context - val stageId = rdd.id - val sparkConf = rdd.conf - val conf = new SerializableConfiguration(hadoopConf) - - // Set up a job. - val jobTrackerId = SparkHadoopWriterUtils.createJobTrackerID(new Date()) - val jobAttemptId = new TaskAttemptID(jobTrackerId, stageId, TaskType.MAP, 0, 0) - val jobContext = new TaskAttemptContextImpl(conf.value, jobAttemptId) - val format = jobContext.getOutputFormatClass - - if (SparkHadoopWriterUtils.isOutputSpecValidationEnabled(sparkConf)) { - // FileOutputFormat ignores the filesystem parameter - val jobFormat = format.newInstance - jobFormat.checkOutputSpecs(jobContext) - } - - val committer = FileCommitProtocol.instantiate( - className = classOf[HadoopMapReduceCommitProtocol].getName, - jobId = stageId.toString, - outputPath = conf.value.get("mapreduce.output.fileoutputformat.outputdir"), - isAppend = false).asInstanceOf[HadoopMapReduceCommitProtocol] - committer.setupJob(jobContext) - - // Try to write all RDD partitions as a Hadoop OutputFormat. 
- try { - val ret = sparkContext.runJob(rdd, (context: TaskContext, iter: Iterator[(K, V)]) => { - executeTask( - context = context, - jobTrackerId = jobTrackerId, - sparkStageId = context.stageId, - sparkPartitionId = context.partitionId, - sparkAttemptNumber = context.attemptNumber, - committer = committer, - hadoopConf = conf.value, - outputFormat = format.asInstanceOf[Class[OutputFormat[K, V]]], - iterator = iter) - }) - - committer.commitJob(jobContext, ret) - logInfo(s"Job ${jobContext.getJobID} committed.") - } catch { - case cause: Throwable => - logError(s"Aborting job ${jobContext.getJobID}.", cause) - committer.abortJob(jobContext) - throw new SparkException("Job aborted.", cause) - } - } - - /** Write an RDD partition out in a single Spark task. */ - private def executeTask[K, V: ClassTag]( - context: TaskContext, - jobTrackerId: String, - sparkStageId: Int, - sparkPartitionId: Int, - sparkAttemptNumber: Int, - committer: FileCommitProtocol, - hadoopConf: Configuration, - outputFormat: Class[_ <: OutputFormat[K, V]], - iterator: Iterator[(K, V)]): TaskCommitMessage = { - // Set up a task. - val attemptId = new TaskAttemptID(jobTrackerId, sparkStageId, TaskType.REDUCE, - sparkPartitionId, sparkAttemptNumber) - val taskContext = new TaskAttemptContextImpl(hadoopConf, attemptId) - committer.setupTask(taskContext) - - val (outputMetrics, callback) = SparkHadoopWriterUtils.initHadoopOutputMetrics(context) - - // Initiate the writer. - val taskFormat = outputFormat.newInstance() - // If OutputFormat is Configurable, we should set conf to it. - taskFormat match { - case c: Configurable => c.setConf(hadoopConf) - case _ => () - } - var writer = taskFormat.getRecordWriter(taskContext) - .asInstanceOf[RecordWriter[K, V]] - require(writer != null, "Unable to obtain RecordWriter") - var recordsWritten = 0L - - // Write all rows in RDD partition. - try { - val ret = Utils.tryWithSafeFinallyAndFailureCallbacks { - // Write rows out, release resource and commit the task. - while (iterator.hasNext) { - val pair = iterator.next() - writer.write(pair._1, pair._2) - - // Update bytes written metric every few records - SparkHadoopWriterUtils.maybeUpdateOutputMetrics(outputMetrics, callback, recordsWritten) - recordsWritten += 1 - } - if (writer != null) { - writer.close(taskContext) - writer = null - } - committer.commitTask(taskContext) - }(catchBlock = { - // If there is an error, release resource and then abort the task. 
-      try {
-        if (writer != null) {
-          writer.close(taskContext)
-          writer = null
-        }
-      } finally {
-        committer.abortTask(taskContext)
-        logError(s"Task ${taskContext.getTaskAttemptID} aborted.")
-      }
-    })
-
-      outputMetrics.setBytesWritten(callback())
-      outputMetrics.setRecordsWritten(recordsWritten)
-
-      ret
-    } catch {
-      case t: Throwable =>
-        throw new SparkException("Task failed while writing rows", t)
-    }
-  }
-}
diff --git a/core/src/main/scala/org/apache/spark/internal/io/SparkHadoopWriter.scala b/core/src/main/scala/org/apache/spark/internal/io/SparkHadoopWriter.scala
index acc9c3857100..7d846f9354df 100644
--- a/core/src/main/scala/org/apache/spark/internal/io/SparkHadoopWriter.scala
+++ b/core/src/main/scala/org/apache/spark/internal/io/SparkHadoopWriter.scala
@@ -17,143 +17,374 @@
 package org.apache.spark.internal.io
 
-import java.io.IOException
-import java.text.{NumberFormat, SimpleDateFormat}
+import java.text.NumberFormat
 import java.util.{Date, Locale}
 
+import scala.reflect.ClassTag
+
+import org.apache.hadoop.conf.{Configurable, Configuration}
 import org.apache.hadoop.fs.FileSystem
 import org.apache.hadoop.mapred._
-import org.apache.hadoop.mapreduce.TaskType
+import org.apache.hadoop.mapreduce.{JobContext => NewJobContext,
+OutputFormat => NewOutputFormat, RecordWriter => NewRecordWriter,
+TaskAttemptContext => NewTaskAttemptContext, TaskAttemptID => NewTaskAttemptID, TaskType}
+import org.apache.hadoop.mapreduce.task.{TaskAttemptContextImpl => NewTaskAttemptContextImpl}
 
-import org.apache.spark.SerializableWritable
+import org.apache.spark.{SerializableWritable, SparkConf, SparkException, TaskContext}
+import org.apache.spark.deploy.SparkHadoopUtil
 import org.apache.spark.internal.Logging
-import org.apache.spark.mapred.SparkHadoopMapRedUtil
-import org.apache.spark.rdd.HadoopRDD
-import org.apache.spark.util.SerializableJobConf
+import org.apache.spark.internal.io.FileCommitProtocol.TaskCommitMessage
+import org.apache.spark.rdd.{HadoopRDD, RDD}
+import org.apache.spark.util.{SerializableConfiguration, SerializableJobConf, Utils}
 
 /**
- * Internal helper class that saves an RDD using a Hadoop OutputFormat.
- *
- * Saves the RDD using a JobConf, which should contain an output key class, an output value class,
- * a filename to write to, etc, exactly like in a Hadoop MapReduce job.
+ * A helper object that saves an RDD using a Hadoop OutputFormat.
+ */
+private[spark]
+object SparkHadoopWriter extends Logging {
+  import SparkHadoopWriterUtils._
+
+  /**
+   * The basic workflow of this command is:
+   * 1. Driver-side setup: prepare the data source and Hadoop configuration for the write job
+   *    to be issued.
+   * 2. Issue a write job consisting of one or more executor-side tasks, each of which writes
+   *    all rows within an RDD partition.
+   * 3. If no exception is thrown in a task, commit that task; otherwise abort it. If any
+   *    exception is thrown during the task commit, also abort that task.
+   * 4. If all tasks are committed, commit the job; otherwise abort it. If any exception is
+   *    thrown during the job commit, also abort the job.
+   */
+  def write[K, V: ClassTag](
+      rdd: RDD[(K, V)],
+      config: HadoopWriteConfigUtil[K, V]): Unit = {
+    // Extract context and configuration from RDD.
+    val sparkContext = rdd.context
+    val stageId = rdd.id
+
+    // Set up a job.
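+    // A job tracker ID derived from the current time is generated once on the driver and
+    // shared by every task attempt, so all attempts group under the same Hadoop job.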
+    val jobTrackerId = createJobTrackerID(new Date())
+    val jobContext = config.createJobContext(jobTrackerId, stageId)
+    config.initOutputFormat(jobContext)
+
+    // Assert that the output format/key/value classes are set in the job configuration.
+    config.assertConf(jobContext, rdd.conf)
+
+    val committer = config.createCommitter(stageId)
+    committer.setupJob(jobContext)
+
+    // Try to write all RDD partitions as a Hadoop OutputFormat.
+    try {
+      val ret = sparkContext.runJob(rdd, (context: TaskContext, iter: Iterator[(K, V)]) => {
+        executeTask(
+          context = context,
+          config = config,
+          jobTrackerId = jobTrackerId,
+          sparkStageId = context.stageId,
+          sparkPartitionId = context.partitionId,
+          sparkAttemptNumber = context.attemptNumber,
+          committer = committer,
+          iterator = iter)
+      })
+
+      committer.commitJob(jobContext, ret)
+      logInfo(s"Job ${jobContext.getJobID} committed.")
+    } catch {
+      case cause: Throwable =>
+        logError(s"Aborting job ${jobContext.getJobID}.", cause)
+        committer.abortJob(jobContext)
+        throw new SparkException("Job aborted.", cause)
+    }
+  }
+
+  /** Write an RDD partition out in a single Spark task. */
+  private def executeTask[K, V: ClassTag](
+      context: TaskContext,
+      config: HadoopWriteConfigUtil[K, V],
+      jobTrackerId: String,
+      sparkStageId: Int,
+      sparkPartitionId: Int,
+      sparkAttemptNumber: Int,
+      committer: FileCommitProtocol,
+      iterator: Iterator[(K, V)]): TaskCommitMessage = {
+    // Set up a task.
+    val taskContext = config.createTaskAttemptContext(
+      jobTrackerId, sparkStageId, sparkPartitionId, sparkAttemptNumber)
+    committer.setupTask(taskContext)
+
+    val (outputMetrics, callback) = initHadoopOutputMetrics(context)
+
+    // Initialize the writer.
+    config.initWriter(taskContext, sparkPartitionId)
+    var recordsWritten = 0L
+
+    // Write all rows in the RDD partition.
+    try {
+      val ret = Utils.tryWithSafeFinallyAndFailureCallbacks {
+        while (iterator.hasNext) {
+          val pair = iterator.next()
+          config.write(pair)
+
+          // Update bytes written metric every few records
+          maybeUpdateOutputMetrics(outputMetrics, callback, recordsWritten)
+          recordsWritten += 1
+        }
+
+        config.closeWriter(taskContext)
+        committer.commitTask(taskContext)
+      }(catchBlock = {
+        // If there is an error, release resource and then abort the task.
+        try {
+          config.closeWriter(taskContext)
+        } finally {
+          committer.abortTask(taskContext)
+          logError(s"Task ${taskContext.getTaskAttemptID} aborted.")
+        }
+      })
+
+      outputMetrics.setBytesWritten(callback())
+      outputMetrics.setRecordsWritten(recordsWritten)
+
+      ret
+    } catch {
+      case t: Throwable =>
+        throw new SparkException("Task failed while writing rows", t)
+    }
+  }
+}
+
+/**
+ * A helper class that reads the JobConf from the older mapred API and creates the
+ * output format/committer/writer.
*/ private[spark] -class SparkHadoopWriter(jobConf: JobConf) extends Logging with Serializable { +class HadoopMapRedWriteConfigUtil[K, V: ClassTag](conf: SerializableJobConf) + extends HadoopWriteConfigUtil[K, V] with Logging { - private val now = new Date() - private val conf = new SerializableJobConf(jobConf) + private var outputFormat: Class[_ <: OutputFormat[K, V]] = null + private var writer: RecordWriter[K, V] = null - private var jobID = 0 - private var splitID = 0 - private var attemptID = 0 - private var jID: SerializableWritable[JobID] = null - private var taID: SerializableWritable[TaskAttemptID] = null + private def getConf: JobConf = conf.value - @transient private var writer: RecordWriter[AnyRef, AnyRef] = null - @transient private var format: OutputFormat[AnyRef, AnyRef] = null - @transient private var committer: OutputCommitter = null - @transient private var jobContext: JobContext = null - @transient private var taskContext: TaskAttemptContext = null + // -------------------------------------------------------------------------- + // Create JobContext/TaskAttemptContext + // -------------------------------------------------------------------------- - def preSetup() { - setIDs(0, 0, 0) - HadoopRDD.addLocalConfiguration("", 0, 0, 0, conf.value) + override def createJobContext(jobTrackerId: String, jobId: Int): NewJobContext = { + val jobAttemptId = new SerializableWritable(new JobID(jobTrackerId, jobId)) + new JobContextImpl(getConf, jobAttemptId.value) + } - val jCtxt = getJobContext() - getOutputCommitter().setupJob(jCtxt) + override def createTaskAttemptContext( + jobTrackerId: String, + jobId: Int, + splitId: Int, + taskAttemptId: Int): NewTaskAttemptContext = { + // Update JobConf. + HadoopRDD.addLocalConfiguration(jobTrackerId, jobId, splitId, taskAttemptId, conf.value) + // Create taskContext. + val attemptId = new TaskAttemptID(jobTrackerId, jobId, TaskType.MAP, splitId, taskAttemptId) + new TaskAttemptContextImpl(getConf, attemptId) } + // -------------------------------------------------------------------------- + // Create committer + // -------------------------------------------------------------------------- - def setup(jobid: Int, splitid: Int, attemptid: Int) { - setIDs(jobid, splitid, attemptid) - HadoopRDD.addLocalConfiguration(new SimpleDateFormat("yyyyMMddHHmmss", Locale.US).format(now), - jobid, splitID, attemptID, conf.value) + override def createCommitter(jobId: Int): HadoopMapReduceCommitProtocol = { + // Update JobConf. + HadoopRDD.addLocalConfiguration("", 0, 0, 0, getConf) + // Create commit protocol. 
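+    // The old mapred API keeps its output location under the "mapred.output.dir" key,
+    // which is why the output path is read from the JobConf below.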
+    FileCommitProtocol.instantiate(
+      className = classOf[HadoopMapRedCommitProtocol].getName,
+      jobId = jobId.toString,
+      outputPath = getConf.get("mapred.output.dir"),
+      isAppend = false).asInstanceOf[HadoopMapReduceCommitProtocol]
   }
 
-  def open() {
+  // --------------------------------------------------------------------------
+  // Create writer
+  // --------------------------------------------------------------------------
+
+  override def initWriter(taskContext: NewTaskAttemptContext, splitId: Int): Unit = {
     val numfmt = NumberFormat.getInstance(Locale.US)
     numfmt.setMinimumIntegerDigits(5)
     numfmt.setGroupingUsed(false)
 
-    val outputName = "part-" + numfmt.format(splitID)
-    val path = FileOutputFormat.getOutputPath(conf.value)
+    val outputName = "part-" + numfmt.format(splitId)
+    val path = FileOutputFormat.getOutputPath(getConf)
     val fs: FileSystem = {
       if (path != null) {
-        path.getFileSystem(conf.value)
+        path.getFileSystem(getConf)
       } else {
-        FileSystem.get(conf.value)
+        FileSystem.get(getConf)
       }
     }
 
-    getOutputCommitter().setupTask(getTaskContext())
-    writer = getOutputFormat().getRecordWriter(fs, conf.value, outputName, Reporter.NULL)
+    writer = getConf.getOutputFormat
+      .getRecordWriter(fs, getConf, outputName, Reporter.NULL)
+      .asInstanceOf[RecordWriter[K, V]]
+
+    require(writer != null, "Unable to obtain RecordWriter")
   }
 
-  def write(key: AnyRef, value: AnyRef) {
+  override def write(pair: (K, V)): Unit = {
+    require(writer != null, "Must call initWriter before write.")
+    writer.write(pair._1, pair._2)
+  }
+
+  override def closeWriter(taskContext: NewTaskAttemptContext): Unit = {
     if (writer != null) {
-      writer.write(key, value)
-    } else {
-      throw new IOException("Writer is null, open() has not been called")
+      writer.close(Reporter.NULL)
     }
   }
 
-  def close() {
-    writer.close(Reporter.NULL)
-  }
+  // --------------------------------------------------------------------------
+  // Create OutputFormat
+  // --------------------------------------------------------------------------
 
-  def commit() {
-    SparkHadoopMapRedUtil.commitTask(getOutputCommitter(), getTaskContext(), jobID, splitID)
+  override def initOutputFormat(jobContext: NewJobContext): Unit = {
+    if (outputFormat == null) {
+      outputFormat = getConf.getOutputFormat.getClass
+        .asInstanceOf[Class[_ <: OutputFormat[K, V]]]
+    }
   }
 
-  def commitJob() {
-    val cmtr = getOutputCommitter()
-    cmtr.commitJob(getJobContext())
+  private def getOutputFormat(): OutputFormat[K, V] = {
+    require(outputFormat != null, "Must call initOutputFormat first.")
+
+    outputFormat.newInstance()
   }
 
-  // ********* Private Functions *********
+  // --------------------------------------------------------------------------
+  // Verify hadoop config
+  // --------------------------------------------------------------------------
+
+  override def assertConf(jobContext: NewJobContext, conf: SparkConf): Unit = {
+    val outputFormatInstance = getOutputFormat()
+    val keyClass = getConf.getOutputKeyClass
+    val valueClass = getConf.getOutputValueClass
+    if (outputFormatInstance == null) {
+      throw new SparkException("Output format class not set")
+    }
+    if (keyClass == null) {
+      throw new SparkException("Output key class not set")
+    }
+    if (valueClass == null) {
+      throw new SparkException("Output value class not set")
+    }
+    SparkHadoopUtil.get.addCredentials(getConf)
+
+    logDebug("Saving as hadoop file of type (" + keyClass.getSimpleName + ", " +
+      valueClass.getSimpleName + ")")
 
-  private def getOutputFormat(): OutputFormat[AnyRef, AnyRef] = {
-    if (format == null) {
-      format = 
conf.value.getOutputFormat()
-        .asInstanceOf[OutputFormat[AnyRef, AnyRef]]
+    if (SparkHadoopWriterUtils.isOutputSpecValidationEnabled(conf)) {
+      // FileOutputFormat ignores the filesystem parameter
+      val ignoredFs = FileSystem.get(getConf)
+      getOutputFormat().checkOutputSpecs(ignoredFs, getConf)
     }
-    format
+  }
+}
+
+/**
+ * A helper class that reads the Configuration from the newer mapreduce API and creates
+ * the output format/committer/writer.
+ */
+private[spark]
+class HadoopMapReduceWriteConfigUtil[K, V: ClassTag](conf: SerializableConfiguration)
+  extends HadoopWriteConfigUtil[K, V] with Logging {
+
+  private var outputFormat: Class[_ <: NewOutputFormat[K, V]] = null
+  private var writer: NewRecordWriter[K, V] = null
+
+  private def getConf: Configuration = conf.value
+
+  // --------------------------------------------------------------------------
+  // Create JobContext/TaskAttemptContext
+  // --------------------------------------------------------------------------
+
+  override def createJobContext(jobTrackerId: String, jobId: Int): NewJobContext = {
+    val jobAttemptId = new NewTaskAttemptID(jobTrackerId, jobId, TaskType.MAP, 0, 0)
+    new NewTaskAttemptContextImpl(getConf, jobAttemptId)
+  }
+
+  override def createTaskAttemptContext(
+      jobTrackerId: String,
+      jobId: Int,
+      splitId: Int,
+      taskAttemptId: Int): NewTaskAttemptContext = {
+    val attemptId = new NewTaskAttemptID(
+      jobTrackerId, jobId, TaskType.REDUCE, splitId, taskAttemptId)
+    new NewTaskAttemptContextImpl(getConf, attemptId)
+  }
+
+  // --------------------------------------------------------------------------
+  // Create committer
+  // --------------------------------------------------------------------------
+
+  override def createCommitter(jobId: Int): HadoopMapReduceCommitProtocol = {
+    FileCommitProtocol.instantiate(
+      className = classOf[HadoopMapReduceCommitProtocol].getName,
+      jobId = jobId.toString,
+      outputPath = getConf.get("mapreduce.output.fileoutputformat.outputdir"),
+      isAppend = false).asInstanceOf[HadoopMapReduceCommitProtocol]
   }
 
-  private def getOutputCommitter(): OutputCommitter = {
-    if (committer == null) {
-      committer = conf.value.getOutputCommitter
+  // --------------------------------------------------------------------------
+  // Create writer
+  // --------------------------------------------------------------------------
+
+  override def initWriter(taskContext: NewTaskAttemptContext, splitId: Int): Unit = {
+    val taskFormat = getOutputFormat()
+    // If OutputFormat is Configurable, we should set conf to it.
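+    // A Configurable format cannot see the Hadoop configuration until setConf is called,
+    // so it must be injected before the format is asked for a record writer.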
+    taskFormat match {
+      case c: Configurable => c.setConf(getConf)
+      case _ => ()
     }
-    committer
+
+    writer = taskFormat.getRecordWriter(taskContext)
+      .asInstanceOf[NewRecordWriter[K, V]]
+
+    require(writer != null, "Unable to obtain RecordWriter")
+  }
+
+  override def write(pair: (K, V)): Unit = {
+    require(writer != null, "Must call initWriter before write.")
+    writer.write(pair._1, pair._2)
   }
 
-  private def getJobContext(): JobContext = {
-    if (jobContext == null) {
-      jobContext = new JobContextImpl(conf.value, jID.value)
+  override def closeWriter(taskContext: NewTaskAttemptContext): Unit = {
+    if (writer != null) {
+      writer.close(taskContext)
+      writer = null
+    } else {
+      logWarning("Writer has been closed.")
     }
-    jobContext
   }
 
-  private def getTaskContext(): TaskAttemptContext = {
-    if (taskContext == null) {
-      taskContext = newTaskAttemptContext(conf.value, taID.value)
+  // --------------------------------------------------------------------------
+  // Create OutputFormat
+  // --------------------------------------------------------------------------
+
+  override def initOutputFormat(jobContext: NewJobContext): Unit = {
+    if (outputFormat == null) {
+      outputFormat = jobContext.getOutputFormatClass
+        .asInstanceOf[Class[_ <: NewOutputFormat[K, V]]]
     }
-    taskContext
   }
 
-  protected def newTaskAttemptContext(
-      conf: JobConf,
-      attemptId: TaskAttemptID): TaskAttemptContext = {
-    new TaskAttemptContextImpl(conf, attemptId)
+  private def getOutputFormat(): NewOutputFormat[K, V] = {
+    require(outputFormat != null, "Must call initOutputFormat first.")
+
+    outputFormat.newInstance()
   }
 
-  private def setIDs(jobid: Int, splitid: Int, attemptid: Int) {
-    jobID = jobid
-    splitID = splitid
-    attemptID = attemptid
+  // --------------------------------------------------------------------------
+  // Verify hadoop config
+  // --------------------------------------------------------------------------
 
-    jID = new SerializableWritable[JobID](SparkHadoopWriterUtils.createJobID(now, jobid))
-    taID = new SerializableWritable[TaskAttemptID](
-      new TaskAttemptID(new TaskID(jID.value, TaskType.MAP, splitID), attemptID))
+  override def assertConf(jobContext: NewJobContext, conf: SparkConf): Unit = {
+    if (SparkHadoopWriterUtils.isOutputSpecValidationEnabled(conf)) {
+      getOutputFormat().checkOutputSpecs(jobContext)
+    }
   }
 }
diff --git a/core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala b/core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala
index 58762cc0838c..4628fa8ba270 100644
--- a/core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala
+++ b/core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala
@@ -27,7 +27,6 @@ import scala.reflect.ClassTag
 
 import com.clearspring.analytics.stream.cardinality.HyperLogLogPlus
 import org.apache.hadoop.conf.Configuration
-import org.apache.hadoop.fs.FileSystem
 import org.apache.hadoop.io.SequenceFile.CompressionType
 import org.apache.hadoop.io.compress.CompressionCodec
 import org.apache.hadoop.mapred.{FileOutputCommitter, FileOutputFormat, JobConf, OutputFormat}
@@ -36,13 +35,11 @@ import org.apache.hadoop.mapreduce.{Job => NewAPIHadoopJob, OutputFormat => NewO
 import org.apache.spark._
 import org.apache.spark.Partitioner.defaultPartitioner
 import org.apache.spark.annotation.Experimental
-import org.apache.spark.deploy.SparkHadoopUtil
-import org.apache.spark.internal.io.{SparkHadoopMapReduceWriter, SparkHadoopWriter,
-  SparkHadoopWriterUtils}
+import org.apache.spark.internal.io._
 import org.apache.spark.internal.Logging
 import 
org.apache.spark.partial.{BoundedDouble, PartialResult} import org.apache.spark.serializer.Serializer -import org.apache.spark.util.Utils +import org.apache.spark.util.{SerializableConfiguration, SerializableJobConf, Utils} import org.apache.spark.util.collection.CompactBuffer import org.apache.spark.util.random.StratifiedSamplingUtils @@ -1082,9 +1079,10 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)]) * result of using direct output committer with speculation enabled. */ def saveAsNewAPIHadoopDataset(conf: Configuration): Unit = self.withScope { - SparkHadoopMapReduceWriter.write( + val config = new HadoopMapReduceWriteConfigUtil[K, V](new SerializableConfiguration(conf)) + SparkHadoopWriter.write( rdd = self, - hadoopConf = conf) + config = config) } /** @@ -1094,62 +1092,10 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)]) * MapReduce job. */ def saveAsHadoopDataset(conf: JobConf): Unit = self.withScope { - // Rename this as hadoopConf internally to avoid shadowing (see SPARK-2038). - val hadoopConf = conf - val outputFormatInstance = hadoopConf.getOutputFormat - val keyClass = hadoopConf.getOutputKeyClass - val valueClass = hadoopConf.getOutputValueClass - if (outputFormatInstance == null) { - throw new SparkException("Output format class not set") - } - if (keyClass == null) { - throw new SparkException("Output key class not set") - } - if (valueClass == null) { - throw new SparkException("Output value class not set") - } - SparkHadoopUtil.get.addCredentials(hadoopConf) - - logDebug("Saving as hadoop file of type (" + keyClass.getSimpleName + ", " + - valueClass.getSimpleName + ")") - - if (SparkHadoopWriterUtils.isOutputSpecValidationEnabled(self.conf)) { - // FileOutputFormat ignores the filesystem parameter - val ignoredFs = FileSystem.get(hadoopConf) - hadoopConf.getOutputFormat.checkOutputSpecs(ignoredFs, hadoopConf) - } - - val writer = new SparkHadoopWriter(hadoopConf) - writer.preSetup() - - val writeToFile = (context: TaskContext, iter: Iterator[(K, V)]) => { - // Hadoop wants a 32-bit task attempt ID, so if ours is bigger than Int.MaxValue, roll it - // around by taking a mod. We expect that no task will be attempted 2 billion times. 
- val taskAttemptId = (context.taskAttemptId % Int.MaxValue).toInt - - val (outputMetrics, callback) = SparkHadoopWriterUtils.initHadoopOutputMetrics(context) - - writer.setup(context.stageId, context.partitionId, taskAttemptId) - writer.open() - var recordsWritten = 0L - - Utils.tryWithSafeFinallyAndFailureCallbacks { - while (iter.hasNext) { - val record = iter.next() - writer.write(record._1.asInstanceOf[AnyRef], record._2.asInstanceOf[AnyRef]) - - // Update bytes written metric every few records - SparkHadoopWriterUtils.maybeUpdateOutputMetrics(outputMetrics, callback, recordsWritten) - recordsWritten += 1 - } - }(finallyBlock = writer.close()) - writer.commit() - outputMetrics.setBytesWritten(callback()) - outputMetrics.setRecordsWritten(recordsWritten) - } - - self.context.runJob(self, writeToFile) - writer.commitJob() + val config = new HadoopMapRedWriteConfigUtil[K, V](new SerializableJobConf(conf)) + SparkHadoopWriter.write( + rdd = self, + config = config) } /** diff --git a/core/src/test/scala/org/apache/spark/rdd/PairRDDFunctionsSuite.scala b/core/src/test/scala/org/apache/spark/rdd/PairRDDFunctionsSuite.scala index 02df157be377..44dd955ce869 100644 --- a/core/src/test/scala/org/apache/spark/rdd/PairRDDFunctionsSuite.scala +++ b/core/src/test/scala/org/apache/spark/rdd/PairRDDFunctionsSuite.scala @@ -561,7 +561,7 @@ class PairRDDFunctionsSuite extends SparkFunSuite with SharedSparkContext { pairs.saveAsHadoopFile( "ignored", pairs.keyClass, pairs.valueClass, classOf[FakeFormatWithCallback], conf) } - assert(e.getMessage contains "failed to write") + assert(e.getCause.getMessage contains "failed to write") assert(FakeWriterWithCallback.calledBy === "write,callback,close") assert(FakeWriterWithCallback.exception != null, "exception should be captured") diff --git a/core/src/test/scala/org/apache/spark/scheduler/OutputCommitCoordinatorSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/OutputCommitCoordinatorSuite.scala index e51e6a0d3ff6..1579b614ea5b 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/OutputCommitCoordinatorSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/OutputCommitCoordinatorSuite.scala @@ -18,12 +18,14 @@ package org.apache.spark.scheduler import java.io.File +import java.util.Date import java.util.concurrent.TimeoutException import scala.concurrent.duration._ import scala.language.postfixOps -import org.apache.hadoop.mapred.{JobConf, OutputCommitter, TaskAttemptContext, TaskAttemptID} +import org.apache.hadoop.mapred._ +import org.apache.hadoop.mapreduce.TaskType import org.mockito.Matchers import org.mockito.Mockito._ import org.mockito.invocation.InvocationOnMock @@ -31,7 +33,7 @@ import org.mockito.stubbing.Answer import org.scalatest.BeforeAndAfter import org.apache.spark._ -import org.apache.spark.internal.io.SparkHadoopWriter +import org.apache.spark.internal.io.{FileCommitProtocol, HadoopMapRedCommitProtocol, SparkHadoopWriterUtils} import org.apache.spark.rdd.{FakeOutputCommitter, RDD} import org.apache.spark.util.{ThreadUtils, Utils} @@ -214,6 +216,8 @@ class OutputCommitCoordinatorSuite extends SparkFunSuite with BeforeAndAfter { */ private case class OutputCommitFunctions(tempDirPath: String) { + private val jobId = new SerializableWritable(SparkHadoopWriterUtils.createJobID(new Date, 0)) + // Mock output committer that simulates a successful commit (after commit is authorized) private def successfulOutputCommitter = new FakeOutputCommitter { override def commitTask(context: TaskAttemptContext): Unit = { @@ 
-256,14 +260,23 @@ private case class OutputCommitFunctions(tempDirPath: String) { def jobConf = new JobConf { override def getOutputCommitter(): OutputCommitter = outputCommitter } - val sparkHadoopWriter = new SparkHadoopWriter(jobConf) { - override def newTaskAttemptContext( - conf: JobConf, - attemptId: TaskAttemptID): TaskAttemptContext = { - mock(classOf[TaskAttemptContext]) - } - } - sparkHadoopWriter.setup(ctx.stageId, ctx.partitionId, ctx.attemptNumber) - sparkHadoopWriter.commit() + + // Instantiate committer. + val committer = FileCommitProtocol.instantiate( + className = classOf[HadoopMapRedCommitProtocol].getName, + jobId = jobId.value.getId.toString, + outputPath = jobConf.get("mapred.output.dir"), + isAppend = false) + + // Create TaskAttemptContext. + // Hadoop wants a 32-bit task attempt ID, so if ours is bigger than Int.MaxValue, roll it + // around by taking a mod. We expect that no task will be attempted 2 billion times. + val taskAttemptId = (ctx.taskAttemptId % Int.MaxValue).toInt + val attemptId = new TaskAttemptID( + new TaskID(jobId.value, TaskType.MAP, ctx.partitionId), taskAttemptId) + val taskContext = new TaskAttemptContextImpl(jobConf, attemptId) + + committer.setupTask(taskContext) + committer.commitTask(taskContext) } } From 528c9281aecc49e9bff204dd303962c705c6f237 Mon Sep 17 00:00:00 2001 From: Yanbo Liang Date: Fri, 30 Jun 2017 23:25:14 +0800 Subject: [PATCH 003/125] [ML] Fix scala-2.10 build failure of GeneralizedLinearRegressionSuite. ## What changes were proposed in this pull request? Fix scala-2.10 build failure of ```GeneralizedLinearRegressionSuite```. ## How was this patch tested? Build with scala-2.10. Author: Yanbo Liang Closes #18489 from yanboliang/glr. --- .../ml/regression/GeneralizedLinearRegressionSuite.scala | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/mllib/src/test/scala/org/apache/spark/ml/regression/GeneralizedLinearRegressionSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/regression/GeneralizedLinearRegressionSuite.scala index cfaa57314bd6..83f1344a7bcb 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/regression/GeneralizedLinearRegressionSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/regression/GeneralizedLinearRegressionSuite.scala @@ -1075,7 +1075,7 @@ class GeneralizedLinearRegressionSuite val seCoefR = Array(1.23439, 0.9669, 3.56866) val tValsR = Array(0.80297, -0.65737, -0.06017) val pValsR = Array(0.42199, 0.51094, 0.95202) - val dispersionR = 1 + val dispersionR = 1.0 val nullDevianceR = 2.17561 val residualDevianceR = 0.00018 val residualDegreeOfFreedomNullR = 3 @@ -1114,7 +1114,7 @@ class GeneralizedLinearRegressionSuite assert(x._1 ~== x._2 absTol 1E-3) } summary.tValues.zip(tValsR).foreach{ x => assert(x._1 ~== x._2 absTol 1E-3) } summary.pValues.zip(pValsR).foreach{ x => assert(x._1 ~== x._2 absTol 1E-3) } - assert(summary.dispersion ~== dispersionR absTol 1E-3) + assert(summary.dispersion === dispersionR) assert(summary.nullDeviance ~== nullDevianceR absTol 1E-3) assert(summary.deviance ~== residualDevianceR absTol 1E-3) assert(summary.residualDegreeOfFreedom === residualDegreeOfFreedomR) @@ -1190,7 +1190,7 @@ class GeneralizedLinearRegressionSuite val seCoefR = Array(1.16826, 0.41703, 1.96249) val tValsR = Array(-2.46387, 2.12428, -2.32757) val pValsR = Array(0.01374, 0.03365, 0.01993) - val dispersionR = 1 + val dispersionR = 1.0 val nullDevianceR = 22.55853 val residualDevianceR = 9.5622 val residualDegreeOfFreedomNullR = 3 @@ -1229,7 +1229,7 @@ class 
GeneralizedLinearRegressionSuite
       assert(x._1 ~== x._2 absTol 1E-3) }
     summary.tValues.zip(tValsR).foreach{ x => assert(x._1 ~== x._2 absTol 1E-3) }
     summary.pValues.zip(pValsR).foreach{ x => assert(x._1 ~== x._2 absTol 1E-3) }
-    assert(summary.dispersion ~== dispersionR absTol 1E-3)
+    assert(summary.dispersion === dispersionR)
     assert(summary.nullDeviance ~== nullDevianceR absTol 1E-3)
     assert(summary.deviance ~== residualDevianceR absTol 1E-3)
     assert(summary.residualDegreeOfFreedom === residualDegreeOfFreedomR)

From 1fe08d62f022e12f2f0161af5d8f9eac51baf1b9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E6=9B=BE=E6=9E=97=E8=A5=BF?=
Date: Fri, 30 Jun 2017 19:28:43 +0100
Subject: [PATCH 004/125] [SPARK-21223] Change fileToAppInfo in
 FsHistoryProvider to fix concurrent issue.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

# What issue does this PR address?

Jira: https://issues.apache.org/jira/browse/SPARK-21223

Fix the thread-safety issue in FsHistoryProvider. Currently, the Spark HistoryServer uses a
HashMap named fileToAppInfo in the FsHistoryProvider class to map each event log path to its
attempt info. When a ThreadPool is used to replay the log files in the list and merge the list
of old applications with new ones, multiple threads may update fileToAppInfo at the same time,
which can cause thread-safety issues such as falling into an infinite loop when the hashtable's
resize function is called.

Author: 曾林西

Closes #18430 from zenglinxi0615/master.

---
 .../apache/spark/deploy/history/FsHistoryProvider.scala | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala b/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala
index d05ca142b618..b2a50bd05571 100644
--- a/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala
@@ -19,7 +19,7 @@ package org.apache.spark.deploy.history
 
 import java.io.{FileNotFoundException, IOException, OutputStream}
 import java.util.UUID
-import java.util.concurrent.{Executors, ExecutorService, Future, TimeUnit}
+import java.util.concurrent.{ConcurrentHashMap, Executors, ExecutorService, Future, TimeUnit}
 import java.util.zip.{ZipEntry, ZipOutputStream}
 
 import scala.collection.mutable
@@ -122,7 +122,7 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock)
   @volatile private var applications: mutable.LinkedHashMap[String, FsApplicationHistoryInfo]
     = new mutable.LinkedHashMap()
 
-  val fileToAppInfo = new mutable.HashMap[Path, FsApplicationAttemptInfo]()
+  val fileToAppInfo = new ConcurrentHashMap[Path, FsApplicationAttemptInfo]()
 
   // List of application logs to be deleted by event log cleaner.
   private var attemptsToClean = new mutable.ListBuffer[FsApplicationAttemptInfo]
@@ -321,7 +321,8 @@
       // scan for modified applications, replay and merge them
       val logInfos: Seq[FileStatus] = statusList
         .filter { entry =>
-          val prevFileSize = fileToAppInfo.get(entry.getPath()).map{_.fileSize}.getOrElse(0L)
+          val fileInfo = fileToAppInfo.get(entry.getPath())
+          val prevFileSize = if (fileInfo != null) fileInfo.fileSize else 0L
           !entry.isDirectory() &&
             // FsHistoryProvider generates a hidden file which can't be read. 
Accidentally
            // reading a garbage file is safe, but we would log an error which can be scary to
@@ -475,7 +476,7 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock)
         fileStatus.getLen(),
         appListener.appSparkVersion.getOrElse("")
       )
-      fileToAppInfo(logPath) = attemptInfo
+      fileToAppInfo.put(logPath, attemptInfo)
       logDebug(s"Application log ${attemptInfo.logPath} loaded successfully: $attemptInfo")
       Some(attemptInfo)
     } else {

From eed9c4ef859fdb75a816a3e0ce2d593b34b23444 Mon Sep 17 00:00:00 2001
From: Xiao Li
Date: Fri, 30 Jun 2017 14:23:56 -0700
Subject: [PATCH 005/125] [SPARK-21129][SQL] Arguments of SQL function call
 should not be named expressions

### What changes were proposed in this pull request?

Function arguments should not be named expressions. This could cause two issues:
- A misleading error message
- Unexpected query results when the column name is `distinct`, which is not a reserved word in our parser.

```
spark-sql> select count(distinct c1, distinct c2) from t1;
Error in query: cannot resolve '`distinct`' given input columns: [c1, c2]; line 1 pos 26;
'Project [unresolvedalias('count(c1#30, 'distinct), None)]
+- SubqueryAlias t1
   +- CatalogRelation `default`.`t1`, org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe, [c1#30, c2#31]
```

After the fix, the error message becomes
```
spark-sql> select count(distinct c1, distinct c2) from t1;
Error in query:
extraneous input 'c2' expecting {')', ',', '.', '[', 'OR', 'AND', 'IN', NOT, 'BETWEEN', 'LIKE', RLIKE, 'IS', EQ, '<=>', '<>', '!=', '<', LTE, '>', GTE, '+', '-', '*', '/', '%', 'DIV', '&', '|', '||', '^'}(line 1, pos 35)

== SQL ==
select count(distinct c1, distinct c2) from t1
-----------------------------------^^^
```

### How was this patch tested?
Added a test case to the parser suite.

Author: Xiao Li
Author: gatorsmile

Closes #18338 from gatorsmile/parserDistinctAggFunc.

---
 .../spark/sql/catalyst/parser/SqlBase.g4      |  3 +-
 .../spark/sql/catalyst/dsl/package.scala      |  1 +
 .../sql/catalyst/parser/AstBuilder.scala      |  9 +++++-
 .../parser/ExpressionParserSuite.scala        |  6 ++--
 .../sql/catalyst/parser/PlanParserSuite.scala |  6 ++++
 .../resources/sql-tests/inputs/struct.sql     |  7 ++++
 .../sql-tests/results/struct.sql.out          | 32 ++++++++++++++++++-
 7 files changed, 59 insertions(+), 5 deletions(-)

diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
index 945603173652..7ffa15009633 100644
--- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
+++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
@@ -561,6 +561,7 @@ primaryExpression
     | CASE whenClause+ (ELSE elseExpression=expression)? END #searchedCase
     | CASE value=expression whenClause+ (ELSE elseExpression=expression)? END #simpleCase
     | CAST '(' expression AS dataType ')' #cast
+    | STRUCT '(' (argument+=namedExpression (',' argument+=namedExpression)*)? ')' #struct
     | FIRST '(' expression (IGNORE NULLS)? ')' #first
     | LAST '(' expression (IGNORE NULLS)? ')' #last
     | POSITION '(' substr=valueExpression IN str=valueExpression ')' #position
@@ -569,7 +570,7 @@ primaryExpression
     | qualifiedName '.' ASTERISK #star
     | '(' namedExpression (',' namedExpression)+ ')' #rowConstructor
     | '(' query ')' #subqueryExpression
-    | qualifiedName '(' (setQuantifier? namedExpression (',' namedExpression)*)? ')'
+    | qualifiedName '(' (setQuantifier? argument+=expression (',' argument+=expression)*)? ')'
       (OVER windowSpec)? 
#functionCall | value=primaryExpression '[' index=valueExpression ']' #subscript | identifier #columnReference diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala index f6792569b704..7c100afcd738 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala @@ -170,6 +170,7 @@ package object dsl { case Seq() => UnresolvedStar(None) case target => UnresolvedStar(Option(target)) } + def namedStruct(e: Expression*): Expression = CreateNamedStruct(e) def callFunction[T, U]( func: T => U, diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index ef79cbcaa0ce..8eac3ef2d356 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -1061,6 +1061,13 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging Cast(expression(ctx.expression), visitSparkDataType(ctx.dataType)) } + /** + * Create a [[CreateStruct]] expression. + */ + override def visitStruct(ctx: StructContext): Expression = withOrigin(ctx) { + CreateStruct(ctx.argument.asScala.map(expression)) + } + /** * Create a [[First]] expression. */ @@ -1091,7 +1098,7 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging // Create the function call. val name = ctx.qualifiedName.getText val isDistinct = Option(ctx.setQuantifier()).exists(_.DISTINCT != null) - val arguments = ctx.namedExpression().asScala.map(expression) match { + val arguments = ctx.argument.asScala.map(expression) match { case Seq(UnresolvedStar(None)) if name.toLowerCase(Locale.ROOT) == "count" && !isDistinct => // Transform COUNT(*) into COUNT(1). diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala index 4d08f016a4a1..45f9f72dccc4 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala @@ -231,7 +231,7 @@ class ExpressionParserSuite extends PlanTest { assertEqual("foo(distinct a, b)", 'foo.distinctFunction('a, 'b)) assertEqual("grouping(distinct a, b)", 'grouping.distinctFunction('a, 'b)) assertEqual("`select`(all a, b)", 'select.function('a, 'b)) - assertEqual("foo(a as x, b as e)", 'foo.function('a as 'x, 'b as 'e)) + intercept("foo(a x)", "extraneous input 'x'") } test("window function expressions") { @@ -330,7 +330,9 @@ class ExpressionParserSuite extends PlanTest { assertEqual("a.b", UnresolvedAttribute("a.b")) assertEqual("`select`.b", UnresolvedAttribute("select.b")) assertEqual("(a + b).b", ('a + 'b).getField("b")) // This will fail analysis. 
- assertEqual("struct(a, b).b", 'struct.function('a, 'b).getField("b")) + assertEqual( + "struct(a, b).b", + namedStruct(NamePlaceholder, 'a, NamePlaceholder, 'b).getField("b")) } test("reference") { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala index bf15b85d5b51..5b2573fa4d60 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala @@ -223,6 +223,12 @@ class PlanParserSuite extends AnalysisTest { assertEqual(s"$sql grouping sets((a, b), (a), ())", GroupingSets(Seq(Seq('a, 'b), Seq('a), Seq()), Seq('a, 'b), table("d"), Seq('a, 'b, 'sum.function('c).as("c")))) + + val m = intercept[ParseException] { + parsePlan("SELECT a, b, count(distinct a, distinct b) as c FROM d GROUP BY a, b") + }.getMessage + assert(m.contains("extraneous input 'b'")) + } test("limit") { diff --git a/sql/core/src/test/resources/sql-tests/inputs/struct.sql b/sql/core/src/test/resources/sql-tests/inputs/struct.sql index e56344dc4de8..93a1238ab18c 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/struct.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/struct.sql @@ -18,3 +18,10 @@ SELECT ID, STRUCT(ST.*,CAST(ID AS STRING) AS E) NST FROM tbl_x; -- Prepend a column to a struct SELECT ID, STRUCT(CAST(ID AS STRING) AS AA, ST.*) NST FROM tbl_x; + +-- Select a column from a struct +SELECT ID, STRUCT(ST.*).C NST FROM tbl_x; +SELECT ID, STRUCT(ST.C, ST.D).D NST FROM tbl_x; + +-- Select an alias from a struct +SELECT ID, STRUCT(ST.C as STC, ST.D as STD).STD FROM tbl_x; \ No newline at end of file diff --git a/sql/core/src/test/resources/sql-tests/results/struct.sql.out b/sql/core/src/test/resources/sql-tests/results/struct.sql.out index 3e32f4619546..1da33bc736f0 100644 --- a/sql/core/src/test/resources/sql-tests/results/struct.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/struct.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 6 +-- Number of queries: 9 -- !query 0 @@ -58,3 +58,33 @@ struct> 1 {"AA":"1","C":"gamma","D":"delta"} 2 {"AA":"2","C":"epsilon","D":"eta"} 3 {"AA":"3","C":"theta","D":"iota"} + + +-- !query 6 +SELECT ID, STRUCT(ST.*).C NST FROM tbl_x +-- !query 6 schema +struct +-- !query 6 output +1 gamma +2 epsilon +3 theta + + +-- !query 7 +SELECT ID, STRUCT(ST.C, ST.D).D NST FROM tbl_x +-- !query 7 schema +struct +-- !query 7 output +1 delta +2 eta +3 iota + + +-- !query 8 +SELECT ID, STRUCT(ST.C as STC, ST.D as STD).STD FROM tbl_x +-- !query 8 schema +struct +-- !query 8 output +1 delta +2 eta +3 iota From fd1325522549937232f37215db53d6478f48644c Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Fri, 30 Jun 2017 15:11:27 -0700 Subject: [PATCH 006/125] [SPARK-21052][SQL][FOLLOW-UP] Add hash map metrics to join ## What changes were proposed in this pull request? Remove `numHashCollisions` in `BytesToBytesMap`. And change `getAverageProbesPerLookup()` to `getAverageProbesPerLookup` as suggested. ## How was this patch tested? Existing tests. Author: Liang-Chi Hsieh Closes #18480 from viirya/SPARK-21052-followup. 
--- .../spark/unsafe/map/BytesToBytesMap.java | 33 ------------------- .../spark/sql/execution/joins/HashJoin.scala | 2 +- .../sql/execution/joins/HashedRelation.scala | 8 ++--- 3 files changed, 5 insertions(+), 38 deletions(-) diff --git a/core/src/main/java/org/apache/spark/unsafe/map/BytesToBytesMap.java b/core/src/main/java/org/apache/spark/unsafe/map/BytesToBytesMap.java index 4bef21b6b4e4..3b6200e74f1e 100644 --- a/core/src/main/java/org/apache/spark/unsafe/map/BytesToBytesMap.java +++ b/core/src/main/java/org/apache/spark/unsafe/map/BytesToBytesMap.java @@ -160,14 +160,10 @@ public final class BytesToBytesMap extends MemoryConsumer { private final boolean enablePerfMetrics; - private long timeSpentResizingNs = 0; - private long numProbes = 0; private long numKeyLookups = 0; - private long numHashCollisions = 0; - private long peakMemoryUsedBytes = 0L; private final int initialCapacity; @@ -489,10 +485,6 @@ public void safeLookup(Object keyBase, long keyOffset, int keyLength, Location l ); if (areEqual) { return; - } else { - if (enablePerfMetrics) { - numHashCollisions++; - } } } } @@ -859,16 +851,6 @@ public long getPeakMemoryUsedBytes() { return peakMemoryUsedBytes; } - /** - * Returns the total amount of time spent resizing this map (in nanoseconds). - */ - public long getTimeSpentResizingNs() { - if (!enablePerfMetrics) { - throw new IllegalStateException(); - } - return timeSpentResizingNs; - } - /** * Returns the average number of probes per key lookup. */ @@ -879,13 +861,6 @@ public double getAverageProbesPerLookup() { return (1.0 * numProbes) / numKeyLookups; } - public long getNumHashCollisions() { - if (!enablePerfMetrics) { - throw new IllegalStateException(); - } - return numHashCollisions; - } - @VisibleForTesting public int getNumDataPages() { return dataPages.size(); @@ -923,10 +898,6 @@ public void reset() { void growAndRehash() { assert(longArray != null); - long resizeStartTime = -1; - if (enablePerfMetrics) { - resizeStartTime = System.nanoTime(); - } // Store references to the old data structures to be used when we re-hash final LongArray oldLongArray = longArray; final int oldCapacity = (int) oldLongArray.size() / 2; @@ -951,9 +922,5 @@ void growAndRehash() { longArray.set(newPos * 2 + 1, hashcode); } freeArray(oldLongArray); - - if (enablePerfMetrics) { - timeSpentResizingNs += System.nanoTime() - resizeStartTime; - } } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/HashJoin.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/HashJoin.scala index b09edf380c2d..0396168d3f31 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/HashJoin.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/HashJoin.scala @@ -215,7 +215,7 @@ trait HashJoin { // At the end of the task, we update the avg hash probe. 
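     // The completion listener runs once, after all lookups in the task have finished,
     // so the recorded value is the final average for the task.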
     TaskContext.get().addTaskCompletionListener(_ =>
-      avgHashProbe.set(hashed.getAverageProbesPerLookup()))
+      avgHashProbe.set(hashed.getAverageProbesPerLookup))
 
     val resultProj = createResultProjection
     joinedIter.map { r =>
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/HashedRelation.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/HashedRelation.scala
index 3c702856114f..2038cb9edb67 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/HashedRelation.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/HashedRelation.scala
@@ -83,7 +83,7 @@ private[execution] sealed trait HashedRelation extends KnownSizeEstimation {
   /**
    * Returns the average number of probes per key lookup.
    */
-  def getAverageProbesPerLookup(): Double
+  def getAverageProbesPerLookup: Double
 }
 
 private[execution] object HashedRelation {
@@ -280,7 +280,7 @@ private[joins] class UnsafeHashedRelation(
     read(in.readInt, in.readLong, in.readBytes)
   }
 
-  override def getAverageProbesPerLookup(): Double = binaryMap.getAverageProbesPerLookup()
+  override def getAverageProbesPerLookup: Double = binaryMap.getAverageProbesPerLookup
 }
 
 private[joins] object UnsafeHashedRelation {
@@ -776,7 +776,7 @@ private[execution] final class LongToUnsafeRowMap(val mm: TaskMemoryManager, cap
   /**
    * Returns the average number of probes per key lookup.
    */
-  def getAverageProbesPerLookup(): Double = numProbes.toDouble / numKeyLookups
+  def getAverageProbesPerLookup: Double = numProbes.toDouble / numKeyLookups
 }
 
 private[joins] class LongHashedRelation(
@@ -829,7 +829,7 @@ private[joins] class LongHashedRelation(
     map = in.readObject().asInstanceOf[LongToUnsafeRowMap]
   }
 
-  override def getAverageProbesPerLookup(): Double = map.getAverageProbesPerLookup()
+  override def getAverageProbesPerLookup: Double = map.getAverageProbesPerLookup
 }
 
 /**

From 4eb41879ce774dec1d16b2281ab1fbf41f9d418a Mon Sep 17 00:00:00 2001
From: Wenchen Fan
Date: Sat, 1 Jul 2017 09:25:29 +0800
Subject: [PATCH 007/125] [SPARK-17528][SQL] data should be copied properly
 before saving into InternalRow

## What changes were proposed in this pull request?

For performance reasons, `UnsafeRow.getString`, `getStruct`, etc. return a "pointer" that points to a memory region of this unsafe row. This makes the unsafe projection a little dangerous, because all of its output rows share one instance.

When we implement SQL operators, we should be careful not to cache the input rows, because they may be produced by the unsafe projection of a child operator and thus their contents may change over time.

However, when updating values of an InternalRow (e.g. in mutable projection and safe projection), we only copy UTF8String; we should also copy InternalRow, ArrayData and MapData. This PR fixes this, and also fixes the copy of various InternalRow, ArrayData and MapData implementations.

## How was this patch tested?

New regression tests.

Author: Wenchen Fan

Closes #18483 from cloud-fan/fix-copy.
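The hazard described above can be reproduced without any Spark internals. A minimal, self-contained sketch using toy classes (not Spark's API): the getter returns a view into a buffer that is reused for every row, so a cached result silently changes when the next row is loaded, while an explicit copy stays stable.

```
class ReusedRow {
  private val buffer = new Array[Byte](8)
  def load(value: Byte): ReusedRow = { java.util.Arrays.fill(buffer, value); this }
  def getBytes: Array[Byte] = buffer               // a view into shared memory
  def getBytesCopied: Array[Byte] = buffer.clone() // a stable, private copy
}

object AliasingDemo extends App {
  val row = new ReusedRow
  val cachedView = row.load(1).getBytes
  val cachedCopy = row.load(1).getBytesCopied
  row.load(2)                // the next input overwrites the shared buffer
  assert(cachedView(0) == 2) // the cached "value" changed underneath us
  assert(cachedCopy(0) == 1) // the copy kept the original value
}
```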
--- .../apache/spark/unsafe/types/UTF8String.java | 6 + .../spark/sql/catalyst/InternalRow.scala | 27 ++++- .../spark/sql/catalyst/expressions/Cast.scala | 2 +- .../expressions/SpecificInternalRow.scala | 12 -- .../expressions/aggregate/collect.scala | 2 +- .../expressions/aggregate/interfaces.scala | 6 + .../expressions/codegen/CodeGenerator.scala | 6 +- .../codegen/GenerateSafeProjection.scala | 2 - .../spark/sql/catalyst/expressions/rows.scala | 23 ++-- .../sql/catalyst/util/GenericArrayData.scala | 10 +- .../scala/org/apache/spark/sql/RowTest.scala | 4 - .../catalyst/expressions/MapDataSuite.scala | 57 ---------- .../codegen/GeneratedProjectionSuite.scala | 36 ++++++ .../sql/catalyst/util/ComplexDataSuite.scala | 107 ++++++++++++++++++ .../execution/vectorized/ColumnarBatch.java | 2 +- .../SortBasedAggregationIterator.scala | 15 +-- .../columnar/GenerateColumnAccessor.scala | 1 - .../execution/window/AggregateProcessor.scala | 7 +- 18 files changed, 212 insertions(+), 113 deletions(-) delete mode 100644 sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MapDataSuite.scala create mode 100644 sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/ComplexDataSuite.scala diff --git a/common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java b/common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java index 40b9fc9534f4..9de4ca71ff6d 100644 --- a/common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java +++ b/common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java @@ -1088,6 +1088,12 @@ public UTF8String clone() { return fromBytes(getBytes()); } + public UTF8String copy() { + byte[] bytes = new byte[numBytes]; + copyMemory(base, offset, bytes, BYTE_ARRAY_OFFSET, numBytes); + return fromBytes(bytes); + } + @Override public int compareTo(@Nonnull final UTF8String other) { int len = Math.min(numBytes, other.numBytes); diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/InternalRow.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/InternalRow.scala index 256f64e320be..29110640d64f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/InternalRow.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/InternalRow.scala @@ -18,7 +18,9 @@ package org.apache.spark.sql.catalyst import org.apache.spark.sql.catalyst.expressions._ +import org.apache.spark.sql.catalyst.util.{ArrayData, MapData} import org.apache.spark.sql.types.{DataType, Decimal, StructType} +import org.apache.spark.unsafe.types.UTF8String /** * An abstract class for row used internally in Spark SQL, which only contains the columns as @@ -33,6 +35,10 @@ abstract class InternalRow extends SpecializedGetters with Serializable { def setNullAt(i: Int): Unit + /** + * Updates the value at column `i`. Note that after updating, the given value will be kept in this + * row, and the caller side should guarantee that this value won't be changed afterwards. + */ def update(i: Int, value: Any): Unit // default implementation (slow) @@ -58,7 +64,15 @@ abstract class InternalRow extends SpecializedGetters with Serializable { def copy(): InternalRow /** Returns true if there are any NULL values in this row. 
*/ - def anyNull: Boolean + def anyNull: Boolean = { + val len = numFields + var i = 0 + while (i < len) { + if (isNullAt(i)) { return true } + i += 1 + } + false + } /* ---------------------- utility methods for Scala ---------------------- */ @@ -94,4 +108,15 @@ object InternalRow { /** Returns an empty [[InternalRow]]. */ val empty = apply() + + /** + * Copies the given value if it's string/struct/array/map type. + */ + def copyValue(value: Any): Any = value match { + case v: UTF8String => v.copy() + case v: InternalRow => v.copy() + case v: ArrayData => v.copy() + case v: MapData => v.copy() + case _ => value + } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala index 43df19ba009a..3862e64b9d82 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala @@ -1047,7 +1047,7 @@ case class Cast(child: Expression, dataType: DataType, timeZoneId: Option[String final $rowClass $result = new $rowClass(${fieldsCasts.length}); final InternalRow $tmpRow = $c; $fieldsEvalCode - $evPrim = $result.copy(); + $evPrim = $result; """ } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SpecificInternalRow.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SpecificInternalRow.scala index 74e0b4691d4c..75feaf670c84 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SpecificInternalRow.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SpecificInternalRow.scala @@ -17,7 +17,6 @@ package org.apache.spark.sql.catalyst.expressions -import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.types._ /** @@ -220,17 +219,6 @@ final class SpecificInternalRow(val values: Array[MutableValue]) extends BaseGen override def isNullAt(i: Int): Boolean = values(i).isNull - override def copy(): InternalRow = { - val newValues = new Array[Any](values.length) - var i = 0 - while (i < values.length) { - newValues(i) = values(i).boxed - i += 1 - } - - new GenericInternalRow(newValues) - } - override protected def genericGet(i: Int): Any = values(i).boxed override def update(ordinal: Int, value: Any) { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/collect.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/collect.scala index 26cd9ab66538..0d2f9889a27d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/collect.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/collect.scala @@ -52,7 +52,7 @@ abstract class Collect[T <: Growable[Any] with Iterable[Any]] extends TypedImper // Do not allow null values. We follow the semantics of Hive's collect_list/collect_set here. 
// See: org.apache.hadoop.hive.ql.udf.generic.GenericUDAFMkCollectionEvaluator if (value != null) { - buffer += value + buffer += InternalRow.copyValue(value) } buffer } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/interfaces.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/interfaces.scala index fffcc7c9ef53..7af490143585 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/interfaces.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/interfaces.scala @@ -317,6 +317,9 @@ abstract class ImperativeAggregate extends AggregateFunction with CodegenFallbac * Updates its aggregation buffer, located in `mutableAggBuffer`, based on the given `inputRow`. * * Use `fieldNumber + mutableAggBufferOffset` to access fields of `mutableAggBuffer`. + * + * Note that, the input row may be produced by unsafe projection and it may not be safe to cache + * some fields of the input row, as the values can be changed unexpectedly. */ def update(mutableAggBuffer: InternalRow, inputRow: InternalRow): Unit @@ -326,6 +329,9 @@ abstract class ImperativeAggregate extends AggregateFunction with CodegenFallbac * * Use `fieldNumber + mutableAggBufferOffset` to access fields of `mutableAggBuffer`. * Use `fieldNumber + inputAggBufferOffset` to access fields of `inputAggBuffer`. + * + * Note that, the input row may be produced by unsafe projection and it may not be safe to cache + * some fields of the input row, as the values can be changed unexpectedly. */ def merge(mutableAggBuffer: InternalRow, inputAggBuffer: InternalRow): Unit } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala index 5158949b9562..b15bf2ca7c11 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala @@ -408,9 +408,11 @@ class CodegenContext { dataType match { case _ if isPrimitiveType(jt) => s"$row.set${primitiveTypeName(jt)}($ordinal, $value)" case t: DecimalType => s"$row.setDecimal($ordinal, $value, ${t.precision})" - // The UTF8String may came from UnsafeRow, otherwise clone is cheap (re-use the bytes) - case StringType => s"$row.update($ordinal, $value.clone())" case udt: UserDefinedType[_] => setColumn(row, udt.sqlType, ordinal, value) + // The UTF8String, InternalRow, ArrayData and MapData may came from UnsafeRow, we should copy + // it to avoid keeping a "pointer" to a memory region which may get updated afterwards. 
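The new update()/merge() notes on ImperativeAggregate translate into a simple rule: copy before you cache. A sketch with an invented max-string aggregate body (the aggregate itself is illustrative, the copy-before-store pattern is what the notes require):

    import org.apache.spark.sql.catalyst.InternalRow

    // Hypothetical imperative aggregate body: keep the max string seen so far.
    def update(mutableAggBuffer: InternalRow, inputRow: InternalRow): Unit = {
      val incoming = inputRow.getUTF8String(0)
      if (incoming != null) {
        val current = mutableAggBuffer.getUTF8String(0)
        if (current == null || incoming.compareTo(current) > 0) {
          // Copy before storing: `incoming` may live inside a shared, reused
          // byte array produced by an unsafe projection.
          mutableAggBuffer.update(0, incoming.copy())
        }
      }
    }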
+ case StringType | _: StructType | _: ArrayType | _: MapType => + s"$row.update($ordinal, $value.copy())" case _ => s"$row.update($ordinal, $value)" } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateSafeProjection.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateSafeProjection.scala index f708aeff2b14..dd0419d2286d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateSafeProjection.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateSafeProjection.scala @@ -131,8 +131,6 @@ object GenerateSafeProjection extends CodeGenerator[Seq[Expression], Projection] case s: StructType => createCodeForStruct(ctx, input, s) case ArrayType(elementType, _) => createCodeForArray(ctx, input, elementType) case MapType(keyType, valueType, _) => createCodeForMap(ctx, input, keyType, valueType) - // UTF8String act as a pointer if it's inside UnsafeRow, so copy it to make it safe. - case StringType => ExprCode("", "false", s"$input.clone()") case udt: UserDefinedType[_] => convertToSafe(ctx, input, udt.sqlType) case _ => ExprCode("", "false", input) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/rows.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/rows.scala index 751b821e1b00..65539a2f00e6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/rows.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/rows.scala @@ -50,16 +50,6 @@ trait BaseGenericInternalRow extends InternalRow { override def getMap(ordinal: Int): MapData = getAs(ordinal) override def getStruct(ordinal: Int, numFields: Int): InternalRow = getAs(ordinal) - override def anyNull: Boolean = { - val len = numFields - var i = 0 - while (i < len) { - if (isNullAt(i)) { return true } - i += 1 - } - false - } - override def toString: String = { if (numFields == 0) { "[empty row]" @@ -79,6 +69,17 @@ trait BaseGenericInternalRow extends InternalRow { } } + override def copy(): GenericInternalRow = { + val len = numFields + val newValues = new Array[Any](len) + var i = 0 + while (i < len) { + newValues(i) = InternalRow.copyValue(genericGet(i)) + i += 1 + } + new GenericInternalRow(newValues) + } + override def equals(o: Any): Boolean = { if (!o.isInstanceOf[BaseGenericInternalRow]) { return false @@ -206,6 +207,4 @@ class GenericInternalRow(val values: Array[Any]) extends BaseGenericInternalRow override def setNullAt(i: Int): Unit = { values(i) = null} override def update(i: Int, value: Any): Unit = { values(i) = value } - - override def copy(): GenericInternalRow = this } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/GenericArrayData.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/GenericArrayData.scala index dd660c80a9c3..9e39ed9c3a77 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/GenericArrayData.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/GenericArrayData.scala @@ -49,7 +49,15 @@ class GenericArrayData(val array: Array[Any]) extends ArrayData { def this(seqOrArray: Any) = this(GenericArrayData.anyToSeq(seqOrArray)) - override def copy(): ArrayData = new GenericArrayData(array.clone()) + override def copy(): ArrayData = { + val newValues = new Array[Any](array.length) + var i = 0 + while (i < array.length) { + newValues(i) = 
InternalRow.copyValue(array(i)) + i += 1 + } + new GenericArrayData(newValues) + } override def numElements(): Int = array.length diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/RowTest.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/RowTest.scala index c9c9599e7f46..25699de33d71 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/RowTest.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/RowTest.scala @@ -121,10 +121,6 @@ class RowTest extends FunSpec with Matchers { externalRow should be theSameInstanceAs externalRow.copy() } - it("copy should return same ref for internal rows") { - internalRow should be theSameInstanceAs internalRow.copy() - } - it("toSeq should not expose internal state for external rows") { val modifiedValues = modifyValues(externalRow.toSeq) externalRow.toSeq should not equal modifiedValues diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MapDataSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MapDataSuite.scala deleted file mode 100644 index 25a675a90276..000000000000 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MapDataSuite.scala +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.spark.sql.catalyst.expressions - -import scala.collection._ - -import org.apache.spark.SparkFunSuite -import org.apache.spark.sql.catalyst.util.ArrayBasedMapData -import org.apache.spark.sql.types.{DataType, IntegerType, MapType, StringType} -import org.apache.spark.unsafe.types.UTF8String - -class MapDataSuite extends SparkFunSuite { - - test("inequality tests") { - def u(str: String): UTF8String = UTF8String.fromString(str) - - // test data - val testMap1 = Map(u("key1") -> 1) - val testMap2 = Map(u("key1") -> 1, u("key2") -> 2) - val testMap3 = Map(u("key1") -> 1) - val testMap4 = Map(u("key1") -> 1, u("key2") -> 2) - - // ArrayBasedMapData - val testArrayMap1 = ArrayBasedMapData(testMap1.toMap) - val testArrayMap2 = ArrayBasedMapData(testMap2.toMap) - val testArrayMap3 = ArrayBasedMapData(testMap3.toMap) - val testArrayMap4 = ArrayBasedMapData(testMap4.toMap) - assert(testArrayMap1 !== testArrayMap3) - assert(testArrayMap2 !== testArrayMap4) - - // UnsafeMapData - val unsafeConverter = UnsafeProjection.create(Array[DataType](MapType(StringType, IntegerType))) - val row = new GenericInternalRow(1) - def toUnsafeMap(map: ArrayBasedMapData): UnsafeMapData = { - row.update(0, map) - val unsafeRow = unsafeConverter.apply(row) - unsafeRow.getMap(0).copy - } - assert(toUnsafeMap(testArrayMap1) !== toUnsafeMap(testArrayMap3)) - assert(toUnsafeMap(testArrayMap2) !== toUnsafeMap(testArrayMap4)) - } -} diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratedProjectionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratedProjectionSuite.scala index 58ea5b9cb52d..0cd0d8859145 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratedProjectionSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratedProjectionSuite.scala @@ -172,4 +172,40 @@ class GeneratedProjectionSuite extends SparkFunSuite { assert(unsafe1 === unsafe3) assert(unsafe1.getStruct(1, 7) === unsafe3.getStruct(1, 7)) } + + test("MutableProjection should not cache content from the input row") { + val mutableProj = GenerateMutableProjection.generate( + Seq(BoundReference(0, new StructType().add("i", StringType), true))) + val row = new GenericInternalRow(1) + mutableProj.target(row) + + val unsafeProj = GenerateUnsafeProjection.generate( + Seq(BoundReference(0, new StructType().add("i", StringType), true))) + val unsafeRow = unsafeProj.apply(InternalRow(InternalRow(UTF8String.fromString("a")))) + + mutableProj.apply(unsafeRow) + assert(row.getStruct(0, 1).getString(0) == "a") + + // Even if the input row of the mutable projection has been changed, the target mutable row + // should keep same. + unsafeProj.apply(InternalRow(InternalRow(UTF8String.fromString("b")))) + assert(row.getStruct(0, 1).getString(0).toString == "a") + } + + test("SafeProjection should not cache content from the input row") { + val safeProj = GenerateSafeProjection.generate( + Seq(BoundReference(0, new StructType().add("i", StringType), true))) + + val unsafeProj = GenerateUnsafeProjection.generate( + Seq(BoundReference(0, new StructType().add("i", StringType), true))) + val unsafeRow = unsafeProj.apply(InternalRow(InternalRow(UTF8String.fromString("a")))) + + val row = safeProj.apply(unsafeRow) + assert(row.getStruct(0, 1).getString(0) == "a") + + // Even if the input row of the mutable projection has been changed, the target mutable row + // should keep same. 
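The hazard these tests pin down can be shown in a few lines; a sketch using only APIs that already appear in this patch (whether the stale reference observes the new bytes depends on buffer reuse, which is exactly the point):

    import org.apache.spark.sql.catalyst.InternalRow
    import org.apache.spark.sql.catalyst.expressions.UnsafeProjection
    import org.apache.spark.sql.types.{DataType, StringType}
    import org.apache.spark.unsafe.types.UTF8String

    val proj = UnsafeProjection.create(Array[DataType](StringType))
    val row = proj.apply(InternalRow(UTF8String.fromString("a")))
    val cached = row.getUTF8String(0)      // points into proj's reusable buffer
    proj.apply(InternalRow(UTF8String.fromString("b")))
    // `cached` may now read "b": it was never detached from the shared bytes.
    // Taking cached.copy() (or InternalRow.copyValue(cached)) earlier avoids this.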
+ unsafeProj.apply(InternalRow(InternalRow(UTF8String.fromString("b")))) + assert(row.getStruct(0, 1).getString(0).toString == "a") + } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/ComplexDataSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/ComplexDataSuite.scala new file mode 100644 index 000000000000..9d285916bcf4 --- /dev/null +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/ComplexDataSuite.scala @@ -0,0 +1,107 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.util + +import scala.collection._ + +import org.apache.spark.SparkFunSuite +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.expressions.{BoundReference, GenericInternalRow, SpecificInternalRow, UnsafeMapData, UnsafeProjection} +import org.apache.spark.sql.catalyst.expressions.codegen.GenerateUnsafeProjection +import org.apache.spark.sql.types.{DataType, IntegerType, MapType, StringType} +import org.apache.spark.unsafe.types.UTF8String + +class ComplexDataSuite extends SparkFunSuite { + def utf8(str: String): UTF8String = UTF8String.fromString(str) + + test("inequality tests for MapData") { + // test data + val testMap1 = Map(utf8("key1") -> 1) + val testMap2 = Map(utf8("key1") -> 1, utf8("key2") -> 2) + val testMap3 = Map(utf8("key1") -> 1) + val testMap4 = Map(utf8("key1") -> 1, utf8("key2") -> 2) + + // ArrayBasedMapData + val testArrayMap1 = ArrayBasedMapData(testMap1.toMap) + val testArrayMap2 = ArrayBasedMapData(testMap2.toMap) + val testArrayMap3 = ArrayBasedMapData(testMap3.toMap) + val testArrayMap4 = ArrayBasedMapData(testMap4.toMap) + assert(testArrayMap1 !== testArrayMap3) + assert(testArrayMap2 !== testArrayMap4) + + // UnsafeMapData + val unsafeConverter = UnsafeProjection.create(Array[DataType](MapType(StringType, IntegerType))) + val row = new GenericInternalRow(1) + def toUnsafeMap(map: ArrayBasedMapData): UnsafeMapData = { + row.update(0, map) + val unsafeRow = unsafeConverter.apply(row) + unsafeRow.getMap(0).copy + } + assert(toUnsafeMap(testArrayMap1) !== toUnsafeMap(testArrayMap3)) + assert(toUnsafeMap(testArrayMap2) !== toUnsafeMap(testArrayMap4)) + } + + test("GenericInternalRow.copy return a new instance that is independent from the old one") { + val project = GenerateUnsafeProjection.generate(Seq(BoundReference(0, StringType, true))) + val unsafeRow = project.apply(InternalRow(utf8("a"))) + + val genericRow = new GenericInternalRow(Array[Any](unsafeRow.getUTF8String(0))) + val copiedGenericRow = genericRow.copy() + assert(copiedGenericRow.getString(0) == "a") + project.apply(InternalRow(UTF8String.fromString("b"))) + // The copied internal row should not be changed externally. 
+ assert(copiedGenericRow.getString(0) == "a") + } + + test("SpecificMutableRow.copy return a new instance that is independent from the old one") { + val project = GenerateUnsafeProjection.generate(Seq(BoundReference(0, StringType, true))) + val unsafeRow = project.apply(InternalRow(utf8("a"))) + + val mutableRow = new SpecificInternalRow(Seq(StringType)) + mutableRow(0) = unsafeRow.getUTF8String(0) + val copiedMutableRow = mutableRow.copy() + assert(copiedMutableRow.getString(0) == "a") + project.apply(InternalRow(UTF8String.fromString("b"))) + // The copied internal row should not be changed externally. + assert(copiedMutableRow.getString(0) == "a") + } + + test("GenericArrayData.copy return a new instance that is independent from the old one") { + val project = GenerateUnsafeProjection.generate(Seq(BoundReference(0, StringType, true))) + val unsafeRow = project.apply(InternalRow(utf8("a"))) + + val genericArray = new GenericArrayData(Array[Any](unsafeRow.getUTF8String(0))) + val copiedGenericArray = genericArray.copy() + assert(copiedGenericArray.getUTF8String(0).toString == "a") + project.apply(InternalRow(UTF8String.fromString("b"))) + // The copied array data should not be changed externally. + assert(copiedGenericArray.getUTF8String(0).toString == "a") + } + + test("copy on nested complex type") { + val project = GenerateUnsafeProjection.generate(Seq(BoundReference(0, StringType, true))) + val unsafeRow = project.apply(InternalRow(utf8("a"))) + + val arrayOfRow = new GenericArrayData(Array[Any](InternalRow(unsafeRow.getUTF8String(0)))) + val copied = arrayOfRow.copy() + assert(copied.getStruct(0, 1).getUTF8String(0).toString == "a") + project.apply(InternalRow(UTF8String.fromString("b"))) + // The copied data should not be changed externally. + assert(copied.getStruct(0, 1).getUTF8String(0).toString == "a") + } +} diff --git a/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/ColumnarBatch.java b/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/ColumnarBatch.java index e23a64350cbc..34dc3af9b85c 100644 --- a/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/ColumnarBatch.java +++ b/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/ColumnarBatch.java @@ -149,7 +149,7 @@ public InternalRow copy() { } else if (dt instanceof DoubleType) { row.setDouble(i, getDouble(i)); } else if (dt instanceof StringType) { - row.update(i, getUTF8String(i)); + row.update(i, getUTF8String(i).copy()); } else if (dt instanceof BinaryType) { row.update(i, getBinary(i)); } else if (dt instanceof DecimalType) { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/SortBasedAggregationIterator.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/SortBasedAggregationIterator.scala index bea2dce1a765..a5a444b160c6 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/SortBasedAggregationIterator.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/SortBasedAggregationIterator.scala @@ -86,17 +86,6 @@ class SortBasedAggregationIterator( // The aggregation buffer used by the sort-based aggregation. private[this] val sortBasedAggregationBuffer: InternalRow = newBuffer - // This safe projection is used to turn the input row into safe row. This is necessary - // because the input row may be produced by unsafe projection in child operator and all the - // produced rows share one byte array. 
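A compact restatement of what the recursive copy now guarantees for nested values, mirroring the "copy on nested complex type" test above (the literals are illustrative):

    import org.apache.spark.sql.catalyst.expressions.GenericInternalRow
    import org.apache.spark.sql.catalyst.util.GenericArrayData
    import org.apache.spark.unsafe.types.UTF8String

    val inner = new GenericInternalRow(Array[Any](UTF8String.fromString("a")))
    val arrayOfRow = new GenericArrayData(Array[Any](inner))
    val copied = arrayOfRow.copy()
    // The copy recursed through InternalRow.copyValue, so mutating the
    // original inner row leaves the copy untouched.
    inner.update(0, UTF8String.fromString("b"))
    assert(copied.getStruct(0, 1).getUTF8String(0).toString == "a")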
However, when we update the aggregate buffer according to - // the input row, we may cache some values from input row, e.g. `Max` will keep the max value from - // input row via MutableProjection, `CollectList` will keep all values in an array via - // ImperativeAggregate framework. These values may get changed unexpectedly if the underlying - // unsafe projection update the shared byte array. By applying a safe projection to the input row, - // we can cut down the connection from input row to the shared byte array, and thus it's safe to - // cache values from input row while updating the aggregation buffer. - private[this] val safeProj: Projection = FromUnsafeProjection(valueAttributes.map(_.dataType)) - protected def initialize(): Unit = { if (inputIterator.hasNext) { initializeBuffer(sortBasedAggregationBuffer) @@ -119,7 +108,7 @@ class SortBasedAggregationIterator( // We create a variable to track if we see the next group. var findNextPartition = false // firstRowInNextGroup is the first row of this group. We first process it. - processRow(sortBasedAggregationBuffer, safeProj(firstRowInNextGroup)) + processRow(sortBasedAggregationBuffer, firstRowInNextGroup) // The search will stop when we see the next group or there is no // input row left in the iter. @@ -130,7 +119,7 @@ class SortBasedAggregationIterator( // Check if the current row belongs the current input row. if (currentGroupingKey == groupingKey) { - processRow(sortBasedAggregationBuffer, safeProj(currentRow)) + processRow(sortBasedAggregationBuffer, currentRow) } else { // We find a new group. findNextPartition = true diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/GenerateColumnAccessor.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/GenerateColumnAccessor.scala index d3fa0dcd2d7c..fc977f2fd553 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/GenerateColumnAccessor.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/GenerateColumnAccessor.scala @@ -56,7 +56,6 @@ class MutableUnsafeRow(val writer: UnsafeRowWriter) extends BaseGenericInternalR // all other methods inherited from GenericMutableRow are not need override protected def genericGet(ordinal: Int): Any = throw new UnsupportedOperationException override def numFields: Int = throw new UnsupportedOperationException - override def copy(): InternalRow = throw new UnsupportedOperationException } /** diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/window/AggregateProcessor.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/window/AggregateProcessor.scala index 2195c6ea9594..bc141b36e63b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/window/AggregateProcessor.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/window/AggregateProcessor.scala @@ -145,13 +145,10 @@ private[window] final class AggregateProcessor( /** Update the buffer. */ def update(input: InternalRow): Unit = { - // TODO(hvanhovell) this sacrifices performance for correctness. We should make sure that - // MutableProjection makes copies of the complex input objects it buffer. 
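Schematically, the effect on the sort-based path (method names as in the hunk above; this is a summary sketch, not the class body):

    // Before: re-materialize every input row through a safe projection so that
    // aggregates could cache fields from it without risk of aliasing.
    //   processRow(sortBasedAggregationBuffer, safeProj(currentRow))
    //
    // After: pass the (possibly unsafe) row straight through. Aggregates now
    // copy only the individual values they retain, which is typically cheaper
    // than copying every field of every row up front.
    //   processRow(sortBasedAggregationBuffer, currentRow)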
- val copy = input.copy() - updateProjection(join(buffer, copy)) + updateProjection(join(buffer, input)) var i = 0 while (i < numImperatives) { - imperatives(i).update(buffer, copy) + imperatives(i).update(buffer, input) i += 1 } } From 61b5df567eb8ae0df4059cb0e334316fff462de9 Mon Sep 17 00:00:00 2001 From: wangzhenhua Date: Sat, 1 Jul 2017 10:01:44 +0800 Subject: [PATCH 008/125] [SPARK-21127][SQL] Update statistics after data changing commands ## What changes were proposed in this pull request? Update stats after the following data changing commands: - InsertIntoHadoopFsRelationCommand - InsertIntoHiveTable - LoadDataCommand - TruncateTableCommand - AlterTableSetLocationCommand - AlterTableDropPartitionCommand ## How was this patch tested? Added new test cases. Author: wangzhenhua Author: Zhenhua Wang Closes #18334 from wzhfy/changeStatsForOperation. --- .../apache/spark/sql/internal/SQLConf.scala | 10 + .../sql/execution/command/CommandUtils.scala | 17 +- .../spark/sql/execution/command/ddl.scala | 15 +- .../spark/sql/StatisticsCollectionSuite.scala | 77 +++++--- .../spark/sql/hive/StatisticsSuite.scala | 187 +++++++++++------- 5 files changed, 207 insertions(+), 99 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index c641e4d3a23e..25152f3e32d6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -774,6 +774,14 @@ object SQLConf { .doubleConf .createWithDefault(0.05) + val AUTO_UPDATE_SIZE = + buildConf("spark.sql.statistics.autoUpdate.size") + .doc("Enables automatic update for table size once table's data is changed. 
Note that if " + + "the total number of files of the table is very large, this can be expensive and slow " + + "down data change commands.") + .booleanConf + .createWithDefault(false) + val CBO_ENABLED = buildConf("spark.sql.cbo.enabled") .doc("Enables CBO for estimation of plan statistics when set true.") @@ -1083,6 +1091,8 @@ class SQLConf extends Serializable with Logging { def cboEnabled: Boolean = getConf(SQLConf.CBO_ENABLED) + def autoUpdateSize: Boolean = getConf(SQLConf.AUTO_UPDATE_SIZE) + def joinReorderEnabled: Boolean = getConf(SQLConf.JOIN_REORDER_ENABLED) def joinReorderDPThreshold: Int = getConf(SQLConf.JOIN_REORDER_DP_THRESHOLD) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/CommandUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/CommandUtils.scala index 92397607f38f..fce12cc96620 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/CommandUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/CommandUtils.scala @@ -36,7 +36,14 @@ object CommandUtils extends Logging { def updateTableStats(sparkSession: SparkSession, table: CatalogTable): Unit = { if (table.stats.nonEmpty) { val catalog = sparkSession.sessionState.catalog - catalog.alterTableStats(table.identifier, None) + if (sparkSession.sessionState.conf.autoUpdateSize) { + val newTable = catalog.getTableMetadata(table.identifier) + val newSize = CommandUtils.calculateTotalSize(sparkSession.sessionState, newTable) + val newStats = CatalogStatistics(sizeInBytes = newSize) + catalog.alterTableStats(table.identifier, Some(newStats)) + } else { + catalog.alterTableStats(table.identifier, None) + } } } @@ -84,7 +91,9 @@ object CommandUtils extends Logging { size } - locationUri.map { p => + val startTime = System.nanoTime() + logInfo(s"Starting to calculate the total file size under path $locationUri.") + val size = locationUri.map { p => val path = new Path(p) try { val fs = path.getFileSystem(sessionState.newHadoopConf()) @@ -97,6 +106,10 @@ object CommandUtils extends Logging { 0L } }.getOrElse(0L) + val durationInMs = (System.nanoTime() - startTime) / (1000 * 1000) + logInfo(s"It took $durationInMs ms to calculate the total file size under path $locationUri.") + + size } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala index ac897c1b22d7..ba7ca84f229f 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala @@ -437,7 +437,20 @@ case class AlterTableAddPartitionCommand( } catalog.createPartitions(table.identifier, parts, ignoreIfExists = ifNotExists) - CommandUtils.updateTableStats(sparkSession, table) + if (table.stats.nonEmpty) { + if (sparkSession.sessionState.conf.autoUpdateSize) { + val addedSize = parts.map { part => + CommandUtils.calculateLocationSize(sparkSession.sessionState, table.identifier, + part.storage.locationUri) + }.sum + if (addedSize > 0) { + val newStats = CatalogStatistics(sizeInBytes = table.stats.get.sizeInBytes + addedSize) + catalog.alterTableStats(table.identifier, Some(newStats)) + } + } else { + catalog.alterTableStats(table.identifier, None) + } + } Seq.empty[Row] } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala index b031c52dad8b..d9392de37a81 100644 
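A usage sketch for the new flag, assuming a SparkSession named spark (the table name is illustrative):

    // Off by default: recomputing the size means listing every file of the
    // table, which can be slow for very large tables (hence the timing log
    // added to calculateTotalSize above).
    spark.conf.set("spark.sql.statistics.autoUpdate.size", "true")

    spark.sql("CREATE TABLE t (i INT) USING PARQUET")
    spark.sql("ANALYZE TABLE t COMPUTE STATISTICS")  // seed sizeInBytes
    spark.sql("INSERT INTO TABLE t SELECT 1")        // sizeInBytes refreshed
    // With the flag off, the insert would instead clear the now-stale stats.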
--- a/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala @@ -28,7 +28,7 @@ import org.apache.spark.sql.catalyst.catalog.{CatalogRelation, CatalogStatistics import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.util.DateTimeUtils import org.apache.spark.sql.execution.datasources.LogicalRelation -import org.apache.spark.sql.internal.StaticSQLConf +import org.apache.spark.sql.internal.{SQLConf, StaticSQLConf} import org.apache.spark.sql.test.{SharedSQLContext, SQLTestUtils} import org.apache.spark.sql.test.SQLTestData.ArrayData import org.apache.spark.sql.types._ @@ -178,36 +178,63 @@ class StatisticsCollectionSuite extends StatisticsCollectionTestBase with Shared test("change stats after set location command") { val table = "change_stats_set_location_table" - withTable(table) { - spark.range(100).select($"id", $"id" % 5 as "value").write.saveAsTable(table) - // analyze to get initial stats - sql(s"ANALYZE TABLE $table COMPUTE STATISTICS FOR COLUMNS id, value") - val fetched1 = checkTableStats( - table, hasSizeInBytes = true, expectedRowCounts = Some(100)) - assert(fetched1.get.sizeInBytes > 0) - assert(fetched1.get.colStats.size == 2) - - // set location command - withTempDir { newLocation => - sql(s"ALTER TABLE $table SET LOCATION '${newLocation.toURI.toString}'") - checkTableStats(table, hasSizeInBytes = false, expectedRowCounts = None) + Seq(false, true).foreach { autoUpdate => + withSQLConf(SQLConf.AUTO_UPDATE_SIZE.key -> autoUpdate.toString) { + withTable(table) { + spark.range(100).select($"id", $"id" % 5 as "value").write.saveAsTable(table) + // analyze to get initial stats + sql(s"ANALYZE TABLE $table COMPUTE STATISTICS FOR COLUMNS id, value") + val fetched1 = checkTableStats( + table, hasSizeInBytes = true, expectedRowCounts = Some(100)) + assert(fetched1.get.sizeInBytes > 0) + assert(fetched1.get.colStats.size == 2) + + // set location command + val initLocation = spark.sessionState.catalog.getTableMetadata(TableIdentifier(table)) + .storage.locationUri.get.toString + withTempDir { newLocation => + sql(s"ALTER TABLE $table SET LOCATION '${newLocation.toURI.toString}'") + if (autoUpdate) { + val fetched2 = checkTableStats(table, hasSizeInBytes = true, expectedRowCounts = None) + assert(fetched2.get.sizeInBytes == 0) + assert(fetched2.get.colStats.isEmpty) + + // set back to the initial location + sql(s"ALTER TABLE $table SET LOCATION '$initLocation'") + val fetched3 = checkTableStats(table, hasSizeInBytes = true, expectedRowCounts = None) + assert(fetched3.get.sizeInBytes == fetched1.get.sizeInBytes) + } else { + checkTableStats(table, hasSizeInBytes = false, expectedRowCounts = None) + } + } + } } } } test("change stats after insert command for datasource table") { val table = "change_stats_insert_datasource_table" - withTable(table) { - sql(s"CREATE TABLE $table (i int, j string) USING PARQUET") - // analyze to get initial stats - sql(s"ANALYZE TABLE $table COMPUTE STATISTICS FOR COLUMNS i, j") - val fetched1 = checkTableStats(table, hasSizeInBytes = true, expectedRowCounts = Some(0)) - assert(fetched1.get.sizeInBytes == 0) - assert(fetched1.get.colStats.size == 2) - - // insert into command - sql(s"INSERT INTO TABLE $table SELECT 1, 'abc'") - checkTableStats(table, hasSizeInBytes = false, expectedRowCounts = None) + Seq(false, true).foreach { autoUpdate => + withSQLConf(SQLConf.AUTO_UPDATE_SIZE.key -> autoUpdate.toString) { + 
withTable(table) { + sql(s"CREATE TABLE $table (i int, j string) USING PARQUET") + // analyze to get initial stats + sql(s"ANALYZE TABLE $table COMPUTE STATISTICS FOR COLUMNS i, j") + val fetched1 = checkTableStats(table, hasSizeInBytes = true, expectedRowCounts = Some(0)) + assert(fetched1.get.sizeInBytes == 0) + assert(fetched1.get.colStats.size == 2) + + // insert into command + sql(s"INSERT INTO TABLE $table SELECT 1, 'abc'") + if (autoUpdate) { + val fetched2 = checkTableStats(table, hasSizeInBytes = true, expectedRowCounts = None) + assert(fetched2.get.sizeInBytes > 0) + assert(fetched2.get.colStats.isEmpty) + } else { + checkTableStats(table, hasSizeInBytes = false, expectedRowCounts = None) + } + } + } } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala index 5fd266c2d033..c601038a2b0a 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala @@ -444,88 +444,133 @@ class StatisticsSuite extends StatisticsCollectionTestBase with TestHiveSingleto test("change stats after insert command for hive table") { val table = s"change_stats_insert_hive_table" - withTable(table) { - sql(s"CREATE TABLE $table (i int, j string)") - // analyze to get initial stats - sql(s"ANALYZE TABLE $table COMPUTE STATISTICS FOR COLUMNS i, j") - val fetched1 = checkTableStats(table, hasSizeInBytes = true, expectedRowCounts = Some(0)) - assert(fetched1.get.sizeInBytes == 0) - assert(fetched1.get.colStats.size == 2) - - // insert into command - sql(s"INSERT INTO TABLE $table SELECT 1, 'abc'") - assert(getStatsProperties(table).isEmpty) + Seq(false, true).foreach { autoUpdate => + withSQLConf(SQLConf.AUTO_UPDATE_SIZE.key -> autoUpdate.toString) { + withTable(table) { + sql(s"CREATE TABLE $table (i int, j string)") + // analyze to get initial stats + sql(s"ANALYZE TABLE $table COMPUTE STATISTICS FOR COLUMNS i, j") + val fetched1 = checkTableStats(table, hasSizeInBytes = true, expectedRowCounts = Some(0)) + assert(fetched1.get.sizeInBytes == 0) + assert(fetched1.get.colStats.size == 2) + + // insert into command + sql(s"INSERT INTO TABLE $table SELECT 1, 'abc'") + if (autoUpdate) { + val fetched2 = checkTableStats(table, hasSizeInBytes = true, expectedRowCounts = None) + assert(fetched2.get.sizeInBytes > 0) + assert(fetched2.get.colStats.isEmpty) + val statsProp = getStatsProperties(table) + assert(statsProp(STATISTICS_TOTAL_SIZE).toLong == fetched2.get.sizeInBytes) + } else { + assert(getStatsProperties(table).isEmpty) + } + } + } } } test("change stats after load data command") { val table = "change_stats_load_table" - withTable(table) { - sql(s"CREATE TABLE $table (i INT, j STRING) STORED AS PARQUET") - // analyze to get initial stats - sql(s"ANALYZE TABLE $table COMPUTE STATISTICS FOR COLUMNS i, j") - val fetched1 = checkTableStats(table, hasSizeInBytes = true, expectedRowCounts = Some(0)) - assert(fetched1.get.sizeInBytes == 0) - assert(fetched1.get.colStats.size == 2) - - withTempDir { loadPath => - // load data command - val file = new File(loadPath + "/data") - val writer = new PrintWriter(file) - writer.write("2,xyz") - writer.close() - sql(s"LOAD DATA INPATH '${loadPath.toURI.toString}' INTO TABLE $table") - assert(getStatsProperties(table).isEmpty) + Seq(false, true).foreach { autoUpdate => + withSQLConf(SQLConf.AUTO_UPDATE_SIZE.key -> autoUpdate.toString) { + withTable(table) { + sql(s"CREATE 
TABLE $table (i INT, j STRING) STORED AS PARQUET") + // analyze to get initial stats + sql(s"ANALYZE TABLE $table COMPUTE STATISTICS FOR COLUMNS i, j") + val fetched1 = checkTableStats(table, hasSizeInBytes = true, expectedRowCounts = Some(0)) + assert(fetched1.get.sizeInBytes == 0) + assert(fetched1.get.colStats.size == 2) + + withTempDir { loadPath => + // load data command + val file = new File(loadPath + "/data") + val writer = new PrintWriter(file) + writer.write("2,xyz") + writer.close() + sql(s"LOAD DATA INPATH '${loadPath.toURI.toString}' INTO TABLE $table") + if (autoUpdate) { + val fetched2 = checkTableStats(table, hasSizeInBytes = true, expectedRowCounts = None) + assert(fetched2.get.sizeInBytes > 0) + assert(fetched2.get.colStats.isEmpty) + val statsProp = getStatsProperties(table) + assert(statsProp(STATISTICS_TOTAL_SIZE).toLong == fetched2.get.sizeInBytes) + } else { + assert(getStatsProperties(table).isEmpty) + } + } + } } } } test("change stats after add/drop partition command") { val table = "change_stats_part_table" - withTable(table) { - sql(s"CREATE TABLE $table (i INT, j STRING) PARTITIONED BY (ds STRING, hr STRING)") - // table has two partitions initially - for (ds <- Seq("2008-04-08"); hr <- Seq("11", "12")) { - sql(s"INSERT OVERWRITE TABLE $table PARTITION (ds='$ds',hr='$hr') SELECT 1, 'a'") - } - // analyze to get initial stats - sql(s"ANALYZE TABLE $table COMPUTE STATISTICS FOR COLUMNS i, j") - val fetched1 = checkTableStats(table, hasSizeInBytes = true, expectedRowCounts = Some(2)) - assert(fetched1.get.sizeInBytes > 0) - assert(fetched1.get.colStats.size == 2) - - withTempPaths(numPaths = 2) { case Seq(dir1, dir2) => - val file1 = new File(dir1 + "/data") - val writer1 = new PrintWriter(file1) - writer1.write("1,a") - writer1.close() - - val file2 = new File(dir2 + "/data") - val writer2 = new PrintWriter(file2) - writer2.write("1,a") - writer2.close() - - // add partition command - sql( - s""" - |ALTER TABLE $table ADD - |PARTITION (ds='2008-04-09', hr='11') LOCATION '${dir1.toURI.toString}' - |PARTITION (ds='2008-04-09', hr='12') LOCATION '${dir2.toURI.toString}' - """.stripMargin) - assert(getStatsProperties(table).isEmpty) - - // generate stats again - sql(s"ANALYZE TABLE $table COMPUTE STATISTICS FOR COLUMNS i, j") - val fetched2 = checkTableStats(table, hasSizeInBytes = true, expectedRowCounts = Some(4)) - assert(fetched2.get.sizeInBytes > 0) - assert(fetched2.get.colStats.size == 2) - - // drop partition command - sql(s"ALTER TABLE $table DROP PARTITION (ds='2008-04-08'), PARTITION (hr='12')") - // only one partition left - assert(spark.sessionState.catalog.listPartitions(TableIdentifier(table)) - .map(_.spec).toSet == Set(Map("ds" -> "2008-04-09", "hr" -> "11"))) - assert(getStatsProperties(table).isEmpty) + Seq(false, true).foreach { autoUpdate => + withSQLConf(SQLConf.AUTO_UPDATE_SIZE.key -> autoUpdate.toString) { + withTable(table) { + sql(s"CREATE TABLE $table (i INT, j STRING) PARTITIONED BY (ds STRING, hr STRING)") + // table has two partitions initially + for (ds <- Seq("2008-04-08"); hr <- Seq("11", "12")) { + sql(s"INSERT OVERWRITE TABLE $table PARTITION (ds='$ds',hr='$hr') SELECT 1, 'a'") + } + // analyze to get initial stats + sql(s"ANALYZE TABLE $table COMPUTE STATISTICS FOR COLUMNS i, j") + val fetched1 = checkTableStats(table, hasSizeInBytes = true, expectedRowCounts = Some(2)) + assert(fetched1.get.sizeInBytes > 0) + assert(fetched1.get.colStats.size == 2) + + withTempPaths(numPaths = 2) { case Seq(dir1, dir2) => + val file1 = new 
File(dir1 + "/data") + val writer1 = new PrintWriter(file1) + writer1.write("1,a") + writer1.close() + + val file2 = new File(dir2 + "/data") + val writer2 = new PrintWriter(file2) + writer2.write("1,a") + writer2.close() + + // add partition command + sql( + s""" + |ALTER TABLE $table ADD + |PARTITION (ds='2008-04-09', hr='11') LOCATION '${dir1.toURI.toString}' + |PARTITION (ds='2008-04-09', hr='12') LOCATION '${dir2.toURI.toString}' + """.stripMargin) + if (autoUpdate) { + val fetched2 = checkTableStats(table, hasSizeInBytes = true, expectedRowCounts = None) + assert(fetched2.get.sizeInBytes > fetched1.get.sizeInBytes) + assert(fetched2.get.colStats.isEmpty) + val statsProp = getStatsProperties(table) + assert(statsProp(STATISTICS_TOTAL_SIZE).toLong == fetched2.get.sizeInBytes) + } else { + assert(getStatsProperties(table).isEmpty) + } + + // now the table has four partitions, generate stats again + sql(s"ANALYZE TABLE $table COMPUTE STATISTICS FOR COLUMNS i, j") + val fetched3 = checkTableStats( + table, hasSizeInBytes = true, expectedRowCounts = Some(4)) + assert(fetched3.get.sizeInBytes > 0) + assert(fetched3.get.colStats.size == 2) + + // drop partition command + sql(s"ALTER TABLE $table DROP PARTITION (ds='2008-04-08'), PARTITION (hr='12')") + assert(spark.sessionState.catalog.listPartitions(TableIdentifier(table)) + .map(_.spec).toSet == Set(Map("ds" -> "2008-04-09", "hr" -> "11"))) + // only one partition left + if (autoUpdate) { + val fetched4 = checkTableStats(table, hasSizeInBytes = true, expectedRowCounts = None) + assert(fetched4.get.sizeInBytes < fetched1.get.sizeInBytes) + assert(fetched4.get.colStats.isEmpty) + val statsProp = getStatsProperties(table) + assert(statsProp(STATISTICS_TOTAL_SIZE).toLong == fetched4.get.sizeInBytes) + } else { + assert(getStatsProperties(table).isEmpty) + } + } + } } } } From b1d719e7c9faeb5661a7e712b3ecefca56bf356f Mon Sep 17 00:00:00 2001 From: Reynold Xin Date: Fri, 30 Jun 2017 21:10:23 -0700 Subject: [PATCH 009/125] [SPARK-21273][SQL] Propagate logical plan stats using visitor pattern and mixin ## What changes were proposed in this pull request? We currently implement statistics propagation directly in logical plan. Given we already have two different implementations, it'd make sense to actually decouple the two and add stats propagation using mixin. This would reduce the coupling between logical plan and statistics handling. This can also be a powerful pattern in the future to add additional properties (e.g. constraints). ## How was this patch tested? Should be covered by existing test cases. Author: Reynold Xin Closes #18479 from rxin/stats-trait. 
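Before the diff, a sketch of the pattern being proposed: a property is computed by a visitor over the plan tree instead of by overrides scattered across operator classes. The two-case trait and node counter below are simplified illustrations, not the real LogicalPlanVisitor, which has one visitXxx method per operator:

    import org.apache.spark.sql.catalyst.plans.logical._

    // Simplified visitor: everything but Filter and Project falls through.
    trait SimpleVisitor[T] {
      def visit(p: LogicalPlan): T = p match {
        case f: Filter   => visitFilter(f)
        case pr: Project => visitProject(pr)
        case other       => default(other)
      }
      def default(p: LogicalPlan): T
      def visitFilter(p: Filter): T
      def visitProject(p: Project): T
    }

    // A property added without editing any operator class: count plan nodes.
    object NodeCount extends SimpleVisitor[Int] {
      override def default(p: LogicalPlan): Int = 1 + p.children.map(visit).sum
      override def visitFilter(p: Filter): Int = default(p)
      override def visitProject(p: Project): Int = default(p)
    }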
--- .../sql/catalyst/catalog/interface.scala | 2 +- .../plans/logical/LocalRelation.scala | 5 +- .../catalyst/plans/logical/LogicalPlan.scala | 61 +------ .../plans/logical/LogicalPlanVisitor.scala | 87 ++++++++++ .../plans/logical/basicLogicalOperators.scala | 128 +------------- .../sql/catalyst/plans/logical/hints.scala | 5 - .../BasicStatsPlanVisitor.scala | 82 +++++++++ .../statsEstimation/LogicalPlanStats.scala | 50 ++++++ .../SizeInBytesOnlyStatsPlanVisitor.scala | 163 ++++++++++++++++++ .../BasicStatsEstimationSuite.scala | 44 ----- .../StatsEstimationTestBase.scala | 2 +- .../spark/sql/execution/ExistingRDD.scala | 4 +- .../execution/columnar/InMemoryRelation.scala | 2 +- .../datasources/LogicalRelation.scala | 7 +- .../sql/execution/streaming/memory.scala | 3 +- .../PruneFileSourcePartitionsSuite.scala | 2 +- 16 files changed, 409 insertions(+), 238 deletions(-) create mode 100644 sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlanVisitor.scala create mode 100644 sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/BasicStatsPlanVisitor.scala create mode 100644 sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/LogicalPlanStats.scala create mode 100644 sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/SizeInBytesOnlyStatsPlanVisitor.scala diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala index da50b0e7e8e4..9531456434a1 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala @@ -438,7 +438,7 @@ case class CatalogRelation( case (attr, index) => attr.withExprId(ExprId(index + dataCols.length)) }) - override def computeStats: Statistics = { + override def computeStats(): Statistics = { // For data source tables, we will create a `LogicalRelation` and won't call this method, for // hive serde tables, we will always generate a statistics. // TODO: unify the table stats generation. 
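With this change, only leaf nodes keep a statistics override, now spelled computeStats(); a hypothetical external leaf relation would look like the sketch below (the relation itself is invented for illustration):

    import org.apache.spark.sql.catalyst.expressions.Attribute
    import org.apache.spark.sql.catalyst.plans.logical.{LeafNode, Statistics}

    // Hypothetical external relation: leaves define their own statistics,
    // while estimation for non-leaf operators moves into the stats visitors.
    case class ExternalRelation(output: Seq[Attribute], bytesOnDisk: Long)
      extends LeafNode {
      override def computeStats(): Statistics =
        Statistics(sizeInBytes = BigInt(bytesOnDisk))
    }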
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LocalRelation.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LocalRelation.scala index dc2add64b68b..1c986fbde7ad 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LocalRelation.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LocalRelation.scala @@ -66,9 +66,8 @@ case class LocalRelation(output: Seq[Attribute], data: Seq[InternalRow] = Nil) } } - override def computeStats: Statistics = - Statistics(sizeInBytes = - output.map(n => BigInt(n.dataType.defaultSize)).sum * data.length) + override def computeStats(): Statistics = + Statistics(sizeInBytes = output.map(n => BigInt(n.dataType.defaultSize)).sum * data.length) def toSQL(inlineTableName: String): String = { require(data.nonEmpty) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala index 0d30aa76049a..8649603b1a9f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala @@ -22,11 +22,16 @@ import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.analysis._ import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans.QueryPlan +import org.apache.spark.sql.catalyst.plans.logical.statsEstimation.LogicalPlanStats import org.apache.spark.sql.catalyst.trees.CurrentOrigin import org.apache.spark.sql.types.StructType -abstract class LogicalPlan extends QueryPlan[LogicalPlan] with QueryPlanConstraints with Logging { +abstract class LogicalPlan + extends QueryPlan[LogicalPlan] + with LogicalPlanStats + with QueryPlanConstraints + with Logging { private var _analyzed: Boolean = false @@ -80,40 +85,6 @@ abstract class LogicalPlan extends QueryPlan[LogicalPlan] with QueryPlanConstrai } } - /** A cache for the estimated statistics, such that it will only be computed once. */ - private var statsCache: Option[Statistics] = None - - /** - * Returns the estimated statistics for the current logical plan node. Under the hood, this - * method caches the return value, which is computed based on the configuration passed in the - * first time. If the configuration changes, the cache can be invalidated by calling - * [[invalidateStatsCache()]]. - */ - final def stats: Statistics = statsCache.getOrElse { - statsCache = Some(computeStats) - statsCache.get - } - - /** Invalidates the stats cache. See [[stats]] for more information. */ - final def invalidateStatsCache(): Unit = { - statsCache = None - children.foreach(_.invalidateStatsCache()) - } - - /** - * Computes [[Statistics]] for this plan. The default implementation assumes the output - * cardinality is the product of all child plan's cardinality, i.e. applies in the case - * of cartesian joins. - * - * [[LeafNode]]s must override this. 
- */ - protected def computeStats: Statistics = { - if (children.isEmpty) { - throw new UnsupportedOperationException(s"LeafNode $nodeName must implement statistics.") - } - Statistics(sizeInBytes = children.map(_.stats.sizeInBytes).product) - } - override def verboseStringWithSuffix: String = { super.verboseString + statsCache.map(", " + _.toString).getOrElse("") } @@ -300,6 +271,9 @@ abstract class LogicalPlan extends QueryPlan[LogicalPlan] with QueryPlanConstrai abstract class LeafNode extends LogicalPlan { override final def children: Seq[LogicalPlan] = Nil override def producedAttributes: AttributeSet = outputSet + + /** Leaf nodes that can survive analysis must define their own statistics. */ + def computeStats(): Statistics = throw new UnsupportedOperationException } /** @@ -331,23 +305,6 @@ abstract class UnaryNode extends LogicalPlan { } override protected def validConstraints: Set[Expression] = child.constraints - - override def computeStats: Statistics = { - // There should be some overhead in Row object, the size should not be zero when there is - // no columns, this help to prevent divide-by-zero error. - val childRowSize = child.output.map(_.dataType.defaultSize).sum + 8 - val outputRowSize = output.map(_.dataType.defaultSize).sum + 8 - // Assume there will be the same number of rows as child has. - var sizeInBytes = (child.stats.sizeInBytes * outputRowSize) / childRowSize - if (sizeInBytes == 0) { - // sizeInBytes can't be zero, or sizeInBytes of BinaryNode will also be zero - // (product of children). - sizeInBytes = 1 - } - - // Don't propagate rowCount and attributeStats, since they are not estimated here. - Statistics(sizeInBytes = sizeInBytes, hints = child.stats.hints) - } } /** diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlanVisitor.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlanVisitor.scala new file mode 100644 index 000000000000..b23045810a4f --- /dev/null +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlanVisitor.scala @@ -0,0 +1,87 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.plans.logical + +/** + * A visitor pattern for traversing a [[LogicalPlan]] tree and compute some properties. 
+ */ +trait LogicalPlanVisitor[T] { + + def visit(p: LogicalPlan): T = p match { + case p: Aggregate => visitAggregate(p) + case p: Distinct => visitDistinct(p) + case p: Except => visitExcept(p) + case p: Expand => visitExpand(p) + case p: Filter => visitFilter(p) + case p: Generate => visitGenerate(p) + case p: GlobalLimit => visitGlobalLimit(p) + case p: Intersect => visitIntersect(p) + case p: Join => visitJoin(p) + case p: LocalLimit => visitLocalLimit(p) + case p: Pivot => visitPivot(p) + case p: Project => visitProject(p) + case p: Range => visitRange(p) + case p: Repartition => visitRepartition(p) + case p: RepartitionByExpression => visitRepartitionByExpr(p) + case p: Sample => visitSample(p) + case p: ScriptTransformation => visitScriptTransform(p) + case p: Union => visitUnion(p) + case p: ResolvedHint => visitHint(p) + case p: LogicalPlan => default(p) + } + + def default(p: LogicalPlan): T + + def visitAggregate(p: Aggregate): T + + def visitDistinct(p: Distinct): T + + def visitExcept(p: Except): T + + def visitExpand(p: Expand): T + + def visitFilter(p: Filter): T + + def visitGenerate(p: Generate): T + + def visitGlobalLimit(p: GlobalLimit): T + + def visitHint(p: ResolvedHint): T + + def visitIntersect(p: Intersect): T + + def visitJoin(p: Join): T + + def visitLocalLimit(p: LocalLimit): T + + def visitPivot(p: Pivot): T + + def visitProject(p: Project): T + + def visitRange(p: Range): T + + def visitRepartition(p: Repartition): T + + def visitRepartitionByExpr(p: RepartitionByExpression): T + + def visitSample(p: Sample): T + + def visitScriptTransform(p: ScriptTransformation): T + + def visitUnion(p: Union): T +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala index e89caabf252d..0bd3166352d3 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala @@ -63,14 +63,6 @@ case class Project(projectList: Seq[NamedExpression], child: LogicalPlan) extend override def validConstraints: Set[Expression] = child.constraints.union(getAliasedConstraints(projectList)) - - override def computeStats: Statistics = { - if (conf.cboEnabled) { - ProjectEstimation.estimate(this).getOrElse(super.computeStats) - } else { - super.computeStats - } - } } /** @@ -137,14 +129,6 @@ case class Filter(condition: Expression, child: LogicalPlan) .filterNot(SubqueryExpression.hasCorrelatedSubquery) child.constraints.union(predicates.toSet) } - - override def computeStats: Statistics = { - if (conf.cboEnabled) { - FilterEstimation(this).estimate.getOrElse(super.computeStats) - } else { - super.computeStats - } - } } abstract class SetOperation(left: LogicalPlan, right: LogicalPlan) extends BinaryNode { @@ -190,15 +174,6 @@ case class Intersect(left: LogicalPlan, right: LogicalPlan) extends SetOperation Some(children.flatMap(_.maxRows).min) } } - - override def computeStats: Statistics = { - val leftSize = left.stats.sizeInBytes - val rightSize = right.stats.sizeInBytes - val sizeInBytes = if (leftSize < rightSize) leftSize else rightSize - Statistics( - sizeInBytes = sizeInBytes, - hints = left.stats.hints.resetForJoin()) - } } case class Except(left: LogicalPlan, right: LogicalPlan) extends SetOperation(left, right) { @@ -207,10 +182,6 @@ case class Except(left: LogicalPlan, right: 
LogicalPlan) extends SetOperation(le override def output: Seq[Attribute] = left.output override protected def validConstraints: Set[Expression] = leftConstraints - - override def computeStats: Statistics = { - left.stats.copy() - } } /** Factory for constructing new `Union` nodes. */ @@ -247,11 +218,6 @@ case class Union(children: Seq[LogicalPlan]) extends LogicalPlan { children.length > 1 && childrenResolved && allChildrenCompatible } - override def computeStats: Statistics = { - val sizeInBytes = children.map(_.stats.sizeInBytes).sum - Statistics(sizeInBytes = sizeInBytes) - } - /** * Maps the constraints containing a given (original) sequence of attributes to those with a * given (reference) sequence of attributes. Given the nature of union, we expect that the @@ -355,25 +321,6 @@ case class Join( case UsingJoin(_, _) => false case _ => resolvedExceptNatural } - - override def computeStats: Statistics = { - def simpleEstimation: Statistics = joinType match { - case LeftAnti | LeftSemi => - // LeftSemi and LeftAnti won't ever be bigger than left - left.stats - case _ => - // Make sure we don't propagate isBroadcastable in other joins, because - // they could explode the size. - val stats = super.computeStats - stats.copy(hints = stats.hints.resetForJoin()) - } - - if (conf.cboEnabled) { - JoinEstimation.estimate(this).getOrElse(simpleEstimation) - } else { - simpleEstimation - } - } } /** @@ -522,14 +469,13 @@ case class Range( override def newInstance(): Range = copy(output = output.map(_.newInstance())) - override def computeStats: Statistics = { - val sizeInBytes = LongType.defaultSize * numElements - Statistics( sizeInBytes = sizeInBytes ) - } - override def simpleString: String = { s"Range ($start, $end, step=$step, splits=$numSlices)" } + + override def computeStats(): Statistics = { + Statistics(sizeInBytes = LongType.defaultSize * numElements) + } } case class Aggregate( @@ -554,25 +500,6 @@ case class Aggregate( val nonAgg = aggregateExpressions.filter(_.find(_.isInstanceOf[AggregateExpression]).isEmpty) child.constraints.union(getAliasedConstraints(nonAgg)) } - - override def computeStats: Statistics = { - def simpleEstimation: Statistics = { - if (groupingExpressions.isEmpty) { - Statistics( - sizeInBytes = EstimationUtils.getOutputSize(output, outputRowCount = 1), - rowCount = Some(1), - hints = child.stats.hints) - } else { - super.computeStats - } - } - - if (conf.cboEnabled) { - AggregateEstimation.estimate(this).getOrElse(simpleEstimation) - } else { - simpleEstimation - } - } } case class Window( @@ -671,11 +598,6 @@ case class Expand( override def references: AttributeSet = AttributeSet(projections.flatten.flatMap(_.references)) - override def computeStats: Statistics = { - val sizeInBytes = super.computeStats.sizeInBytes * projections.length - Statistics(sizeInBytes = sizeInBytes) - } - // This operator can reuse attributes (for example making them null when doing a roll up) so // the constraints of the child may no longer be valid. 
override protected def validConstraints: Set[Expression] = Set.empty[Expression] @@ -742,16 +664,6 @@ case class GlobalLimit(limitExpr: Expression, child: LogicalPlan) extends UnaryN case _ => None } } - override def computeStats: Statistics = { - val limit = limitExpr.eval().asInstanceOf[Int] - val childStats = child.stats - val rowCount: BigInt = childStats.rowCount.map(_.min(limit)).getOrElse(limit) - // Don't propagate column stats, because we don't know the distribution after a limit operation - Statistics( - sizeInBytes = EstimationUtils.getOutputSize(output, rowCount, childStats.attributeStats), - rowCount = Some(rowCount), - hints = childStats.hints) - } } case class LocalLimit(limitExpr: Expression, child: LogicalPlan) extends UnaryNode { @@ -762,24 +674,6 @@ case class LocalLimit(limitExpr: Expression, child: LogicalPlan) extends UnaryNo case _ => None } } - override def computeStats: Statistics = { - val limit = limitExpr.eval().asInstanceOf[Int] - val childStats = child.stats - if (limit == 0) { - // sizeInBytes can't be zero, or sizeInBytes of BinaryNode will also be zero - // (product of children). - Statistics( - sizeInBytes = 1, - rowCount = Some(0), - hints = childStats.hints) - } else { - // The output row count of LocalLimit should be the sum of row counts from each partition. - // However, since the number of partitions is not available here, we just use statistics of - // the child. Because the distribution after a limit operation is unknown, we do not propagate - // the column stats. - childStats.copy(attributeStats = AttributeMap(Nil)) - } - } } /** @@ -828,18 +722,6 @@ case class Sample( } override def output: Seq[Attribute] = child.output - - override def computeStats: Statistics = { - val ratio = upperBound - lowerBound - val childStats = child.stats - var sizeInBytes = EstimationUtils.ceil(BigDecimal(childStats.sizeInBytes) * ratio) - if (sizeInBytes == 0) { - sizeInBytes = 1 - } - val sampledRowCount = childStats.rowCount.map(c => EstimationUtils.ceil(BigDecimal(c) * ratio)) - // Don't propagate column stats, because we don't know the distribution after a sample operation - Statistics(sizeInBytes, sampledRowCount, hints = childStats.hints) - } } /** @@ -893,7 +775,7 @@ case class RepartitionByExpression( case object OneRowRelation extends LeafNode { override def maxRows: Option[Long] = Some(1) override def output: Seq[Attribute] = Nil - override def computeStats: Statistics = Statistics(sizeInBytes = 1) + override def computeStats(): Statistics = Statistics(sizeInBytes = 1) } /** A logical plan for `dropDuplicates`. 
 */
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/hints.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/hints.scala
index 8479c702d756..29a43528124d 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/hints.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/hints.scala
@@ -42,11 +42,6 @@ case class ResolvedHint(child: LogicalPlan, hints: HintInfo = HintInfo())
   override def output: Seq[Attribute] = child.output
 
   override lazy val canonicalized: LogicalPlan = child.canonicalized
-
-  override def computeStats: Statistics = {
-    val stats = child.stats
-    stats.copy(hints = hints)
-  }
 }
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/BasicStatsPlanVisitor.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/BasicStatsPlanVisitor.scala
new file mode 100644
index 000000000000..93908b04fb64
--- /dev/null
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/BasicStatsPlanVisitor.scala
@@ -0,0 +1,82 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.catalyst.plans.logical.statsEstimation
+
+import org.apache.spark.sql.catalyst.plans.logical
+import org.apache.spark.sql.catalyst.plans.logical._
+import org.apache.spark.sql.types.LongType
+
+/**
+ * A [[LogicalPlanVisitor]] that computes the statistics used in a cost-based optimizer.
+ */
+object BasicStatsPlanVisitor extends LogicalPlanVisitor[Statistics] {
+
+  /** Falls back to the estimation computed by [[SizeInBytesOnlyStatsPlanVisitor]].
*/ + private def fallback(p: LogicalPlan): Statistics = SizeInBytesOnlyStatsPlanVisitor.visit(p) + + override def default(p: LogicalPlan): Statistics = fallback(p) + + override def visitAggregate(p: Aggregate): Statistics = { + AggregateEstimation.estimate(p).getOrElse(fallback(p)) + } + + override def visitDistinct(p: Distinct): Statistics = fallback(p) + + override def visitExcept(p: Except): Statistics = fallback(p) + + override def visitExpand(p: Expand): Statistics = fallback(p) + + override def visitFilter(p: Filter): Statistics = { + FilterEstimation(p).estimate.getOrElse(fallback(p)) + } + + override def visitGenerate(p: Generate): Statistics = fallback(p) + + override def visitGlobalLimit(p: GlobalLimit): Statistics = fallback(p) + + override def visitHint(p: ResolvedHint): Statistics = fallback(p) + + override def visitIntersect(p: Intersect): Statistics = fallback(p) + + override def visitJoin(p: Join): Statistics = { + JoinEstimation.estimate(p).getOrElse(fallback(p)) + } + + override def visitLocalLimit(p: LocalLimit): Statistics = fallback(p) + + override def visitPivot(p: Pivot): Statistics = fallback(p) + + override def visitProject(p: Project): Statistics = { + ProjectEstimation.estimate(p).getOrElse(fallback(p)) + } + + override def visitRange(p: logical.Range): Statistics = { + val sizeInBytes = LongType.defaultSize * p.numElements + Statistics(sizeInBytes = sizeInBytes) + } + + override def visitRepartition(p: Repartition): Statistics = fallback(p) + + override def visitRepartitionByExpr(p: RepartitionByExpression): Statistics = fallback(p) + + override def visitSample(p: Sample): Statistics = fallback(p) + + override def visitScriptTransform(p: ScriptTransformation): Statistics = fallback(p) + + override def visitUnion(p: Union): Statistics = fallback(p) +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/LogicalPlanStats.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/LogicalPlanStats.scala new file mode 100644 index 000000000000..8660d9355019 --- /dev/null +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/LogicalPlanStats.scala @@ -0,0 +1,50 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.plans.logical.statsEstimation + +import org.apache.spark.sql.catalyst.plans.logical._ + +/** + * A trait to add statistics propagation to [[LogicalPlan]]. + */ +trait LogicalPlanStats { self: LogicalPlan => + + /** + * Returns the estimated statistics for the current logical plan node. Under the hood, this + * method caches the return value, which is computed based on the configuration passed in the + * first time. 
If the configuration changes, the cache can be invalidated by calling
+   * [[invalidateStatsCache()]].
+   */
+  def stats: Statistics = statsCache.getOrElse {
+    if (conf.cboEnabled) {
+      statsCache = Option(BasicStatsPlanVisitor.visit(self))
+    } else {
+      statsCache = Option(SizeInBytesOnlyStatsPlanVisitor.visit(self))
+    }
+    statsCache.get
+  }
+
+  /** A cache for the estimated statistics, such that it will only be computed once. */
+  protected var statsCache: Option[Statistics] = None
+
+  /** Invalidates the stats cache. See [[stats]] for more information. */
+  final def invalidateStatsCache(): Unit = {
+    statsCache = None
+    children.foreach(_.invalidateStatsCache())
+  }
+}
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/SizeInBytesOnlyStatsPlanVisitor.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/SizeInBytesOnlyStatsPlanVisitor.scala
new file mode 100644
index 000000000000..559f12072e44
--- /dev/null
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/SizeInBytesOnlyStatsPlanVisitor.scala
@@ -0,0 +1,163 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.catalyst.plans.logical.statsEstimation
+
+import org.apache.spark.sql.catalyst.expressions.AttributeMap
+import org.apache.spark.sql.catalyst.plans.{LeftAnti, LeftSemi}
+import org.apache.spark.sql.catalyst.plans.logical
+import org.apache.spark.sql.catalyst.plans.logical._
+
+/**
+ * A [[LogicalPlanVisitor]] that computes a single dimension for plan stats: size in bytes.
+ */
+object SizeInBytesOnlyStatsPlanVisitor extends LogicalPlanVisitor[Statistics] {
+
+  /**
+   * A default, commonly used estimation for unary nodes. We assume the input row number is the
+   * same as the output row number, and compute sizes based on the column types.
+   */
+  private def visitUnaryNode(p: UnaryNode): Statistics = {
+    // There should be some overhead in a Row object; the size should not be zero when there
+    // are no columns. This helps prevent a divide-by-zero error.
+    val childRowSize = p.child.output.map(_.dataType.defaultSize).sum + 8
+    val outputRowSize = p.output.map(_.dataType.defaultSize).sum + 8
+    // Assume there will be the same number of rows as the child has.
+    var sizeInBytes = (p.child.stats.sizeInBytes * outputRowSize) / childRowSize
+    if (sizeInBytes == 0) {
+      // sizeInBytes can't be zero, or the sizeInBytes of a BinaryNode will also be zero
+      // (product of children).
+      sizeInBytes = 1
+    }
+
+    // Don't propagate rowCount and attributeStats, since they are not estimated here.
+    Statistics(sizeInBytes = sizeInBytes, hints = p.child.stats.hints)
+  }
+
+  /**
+   * For leaf nodes, use their computeStats.
For other nodes, we assume the size in bytes is the + * sum of all of the children's. + */ + override def default(p: LogicalPlan): Statistics = p match { + case p: LeafNode => p.computeStats() + case _: LogicalPlan => Statistics(sizeInBytes = p.children.map(_.stats.sizeInBytes).product) + } + + override def visitAggregate(p: Aggregate): Statistics = { + if (p.groupingExpressions.isEmpty) { + Statistics( + sizeInBytes = EstimationUtils.getOutputSize(p.output, outputRowCount = 1), + rowCount = Some(1), + hints = p.child.stats.hints) + } else { + visitUnaryNode(p) + } + } + + override def visitDistinct(p: Distinct): Statistics = default(p) + + override def visitExcept(p: Except): Statistics = p.left.stats.copy() + + override def visitExpand(p: Expand): Statistics = { + val sizeInBytes = visitUnaryNode(p).sizeInBytes * p.projections.length + Statistics(sizeInBytes = sizeInBytes) + } + + override def visitFilter(p: Filter): Statistics = visitUnaryNode(p) + + override def visitGenerate(p: Generate): Statistics = default(p) + + override def visitGlobalLimit(p: GlobalLimit): Statistics = { + val limit = p.limitExpr.eval().asInstanceOf[Int] + val childStats = p.child.stats + val rowCount: BigInt = childStats.rowCount.map(_.min(limit)).getOrElse(limit) + // Don't propagate column stats, because we don't know the distribution after limit + Statistics( + sizeInBytes = EstimationUtils.getOutputSize(p.output, rowCount, childStats.attributeStats), + rowCount = Some(rowCount), + hints = childStats.hints) + } + + override def visitHint(p: ResolvedHint): Statistics = p.child.stats.copy(hints = p.hints) + + override def visitIntersect(p: Intersect): Statistics = { + val leftSize = p.left.stats.sizeInBytes + val rightSize = p.right.stats.sizeInBytes + val sizeInBytes = if (leftSize < rightSize) leftSize else rightSize + Statistics( + sizeInBytes = sizeInBytes, + hints = p.left.stats.hints.resetForJoin()) + } + + override def visitJoin(p: Join): Statistics = { + p.joinType match { + case LeftAnti | LeftSemi => + // LeftSemi and LeftAnti won't ever be bigger than left + p.left.stats + case _ => + // Make sure we don't propagate isBroadcastable in other joins, because + // they could explode the size. + val stats = default(p) + stats.copy(hints = stats.hints.resetForJoin()) + } + } + + override def visitLocalLimit(p: LocalLimit): Statistics = { + val limit = p.limitExpr.eval().asInstanceOf[Int] + val childStats = p.child.stats + if (limit == 0) { + // sizeInBytes can't be zero, or sizeInBytes of BinaryNode will also be zero + // (product of children). + Statistics(sizeInBytes = 1, rowCount = Some(0), hints = childStats.hints) + } else { + // The output row count of LocalLimit should be the sum of row counts from each partition. + // However, since the number of partitions is not available here, we just use statistics of + // the child. Because the distribution after a limit operation is unknown, we do not propagate + // the column stats. 
+ childStats.copy(attributeStats = AttributeMap(Nil)) + } + } + + override def visitPivot(p: Pivot): Statistics = default(p) + + override def visitProject(p: Project): Statistics = visitUnaryNode(p) + + override def visitRange(p: logical.Range): Statistics = { + p.computeStats() + } + + override def visitRepartition(p: Repartition): Statistics = default(p) + + override def visitRepartitionByExpr(p: RepartitionByExpression): Statistics = default(p) + + override def visitSample(p: Sample): Statistics = { + val ratio = p.upperBound - p.lowerBound + var sizeInBytes = EstimationUtils.ceil(BigDecimal(p.child.stats.sizeInBytes) * ratio) + if (sizeInBytes == 0) { + sizeInBytes = 1 + } + val sampleRows = p.child.stats.rowCount.map(c => EstimationUtils.ceil(BigDecimal(c) * ratio)) + // Don't propagate column stats, because we don't know the distribution after a sample operation + Statistics(sizeInBytes, sampleRows, hints = p.child.stats.hints) + } + + override def visitScriptTransform(p: ScriptTransformation): Statistics = default(p) + + override def visitUnion(p: Union): Statistics = { + Statistics(sizeInBytes = p.children.map(_.stats.sizeInBytes).sum) + } +} diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/BasicStatsEstimationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/BasicStatsEstimationSuite.scala index 912c5fed6345..31a8cbdee977 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/BasicStatsEstimationSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/BasicStatsEstimationSuite.scala @@ -77,37 +77,6 @@ class BasicStatsEstimationSuite extends StatsEstimationTestBase { checkStats(globalLimit, stats) } - test("sample estimation") { - val sample = Sample(0.0, 0.5, withReplacement = false, (math.random * 1000).toLong, plan) - checkStats(sample, Statistics(sizeInBytes = 60, rowCount = Some(5))) - - // Child doesn't have rowCount in stats - val childStats = Statistics(sizeInBytes = 120) - val childPlan = DummyLogicalPlan(childStats, childStats) - val sample2 = - Sample(0.0, 0.11, withReplacement = false, (math.random * 1000).toLong, childPlan) - checkStats(sample2, Statistics(sizeInBytes = 14)) - } - - test("estimate statistics when the conf changes") { - val expectedDefaultStats = - Statistics( - sizeInBytes = 40, - rowCount = Some(10), - attributeStats = AttributeMap(Seq( - AttributeReference("c1", IntegerType)() -> ColumnStat(10, Some(1), Some(10), 0, 4, 4)))) - val expectedCboStats = - Statistics( - sizeInBytes = 4, - rowCount = Some(1), - attributeStats = AttributeMap(Seq( - AttributeReference("c1", IntegerType)() -> ColumnStat(1, Some(5), Some(5), 0, 4, 4)))) - - val plan = DummyLogicalPlan(defaultStats = expectedDefaultStats, cboStats = expectedCboStats) - checkStats( - plan, expectedStatsCboOn = expectedCboStats, expectedStatsCboOff = expectedDefaultStats) - } - /** Check estimated stats when cbo is turned on/off. */ private def checkStats( plan: LogicalPlan, @@ -132,16 +101,3 @@ class BasicStatsEstimationSuite extends StatsEstimationTestBase { private def checkStats(plan: LogicalPlan, expectedStats: Statistics): Unit = checkStats(plan, expectedStats, expectedStats) } - -/** - * This class is used for unit-testing the cbo switch, it mimics a logical plan which computes - * a simple statistics or a cbo estimated statistics based on the conf. 
- */ -private case class DummyLogicalPlan( - defaultStats: Statistics, - cboStats: Statistics) extends LogicalPlan { - override def output: Seq[Attribute] = Nil - override def children: Seq[LogicalPlan] = Nil - override def computeStats: Statistics = - if (conf.cboEnabled) cboStats else defaultStats -} diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/StatsEstimationTestBase.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/StatsEstimationTestBase.scala index eaa33e44a6a5..31dea2e3e7f1 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/StatsEstimationTestBase.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/StatsEstimationTestBase.scala @@ -65,7 +65,7 @@ case class StatsTestPlan( attributeStats: AttributeMap[ColumnStat], size: Option[BigInt] = None) extends LeafNode { override def output: Seq[Attribute] = outputList - override def computeStats: Statistics = Statistics( + override def computeStats(): Statistics = Statistics( // If sizeInBytes is useless in testing, we just use a fake value sizeInBytes = size.getOrElse(Int.MaxValue), rowCount = Some(rowCount), diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/ExistingRDD.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/ExistingRDD.scala index 66f66a289a06..dcb918eeb9d1 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/ExistingRDD.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/ExistingRDD.scala @@ -88,7 +88,7 @@ case class ExternalRDD[T]( override protected def stringArgs: Iterator[Any] = Iterator(output) - @transient override def computeStats: Statistics = Statistics( + override def computeStats(): Statistics = Statistics( // TODO: Instead of returning a default value here, find a way to return a meaningful size // estimate for RDDs. See PR 1238 for more discussions. sizeInBytes = BigInt(session.sessionState.conf.defaultSizeInBytes) @@ -156,7 +156,7 @@ case class LogicalRDD( override protected def stringArgs: Iterator[Any] = Iterator(output) - @transient override def computeStats: Statistics = Statistics( + override def computeStats(): Statistics = Statistics( // TODO: Instead of returning a default value here, find a way to return a meaningful size // estimate for RDDs. See PR 1238 for more discussions. sizeInBytes = BigInt(session.sessionState.conf.defaultSizeInBytes) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/InMemoryRelation.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/InMemoryRelation.scala index 2972132336de..39cf8fcac511 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/InMemoryRelation.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/InMemoryRelation.scala @@ -69,7 +69,7 @@ case class InMemoryRelation( @transient val partitionStatistics = new PartitionStatistics(output) - override def computeStats: Statistics = { + override def computeStats(): Statistics = { if (batchStats.value == 0L) { // Underlying columnar RDD hasn't been materialized, no useful statistics information // available, return the default statistics. 
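Taken together, the changes above replace per-operator computeStats overrides with a cached stats method that dispatches to one of two visitors. A minimal, self-contained sketch of that dispatch-and-cache pattern, using simplified stand-ins for the Spark classes (not the actual implementation):

// Sketch only: simplified stand-ins for LogicalPlanStats and the two stats visitors.
trait Plan {
  def children: Seq[Plan]
  def cboEnabled: Boolean = false
  // Leaf-style default, playing the role of LeafNode.computeStats().
  def computeStats(): BigInt = 1
  private var statsCache: Option[BigInt] = None
  // stats is computed once, by the visitor chosen from the CBO flag, then cached.
  final def stats: BigInt = statsCache.getOrElse {
    val s = if (cboEnabled) basicVisit(this) else sizeOnlyVisit(this)
    statsCache = Some(s)
    s
  }
  final def invalidateStatsCache(): Unit = {
    statsCache = None
    children.foreach(_.invalidateStatsCache())
  }
  // Stand-in for SizeInBytesOnlyStatsPlanVisitor.visit.
  private def sizeOnlyVisit(p: Plan): BigInt =
    if (p.children.isEmpty) p.computeStats() else p.children.map(_.stats).product
  // Stand-in for BasicStatsPlanVisitor.visit: the real one refines per node type,
  // falling back to the size-only visitor when no finer estimate is available.
  private def basicVisit(p: Plan): BigInt = sizeOnlyVisit(p)
}

case class Scan(size: BigInt) extends Plan {
  def children: Seq[Plan] = Nil
  override def computeStats(): BigInt = size
}

case class Join(left: Plan, right: Plan) extends Plan {
  def children: Seq[Plan] = Seq(left, right)
}

object StatsVisitorSketch {
  def main(args: Array[String]): Unit = {
    val plan = Join(Scan(10), Scan(20))
    println(plan.stats) // 200: product of the children's sizes, computed once and cached
  }
}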
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/LogicalRelation.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/LogicalRelation.scala index 6ba190b9e5dc..699f1bad9c4e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/LogicalRelation.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/LogicalRelation.scala @@ -48,9 +48,10 @@ case class LogicalRelation( output = output.map(QueryPlan.normalizeExprId(_, output)), catalogTable = None) - @transient override def computeStats: Statistics = { - catalogTable.flatMap(_.stats.map(_.toPlanStats(output))).getOrElse( - Statistics(sizeInBytes = relation.sizeInBytes)) + override def computeStats(): Statistics = { + catalogTable + .flatMap(_.stats.map(_.toPlanStats(output))) + .getOrElse(Statistics(sizeInBytes = relation.sizeInBytes)) } /** Used to lookup original attribute capitalization */ diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/memory.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/memory.scala index 4979873ee3c7..587ae2bfb63f 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/memory.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/memory.scala @@ -230,6 +230,5 @@ case class MemoryPlan(sink: MemorySink, output: Seq[Attribute]) extends LeafNode private val sizePerRow = sink.schema.toAttributes.map(_.dataType.defaultSize).sum - override def computeStats: Statistics = - Statistics(sizePerRow * sink.allData.size) + override def computeStats(): Statistics = Statistics(sizePerRow * sink.allData.size) } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/PruneFileSourcePartitionsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/PruneFileSourcePartitionsSuite.scala index 3a724aa14f2a..94384185d190 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/PruneFileSourcePartitionsSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/PruneFileSourcePartitionsSuite.scala @@ -86,7 +86,7 @@ class PruneFileSourcePartitionsSuite extends QueryTest with SQLTestUtils with Te case relation: LogicalRelation => relation } assert(relations.size === 1, s"Size wrong for:\n ${df.queryExecution}") - val size2 = relations(0).computeStats.sizeInBytes + val size2 = relations(0).stats.sizeInBytes assert(size2 == relations(0).catalogTable.get.stats.get.sizeInBytes) assert(size2 < tableStats.get.sizeInBytes) } From 37ef32e515ea071afe63b56ba0d4299bb76e8a75 Mon Sep 17 00:00:00 2001 From: actuaryzhang Date: Sat, 1 Jul 2017 14:57:57 +0800 Subject: [PATCH 010/125] [SPARK-21275][ML] Update GLM test to use supportedFamilyNames ## What changes were proposed in this pull request? Update GLM test to use supportedFamilyNames as suggested here: https://github.com/apache/spark/pull/16699#discussion-diff-100574976R855 Author: actuaryzhang Closes #18495 from actuaryzhang/mlGlmTest2. 
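The expected values in the diff below are rearranged because the test now iterates families alphabetically. A minimal sketch of the new pairing logic follows; the family list is an assumed mirror of supportedFamilyNames, and the intercepts are the R outputs quoted in the test:

object FamilyOrderSketch {
  // Assumed contents of GeneralizedLinearRegression.supportedFamilyNames.
  val supportedFamilyNames = Array("gaussian", "binomial", "poisson", "gamma", "tweedie")

  def main(args: Array[String]): Unit = {
    // One (unweighted, weighted) intercept pair per family, in alphabetical order:
    // binomial, gamma, gaussian, poisson, tweedie, matching the R output in the test.
    val expected = Seq(
      0.5108256, 0.1201443,   // binomial
      1.600000, 1.886792,     // gamma
      0.625, 0.530,           // gaussian
      -0.4700036, -0.6348783, // poisson
      1.325782, 1.463641)     // tweedie
    var idx = 0
    for (family <- supportedFamilyNames.sortWith(_ < _); useWeight <- Seq(false, true)) {
      println(s"family=$family, useWeight=$useWeight, expected intercept=${expected(idx)}")
      idx += 1
    }
  }
}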
--- .../GeneralizedLinearRegressionSuite.scala | 33 +++++++++---------- 1 file changed, 16 insertions(+), 17 deletions(-) diff --git a/mllib/src/test/scala/org/apache/spark/ml/regression/GeneralizedLinearRegressionSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/regression/GeneralizedLinearRegressionSuite.scala index 83f1344a7bcb..a47bd17f47bb 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/regression/GeneralizedLinearRegressionSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/regression/GeneralizedLinearRegressionSuite.scala @@ -749,15 +749,15 @@ class GeneralizedLinearRegressionSuite library(statmod) y <- c(1.0, 0.5, 0.7, 0.3) w <- c(1, 2, 3, 4) - for (fam in list(gaussian(), poisson(), binomial(), Gamma(), tweedie(1.6))) { + for (fam in list(binomial(), Gamma(), gaussian(), poisson(), tweedie(1.6))) { model1 <- glm(y ~ 1, family = fam) model2 <- glm(y ~ 1, family = fam, weights = w) print(as.vector(c(coef(model1), coef(model2)))) } - [1] 0.625 0.530 - [1] -0.4700036 -0.6348783 [1] 0.5108256 0.1201443 [1] 1.600000 1.886792 + [1] 0.625 0.530 + [1] -0.4700036 -0.6348783 [1] 1.325782 1.463641 */ @@ -768,13 +768,13 @@ class GeneralizedLinearRegressionSuite Instance(0.3, 4.0, Vectors.zeros(0)) ).toDF() - val expected = Seq(0.625, 0.530, -0.4700036, -0.6348783, 0.5108256, 0.1201443, - 1.600000, 1.886792, 1.325782, 1.463641) + val expected = Seq(0.5108256, 0.1201443, 1.600000, 1.886792, 0.625, 0.530, + -0.4700036, -0.6348783, 1.325782, 1.463641) import GeneralizedLinearRegression._ var idx = 0 - for (family <- Seq("gaussian", "poisson", "binomial", "gamma", "tweedie")) { + for (family <- GeneralizedLinearRegression.supportedFamilyNames.sortWith(_ < _)) { for (useWeight <- Seq(false, true)) { val trainer = new GeneralizedLinearRegression().setFamily(family) if (useWeight) trainer.setWeightCol("weight") @@ -807,7 +807,7 @@ class GeneralizedLinearRegressionSuite 0.5, 2.1, 0.5, 1.0, 2.0, 0.9, 0.4, 1.0, 2.0, 1.0, 0.7, 0.7, 0.0, 3.0, 3.0), 4, 5, byrow = TRUE)) - families <- list(gaussian, binomial, poisson, Gamma, tweedie(1.5)) + families <- list(binomial, Gamma, gaussian, poisson, tweedie(1.5)) f1 <- V1 ~ -1 + V4 + V5 f2 <- V1 ~ V4 + V5 for (f in c(f1, f2)) { @@ -816,15 +816,15 @@ class GeneralizedLinearRegressionSuite print(as.vector(coef(model))) } } - [1] 0.5169222 -0.3344444 [1] 0.9419107 -0.6864404 - [1] 0.1812436 -0.6568422 [1] -0.2869094 0.7857710 + [1] 0.5169222 -0.3344444 + [1] 0.1812436 -0.6568422 [1] 0.1055254 0.2979113 - [1] -0.05990345 0.53188982 -0.32118415 [1] -0.2147117 0.9911750 -0.6356096 - [1] -1.5616130 0.6646470 -0.3192581 [1] 0.3390397 -0.3406099 0.6870259 + [1] -0.05990345 0.53188982 -0.32118415 + [1] -1.5616130 0.6646470 -0.3192581 [1] 0.3665034 0.1039416 0.1484616 */ val dataset = Seq( @@ -835,23 +835,22 @@ class GeneralizedLinearRegressionSuite ).toDF() val expected = Seq( - Vectors.dense(0, 0.5169222, -0.3344444), Vectors.dense(0, 0.9419107, -0.6864404), - Vectors.dense(0, 0.1812436, -0.6568422), Vectors.dense(0, -0.2869094, 0.785771), + Vectors.dense(0, 0.5169222, -0.3344444), + Vectors.dense(0, 0.1812436, -0.6568422), Vectors.dense(0, 0.1055254, 0.2979113), - Vectors.dense(-0.05990345, 0.53188982, -0.32118415), Vectors.dense(-0.2147117, 0.991175, -0.6356096), - Vectors.dense(-1.561613, 0.664647, -0.3192581), Vectors.dense(0.3390397, -0.3406099, 0.6870259), + Vectors.dense(-0.05990345, 0.53188982, -0.32118415), + Vectors.dense(-1.561613, 0.664647, -0.3192581), Vectors.dense(0.3665034, 0.1039416, 0.1484616)) import GeneralizedLinearRegression._ var idx 
= 0 - for (fitIntercept <- Seq(false, true)) { - for (family <- Seq("gaussian", "binomial", "poisson", "gamma", "tweedie")) { + for (family <- GeneralizedLinearRegression.supportedFamilyNames.sortWith(_ < _)) { val trainer = new GeneralizedLinearRegression().setFamily(family) .setFitIntercept(fitIntercept).setOffsetCol("offset") .setWeightCol("weight").setLinkPredictionCol("linkPrediction") From e0b047eafed92eadf6842a9df964438095e12d41 Mon Sep 17 00:00:00 2001 From: Ruifeng Zheng Date: Sat, 1 Jul 2017 15:37:41 +0800 Subject: [PATCH 011/125] [SPARK-18518][ML] HasSolver supports override ## What changes were proposed in this pull request? 1, make param support non-final with `finalFields` option 2, generate `HasSolver` with `finalFields = false` 3, override `solver` in LiR, GLR, and make MLPC inherit `HasSolver` ## How was this patch tested? existing tests Author: Ruifeng Zheng Author: Zheng RuiFeng Closes #16028 from zhengruifeng/param_non_final. --- .../MultilayerPerceptronClassifier.scala | 19 ++++---- .../ml/param/shared/SharedParamsCodeGen.scala | 11 +++-- .../spark/ml/param/shared/sharedParams.scala | 8 ++-- .../GeneralizedLinearRegression.scala | 21 ++++++++- .../ml/regression/LinearRegression.scala | 46 +++++++++++++++---- python/pyspark/ml/classification.py | 18 +------- python/pyspark/ml/regression.py | 5 ++ 7 files changed, 82 insertions(+), 46 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/MultilayerPerceptronClassifier.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/MultilayerPerceptronClassifier.scala index ec39f964e213..ceba11edc93b 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/MultilayerPerceptronClassifier.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/MultilayerPerceptronClassifier.scala @@ -27,13 +27,16 @@ import org.apache.spark.ml.ann.{FeedForwardTopology, FeedForwardTrainer} import org.apache.spark.ml.feature.LabeledPoint import org.apache.spark.ml.linalg.{Vector, Vectors} import org.apache.spark.ml.param._ -import org.apache.spark.ml.param.shared.{HasMaxIter, HasSeed, HasStepSize, HasTol} +import org.apache.spark.ml.param.shared._ import org.apache.spark.ml.util._ import org.apache.spark.sql.Dataset /** Params for Multilayer Perceptron. */ private[classification] trait MultilayerPerceptronParams extends PredictorParams - with HasSeed with HasMaxIter with HasTol with HasStepSize { + with HasSeed with HasMaxIter with HasTol with HasStepSize with HasSolver { + + import MultilayerPerceptronClassifier._ + /** * Layer sizes including input size and output size. * @@ -78,14 +81,10 @@ private[classification] trait MultilayerPerceptronParams extends PredictorParams * @group expertParam */ @Since("2.0.0") - final val solver: Param[String] = new Param[String](this, "solver", + final override val solver: Param[String] = new Param[String](this, "solver", "The solver algorithm for optimization. Supported options: " + - s"${MultilayerPerceptronClassifier.supportedSolvers.mkString(", ")}. (Default l-bfgs)", - ParamValidators.inArray[String](MultilayerPerceptronClassifier.supportedSolvers)) - - /** @group expertGetParam */ - @Since("2.0.0") - final def getSolver: String = $(solver) + s"${supportedSolvers.mkString(", ")}. (Default l-bfgs)", + ParamValidators.inArray[String](supportedSolvers)) /** * The initial weights of the model. 
@@ -101,7 +100,7 @@ private[classification] trait MultilayerPerceptronParams extends PredictorParams final def getInitialWeights: Vector = $(initialWeights) setDefault(maxIter -> 100, tol -> 1e-6, blockSize -> 128, - solver -> MultilayerPerceptronClassifier.LBFGS, stepSize -> 0.03) + solver -> LBFGS, stepSize -> 0.03) } /** Label to vector converter. */ diff --git a/mllib/src/main/scala/org/apache/spark/ml/param/shared/SharedParamsCodeGen.scala b/mllib/src/main/scala/org/apache/spark/ml/param/shared/SharedParamsCodeGen.scala index 013817a41baf..23e0d45d943a 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/param/shared/SharedParamsCodeGen.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/param/shared/SharedParamsCodeGen.scala @@ -80,8 +80,7 @@ private[shared] object SharedParamsCodeGen { " 0)", isValid = "ParamValidators.gt(0)"), ParamDesc[String]("weightCol", "weight column name. If this is not set or empty, we treat " + "all instance weights as 1.0"), - ParamDesc[String]("solver", "the solver algorithm for optimization. If this is not set or " + - "empty, default value is 'auto'", Some("\"auto\"")), + ParamDesc[String]("solver", "the solver algorithm for optimization", finalFields = false), ParamDesc[Int]("aggregationDepth", "suggested depth for treeAggregate (>= 2)", Some("2"), isValid = "ParamValidators.gtEq(2)", isExpertParam = true)) @@ -99,6 +98,7 @@ private[shared] object SharedParamsCodeGen { defaultValueStr: Option[String] = None, isValid: String = "", finalMethods: Boolean = true, + finalFields: Boolean = true, isExpertParam: Boolean = false) { require(name.matches("[a-z][a-zA-Z0-9]*"), s"Param name $name is invalid.") @@ -167,6 +167,11 @@ private[shared] object SharedParamsCodeGen { } else { "def" } + val fieldStr = if (param.finalFields) { + "final val" + } else { + "val" + } val htmlCompliantDoc = Utility.escape(doc) @@ -180,7 +185,7 @@ private[shared] object SharedParamsCodeGen { | * Param for $htmlCompliantDoc. | * @group ${groupStr(0)} | */ - | final val $name: $Param = new $Param(this, "$name", "$doc"$isValid) + | $fieldStr $name: $Param = new $Param(this, "$name", "$doc"$isValid) |$setDefault | /** @group ${groupStr(1)} */ | $methodStr get$Name: $T = $$($name) diff --git a/mllib/src/main/scala/org/apache/spark/ml/param/shared/sharedParams.scala b/mllib/src/main/scala/org/apache/spark/ml/param/shared/sharedParams.scala index 50619607a505..1a8f499798b8 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/param/shared/sharedParams.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/param/shared/sharedParams.scala @@ -374,17 +374,15 @@ private[ml] trait HasWeightCol extends Params { } /** - * Trait for shared param solver (default: "auto"). + * Trait for shared param solver. */ private[ml] trait HasSolver extends Params { /** - * Param for the solver algorithm for optimization. If this is not set or empty, default value is 'auto'. + * Param for the solver algorithm for optimization. * @group param */ - final val solver: Param[String] = new Param[String](this, "solver", "the solver algorithm for optimization. 
If this is not set or empty, default value is 'auto'") - - setDefault(solver, "auto") + val solver: Param[String] = new Param[String](this, "solver", "the solver algorithm for optimization") /** @group getParam */ final def getSolver: String = $(solver) diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/GeneralizedLinearRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/GeneralizedLinearRegression.scala index ce3460ae4356..c600b87bdc64 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/regression/GeneralizedLinearRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/regression/GeneralizedLinearRegression.scala @@ -164,7 +164,18 @@ private[regression] trait GeneralizedLinearRegressionBase extends PredictorParam isDefined(linkPredictionCol) && $(linkPredictionCol).nonEmpty } - import GeneralizedLinearRegression._ + /** + * The solver algorithm for optimization. + * Supported options: "irls" (iteratively reweighted least squares). + * Default: "irls" + * + * @group param + */ + @Since("2.3.0") + final override val solver: Param[String] = new Param[String](this, "solver", + "The solver algorithm for optimization. Supported options: " + + s"${supportedSolvers.mkString(", ")}. (Default irls)", + ParamValidators.inArray[String](supportedSolvers)) @Since("2.0.0") override def validateAndTransformSchema( @@ -350,7 +361,7 @@ class GeneralizedLinearRegression @Since("2.0.0") (@Since("2.0.0") override val */ @Since("2.0.0") def setSolver(value: String): this.type = set(solver, value) - setDefault(solver -> "irls") + setDefault(solver -> IRLS) /** * Sets the link prediction (linear predictor) column name. @@ -442,6 +453,12 @@ object GeneralizedLinearRegression extends DefaultParamsReadable[GeneralizedLine Gamma -> Inverse, Gamma -> Identity, Gamma -> Log ) + /** String name for "irls" (iteratively reweighted least squares) solver. */ + private[regression] val IRLS = "irls" + + /** Set of solvers that GeneralizedLinearRegression supports. */ + private[regression] val supportedSolvers = Array(IRLS) + /** Set of family names that GeneralizedLinearRegression supports. */ private[regression] lazy val supportedFamilyNames = supportedFamilyAndLinkPairs.map(_._1.name).toArray :+ "tweedie" diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala index db5ac4f14bd3..ce5e0797915d 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala @@ -34,7 +34,7 @@ import org.apache.spark.ml.optim.WeightedLeastSquares import org.apache.spark.ml.PredictorParams import org.apache.spark.ml.optim.aggregator.LeastSquaresAggregator import org.apache.spark.ml.optim.loss.{L2Regularization, RDDLossFunction} -import org.apache.spark.ml.param.ParamMap +import org.apache.spark.ml.param.{Param, ParamMap, ParamValidators} import org.apache.spark.ml.param.shared._ import org.apache.spark.ml.util._ import org.apache.spark.mllib.evaluation.RegressionMetrics @@ -53,7 +53,23 @@ import org.apache.spark.storage.StorageLevel private[regression] trait LinearRegressionParams extends PredictorParams with HasRegParam with HasElasticNetParam with HasMaxIter with HasTol with HasFitIntercept with HasStandardization with HasWeightCol with HasSolver - with HasAggregationDepth + with HasAggregationDepth { + + import LinearRegression._ + + /** + * The solver algorithm for optimization. 
+ * Supported options: "l-bfgs", "normal" and "auto". + * Default: "auto" + * + * @group param + */ + @Since("2.3.0") + final override val solver: Param[String] = new Param[String](this, "solver", + "The solver algorithm for optimization. Supported options: " + + s"${supportedSolvers.mkString(", ")}. (Default auto)", + ParamValidators.inArray[String](supportedSolvers)) +} /** * Linear regression. @@ -78,6 +94,8 @@ class LinearRegression @Since("1.3.0") (@Since("1.3.0") override val uid: String extends Regressor[Vector, LinearRegression, LinearRegressionModel] with LinearRegressionParams with DefaultParamsWritable with Logging { + import LinearRegression._ + @Since("1.4.0") def this() = this(Identifiable.randomUID("linReg")) @@ -175,12 +193,8 @@ class LinearRegression @Since("1.3.0") (@Since("1.3.0") override val uid: String * @group setParam */ @Since("1.6.0") - def setSolver(value: String): this.type = { - require(Set("auto", "l-bfgs", "normal").contains(value), - s"Solver $value was not supported. Supported options: auto, l-bfgs, normal") - set(solver, value) - } - setDefault(solver -> "auto") + def setSolver(value: String): this.type = set(solver, value) + setDefault(solver -> AUTO) /** * Suggested depth for treeAggregate (greater than or equal to 2). @@ -210,8 +224,8 @@ class LinearRegression @Since("1.3.0") (@Since("1.3.0") override val uid: String elasticNetParam, fitIntercept, maxIter, regParam, standardization, aggregationDepth) instr.logNumFeatures(numFeatures) - if (($(solver) == "auto" && - numFeatures <= WeightedLeastSquares.MAX_NUM_FEATURES) || $(solver) == "normal") { + if (($(solver) == AUTO && + numFeatures <= WeightedLeastSquares.MAX_NUM_FEATURES) || $(solver) == NORMAL) { // For low dimensional data, WeightedLeastSquares is more efficient since the // training algorithm only requires one pass through the data. (SPARK-10668) @@ -444,6 +458,18 @@ object LinearRegression extends DefaultParamsReadable[LinearRegression] { */ @Since("2.1.0") val MAX_FEATURES_FOR_NORMAL_SOLVER: Int = WeightedLeastSquares.MAX_NUM_FEATURES + + /** String name for "auto". */ + private[regression] val AUTO = "auto" + + /** String name for "normal". */ + private[regression] val NORMAL = "normal" + + /** String name for "l-bfgs". */ + private[regression] val LBFGS = "l-bfgs" + + /** Set of solvers that LinearRegression supports. */ + private[regression] val supportedSolvers = Array(AUTO, NORMAL, LBFGS) } /** diff --git a/python/pyspark/ml/classification.py b/python/pyspark/ml/classification.py index 9b345ac73f3d..948806a5c936 100644 --- a/python/pyspark/ml/classification.py +++ b/python/pyspark/ml/classification.py @@ -1265,8 +1265,8 @@ def theta(self): @inherit_doc class MultilayerPerceptronClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol, - HasMaxIter, HasTol, HasSeed, HasStepSize, JavaMLWritable, - JavaMLReadable): + HasMaxIter, HasTol, HasSeed, HasStepSize, HasSolver, + JavaMLWritable, JavaMLReadable): """ Classifier trainer based on the Multilayer Perceptron. Each layer has sigmoid activation function, output layer has softmax. @@ -1407,20 +1407,6 @@ def getStepSize(self): """ return self.getOrDefault(self.stepSize) - @since("2.0.0") - def setSolver(self, value): - """ - Sets the value of :py:attr:`solver`. - """ - return self._set(solver=value) - - @since("2.0.0") - def getSolver(self): - """ - Gets the value of solver or its default value. 
- """ - return self.getOrDefault(self.solver) - @since("2.0.0") def setInitialWeights(self, value): """ diff --git a/python/pyspark/ml/regression.py b/python/pyspark/ml/regression.py index 2d17f95b0c44..84d843369e10 100644 --- a/python/pyspark/ml/regression.py +++ b/python/pyspark/ml/regression.py @@ -95,6 +95,9 @@ class LinearRegression(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPrediction .. versionadded:: 1.4.0 """ + solver = Param(Params._dummy(), "solver", "The solver algorithm for optimization. Supported " + + "options: auto, normal, l-bfgs.", typeConverter=TypeConverters.toString) + @keyword_only def __init__(self, featuresCol="features", labelCol="label", predictionCol="prediction", maxIter=100, regParam=0.0, elasticNetParam=0.0, tol=1e-6, fitIntercept=True, @@ -1371,6 +1374,8 @@ class GeneralizedLinearRegression(JavaEstimator, HasLabelCol, HasFeaturesCol, Ha linkPower = Param(Params._dummy(), "linkPower", "The index in the power link function. " + "Only applicable to the Tweedie family.", typeConverter=TypeConverters.toFloat) + solver = Param(Params._dummy(), "solver", "The solver algorithm for optimization. Supported " + + "options: irls.", typeConverter=TypeConverters.toString) @keyword_only def __init__(self, labelCol="label", featuresCol="features", predictionCol="prediction", From 6beca9ce94f484de2f9ffb946bef8334781b3122 Mon Sep 17 00:00:00 2001 From: Devaraj K Date: Sat, 1 Jul 2017 15:53:49 +0100 Subject: [PATCH 012/125] [SPARK-21170][CORE] Utils.tryWithSafeFinallyAndFailureCallbacks throws IllegalArgumentException: Self-suppression not permitted ## What changes were proposed in this pull request? Not adding the exception to the suppressed if it is the same instance as originalThrowable. ## How was this patch tested? Added new tests to verify this, these tests fail without source code changes and passes with the change. Author: Devaraj K Closes #18384 from devaraj-kavali/SPARK-21170. 
--- .../scala/org/apache/spark/util/Utils.scala | 30 +++---- .../org/apache/spark/util/UtilsSuite.scala | 88 ++++++++++++++++++- 2 files changed, 99 insertions(+), 19 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/util/Utils.scala b/core/src/main/scala/org/apache/spark/util/Utils.scala index bbb7999e2a14..26f61e25da4d 100644 --- a/core/src/main/scala/org/apache/spark/util/Utils.scala +++ b/core/src/main/scala/org/apache/spark/util/Utils.scala @@ -1348,14 +1348,10 @@ private[spark] object Utils extends Logging { try { finallyBlock } catch { - case t: Throwable => - if (originalThrowable != null) { - originalThrowable.addSuppressed(t) - logWarning(s"Suppressing exception in finally: " + t.getMessage, t) - throw originalThrowable - } else { - throw t - } + case t: Throwable if (originalThrowable != null && originalThrowable != t) => + originalThrowable.addSuppressed(t) + logWarning(s"Suppressing exception in finally: ${t.getMessage}", t) + throw originalThrowable } } } @@ -1387,22 +1383,20 @@ private[spark] object Utils extends Logging { catchBlock } catch { case t: Throwable => - originalThrowable.addSuppressed(t) - logWarning(s"Suppressing exception in catch: " + t.getMessage, t) + if (originalThrowable != t) { + originalThrowable.addSuppressed(t) + logWarning(s"Suppressing exception in catch: ${t.getMessage}", t) + } } throw originalThrowable } finally { try { finallyBlock } catch { - case t: Throwable => - if (originalThrowable != null) { - originalThrowable.addSuppressed(t) - logWarning(s"Suppressing exception in finally: " + t.getMessage, t) - throw originalThrowable - } else { - throw t - } + case t: Throwable if (originalThrowable != null && originalThrowable != t) => + originalThrowable.addSuppressed(t) + logWarning(s"Suppressing exception in finally: ${t.getMessage}", t) + throw originalThrowable } } } diff --git a/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala b/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala index f7bc8f888b0d..4ce143f18bbf 100644 --- a/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala +++ b/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala @@ -38,7 +38,7 @@ import org.apache.commons.math3.stat.inference.ChiSquareTest import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.Path -import org.apache.spark.{SparkConf, SparkFunSuite} +import org.apache.spark.{SparkConf, SparkFunSuite, TaskContext} import org.apache.spark.internal.Logging import org.apache.spark.network.util.ByteUnit @@ -1024,4 +1024,90 @@ class UtilsSuite extends SparkFunSuite with ResetSystemProperties with Logging { assert(redactedConf("spark.sensitive.property") === Utils.REDACTION_REPLACEMENT_TEXT) } + + test("tryWithSafeFinally") { + var e = new Error("Block0") + val finallyBlockError = new Error("Finally Block") + var isErrorOccurred = false + // if the try and finally blocks throw different exception instances + try { + Utils.tryWithSafeFinally { throw e }(finallyBlock = { throw finallyBlockError }) + } catch { + case t: Error => + assert(t.getSuppressed.head == finallyBlockError) + isErrorOccurred = true + } + assert(isErrorOccurred) + // if the try and finally blocks throw the same exception instance then it should not + // try to add to suppressed and get IllegalArgumentException + e = new Error("Block1") + isErrorOccurred = false + try { + Utils.tryWithSafeFinally { throw e }(finallyBlock = { throw e }) + } catch { + case t: Error => + assert(t.getSuppressed.length == 0) + isErrorOccurred = true + } + 
assert(isErrorOccurred) + // if the try throws the exception and finally doesn't throw exception + e = new Error("Block2") + isErrorOccurred = false + try { + Utils.tryWithSafeFinally { throw e }(finallyBlock = {}) + } catch { + case t: Error => + assert(t.getSuppressed.length == 0) + isErrorOccurred = true + } + assert(isErrorOccurred) + // if the try and finally block don't throw exception + Utils.tryWithSafeFinally {}(finallyBlock = {}) + } + + test("tryWithSafeFinallyAndFailureCallbacks") { + var e = new Error("Block0") + val catchBlockError = new Error("Catch Block") + val finallyBlockError = new Error("Finally Block") + var isErrorOccurred = false + TaskContext.setTaskContext(TaskContext.empty()) + // if the try, catch and finally blocks throw different exception instances + try { + Utils.tryWithSafeFinallyAndFailureCallbacks { throw e }( + catchBlock = { throw catchBlockError }, finallyBlock = { throw finallyBlockError }) + } catch { + case t: Error => + assert(t.getSuppressed.head == catchBlockError) + assert(t.getSuppressed.last == finallyBlockError) + isErrorOccurred = true + } + assert(isErrorOccurred) + // if the try, catch and finally blocks throw the same exception instance then it should not + // try to add to suppressed and get IllegalArgumentException + e = new Error("Block1") + isErrorOccurred = false + try { + Utils.tryWithSafeFinallyAndFailureCallbacks { throw e }(catchBlock = { throw e }, + finallyBlock = { throw e }) + } catch { + case t: Error => + assert(t.getSuppressed.length == 0) + isErrorOccurred = true + } + assert(isErrorOccurred) + // if the try throws the exception, catch and finally don't throw exceptions + e = new Error("Block2") + isErrorOccurred = false + try { + Utils.tryWithSafeFinallyAndFailureCallbacks { throw e }(catchBlock = {}, finallyBlock = {}) + } catch { + case t: Error => + assert(t.getSuppressed.length == 0) + isErrorOccurred = true + } + assert(isErrorOccurred) + // if the try, catch and finally blocks don't throw exceptions + Utils.tryWithSafeFinallyAndFailureCallbacks {}(catchBlock = {}, finallyBlock = {}) + TaskContext.unset + } } From c605fee01f180588ecb2f48710a7b84073bd3b9a Mon Sep 17 00:00:00 2001 From: Xingbo Jiang Date: Sun, 2 Jul 2017 08:50:48 +0100 Subject: [PATCH 013/125] [SPARK-21260][SQL][MINOR] Remove the unused OutputFakerExec ## What changes were proposed in this pull request? OutputFakerExec was added long ago and is not used anywhere now so we should remove it. ## How was this patch tested? N/A Author: Xingbo Jiang Closes #18473 from jiangxb1987/OutputFakerExec. --- .../spark/sql/execution/basicPhysicalOperators.scala | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/basicPhysicalOperators.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/basicPhysicalOperators.scala index f3ca8397047f..2151c339b9b8 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/basicPhysicalOperators.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/basicPhysicalOperators.scala @@ -584,17 +584,6 @@ case class CoalesceExec(numPartitions: Int, child: SparkPlan) extends UnaryExecN } } -/** - * A plan node that does nothing but lie about the output of its child. Used to spice a - * (hopefully structurally equivalent) tree from a different optimization sequence into an already - * resolved tree. 
- */ -case class OutputFakerExec(output: Seq[Attribute], child: SparkPlan) extends SparkPlan { - def children: Seq[SparkPlan] = child :: Nil - - protected override def doExecute(): RDD[InternalRow] = child.execute() -} - /** * Physical plan for a subquery. */ From c19680be1c532dded1e70edce7a981ba28af09ad Mon Sep 17 00:00:00 2001 From: Yanbo Liang Date: Sun, 2 Jul 2017 16:17:03 +0800 Subject: [PATCH 014/125] [SPARK-19852][PYSPARK][ML] Python StringIndexer supports 'keep' to handle invalid data ## What changes were proposed in this pull request? This PR is to maintain API parity with changes made in SPARK-17498 to support a new option 'keep' in StringIndexer to handle unseen labels or NULL values with PySpark. Note: This is updated version of #17237 , the primary author of this PR is VinceShieh . ## How was this patch tested? Unit tests. Author: VinceShieh Author: Yanbo Liang Closes #18453 from yanboliang/spark-19852. --- python/pyspark/ml/feature.py | 6 ++++++ python/pyspark/ml/tests.py | 21 +++++++++++++++++++++ 2 files changed, 27 insertions(+) diff --git a/python/pyspark/ml/feature.py b/python/pyspark/ml/feature.py index 77de1cc18246..25ad06f682ed 100755 --- a/python/pyspark/ml/feature.py +++ b/python/pyspark/ml/feature.py @@ -2132,6 +2132,12 @@ class StringIndexer(JavaEstimator, HasInputCol, HasOutputCol, HasHandleInvalid, "frequencyDesc, frequencyAsc, alphabetDesc, alphabetAsc.", typeConverter=TypeConverters.toString) + handleInvalid = Param(Params._dummy(), "handleInvalid", "how to handle invalid data (unseen " + + "labels or NULL values). Options are 'skip' (filter out rows with " + + "invalid data), error (throw an error), or 'keep' (put invalid data " + + "in a special additional bucket, at index numLabels).", + typeConverter=TypeConverters.toString) + @keyword_only def __init__(self, inputCol=None, outputCol=None, handleInvalid="error", stringOrderType="frequencyDesc"): diff --git a/python/pyspark/ml/tests.py b/python/pyspark/ml/tests.py index 17a39472e1fe..ffb8b0a890ff 100755 --- a/python/pyspark/ml/tests.py +++ b/python/pyspark/ml/tests.py @@ -551,6 +551,27 @@ def test_rformula_string_indexer_order_type(self): for i in range(0, len(expected)): self.assertTrue(all(observed[i]["features"].toArray() == expected[i])) + def test_string_indexer_handle_invalid(self): + df = self.spark.createDataFrame([ + (0, "a"), + (1, "d"), + (2, None)], ["id", "label"]) + + si1 = StringIndexer(inputCol="label", outputCol="indexed", handleInvalid="keep", + stringOrderType="alphabetAsc") + model1 = si1.fit(df) + td1 = model1.transform(df) + actual1 = td1.select("id", "indexed").collect() + expected1 = [Row(id=0, indexed=0.0), Row(id=1, indexed=1.0), Row(id=2, indexed=2.0)] + self.assertEqual(actual1, expected1) + + si2 = si1.setHandleInvalid("skip") + model2 = si2.fit(df) + td2 = model2.transform(df) + actual2 = td2.select("id", "indexed").collect() + expected2 = [Row(id=0, indexed=0.0), Row(id=1, indexed=1.0)] + self.assertEqual(actual2, expected2) + class HasInducedError(Params): From d4107196d59638845bd19da6aab074424d90ddaf Mon Sep 17 00:00:00 2001 From: Rui Zha Date: Sun, 2 Jul 2017 17:37:47 -0700 Subject: [PATCH 015/125] [SPARK-18004][SQL] Make sure the date or timestamp related predicate can be pushed down to Oracle correctly ## What changes were proposed in this pull request? Move `compileValue` method in JDBCRDD to JdbcDialect, and override the `compileValue` method in OracleDialect to rewrite the Oracle-specific timestamp and date literals in where clause. ## How was this patch tested? 
An integration test has been added.

Author: Rui Zha
Author: Zharui

Closes #18451 from SharpRay/extend-compileValue-to-dialects.
---
 .../sql/jdbc/OracleIntegrationSuite.scala     | 45 +++++++++++++++++++
 .../execution/datasources/jdbc/JDBCRDD.scala  | 35 +++++----------
 .../apache/spark/sql/jdbc/JdbcDialects.scala  | 27 ++++++++++-
 .../apache/spark/sql/jdbc/OracleDialect.scala | 15 ++++++-
 4 files changed, 95 insertions(+), 27 deletions(-)

diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/OracleIntegrationSuite.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/OracleIntegrationSuite.scala
index b2f096964427..e14810a32edc 100644
--- a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/OracleIntegrationSuite.scala
+++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/OracleIntegrationSuite.scala
@@ -223,4 +223,49 @@ class OracleIntegrationSuite extends DockerJDBCIntegrationSuite with SharedSQLCo
     val types = rows(0).toSeq.map(x => x.getClass.toString)
     assert(types(1).equals("class java.sql.Timestamp"))
   }
+
+  test("SPARK-18004: Make sure date or timestamp related predicate is pushed down correctly") {
+    val props = new Properties()
+    props.put("oracle.jdbc.mapDateToTimestamp", "false")
+
+    val schema = StructType(Seq(
+      StructField("date_type", DateType, true),
+      StructField("timestamp_type", TimestampType, true)
+    ))
+
+    val tableName = "test_date_timestamp_pushdown"
+    val dateVal = Date.valueOf("2017-06-22")
+    val timestampVal = Timestamp.valueOf("2017-06-22 21:30:07")
+
+    val data = spark.sparkContext.parallelize(Seq(
+      Row(dateVal, timestampVal)
+    ))
+
+    val dfWrite = spark.createDataFrame(data, schema)
+    dfWrite.write.jdbc(jdbcUrl, tableName, props)
+
+    val dfRead = spark.read.jdbc(jdbcUrl, tableName, props)
+
+    val millis = System.currentTimeMillis()
+    val dt = new java.sql.Date(millis)
+    val ts = new java.sql.Timestamp(millis)
+
+    // Query the Oracle table with date and timestamp predicates,
+    // which should be pushed down to Oracle.
+    val df = dfRead.filter(dfRead.col("date_type").lt(dt))
+      .filter(dfRead.col("timestamp_type").lt(ts))
+
+    val metadata = df.queryExecution.sparkPlan.metadata
+    // The "PushedFilters" entry should exist in the DataFrame's physical plan,
+    // and the presence of the right literals in "PushedFilters" proves that the
+    // predicates were pushed down effectively.
+ assert(metadata.get("PushedFilters").ne(None)) + assert(metadata("PushedFilters").contains(dt.toString)) + assert(metadata("PushedFilters").contains(ts.toString)) + + val row = df.collect()(0) + assert(row.getDate(0).equals(dateVal)) + assert(row.getTimestamp(1).equals(timestampVal)) + } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRDD.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRDD.scala index 2bdc43254133..0f53b5c7c6f0 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRDD.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRDD.scala @@ -17,12 +17,10 @@ package org.apache.spark.sql.execution.datasources.jdbc -import java.sql.{Connection, Date, PreparedStatement, ResultSet, SQLException, Timestamp} +import java.sql.{Connection, PreparedStatement, ResultSet, SQLException} import scala.util.control.NonFatal -import org.apache.commons.lang3.StringUtils - import org.apache.spark.{InterruptibleIterator, Partition, SparkContext, TaskContext} import org.apache.spark.internal.Logging import org.apache.spark.rdd.RDD @@ -86,20 +84,6 @@ object JDBCRDD extends Logging { new StructType(columns.map(name => fieldMap(name))) } - /** - * Converts value to SQL expression. - */ - private def compileValue(value: Any): Any = value match { - case stringValue: String => s"'${escapeSql(stringValue)}'" - case timestampValue: Timestamp => "'" + timestampValue + "'" - case dateValue: Date => "'" + dateValue + "'" - case arrayValue: Array[Any] => arrayValue.map(compileValue).mkString(", ") - case _ => value - } - - private def escapeSql(value: String): String = - if (value == null) null else StringUtils.replace(value, "'", "''") - /** * Turns a single Filter into a String representing a SQL expression. * Returns None for an unhandled filter. 
@@ -108,15 +92,16 @@ object JDBCRDD extends Logging { def quote(colName: String): String = dialect.quoteIdentifier(colName) Option(f match { - case EqualTo(attr, value) => s"${quote(attr)} = ${compileValue(value)}" + case EqualTo(attr, value) => s"${quote(attr)} = ${dialect.compileValue(value)}" case EqualNullSafe(attr, value) => val col = quote(attr) - s"(NOT ($col != ${compileValue(value)} OR $col IS NULL OR " + - s"${compileValue(value)} IS NULL) OR ($col IS NULL AND ${compileValue(value)} IS NULL))" - case LessThan(attr, value) => s"${quote(attr)} < ${compileValue(value)}" - case GreaterThan(attr, value) => s"${quote(attr)} > ${compileValue(value)}" - case LessThanOrEqual(attr, value) => s"${quote(attr)} <= ${compileValue(value)}" - case GreaterThanOrEqual(attr, value) => s"${quote(attr)} >= ${compileValue(value)}" + s"(NOT ($col != ${dialect.compileValue(value)} OR $col IS NULL OR " + + s"${dialect.compileValue(value)} IS NULL) OR " + + s"($col IS NULL AND ${dialect.compileValue(value)} IS NULL))" + case LessThan(attr, value) => s"${quote(attr)} < ${dialect.compileValue(value)}" + case GreaterThan(attr, value) => s"${quote(attr)} > ${dialect.compileValue(value)}" + case LessThanOrEqual(attr, value) => s"${quote(attr)} <= ${dialect.compileValue(value)}" + case GreaterThanOrEqual(attr, value) => s"${quote(attr)} >= ${dialect.compileValue(value)}" case IsNull(attr) => s"${quote(attr)} IS NULL" case IsNotNull(attr) => s"${quote(attr)} IS NOT NULL" case StringStartsWith(attr, value) => s"${quote(attr)} LIKE '${value}%'" @@ -124,7 +109,7 @@ object JDBCRDD extends Logging { case StringContains(attr, value) => s"${quote(attr)} LIKE '%${value}%'" case In(attr, value) if value.isEmpty => s"CASE WHEN ${quote(attr)} IS NULL THEN NULL ELSE FALSE END" - case In(attr, value) => s"${quote(attr)} IN (${compileValue(value)})" + case In(attr, value) => s"${quote(attr)} IN (${dialect.compileValue(value)})" case Not(f) => compileFilter(f, dialect).map(p => s"(NOT ($p))").getOrElse(null) case Or(f1, f2) => // We can't compile Or filter unless both sub-filters are compiled successfully. diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala index a86a86d40890..7c38ed68c041 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala @@ -17,7 +17,9 @@ package org.apache.spark.sql.jdbc -import java.sql.Connection +import java.sql.{Connection, Date, Timestamp} + +import org.apache.commons.lang3.StringUtils import org.apache.spark.annotation.{DeveloperApi, InterfaceStability, Since} import org.apache.spark.sql.types._ @@ -123,6 +125,29 @@ abstract class JdbcDialect extends Serializable { def beforeFetch(connection: Connection, properties: Map[String, String]): Unit = { } + /** + * Escape special characters in SQL string literals. + * @param value The string to be escaped. + * @return Escaped string. + */ + @Since("2.3.0") + protected[jdbc] def escapeSql(value: String): String = + if (value == null) null else StringUtils.replace(value, "'", "''") + + /** + * Converts value to SQL expression. + * @param value The value to be converted. + * @return Converted value. 
+   */
+  @Since("2.3.0")
+  def compileValue(value: Any): Any = value match {
+    case stringValue: String => s"'${escapeSql(stringValue)}'"
+    case timestampValue: Timestamp => "'" + timestampValue + "'"
+    case dateValue: Date => "'" + dateValue + "'"
+    case arrayValue: Array[Any] => arrayValue.map(compileValue).mkString(", ")
+    case _ => value
+  }
+
   /**
    * Return Some[true] iff `TRUNCATE TABLE` causes cascading default.
    * Some[true] : TRUNCATE TABLE causes cascading.
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/OracleDialect.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/OracleDialect.scala
index 20e634c06b61..3b44c1de93a6 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/OracleDialect.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/OracleDialect.scala
@@ -17,7 +17,7 @@

 package org.apache.spark.sql.jdbc

-import java.sql.Types
+import java.sql.{Date, Timestamp, Types}

 import org.apache.spark.sql.types._

@@ -64,5 +64,18 @@ private case object OracleDialect extends JdbcDialect {
     case _ => None
   }

+  override def compileValue(value: Any): Any = value match {
+    // The JDBC drivers support date literals in SQL statements written in the
+    // format: {d 'yyyy-mm-dd'} and timestamp literals in SQL statements written
+    // in the format: {ts 'yyyy-mm-dd hh:mm:ss.f...'}. For details, see
+    // 'Oracle Database JDBC Developer’s Guide and Reference, 11g Release 1 (11.1)'
+    // Appendix A Reference Information.
+    case stringValue: String => s"'${escapeSql(stringValue)}'"
+    case timestampValue: Timestamp => "{ts '" + timestampValue + "'}"
+    case dateValue: Date => "{d '" + dateValue + "'}"
+    case arrayValue: Array[Any] => arrayValue.map(compileValue).mkString(", ")
+    case _ => value
+  }
+
   override def isCascadingTruncateTable(): Option[Boolean] = Some(false)
 }

From d913db16a0de0983961f9d0c5f9b146be7226ac1 Mon Sep 17 00:00:00 2001
From: guoxiaolong
Date: Mon, 3 Jul 2017 13:31:01 +0800
Subject: [PATCH 016/125] [SPARK-21250][WEB-UI] Add a url in the table of
 'Running Executors' in worker page to visit job page.

## What changes were proposed in this pull request?

Add a url in the table of 'Running Executors' in worker page to visit job page.

When I click the 'Name' URL, the current page jumps to the application's job page. This applies only to the table of 'Running Executors'; in the table of 'Finished Executors' the 'Name' is not a link, so clicking it does not jump to any page.

fix before:
![1](https://user-images.githubusercontent.com/26266482/27679397-30ddc262-5ceb-11e7-839b-0889d1f42480.png)

fix after:
![2](https://user-images.githubusercontent.com/26266482/27679405-3588ef12-5ceb-11e7-9756-0a93815cd698.png)

## How was this patch tested?

manual tests

Please review http://spark.apache.org/contributing.html before opening a pull request.

Author: guoxiaolong

Closes #18464 from guoxiaolongzte/SPARK-21250.
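As an aside on the `JdbcDialect.compileValue` hook introduced in the patch above: third-party dialects can override it the same way `OracleDialect` does. Below is a hedged sketch of a hypothetical dialect (the `MyDialect` name and the `jdbc:mydb` URL prefix are invented for illustration; `canHandle` is part of the existing `JdbcDialect` API):

```scala
import java.sql.{Date, Timestamp}

import org.apache.spark.sql.jdbc.JdbcDialect

// Hypothetical dialect that emits ANSI DATE/TIMESTAMP literals for pushed-down
// filters and defers to the default compileValue rules for all other types.
case object MyDialect extends JdbcDialect {
  override def canHandle(url: String): Boolean = url.startsWith("jdbc:mydb")

  override def compileValue(value: Any): Any = value match {
    case dateValue: Date => "DATE '" + dateValue + "'"
    case timestampValue: Timestamp => "TIMESTAMP '" + timestampValue + "'"
    case other => super.compileValue(other)
  }
}
```

With such a dialect registered via `JdbcDialects.registerDialect`, a pushed-down `GreaterThan` filter on a date column would compile to `"d" > DATE '2017-07-01'` instead of the default `"d" > '2017-07-01'`, mirroring how `OracleDialect` switches to `{d '...'}` literals above.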
--- .../apache/spark/deploy/worker/ui/WorkerPage.scala | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/deploy/worker/ui/WorkerPage.scala b/core/src/main/scala/org/apache/spark/deploy/worker/ui/WorkerPage.scala index 1ad973122b60..ea39b0dce0a4 100644 --- a/core/src/main/scala/org/apache/spark/deploy/worker/ui/WorkerPage.scala +++ b/core/src/main/scala/org/apache/spark/deploy/worker/ui/WorkerPage.scala @@ -23,8 +23,8 @@ import scala.xml.Node import org.json4s.JValue +import org.apache.spark.deploy.{ExecutorState, JsonProtocol} import org.apache.spark.deploy.DeployMessages.{RequestWorkerState, WorkerStateResponse} -import org.apache.spark.deploy.JsonProtocol import org.apache.spark.deploy.master.DriverState import org.apache.spark.deploy.worker.{DriverRunner, ExecutorRunner} import org.apache.spark.ui.{UIUtils, WebUIPage} @@ -112,7 +112,15 @@ private[ui] class WorkerPage(parent: WorkerWebUI) extends WebUIPage("") {
             <li><strong>ID:</strong> {executor.appId}</li>
-            <li><strong>Name:</strong> {executor.appDesc.name}</li>
+            <li><strong>Name:</strong>
+              {
+                if ({executor.state == ExecutorState.RUNNING} && executor.appDesc.appUiUrl.nonEmpty) {
+                  <a href={executor.appDesc.appUiUrl}> {executor.appDesc.name}</a>
+                } else {
+                  {executor.appDesc.name}
+                }
+              }
+            </li>
             <li><strong>User:</strong> {executor.appDesc.user}</li>
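The hunk above leans on Scala XML literals: an `if`/`else` expression may yield either an element or a plain text node. A minimal, self-contained sketch of the same pattern (the method and parameter names here are hypothetical, not from the patch):

```scala
import scala.xml.{Node, Text}

// Render the name as a link only while the application is running and a UI
// URL is known; otherwise fall back to plain text, as the worker page does.
def nameCell(name: String, appUiUrl: String, isRunning: Boolean): Node =
  if (isRunning && appUiUrl.nonEmpty) <a href={appUiUrl}>{name}</a> else Text(name)
```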
From a9339db99f0620d4828eb903523be55dfbf2fb64 Mon Sep 17 00:00:00 2001 From: Sean Owen Date: Mon, 3 Jul 2017 19:52:39 +0800 Subject: [PATCH 017/125] [SPARK-21137][CORE] Spark reads many small files slowly ## What changes were proposed in this pull request? Parallelize FileInputFormat.listStatus in Hadoop API via LIST_STATUS_NUM_THREADS to speed up examination of file sizes for wholeTextFiles et al ## How was this patch tested? Existing tests, which will exercise the key path here: using a local file system. Author: Sean Owen Closes #18441 from srowen/SPARK-21137. --- .../main/scala/org/apache/spark/rdd/BinaryFileRDD.scala | 7 ++++++- .../main/scala/org/apache/spark/rdd/WholeTextFileRDD.scala | 7 ++++++- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/rdd/BinaryFileRDD.scala b/core/src/main/scala/org/apache/spark/rdd/BinaryFileRDD.scala index 50d977a92da5..a14bad47dfe1 100644 --- a/core/src/main/scala/org/apache/spark/rdd/BinaryFileRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/BinaryFileRDD.scala @@ -20,6 +20,7 @@ package org.apache.spark.rdd import org.apache.hadoop.conf.{Configurable, Configuration} import org.apache.hadoop.io.Writable import org.apache.hadoop.mapreduce._ +import org.apache.hadoop.mapreduce.lib.input.FileInputFormat import org.apache.hadoop.mapreduce.task.JobContextImpl import org.apache.spark.{Partition, SparkContext} @@ -35,8 +36,12 @@ private[spark] class BinaryFileRDD[T]( extends NewHadoopRDD[String, T](sc, inputFormatClass, keyClass, valueClass, conf) { override def getPartitions: Array[Partition] = { - val inputFormat = inputFormatClass.newInstance val conf = getConf + // setMinPartitions below will call FileInputFormat.listStatus(), which can be quite slow when + // traversing a large number of directories and files. Parallelize it. + conf.setIfUnset(FileInputFormat.LIST_STATUS_NUM_THREADS, + Runtime.getRuntime.availableProcessors().toString) + val inputFormat = inputFormatClass.newInstance inputFormat match { case configurable: Configurable => configurable.setConf(conf) diff --git a/core/src/main/scala/org/apache/spark/rdd/WholeTextFileRDD.scala b/core/src/main/scala/org/apache/spark/rdd/WholeTextFileRDD.scala index 8e1baae796fc..9f3d0745c33c 100644 --- a/core/src/main/scala/org/apache/spark/rdd/WholeTextFileRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/WholeTextFileRDD.scala @@ -20,6 +20,7 @@ package org.apache.spark.rdd import org.apache.hadoop.conf.{Configurable, Configuration} import org.apache.hadoop.io.{Text, Writable} import org.apache.hadoop.mapreduce.InputSplit +import org.apache.hadoop.mapreduce.lib.input.FileInputFormat import org.apache.hadoop.mapreduce.task.JobContextImpl import org.apache.spark.{Partition, SparkContext} @@ -38,8 +39,12 @@ private[spark] class WholeTextFileRDD( extends NewHadoopRDD[Text, Text](sc, inputFormatClass, keyClass, valueClass, conf) { override def getPartitions: Array[Partition] = { - val inputFormat = inputFormatClass.newInstance val conf = getConf + // setMinPartitions below will call FileInputFormat.listStatus(), which can be quite slow when + // traversing a large number of directories and files. Parallelize it. 
+    conf.setIfUnset(FileInputFormat.LIST_STATUS_NUM_THREADS,
+      Runtime.getRuntime.availableProcessors().toString)
+    val inputFormat = inputFormatClass.newInstance
     inputFormat match {
       case configurable: Configurable =>
         configurable.setConf(conf)

From eb7a5a66bbd5837c01f13c76b68de2a6034976f3 Mon Sep 17 00:00:00 2001
From: Zhenhua Wang
Date: Mon, 3 Jul 2017 09:01:42 -0700
Subject: [TEST] Load test table based on case sensitivity

## What changes were proposed in this pull request?

It is strange that we get a "table not found" error when **the first SQL statement** in a test written with `TestHiveSingleton` uses upper-case table names, **even though table name resolution is case-insensitive**.

This is because in `TestHiveQueryExecution`, test tables are loaded based on exact matching instead of the configured case sensitivity.

## How was this patch tested?

Added a new test case.

Author: Zhenhua Wang

Closes #18504 from wzhfy/testHive.
---
 .../apache/spark/sql/hive/test/TestHive.scala |  7 ++-
 .../apache/spark/sql/hive/TestHiveSuite.scala | 45 +++++++++++++++++++
 2 files changed, 51 insertions(+), 1 deletion(-)
 create mode 100644 sql/hive/src/test/scala/org/apache/spark/sql/hive/TestHiveSuite.scala

diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala
index 4e1792321c89..801f9b992364 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala
@@ -449,6 +449,8 @@ private[hive] class TestHiveSparkSession(
   private val loadedTables = new collection.mutable.HashSet[String]

+  def getLoadedTables: collection.mutable.HashSet[String] = loadedTables
+
   def loadTestTable(name: String) {
     if (!(loadedTables contains name)) {
       // Marks the table as loaded first to prevent infinite mutually recursive table loading.
@@ -553,7 +555,10 @@ private[hive] class TestHiveQueryExecution(
     val referencedTables =
       describedTables ++
       logical.collect { case UnresolvedRelation(tableIdent, _) => tableIdent.table }
-    val referencedTestTables = referencedTables.filter(sparkSession.testTables.contains)
+    val resolver = sparkSession.sessionState.conf.resolver
+    val referencedTestTables = sparkSession.testTables.keys.filter { testTable =>
+      referencedTables.exists(resolver(_, testTable))
+    }
     logDebug(s"Query references test tables: ${referencedTestTables.mkString(", ")}")
     referencedTestTables.foreach(sparkSession.loadTestTable)
     // Proceed with analysis.
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/TestHiveSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/TestHiveSuite.scala
new file mode 100644
index 000000000000..193fa83dbad9
--- /dev/null
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/TestHiveSuite.scala
@@ -0,0 +1,45 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.hive + +import org.apache.spark.sql.AnalysisException +import org.apache.spark.sql.hive.test.{TestHiveSingleton, TestHiveSparkSession} +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.test.SQLTestUtils + + +class TestHiveSuite extends TestHiveSingleton with SQLTestUtils { + test("load test table based on case sensitivity") { + val testHiveSparkSession = spark.asInstanceOf[TestHiveSparkSession] + + withSQLConf(SQLConf.CASE_SENSITIVE.key -> "false") { + sql("SELECT * FROM SRC").queryExecution.analyzed + assert(testHiveSparkSession.getLoadedTables.contains("src")) + assert(testHiveSparkSession.getLoadedTables.size == 1) + } + testHiveSparkSession.reset() + + withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") { + val err = intercept[AnalysisException] { + sql("SELECT * FROM SRC").queryExecution.analyzed + } + assert(err.message.contains("Table or view not found")) + } + testHiveSparkSession.reset() + } +} From 17bdc36ef16a544b693c628db276fe32db87fe7a Mon Sep 17 00:00:00 2001 From: aokolnychyi Date: Mon, 3 Jul 2017 09:35:49 -0700 Subject: [PATCH 019/125] [SPARK-21102][SQL] Refresh command is too aggressive in parsing ### Idea This PR adds validation to REFRESH sql statements. Currently, users can specify whatever they want as resource path. For example, spark.sql("REFRESH ! $ !") will be executed without any exceptions. ### Implementation I am not sure that my current implementation is the most optimal, so any feedback is appreciated. My first idea was to make the grammar as strict as possible. Unfortunately, there were some problems. I tried the approach below: SqlBase.g4 ``` ... | REFRESH TABLE tableIdentifier #refreshTable | REFRESH resourcePath #refreshResource ... resourcePath : STRING | (IDENTIFIER | number | nonReserved | '/' | '-')+ // other symbols can be added if needed ; ``` It is not flexible enough and requires to explicitly mention all possible symbols. Therefore, I came up with the current approach that is implemented in the code. Let me know your opinion on which one is better. Author: aokolnychyi Closes #18368 from aokolnychyi/spark-21102. --- .../spark/sql/catalyst/parser/SqlBase.g4 | 2 +- .../spark/sql/execution/SparkSqlParser.scala | 20 +++++++++++++++--- .../sql/execution/SparkSqlParserSuite.scala | 21 ++++++++++++++++++- 3 files changed, 38 insertions(+), 5 deletions(-) diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 index 7ffa15009633..29f554451ed4 100644 --- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 +++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 @@ -149,7 +149,7 @@ statement | (DESC | DESCRIBE) TABLE? option=(EXTENDED | FORMATTED)? tableIdentifier partitionSpec? describeColName? #describeTable | REFRESH TABLE tableIdentifier #refreshTable - | REFRESH .*? #refreshResource + | REFRESH (STRING | .*?) #refreshResource | CACHE LAZY? TABLE tableIdentifier (AS? query)? #cacheTable | UNCACHE TABLE (IF EXISTS)? 
tableIdentifier #uncacheTable | CLEAR CACHE #clearCache diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala index 3c58c6e1b678..2b79eb5eac0f 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala @@ -230,11 +230,25 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder(conf) { } /** - * Create a [[RefreshTable]] logical plan. + * Create a [[RefreshResource]] logical plan. */ override def visitRefreshResource(ctx: RefreshResourceContext): LogicalPlan = withOrigin(ctx) { - val resourcePath = remainder(ctx.REFRESH.getSymbol).trim - RefreshResource(resourcePath) + val path = if (ctx.STRING != null) string(ctx.STRING) else extractUnquotedResourcePath(ctx) + RefreshResource(path) + } + + private def extractUnquotedResourcePath(ctx: RefreshResourceContext): String = withOrigin(ctx) { + val unquotedPath = remainder(ctx.REFRESH.getSymbol).trim + validate( + unquotedPath != null && !unquotedPath.isEmpty, + "Resource paths cannot be empty in REFRESH statements. Use / to match everything", + ctx) + val forbiddenSymbols = Seq(" ", "\n", "\r", "\t") + validate( + !forbiddenSymbols.exists(unquotedPath.contains(_)), + "REFRESH statements cannot contain ' ', '\\n', '\\r', '\\t' inside unquoted resource paths", + ctx) + unquotedPath } /** diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala index bd9c2ebd6fab..d238c76fbeef 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala @@ -25,7 +25,7 @@ import org.apache.spark.sql.catalyst.expressions.{Ascending, Concat, SortOrder} import org.apache.spark.sql.catalyst.parser.ParseException import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Project, RepartitionByExpression, Sort} import org.apache.spark.sql.execution.command._ -import org.apache.spark.sql.execution.datasources.CreateTable +import org.apache.spark.sql.execution.datasources.{CreateTable, RefreshResource} import org.apache.spark.sql.internal.{HiveSerDe, SQLConf} import org.apache.spark.sql.types.{IntegerType, LongType, StringType, StructType} @@ -66,6 +66,25 @@ class SparkSqlParserSuite extends AnalysisTest { } } + test("refresh resource") { + assertEqual("REFRESH prefix_path", RefreshResource("prefix_path")) + assertEqual("REFRESH /", RefreshResource("/")) + assertEqual("REFRESH /path///a", RefreshResource("/path///a")) + assertEqual("REFRESH pat1h/112/_1a", RefreshResource("pat1h/112/_1a")) + assertEqual("REFRESH pat1h/112/_1a/a-1", RefreshResource("pat1h/112/_1a/a-1")) + assertEqual("REFRESH path-with-dash", RefreshResource("path-with-dash")) + assertEqual("REFRESH \'path with space\'", RefreshResource("path with space")) + assertEqual("REFRESH \"path with space 2\"", RefreshResource("path with space 2")) + intercept("REFRESH a b", "REFRESH statements cannot contain") + intercept("REFRESH a\tb", "REFRESH statements cannot contain") + intercept("REFRESH a\nb", "REFRESH statements cannot contain") + intercept("REFRESH a\rb", "REFRESH statements cannot contain") + intercept("REFRESH a\r\nb", "REFRESH statements cannot contain") + intercept("REFRESH @ $a$", "REFRESH statements cannot contain") + 
intercept("REFRESH ", "Resource paths cannot be empty in REFRESH statements")
+    intercept("REFRESH", "Resource paths cannot be empty in REFRESH statements")
+  }
+
   test("show functions") {
     assertEqual("show functions", ShowFunctionsCommand(None, None, true, true))
     assertEqual("show all functions", ShowFunctionsCommand(None, None, true, true))

From 363bfe30ba44852a8fac946a37032f76480f6f1b Mon Sep 17 00:00:00 2001
From: Takeshi Yamamuro
Date: Mon, 3 Jul 2017 10:14:03 -0700
Subject: [PATCH 020/125] [SPARK-20073][SQL] Prints an explicit warning
 message in case of NULL-safe equals

## What changes were proposed in this pull request?
This PR adds code to print the same warning message as in the `===` case when using NULL-safe equals (`<=>`).

## How was this patch tested?
Existing tests.

Author: Takeshi Yamamuro

Closes #18436 from maropu/SPARK-20073.
---
 .../src/main/scala/org/apache/spark/sql/Column.scala | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala
index 7e1f1d83cb3d..bd1669b6dba6 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala
@@ -464,7 +464,15 @@ class Column(val expr: Expression) extends Logging {
    * @group expr_ops
    * @since 1.3.0
    */
-  def <=> (other: Any): Column = withExpr { EqualNullSafe(expr, lit(other).expr) }
+  def <=> (other: Any): Column = withExpr {
+    val right = lit(other).expr
+    if (this.expr == right) {
+      logWarning(
+        s"Constructing trivially true equals predicate, '${this.expr} <=> $right'. " +
+          "Perhaps you need to use aliases.")
+    }
+    EqualNullSafe(expr, right)
+  }

   /**
    * Equality test that is safe for null values.

From f953ca56eccdaef29ac580d44613a028415ba3f5 Mon Sep 17 00:00:00 2001
From: Wenchen Fan
Date: Mon, 3 Jul 2017 10:51:44 -0700
Subject: [PATCH 021/125] [SPARK-21284][SQL] rename SessionCatalog.registerFunction parameter name

## What changes were proposed in this pull request?

Looking at the code in `SessionCatalog.registerFunction`, the parameter `ignoreIfExists` is a misleading name: when `ignoreIfExists` is true, we override the function if it already exists. So `overrideIfExists` is the correct name.

## How was this patch tested?

N/A

Author: Wenchen Fan

Closes #18510 from cloud-fan/minor.
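A condensed sketch of the renamed parameter's semantics, adapted from the test changes in the diff below (`catalog`, `newFunc`, and `intercept` are helpers available in that suite; this is an illustration, not the committed test code):

```scala
val tempFunc = (e: Seq[Expression]) => e.head
catalog.registerFunction(
  newFunc("temp1", None), overrideIfExists = false, functionBuilder = Some(tempFunc))

// Registering the same name again with overrideIfExists = false fails...
intercept[AnalysisException] {
  catalog.registerFunction(
    newFunc("temp1", None), overrideIfExists = false, functionBuilder = Some(tempFunc))
}

// ...while overrideIfExists = true replaces the existing builder, which is why
// `ignoreIfExists` was a misleading name for this parameter.
catalog.registerFunction(
  newFunc("temp1", None), overrideIfExists = true, functionBuilder = Some(tempFunc))
```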
--- .../sql/catalyst/catalog/SessionCatalog.scala | 6 +++--- .../catalog/SessionCatalogSuite.scala | 20 ++++++++++--------- .../sql/execution/command/functions.scala | 3 +-- .../spark/sql/internal/CatalogSuite.scala | 2 +- .../spark/sql/hive/HiveSessionCatalog.scala | 2 +- .../ObjectHashAggregateExecBenchmark.scala | 2 +- 6 files changed, 18 insertions(+), 17 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala index 7ece77df7fc1..a86604e4353a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala @@ -1104,10 +1104,10 @@ class SessionCatalog( */ def registerFunction( funcDefinition: CatalogFunction, - ignoreIfExists: Boolean, + overrideIfExists: Boolean, functionBuilder: Option[FunctionBuilder] = None): Unit = { val func = funcDefinition.identifier - if (functionRegistry.functionExists(func) && !ignoreIfExists) { + if (functionRegistry.functionExists(func) && !overrideIfExists) { throw new AnalysisException(s"Function $func already exists") } val info = new ExpressionInfo(funcDefinition.className, func.database.orNull, func.funcName) @@ -1219,7 +1219,7 @@ class SessionCatalog( // catalog. So, it is possible that qualifiedName is not exactly the same as // catalogFunction.identifier.unquotedString (difference is on case-sensitivity). // At here, we preserve the input from the user. - registerFunction(catalogFunction.copy(identifier = qualifiedName), ignoreIfExists = false) + registerFunction(catalogFunction.copy(identifier = qualifiedName), overrideIfExists = false) // Now, we need to create the Expression. 
functionRegistry.lookupFunction(qualifiedName, children) } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalogSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalogSuite.scala index fc3893e19779..8f856a0daad1 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalogSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalogSuite.scala @@ -1175,9 +1175,9 @@ abstract class SessionCatalogSuite extends AnalysisTest { val tempFunc1 = (e: Seq[Expression]) => e.head val tempFunc2 = (e: Seq[Expression]) => e.last catalog.registerFunction( - newFunc("temp1", None), ignoreIfExists = false, functionBuilder = Some(tempFunc1)) + newFunc("temp1", None), overrideIfExists = false, functionBuilder = Some(tempFunc1)) catalog.registerFunction( - newFunc("temp2", None), ignoreIfExists = false, functionBuilder = Some(tempFunc2)) + newFunc("temp2", None), overrideIfExists = false, functionBuilder = Some(tempFunc2)) val arguments = Seq(Literal(1), Literal(2), Literal(3)) assert(catalog.lookupFunction(FunctionIdentifier("temp1"), arguments) === Literal(1)) assert(catalog.lookupFunction(FunctionIdentifier("temp2"), arguments) === Literal(3)) @@ -1189,12 +1189,12 @@ abstract class SessionCatalogSuite extends AnalysisTest { // Temporary function already exists val e = intercept[AnalysisException] { catalog.registerFunction( - newFunc("temp1", None), ignoreIfExists = false, functionBuilder = Some(tempFunc3)) + newFunc("temp1", None), overrideIfExists = false, functionBuilder = Some(tempFunc3)) }.getMessage assert(e.contains("Function temp1 already exists")) // Temporary function is overridden catalog.registerFunction( - newFunc("temp1", None), ignoreIfExists = true, functionBuilder = Some(tempFunc3)) + newFunc("temp1", None), overrideIfExists = true, functionBuilder = Some(tempFunc3)) assert( catalog.lookupFunction( FunctionIdentifier("temp1"), arguments) === Literal(arguments.length)) @@ -1208,7 +1208,7 @@ abstract class SessionCatalogSuite extends AnalysisTest { val tempFunc1 = (e: Seq[Expression]) => e.head catalog.registerFunction( - newFunc("temp1", None), ignoreIfExists = false, functionBuilder = Some(tempFunc1)) + newFunc("temp1", None), overrideIfExists = false, functionBuilder = Some(tempFunc1)) // Returns true when the function is temporary assert(catalog.isTemporaryFunction(FunctionIdentifier("temp1"))) @@ -1259,7 +1259,7 @@ abstract class SessionCatalogSuite extends AnalysisTest { withBasicCatalog { catalog => val tempFunc = (e: Seq[Expression]) => e.head catalog.registerFunction( - newFunc("func1", None), ignoreIfExists = false, functionBuilder = Some(tempFunc)) + newFunc("func1", None), overrideIfExists = false, functionBuilder = Some(tempFunc)) val arguments = Seq(Literal(1), Literal(2), Literal(3)) assert(catalog.lookupFunction(FunctionIdentifier("func1"), arguments) === Literal(1)) catalog.dropTempFunction("func1", ignoreIfNotExists = false) @@ -1300,7 +1300,7 @@ abstract class SessionCatalogSuite extends AnalysisTest { withBasicCatalog { catalog => val tempFunc1 = (e: Seq[Expression]) => e.head catalog.registerFunction( - newFunc("func1", None), ignoreIfExists = false, functionBuilder = Some(tempFunc1)) + newFunc("func1", None), overrideIfExists = false, functionBuilder = Some(tempFunc1)) assert(catalog.lookupFunction( FunctionIdentifier("func1"), Seq(Literal(1), Literal(2), Literal(3))) == Literal(1)) catalog.dropTempFunction("func1", 
ignoreIfNotExists = false) @@ -1318,8 +1318,10 @@ abstract class SessionCatalogSuite extends AnalysisTest { val tempFunc2 = (e: Seq[Expression]) => e.last catalog.createFunction(newFunc("func2", Some("db2")), ignoreIfExists = false) catalog.createFunction(newFunc("not_me", Some("db2")), ignoreIfExists = false) - catalog.registerFunction(funcMeta1, ignoreIfExists = false, functionBuilder = Some(tempFunc1)) - catalog.registerFunction(funcMeta2, ignoreIfExists = false, functionBuilder = Some(tempFunc2)) + catalog.registerFunction( + funcMeta1, overrideIfExists = false, functionBuilder = Some(tempFunc1)) + catalog.registerFunction( + funcMeta2, overrideIfExists = false, functionBuilder = Some(tempFunc2)) assert(catalog.listFunctions("db1", "*").map(_._1).toSet == Set(FunctionIdentifier("func1"), FunctionIdentifier("yes_me"))) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/functions.scala index f39a3269efaf..a91ad413f4d1 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/functions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/functions.scala @@ -58,9 +58,8 @@ case class CreateFunctionCommand( s"is not allowed: '${databaseName.get}'") } // We first load resources and then put the builder in the function registry. - // Please note that it is allowed to overwrite an existing temp function. catalog.loadFunctionResources(resources) - catalog.registerFunction(func, ignoreIfExists = false) + catalog.registerFunction(func, overrideIfExists = false) } else { // For a permanent, we will store the metadata into underlying external catalog. // This function will be loaded into the FunctionRegistry when a query uses it. diff --git a/sql/core/src/test/scala/org/apache/spark/sql/internal/CatalogSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/internal/CatalogSuite.scala index b2d568ce320e..6acac1a9aa31 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/internal/CatalogSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/internal/CatalogSuite.scala @@ -79,7 +79,7 @@ class CatalogSuite val tempFunc = (e: Seq[Expression]) => e.head val funcMeta = CatalogFunction(FunctionIdentifier(name, None), "className", Nil) sessionCatalog.registerFunction( - funcMeta, ignoreIfExists = false, functionBuilder = Some(tempFunc)) + funcMeta, overrideIfExists = false, functionBuilder = Some(tempFunc)) } private def dropFunction(name: String, db: Option[String] = None): Unit = { diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala index da87f0218e3a..0d0269f69430 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala @@ -161,7 +161,7 @@ private[sql] class HiveSessionCatalog( FunctionIdentifier(functionName.toLowerCase(Locale.ROOT), database) val func = CatalogFunction(functionIdentifier, className, Nil) // Put this Hive built-in function to our function registry. - registerFunction(func, ignoreIfExists = false) + registerFunction(func, overrideIfExists = false) // Now, we need to create the Expression. 
functionRegistry.lookupFunction(functionIdentifier, children) } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/execution/benchmark/ObjectHashAggregateExecBenchmark.scala b/sql/hive/src/test/scala/org/apache/spark/sql/execution/benchmark/ObjectHashAggregateExecBenchmark.scala index 73383ae4d411..e599d1ab1d48 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/execution/benchmark/ObjectHashAggregateExecBenchmark.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/execution/benchmark/ObjectHashAggregateExecBenchmark.scala @@ -221,7 +221,7 @@ class ObjectHashAggregateExecBenchmark extends BenchmarkBase with TestHiveSingle val sessionCatalog = sparkSession.sessionState.catalog.asInstanceOf[HiveSessionCatalog] val functionIdentifier = FunctionIdentifier(functionName, database = None) val func = CatalogFunction(functionIdentifier, clazz.getName, resources = Nil) - sessionCatalog.registerFunction(func, ignoreIfExists = false) + sessionCatalog.registerFunction(func, overrideIfExists = false) } private def percentile_approx( From c79c10ebaf3d63b697b8d6d1a7e55aa2d406af69 Mon Sep 17 00:00:00 2001 From: gatorsmile Date: Mon, 3 Jul 2017 16:18:54 -0700 Subject: [PATCH 022/125] [TEST] Different behaviors of SparkContext Conf when building SparkSession ## What changes were proposed in this pull request? If the created ACTIVE sparkContext is not EXPLICITLY passed through the Builder's API `sparkContext()`, the conf of this sparkContext will also contain the conf set through the API `config()`; otherwise, the conf of this sparkContext will NOT contain the conf set through the API `config()` ## How was this patch tested? N/A Author: gatorsmile Closes #18517 from gatorsmile/fixTestCase2. --- .../spark/sql/SparkSessionBuilderSuite.scala | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionBuilderSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionBuilderSuite.scala index 386d13d07a95..4f6d5f79d466 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionBuilderSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionBuilderSuite.scala @@ -98,12 +98,31 @@ class SparkSessionBuilderSuite extends SparkFunSuite { val session = SparkSession.builder().config("key2", "value2").getOrCreate() assert(session.conf.get("key1") == "value1") assert(session.conf.get("key2") == "value2") + assert(session.sparkContext == sparkContext2) assert(session.sparkContext.conf.get("key1") == "value1") + // If the created sparkContext is not passed through the Builder's API sparkContext, + // the conf of this sparkContext will also contain the conf set through the API config. assert(session.sparkContext.conf.get("key2") == "value2") assert(session.sparkContext.conf.get("spark.app.name") == "test") session.stop() } + test("create SparkContext first then pass context to SparkSession") { + sparkContext.stop() + val conf = new SparkConf().setAppName("test").setMaster("local").set("key1", "value1") + val newSC = new SparkContext(conf) + val session = SparkSession.builder().sparkContext(newSC).config("key2", "value2").getOrCreate() + assert(session.conf.get("key1") == "value1") + assert(session.conf.get("key2") == "value2") + assert(session.sparkContext == newSC) + assert(session.sparkContext.conf.get("key1") == "value1") + // If the created sparkContext is passed through the Builder's API sparkContext, + // the conf of this sparkContext will not contain the conf set through the API config. 
+    assert(!session.sparkContext.conf.contains("key2"))
+    assert(session.sparkContext.conf.get("spark.app.name") == "test")
+    session.stop()
+  }
+
   test("SPARK-15887: hive-site.xml should be loaded") {
     val session = SparkSession.builder().master("local").getOrCreate()
     assert(session.sessionState.newHadoopConf().get("hive.in.test") == "true")

From 6657e00de36b59011d3fe78e8613fb64e54c957a Mon Sep 17 00:00:00 2001
From: liuxian
Date: Tue, 4 Jul 2017 09:16:40 +0800
Subject: [PATCH 023/125] [SPARK-21283][CORE] FileOutputStream should be
 created as append mode

## What changes were proposed in this pull request?

`FileAppender` is used to write the `stderr` and `stdout` files in `ExecutorRunner`, but header information has already been written to the `stderr` file before the `ErrorStream` is appended to it. If the FileOutputStream is not created in append mode, that header information is lost.

## How was this patch tested?
unit test case

Author: liuxian

Closes #18507 from 10110346/wip-lx-0703.
---
 .../scala/org/apache/spark/util/logging/FileAppender.scala   | 2 +-
 .../test/scala/org/apache/spark/util/FileAppenderSuite.scala | 5 ++++-
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/core/src/main/scala/org/apache/spark/util/logging/FileAppender.scala b/core/src/main/scala/org/apache/spark/util/logging/FileAppender.scala
index fdb1495899bc..8a0cc709bccc 100644
--- a/core/src/main/scala/org/apache/spark/util/logging/FileAppender.scala
+++ b/core/src/main/scala/org/apache/spark/util/logging/FileAppender.scala
@@ -94,7 +94,7 @@ private[spark] class FileAppender(inputStream: InputStream, file: File, bufferSi

   /** Open the file output stream */
   protected def openFile() {
-    outputStream = new FileOutputStream(file, false)
+    outputStream = new FileOutputStream(file, true)
     logDebug(s"Opened file $file")
   }

diff --git a/core/src/test/scala/org/apache/spark/util/FileAppenderSuite.scala b/core/src/test/scala/org/apache/spark/util/FileAppenderSuite.scala
index 7e2da8e14153..cd0ed5b036bf 100644
--- a/core/src/test/scala/org/apache/spark/util/FileAppenderSuite.scala
+++ b/core/src/test/scala/org/apache/spark/util/FileAppenderSuite.scala
@@ -52,10 +52,13 @@ class FileAppenderSuite extends SparkFunSuite with BeforeAndAfter with Logging {
   test("basic file appender") {
     val testString = (1 to 1000).mkString(", ")
     val inputStream = new ByteArrayInputStream(testString.getBytes(StandardCharsets.UTF_8))
+    // The pre-existing `header` should not be overwritten by the appender
+    val header = "Add header"
+    Files.write(header, testFile, StandardCharsets.UTF_8)
     val appender = new FileAppender(inputStream, testFile)
     inputStream.close()
     appender.awaitTermination()
-    assert(Files.toString(testFile, StandardCharsets.UTF_8) === testString)
+    assert(Files.toString(testFile, StandardCharsets.UTF_8) === header + testString)
   }

   test("rolling file appender - time-based rolling") {

From a848d552ef6b5d0d3bb3b2da903478437a8b10aa Mon Sep 17 00:00:00 2001
From: hyukjinkwon
Date: Tue, 4 Jul 2017 11:35:08 +0900
Subject: [PATCH 024/125] [SPARK-21264][PYTHON] Call cross join path in join
 without 'on' and with 'how'

## What changes were proposed in this pull request?

Currently, PySpark throws an NPE when the join columns are missing but a join type is specified, as below:

```python
spark.conf.set("spark.sql.crossJoin.enabled", "false")
spark.range(1).join(spark.range(1), how="inner").show()
```

```
Traceback (most recent call last):
...
py4j.protocol.Py4JJavaError: An error occurred while calling o66.join.
: java.lang.NullPointerException at org.apache.spark.sql.Dataset.join(Dataset.scala:931) at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) ... ``` ```python spark.conf.set("spark.sql.crossJoin.enabled", "true") spark.range(1).join(spark.range(1), how="inner").show() ``` ``` ... py4j.protocol.Py4JJavaError: An error occurred while calling o84.join. : java.lang.NullPointerException at org.apache.spark.sql.Dataset.join(Dataset.scala:931) at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) ... ``` This PR suggests to follow Scala's one as below: ```scala scala> spark.conf.set("spark.sql.crossJoin.enabled", "false") scala> spark.range(1).join(spark.range(1), Seq.empty[String], "inner").show() ``` ``` org.apache.spark.sql.AnalysisException: Detected cartesian product for INNER join between logical plans Range (0, 1, step=1, splits=Some(8)) and Range (0, 1, step=1, splits=Some(8)) Join condition is missing or trivial. Use the CROSS JOIN syntax to allow cartesian products between these relations.; ... ``` ```scala scala> spark.conf.set("spark.sql.crossJoin.enabled", "true") scala> spark.range(1).join(spark.range(1), Seq.empty[String], "inner").show() ``` ``` +---+---+ | id| id| +---+---+ | 0| 0| +---+---+ ``` **After** ```python spark.conf.set("spark.sql.crossJoin.enabled", "false") spark.range(1).join(spark.range(1), how="inner").show() ``` ``` Traceback (most recent call last): ... pyspark.sql.utils.AnalysisException: u'Detected cartesian product for INNER join between logical plans\nRange (0, 1, step=1, splits=Some(8))\nand\nRange (0, 1, step=1, splits=Some(8))\nJoin condition is missing or trivial.\nUse the CROSS JOIN syntax to allow cartesian products between these relations.;' ``` ```python spark.conf.set("spark.sql.crossJoin.enabled", "true") spark.range(1).join(spark.range(1), how="inner").show() ``` ``` +---+---+ | id| id| +---+---+ | 0| 0| +---+---+ ``` ## How was this patch tested? Added tests in `python/pyspark/sql/tests.py`. Author: hyukjinkwon Closes #18484 from HyukjinKwon/SPARK-21264. --- python/pyspark/sql/dataframe.py | 2 ++ python/pyspark/sql/tests.py | 16 ++++++++++++++++ 2 files changed, 18 insertions(+) diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py index 0649271ed224..27a6dad8917d 100644 --- a/python/pyspark/sql/dataframe.py +++ b/python/pyspark/sql/dataframe.py @@ -833,6 +833,8 @@ def join(self, other, on=None, how=None): else: if how is None: how = "inner" + if on is None: + on = self._jseq([]) assert isinstance(how, basestring), "how should be basestring" jdf = self._jdf.join(other._jdf, on, how) return DataFrame(jdf, self.sql_ctx) diff --git a/python/pyspark/sql/tests.py b/python/pyspark/sql/tests.py index 0a1cd6856b8e..c105969b26b9 100644 --- a/python/pyspark/sql/tests.py +++ b/python/pyspark/sql/tests.py @@ -2021,6 +2021,22 @@ def test_toDF_with_schema_string(self): self.assertEqual(df.schema.simpleString(), "struct") self.assertEqual(df.collect(), [Row(key=i) for i in range(100)]) + def test_join_without_on(self): + df1 = self.spark.range(1).toDF("a") + df2 = self.spark.range(1).toDF("b") + + try: + self.spark.conf.set("spark.sql.crossJoin.enabled", "false") + self.assertRaises(AnalysisException, lambda: df1.join(df2, how="inner").collect()) + + self.spark.conf.set("spark.sql.crossJoin.enabled", "true") + actual = df1.join(df2, how="inner").collect() + expected = [Row(a=0, b=0)] + self.assertEqual(actual, expected) + finally: + # We should unset this. Otherwise, other tests are affected. 
+            self.spark.conf.unset("spark.sql.crossJoin.enabled")
+
     # Regression test for invalid join methods when on is None, Spark-14761
     def test_invalid_join_method(self):
         df1 = self.spark.createDataFrame([("Alice", 5), ("Bob", 8)], ["name", "age"])

From 8ca4ebefa6301d9cb633ea15cf71f49c2d7f8607 Mon Sep 17 00:00:00 2001
From: Thomas Decaux
Date: Tue, 4 Jul 2017 12:17:48 +0100
Subject: [PATCH 025/125] [MINOR] Add french stop word "les"

## What changes were proposed in this pull request?

Added "les" as a French stop word (the plural of "le").

Author: Thomas Decaux

Closes #18514 from ebuildy/patch-1.
---
 .../resources/org/apache/spark/ml/feature/stopwords/french.txt | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/mllib/src/main/resources/org/apache/spark/ml/feature/stopwords/french.txt b/mllib/src/main/resources/org/apache/spark/ml/feature/stopwords/french.txt
index 94b8f8f39a3e..a59a0424616c 100644
--- a/mllib/src/main/resources/org/apache/spark/ml/feature/stopwords/french.txt
+++ b/mllib/src/main/resources/org/apache/spark/ml/feature/stopwords/french.txt
@@ -15,6 +15,7 @@
 il
 je
 la
 le
+les
 leur
 lui
 ma
@@ -152,4 +153,4 @@
 eusses
 eût
 eussions
 eussiez
-eussent
\ No newline at end of file
+eussent

From 2b1e94b9add82b30bc94f639fa97492624bf0dce Mon Sep 17 00:00:00 2001
From: hyukjinkwon
Date: Tue, 4 Jul 2017 12:18:42 +0100
Subject: [PATCH 026/125] [MINOR][SPARK SUBMIT] Print out R file usage in
 spark-submit

## What changes were proposed in this pull request?

Currently, running the shell below:

```bash
$ ./bin/spark-submit tmp.R a b c
```

with the R file `tmp.R` as below:

```r
#!/usr/bin/env Rscript
library(SparkR)
sparkRSQL.init(sparkR.init(master = "local"))
collect(createDataFrame(list(list(1))))
print(commandArgs(trailingOnly = TRUE))
```

works fine, as below:

```bash
  _1
1  1
[1] "a" "b" "c"
```

However, R files are not mentioned in the printed usage documentation, as below:

```bash
$ ./bin/spark-submit
```

```
Usage: spark-submit [options] <app jar | python file> [app arguments]
...
```

For `./bin/sparkR`, it looks fine, as below:

```bash
$ ./bin/sparkR tmp.R
```

```
Running R applications through 'sparkR' is not supported as of Spark 2.0.
Use ./bin/spark-submit <R file>
```

Running the script below:

```bash
$ ./bin/spark-submit
```

**Before**

```
Usage: spark-submit [options] <app jar | python file> [app arguments]
...
```

**After**

```
Usage: spark-submit [options] <app jar | python file | R file> [app arguments]
...
```

## How was this patch tested?

Manually tested.

Author: hyukjinkwon

Closes #18505 from HyukjinKwon/minor-doc-summit.
---
 .../scala/org/apache/spark/deploy/SparkSubmitArguments.scala | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala
index 3d9a14c51618..7800d3d624e3 100644
--- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala
@@ -504,7 +504,7 @@ private[deploy] class SparkSubmitArguments(args: Seq[String], env: Map[String, S
       outStream.println("Unknown/unsupported param " + unknownParam)
     }
     val command = sys.env.get("_SPARK_CMD_USAGE").getOrElse(
-      """Usage: spark-submit [options] <app jar | python file> [app arguments]
+      """Usage: spark-submit [options] <app jar | python file | R file> [app arguments]
        |Usage: spark-submit --kill [submission ID] --master [spark://...]
        |Usage: spark-submit --status [submission ID] --master [spark://...]
        |Usage: spark-submit run-example [options] example-class [example args]""".stripMargin)

From d492cc5a21cd67b3999b85d97f5c41c3734b1ba3 Mon Sep 17 00:00:00 2001
From: hyukjinkwon
Date: Tue, 4 Jul 2017 20:45:58 +0800
Subject: [PATCH 027/125] [SPARK-19507][SPARK-21296][PYTHON] Avoid per-record
 type dispatch in schema verification and improve exception message

## What changes were proposed in this pull request?

**Context**

While reviewing https://github.com/apache/spark/pull/17227, I realised that we type-dispatch per record there. The PR itself is fine in terms of performance as is, but it prints a prefix, `"obj"`, in the exception message, as below:

```
from pyspark.sql.types import *
schema = StructType([StructField('s', IntegerType(), nullable=False)])
spark.createDataFrame([["1"]], schema)
...
TypeError: obj.s: IntegerType can not accept object '1' in type <type 'str'>
```

I suggested getting rid of this, but while investigating it I realised that my approach might bring a performance regression, as this is a hot path.

Fixing only SPARK-19507 (https://github.com/apache/spark/pull/17227) needs more changes to cleanly get rid of the prefix, so I decided to fix both issues together.

**Proposal**

This PR tries to

- get rid of the per-record type dispatch, as we do in many code paths in Scala, so that it improves the performance (roughly ~25% improvement) - SPARK-21296

  This was tested with the simple code `spark.createDataFrame(range(1000000), "int")`. However, I am quite sure the actual improvement in practice is larger than this, in particular when the schema is complicated.

- improve the error message in the exception by describing the field information as prose - SPARK-19507

## How was this patch tested?

Manually tested and unit tests were added in `python/pyspark/sql/tests.py`.

Benchmark - codes: https://gist.github.com/HyukjinKwon/c3397469c56cb26c2d7dd521ed0bc5a3
Error message - codes: https://gist.github.com/HyukjinKwon/b1b2c7f65865444c4a8836435100e398

**Before**

Benchmark:
- Results: https://gist.github.com/HyukjinKwon/4a291dab45542106301a0c1abcdca924

Error message
- Results: https://gist.github.com/HyukjinKwon/57b1916395794ce924faa32b14a3fe19

**After**

Benchmark
- Results: https://gist.github.com/HyukjinKwon/21496feecc4a920e50c4e455f836266e

Error message
- Results: https://gist.github.com/HyukjinKwon/7a494e4557fe32a652ce1236e504a395

Closes #17227

Author: hyukjinkwon
Author: David Gingrich

Closes #18521 from HyukjinKwon/python-type-dispatch.
---
 python/pyspark/rdd.py         |   1 -
 python/pyspark/sql/session.py |  12 +-
 python/pyspark/sql/tests.py   | 203 ++++++++++++++++++++++++++++++-
 python/pyspark/sql/types.py   | 219 +++++++++++++++++++++++-----------
 4 files changed, 352 insertions(+), 83 deletions(-)

diff --git a/python/pyspark/rdd.py b/python/pyspark/rdd.py
index 60141792d499..7dfa17f68a94 100644
--- a/python/pyspark/rdd.py
+++ b/python/pyspark/rdd.py
@@ -627,7 +627,6 @@ def sortPartition(iterator):
     def sortByKey(self, ascending=True, numPartitions=None, keyfunc=lambda x: x):
         """
         Sorts this RDD, which is assumed to consist of (key, value) pairs.
- # noqa >>> tmp = [('a', 1), ('b', 2), ('1', 3), ('d', 4), ('2', 5)] >>> sc.parallelize(tmp).sortByKey().first() diff --git a/python/pyspark/sql/session.py b/python/pyspark/sql/session.py index e3bf0f35ea15..2cc0e2d1d7b8 100644 --- a/python/pyspark/sql/session.py +++ b/python/pyspark/sql/session.py @@ -33,7 +33,7 @@ from pyspark.sql.dataframe import DataFrame from pyspark.sql.readwriter import DataFrameReader from pyspark.sql.streaming import DataStreamReader -from pyspark.sql.types import Row, DataType, StringType, StructType, _verify_type, \ +from pyspark.sql.types import Row, DataType, StringType, StructType, _make_type_verifier, \ _infer_schema, _has_nulltype, _merge_type, _create_converter, _parse_datatype_string from pyspark.sql.utils import install_exception_handler @@ -514,17 +514,21 @@ def createDataFrame(self, data, schema=None, samplingRatio=None, verifySchema=Tr schema = [str(x) for x in data.columns] data = [r.tolist() for r in data.to_records(index=False)] - verify_func = _verify_type if verifySchema else lambda _, t: True if isinstance(schema, StructType): + verify_func = _make_type_verifier(schema) if verifySchema else lambda _: True + def prepare(obj): - verify_func(obj, schema) + verify_func(obj) return obj elif isinstance(schema, DataType): dataType = schema schema = StructType().add("value", schema) + verify_func = _make_type_verifier( + dataType, name="field value") if verifySchema else lambda _: True + def prepare(obj): - verify_func(obj, dataType) + verify_func(obj) return obj, else: if isinstance(schema, list): diff --git a/python/pyspark/sql/tests.py b/python/pyspark/sql/tests.py index c105969b26b9..16ba8bd73f40 100644 --- a/python/pyspark/sql/tests.py +++ b/python/pyspark/sql/tests.py @@ -57,7 +57,7 @@ from pyspark import SparkContext from pyspark.sql import SparkSession, SQLContext, HiveContext, Column, Row from pyspark.sql.types import * -from pyspark.sql.types import UserDefinedType, _infer_type +from pyspark.sql.types import UserDefinedType, _infer_type, _make_type_verifier from pyspark.tests import ReusedPySparkTestCase, SparkSubmitTests from pyspark.sql.functions import UserDefinedFunction, sha2, lit from pyspark.sql.window import Window @@ -852,7 +852,7 @@ def test_convert_row_to_dict(self): self.assertEqual(1.0, row.asDict()['d']['key'].c) def test_udt(self): - from pyspark.sql.types import _parse_datatype_json_string, _infer_type, _verify_type + from pyspark.sql.types import _parse_datatype_json_string, _infer_type, _make_type_verifier from pyspark.sql.tests import ExamplePointUDT, ExamplePoint def check_datatype(datatype): @@ -868,8 +868,8 @@ def check_datatype(datatype): check_datatype(structtype_with_udt) p = ExamplePoint(1.0, 2.0) self.assertEqual(_infer_type(p), ExamplePointUDT()) - _verify_type(ExamplePoint(1.0, 2.0), ExamplePointUDT()) - self.assertRaises(ValueError, lambda: _verify_type([1.0, 2.0], ExamplePointUDT())) + _make_type_verifier(ExamplePointUDT())(ExamplePoint(1.0, 2.0)) + self.assertRaises(ValueError, lambda: _make_type_verifier(ExamplePointUDT())([1.0, 2.0])) check_datatype(PythonOnlyUDT()) structtype_with_udt = StructType([StructField("label", DoubleType(), False), @@ -877,8 +877,10 @@ def check_datatype(datatype): check_datatype(structtype_with_udt) p = PythonOnlyPoint(1.0, 2.0) self.assertEqual(_infer_type(p), PythonOnlyUDT()) - _verify_type(PythonOnlyPoint(1.0, 2.0), PythonOnlyUDT()) - self.assertRaises(ValueError, lambda: _verify_type([1.0, 2.0], PythonOnlyUDT())) + _make_type_verifier(PythonOnlyUDT())(PythonOnlyPoint(1.0, 2.0)) 
+ self.assertRaises( + ValueError, + lambda: _make_type_verifier(PythonOnlyUDT())([1.0, 2.0])) def test_simple_udt_in_df(self): schema = StructType().add("key", LongType()).add("val", PythonOnlyUDT()) @@ -2636,6 +2638,195 @@ def range_frame_match(): importlib.reload(window) + +class DataTypeVerificationTests(unittest.TestCase): + + def test_verify_type_exception_msg(self): + self.assertRaisesRegexp( + ValueError, + "test_name", + lambda: _make_type_verifier(StringType(), nullable=False, name="test_name")(None)) + + schema = StructType([StructField('a', StructType([StructField('b', IntegerType())]))]) + self.assertRaisesRegexp( + TypeError, + "field b in field a", + lambda: _make_type_verifier(schema)([["data"]])) + + def test_verify_type_ok_nullable(self): + obj = None + types = [IntegerType(), FloatType(), StringType(), StructType([])] + for data_type in types: + try: + _make_type_verifier(data_type, nullable=True)(obj) + except Exception: + self.fail("verify_type(%s, %s, nullable=True)" % (obj, data_type)) + + def test_verify_type_not_nullable(self): + import array + import datetime + import decimal + + schema = StructType([ + StructField('s', StringType(), nullable=False), + StructField('i', IntegerType(), nullable=True)]) + + class MyObj: + def __init__(self, **kwargs): + for k, v in kwargs.items(): + setattr(self, k, v) + + # obj, data_type + success_spec = [ + # String + ("", StringType()), + (u"", StringType()), + (1, StringType()), + (1.0, StringType()), + ([], StringType()), + ({}, StringType()), + + # UDT + (ExamplePoint(1.0, 2.0), ExamplePointUDT()), + + # Boolean + (True, BooleanType()), + + # Byte + (-(2**7), ByteType()), + (2**7 - 1, ByteType()), + + # Short + (-(2**15), ShortType()), + (2**15 - 1, ShortType()), + + # Integer + (-(2**31), IntegerType()), + (2**31 - 1, IntegerType()), + + # Long + (2**64, LongType()), + + # Float & Double + (1.0, FloatType()), + (1.0, DoubleType()), + + # Decimal + (decimal.Decimal("1.0"), DecimalType()), + + # Binary + (bytearray([1, 2]), BinaryType()), + + # Date/Timestamp + (datetime.date(2000, 1, 2), DateType()), + (datetime.datetime(2000, 1, 2, 3, 4), DateType()), + (datetime.datetime(2000, 1, 2, 3, 4), TimestampType()), + + # Array + ([], ArrayType(IntegerType())), + (["1", None], ArrayType(StringType(), containsNull=True)), + ([1, 2], ArrayType(IntegerType())), + ((1, 2), ArrayType(IntegerType())), + (array.array('h', [1, 2]), ArrayType(IntegerType())), + + # Map + ({}, MapType(StringType(), IntegerType())), + ({"a": 1}, MapType(StringType(), IntegerType())), + ({"a": None}, MapType(StringType(), IntegerType(), valueContainsNull=True)), + + # Struct + ({"s": "a", "i": 1}, schema), + ({"s": "a", "i": None}, schema), + ({"s": "a"}, schema), + ({"s": "a", "f": 1.0}, schema), + (Row(s="a", i=1), schema), + (Row(s="a", i=None), schema), + (Row(s="a", i=1, f=1.0), schema), + (["a", 1], schema), + (["a", None], schema), + (("a", 1), schema), + (MyObj(s="a", i=1), schema), + (MyObj(s="a", i=None), schema), + (MyObj(s="a"), schema), + ] + + # obj, data_type, exception class + failure_spec = [ + # String (match anything but None) + (None, StringType(), ValueError), + + # UDT + (ExamplePoint(1.0, 2.0), PythonOnlyUDT(), ValueError), + + # Boolean + (1, BooleanType(), TypeError), + ("True", BooleanType(), TypeError), + ([1], BooleanType(), TypeError), + + # Byte + (-(2**7) - 1, ByteType(), ValueError), + (2**7, ByteType(), ValueError), + ("1", ByteType(), TypeError), + (1.0, ByteType(), TypeError), + + # Short + (-(2**15) - 1, ShortType(), 
ValueError), + (2**15, ShortType(), ValueError), + + # Integer + (-(2**31) - 1, IntegerType(), ValueError), + (2**31, IntegerType(), ValueError), + + # Float & Double + (1, FloatType(), TypeError), + (1, DoubleType(), TypeError), + + # Decimal + (1.0, DecimalType(), TypeError), + (1, DecimalType(), TypeError), + ("1.0", DecimalType(), TypeError), + + # Binary + (1, BinaryType(), TypeError), + + # Date/Timestamp + ("2000-01-02", DateType(), TypeError), + (946811040, TimestampType(), TypeError), + + # Array + (["1", None], ArrayType(StringType(), containsNull=False), ValueError), + ([1, "2"], ArrayType(IntegerType()), TypeError), + + # Map + ({"a": 1}, MapType(IntegerType(), IntegerType()), TypeError), + ({"a": "1"}, MapType(StringType(), IntegerType()), TypeError), + ({"a": None}, MapType(StringType(), IntegerType(), valueContainsNull=False), + ValueError), + + # Struct + ({"s": "a", "i": "1"}, schema, TypeError), + (Row(s="a"), schema, ValueError), # Row can't have missing field + (Row(s="a", i="1"), schema, TypeError), + (["a"], schema, ValueError), + (["a", "1"], schema, TypeError), + (MyObj(s="a", i="1"), schema, TypeError), + (MyObj(s=None, i="1"), schema, ValueError), + ] + + # Check success cases + for obj, data_type in success_spec: + try: + _make_type_verifier(data_type, nullable=False)(obj) + except Exception: + self.fail("verify_type(%s, %s, nullable=False)" % (obj, data_type)) + + # Check failure cases + for obj, data_type, exp in failure_spec: + msg = "verify_type(%s, %s, nullable=False) == %s" % (obj, data_type, exp) + with self.assertRaises(exp, msg=msg): + _make_type_verifier(data_type, nullable=False)(obj) + + if __name__ == "__main__": from pyspark.sql.tests import * if xmlrunner: diff --git a/python/pyspark/sql/types.py b/python/pyspark/sql/types.py index 26b54a7fb370..f5505ed4722a 100644 --- a/python/pyspark/sql/types.py +++ b/python/pyspark/sql/types.py @@ -1249,121 +1249,196 @@ def _infer_schema_type(obj, dataType): } -def _verify_type(obj, dataType, nullable=True): +def _make_type_verifier(dataType, nullable=True, name=None): """ - Verify the type of obj against dataType, raise a TypeError if they do not match. - - Also verify the value of obj against datatype, raise a ValueError if it's not within the allowed - range, e.g. using 128 as ByteType will overflow. Note that, Python float is not checked, so it - will become infinity when cast to Java float if it overflows. - - >>> _verify_type(None, StructType([])) - >>> _verify_type("", StringType()) - >>> _verify_type(0, LongType()) - >>> _verify_type(list(range(3)), ArrayType(ShortType())) - >>> _verify_type(set(), ArrayType(StringType())) # doctest: +IGNORE_EXCEPTION_DETAIL + Make a verifier that checks the type of obj against dataType and raises a TypeError if they do + not match. + + This verifier also checks the value of obj against datatype and raises a ValueError if it's not + within the allowed range, e.g. using 128 as ByteType will overflow. Note that, Python float is + not checked, so it will become infinity when cast to Java float if it overflows. + + >>> _make_type_verifier(StructType([]))(None) + >>> _make_type_verifier(StringType())("") + >>> _make_type_verifier(LongType())(0) + >>> _make_type_verifier(ArrayType(ShortType()))(list(range(3))) + >>> _make_type_verifier(ArrayType(StringType()))(set()) # doctest: +IGNORE_EXCEPTION_DETAIL Traceback (most recent call last): ... TypeError:... 
- >>> _verify_type({}, MapType(StringType(), IntegerType())) - >>> _verify_type((), StructType([])) - >>> _verify_type([], StructType([])) - >>> _verify_type([1], StructType([])) # doctest: +IGNORE_EXCEPTION_DETAIL + >>> _make_type_verifier(MapType(StringType(), IntegerType()))({}) + >>> _make_type_verifier(StructType([]))(()) + >>> _make_type_verifier(StructType([]))([]) + >>> _make_type_verifier(StructType([]))([1]) # doctest: +IGNORE_EXCEPTION_DETAIL Traceback (most recent call last): ... ValueError:... >>> # Check if numeric values are within the allowed range. - >>> _verify_type(12, ByteType()) - >>> _verify_type(1234, ByteType()) # doctest: +IGNORE_EXCEPTION_DETAIL + >>> _make_type_verifier(ByteType())(12) + >>> _make_type_verifier(ByteType())(1234) # doctest: +IGNORE_EXCEPTION_DETAIL Traceback (most recent call last): ... ValueError:... - >>> _verify_type(None, ByteType(), False) # doctest: +IGNORE_EXCEPTION_DETAIL + >>> _make_type_verifier(ByteType(), False)(None) # doctest: +IGNORE_EXCEPTION_DETAIL Traceback (most recent call last): ... ValueError:... - >>> _verify_type([1, None], ArrayType(ShortType(), False)) # doctest: +IGNORE_EXCEPTION_DETAIL + >>> _make_type_verifier( + ... ArrayType(ShortType(), False))([1, None]) # doctest: +IGNORE_EXCEPTION_DETAIL Traceback (most recent call last): ... ValueError:... - >>> _verify_type({None: 1}, MapType(StringType(), IntegerType())) + >>> _make_type_verifier(MapType(StringType(), IntegerType()))({None: 1}) Traceback (most recent call last): ... ValueError:... >>> schema = StructType().add("a", IntegerType()).add("b", StringType(), False) - >>> _verify_type((1, None), schema) # doctest: +IGNORE_EXCEPTION_DETAIL + >>> _make_type_verifier(schema)((1, None)) # doctest: +IGNORE_EXCEPTION_DETAIL Traceback (most recent call last): ... ValueError:... 
""" - if obj is None: - if nullable: - return - else: - raise ValueError("This field is not nullable, but got None") - # StringType can work with any types - if isinstance(dataType, StringType): - return + if name is None: + new_msg = lambda msg: msg + new_name = lambda n: "field %s" % n + else: + new_msg = lambda msg: "%s: %s" % (name, msg) + new_name = lambda n: "field %s in %s" % (n, name) - if isinstance(dataType, UserDefinedType): - if not (hasattr(obj, '__UDT__') and obj.__UDT__ == dataType): - raise ValueError("%r is not an instance of type %r" % (obj, dataType)) - _verify_type(dataType.toInternal(obj), dataType.sqlType()) - return + def verify_nullability(obj): + if obj is None: + if nullable: + return True + else: + raise ValueError(new_msg("This field is not nullable, but got None")) + else: + return False _type = type(dataType) - assert _type in _acceptable_types, "unknown datatype: %s for object %r" % (dataType, obj) - if _type is StructType: - # check the type and fields later - pass - else: + def assert_acceptable_types(obj): + assert _type in _acceptable_types, \ + new_msg("unknown datatype: %s for object %r" % (dataType, obj)) + + def verify_acceptable_types(obj): # subclass of them can not be fromInternal in JVM if type(obj) not in _acceptable_types[_type]: - raise TypeError("%s can not accept object %r in type %s" % (dataType, obj, type(obj))) + raise TypeError(new_msg("%s can not accept object %r in type %s" + % (dataType, obj, type(obj)))) + + if isinstance(dataType, StringType): + # StringType can work with any types + verify_value = lambda _: _ + + elif isinstance(dataType, UserDefinedType): + verifier = _make_type_verifier(dataType.sqlType(), name=name) - if isinstance(dataType, ByteType): - if obj < -128 or obj > 127: - raise ValueError("object of ByteType out of range, got: %s" % obj) + def verify_udf(obj): + if not (hasattr(obj, '__UDT__') and obj.__UDT__ == dataType): + raise ValueError(new_msg("%r is not an instance of type %r" % (obj, dataType))) + verifier(dataType.toInternal(obj)) + + verify_value = verify_udf + + elif isinstance(dataType, ByteType): + def verify_byte(obj): + assert_acceptable_types(obj) + verify_acceptable_types(obj) + if obj < -128 or obj > 127: + raise ValueError(new_msg("object of ByteType out of range, got: %s" % obj)) + + verify_value = verify_byte elif isinstance(dataType, ShortType): - if obj < -32768 or obj > 32767: - raise ValueError("object of ShortType out of range, got: %s" % obj) + def verify_short(obj): + assert_acceptable_types(obj) + verify_acceptable_types(obj) + if obj < -32768 or obj > 32767: + raise ValueError(new_msg("object of ShortType out of range, got: %s" % obj)) + + verify_value = verify_short elif isinstance(dataType, IntegerType): - if obj < -2147483648 or obj > 2147483647: - raise ValueError("object of IntegerType out of range, got: %s" % obj) + def verify_integer(obj): + assert_acceptable_types(obj) + verify_acceptable_types(obj) + if obj < -2147483648 or obj > 2147483647: + raise ValueError( + new_msg("object of IntegerType out of range, got: %s" % obj)) + + verify_value = verify_integer elif isinstance(dataType, ArrayType): - for i in obj: - _verify_type(i, dataType.elementType, dataType.containsNull) + element_verifier = _make_type_verifier( + dataType.elementType, dataType.containsNull, name="element in array %s" % name) + + def verify_array(obj): + assert_acceptable_types(obj) + verify_acceptable_types(obj) + for i in obj: + element_verifier(i) + + verify_value = verify_array elif isinstance(dataType, 
MapType): - for k, v in obj.items(): - _verify_type(k, dataType.keyType, False) - _verify_type(v, dataType.valueType, dataType.valueContainsNull) + key_verifier = _make_type_verifier(dataType.keyType, False, name="key of map %s" % name) + value_verifier = _make_type_verifier( + dataType.valueType, dataType.valueContainsNull, name="value of map %s" % name) + + def verify_map(obj): + assert_acceptable_types(obj) + verify_acceptable_types(obj) + for k, v in obj.items(): + key_verifier(k) + value_verifier(v) + + verify_value = verify_map elif isinstance(dataType, StructType): - if isinstance(obj, dict): - for f in dataType.fields: - _verify_type(obj.get(f.name), f.dataType, f.nullable) - elif isinstance(obj, Row) and getattr(obj, "__from_dict__", False): - # the order in obj could be different than dataType.fields - for f in dataType.fields: - _verify_type(obj[f.name], f.dataType, f.nullable) - elif isinstance(obj, (tuple, list)): - if len(obj) != len(dataType.fields): - raise ValueError("Length of object (%d) does not match with " - "length of fields (%d)" % (len(obj), len(dataType.fields))) - for v, f in zip(obj, dataType.fields): - _verify_type(v, f.dataType, f.nullable) - elif hasattr(obj, "__dict__"): - d = obj.__dict__ - for f in dataType.fields: - _verify_type(d.get(f.name), f.dataType, f.nullable) - else: - raise TypeError("StructType can not accept object %r in type %s" % (obj, type(obj))) + verifiers = [] + for f in dataType.fields: + verifier = _make_type_verifier(f.dataType, f.nullable, name=new_name(f.name)) + verifiers.append((f.name, verifier)) + + def verify_struct(obj): + assert_acceptable_types(obj) + + if isinstance(obj, dict): + for f, verifier in verifiers: + verifier(obj.get(f)) + elif isinstance(obj, Row) and getattr(obj, "__from_dict__", False): + # the order in obj could be different than dataType.fields + for f, verifier in verifiers: + verifier(obj[f]) + elif isinstance(obj, (tuple, list)): + if len(obj) != len(verifiers): + raise ValueError( + new_msg("Length of object (%d) does not match with " + "length of fields (%d)" % (len(obj), len(verifiers)))) + for v, (_, verifier) in zip(obj, verifiers): + verifier(v) + elif hasattr(obj, "__dict__"): + d = obj.__dict__ + for f, verifier in verifiers: + verifier(d.get(f)) + else: + raise TypeError(new_msg("StructType can not accept object %r in type %s" + % (obj, type(obj)))) + verify_value = verify_struct + + else: + def verify_default(obj): + assert_acceptable_types(obj) + verify_acceptable_types(obj) + + verify_value = verify_default + + def verify(obj): + if not verify_nullability(obj): + verify_value(obj) + + return verify # This is used to unpickle a Row from JVM From 29b1f6b09f98e216af71e893a9da0c4717c80679 Mon Sep 17 00:00:00 2001 From: gatorsmile Date: Tue, 4 Jul 2017 08:54:07 -0700 Subject: [PATCH 028/125] [SPARK-21256][SQL] Add withSQLConf to Catalyst Test ### What changes were proposed in this pull request? SQLConf is moved to Catalyst. We are adding more and more test cases for verifying the conf-specific behaviors. It is nice to add a helper function to simplify the test cases. ### How was this patch tested? N/A Author: gatorsmile Closes #18469 from gatorsmile/withSQLConf. 
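As a sketch of the simplification this helper enables (the `withSQLConf` signature and the `CONSTRAINT_PROPAGATION_ENABLED` key appear in the diffs below; the test body itself is illustrative), a suite mixing in `PlanTest` can write:

```scala
test("constraint propagation can be toggled per test") {
  withSQLConf(SQLConf.CONSTRAINT_PROPAGATION_ENABLED.key -> "false") {
    // The override is visible through SQLConf.get inside the block.
    assert(!SQLConf.get.getConf(SQLConf.CONSTRAINT_PROPAGATION_ENABLED))
  }
  // Outside the block the previous value is restored, even if the body threw,
  // with no try/finally boilerplate in the test itself.
}
```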
--- .../InferFiltersFromConstraintsSuite.scala | 5 +-- .../optimizer/OuterJoinEliminationSuite.scala | 6 +--- .../optimizer/PruneFiltersSuite.scala | 6 +--- .../plans/ConstraintPropagationSuite.scala | 24 +++++++------- .../spark/sql/catalyst/plans/PlanTest.scala | 32 ++++++++++++++++++- .../AggregateEstimationSuite.scala | 9 ++---- .../BasicStatsEstimationSuite.scala | 12 +++---- .../spark/sql/SparkSessionBuilderSuite.scala | 3 ++ .../apache/spark/sql/test/SQLTestUtils.scala | 30 ++++------------- 9 files changed, 64 insertions(+), 63 deletions(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/InferFiltersFromConstraintsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/InferFiltersFromConstraintsSuite.scala index cdc9f25cf877..d2dd469e2d74 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/InferFiltersFromConstraintsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/InferFiltersFromConstraintsSuite.scala @@ -206,13 +206,10 @@ class InferFiltersFromConstraintsSuite extends PlanTest { } test("No inferred filter when constraint propagation is disabled") { - try { - SQLConf.get.setConf(SQLConf.CONSTRAINT_PROPAGATION_ENABLED, false) + withSQLConf(SQLConf.CONSTRAINT_PROPAGATION_ENABLED.key -> "false") { val originalQuery = testRelation.where('a === 1 && 'a === 'b).analyze val optimized = Optimize.execute(originalQuery) comparePlans(optimized, originalQuery) - } finally { - SQLConf.get.unsetConf(SQLConf.CONSTRAINT_PROPAGATION_ENABLED) } } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OuterJoinEliminationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OuterJoinEliminationSuite.scala index 623ff3d446a5..893c111c2906 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OuterJoinEliminationSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OuterJoinEliminationSuite.scala @@ -234,9 +234,7 @@ class OuterJoinEliminationSuite extends PlanTest { } test("no outer join elimination if constraint propagation is disabled") { - try { - SQLConf.get.setConf(SQLConf.CONSTRAINT_PROPAGATION_ENABLED, false) - + withSQLConf(SQLConf.CONSTRAINT_PROPAGATION_ENABLED.key -> "false") { val x = testRelation.subquery('x) val y = testRelation1.subquery('y) @@ -251,8 +249,6 @@ class OuterJoinEliminationSuite extends PlanTest { val optimized = Optimize.execute(originalQuery.analyze) comparePlans(optimized, originalQuery.analyze) - } finally { - SQLConf.get.unsetConf(SQLConf.CONSTRAINT_PROPAGATION_ENABLED) } } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/PruneFiltersSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/PruneFiltersSuite.scala index 706634cdd29b..6d1a05f3c998 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/PruneFiltersSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/PruneFiltersSuite.scala @@ -25,7 +25,6 @@ import org.apache.spark.sql.catalyst.plans._ import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.rules._ import org.apache.spark.sql.internal.SQLConf -import org.apache.spark.sql.internal.SQLConf.CONSTRAINT_PROPAGATION_ENABLED class PruneFiltersSuite extends PlanTest { @@ -149,8 +148,7 @@ class PruneFiltersSuite extends PlanTest { ("tr1.a".attr > 10 || "tr1.c".attr < 10) && 
'd.attr < 100) - SQLConf.get.setConf(SQLConf.CONSTRAINT_PROPAGATION_ENABLED, false) - try { + withSQLConf(SQLConf.CONSTRAINT_PROPAGATION_ENABLED.key -> "false") { val optimized = Optimize.execute(queryWithUselessFilter.analyze) // When constraint propagation is disabled, the useless filter won't be pruned. // It gets pushed down. Because the rule `CombineFilters` runs only once, there are redundant @@ -160,8 +158,6 @@ class PruneFiltersSuite extends PlanTest { .join(tr2.where('d.attr < 100).where('d.attr < 100), Inner, Some("tr1.a".attr === "tr2.a".attr)).analyze comparePlans(optimized, correctAnswer) - } finally { - SQLConf.get.unsetConf(SQLConf.CONSTRAINT_PROPAGATION_ENABLED) } } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/ConstraintPropagationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/ConstraintPropagationSuite.scala index a3948d90b0e4..a37e06d92264 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/ConstraintPropagationSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/ConstraintPropagationSuite.scala @@ -28,7 +28,7 @@ import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.{DataType, DoubleType, IntegerType, LongType, StringType} -class ConstraintPropagationSuite extends SparkFunSuite { +class ConstraintPropagationSuite extends SparkFunSuite with PlanTest { private def resolveColumn(tr: LocalRelation, columnName: String): Expression = resolveColumn(tr.analyze, columnName) @@ -400,26 +400,26 @@ class ConstraintPropagationSuite extends SparkFunSuite { } test("enable/disable constraint propagation") { - try { - val tr = LocalRelation('a.int, 'b.string, 'c.int) - val filterRelation = tr.where('a.attr > 10) + val tr = LocalRelation('a.int, 'b.string, 'c.int) + val filterRelation = tr.where('a.attr > 10) - SQLConf.get.setConf(SQLConf.CONSTRAINT_PROPAGATION_ENABLED, true) + withSQLConf(SQLConf.CONSTRAINT_PROPAGATION_ENABLED.key -> "true") { assert(filterRelation.analyze.constraints.nonEmpty) + } - SQLConf.get.setConf(SQLConf.CONSTRAINT_PROPAGATION_ENABLED, false) + withSQLConf(SQLConf.CONSTRAINT_PROPAGATION_ENABLED.key -> "false") { assert(filterRelation.analyze.constraints.isEmpty) + } - val aliasedRelation = tr.where('c.attr > 10 && 'a.attr < 5) - .groupBy('a, 'c, 'b)('a, 'c.as("c1"), count('a).as("a3")).select('c1, 'a, 'a3) + val aliasedRelation = tr.where('c.attr > 10 && 'a.attr < 5) + .groupBy('a, 'c, 'b)('a, 'c.as("c1"), count('a).as("a3")).select('c1, 'a, 'a3) - SQLConf.get.setConf(SQLConf.CONSTRAINT_PROPAGATION_ENABLED, true) + withSQLConf(SQLConf.CONSTRAINT_PROPAGATION_ENABLED.key -> "true") { assert(aliasedRelation.analyze.constraints.nonEmpty) + } - SQLConf.get.setConf(SQLConf.CONSTRAINT_PROPAGATION_ENABLED, false) + withSQLConf(SQLConf.CONSTRAINT_PROPAGATION_ENABLED.key -> "false") { assert(aliasedRelation.analyze.constraints.isEmpty) - } finally { - SQLConf.get.unsetConf(SQLConf.CONSTRAINT_PROPAGATION_ENABLED) } } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/PlanTest.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/PlanTest.scala index 6883d23d477e..e9679d336150 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/PlanTest.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/PlanTest.scala @@ -18,6 +18,7 @@ package org.apache.spark.sql.catalyst.plans import 
org.apache.spark.SparkFunSuite +import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.analysis.SimpleAnalyzer import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression @@ -28,8 +29,9 @@ import org.apache.spark.sql.internal.SQLConf /** * Provides helper methods for comparing plans. */ -abstract class PlanTest extends SparkFunSuite with PredicateHelper { +trait PlanTest extends SparkFunSuite with PredicateHelper { + // TODO(gatorsmile): remove this from PlanTest and all the analyzer/optimizer rules protected val conf = new SQLConf().copy(SQLConf.CASE_SENSITIVE -> true) /** @@ -142,4 +144,32 @@ abstract class PlanTest extends SparkFunSuite with PredicateHelper { plan1 == plan2 } } + + /** + * Sets all SQL configurations specified in `pairs`, calls `f`, and then restore all SQL + * configurations. + */ + protected def withSQLConf(pairs: (String, String)*)(f: => Unit): Unit = { + val conf = SQLConf.get + val (keys, values) = pairs.unzip + val currentValues = keys.map { key => + if (conf.contains(key)) { + Some(conf.getConfString(key)) + } else { + None + } + } + (keys, values).zipped.foreach { (k, v) => + if (SQLConf.staticConfKeys.contains(k)) { + throw new AnalysisException(s"Cannot modify the value of a static config: $k") + } + conf.setConfString(k, v) + } + try f finally { + keys.zip(currentValues).foreach { + case (key, Some(value)) => conf.setConfString(key, value) + case (key, None) => conf.unsetConf(key) + } + } + } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/AggregateEstimationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/AggregateEstimationSuite.scala index 30ddf03bd3c4..23f95a6cc2ac 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/AggregateEstimationSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/AggregateEstimationSuite.scala @@ -19,12 +19,13 @@ package org.apache.spark.sql.catalyst.statsEstimation import org.apache.spark.sql.catalyst.expressions.{Alias, Attribute, AttributeMap, Literal} import org.apache.spark.sql.catalyst.expressions.aggregate.Count +import org.apache.spark.sql.catalyst.plans.PlanTest import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.plans.logical.statsEstimation.EstimationUtils._ import org.apache.spark.sql.internal.SQLConf -class AggregateEstimationSuite extends StatsEstimationTestBase { +class AggregateEstimationSuite extends StatsEstimationTestBase with PlanTest { /** Columns for testing */ private val columnInfo: AttributeMap[ColumnStat] = AttributeMap(Seq( @@ -100,9 +101,7 @@ class AggregateEstimationSuite extends StatsEstimationTestBase { size = Some(4 * (8 + 4)), attributeStats = AttributeMap(Seq("key12").map(nameToColInfo))) - val originalValue = SQLConf.get.getConf(SQLConf.CBO_ENABLED) - try { - SQLConf.get.setConf(SQLConf.CBO_ENABLED, false) + withSQLConf(SQLConf.CBO_ENABLED.key -> "false") { val noGroupAgg = Aggregate(groupingExpressions = Nil, aggregateExpressions = Seq(Alias(Count(Literal(1)), "cnt")()), child) assert(noGroupAgg.stats == @@ -114,8 +113,6 @@ class AggregateEstimationSuite extends StatsEstimationTestBase { assert(hasGroupAgg.stats == // From UnaryNode.computeStats, childSize * outputRowSize / childRowSize Statistics(sizeInBytes = 48 * (8 + 4 + 8) / (8 + 4))) - } finally { - SQLConf.get.setConf(SQLConf.CBO_ENABLED, originalValue) } 
} diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/BasicStatsEstimationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/BasicStatsEstimationSuite.scala index 31a8cbdee977..5fd21a06a109 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/BasicStatsEstimationSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/BasicStatsEstimationSuite.scala @@ -18,12 +18,13 @@ package org.apache.spark.sql.catalyst.statsEstimation import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeMap, AttributeReference, Literal} +import org.apache.spark.sql.catalyst.plans.PlanTest import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.IntegerType -class BasicStatsEstimationSuite extends StatsEstimationTestBase { +class BasicStatsEstimationSuite extends PlanTest with StatsEstimationTestBase { val attribute = attr("key") val colStat = ColumnStat(distinctCount = 10, min = Some(1), max = Some(10), nullCount = 0, avgLen = 4, maxLen = 4) @@ -82,18 +83,15 @@ class BasicStatsEstimationSuite extends StatsEstimationTestBase { plan: LogicalPlan, expectedStatsCboOn: Statistics, expectedStatsCboOff: Statistics): Unit = { - val originalValue = SQLConf.get.getConf(SQLConf.CBO_ENABLED) - try { + withSQLConf(SQLConf.CBO_ENABLED.key -> "true") { // Invalidate statistics plan.invalidateStatsCache() - SQLConf.get.setConf(SQLConf.CBO_ENABLED, true) assert(plan.stats == expectedStatsCboOn) + } + withSQLConf(SQLConf.CBO_ENABLED.key -> "false") { plan.invalidateStatsCache() - SQLConf.get.setConf(SQLConf.CBO_ENABLED, false) assert(plan.stats == expectedStatsCboOff) - } finally { - SQLConf.get.setConf(SQLConf.CBO_ENABLED, originalValue) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionBuilderSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionBuilderSuite.scala index 4f6d5f79d466..cdac6827082c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionBuilderSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionBuilderSuite.scala @@ -18,6 +18,7 @@ package org.apache.spark.sql import org.apache.spark.{SparkConf, SparkContext, SparkFunSuite} +import org.apache.spark.sql.internal.SQLConf /** * Test cases for the builder pattern of [[SparkSession]]. 
@@ -67,6 +68,8 @@ class SparkSessionBuilderSuite extends SparkFunSuite { assert(activeSession != defaultSession) assert(session == activeSession) assert(session.conf.get("spark-config2") == "a") + assert(session.sessionState.conf == SQLConf.get) + assert(SQLConf.get.getConfString("spark-config2") == "a") SparkSession.clearActiveSession() assert(SparkSession.builder().getOrCreate() == defaultSession) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestUtils.scala b/sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestUtils.scala index d74a7cce25ed..92ee7d596acd 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestUtils.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestUtils.scala @@ -35,9 +35,11 @@ import org.apache.spark.sql._ import org.apache.spark.sql.catalyst.analysis.NoSuchTableException import org.apache.spark.sql.catalyst.catalog.SessionCatalog.DEFAULT_DATABASE import org.apache.spark.sql.catalyst.FunctionIdentifier +import org.apache.spark.sql.catalyst.plans.PlanTest import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.catalyst.util._ import org.apache.spark.sql.execution.FilterExec +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.util.{UninterruptibleThread, Utils} /** @@ -53,7 +55,8 @@ import org.apache.spark.util.{UninterruptibleThread, Utils} private[sql] trait SQLTestUtils extends SparkFunSuite with Eventually with BeforeAndAfterAll - with SQLTestData { self => + with SQLTestData + with PlanTest { self => protected def sparkContext = spark.sparkContext @@ -89,28 +92,9 @@ private[sql] trait SQLTestUtils } } - /** - * Sets all SQL configurations specified in `pairs`, calls `f`, and then restore all SQL - * configurations. - * - * @todo Probably this method should be moved to a more general place - */ - protected def withSQLConf(pairs: (String, String)*)(f: => Unit): Unit = { - val (keys, values) = pairs.unzip - val currentValues = keys.map { key => - if (spark.conf.contains(key)) { - Some(spark.conf.get(key)) - } else { - None - } - } - (keys, values).zipped.foreach(spark.conf.set) - try f finally { - keys.zip(currentValues).foreach { - case (key, Some(value)) => spark.conf.set(key, value) - case (key, None) => spark.conf.unset(key) - } - } + protected override def withSQLConf(pairs: (String, String)*)(f: => Unit): Unit = { + SparkSession.setActiveSession(spark) + super.withSQLConf(pairs: _*)(f) } /** From a3c29fcbbda02c1528b4185bcb880c91077d480c Mon Sep 17 00:00:00 2001 From: "YIHAODIAN\\wangshuangshuang" Date: Tue, 4 Jul 2017 09:44:27 -0700 Subject: [PATCH 029/125] [SPARK-19726][SQL] Failed to insert null timestamp value to MySQL using Spark JDBC ## What changes were proposed in this pull request? When creating a table like the following: > create table timestamp_test(id int(11), time_stamp timestamp not null default current_timestamp); the result of executing "insert into timestamp_test values (111, null)" differs between Spark and MySQL. ``` mysql> select * from timestamp_test; +------+---------------------+ | id | time_stamp | +------+---------------------+ | 111 | 1970-01-01 00:00:00 | -> spark | 111 | 2017-06-27 19:32:38 | -> mysql +------+---------------------+ 2 rows in set (0.00 sec) ``` Because ```StructField.nullable``` is false in such a case, the generated code of ```InvokeLike``` and ```BoundReference``` does not check whether the field is null.
Instead, they directly use ```CodegenContext.INPUT_ROW.getLong(1)```; however, ```UnsafeRow.setNullAt(1)``` will put 0 in the underlying memory. The PR ```always``` sets ```StructField.nullable``` to true after obtaining metadata from the JDBC connection, since we can insert null into a NOT NULL timestamp column in MySQL. In this way, Spark will propagate null to the underlying DB engine and let the DB choose how to process NULL. ## How was this patch tested? Added tests. Author: YIHAODIAN\wangshuangshuang Author: Shuangshuang Wang Closes #18445 from shuangshuangwang/SPARK-19726. --- .../sql/execution/datasources/jdbc/JDBCRDD.scala | 2 +- .../sql/execution/datasources/jdbc/JdbcUtils.scala | 12 ++++++++++-- .../org/apache/spark/sql/jdbc/JDBCWriteSuite.scala | 8 ++++++++ 3 files changed, 19 insertions(+), 3 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRDD.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRDD.scala index 0f53b5c7c6f0..57e9bc9b7045 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRDD.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRDD.scala @@ -59,7 +59,7 @@ object JDBCRDD extends Logging { try { val rs = statement.executeQuery() try { - JdbcUtils.getSchema(rs, dialect) + JdbcUtils.getSchema(rs, dialect, alwaysNullable = true) } finally { rs.close() } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala index ca61c2efe2dd..55b2539c1338 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala @@ -266,10 +266,14 @@ object JdbcUtils extends Logging { /** * Takes a [[ResultSet]] and returns its Catalyst schema. * + * @param alwaysNullable If true, all the columns are nullable. * @return A [[StructType]] giving the Catalyst schema. * @throws SQLException if the schema contains an unsupported type.
*/ - def getSchema(resultSet: ResultSet, dialect: JdbcDialect): StructType = { + def getSchema( + resultSet: ResultSet, + dialect: JdbcDialect, + alwaysNullable: Boolean = false): StructType = { val rsmd = resultSet.getMetaData val ncols = rsmd.getColumnCount val fields = new Array[StructField](ncols) @@ -290,7 +294,11 @@ object JdbcUtils extends Logging { rsmd.getClass.getName == "org.apache.hive.jdbc.HiveResultSetMetaData" => true } } - val nullable = rsmd.isNullable(i + 1) != ResultSetMetaData.columnNoNulls + val nullable = if (alwaysNullable) { + true + } else { + rsmd.isNullable(i + 1) != ResultSetMetaData.columnNoNulls + } val metadata = new MetadataBuilder() .putString("name", columnName) .putLong("scale", fieldScale) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCWriteSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCWriteSuite.scala index bf1fd160704f..92f50a095f19 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCWriteSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCWriteSuite.scala @@ -24,6 +24,7 @@ import scala.collection.JavaConverters.propertiesAsScalaMapConverter import org.scalatest.BeforeAndAfter +import org.apache.spark.SparkException import org.apache.spark.sql.{AnalysisException, DataFrame, Row, SaveMode} import org.apache.spark.sql.catalyst.parser.ParseException import org.apache.spark.sql.execution.datasources.jdbc.{JDBCOptions, JdbcUtils} @@ -506,4 +507,11 @@ class JDBCWriteSuite extends SharedSQLContext with BeforeAndAfter { "schema struct")) } } + + test("SPARK-19726: INSERT null to a NOT NULL column") { + val e = intercept[SparkException] { + sql("INSERT INTO PEOPLE1 values (null, null)") + }.getMessage + assert(e.contains("NULL not allowed for column \"NAME\"")) + } } From 1b50e0e0d6fd9d1b815a3bb37647ea659222e3f1 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Tue, 4 Jul 2017 09:48:40 -0700 Subject: [PATCH 030/125] [SPARK-20256][SQL] SessionState should be created more lazily ## What changes were proposed in this pull request? `SessionState` is designed to be created lazily. However, in reality, it is created immediately in `SparkSession.Builder.getOrCreate` ([here](https://github.com/apache/spark/blob/master/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala#L943)). This PR aims to recover the lazy behavior by storing the options in `initialSessionOptions`. The benefit is illustrated below: users can start `spark-shell` and use RDD operations without any problems. **BEFORE** ```scala $ bin/spark-shell java.lang.IllegalArgumentException: Error while instantiating 'org.apache.spark.sql.hive.HiveSessionStateBuilder' ... Caused by: org.apache.spark.sql.AnalysisException: org.apache.hadoop.hive.ql.metadata.HiveException: MetaException(message:java.security.AccessControlException: Permission denied: user=spark, access=READ, inode="/apps/hive/warehouse":hive:hdfs:drwx------ ``` As reported in SPARK-20256, this happens when the warehouse directory is not accessible to the user. **AFTER** ```scala $ bin/spark-shell ... Welcome to ____ __ / __/__ ___ _____/ /__ _\ \/ _ \/ _ `/ __/ '_/ /___/ .__/\_,_/_/ /_/\_\ version 2.3.0-SNAPSHOT /_/ Using Scala version 2.11.8 (Java HotSpot(TM) 64-Bit Server VM, Java 1.8.0_112) Type in expressions to have them evaluated. Type :help for more information. scala> sc.range(0, 10, 1).count() res0: Long = 10 ``` ## How was this patch tested? Manual. This closes #18512 . Author: Dongjoon Hyun Closes #18501 from dongjoon-hyun/SPARK-20256.
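A minimal, self-contained sketch of the buffering pattern this patch adopts (class and member names here are illustrative, not Spark's):

```scala
// Options are recorded at build time; the expensive state is materialized
// only when the lazy val is first forced, not inside getOrCreate itself.
class Builder {
  private val bufferedOptions = scala.collection.mutable.HashMap.empty[String, String]

  def config(k: String, v: String): this.type = {
    bufferedOptions += (k -> v) // no session state touched here
    this
  }

  lazy val sessionState: Map[String, String] = {
    // Runs once, on first access; in the real patch this is where
    // initialSessionOptions is replayed into the freshly created state.
    bufferedOptions.toMap
  }
}
```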
--- .../scala/org/apache/spark/sql/SparkSession.scala | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala b/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala index 2c38f7d7c88d..0ddcd2111aa5 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala @@ -117,6 +117,12 @@ class SparkSession private( existingSharedState.getOrElse(new SharedState(sparkContext)) } + /** + * Initial options for the session. These options are applied once when sessionState is created. + */ + @transient + private[sql] val initialSessionOptions = new scala.collection.mutable.HashMap[String, String] + /** * State isolated across sessions, including SQL configurations, temporary tables, registered * functions, and everything else that accepts a [[org.apache.spark.sql.internal.SQLConf]]. @@ -132,9 +138,11 @@ class SparkSession private( parentSessionState .map(_.clone(this)) .getOrElse { - SparkSession.instantiateSessionState( + val state = SparkSession.instantiateSessionState( SparkSession.sessionStateClassName(sparkContext.conf), self) + initialSessionOptions.foreach { case (k, v) => state.conf.setConfString(k, v) } + state } } @@ -940,7 +948,7 @@ object SparkSession { } session = new SparkSession(sparkContext, None, None, extensions) - options.foreach { case (k, v) => session.sessionState.conf.setConfString(k, v) } + options.foreach { case (k, v) => session.initialSessionOptions.put(k, v) } defaultSession.set(session) // Register a successfully instantiated context to the singleton. This should be at the From 4d6d8192c807006ff89488a1d38bc6f7d41de5cf Mon Sep 17 00:00:00 2001 From: dardelet Date: Tue, 4 Jul 2017 17:58:44 +0100 Subject: [PATCH 031/125] [SPARK-21268][MLLIB] Move center calculations to a distributed map in KMeans ## What changes were proposed in this pull request? The scal() call and the creation of the newCenter vector are done in the driver, after a collectAsMap operation, although they could be done on the distributed RDD. This PR moves this code before the collectAsMap for more efficiency. ## How was this patch tested? This was tested manually by running the KMeansExample and verifying that the new code ran without error and gave the same output as before. Author: dardelet Author: Guillaume Dardelet Closes #18491 from dardelet/move-center-calculation-to-distributed-map-kmean.
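A condensed sketch of the change (distilled from the diff below; `sumsAndCounts` is an assumed name for the `reduceByKey` output of type `RDD[(Int, (Vector, Long))]`):

```scala
// Before: (sum, count) pairs were collected to the driver, which then divided
// and wrapped each mean. After: that work runs inside mapValues on the
// executors, so collectAsMap only ships the finished centers.
val newCenters = sumsAndCounts
  .mapValues { case (sum, count) =>
    scal(1.0 / count, sum)      // in-place: sum becomes the cluster mean
    new VectorWithNorm(sum)
  }
  .collectAsMap()
```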
--- .../org/apache/spark/mllib/clustering/KMeans.scala | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala index fa72b72e2d92..98e50c5b45cf 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala @@ -272,8 +272,8 @@ class KMeans private ( val costAccum = sc.doubleAccumulator val bcCenters = sc.broadcast(centers) - // Find the sum and count of points mapping to each center - val totalContribs = data.mapPartitions { points => + // Find the new centers + val newCenters = data.mapPartitions { points => val thisCenters = bcCenters.value val dims = thisCenters.head.vector.size @@ -292,15 +292,16 @@ class KMeans private ( }.reduceByKey { case ((sum1, count1), (sum2, count2)) => axpy(1.0, sum2, sum1) (sum1, count1 + count2) + }.mapValues { case (sum, count) => + scal(1.0 / count, sum) + new VectorWithNorm(sum) }.collectAsMap() bcCenters.destroy(blocking = false) // Update the cluster centers and costs converged = true - totalContribs.foreach { case (j, (sum, count)) => - scal(1.0 / count, sum) - val newCenter = new VectorWithNorm(sum) + newCenters.foreach { case (j, newCenter) => if (converged && KMeans.fastSquaredDistance(newCenter, centers(j)) > epsilon * epsilon) { converged = false } From cec392150451a64c9c2902b7f8f4b3b38f25cbea Mon Sep 17 00:00:00 2001 From: actuaryzhang Date: Tue, 4 Jul 2017 12:18:51 -0700 Subject: [PATCH 032/125] [SPARK-20889][SPARKR] Grouped documentation for WINDOW column methods ## What changes were proposed in this pull request? Grouped documentation for column window methods. Author: actuaryzhang Closes #18481 from actuaryzhang/sparkRDocWindow. --- R/pkg/R/functions.R | 225 ++++++++++++++------------------------------ R/pkg/R/generics.R | 28 +++--- 2 files changed, 88 insertions(+), 165 deletions(-) diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R index a1f5c4f8cc18..8c12308c1d7c 100644 --- a/R/pkg/R/functions.R +++ b/R/pkg/R/functions.R @@ -200,6 +200,34 @@ NULL #' head(select(tmp, sort_array(tmp$v1, asc = FALSE)))} NULL +#' Window functions for Column operations +#' +#' Window functions defined for \code{Column}. +#' +#' @param x In \code{lag} and \code{lead}, it is the column as a character string or a Column +#' to compute on. In \code{ntile}, it is the number of ntile groups. +#' @param offset In \code{lag}, the number of rows back from the current row from which to obtain +#' a value. In \code{lead}, the number of rows after the current row from which to +#' obtain a value. If not specified, the default is 1. +#' @param defaultValue (optional) default to use when the offset row does not exist. +#' @param ... additional argument(s). 
+#' @name column_window_functions +#' @rdname column_window_functions +#' @family window functions +#' @examples +#' \dontrun{ +#' # Dataframe used throughout this doc +#' df <- createDataFrame(cbind(model = rownames(mtcars), mtcars)) +#' ws <- orderBy(windowPartitionBy("am"), "hp") +#' tmp <- mutate(df, dist = over(cume_dist(), ws), dense_rank = over(dense_rank(), ws), +#' lag = over(lag(df$mpg), ws), lead = over(lead(df$mpg, 1), ws), +#' percent_rank = over(percent_rank(), ws), +#' rank = over(rank(), ws), row_number = over(row_number(), ws)) +#' # Get ntile group id (1-4) for hp +#' tmp <- mutate(tmp, ntile = over(ntile(4), ws)) +#' head(tmp)} +NULL + #' @details #' \code{lit}: A new Column is created to represent the literal value. #' If the parameter is a Column, it is returned unchanged. @@ -2844,27 +2872,16 @@ setMethod("ifelse", ###################### Window functions###################### -#' cume_dist -#' -#' Window function: returns the cumulative distribution of values within a window partition, -#' i.e. the fraction of rows that are below the current row. -#' -#' N = total number of rows in the partition -#' cume_dist(x) = number of values before (and including) x / N -#' +#' @details +#' \code{cume_dist}: Returns the cumulative distribution of values within a window partition, +#' i.e. the fraction of rows that are below the current row: +#' (number of values before and including x) / (total number of rows in the partition). #' This is equivalent to the \code{CUME_DIST} function in SQL. +#' The method should be used with no argument. #' -#' @rdname cume_dist -#' @name cume_dist -#' @family window functions -#' @aliases cume_dist,missing-method +#' @rdname column_window_functions +#' @aliases cume_dist cume_dist,missing-method #' @export -#' @examples -#' \dontrun{ -#' df <- createDataFrame(mtcars) -#' ws <- orderBy(windowPartitionBy("am"), "hp") -#' out <- select(df, over(cume_dist(), ws), df$hp, df$am) -#' } #' @note cume_dist since 1.6.0 setMethod("cume_dist", signature("missing"), @@ -2873,28 +2890,19 @@ setMethod("cume_dist", column(jc) }) -#' dense_rank -#' -#' Window function: returns the rank of rows within a window partition, without any gaps. +#' @details +#' \code{dense_rank}: Returns the rank of rows within a window partition, without any gaps. #' The difference between rank and dense_rank is that dense_rank leaves no gaps in ranking #' sequence when there are ties. That is, if you were ranking a competition using dense_rank #' and had three people tie for second place, you would say that all three were in second #' place and that the next person came in third. Rank would give me sequential numbers, making #' the person that came in third place (after the ties) would register as coming in fifth. -#' #' This is equivalent to the \code{DENSE_RANK} function in SQL. +#' The method should be used with no argument. 
#' -#' @rdname dense_rank -#' @name dense_rank -#' @family window functions -#' @aliases dense_rank,missing-method +#' @rdname column_window_functions +#' @aliases dense_rank dense_rank,missing-method #' @export -#' @examples -#' \dontrun{ -#' df <- createDataFrame(mtcars) -#' ws <- orderBy(windowPartitionBy("am"), "hp") -#' out <- select(df, over(dense_rank(), ws), df$hp, df$am) -#' } #' @note dense_rank since 1.6.0 setMethod("dense_rank", signature("missing"), @@ -2903,34 +2911,15 @@ setMethod("dense_rank", column(jc) }) -#' lag -#' -#' Window function: returns the value that is \code{offset} rows before the current row, and +#' @details +#' \code{lag}: Returns the value that is \code{offset} rows before the current row, and #' \code{defaultValue} if there is less than \code{offset} rows before the current row. For example, #' an \code{offset} of one will return the previous row at any given point in the window partition. -#' #' This is equivalent to the \code{LAG} function in SQL. #' -#' @param x the column as a character string or a Column to compute on. -#' @param offset the number of rows back from the current row from which to obtain a value. -#' If not specified, the default is 1. -#' @param defaultValue (optional) default to use when the offset row does not exist. -#' @param ... further arguments to be passed to or from other methods. -#' @rdname lag -#' @name lag -#' @aliases lag,characterOrColumn-method -#' @family window functions +#' @rdname column_window_functions +#' @aliases lag lag,characterOrColumn-method #' @export -#' @examples -#' \dontrun{ -#' df <- createDataFrame(mtcars) -#' -#' # Partition by am (transmission) and order by hp (horsepower) -#' ws <- orderBy(windowPartitionBy("am"), "hp") -#' -#' # Lag mpg values by 1 row on the partition-and-ordered table -#' out <- select(df, over(lag(df$mpg), ws), df$mpg, df$hp, df$am) -#' } #' @note lag since 1.6.0 setMethod("lag", signature(x = "characterOrColumn"), @@ -2946,35 +2935,16 @@ setMethod("lag", column(jc) }) -#' lead -#' -#' Window function: returns the value that is \code{offset} rows after the current row, and +#' @details +#' \code{lead}: Returns the value that is \code{offset} rows after the current row, and #' \code{defaultValue} if there is less than \code{offset} rows after the current row. #' For example, an \code{offset} of one will return the next row at any given point #' in the window partition. -#' #' This is equivalent to the \code{LEAD} function in SQL. #' -#' @param x the column as a character string or a Column to compute on. -#' @param offset the number of rows after the current row from which to obtain a value. -#' If not specified, the default is 1. -#' @param defaultValue (optional) default to use when the offset row does not exist. 
-#' -#' @rdname lead -#' @name lead -#' @family window functions -#' @aliases lead,characterOrColumn,numeric-method +#' @rdname column_window_functions +#' @aliases lead lead,characterOrColumn,numeric-method #' @export -#' @examples -#' \dontrun{ -#' df <- createDataFrame(mtcars) -#' -#' # Partition by am (transmission) and order by hp (horsepower) -#' ws <- orderBy(windowPartitionBy("am"), "hp") -#' -#' # Lead mpg values by 1 row on the partition-and-ordered table -#' out <- select(df, over(lead(df$mpg), ws), df$mpg, df$hp, df$am) -#' } #' @note lead since 1.6.0 setMethod("lead", signature(x = "characterOrColumn", offset = "numeric", defaultValue = "ANY"), @@ -2990,31 +2960,15 @@ setMethod("lead", column(jc) }) -#' ntile -#' -#' Window function: returns the ntile group id (from 1 to n inclusive) in an ordered window +#' @details +#' \code{ntile}: Returns the ntile group id (from 1 to n inclusive) in an ordered window #' partition. For example, if n is 4, the first quarter of the rows will get value 1, the second #' quarter will get 2, the third quarter will get 3, and the last quarter will get 4. -#' #' This is equivalent to the \code{NTILE} function in SQL. #' -#' @param x Number of ntile groups -#' -#' @rdname ntile -#' @name ntile -#' @aliases ntile,numeric-method -#' @family window functions +#' @rdname column_window_functions +#' @aliases ntile ntile,numeric-method #' @export -#' @examples -#' \dontrun{ -#' df <- createDataFrame(mtcars) -#' -#' # Partition by am (transmission) and order by hp (horsepower) -#' ws <- orderBy(windowPartitionBy("am"), "hp") -#' -#' # Get ntile group id (1-4) for hp -#' out <- select(df, over(ntile(4), ws), df$hp, df$am) -#' } #' @note ntile since 1.6.0 setMethod("ntile", signature(x = "numeric"), @@ -3023,27 +2977,15 @@ setMethod("ntile", column(jc) }) -#' percent_rank -#' -#' Window function: returns the relative rank (i.e. percentile) of rows within a window partition. -#' -#' This is computed by: -#' -#' (rank of row in its partition - 1) / (number of rows in the partition - 1) -#' -#' This is equivalent to the PERCENT_RANK function in SQL. +#' @details +#' \code{percent_rank}: Returns the relative rank (i.e. percentile) of rows within a window partition. +#' This is computed by: (rank of row in its partition - 1) / (number of rows in the partition - 1). +#' This is equivalent to the \code{PERCENT_RANK} function in SQL. +#' The method should be used with no argument. #' -#' @rdname percent_rank -#' @name percent_rank -#' @family window functions -#' @aliases percent_rank,missing-method +#' @rdname column_window_functions +#' @aliases percent_rank percent_rank,missing-method #' @export -#' @examples -#' \dontrun{ -#' df <- createDataFrame(mtcars) -#' ws <- orderBy(windowPartitionBy("am"), "hp") -#' out <- select(df, over(percent_rank(), ws), df$hp, df$am) -#' } #' @note percent_rank since 1.6.0 setMethod("percent_rank", signature("missing"), @@ -3052,29 +2994,19 @@ setMethod("percent_rank", column(jc) }) -#' rank -#' -#' Window function: returns the rank of rows within a window partition. -#' +#' @details +#' \code{rank}: Returns the rank of rows within a window partition. #' The difference between rank and dense_rank is that dense_rank leaves no gaps in ranking #' sequence when there are ties. That is, if you were ranking a competition using dense_rank #' and had three people tie for second place, you would say that all three were in second #' place and that the next person came in third. 
Rank would give me sequential numbers, making #' the person that came in third place (after the ties) would register as coming in fifth. +#' This is equivalent to the \code{RANK} function in SQL. +#' The method should be used with no argument. #' -#' This is equivalent to the RANK function in SQL. -#' -#' @rdname rank -#' @name rank -#' @family window functions -#' @aliases rank,missing-method +#' @rdname column_window_functions +#' @aliases rank rank,missing-method #' @export -#' @examples -#' \dontrun{ -#' df <- createDataFrame(mtcars) -#' ws <- orderBy(windowPartitionBy("am"), "hp") -#' out <- select(df, over(rank(), ws), df$hp, df$am) -#' } #' @note rank since 1.6.0 setMethod("rank", signature(x = "missing"), @@ -3083,11 +3015,7 @@ setMethod("rank", column(jc) }) -# Expose rank() in the R base package -#' @param x a numeric, complex, character or logical vector. -#' @param ... additional argument(s) passed to the method. -#' @name rank -#' @rdname rank +#' @rdname column_window_functions #' @aliases rank,ANY-method #' @export setMethod("rank", @@ -3096,23 +3024,14 @@ setMethod("rank", base::rank(x, ...) }) -#' row_number -#' -#' Window function: returns a sequential number starting at 1 within a window partition. -#' -#' This is equivalent to the ROW_NUMBER function in SQL. +#' @details +#' \code{row_number}: Returns a sequential number starting at 1 within a window partition. +#' This is equivalent to the \code{ROW_NUMBER} function in SQL. +#' The method should be used with no argument. #' -#' @rdname row_number -#' @name row_number -#' @aliases row_number,missing-method -#' @family window functions +#' @rdname column_window_functions +#' @aliases row_number row_number,missing-method #' @export -#' @examples -#' \dontrun{ -#' df <- createDataFrame(mtcars) -#' ws <- orderBy(windowPartitionBy("am"), "hp") -#' out <- select(df, over(row_number(), ws), df$hp, df$am) -#' } #' @note row_number since 1.6.0 setMethod("row_number", signature("missing"), diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R index b901b74e4728..beac18e41273 100644 --- a/R/pkg/R/generics.R +++ b/R/pkg/R/generics.R @@ -1013,9 +1013,9 @@ setGeneric("create_map", function(x, ...) { standardGeneric("create_map") }) #' @name NULL setGeneric("hash", function(x, ...) { standardGeneric("hash") }) -#' @param x empty. Should be used with no argument. -#' @rdname cume_dist +#' @rdname column_window_functions #' @export +#' @name NULL setGeneric("cume_dist", function(x = "missing") { standardGeneric("cume_dist") }) #' @rdname column_datetime_diff_functions @@ -1053,9 +1053,9 @@ setGeneric("dayofyear", function(x) { standardGeneric("dayofyear") }) #' @name NULL setGeneric("decode", function(x, charset) { standardGeneric("decode") }) -#' @param x empty. Should be used with no argument. -#' @rdname dense_rank +#' @rdname column_window_functions #' @export +#' @name NULL setGeneric("dense_rank", function(x = "missing") { standardGeneric("dense_rank") }) #' @rdname column_string_functions @@ -1159,8 +1159,9 @@ setGeneric("isnan", function(x) { standardGeneric("isnan") }) #' @name NULL setGeneric("kurtosis", function(x) { standardGeneric("kurtosis") }) -#' @rdname lag +#' @rdname column_window_functions #' @export +#' @name NULL setGeneric("lag", function(x, ...) { standardGeneric("lag") }) #' @rdname last @@ -1172,8 +1173,9 @@ setGeneric("last", function(x, ...) 
{ standardGeneric("last") }) #' @name NULL setGeneric("last_day", function(x) { standardGeneric("last_day") }) -#' @rdname lead +#' @rdname column_window_functions #' @export +#' @name NULL setGeneric("lead", function(x, offset, defaultValue = NULL) { standardGeneric("lead") }) #' @rdname column_nonaggregate_functions @@ -1260,8 +1262,9 @@ setGeneric("not", function(x) { standardGeneric("not") }) #' @name NULL setGeneric("next_day", function(y, x) { standardGeneric("next_day") }) -#' @rdname ntile +#' @rdname column_window_functions #' @export +#' @name NULL setGeneric("ntile", function(x) { standardGeneric("ntile") }) #' @rdname column_aggregate_functions @@ -1269,9 +1272,9 @@ setGeneric("ntile", function(x) { standardGeneric("ntile") }) #' @name NULL setGeneric("n_distinct", function(x, ...) { standardGeneric("n_distinct") }) -#' @param x empty. Should be used with no argument. -#' @rdname percent_rank +#' @rdname column_window_functions #' @export +#' @name NULL setGeneric("percent_rank", function(x = "missing") { standardGeneric("percent_rank") }) #' @rdname column_math_functions @@ -1304,8 +1307,9 @@ setGeneric("rand", function(seed) { standardGeneric("rand") }) #' @name NULL setGeneric("randn", function(seed) { standardGeneric("randn") }) -#' @rdname rank +#' @rdname column_window_functions #' @export +#' @name NULL setGeneric("rank", function(x, ...) { standardGeneric("rank") }) #' @rdname column_string_functions @@ -1334,9 +1338,9 @@ setGeneric("reverse", function(x) { standardGeneric("reverse") }) #' @name NULL setGeneric("rint", function(x) { standardGeneric("rint") }) -#' @param x empty. Should be used with no argument. -#' @rdname row_number +#' @rdname column_window_functions #' @export +#' @name NULL setGeneric("row_number", function(x = "missing") { standardGeneric("row_number") }) #' @rdname column_string_functions From daabf425ec0272951b11f286e4bec7a48f42cc0d Mon Sep 17 00:00:00 2001 From: wangmiao1981 Date: Tue, 4 Jul 2017 12:37:29 -0700 Subject: [PATCH 033/125] [MINOR][SPARKR] ignore Rplots.pdf test output after running R tests ## What changes were proposed in this pull request? Running R tests in a local build outputs Rplots.pdf. This file should be ignored in the git repository. Author: wangmiao1981 Closes #18518 from wangmiao1981/ignore. --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 1d91b43c23fa..cf9780db37ad 100644 --- a/.gitignore +++ b/.gitignore @@ -25,6 +25,7 @@ R-unit-tests.log R/unit-tests.out R/cran-check.out R/pkg/vignettes/sparkr-vignettes.html +R/pkg/tests/fulltests/Rplots.pdf build/*.jar build/apache-maven* build/scala* From de14086e1f6a2474bb9ba1452ada94e0ce58cf9c Mon Sep 17 00:00:00 2001 From: gatorsmile Date: Wed, 5 Jul 2017 10:40:02 +0800 Subject: [PATCH 034/125] [SPARK-21295][SQL] Use qualified names in error message for missing references ### What changes were proposed in this pull request? It is confusing to see the following error message when the column actually comes from another table: ``` cannot resolve '`right.a`' given input columns: [a, c, d]; ``` After the PR, the error message looks like ``` cannot resolve '`right.a`' given input columns: [left.a, right.c, right.d]; ``` ### How was this patch tested? Added a test case. Author: gatorsmile Closes #18520 from gatorsmile/removeSQLConf.
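A hypothetical repro of the improved message (table and column names are illustrative):

```scala
import spark.implicits._

// `a` exists only in t1, so resolving it through t2 fails analysis.
Seq(1).toDF("a").createOrReplaceTempView("t1")
Seq((1, 2)).toDF("c", "d").createOrReplaceTempView("t2")

// Before: cannot resolve '`t2.a`' given input columns: [a, c, d]
// After:  cannot resolve '`t2.a`' given input columns: [t1.a, t2.c, t2.d]
spark.sql("SELECT t2.a FROM t1, t2")
```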
--- .../sql/catalyst/analysis/CheckAnalysis.scala | 2 +- .../results/columnresolution-negative.sql.out | 10 +++++----- .../results/columnresolution-views.sql.out | 2 +- .../sql-tests/results/columnresolution.sql.out | 18 +++++++++--------- .../sql-tests/results/group-by.sql.out | 2 +- .../sql-tests/results/table-aliases.sql.out | 2 +- .../org/apache/spark/sql/SubquerySuite.scala | 4 ++-- 7 files changed, 20 insertions(+), 20 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala index fb81a7006bc5..85c52792ef65 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala @@ -86,7 +86,7 @@ trait CheckAnalysis extends PredicateHelper { case operator: LogicalPlan => operator transformExpressionsUp { case a: Attribute if !a.resolved => - val from = operator.inputSet.map(_.name).mkString(", ") + val from = operator.inputSet.map(_.qualifiedName).mkString(", ") a.failAnalysis(s"cannot resolve '${a.sql}' given input columns: [$from]") case e: Expression if e.checkInputDataTypes().isFailure => diff --git a/sql/core/src/test/resources/sql-tests/results/columnresolution-negative.sql.out b/sql/core/src/test/resources/sql-tests/results/columnresolution-negative.sql.out index 60bd8e9cc99d..9e60e592c2bd 100644 --- a/sql/core/src/test/resources/sql-tests/results/columnresolution-negative.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/columnresolution-negative.sql.out @@ -90,7 +90,7 @@ SELECT mydb1.t1.i1 FROM t1, mydb1.t1 struct<> -- !query 10 output org.apache.spark.sql.AnalysisException -cannot resolve '`mydb1.t1.i1`' given input columns: [i1, i1]; line 1 pos 7 +cannot resolve '`mydb1.t1.i1`' given input columns: [t1.i1, t1.i1]; line 1 pos 7 -- !query 11 @@ -161,7 +161,7 @@ SELECT db1.t1.i1 FROM t1, mydb2.t1 struct<> -- !query 18 output org.apache.spark.sql.AnalysisException -cannot resolve '`db1.t1.i1`' given input columns: [i1, i1]; line 1 pos 7 +cannot resolve '`db1.t1.i1`' given input columns: [t1.i1, t1.i1]; line 1 pos 7 -- !query 19 @@ -186,7 +186,7 @@ SELECT mydb1.t1 FROM t1 struct<> -- !query 21 output org.apache.spark.sql.AnalysisException -cannot resolve '`mydb1.t1`' given input columns: [i1]; line 1 pos 7 +cannot resolve '`mydb1.t1`' given input columns: [t1.i1]; line 1 pos 7 -- !query 22 @@ -204,7 +204,7 @@ SELECT t1 FROM mydb1.t1 struct<> -- !query 23 output org.apache.spark.sql.AnalysisException -cannot resolve '`t1`' given input columns: [i1]; line 1 pos 7 +cannot resolve '`t1`' given input columns: [t1.i1]; line 1 pos 7 -- !query 24 @@ -221,7 +221,7 @@ SELECT mydb1.t1.i1 FROM t1 struct<> -- !query 25 output org.apache.spark.sql.AnalysisException -cannot resolve '`mydb1.t1.i1`' given input columns: [i1]; line 1 pos 7 +cannot resolve '`mydb1.t1.i1`' given input columns: [t1.i1]; line 1 pos 7 -- !query 26 diff --git a/sql/core/src/test/resources/sql-tests/results/columnresolution-views.sql.out b/sql/core/src/test/resources/sql-tests/results/columnresolution-views.sql.out index 616421d6f2b2..7c451c2aa5b5 100644 --- a/sql/core/src/test/resources/sql-tests/results/columnresolution-views.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/columnresolution-views.sql.out @@ -105,7 +105,7 @@ SELECT global_temp.view1.i1 FROM global_temp.view1 struct<> -- !query 12 output 
org.apache.spark.sql.AnalysisException -cannot resolve '`global_temp.view1.i1`' given input columns: [i1]; line 1 pos 7 +cannot resolve '`global_temp.view1.i1`' given input columns: [view1.i1]; line 1 pos 7 -- !query 13 diff --git a/sql/core/src/test/resources/sql-tests/results/columnresolution.sql.out b/sql/core/src/test/resources/sql-tests/results/columnresolution.sql.out index 764cad0e3943..d3ca4443cce5 100644 --- a/sql/core/src/test/resources/sql-tests/results/columnresolution.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/columnresolution.sql.out @@ -96,7 +96,7 @@ SELECT mydb1.t1.i1 FROM t1 struct<> -- !query 11 output org.apache.spark.sql.AnalysisException -cannot resolve '`mydb1.t1.i1`' given input columns: [i1]; line 1 pos 7 +cannot resolve '`mydb1.t1.i1`' given input columns: [t1.i1]; line 1 pos 7 -- !query 12 @@ -105,7 +105,7 @@ SELECT mydb1.t1.i1 FROM mydb1.t1 struct<> -- !query 12 output org.apache.spark.sql.AnalysisException -cannot resolve '`mydb1.t1.i1`' given input columns: [i1]; line 1 pos 7 +cannot resolve '`mydb1.t1.i1`' given input columns: [t1.i1]; line 1 pos 7 -- !query 13 @@ -154,7 +154,7 @@ SELECT mydb1.t1.i1 FROM mydb1.t1 struct<> -- !query 18 output org.apache.spark.sql.AnalysisException -cannot resolve '`mydb1.t1.i1`' given input columns: [i1]; line 1 pos 7 +cannot resolve '`mydb1.t1.i1`' given input columns: [t1.i1]; line 1 pos 7 -- !query 19 @@ -270,7 +270,7 @@ SELECT * FROM mydb1.t3 WHERE c1 IN struct<> -- !query 32 output org.apache.spark.sql.AnalysisException -cannot resolve '`mydb1.t4.c3`' given input columns: [c2, c3]; line 2 pos 42 +cannot resolve '`mydb1.t4.c3`' given input columns: [t4.c2, t4.c3]; line 2 pos 42 -- !query 33 @@ -287,7 +287,7 @@ SELECT mydb1.t1.i1 FROM t1, mydb2.t1 struct<> -- !query 34 output org.apache.spark.sql.AnalysisException -cannot resolve '`mydb1.t1.i1`' given input columns: [i1, i1]; line 1 pos 7 +cannot resolve '`mydb1.t1.i1`' given input columns: [t1.i1, t1.i1]; line 1 pos 7 -- !query 35 @@ -296,7 +296,7 @@ SELECT mydb1.t1.i1 FROM mydb1.t1, mydb2.t1 struct<> -- !query 35 output org.apache.spark.sql.AnalysisException -cannot resolve '`mydb1.t1.i1`' given input columns: [i1, i1]; line 1 pos 7 +cannot resolve '`mydb1.t1.i1`' given input columns: [t1.i1, t1.i1]; line 1 pos 7 -- !query 36 @@ -313,7 +313,7 @@ SELECT mydb1.t1.i1 FROM t1, mydb1.t1 struct<> -- !query 37 output org.apache.spark.sql.AnalysisException -cannot resolve '`mydb1.t1.i1`' given input columns: [i1, i1]; line 1 pos 7 +cannot resolve '`mydb1.t1.i1`' given input columns: [t1.i1, t1.i1]; line 1 pos 7 -- !query 38 @@ -402,7 +402,7 @@ SELECT mydb1.t5.t5.i1 FROM mydb1.t5 struct<> -- !query 48 output org.apache.spark.sql.AnalysisException -cannot resolve '`mydb1.t5.t5.i1`' given input columns: [i1, t5]; line 1 pos 7 +cannot resolve '`mydb1.t5.t5.i1`' given input columns: [t5.i1, t5.t5]; line 1 pos 7 -- !query 49 @@ -411,7 +411,7 @@ SELECT mydb1.t5.t5.i2 FROM mydb1.t5 struct<> -- !query 49 output org.apache.spark.sql.AnalysisException -cannot resolve '`mydb1.t5.t5.i2`' given input columns: [i1, t5]; line 1 pos 7 +cannot resolve '`mydb1.t5.t5.i2`' given input columns: [t5.i1, t5.t5]; line 1 pos 7 -- !query 50 diff --git a/sql/core/src/test/resources/sql-tests/results/group-by.sql.out b/sql/core/src/test/resources/sql-tests/results/group-by.sql.out index 14679850c692..e23ebd4e822f 100644 --- a/sql/core/src/test/resources/sql-tests/results/group-by.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/group-by.sql.out @@ -202,7 +202,7 @@ SELECT a AS k, 
COUNT(b) FROM testData GROUP BY k
struct<>
-- !query 21 output
org.apache.spark.sql.AnalysisException
-cannot resolve '`k`' given input columns: [a, b]; line 1 pos 47
+cannot resolve '`k`' given input columns: [testdata.a, testdata.b]; line 1 pos 47
-- !query 22
diff --git a/sql/core/src/test/resources/sql-tests/results/table-aliases.sql.out b/sql/core/src/test/resources/sql-tests/results/table-aliases.sql.out
index c318018dced2..7abbcd834a52 100644
--- a/sql/core/src/test/resources/sql-tests/results/table-aliases.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/table-aliases.sql.out
@@ -60,4 +60,4 @@ SELECT a AS col1, b AS col2 FROM testData AS t(c, d)
struct<>
-- !query 6 output
org.apache.spark.sql.AnalysisException
-cannot resolve '`a`' given input columns: [c, d]; line 1 pos 7
+cannot resolve '`a`' given input columns: [t.c, t.d]; line 1 pos 7
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala
index 820cff655c4f..c0a3b5add313 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala
@@ -870,9 +870,9 @@ class SubquerySuite extends QueryTest with SharedSQLContext {
test("SPARK-20688: correctly check analysis for scalar sub-queries") {
withTempView("t") {
- Seq(1 -> "a").toDF("i", "j").createTempView("t")
+ Seq(1 -> "a").toDF("i", "j").createOrReplaceTempView("t")
val e = intercept[AnalysisException](sql("SELECT (SELECT count(*) FROM t WHERE a = 1)"))
- assert(e.message.contains("cannot resolve '`a`' given input columns: [i, j]"))
+ assert(e.message.contains("cannot resolve '`a`' given input columns: [t.i, t.j]"))
}
}
}

From ce10545d3401c555e56a214b7c2f334274803660 Mon Sep 17 00:00:00 2001
From: Takuya UESHIN
Date: Wed, 5 Jul 2017 11:24:38 +0800
Subject: [PATCH 035/125] [SPARK-21300][SQL] ExternalMapToCatalyst should null-check map key prior to converting to internal value.

## What changes were proposed in this pull request?

`ExternalMapToCatalyst` should null-check the map key before converting it to an internal value, so that an appropriate exception is thrown instead of an NPE.

## How was this patch tested?

Added a test and existing tests.

Author: Takuya UESHIN

Closes #18524 from ueshin/issues/SPARK-21300.
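As a minimal sketch of the resulting behavior (it mirrors the test added in this patch and is not part of the diff itself):

```scala
import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder

// A boxed Integer key can be null, so the serializer now emits an explicit
// null check for the key before converting the map to the internal format.
val encoder = ExpressionEncoder[Map[Integer, String]]()
encoder.toRow(Map((1, "a"), (null, "b")))
// => java.lang.RuntimeException: Cannot use null as map key
```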
--- .../spark/sql/catalyst/JavaTypeInference.scala | 1 + .../spark/sql/catalyst/ScalaReflection.scala | 1 + .../catalyst/expressions/objects/objects.scala | 16 +++++++++++++++- .../encoders/ExpressionEncoderSuite.scala | 8 +++++++- 4 files changed, 24 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/JavaTypeInference.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/JavaTypeInference.scala index 7683ee7074e7..90ec699877de 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/JavaTypeInference.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/JavaTypeInference.scala @@ -418,6 +418,7 @@ object JavaTypeInference { inputObject, ObjectType(keyType.getRawType), serializerFor(_, keyType), + keyNullable = true, ObjectType(valueType.getRawType), serializerFor(_, valueType), valueNullable = true diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala index d580cf4d3391..f3c1e4150017 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala @@ -494,6 +494,7 @@ object ScalaReflection extends ScalaReflection { inputObject, dataTypeFor(keyType), serializerFor(_, keyType, keyPath, seenTypeSet), + keyNullable = !keyType.typeSymbol.asClass.isPrimitive, dataTypeFor(valueType), serializerFor(_, valueType, valuePath, seenTypeSet), valueNullable = !valueType.typeSymbol.asClass.isPrimitive) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala index 4b651836ff4d..d6d06aecc077 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala @@ -841,18 +841,21 @@ object ExternalMapToCatalyst { inputMap: Expression, keyType: DataType, keyConverter: Expression => Expression, + keyNullable: Boolean, valueType: DataType, valueConverter: Expression => Expression, valueNullable: Boolean): ExternalMapToCatalyst = { val id = curId.getAndIncrement() val keyName = "ExternalMapToCatalyst_key" + id + val keyIsNull = "ExternalMapToCatalyst_key_isNull" + id val valueName = "ExternalMapToCatalyst_value" + id val valueIsNull = "ExternalMapToCatalyst_value_isNull" + id ExternalMapToCatalyst( keyName, + keyIsNull, keyType, - keyConverter(LambdaVariable(keyName, "false", keyType, false)), + keyConverter(LambdaVariable(keyName, keyIsNull, keyType, keyNullable)), valueName, valueIsNull, valueType, @@ -868,6 +871,8 @@ object ExternalMapToCatalyst { * * @param key the name of the map key variable that used when iterate the map, and used as input for * the `keyConverter` + * @param keyIsNull the nullability of the map key variable that used when iterate the map, and + * used as input for the `keyConverter` * @param keyType the data type of the map key variable that used when iterate the map, and used as * input for the `keyConverter` * @param keyConverter A function that take the `key` as input, and converts it to catalyst format. 
@@ -883,6 +888,7 @@ object ExternalMapToCatalyst { */ case class ExternalMapToCatalyst private( key: String, + keyIsNull: String, keyType: DataType, keyConverter: Expression, value: String, @@ -913,6 +919,7 @@ case class ExternalMapToCatalyst private( val keyElementJavaType = ctx.javaType(keyType) val valueElementJavaType = ctx.javaType(valueType) + ctx.addMutableState("boolean", keyIsNull, "") ctx.addMutableState(keyElementJavaType, key, "") ctx.addMutableState("boolean", valueIsNull, "") ctx.addMutableState(valueElementJavaType, value, "") @@ -950,6 +957,12 @@ case class ExternalMapToCatalyst private( defineEntries -> defineKeyValue } + val keyNullCheck = if (ctx.isPrimitiveType(keyType)) { + s"$keyIsNull = false;" + } else { + s"$keyIsNull = $key == null;" + } + val valueNullCheck = if (ctx.isPrimitiveType(valueType)) { s"$valueIsNull = false;" } else { @@ -972,6 +985,7 @@ case class ExternalMapToCatalyst private( $defineEntries while($entries.hasNext()) { $defineKeyValue + $keyNullCheck $valueNullCheck ${genKeyConverter.code} diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/encoders/ExpressionEncoderSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/encoders/ExpressionEncoderSuite.scala index 080f11b76938..bb1955a1ae24 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/encoders/ExpressionEncoderSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/encoders/ExpressionEncoderSuite.scala @@ -355,12 +355,18 @@ class ExpressionEncoderSuite extends PlanTest with AnalysisTest { checkNullable[String](true) } - test("null check for map key") { + test("null check for map key: String") { val encoder = ExpressionEncoder[Map[String, Int]]() val e = intercept[RuntimeException](encoder.toRow(Map(("a", 1), (null, 2)))) assert(e.getMessage.contains("Cannot use null as map key")) } + test("null check for map key: Integer") { + val encoder = ExpressionEncoder[Map[Integer, String]]() + val e = intercept[RuntimeException](encoder.toRow(Map((1, "a"), (null, "b")))) + assert(e.getMessage.contains("Cannot use null as map key")) + } + private def encodeDecodeTest[T : ExpressionEncoder]( input: T, testName: String): Unit = { From e9a93f8140c913b91781b35e0e1b051c30244882 Mon Sep 17 00:00:00 2001 From: actuaryzhang Date: Tue, 4 Jul 2017 21:05:05 -0700 Subject: [PATCH 036/125] [SPARK-20889][SPARKR][FOLLOWUP] Clean up grouped doc for column methods ## What changes were proposed in this pull request? Add doc for methods that were left out, and fix various style and consistency issues. Author: actuaryzhang Closes #18493 from actuaryzhang/sparkRDocCleanup. --- R/pkg/R/functions.R | 100 ++++++++++++++++++++------------------------ R/pkg/R/generics.R | 7 ++-- 2 files changed, 49 insertions(+), 58 deletions(-) diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R index 8c12308c1d7c..c529d83060f5 100644 --- a/R/pkg/R/functions.R +++ b/R/pkg/R/functions.R @@ -38,10 +38,10 @@ NULL #' #' Date time functions defined for \code{Column}. #' -#' @param x Column to compute on. +#' @param x Column to compute on. In \code{window}, it must be a time Column of \code{TimestampType}. #' @param format For \code{to_date} and \code{to_timestamp}, it is the string to use to parse -#' x Column to DateType or TimestampType. For \code{trunc}, it is the string used -#' for specifying the truncation method. For example, "year", "yyyy", "yy" for +#' Column \code{x} to DateType or TimestampType. 
For \code{trunc}, it is the string +#' to use to specify the truncation method. For example, "year", "yyyy", "yy" for #' truncate by year, or "month", "mon", "mm" for truncate by month. #' @param ... additional argument(s). #' @name column_datetime_functions @@ -122,7 +122,7 @@ NULL #' format to. See 'Details'. #' } #' @param y Column to compute on. -#' @param ... additional columns. +#' @param ... additional Columns. #' @name column_string_functions #' @rdname column_string_functions #' @family string functions @@ -167,8 +167,7 @@ NULL #' tmp <- mutate(df, v1 = crc32(df$model), v2 = hash(df$model), #' v3 = hash(df$model, df$mpg), v4 = md5(df$model), #' v5 = sha1(df$model), v6 = sha2(df$model, 256)) -#' head(tmp) -#' } +#' head(tmp)} NULL #' Collection functions for Column operations @@ -190,7 +189,6 @@ NULL #' \dontrun{ #' # Dataframe used throughout this doc #' df <- createDataFrame(cbind(model = rownames(mtcars), mtcars)) -#' df <- createDataFrame(cbind(model = rownames(mtcars), mtcars)) #' tmp <- mutate(df, v1 = create_array(df$mpg, df$cyl, df$hp)) #' head(select(tmp, array_contains(tmp$v1, 21), size(tmp$v1))) #' tmp2 <- mutate(tmp, v2 = explode(tmp$v1)) @@ -394,7 +392,7 @@ setMethod("base64", }) #' @details -#' \code{bin}: An expression that returns the string representation of the binary value +#' \code{bin}: Returns the string representation of the binary value #' of the given long column. For example, bin("12") returns "1100". #' #' @rdname column_math_functions @@ -722,7 +720,7 @@ setMethod("dayofyear", #' \code{decode}: Computes the first argument into a string from a binary using the provided #' character set. #' -#' @param charset Character set to use (one of "US-ASCII", "ISO-8859-1", "UTF-8", "UTF-16BE", +#' @param charset character set to use (one of "US-ASCII", "ISO-8859-1", "UTF-8", "UTF-16BE", #' "UTF-16LE", "UTF-16"). #' #' @rdname column_string_functions @@ -855,7 +853,7 @@ setMethod("hex", }) #' @details -#' \code{hour}: Extracts the hours as an integer from a given date/timestamp/string. +#' \code{hour}: Extracts the hour as an integer from a given date/timestamp/string. #' #' @rdname column_datetime_functions #' @aliases hour hour,Column-method @@ -1177,7 +1175,7 @@ setMethod("min", }) #' @details -#' \code{minute}: Extracts the minutes as an integer from a given date/timestamp/string. +#' \code{minute}: Extracts the minute as an integer from a given date/timestamp/string. #' #' @rdname column_datetime_functions #' @aliases minute minute,Column-method @@ -1354,7 +1352,7 @@ setMethod("sd", }) #' @details -#' \code{second}: Extracts the seconds as an integer from a given date/timestamp/string. +#' \code{second}: Extracts the second as an integer from a given date/timestamp/string. #' #' @rdname column_datetime_functions #' @aliases second second,Column-method @@ -1464,20 +1462,18 @@ setMethod("soundex", column(jc) }) -#' Return the partition ID as a column -#' -#' Return the partition ID as a SparkDataFrame column. +#' @details +#' \code{spark_partition_id}: Returns the partition ID as a SparkDataFrame column. #' Note that this is nondeterministic because it depends on data partitioning and #' task scheduling. +#' This is equivalent to the \code{SPARK_PARTITION_ID} function in SQL. #' -#' This is equivalent to the SPARK_PARTITION_ID function in SQL. 
-#'
-#' @rdname spark_partition_id
-#' @name spark_partition_id
-#' @aliases spark_partition_id,missing-method
+#' @rdname column_nonaggregate_functions
+#' @aliases spark_partition_id spark_partition_id,missing-method
#' @export
#' @examples
-#' \dontrun{select(df, spark_partition_id())}
+#'
+#' \dontrun{head(select(df, spark_partition_id()))}
#' @note spark_partition_id since 2.0.0
setMethod("spark_partition_id",
signature("missing"),
@@ -2028,7 +2024,7 @@ setMethod("pmod", signature(y = "Column"),
column(jc)
})
-#' @param rsd maximum estimation error allowed (default = 0.05)
+#' @param rsd maximum estimation error allowed (default = 0.05).
#'
#' @rdname column_aggregate_functions
#' @aliases approxCountDistinct,Column-method
@@ -2220,8 +2216,8 @@ setMethod("from_json", signature(x = "Column", schema = "structType"),
#' @examples
#'
#' \dontrun{
-#' tmp <- mutate(df, from_utc = from_utc_timestamp(df$time, 'PST'),
-#' to_utc = to_utc_timestamp(df$time, 'PST'))
+#' tmp <- mutate(df, from_utc = from_utc_timestamp(df$time, "PST"),
+#' to_utc = to_utc_timestamp(df$time, "PST"))
#' head(tmp)}
#' @note from_utc_timestamp since 1.5.0
setMethod("from_utc_timestamp", signature(y = "Column", x = "character"),
@@ -2255,7 +2251,7 @@ setMethod("instr", signature(y = "Column", x = "character"),
#' @details
#' \code{next_day}: Given a date column, returns the first date which is later than the value of
#' the date column that is on the specified day of the week. For example,
-#' \code{next_day('2015-07-27', "Sunday")} returns 2015-08-02 because that is the first Sunday
+#' \code{next_day("2015-07-27", "Sunday")} returns 2015-08-02 because that is the first Sunday
#' after 2015-07-27. Day of the week parameter is case insensitive, and accepts first three or
#' two characters: "Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun".
#'
@@ -2295,7 +2291,7 @@ setMethod("to_utc_timestamp", signature(y = "Column", x = "character"),
#' tmp <- mutate(df, t1 = add_months(df$time, 1),
#' t2 = date_add(df$time, 2),
#' t3 = date_sub(df$time, 3),
-#' t4 = next_day(df$time, 'Sun'))
+#' t4 = next_day(df$time, "Sun"))
#' head(tmp)}
#' @note add_months since 1.5.0
setMethod("add_months", signature(y = "Column", x = "numeric"),
@@ -2404,8 +2400,8 @@ setMethod("shiftRight", signature(y = "Column", x = "numeric"),
})
#' @details
-#' \code{shiftRight}: (Unigned) shifts the given value numBits right. If the given value is a long value,
-#' it will return a long value else it will return an integer value.
+#' \code{shiftRightUnsigned}: (Unsigned) shifts the given value numBits right. If the given value is
+#' a long value, it will return a long value else it will return an integer value.
#'
#' @rdname column_math_functions
#' @aliases shiftRightUnsigned shiftRightUnsigned,Column,numeric-method
@@ -2513,14 +2509,13 @@ setMethod("from_unixtime", signature(x = "Column"),
column(jc)
})
-#' window
-#'
-#' Bucketize rows into one or more time windows given a timestamp specifying column. Window
-#' starts are inclusive but the window ends are exclusive, e.g. 12:05 will be in the window
+#' @details
+#' \code{window}: Bucketizes rows into one or more time windows given a timestamp specifying column.
+#' Window starts are inclusive but the window ends are exclusive, e.g. 12:05 will be in the window
#' [12:05,12:10) but not in [12:00,12:05). Windows can support microsecond precision. Windows in
-#' the order of months are not supported.
+#' the order of months are not supported. 
It returns an output column of struct called 'window' +#' by default with the nested columns 'start' and 'end' #' -#' @param x a time Column. Must be of TimestampType. #' @param windowDuration a string specifying the width of the window, e.g. '1 second', #' '1 day 12 hours', '2 minutes'. Valid interval strings are 'week', #' 'day', 'hour', 'minute', 'second', 'millisecond', 'microsecond'. Note that @@ -2536,27 +2531,22 @@ setMethod("from_unixtime", signature(x = "Column"), #' window intervals. For example, in order to have hourly tumbling windows #' that start 15 minutes past the hour, e.g. 12:15-13:15, 13:15-14:15... provide #' \code{startTime} as \code{"15 minutes"}. -#' @param ... further arguments to be passed to or from other methods. -#' @return An output column of struct called 'window' by default with the nested columns 'start' -#' and 'end'. -#' @family date time functions -#' @rdname window -#' @name window -#' @aliases window,Column-method +#' @rdname column_datetime_functions +#' @aliases window window,Column-method #' @export #' @examples -#'\dontrun{ -#' # One minute windows every 15 seconds 10 seconds after the minute, e.g. 09:00:10-09:01:10, -#' # 09:00:25-09:01:25, 09:00:40-09:01:40, ... -#' window(df$time, "1 minute", "15 seconds", "10 seconds") #' -#' # One minute tumbling windows 15 seconds after the minute, e.g. 09:00:15-09:01:15, -#' # 09:01:15-09:02:15... -#' window(df$time, "1 minute", startTime = "15 seconds") +#' \dontrun{ +#' # One minute windows every 15 seconds 10 seconds after the minute, e.g. 09:00:10-09:01:10, +#' # 09:00:25-09:01:25, 09:00:40-09:01:40, ... +#' window(df$time, "1 minute", "15 seconds", "10 seconds") #' -#' # Thirty-second windows every 10 seconds, e.g. 09:00:00-09:00:30, 09:00:10-09:00:40, ... -#' window(df$time, "30 seconds", "10 seconds") -#'} +#' # One minute tumbling windows 15 seconds after the minute, e.g. 09:00:15-09:01:15, +#' # 09:01:15-09:02:15... +#' window(df$time, "1 minute", startTime = "15 seconds") +#' +#' # Thirty-second windows every 10 seconds, e.g. 09:00:00-09:00:30, 09:00:10-09:00:40, ... +#' window(df$time, "30 seconds", "10 seconds")} #' @note window since 2.0.0 setMethod("window", signature(x = "Column"), function(x, windowDuration, slideDuration = NULL, startTime = NULL) { @@ -3046,7 +3036,7 @@ setMethod("row_number", #' \code{array_contains}: Returns null if the array is null, true if the array contains #' the value, and false otherwise. #' -#' @param value A value to be checked if contained in the column +#' @param value a value to be checked if contained in the column #' @rdname column_collection_functions #' @aliases array_contains array_contains,Column-method #' @export @@ -3091,7 +3081,7 @@ setMethod("size", #' to the natural ordering of the array elements. #' #' @rdname column_collection_functions -#' @param asc A logical flag indicating the sorting order. +#' @param asc a logical flag indicating the sorting order. #' TRUE, sorting is in ascending order. #' FALSE, sorting is in descending order. #' @aliases sort_array sort_array,Column-method @@ -3218,7 +3208,7 @@ setMethod("split_string", #' \code{repeat_string}: Repeats string n times. #' Equivalent to \code{repeat} SQL function. #' -#' @param n Number of repetitions +#' @param n number of repetitions. #' @rdname column_string_functions #' @aliases repeat_string repeat_string,Column-method #' @export @@ -3347,7 +3337,7 @@ setMethod("grouping_bit", #' \code{grouping_id}: Returns the level of grouping. 
#' Equals to \code{ #' grouping_bit(c1) * 2^(n - 1) + grouping_bit(c2) * 2^(n - 2) + ... + grouping_bit(cn) -#' } +#' }. #' #' @rdname column_aggregate_functions #' @aliases grouping_id grouping_id,Column-method diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R index beac18e41273..92098741f72f 100644 --- a/R/pkg/R/generics.R +++ b/R/pkg/R/generics.R @@ -1418,9 +1418,9 @@ setGeneric("split_string", function(x, pattern) { standardGeneric("split_string" #' @name NULL setGeneric("soundex", function(x) { standardGeneric("soundex") }) -#' @param x empty. Should be used with no argument. -#' @rdname spark_partition_id +#' @rdname column_nonaggregate_functions #' @export +#' @name NULL setGeneric("spark_partition_id", function(x = "missing") { standardGeneric("spark_partition_id") }) #' @rdname column_aggregate_functions @@ -1538,8 +1538,9 @@ setGeneric("var_samp", function(x) { standardGeneric("var_samp") }) #' @name NULL setGeneric("weekofyear", function(x) { standardGeneric("weekofyear") }) -#' @rdname window +#' @rdname column_datetime_functions #' @export +#' @name NULL setGeneric("window", function(x, ...) { standardGeneric("window") }) #' @rdname column_datetime_functions From f2c3b1dd69423cf52880e0ffa5f673ad6041b40e Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Wed, 5 Jul 2017 14:17:26 +0800 Subject: [PATCH 037/125] [SPARK-21304][SQL] remove unnecessary isNull variable for collection related encoder expressions ## What changes were proposed in this pull request? For these collection-related encoder expressions, we don't need to create `isNull` variable if the loop element is not nullable. ## How was this patch tested? existing tests. Author: Wenchen Fan Closes #18529 from cloud-fan/minor. --- .../spark/sql/catalyst/ScalaReflection.scala | 2 +- .../expressions/objects/objects.scala | 77 ++++++++++++------- 2 files changed, 50 insertions(+), 29 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala index f3c1e4150017..bea0de4d90c2 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala @@ -335,7 +335,7 @@ object ScalaReflection extends ScalaReflection { // TODO: add walked type path for map val TypeRef(_, _, Seq(keyType, valueType)) = t - CollectObjectsToMap( + CatalystToExternalMap( p => deserializerFor(keyType, Some(p), walkedTypePath), p => deserializerFor(valueType, Some(p), walkedTypePath), getPath, diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala index d6d06aecc077..ce07f4a25c18 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala @@ -465,7 +465,11 @@ object MapObjects { customCollectionCls: Option[Class[_]] = None): MapObjects = { val id = curId.getAndIncrement() val loopValue = s"MapObjects_loopValue$id" - val loopIsNull = s"MapObjects_loopIsNull$id" + val loopIsNull = if (elementNullable) { + s"MapObjects_loopIsNull$id" + } else { + "false" + } val loopVar = LambdaVariable(loopValue, loopIsNull, elementType, elementNullable) MapObjects( loopValue, loopIsNull, elementType, function(loopVar), inputData, customCollectionCls) @@ -517,7 
+521,6 @@ case class MapObjects private( override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { val elementJavaType = ctx.javaType(loopVarDataType) - ctx.addMutableState("boolean", loopIsNull, "") ctx.addMutableState(elementJavaType, loopValue, "") val genInputData = inputData.genCode(ctx) val genFunction = lambdaFunction.genCode(ctx) @@ -588,12 +591,14 @@ case class MapObjects private( case _ => genFunction.value } - val loopNullCheck = inputDataType match { - case _: ArrayType => s"$loopIsNull = ${genInputData.value}.isNullAt($loopIndex);" - // The element of primitive array will never be null. - case ObjectType(cls) if cls.isArray && cls.getComponentType.isPrimitive => - s"$loopIsNull = false" - case _ => s"$loopIsNull = $loopValue == null;" + val loopNullCheck = if (loopIsNull != "false") { + ctx.addMutableState("boolean", loopIsNull, "") + inputDataType match { + case _: ArrayType => s"$loopIsNull = ${genInputData.value}.isNullAt($loopIndex);" + case _ => s"$loopIsNull = $loopValue == null;" + } + } else { + "" } val (initCollection, addElement, getResult): (String, String => String, String) = @@ -667,11 +672,11 @@ case class MapObjects private( } } -object CollectObjectsToMap { +object CatalystToExternalMap { private val curId = new java.util.concurrent.atomic.AtomicInteger() /** - * Construct an instance of CollectObjectsToMap case class. + * Construct an instance of CatalystToExternalMap case class. * * @param keyFunction The function applied on the key collection elements. * @param valueFunction The function applied on the value collection elements. @@ -682,15 +687,19 @@ object CollectObjectsToMap { keyFunction: Expression => Expression, valueFunction: Expression => Expression, inputData: Expression, - collClass: Class[_]): CollectObjectsToMap = { + collClass: Class[_]): CatalystToExternalMap = { val id = curId.getAndIncrement() - val keyLoopValue = s"CollectObjectsToMap_keyLoopValue$id" + val keyLoopValue = s"CatalystToExternalMap_keyLoopValue$id" val mapType = inputData.dataType.asInstanceOf[MapType] val keyLoopVar = LambdaVariable(keyLoopValue, "", mapType.keyType, nullable = false) - val valueLoopValue = s"CollectObjectsToMap_valueLoopValue$id" - val valueLoopIsNull = s"CollectObjectsToMap_valueLoopIsNull$id" + val valueLoopValue = s"CatalystToExternalMap_valueLoopValue$id" + val valueLoopIsNull = if (mapType.valueContainsNull) { + s"CatalystToExternalMap_valueLoopIsNull$id" + } else { + "false" + } val valueLoopVar = LambdaVariable(valueLoopValue, valueLoopIsNull, mapType.valueType) - CollectObjectsToMap( + CatalystToExternalMap( keyLoopValue, keyFunction(keyLoopVar), valueLoopValue, valueLoopIsNull, valueFunction(valueLoopVar), inputData, collClass) @@ -716,7 +725,7 @@ object CollectObjectsToMap { * @param inputData An expression that when evaluated returns a map object. * @param collClass The type of the resulting collection. 
*/
-case class CollectObjectsToMap private(
+case class CatalystToExternalMap private(
keyLoopValue: String,
keyLambdaFunction: Expression,
valueLoopValue: String,
valueLoopIsNull: String,
valueLambdaFunction: Expression,
@@ -748,7 +757,6 @@ case class CollectObjectsToMap private(
ctx.addMutableState(keyElementJavaType, keyLoopValue, "")
val genKeyFunction = keyLambdaFunction.genCode(ctx)
val valueElementJavaType = ctx.javaType(mapType.valueType)
- ctx.addMutableState("boolean", valueLoopIsNull, "")
ctx.addMutableState(valueElementJavaType, valueLoopValue, "")
val genValueFunction = valueLambdaFunction.genCode(ctx)
val genInputData = inputData.genCode(ctx)
@@ -781,7 +789,12 @@ case class CollectObjectsToMap private(
val genKeyFunctionValue = genFunctionValue(keyLambdaFunction, genKeyFunction)
val genValueFunctionValue = genFunctionValue(valueLambdaFunction, genValueFunction)
- val valueLoopNullCheck = s"$valueLoopIsNull = $valueArray.isNullAt($loopIndex);"
+ val valueLoopNullCheck = if (valueLoopIsNull != "false") {
+ ctx.addMutableState("boolean", valueLoopIsNull, "")
+ s"$valueLoopIsNull = $valueArray.isNullAt($loopIndex);"
+ } else {
+ ""
+ }
val builderClass = classOf[Builder[_, _]].getName
val constructBuilder = s"""
@@ -847,9 +860,17 @@ object ExternalMapToCatalyst {
valueNullable: Boolean): ExternalMapToCatalyst = {
val id = curId.getAndIncrement()
val keyName = "ExternalMapToCatalyst_key" + id
- val keyIsNull = "ExternalMapToCatalyst_key_isNull" + id
+ val keyIsNull = if (keyNullable) {
+ "ExternalMapToCatalyst_key_isNull" + id
+ } else {
+ "false"
+ }
val valueName = "ExternalMapToCatalyst_value" + id
- val valueIsNull = "ExternalMapToCatalyst_value_isNull" + id
+ val valueIsNull = if (valueNullable) {
+ "ExternalMapToCatalyst_value_isNull" + id
+ } else {
+ "false"
+ }
ExternalMapToCatalyst(
keyName,
@@ -919,9 +940,7 @@ case class ExternalMapToCatalyst private(
val keyElementJavaType = ctx.javaType(keyType)
val valueElementJavaType = ctx.javaType(valueType)
- ctx.addMutableState("boolean", keyIsNull, "")
ctx.addMutableState(keyElementJavaType, key, "")
- ctx.addMutableState("boolean", valueIsNull, "")
ctx.addMutableState(valueElementJavaType, value, "")
val (defineEntries, defineKeyValue) = child.dataType match {
@@ -957,16 +976,18 @@ case class ExternalMapToCatalyst private(
defineEntries -> defineKeyValue
}
- val keyNullCheck = if (ctx.isPrimitiveType(keyType)) {
- s"$keyIsNull = false;"
- } else {
+ val keyNullCheck = if (keyIsNull != "false") {
+ ctx.addMutableState("boolean", keyIsNull, "")
s"$keyIsNull = $key == null;"
+ } else {
+ ""
}
- val valueNullCheck = if (ctx.isPrimitiveType(valueType)) {
- s"$valueIsNull = false;"
- } else {
+ val valueNullCheck = if (valueIsNull != "false") {
+ ctx.addMutableState("boolean", valueIsNull, "")
s"$valueIsNull = $value == null;"
+ } else {
+ ""
}
val arrayCls = classOf[GenericArrayData].getName

From a38643256691947ff7f7c474b85c052a7d5d8553 Mon Sep 17 00:00:00 2001
From: Takuya UESHIN
Date: Wed, 5 Jul 2017 14:25:26 +0800
Subject: [PATCH 038/125] [SPARK-18623][SQL] Add `returnNullable` to `StaticInvoke` and modify it to handle properly.

## What changes were proposed in this pull request?

Add `returnNullable` to `StaticInvoke`, in the same way that #15780 adds it to `Invoke`, and modify the code generation to handle it properly.

## How was this patch tested?

Existing tests.

Author: Takuya UESHIN
Author: Takuya UESHIN

Closes #16056 from ueshin/issues/SPARK-18623.
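As a rough, self-contained sketch of the new flag in use (the `BoundReference` input here is illustrative and not taken from this diff):

```scala
import org.apache.spark.sql.catalyst.expressions.BoundReference
import org.apache.spark.sql.catalyst.expressions.objects.StaticInvoke
import org.apache.spark.sql.types.{ObjectType, StringType}
import org.apache.spark.unsafe.types.UTF8String

// Stand-in for the external String value being serialized.
val input = BoundReference(0, ObjectType(classOf[String]), nullable = true)

// UTF8String.fromString never returns null for a non-null argument, so the
// serializer can pass returnNullable = false and skip the result null check.
// The expression can still be nullable overall, but only because the argument
// is nullable and propagateNull (true by default) short-circuits on null input.
val toUTF8 = StaticInvoke(
  classOf[UTF8String],
  StringType,
  "fromString",
  input :: Nil,
  returnNullable = false)
```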
--- .../sql/catalyst/JavaTypeInference.scala | 21 +++++---- .../spark/sql/catalyst/ScalaReflection.scala | 44 ++++++++++++------- .../sql/catalyst/encoders/RowEncoder.scala | 27 ++++++++---- .../expressions/objects/objects.scala | 42 ++++++++++++++---- 4 files changed, 91 insertions(+), 43 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/JavaTypeInference.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/JavaTypeInference.scala index 90ec699877de..21363d3ba82c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/JavaTypeInference.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/JavaTypeInference.scala @@ -216,7 +216,7 @@ object JavaTypeInference { ObjectType(c), "valueOf", getPath :: Nil, - propagateNull = true) + returnNullable = false) case c if c == classOf[java.sql.Date] => StaticInvoke( @@ -224,7 +224,7 @@ object JavaTypeInference { ObjectType(c), "toJavaDate", getPath :: Nil, - propagateNull = true) + returnNullable = false) case c if c == classOf[java.sql.Timestamp] => StaticInvoke( @@ -232,7 +232,7 @@ object JavaTypeInference { ObjectType(c), "toJavaTimestamp", getPath :: Nil, - propagateNull = true) + returnNullable = false) case c if c == classOf[java.lang.String] => Invoke(getPath, "toString", ObjectType(classOf[String])) @@ -300,7 +300,8 @@ object JavaTypeInference { ArrayBasedMapData.getClass, ObjectType(classOf[JMap[_, _]]), "toJavaMap", - keyData :: valueData :: Nil) + keyData :: valueData :: Nil, + returnNullable = false) case other => val properties = getJavaBeanReadableAndWritableProperties(other) @@ -367,28 +368,32 @@ object JavaTypeInference { classOf[UTF8String], StringType, "fromString", - inputObject :: Nil) + inputObject :: Nil, + returnNullable = false) case c if c == classOf[java.sql.Timestamp] => StaticInvoke( DateTimeUtils.getClass, TimestampType, "fromJavaTimestamp", - inputObject :: Nil) + inputObject :: Nil, + returnNullable = false) case c if c == classOf[java.sql.Date] => StaticInvoke( DateTimeUtils.getClass, DateType, "fromJavaDate", - inputObject :: Nil) + inputObject :: Nil, + returnNullable = false) case c if c == classOf[java.math.BigDecimal] => StaticInvoke( Decimal.getClass, DecimalType.SYSTEM_DEFAULT, "apply", - inputObject :: Nil) + inputObject :: Nil, + returnNullable = false) case c if c == classOf[java.lang.Boolean] => Invoke(inputObject, "booleanValue", BooleanType) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala index bea0de4d90c2..814f2c10b909 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala @@ -206,51 +206,53 @@ object ScalaReflection extends ScalaReflection { case t if t <:< localTypeOf[java.lang.Integer] => val boxedType = classOf[java.lang.Integer] val objectType = ObjectType(boxedType) - StaticInvoke(boxedType, objectType, "valueOf", getPath :: Nil, propagateNull = true) + StaticInvoke(boxedType, objectType, "valueOf", getPath :: Nil, returnNullable = false) case t if t <:< localTypeOf[java.lang.Long] => val boxedType = classOf[java.lang.Long] val objectType = ObjectType(boxedType) - StaticInvoke(boxedType, objectType, "valueOf", getPath :: Nil, propagateNull = true) + StaticInvoke(boxedType, objectType, "valueOf", getPath :: Nil, returnNullable = false) case t if t <:< 
localTypeOf[java.lang.Double] => val boxedType = classOf[java.lang.Double] val objectType = ObjectType(boxedType) - StaticInvoke(boxedType, objectType, "valueOf", getPath :: Nil, propagateNull = true) + StaticInvoke(boxedType, objectType, "valueOf", getPath :: Nil, returnNullable = false) case t if t <:< localTypeOf[java.lang.Float] => val boxedType = classOf[java.lang.Float] val objectType = ObjectType(boxedType) - StaticInvoke(boxedType, objectType, "valueOf", getPath :: Nil, propagateNull = true) + StaticInvoke(boxedType, objectType, "valueOf", getPath :: Nil, returnNullable = false) case t if t <:< localTypeOf[java.lang.Short] => val boxedType = classOf[java.lang.Short] val objectType = ObjectType(boxedType) - StaticInvoke(boxedType, objectType, "valueOf", getPath :: Nil, propagateNull = true) + StaticInvoke(boxedType, objectType, "valueOf", getPath :: Nil, returnNullable = false) case t if t <:< localTypeOf[java.lang.Byte] => val boxedType = classOf[java.lang.Byte] val objectType = ObjectType(boxedType) - StaticInvoke(boxedType, objectType, "valueOf", getPath :: Nil, propagateNull = true) + StaticInvoke(boxedType, objectType, "valueOf", getPath :: Nil, returnNullable = false) case t if t <:< localTypeOf[java.lang.Boolean] => val boxedType = classOf[java.lang.Boolean] val objectType = ObjectType(boxedType) - StaticInvoke(boxedType, objectType, "valueOf", getPath :: Nil, propagateNull = true) + StaticInvoke(boxedType, objectType, "valueOf", getPath :: Nil, returnNullable = false) case t if t <:< localTypeOf[java.sql.Date] => StaticInvoke( DateTimeUtils.getClass, ObjectType(classOf[java.sql.Date]), "toJavaDate", - getPath :: Nil) + getPath :: Nil, + returnNullable = false) case t if t <:< localTypeOf[java.sql.Timestamp] => StaticInvoke( DateTimeUtils.getClass, ObjectType(classOf[java.sql.Timestamp]), "toJavaTimestamp", - getPath :: Nil) + getPath :: Nil, + returnNullable = false) case t if t <:< localTypeOf[java.lang.String] => Invoke(getPath, "toString", ObjectType(classOf[String]), returnNullable = false) @@ -446,7 +448,8 @@ object ScalaReflection extends ScalaReflection { classOf[UnsafeArrayData], ArrayType(dt, false), "fromPrimitiveArray", - input :: Nil) + input :: Nil, + returnNullable = false) } else { NewInstance( classOf[GenericArrayData], @@ -504,49 +507,56 @@ object ScalaReflection extends ScalaReflection { classOf[UTF8String], StringType, "fromString", - inputObject :: Nil) + inputObject :: Nil, + returnNullable = false) case t if t <:< localTypeOf[java.sql.Timestamp] => StaticInvoke( DateTimeUtils.getClass, TimestampType, "fromJavaTimestamp", - inputObject :: Nil) + inputObject :: Nil, + returnNullable = false) case t if t <:< localTypeOf[java.sql.Date] => StaticInvoke( DateTimeUtils.getClass, DateType, "fromJavaDate", - inputObject :: Nil) + inputObject :: Nil, + returnNullable = false) case t if t <:< localTypeOf[BigDecimal] => StaticInvoke( Decimal.getClass, DecimalType.SYSTEM_DEFAULT, "apply", - inputObject :: Nil) + inputObject :: Nil, + returnNullable = false) case t if t <:< localTypeOf[java.math.BigDecimal] => StaticInvoke( Decimal.getClass, DecimalType.SYSTEM_DEFAULT, "apply", - inputObject :: Nil) + inputObject :: Nil, + returnNullable = false) case t if t <:< localTypeOf[java.math.BigInteger] => StaticInvoke( Decimal.getClass, DecimalType.BigIntDecimal, "apply", - inputObject :: Nil) + inputObject :: Nil, + returnNullable = false) case t if t <:< localTypeOf[scala.math.BigInt] => StaticInvoke( Decimal.getClass, DecimalType.BigIntDecimal, "apply", - inputObject :: 
Nil) + inputObject :: Nil, + returnNullable = false) case t if t <:< localTypeOf[java.lang.Integer] => Invoke(inputObject, "intValue", IntegerType) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/RowEncoder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/RowEncoder.scala index 0f8282d3b2f1..cc32fac67e92 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/RowEncoder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/RowEncoder.scala @@ -96,28 +96,32 @@ object RowEncoder { DateTimeUtils.getClass, TimestampType, "fromJavaTimestamp", - inputObject :: Nil) + inputObject :: Nil, + returnNullable = false) case DateType => StaticInvoke( DateTimeUtils.getClass, DateType, "fromJavaDate", - inputObject :: Nil) + inputObject :: Nil, + returnNullable = false) case d: DecimalType => StaticInvoke( Decimal.getClass, d, "fromDecimal", - inputObject :: Nil) + inputObject :: Nil, + returnNullable = false) case StringType => StaticInvoke( classOf[UTF8String], StringType, "fromString", - inputObject :: Nil) + inputObject :: Nil, + returnNullable = false) case t @ ArrayType(et, cn) => et match { @@ -126,7 +130,8 @@ object RowEncoder { classOf[ArrayData], t, "toArrayData", - inputObject :: Nil) + inputObject :: Nil, + returnNullable = false) case _ => MapObjects( element => serializerFor(ValidateExternalType(element, et), et), inputObject, @@ -254,14 +259,16 @@ object RowEncoder { DateTimeUtils.getClass, ObjectType(classOf[java.sql.Timestamp]), "toJavaTimestamp", - input :: Nil) + input :: Nil, + returnNullable = false) case DateType => StaticInvoke( DateTimeUtils.getClass, ObjectType(classOf[java.sql.Date]), "toJavaDate", - input :: Nil) + input :: Nil, + returnNullable = false) case _: DecimalType => Invoke(input, "toJavaBigDecimal", ObjectType(classOf[java.math.BigDecimal]), @@ -280,7 +287,8 @@ object RowEncoder { scala.collection.mutable.WrappedArray.getClass, ObjectType(classOf[Seq[_]]), "make", - arrayData :: Nil) + arrayData :: Nil, + returnNullable = false) case MapType(kt, vt, valueNullable) => val keyArrayType = ArrayType(kt, false) @@ -293,7 +301,8 @@ object RowEncoder { ArrayBasedMapData.getClass, ObjectType(classOf[Map[_, _]]), "toScalaMap", - keyData :: valueData :: Nil) + keyData :: valueData :: Nil, + returnNullable = false) case schema @ StructType(fields) => val convertedFields = fields.zipWithIndex.map { case (f, i) => diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala index ce07f4a25c18..24c06d8b14b5 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala @@ -118,17 +118,20 @@ trait InvokeLike extends Expression with NonSQLExpression { * @param arguments An optional list of expressions to pass as arguments to the function. * @param propagateNull When true, and any of the arguments is null, null will be returned instead * of calling the function. + * @param returnNullable When false, indicating the invoked method will always return + * non-null value. 
*/ case class StaticInvoke( staticObject: Class[_], dataType: DataType, functionName: String, arguments: Seq[Expression] = Nil, - propagateNull: Boolean = true) extends InvokeLike { + propagateNull: Boolean = true, + returnNullable: Boolean = true) extends InvokeLike { val objectName = staticObject.getName.stripSuffix("$") - override def nullable: Boolean = true + override def nullable: Boolean = needNullCheck || returnNullable override def children: Seq[Expression] = arguments override def eval(input: InternalRow): Any = @@ -141,19 +144,40 @@ case class StaticInvoke( val callFunc = s"$objectName.$functionName($argString)" - // If the function can return null, we do an extra check to make sure our null bit is still set - // correctly. - val postNullCheck = if (ctx.defaultValue(dataType) == "null") { - s"${ev.isNull} = ${ev.value} == null;" + val prepareIsNull = if (nullable) { + s"boolean ${ev.isNull} = $resultIsNull;" } else { + ev.isNull = "false" "" } + val evaluate = if (returnNullable) { + if (ctx.defaultValue(dataType) == "null") { + s""" + ${ev.value} = $callFunc; + ${ev.isNull} = ${ev.value} == null; + """ + } else { + val boxedResult = ctx.freshName("boxedResult") + s""" + ${ctx.boxedType(dataType)} $boxedResult = $callFunc; + ${ev.isNull} = $boxedResult == null; + if (!${ev.isNull}) { + ${ev.value} = $boxedResult; + } + """ + } + } else { + s"${ev.value} = $callFunc;" + } + val code = s""" $argCode - boolean ${ev.isNull} = $resultIsNull; - final $javaType ${ev.value} = $resultIsNull ? ${ctx.defaultValue(dataType)} : $callFunc; - $postNullCheck + $prepareIsNull + $javaType ${ev.value} = ${ctx.defaultValue(dataType)}; + if (!$resultIsNull) { + $evaluate + } """ ev.copy(code = code) } From 4852b7d447e872079c2c81428354adc825a87b27 Mon Sep 17 00:00:00 2001 From: actuaryzhang Date: Wed, 5 Jul 2017 18:41:00 +0800 Subject: [PATCH 039/125] [SPARK-21310][ML][PYSPARK] Expose offset in PySpark ## What changes were proposed in this pull request? Add offset to PySpark in GLM as in #16699. ## How was this patch tested? Python test Author: actuaryzhang Closes #18534 from actuaryzhang/pythonOffset. --- python/pyspark/ml/regression.py | 25 +++++++++++++++++++++---- python/pyspark/ml/tests.py | 14 ++++++++++++++ 2 files changed, 35 insertions(+), 4 deletions(-) diff --git a/python/pyspark/ml/regression.py b/python/pyspark/ml/regression.py index 84d843369e10..f0ff7a5f59ab 100644 --- a/python/pyspark/ml/regression.py +++ b/python/pyspark/ml/regression.py @@ -1376,17 +1376,20 @@ class GeneralizedLinearRegression(JavaEstimator, HasLabelCol, HasFeaturesCol, Ha typeConverter=TypeConverters.toFloat) solver = Param(Params._dummy(), "solver", "The solver algorithm for optimization. Supported " + "options: irls.", typeConverter=TypeConverters.toString) + offsetCol = Param(Params._dummy(), "offsetCol", "The offset column name. 
If this is not set " + + "or empty, we treat all instance offsets as 0.0", + typeConverter=TypeConverters.toString) @keyword_only def __init__(self, labelCol="label", featuresCol="features", predictionCol="prediction", family="gaussian", link=None, fitIntercept=True, maxIter=25, tol=1e-6, regParam=0.0, weightCol=None, solver="irls", linkPredictionCol=None, - variancePower=0.0, linkPower=None): + variancePower=0.0, linkPower=None, offsetCol=None): """ __init__(self, labelCol="label", featuresCol="features", predictionCol="prediction", \ family="gaussian", link=None, fitIntercept=True, maxIter=25, tol=1e-6, \ regParam=0.0, weightCol=None, solver="irls", linkPredictionCol=None, \ - variancePower=0.0, linkPower=None) + variancePower=0.0, linkPower=None, offsetCol=None) """ super(GeneralizedLinearRegression, self).__init__() self._java_obj = self._new_java_obj( @@ -1402,12 +1405,12 @@ def __init__(self, labelCol="label", featuresCol="features", predictionCol="pred def setParams(self, labelCol="label", featuresCol="features", predictionCol="prediction", family="gaussian", link=None, fitIntercept=True, maxIter=25, tol=1e-6, regParam=0.0, weightCol=None, solver="irls", linkPredictionCol=None, - variancePower=0.0, linkPower=None): + variancePower=0.0, linkPower=None, offsetCol=None): """ setParams(self, labelCol="label", featuresCol="features", predictionCol="prediction", \ family="gaussian", link=None, fitIntercept=True, maxIter=25, tol=1e-6, \ regParam=0.0, weightCol=None, solver="irls", linkPredictionCol=None, \ - variancePower=0.0, linkPower=None) + variancePower=0.0, linkPower=None, offsetCol=None) Sets params for generalized linear regression. """ kwargs = self._input_kwargs @@ -1486,6 +1489,20 @@ def getLinkPower(self): """ return self.getOrDefault(self.linkPower) + @since("2.3.0") + def setOffsetCol(self, value): + """ + Sets the value of :py:attr:`offsetCol`. + """ + return self._set(offsetCol=value) + + @since("2.3.0") + def getOffsetCol(self): + """ + Gets the value of offsetCol or its default value. + """ + return self.getOrDefault(self.offsetCol) + class GeneralizedLinearRegressionModel(JavaModel, JavaPredictionModel, JavaMLWritable, JavaMLReadable): diff --git a/python/pyspark/ml/tests.py b/python/pyspark/ml/tests.py index ffb8b0a890ff..787004765160 100755 --- a/python/pyspark/ml/tests.py +++ b/python/pyspark/ml/tests.py @@ -1291,6 +1291,20 @@ def test_tweedie_distribution(self): self.assertTrue(np.allclose(model2.coefficients.toArray(), [-0.6667, 0.5], atol=1E-4)) self.assertTrue(np.isclose(model2.intercept, 0.6667, atol=1E-4)) + def test_offset(self): + + df = self.spark.createDataFrame( + [(0.2, 1.0, 2.0, Vectors.dense(0.0, 5.0)), + (0.5, 2.1, 0.5, Vectors.dense(1.0, 2.0)), + (0.9, 0.4, 1.0, Vectors.dense(2.0, 1.0)), + (0.7, 0.7, 0.0, Vectors.dense(3.0, 3.0))], ["label", "weight", "offset", "features"]) + + glr = GeneralizedLinearRegression(family="poisson", weightCol="weight", offsetCol="offset") + model = glr.fit(df) + self.assertTrue(np.allclose(model.coefficients.toArray(), [0.664647, -0.3192581], + atol=1E-4)) + self.assertTrue(np.isclose(model.intercept, -1.561613, atol=1E-4)) + class FPGrowthTests(SparkSessionTestCase): def setUp(self): From 873f3ad2b89c955f42fced49dc129e8efa77d044 Mon Sep 17 00:00:00 2001 From: Takuya UESHIN Date: Wed, 5 Jul 2017 20:32:47 +0800 Subject: [PATCH 040/125] [SPARK-16167][SQL] RowEncoder should preserve array/map type nullability. ## What changes were proposed in this pull request? 
Currently `RowEncoder` doesn't preserve the nullability of `ArrayType` or `MapType`. It always returns `containsNull = true` for `ArrayType` and `valueContainsNull = true` for `MapType`, and its own nullability is always `true`. This PR fixes their nullability.

## How was this patch tested?

Add tests to check if `RowEncoder` preserves array/map nullability.

Author: Takuya UESHIN
Author: Takuya UESHIN

Closes #13873 from ueshin/issues/SPARK-16167.

---
.../sql/catalyst/encoders/RowEncoder.scala | 25 +++++++++++---
.../catalyst/encoders/RowEncoderSuite.scala | 33 +++++++++++++++++++
2 files changed, 54 insertions(+), 4 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/RowEncoder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/RowEncoder.scala
index cc32fac67e92..43c35bbdf383 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/RowEncoder.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/RowEncoder.scala
@@ -123,7 +123,7 @@ object RowEncoder {
inputObject :: Nil,
returnNullable = false)
- case t @ ArrayType(et, cn) =>
+ case t @ ArrayType(et, containsNull) =>
et match {
case BooleanType | ByteType | ShortType | IntegerType | LongType | FloatType | DoubleType =>
StaticInvoke(
@@ -132,8 +132,16 @@ object RowEncoder {
"toArrayData",
inputObject :: Nil,
returnNullable = false)
+
case _ => MapObjects(
- element => serializerFor(ValidateExternalType(element, et), et),
+ element => {
+ val value = serializerFor(ValidateExternalType(element, et), et)
+ if (!containsNull) {
+ AssertNotNull(value, Seq.empty)
+ } else {
+ value
+ }
+ },
inputObject,
ObjectType(classOf[Object]))
}
@@ -155,10 +163,19 @@ object RowEncoder {
ObjectType(classOf[scala.collection.Seq[_]]),
returnNullable = false)
val convertedValues = serializerFor(values, ArrayType(vt, valueNullable))
- NewInstance(
+ val nonNullOutput = NewInstance(
classOf[ArrayBasedMapData],
convertedKeys :: convertedValues :: Nil,
- dataType = t)
+ dataType = t,
+ propagateNull = false)
+
+ if (inputObject.nullable) {
+ If(IsNull(inputObject),
+ Literal.create(null, inputType),
+ nonNullOutput)
+ } else {
+ nonNullOutput
+ }
case StructType(fields) =>
val nonNullOutput = CreateNamedStruct(fields.zipWithIndex.flatMap { case (field, index) =>
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/encoders/RowEncoderSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/encoders/RowEncoderSuite.scala
index 1a5569a77dc7..6ed175f86ca7 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/encoders/RowEncoderSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/encoders/RowEncoderSuite.scala
@@ -273,6 +273,39 @@ class RowEncoderSuite extends SparkFunSuite {
assert(e4.getMessage.contains("java.lang.String is not a valid external type"))
}
+ for {
+ elementType <- Seq(IntegerType, StringType)
+ containsNull <- Seq(true, false)
+ nullable <- Seq(true, false)
+ } {
+ test("RowEncoder should preserve array nullability: " +
+ s"ArrayType($elementType, containsNull = $containsNull), nullable = $nullable") {
+ val schema = new StructType().add("array", ArrayType(elementType, containsNull), nullable)
+ val encoder = RowEncoder(schema).resolveAndBind()
+ assert(encoder.serializer.length == 1)
+ assert(encoder.serializer.head.dataType == ArrayType(elementType, containsNull))
+ assert(encoder.serializer.head.nullable == nullable)
+ }
+ }
+
+ for {
+ keyType <- 
Seq(IntegerType, StringType)
+ valueType <- Seq(IntegerType, StringType)
+ valueContainsNull <- Seq(true, false)
+ nullable <- Seq(true, false)
+ } {
+ test("RowEncoder should preserve map nullability: " +
+ s"MapType($keyType, $valueType, valueContainsNull = $valueContainsNull), " +
+ s"nullable = $nullable") {
+ val schema = new StructType().add(
+ "map", MapType(keyType, valueType, valueContainsNull), nullable)
+ val encoder = RowEncoder(schema).resolveAndBind()
+ assert(encoder.serializer.length == 1)
+ assert(encoder.serializer.head.dataType == MapType(keyType, valueType, valueContainsNull))
+ assert(encoder.serializer.head.nullable == nullable)
+ }
+ }
+
private def encodeDecodeTest(schema: StructType): Unit = {
test(s"encode/decode: ${schema.simpleString}") {
val encoder = RowEncoder(schema).resolveAndBind()

From 5787ace463b2abde50d2ca24e8dd111e3a7c158e Mon Sep 17 00:00:00 2001
From: ouyangxiaochen
Date: Wed, 5 Jul 2017 20:46:42 +0800
Subject: [PATCH 041/125] [SPARK-20383][SQL] Supporting Create [temporary] Function with the keyword 'OR REPLACE' and 'IF NOT EXISTS'

## What changes were proposed in this pull request?

Support creating a [temporary] function with the keywords 'OR REPLACE' and 'IF NOT EXISTS'.

## How was this patch tested?

Manual tests and added test cases.

Author: ouyangxiaochen

Closes #17681 from ouyangxiaochen/spark-419.

---
.../spark/sql/catalyst/parser/SqlBase.g4 | 3 +-
.../catalyst/catalog/ExternalCatalog.scala | 9 ++++
.../catalyst/catalog/InMemoryCatalog.scala | 6 +++
.../sql/catalyst/catalog/SessionCatalog.scala | 23 ++++++++
.../spark/sql/catalyst/catalog/events.scala | 10 ++++
.../catalog/ExternalCatalogEventSuite.scala | 9 ++++
.../catalog/ExternalCatalogSuite.scala | 9 ++++
.../spark/sql/execution/SparkSqlParser.scala | 8 +--
.../sql/execution/command/functions.scala | 46 +++++++++++-----
.../execution/command/DDLCommandSuite.scala | 52 ++++++++++++++++++-
.../sql/execution/command/DDLSuite.scala | 51 ++++++++++++++++++
.../spark/sql/hive/HiveExternalCatalog.scala | 9 ++++
12 files changed, 216 insertions(+), 19 deletions(-)

diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
index 29f554451ed4..ef9f88a9026c 100644
--- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
+++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
@@ -126,7 +126,8 @@ statement
tableIdentifier ('(' colTypeList ')')? tableProvider
(OPTIONS tablePropertyList)? #createTempViewUsing
| ALTER VIEW tableIdentifier AS? query #alterViewQuery
- | CREATE TEMPORARY? FUNCTION qualifiedName AS className=STRING
+ | CREATE (OR REPLACE)? TEMPORARY? FUNCTION (IF NOT EXISTS)?
+ qualifiedName AS className=STRING
(USING resource (',' resource)*)? #createFunction
| DROP TEMPORARY? FUNCTION (IF EXISTS)? qualifiedName #dropFunction
| EXPLAIN (LOGICAL | FORMATTED | EXTENDED | CODEGEN | COST)? 
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalog.scala
index 0254b6bb6d13..6000d483db20 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalog.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalog.scala
@@ -332,6 +332,15 @@ abstract class ExternalCatalog
protected def doDropFunction(db: String, funcName: String): Unit
+ final def alterFunction(db: String, funcDefinition: CatalogFunction): Unit = {
+ val name = funcDefinition.identifier.funcName
+ postToAll(AlterFunctionPreEvent(db, name))
+ doAlterFunction(db, funcDefinition)
+ postToAll(AlterFunctionEvent(db, name))
+ }
+
+ protected def doAlterFunction(db: String, funcDefinition: CatalogFunction): Unit
+
final def renameFunction(db: String, oldName: String, newName: String): Unit = {
postToAll(RenameFunctionPreEvent(db, oldName, newName))
doRenameFunction(db, oldName, newName)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/InMemoryCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/InMemoryCatalog.scala
index 747190faa3c8..d253c72a6273 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/InMemoryCatalog.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/InMemoryCatalog.scala
@@ -590,6 +590,12 @@ class InMemoryCatalog(
catalog(db).functions.remove(funcName)
}
+ override protected def doAlterFunction(db: String, func: CatalogFunction): Unit = synchronized {
+ requireDbExists(db)
+ requireFunctionExists(db, func.identifier.funcName)
+ catalog(db).functions.put(func.identifier.funcName, func)
+ }
+
override protected def doRenameFunction(
db: String,
oldName: String,
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
index a86604e4353a..c40d5f6031a2 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
@@ -1055,6 +1055,29 @@ class SessionCatalog(
}
}
+ /**
+ * Overwrite a metastore function in the database specified in `funcDefinition`.
+ * If no database is specified, assume the function is in the current database.
+ */
+ def alterFunction(funcDefinition: CatalogFunction): Unit = {
+ val db = formatDatabaseName(funcDefinition.identifier.database.getOrElse(getCurrentDatabase))
+ requireDbExists(db)
+ val identifier = FunctionIdentifier(funcDefinition.identifier.funcName, Some(db))
+ val newFuncDefinition = funcDefinition.copy(identifier = identifier)
+ if (functionExists(identifier)) {
+ if (functionRegistry.functionExists(identifier)) {
+ // If we have loaded this function into the FunctionRegistry,
+ // also drop it from there.
+ // For a permanent function, because we loaded it to the FunctionRegistry
+ // when it's first used, we also need to drop it from the FunctionRegistry.
+ functionRegistry.dropFunction(identifier)
+ }
+ externalCatalog.alterFunction(db, newFuncDefinition)
+ } else {
+ throw new NoSuchFunctionException(db = db, func = identifier.toString)
+ }
+ }
+
/**
* Retrieve the metadata of a metastore function. 
* diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/events.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/events.scala index 459973a13bb1..742a51e64038 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/events.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/events.scala @@ -139,6 +139,16 @@ case class DropFunctionPreEvent(database: String, name: String) extends Function */ case class DropFunctionEvent(database: String, name: String) extends FunctionEvent +/** + * Event fired before a function is altered. + */ +case class AlterFunctionPreEvent(database: String, name: String) extends FunctionEvent + +/** + * Event fired after a function has been altered. + */ +case class AlterFunctionEvent(database: String, name: String) extends FunctionEvent + /** * Event fired before a function is renamed. */ diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalogEventSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalogEventSuite.scala index 2539ea615ff9..087c26f23f38 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalogEventSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalogEventSuite.scala @@ -176,6 +176,15 @@ class ExternalCatalogEventSuite extends SparkFunSuite { } checkEvents(RenameFunctionPreEvent("db5", "fn7", "fn4") :: Nil) + // ALTER + val alteredFunctionDefinition = CatalogFunction( + identifier = FunctionIdentifier("fn4", Some("db5")), + className = "org.apache.spark.AlterFunction", + resources = Seq.empty) + catalog.alterFunction("db5", alteredFunctionDefinition) + checkEvents( + AlterFunctionPreEvent("db5", "fn4") :: AlterFunctionEvent("db5", "fn4") :: Nil) + // DROP intercept[AnalysisException] { catalog.dropFunction("db5", "fn7") diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalogSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalogSuite.scala index c22d55fc96a6..66e895a4690c 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalogSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalogSuite.scala @@ -752,6 +752,14 @@ abstract class ExternalCatalogSuite extends SparkFunSuite with BeforeAndAfterEac } } + test("alter function") { + val catalog = newBasicCatalog() + assert(catalog.getFunction("db2", "func1").className == funcClass) + val myNewFunc = catalog.getFunction("db2", "func1").copy(className = newFuncClass) + catalog.alterFunction("db2", myNewFunc) + assert(catalog.getFunction("db2", "func1").className == newFuncClass) + } + test("list functions") { val catalog = newBasicCatalog() catalog.createFunction("db2", newFunc("func2")) @@ -916,6 +924,7 @@ abstract class CatalogTestUtils { lazy val partWithEmptyValue = CatalogTablePartition(Map("a" -> "3", "b" -> ""), storageFormat) lazy val funcClass = "org.apache.spark.myFunc" + lazy val newFuncClass = "org.apache.spark.myNewFunc" /** * Creates a basic catalog, with the following structure: diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala index 2b79eb5eac0f..2f8e416e7df1 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala +++ 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
@@ -687,8 +687,8 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder(conf) {
    *
    * For example:
    * {{{
-   *   CREATE [TEMPORARY] FUNCTION [db_name.]function_name AS class_name
-   *   [USING JAR|FILE|ARCHIVE 'file_uri' [, JAR|FILE|ARCHIVE 'file_uri']];
+   *   CREATE [OR REPLACE] [TEMPORARY] FUNCTION [IF NOT EXISTS] [db_name.]function_name
+   *   AS class_name [USING JAR|FILE|ARCHIVE 'file_uri' [, JAR|FILE|ARCHIVE 'file_uri']];
    * }}}
    */
  override def visitCreateFunction(ctx: CreateFunctionContext): LogicalPlan = withOrigin(ctx) {
@@ -709,7 +709,9 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder(conf) {
       functionIdentifier.funcName,
       string(ctx.className),
       resources,
-      ctx.TEMPORARY != null)
+      ctx.TEMPORARY != null,
+      ctx.EXISTS != null,
+      ctx.REPLACE != null)
   }

  /**
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/functions.scala
index a91ad413f4d1..4f92ffee687a 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/functions.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/functions.scala
@@ -31,13 +31,13 @@ import org.apache.spark.sql.types.{StringType, StructField, StructType}
 * The DDL command that creates a function.
 * To create a temporary function, the syntax of using this command in SQL is:
 * {{{
-*    CREATE TEMPORARY FUNCTION functionName
+*    CREATE [OR REPLACE] TEMPORARY FUNCTION functionName
 *    AS className [USING JAR\FILE 'uri' [, JAR|FILE 'uri']]
 * }}}
 *
 * To create a permanent function, the syntax in SQL is:
 * {{{
-*    CREATE FUNCTION [databaseName.]functionName
+*    CREATE [OR REPLACE] FUNCTION [IF NOT EXISTS] [databaseName.]functionName
 *    AS className [USING JAR\FILE 'uri' [, JAR|FILE 'uri']]
 * }}}
 */
@@ -46,26 +46,46 @@ case class CreateFunctionCommand(
    functionName: String,
    className: String,
    resources: Seq[FunctionResource],
-    isTemp: Boolean)
+    isTemp: Boolean,
+    ifNotExists: Boolean,
+    replace: Boolean)
  extends RunnableCommand {

+  if (ifNotExists && replace) {
+    throw new AnalysisException("CREATE FUNCTION with both IF NOT EXISTS and REPLACE" +
+      " is not allowed.")
+  }
+
+  // Disallow defining a temporary function with `IF NOT EXISTS`
+  if (ifNotExists && isTemp) {
+    throw new AnalysisException(
+      "It is not allowed to define a TEMPORARY function with IF NOT EXISTS.")
+  }
+
+  // Temporary function names should not contain a database prefix like "database.function"
+  if (databaseName.isDefined && isTemp) {
+    throw new AnalysisException(s"Specifying a database in CREATE TEMPORARY FUNCTION " +
+      s"is not allowed: '${databaseName.get}'")
+  }
+
   override def run(sparkSession: SparkSession): Seq[Row] = {
     val catalog = sparkSession.sessionState.catalog
     val func = CatalogFunction(FunctionIdentifier(functionName, databaseName), className, resources)
     if (isTemp) {
-      if (databaseName.isDefined) {
-        throw new AnalysisException(s"Specifying a database in CREATE TEMPORARY FUNCTION " +
-          s"is not allowed: '${databaseName.get}'")
-      }
       // We first load resources and then put the builder in the function registry.
       catalog.loadFunctionResources(resources)
-      catalog.registerFunction(func, overrideIfExists = false)
+      catalog.registerFunction(func, overrideIfExists = replace)
     } else {
-      // For a permanent, we will store the metadata into underlying external catalog.
-      // This function will be loaded into the FunctionRegistry when a query uses it.
-      // We do not load it into FunctionRegistry right now.
-      // TODO: should we also parse "IF NOT EXISTS"?
-      catalog.createFunction(func, ignoreIfExists = false)
+      // Handles `CREATE OR REPLACE FUNCTION AS ... USING ...`
+      if (replace && catalog.functionExists(func.identifier)) {
+        // Alter the function in the metastore.
+        catalog.alterFunction(CatalogFunction(func.identifier, className, resources))
+      } else {
+        // For a permanent function, we will store the metadata in the underlying external
+        // catalog. The function will be loaded into the FunctionRegistry when a query uses
+        // it; we do not load it into the FunctionRegistry right now.
+        catalog.createFunction(CatalogFunction(func.identifier, className, resources), ifNotExists)
+      }
     }
     Seq.empty[Row]
   }
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLCommandSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLCommandSuite.scala
index 8a6bc62fec96..5643c58d9f84 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLCommandSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLCommandSuite.scala
@@ -181,8 +181,29 @@ class DDLCommandSuite extends PlanTest {
        |'com.matthewrathbone.example.SimpleUDFExample' USING ARCHIVE '/path/to/archive',
        |FILE '/path/to/file'
      """.stripMargin
+    val sql3 =
+      """
+        |CREATE OR REPLACE TEMPORARY FUNCTION helloworld3 as
+        |'com.matthewrathbone.example.SimpleUDFExample' USING JAR '/path/to/jar1',
+        |JAR '/path/to/jar2'
+      """.stripMargin
+    val sql4 =
+      """
+        |CREATE OR REPLACE FUNCTION hello.world1 as
+        |'com.matthewrathbone.example.SimpleUDFExample' USING ARCHIVE '/path/to/archive',
+        |FILE '/path/to/file'
+      """.stripMargin
+    val sql5 =
+      """
+        |CREATE FUNCTION IF NOT EXISTS hello.world2 as
+        |'com.matthewrathbone.example.SimpleUDFExample' USING ARCHIVE '/path/to/archive',
+        |FILE '/path/to/file'
+      """.stripMargin
     val parsed1 = parser.parsePlan(sql1)
     val parsed2 = parser.parsePlan(sql2)
+    val parsed3 = parser.parsePlan(sql3)
+    val parsed4 = parser.parsePlan(sql4)
+    val parsed5 = parser.parsePlan(sql5)
     val expected1 = CreateFunctionCommand(
       None,
       "helloworld",
@@ -190,7 +211,7 @@ class DDLCommandSuite extends PlanTest {
       Seq(
         FunctionResource(FunctionResourceType.fromString("jar"), "/path/to/jar1"),
         FunctionResource(FunctionResourceType.fromString("jar"), "/path/to/jar2")),
-      isTemp = true)
+      isTemp = true, ifNotExists = false, replace = false)
     val expected2 = CreateFunctionCommand(
       Some("hello"),
       "world",
@@ -198,9 +219,36 @@ class DDLCommandSuite extends PlanTest {
       Seq(
         FunctionResource(FunctionResourceType.fromString("archive"), "/path/to/archive"),
         FunctionResource(FunctionResourceType.fromString("file"), "/path/to/file")),
-      isTemp = false)
+      isTemp = false, ifNotExists = false, replace = false)
+    val expected3 = CreateFunctionCommand(
+      None,
+      "helloworld3",
+      "com.matthewrathbone.example.SimpleUDFExample",
+      Seq(
+        FunctionResource(FunctionResourceType.fromString("jar"), "/path/to/jar1"),
+        FunctionResource(FunctionResourceType.fromString("jar"), "/path/to/jar2")),
+      isTemp = true, ifNotExists = false, replace = true)
+    val expected4 = CreateFunctionCommand(
+      Some("hello"),
+      "world1",
+      "com.matthewrathbone.example.SimpleUDFExample",
+      Seq(
+        FunctionResource(FunctionResourceType.fromString("archive"), "/path/to/archive"),
+        FunctionResource(FunctionResourceType.fromString("file"), "/path/to/file")),
+      isTemp = false, ifNotExists = false, replace = true)
+    val expected5 = CreateFunctionCommand(
+      Some("hello"),
+
"world2", + "com.matthewrathbone.example.SimpleUDFExample", + Seq( + FunctionResource(FunctionResourceType.fromString("archive"), "/path/to/archive"), + FunctionResource(FunctionResourceType.fromString("file"), "/path/to/file")), + isTemp = false, ifNotExists = true, replace = false) comparePlans(parsed1, expected1) comparePlans(parsed2, expected2) + comparePlans(parsed3, expected3) + comparePlans(parsed4, expected4) + comparePlans(parsed5, expected5) } test("drop function") { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala index e4dd077715d0..5c40d8bb4b1e 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala @@ -2270,6 +2270,57 @@ abstract class DDLSuite extends QueryTest with SQLTestUtils { } } + test("create temporary function with if not exists") { + withUserDefinedFunction("func1" -> true) { + val sql1 = + """ + |CREATE TEMPORARY FUNCTION IF NOT EXISTS func1 as + |'com.matthewrathbone.example.SimpleUDFExample' USING JAR '/path/to/jar1', + |JAR '/path/to/jar2' + """.stripMargin + val e = intercept[AnalysisException] { + sql(sql1) + }.getMessage + assert(e.contains("It is not allowed to define a TEMPORARY function with IF NOT EXISTS")) + } + } + + test("create function with both if not exists and replace") { + withUserDefinedFunction("func1" -> false) { + val sql1 = + """ + |CREATE OR REPLACE FUNCTION IF NOT EXISTS func1 as + |'com.matthewrathbone.example.SimpleUDFExample' USING JAR '/path/to/jar1', + |JAR '/path/to/jar2' + """.stripMargin + val e = intercept[AnalysisException] { + sql(sql1) + }.getMessage + assert(e.contains("CREATE FUNCTION with both IF NOT EXISTS and REPLACE is not allowed")) + } + } + + test("create temporary function by specifying a database") { + val dbName = "mydb" + withDatabase(dbName) { + sql(s"CREATE DATABASE $dbName") + sql(s"USE $dbName") + withUserDefinedFunction("func1" -> true) { + val sql1 = + s""" + |CREATE TEMPORARY FUNCTION $dbName.func1 as + |'com.matthewrathbone.example.SimpleUDFExample' USING JAR '/path/to/jar1', + |JAR '/path/to/jar2' + """.stripMargin + val e = intercept[AnalysisException] { + sql(sql1) + }.getMessage + assert(e.contains(s"Specifying a database in CREATE TEMPORARY FUNCTION " + + s"is not allowed: '$dbName'")) + } + } + } + Seq(true, false).foreach { caseSensitive => test(s"alter table add columns with existing column name - caseSensitive $caseSensitive") { withSQLConf(SQLConf.CASE_SENSITIVE.key -> s"$caseSensitive") { diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala index 2a17849fa8a3..306b38048e3a 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala @@ -1132,6 +1132,15 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat client.dropFunction(db, name) } + override protected def doAlterFunction( + db: String, funcDefinition: CatalogFunction): Unit = withClient { + requireDbExists(db) + val functionName = funcDefinition.identifier.funcName.toLowerCase(Locale.ROOT) + requireFunctionExists(db, functionName) + val functionIdentifier = funcDefinition.identifier.copy(funcName = functionName) + client.alterFunction(db, 
funcDefinition.copy(identifier = functionIdentifier))
+  }
+
   override protected def doRenameFunction(
       db: String,
       oldName: String,

From e3e2b5da3671a6c6d152b4de481a8aa3e57a6e42 Mon Sep 17 00:00:00 2001
From: "he.qiao"
Date: Wed, 5 Jul 2017 21:13:25 +0800
Subject: [PATCH 042/125] [SPARK-21286][TEST] Modified StorageTabSuite unit test

## What changes were proposed in this pull request?
The old unit test had no effect: it built `rddInfo0Cached` but then submitted a stage constructed from `rddInfo0`.

## How was this patch tested?
unit test

Author: he.qiao

Closes #18511 from Geek-He/dev_0703.

---
 .../scala/org/apache/spark/ui/storage/StorageTabSuite.scala | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/core/src/test/scala/org/apache/spark/ui/storage/StorageTabSuite.scala b/core/src/test/scala/org/apache/spark/ui/storage/StorageTabSuite.scala
index 66dda382eb65..1cb52593e706 100644
--- a/core/src/test/scala/org/apache/spark/ui/storage/StorageTabSuite.scala
+++ b/core/src/test/scala/org/apache/spark/ui/storage/StorageTabSuite.scala
@@ -74,7 +74,7 @@ class StorageTabSuite extends SparkFunSuite with BeforeAndAfter {
     // Submitting RDDInfos with duplicate IDs does nothing
     val rddInfo0Cached = new RDDInfo(0, "freedom", 100, StorageLevel.MEMORY_ONLY, Seq(10))
     rddInfo0Cached.numCachedPartitions = 1
-    val stageInfo0Cached = new StageInfo(0, 0, "0", 100, Seq(rddInfo0), Seq.empty, "details")
+    val stageInfo0Cached = new StageInfo(0, 0, "0", 100, Seq(rddInfo0Cached), Seq.empty, "details")
     bus.postToAll(SparkListenerStageSubmitted(stageInfo0Cached))
     assert(storageListener._rddInfoMap.size === 4)
     assert(storageListener.rddInfoList.size === 2)

From 960298ee66b9b8a80f84df679ce5b4b3846267f4 Mon Sep 17 00:00:00 2001
From: sadikovi
Date: Wed, 5 Jul 2017 14:40:44 +0100
Subject: [PATCH 043/125] [SPARK-20858][DOC][MINOR] Document ListenerBus event queue size

## What changes were proposed in this pull request?

This change adds the configuration option `spark.scheduler.listenerbus.eventqueue.capacity` to the configuration docs, to specify the capacity of the Spark listener bus event queue. The default value is 10000.

This is a doc PR for [SPARK-15703](https://issues.apache.org/jira/browse/SPARK-15703).

I added the option to the `Scheduling` section, however it might be more related to the `Spark UI` section.

## How was this patch tested?

Manually verified correct rendering of the configuration option.

Author: sadikovi
Author: Ivan Sadikov

Closes #18476 from sadikovi/SPARK-20858.

---
 docs/configuration.md | 19 ++++++++++++++-----
 1 file changed, 14 insertions(+), 5 deletions(-)

diff --git a/docs/configuration.md b/docs/configuration.md
index bd6a1f9e240e..c785a664c67b 100644
--- a/docs/configuration.md
+++ b/docs/configuration.md
@@ -725,7 +725,7 @@ Apart from these, the following properties are also available, and may be useful
  spark.ui.retainedJobs
  1000
-  How many jobs the Spark UI and status APIs remember before garbage collecting.
+  How many jobs the Spark UI and status APIs remember before garbage collecting.
+  This is a target maximum, and fewer elements may be retained in some circumstances.

@@ -733,7 +733,7 @@ Apart from these, the following properties are also available, and may be useful
  spark.ui.retainedStages
  1000
-  How many stages the Spark UI and status APIs remember before garbage collecting.
+  How many stages the Spark UI and status APIs remember before garbage collecting.
+  This is a target maximum, and fewer elements may be retained in some circumstances.

@@ -741,7 +741,7 @@ Apart from these, the following properties are also available, and may be useful
  spark.ui.retainedTasks
  100000
-  How many tasks the Spark UI and status APIs remember before garbage collecting.
+  How many tasks the Spark UI and status APIs remember before garbage collecting.
+  This is a target maximum, and fewer elements may be retained in some circumstances.

@@ -1389,6 +1389,15 @@ Apart from these, the following properties are also available, and may be useful
  The interval length for the scheduler to revive the worker resource offers to run tasks.

+  spark.scheduler.listenerbus.eventqueue.capacity
+  10000
+
+  Capacity for the event queue in the Spark listener bus; must be greater than 0. Consider
+  increasing the value (e.g. 20000) if listener events are dropped. Increasing this value may
+  result in the driver using more memory.
+

  spark.blacklist.enabled

@@ -1475,8 +1484,8 @@ Apart from these, the following properties are also available, and may be useful
  spark.blacklist.application.fetchFailure.enabled
  false
-  (Experimental) If set to "true", Spark will blacklist the executor immediately when a fetch
-  failure happens. If external shuffle service is enabled, then the whole node will be
+  (Experimental) If set to "true", Spark will blacklist the executor immediately when a fetch
+  failure happens. If external shuffle service is enabled, then the whole node will be
   blacklisted.

From 742da0868534dab3d4d7b7edbe5ba9dc8bf26cc8 Mon Sep 17 00:00:00 2001
From: Jeff Zhang
Date: Wed, 5 Jul 2017 10:59:10 -0700
Subject: [PATCH 044/125] [SPARK-19439][PYSPARK][SQL] PySpark's registerJavaFunction Should Support UDAFs

## What changes were proposed in this pull request?

Support registering Java UDAFs in PySpark so that users can use Java UDAFs from PySpark. Besides that, I also add an API in `UDFRegistration`.

## How was this patch tested?

Unit test is added.

Author: Jeff Zhang

Closes #17222 from zjffdu/SPARK-19439.
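For reference, any JVM class with a public no-argument constructor that implements `UserDefinedAggregateFunction` can be picked up by this reflection path. A minimal sketch of such a class (hypothetical; the real test fixtures in this patch are `MyDoubleAvg` and `MyDoubleSum`):

```
import org.apache.spark.sql.Row
import org.apache.spark.sql.expressions.{MutableAggregationBuffer, UserDefinedAggregateFunction}
import org.apache.spark.sql.types._

// Hypothetical UDAF that sums doubles. The public no-arg constructor is what
// the reflection-based registration requires before it can instantiate the class.
class SimpleDoubleSum extends UserDefinedAggregateFunction {
  override def inputSchema: StructType = new StructType().add("value", DoubleType)
  override def bufferSchema: StructType = new StructType().add("sum", DoubleType)
  override def dataType: DataType = DoubleType
  override def deterministic: Boolean = true
  override def initialize(buffer: MutableAggregationBuffer): Unit = buffer.update(0, 0.0)
  override def update(buffer: MutableAggregationBuffer, input: Row): Unit = {
    if (!input.isNullAt(0)) buffer.update(0, buffer.getDouble(0) + input.getDouble(0))
  }
  override def merge(buffer1: MutableAggregationBuffer, buffer2: Row): Unit =
    buffer1.update(0, buffer1.getDouble(0) + buffer2.getDouble(0))
  override def evaluate(buffer: Row): Any = buffer.getDouble(0)
}
```

Once such a class is on the classpath, registering it by its fully qualified name makes it callable from SQL; the PySpark `registerJavaUDAF` added below simply delegates to the same JVM-side registration.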
"test.org.apache.spark.sql.MyDoubleAvg") + >>> df = sqlContext.createDataFrame([(1, "a"),(2, "b"), (3, "a")],["id", "name"]) + >>> df.registerTempTable("df") + >>> sqlContext.sql("SELECT name, javaUDAF(id) as avg from df group by name").collect() + [Row(name=u'b', avg=102.0), Row(name=u'a', avg=102.0)] + """ + self.sparkSession._jsparkSession.udf().registerJavaUDAF(name, javaClassName) + # TODO(andrew): delete this once we refactor things to take in SparkSession def _inferSchema(self, rdd, samplingRatio=None): """ @@ -551,6 +568,12 @@ def __init__(self, sqlContext): def register(self, name, f, returnType=StringType()): return self.sqlContext.registerFunction(name, f, returnType) + def registerJavaFunction(self, name, javaClassName, returnType=None): + self.sqlContext.registerJavaFunction(name, javaClassName, returnType) + + def registerJavaUDAF(self, name, javaClassName): + self.sqlContext.registerJavaUDAF(name, javaClassName) + register.__doc__ = SQLContext.registerFunction.__doc__ diff --git a/python/pyspark/sql/tests.py b/python/pyspark/sql/tests.py index 16ba8bd73f40..c0e3b8d13239 100644 --- a/python/pyspark/sql/tests.py +++ b/python/pyspark/sql/tests.py @@ -481,6 +481,16 @@ def test_udf_registration_returns_udf(self): df.select(add_three("id").alias("plus_three")).collect() ) + def test_non_existed_udf(self): + spark = self.spark + self.assertRaisesRegexp(AnalysisException, "Can not load class non_existed_udf", + lambda: spark.udf.registerJavaFunction("udf1", "non_existed_udf")) + + def test_non_existed_udaf(self): + spark = self.spark + self.assertRaisesRegexp(AnalysisException, "Can not load class non_existed_udaf", + lambda: spark.udf.registerJavaUDAF("udaf1", "non_existed_udaf")) + def test_multiLine_json(self): people1 = self.spark.read.json("python/test_support/sql/people.json") people_array = self.spark.read.json("python/test_support/sql/people_array.json", diff --git a/sql/core/src/main/scala/org/apache/spark/sql/UDFRegistration.scala b/sql/core/src/main/scala/org/apache/spark/sql/UDFRegistration.scala index ad01b889429c..8bdc0221888d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/UDFRegistration.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/UDFRegistration.scala @@ -17,7 +17,6 @@ package org.apache.spark.sql -import java.io.IOException import java.lang.reflect.{ParameterizedType, Type} import scala.reflect.runtime.universe.TypeTag @@ -456,9 +455,9 @@ class UDFRegistration private[sql] (functionRegistry: FunctionRegistry) extends .map(_.asInstanceOf[ParameterizedType]) .filter(e => e.getRawType.isInstanceOf[Class[_]] && e.getRawType.asInstanceOf[Class[_]].getCanonicalName.startsWith("org.apache.spark.sql.api.java.UDF")) if (udfInterfaces.length == 0) { - throw new IOException(s"UDF class ${className} doesn't implement any UDF interface") + throw new AnalysisException(s"UDF class ${className} doesn't implement any UDF interface") } else if (udfInterfaces.length > 1) { - throw new IOException(s"It is invalid to implement multiple UDF interfaces, UDF class ${className}") + throw new AnalysisException(s"It is invalid to implement multiple UDF interfaces, UDF class ${className}") } else { try { val udf = clazz.newInstance() @@ -491,19 +490,41 @@ class UDFRegistration private[sql] (functionRegistry: FunctionRegistry) extends case 21 => register(name, udf.asInstanceOf[UDF20[_, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _]], returnType) case 22 => register(name, udf.asInstanceOf[UDF21[_, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, 
_]], returnType) case 23 => register(name, udf.asInstanceOf[UDF22[_, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _]], returnType) - case n => logError(s"UDF class with ${n} type arguments is not supported ") + case n => + throw new AnalysisException(s"UDF class with ${n} type arguments is not supported.") } } catch { case e @ (_: InstantiationException | _: IllegalArgumentException) => - logError(s"Can not instantiate class ${className}, please make sure it has public non argument constructor") + throw new AnalysisException(s"Can not instantiate class ${className}, please make sure it has public non argument constructor") } } } catch { - case e: ClassNotFoundException => logError(s"Can not load class ${className}, please make sure it is on the classpath") + case e: ClassNotFoundException => throw new AnalysisException(s"Can not load class ${className}, please make sure it is on the classpath") } } + /** + * Register a Java UDAF class using reflection, for use from pyspark + * + * @param name UDAF name + * @param className fully qualified class name of UDAF + */ + private[sql] def registerJavaUDAF(name: String, className: String): Unit = { + try { + val clazz = Utils.classForName(className) + if (!classOf[UserDefinedAggregateFunction].isAssignableFrom(clazz)) { + throw new AnalysisException(s"class $className doesn't implement interface UserDefinedAggregateFunction") + } + val udaf = clazz.newInstance().asInstanceOf[UserDefinedAggregateFunction] + register(name, udaf) + } catch { + case e: ClassNotFoundException => throw new AnalysisException(s"Can not load class ${className}, please make sure it is on the classpath") + case e @ (_: InstantiationException | _: IllegalArgumentException) => + throw new AnalysisException(s"Can not instantiate class ${className}, please make sure it has public non argument constructor") + } + } + /** * Register a user-defined function with 1 arguments. * @since 1.3.0 diff --git a/sql/core/src/test/java/test/org/apache/spark/sql/JavaUDAFSuite.java b/sql/core/src/test/java/test/org/apache/spark/sql/JavaUDAFSuite.java new file mode 100644 index 000000000000..ddbaa45a483c --- /dev/null +++ b/sql/core/src/test/java/test/org/apache/spark/sql/JavaUDAFSuite.java @@ -0,0 +1,55 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package test.org.apache.spark.sql; + +import org.apache.spark.sql.Row; +import org.apache.spark.sql.SparkSession; +import org.junit.After; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; + + +public class JavaUDAFSuite { + + private transient SparkSession spark; + + @Before + public void setUp() { + spark = SparkSession.builder() + .master("local[*]") + .appName("testing") + .getOrCreate(); + } + + @After + public void tearDown() { + spark.stop(); + spark = null; + } + + @SuppressWarnings("unchecked") + @Test + public void udf1Test() { + spark.range(1, 10).toDF("value").registerTempTable("df"); + spark.udf().registerJavaUDAF("myDoubleAvg", MyDoubleAvg.class.getName()); + Row result = spark.sql("SELECT myDoubleAvg(value) as my_avg from df").head(); + Assert.assertEquals(105.0, result.getDouble(0), 1.0e-6); + } + +} diff --git a/sql/hive/src/test/java/org/apache/spark/sql/hive/aggregate/MyDoubleAvg.java b/sql/core/src/test/java/test/org/apache/spark/sql/MyDoubleAvg.java similarity index 99% rename from sql/hive/src/test/java/org/apache/spark/sql/hive/aggregate/MyDoubleAvg.java rename to sql/core/src/test/java/test/org/apache/spark/sql/MyDoubleAvg.java index ae0c097c362a..447a71d284fb 100644 --- a/sql/hive/src/test/java/org/apache/spark/sql/hive/aggregate/MyDoubleAvg.java +++ b/sql/core/src/test/java/test/org/apache/spark/sql/MyDoubleAvg.java @@ -15,7 +15,7 @@ * limitations under the License. */ -package org.apache.spark.sql.hive.aggregate; +package test.org.apache.spark.sql; import java.util.ArrayList; import java.util.List; diff --git a/sql/hive/src/test/java/org/apache/spark/sql/hive/aggregate/MyDoubleSum.java b/sql/core/src/test/java/test/org/apache/spark/sql/MyDoubleSum.java similarity index 98% rename from sql/hive/src/test/java/org/apache/spark/sql/hive/aggregate/MyDoubleSum.java rename to sql/core/src/test/java/test/org/apache/spark/sql/MyDoubleSum.java index d17fb3e5194f..93d20330c717 100644 --- a/sql/hive/src/test/java/org/apache/spark/sql/hive/aggregate/MyDoubleSum.java +++ b/sql/core/src/test/java/test/org/apache/spark/sql/MyDoubleSum.java @@ -15,18 +15,18 @@ * limitations under the License. 
*/ -package org.apache.spark.sql.hive.aggregate; +package test.org.apache.spark.sql; import java.util.ArrayList; import java.util.List; +import org.apache.spark.sql.Row; import org.apache.spark.sql.expressions.MutableAggregationBuffer; import org.apache.spark.sql.expressions.UserDefinedAggregateFunction; -import org.apache.spark.sql.types.StructField; -import org.apache.spark.sql.types.StructType; import org.apache.spark.sql.types.DataType; import org.apache.spark.sql.types.DataTypes; -import org.apache.spark.sql.Row; +import org.apache.spark.sql.types.StructField; +import org.apache.spark.sql.types.StructType; /** * An example {@link UserDefinedAggregateFunction} to calculate the sum of a diff --git a/sql/hive/pom.xml b/sql/hive/pom.xml index 09dcc4055e00..f9462e79a69f 100644 --- a/sql/hive/pom.xml +++ b/sql/hive/pom.xml @@ -57,6 +57,13 @@ spark-sql_${scala.binary.version} ${project.version} + + org.apache.spark + spark-sql_${scala.binary.version} + ${project.version} + test-jar + test + org.apache.spark spark-tags_${scala.binary.version} diff --git a/sql/hive/src/test/java/org/apache/spark/sql/hive/JavaDataFrameSuite.java b/sql/hive/src/test/java/org/apache/spark/sql/hive/JavaDataFrameSuite.java index aefc9cc77da8..636ce10da373 100644 --- a/sql/hive/src/test/java/org/apache/spark/sql/hive/JavaDataFrameSuite.java +++ b/sql/hive/src/test/java/org/apache/spark/sql/hive/JavaDataFrameSuite.java @@ -31,7 +31,7 @@ import org.apache.spark.sql.expressions.UserDefinedAggregateFunction; import static org.apache.spark.sql.functions.*; import org.apache.spark.sql.hive.test.TestHive$; -import org.apache.spark.sql.hive.aggregate.MyDoubleSum; +import test.org.apache.spark.sql.MyDoubleSum; public class JavaDataFrameSuite { private transient SQLContext hc; diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/AggregationQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/AggregationQuerySuite.scala index 84f915977bd8..f245a79f805a 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/AggregationQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/AggregationQuerySuite.scala @@ -20,16 +20,19 @@ package org.apache.spark.sql.hive.execution import scala.collection.JavaConverters._ import scala.util.Random +import test.org.apache.spark.sql.MyDoubleAvg +import test.org.apache.spark.sql.MyDoubleSum + import org.apache.spark.sql._ import org.apache.spark.sql.catalyst.expressions.UnsafeRow import org.apache.spark.sql.expressions.{MutableAggregationBuffer, UserDefinedAggregateFunction} import org.apache.spark.sql.functions._ -import org.apache.spark.sql.hive.aggregate.{MyDoubleAvg, MyDoubleSum} import org.apache.spark.sql.hive.test.TestHiveSingleton import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SQLTestUtils import org.apache.spark.sql.types._ + class ScalaAggregateFunction(schema: StructType) extends UserDefinedAggregateFunction { def inputSchema: StructType = schema From c8e7f445b98fce0b419b26f43dd3a75bf7c7375b Mon Sep 17 00:00:00 2001 From: gatorsmile Date: Wed, 5 Jul 2017 11:06:15 -0700 Subject: [PATCH 045/125] [SPARK-21307][SQL] Remove SQLConf parameters from the parser-related classes. ### What changes were proposed in this pull request? This PR is to remove SQLConf parameters from the parser-related classes. ### How was this patch tested? The existing test cases. Author: gatorsmile Closes #18531 from gatorsmile/rmSQLConfParser. 
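The practical effect is that parser construction no longer threads a `SQLConf` instance through; the active conf is looked up at parse time. A rough sketch of the new calling convention (assuming a build containing this patch; the query text is arbitrary):

```
import org.apache.spark.sql.execution.SparkSqlParser

// Before: val parser = new SparkSqlParser(new SQLConf)
// After: the parser is conf-free and consults SQLConf.get lazily, i.e. the
// active session's conf (or a default fallback when no session is active).
val parser = new SparkSqlParser
val plan = parser.parsePlan("SELECT * FROM t DISTRIBUTE BY a")
```

With this, settings that the builder bakes into plans, such as the `numShufflePartitions` used for the `RepartitionByExpression` checked in the parser suite below, come from `SQLConf.get` rather than from a constructor argument.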
--- .../sql/catalyst/catalog/SessionCatalog.scala | 2 +- .../sql/catalyst/parser/AstBuilder.scala | 6 +- .../sql/catalyst/parser/ParseDriver.scala | 8 +- .../parser/ExpressionParserSuite.scala | 167 +++++++++--------- .../spark/sql/execution/SparkSqlParser.scala | 11 +- .../org/apache/spark/sql/functions.scala | 3 +- .../internal/BaseSessionStateBuilder.scala | 2 +- .../sql/internal/VariableSubstitution.scala | 4 +- .../sql/execution/SparkSqlParserSuite.scala | 10 +- .../execution/command/DDLCommandSuite.scala | 4 +- .../internal/VariableSubstitutionSuite.scala | 31 ++-- 11 files changed, 121 insertions(+), 127 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala index c40d5f6031a2..336d3d65d0dd 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala @@ -74,7 +74,7 @@ class SessionCatalog( functionRegistry, conf, new Configuration(), - new CatalystSqlParser(conf), + CatalystSqlParser, DummyFunctionResourceLoader) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 8eac3ef2d356..b6a4686bb9ec 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -45,11 +45,9 @@ import org.apache.spark.util.random.RandomSampler * The AstBuilder converts an ANTLR4 ParseTree into a catalyst Expression, LogicalPlan or * TableIdentifier. */ -class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging { +class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with Logging { import ParserUtils._ - def this() = this(new SQLConf()) - protected def typedVisit[T](ctx: ParseTree): T = { ctx.accept(this).asInstanceOf[T] } @@ -1457,7 +1455,7 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging * Special characters can be escaped by using Hive/C-style escaping. */ private def createString(ctx: StringLiteralContext): String = { - if (conf.escapedStringLiterals) { + if (SQLConf.get.escapedStringLiterals) { ctx.STRING().asScala.map(stringWithoutUnescape).mkString } else { ctx.STRING().asScala.map(string).mkString diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala index 09598ffe770c..7e1fcfefc64a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala @@ -26,7 +26,6 @@ import org.apache.spark.sql.catalyst.{FunctionIdentifier, TableIdentifier} import org.apache.spark.sql.catalyst.expressions.Expression import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.catalyst.trees.Origin -import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.{DataType, StructType} /** @@ -122,13 +121,8 @@ abstract class AbstractSqlParser extends ParserInterface with Logging { /** * Concrete SQL parser for Catalyst-only SQL statements. 
*/ -class CatalystSqlParser(conf: SQLConf) extends AbstractSqlParser { - val astBuilder = new AstBuilder(conf) -} - -/** For test-only. */ object CatalystSqlParser extends AbstractSqlParser { - val astBuilder = new AstBuilder(new SQLConf()) + val astBuilder = new AstBuilder } /** diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala index 45f9f72dccc4..ac7325257a15 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala @@ -167,12 +167,12 @@ class ExpressionParserSuite extends PlanTest { } test("like expressions with ESCAPED_STRING_LITERALS = true") { - val conf = new SQLConf() - conf.setConfString(SQLConf.ESCAPED_STRING_LITERALS.key, "true") - val parser = new CatalystSqlParser(conf) - assertEqual("a rlike '^\\x20[\\x20-\\x23]+$'", 'a rlike "^\\x20[\\x20-\\x23]+$", parser) - assertEqual("a rlike 'pattern\\\\'", 'a rlike "pattern\\\\", parser) - assertEqual("a rlike 'pattern\\t\\n'", 'a rlike "pattern\\t\\n", parser) + val parser = CatalystSqlParser + withSQLConf(SQLConf.ESCAPED_STRING_LITERALS.key -> "true") { + assertEqual("a rlike '^\\x20[\\x20-\\x23]+$'", 'a rlike "^\\x20[\\x20-\\x23]+$", parser) + assertEqual("a rlike 'pattern\\\\'", 'a rlike "pattern\\\\", parser) + assertEqual("a rlike 'pattern\\t\\n'", 'a rlike "pattern\\t\\n", parser) + } } test("is null expressions") { @@ -435,86 +435,85 @@ class ExpressionParserSuite extends PlanTest { } test("strings") { + val parser = CatalystSqlParser Seq(true, false).foreach { escape => - val conf = new SQLConf() - conf.setConfString(SQLConf.ESCAPED_STRING_LITERALS.key, escape.toString) - val parser = new CatalystSqlParser(conf) - - // tests that have same result whatever the conf is - // Single Strings. - assertEqual("\"hello\"", "hello", parser) - assertEqual("'hello'", "hello", parser) - - // Multi-Strings. - assertEqual("\"hello\" 'world'", "helloworld", parser) - assertEqual("'hello' \" \" 'world'", "hello world", parser) - - // 'LIKE' string literals. Notice that an escaped '%' is the same as an escaped '\' and a - // regular '%'; to get the correct result you need to add another escaped '\'. - // TODO figure out if we shouldn't change the ParseUtils.unescapeSQLString method? - assertEqual("'pattern%'", "pattern%", parser) - assertEqual("'no-pattern\\%'", "no-pattern\\%", parser) - - // tests that have different result regarding the conf - if (escape) { - // When SQLConf.ESCAPED_STRING_LITERALS is enabled, string literal parsing fallbacks to - // Spark 1.6 behavior. - - // 'LIKE' string literals. - assertEqual("'pattern\\\\%'", "pattern\\\\%", parser) - assertEqual("'pattern\\\\\\%'", "pattern\\\\\\%", parser) - - // Escaped characters. - // Unescape string literal "'\\0'" for ASCII NUL (X'00') doesn't work - // when ESCAPED_STRING_LITERALS is enabled. - // It is parsed literally. - assertEqual("'\\0'", "\\0", parser) - - // Note: Single quote follows 1.6 parsing behavior when ESCAPED_STRING_LITERALS is enabled. - val e = intercept[ParseException](parser.parseExpression("'\''")) - assert(e.message.contains("extraneous input '''")) - - // The unescape special characters (e.g., "\\t") for 2.0+ don't work - // when ESCAPED_STRING_LITERALS is enabled. They are parsed literally. 
- assertEqual("'\\\"'", "\\\"", parser) // Double quote - assertEqual("'\\b'", "\\b", parser) // Backspace - assertEqual("'\\n'", "\\n", parser) // Newline - assertEqual("'\\r'", "\\r", parser) // Carriage return - assertEqual("'\\t'", "\\t", parser) // Tab character - - // The unescape Octals for 2.0+ don't work when ESCAPED_STRING_LITERALS is enabled. - // They are parsed literally. - assertEqual("'\\110\\145\\154\\154\\157\\041'", "\\110\\145\\154\\154\\157\\041", parser) - // The unescape Unicode for 2.0+ doesn't work when ESCAPED_STRING_LITERALS is enabled. - // They are parsed literally. - assertEqual("'\\u0057\\u006F\\u0072\\u006C\\u0064\\u0020\\u003A\\u0029'", - "\\u0057\\u006F\\u0072\\u006C\\u0064\\u0020\\u003A\\u0029", parser) - } else { - // Default behavior - - // 'LIKE' string literals. - assertEqual("'pattern\\\\%'", "pattern\\%", parser) - assertEqual("'pattern\\\\\\%'", "pattern\\\\%", parser) - - // Escaped characters. - // See: http://dev.mysql.com/doc/refman/5.7/en/string-literals.html - assertEqual("'\\0'", "\u0000", parser) // ASCII NUL (X'00') - assertEqual("'\\''", "\'", parser) // Single quote - assertEqual("'\\\"'", "\"", parser) // Double quote - assertEqual("'\\b'", "\b", parser) // Backspace - assertEqual("'\\n'", "\n", parser) // Newline - assertEqual("'\\r'", "\r", parser) // Carriage return - assertEqual("'\\t'", "\t", parser) // Tab character - assertEqual("'\\Z'", "\u001A", parser) // ASCII 26 - CTRL + Z (EOF on windows) - - // Octals - assertEqual("'\\110\\145\\154\\154\\157\\041'", "Hello!", parser) - - // Unicode - assertEqual("'\\u0057\\u006F\\u0072\\u006C\\u0064\\u0020\\u003A\\u0029'", "World :)", - parser) + withSQLConf(SQLConf.ESCAPED_STRING_LITERALS.key -> escape.toString) { + // tests that have same result whatever the conf is + // Single Strings. + assertEqual("\"hello\"", "hello", parser) + assertEqual("'hello'", "hello", parser) + + // Multi-Strings. + assertEqual("\"hello\" 'world'", "helloworld", parser) + assertEqual("'hello' \" \" 'world'", "hello world", parser) + + // 'LIKE' string literals. Notice that an escaped '%' is the same as an escaped '\' and a + // regular '%'; to get the correct result you need to add another escaped '\'. + // TODO figure out if we shouldn't change the ParseUtils.unescapeSQLString method? + assertEqual("'pattern%'", "pattern%", parser) + assertEqual("'no-pattern\\%'", "no-pattern\\%", parser) + + // tests that have different result regarding the conf + if (escape) { + // When SQLConf.ESCAPED_STRING_LITERALS is enabled, string literal parsing fallbacks to + // Spark 1.6 behavior. + + // 'LIKE' string literals. + assertEqual("'pattern\\\\%'", "pattern\\\\%", parser) + assertEqual("'pattern\\\\\\%'", "pattern\\\\\\%", parser) + + // Escaped characters. + // Unescape string literal "'\\0'" for ASCII NUL (X'00') doesn't work + // when ESCAPED_STRING_LITERALS is enabled. + // It is parsed literally. + assertEqual("'\\0'", "\\0", parser) + + // Note: Single quote follows 1.6 parsing behavior when ESCAPED_STRING_LITERALS is + // enabled. + val e = intercept[ParseException](parser.parseExpression("'\''")) + assert(e.message.contains("extraneous input '''")) + + // The unescape special characters (e.g., "\\t") for 2.0+ don't work + // when ESCAPED_STRING_LITERALS is enabled. They are parsed literally. 
+ assertEqual("'\\\"'", "\\\"", parser) // Double quote + assertEqual("'\\b'", "\\b", parser) // Backspace + assertEqual("'\\n'", "\\n", parser) // Newline + assertEqual("'\\r'", "\\r", parser) // Carriage return + assertEqual("'\\t'", "\\t", parser) // Tab character + + // The unescape Octals for 2.0+ don't work when ESCAPED_STRING_LITERALS is enabled. + // They are parsed literally. + assertEqual("'\\110\\145\\154\\154\\157\\041'", "\\110\\145\\154\\154\\157\\041", parser) + // The unescape Unicode for 2.0+ doesn't work when ESCAPED_STRING_LITERALS is enabled. + // They are parsed literally. + assertEqual("'\\u0057\\u006F\\u0072\\u006C\\u0064\\u0020\\u003A\\u0029'", + "\\u0057\\u006F\\u0072\\u006C\\u0064\\u0020\\u003A\\u0029", parser) + } else { + // Default behavior + + // 'LIKE' string literals. + assertEqual("'pattern\\\\%'", "pattern\\%", parser) + assertEqual("'pattern\\\\\\%'", "pattern\\\\%", parser) + + // Escaped characters. + // See: http://dev.mysql.com/doc/refman/5.7/en/string-literals.html + assertEqual("'\\0'", "\u0000", parser) // ASCII NUL (X'00') + assertEqual("'\\''", "\'", parser) // Single quote + assertEqual("'\\\"'", "\"", parser) // Double quote + assertEqual("'\\b'", "\b", parser) // Backspace + assertEqual("'\\n'", "\n", parser) // Newline + assertEqual("'\\r'", "\r", parser) // Carriage return + assertEqual("'\\t'", "\t", parser) // Tab character + assertEqual("'\\Z'", "\u001A", parser) // ASCII 26 - CTRL + Z (EOF on windows) + + // Octals + assertEqual("'\\110\\145\\154\\154\\157\\041'", "Hello!", parser) + + // Unicode + assertEqual("'\\u0057\\u006F\\u0072\\u006C\\u0064\\u0020\\u003A\\u0029'", "World :)", + parser) + } } - } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala index 2f8e416e7df1..618d027d8dc0 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala @@ -39,10 +39,11 @@ import org.apache.spark.sql.types.StructType /** * Concrete parser for Spark SQL statements. */ -class SparkSqlParser(conf: SQLConf) extends AbstractSqlParser { - val astBuilder = new SparkSqlAstBuilder(conf) +class SparkSqlParser extends AbstractSqlParser { - private val substitutor = new VariableSubstitution(conf) + val astBuilder = new SparkSqlAstBuilder + + private val substitutor = new VariableSubstitution protected override def parse[T](command: String)(toResult: SqlBaseParser => T): T = { super.parse(substitutor.substitute(command))(toResult) @@ -52,9 +53,11 @@ class SparkSqlParser(conf: SQLConf) extends AbstractSqlParser { /** * Builder that converts an ANTLR ParseTree into a LogicalPlan/Expression/TableIdentifier. */ -class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder(conf) { +class SparkSqlAstBuilder extends AstBuilder { import org.apache.spark.sql.catalyst.parser.ParserUtils._ + private def conf: SQLConf = SQLConf.get + /** * Create a [[SetCommand]] logical plan. 
* diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala index 839cbf42024e..3c67960d13e0 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala @@ -32,7 +32,6 @@ import org.apache.spark.sql.catalyst.expressions.aggregate._ import org.apache.spark.sql.catalyst.plans.logical.{HintInfo, ResolvedHint} import org.apache.spark.sql.execution.SparkSqlParser import org.apache.spark.sql.expressions.UserDefinedFunction -import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ import org.apache.spark.util.Utils @@ -1276,7 +1275,7 @@ object functions { */ def expr(expr: String): Column = { val parser = SparkSession.getActiveSession.map(_.sessionState.sqlParser).getOrElse { - new SparkSqlParser(new SQLConf) + new SparkSqlParser } Column(parser.parseExpression(expr)) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/BaseSessionStateBuilder.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/BaseSessionStateBuilder.scala index 2532b2ddb72d..9d0148117fad 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/BaseSessionStateBuilder.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/BaseSessionStateBuilder.scala @@ -114,7 +114,7 @@ abstract class BaseSessionStateBuilder( * Note: this depends on the `conf` field. */ protected lazy val sqlParser: ParserInterface = { - extensions.buildParser(session, new SparkSqlParser(conf)) + extensions.buildParser(session, new SparkSqlParser) } /** diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/VariableSubstitution.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/VariableSubstitution.scala index 4e7c813be992..2b9c574aaaf0 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/VariableSubstitution.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/VariableSubstitution.scala @@ -25,7 +25,9 @@ import org.apache.spark.internal.config._ * * Variable substitution is controlled by `SQLConf.variableSubstituteEnabled`. 
*/ -class VariableSubstitution(conf: SQLConf) { +class VariableSubstitution { + + private def conf = SQLConf.get private val provider = new ConfigProvider { override def get(key: String): Option[String] = Option(conf.getConfString(key, "")) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala index d238c76fbeef..2e29fa43f73d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala @@ -37,8 +37,7 @@ import org.apache.spark.sql.types.{IntegerType, LongType, StringType, StructType */ class SparkSqlParserSuite extends AnalysisTest { - val newConf = new SQLConf - private lazy val parser = new SparkSqlParser(newConf) + private lazy val parser = new SparkSqlParser /** * Normalizes plans: @@ -285,6 +284,7 @@ class SparkSqlParserSuite extends AnalysisTest { } test("query organization") { + val conf = SQLConf.get // Test all valid combinations of order by/sort by/distribute by/cluster by/limit/windows val baseSql = "select * from t" val basePlan = @@ -293,20 +293,20 @@ class SparkSqlParserSuite extends AnalysisTest { assertEqual(s"$baseSql distribute by a, b", RepartitionByExpression(UnresolvedAttribute("a") :: UnresolvedAttribute("b") :: Nil, basePlan, - numPartitions = newConf.numShufflePartitions)) + numPartitions = conf.numShufflePartitions)) assertEqual(s"$baseSql distribute by a sort by b", Sort(SortOrder(UnresolvedAttribute("b"), Ascending) :: Nil, global = false, RepartitionByExpression(UnresolvedAttribute("a") :: Nil, basePlan, - numPartitions = newConf.numShufflePartitions))) + numPartitions = conf.numShufflePartitions))) assertEqual(s"$baseSql cluster by a, b", Sort(SortOrder(UnresolvedAttribute("a"), Ascending) :: SortOrder(UnresolvedAttribute("b"), Ascending) :: Nil, global = false, RepartitionByExpression(UnresolvedAttribute("a") :: UnresolvedAttribute("b") :: Nil, basePlan, - numPartitions = newConf.numShufflePartitions))) + numPartitions = conf.numShufflePartitions))) } test("pipeline concatenation") { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLCommandSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLCommandSuite.scala index 5643c58d9f84..750574830381 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLCommandSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLCommandSuite.scala @@ -29,13 +29,13 @@ import org.apache.spark.sql.catalyst.plans.PlanTest import org.apache.spark.sql.catalyst.plans.logical.Project import org.apache.spark.sql.execution.SparkSqlParser import org.apache.spark.sql.execution.datasources.CreateTable -import org.apache.spark.sql.internal.{HiveSerDe, SQLConf} +import org.apache.spark.sql.internal.HiveSerDe import org.apache.spark.sql.types.{IntegerType, StringType, StructField, StructType} // TODO: merge this with DDLSuite (SPARK-14441) class DDLCommandSuite extends PlanTest { - private lazy val parser = new SparkSqlParser(new SQLConf) + private lazy val parser = new SparkSqlParser private def assertUnsupported(sql: String, containsThesePhrases: Seq[String] = Seq()): Unit = { val e = intercept[ParseException] { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/internal/VariableSubstitutionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/internal/VariableSubstitutionSuite.scala 
index d5a946aeaac3..c5e5b70e2133 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/internal/VariableSubstitutionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/internal/VariableSubstitutionSuite.scala @@ -18,12 +18,11 @@ package org.apache.spark.sql.internal import org.apache.spark.SparkFunSuite -import org.apache.spark.sql.AnalysisException +import org.apache.spark.sql.catalyst.plans.PlanTest -class VariableSubstitutionSuite extends SparkFunSuite { +class VariableSubstitutionSuite extends SparkFunSuite with PlanTest { - private lazy val conf = new SQLConf - private lazy val sub = new VariableSubstitution(conf) + private lazy val sub = new VariableSubstitution test("system property") { System.setProperty("varSubSuite.var", "abcd") @@ -35,26 +34,26 @@ class VariableSubstitutionSuite extends SparkFunSuite { } test("Spark configuration variable") { - conf.setConfString("some-random-string-abcd", "1234abcd") - assert(sub.substitute("${hiveconf:some-random-string-abcd}") == "1234abcd") - assert(sub.substitute("${sparkconf:some-random-string-abcd}") == "1234abcd") - assert(sub.substitute("${spark:some-random-string-abcd}") == "1234abcd") - assert(sub.substitute("${some-random-string-abcd}") == "1234abcd") + withSQLConf("some-random-string-abcd" -> "1234abcd") { + assert(sub.substitute("${hiveconf:some-random-string-abcd}") == "1234abcd") + assert(sub.substitute("${sparkconf:some-random-string-abcd}") == "1234abcd") + assert(sub.substitute("${spark:some-random-string-abcd}") == "1234abcd") + assert(sub.substitute("${some-random-string-abcd}") == "1234abcd") + } } test("multiple substitutes") { val q = "select ${bar} ${foo} ${doo} this is great" - conf.setConfString("bar", "1") - conf.setConfString("foo", "2") - conf.setConfString("doo", "3") - assert(sub.substitute(q) == "select 1 2 3 this is great") + withSQLConf("bar" -> "1", "foo" -> "2", "doo" -> "3") { + assert(sub.substitute(q) == "select 1 2 3 this is great") + } } test("test nested substitutes") { val q = "select ${bar} ${foo} this is great" - conf.setConfString("bar", "1") - conf.setConfString("foo", "${bar}") - assert(sub.substitute(q) == "select 1 1 this is great") + withSQLConf("bar" -> "1", "foo" -> "${bar}") { + assert(sub.substitute(q) == "select 1 1 this is great") + } } } From c8d0aba198c0f593c2b6b656c23b3d0fb7ea98a2 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Wed, 5 Jul 2017 16:33:23 -0700 Subject: [PATCH 046/125] [SPARK-21278][PYSPARK] Upgrade to Py4J 0.10.6 ## What changes were proposed in this pull request? This PR aims to bump Py4J in order to fix the following float/double bug. Py4J 0.10.5 fixes this (https://github.com/bartdag/py4j/issues/272) and the latest Py4J is 0.10.6. **BEFORE** ``` >>> df = spark.range(1) >>> df.select(df['id'] + 17.133574204226083).show() +--------------------+ |(id + 17.1335742042)| +--------------------+ | 17.1335742042| +--------------------+ ``` **AFTER** ``` >>> df = spark.range(1) >>> df.select(df['id'] + 17.133574204226083).show() +-------------------------+ |(id + 17.133574204226083)| +-------------------------+ | 17.133574204226083| +-------------------------+ ``` ## How was this patch tested? Manual. Author: Dongjoon Hyun Closes #18546 from dongjoon-hyun/SPARK-21278. 
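The Py4J version string is duplicated across the launch scripts, `PythonUtils`, the docs Makefile, and the YARN client; every occurrence below moves to 0.10.6. As a reference for what the JVM side assembles, a standalone sketch mirroring the updated `PythonUtils.sparkPythonPath` logic (an approximation for illustration, not the actual object):

```
import java.io.File

// Approximates how Spark builds the PYTHONPATH handed to Python workers;
// the py4j zip name must match the version bundled under python/lib.
val sparkHome = sys.env.getOrElse("SPARK_HOME", ".")
val pythonPath = Seq(
  Seq(sparkHome, "python", "lib", "pyspark.zip").mkString(File.separator),
  Seq(sparkHome, "python", "lib", "py4j-0.10.6-src.zip").mkString(File.separator)
).mkString(File.pathSeparator)
println(pythonPath)
```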
--- LICENSE | 2 +- bin/pyspark | 2 +- bin/pyspark2.cmd | 2 +- core/pom.xml | 2 +- .../apache/spark/api/python/PythonUtils.scala | 2 +- dev/deps/spark-deps-hadoop-2.6 | 2 +- dev/deps/spark-deps-hadoop-2.7 | 2 +- python/README.md | 2 +- python/docs/Makefile | 2 +- python/lib/py4j-0.10.4-src.zip | Bin 74096 -> 0 bytes python/lib/py4j-0.10.6-src.zip | Bin 0 -> 80352 bytes python/setup.py | 2 +- .../org/apache/spark/deploy/yarn/Client.scala | 2 +- .../spark/deploy/yarn/YarnClusterSuite.scala | 2 +- sbin/spark-config.sh | 2 +- 15 files changed, 13 insertions(+), 13 deletions(-) delete mode 100644 python/lib/py4j-0.10.4-src.zip create mode 100644 python/lib/py4j-0.10.6-src.zip diff --git a/LICENSE b/LICENSE index 66a2e8f13295..39fe0dc46238 100644 --- a/LICENSE +++ b/LICENSE @@ -263,7 +263,7 @@ The text of each license is also included at licenses/LICENSE-[project].txt. (New BSD license) Protocol Buffer Java API (org.spark-project.protobuf:protobuf-java:2.4.1-shaded - http://code.google.com/p/protobuf) (The BSD License) Fortran to Java ARPACK (net.sourceforge.f2j:arpack_combined_all:0.1 - http://f2j.sourceforge.net) (The BSD License) xmlenc Library (xmlenc:xmlenc:0.52 - http://xmlenc.sourceforge.net) - (The New BSD License) Py4J (net.sf.py4j:py4j:0.10.4 - http://py4j.sourceforge.net/) + (The New BSD License) Py4J (net.sf.py4j:py4j:0.10.6 - http://py4j.sourceforge.net/) (Two-clause BSD-style license) JUnit-Interface (com.novocode:junit-interface:0.10 - http://github.com/szeiger/junit-interface/) (BSD licence) sbt and sbt-launch-lib.bash (BSD 3 Clause) d3.min.js (https://github.com/mbostock/d3/blob/master/LICENSE) diff --git a/bin/pyspark b/bin/pyspark index 98387c2ec5b8..d3b512eeb120 100755 --- a/bin/pyspark +++ b/bin/pyspark @@ -57,7 +57,7 @@ export PYSPARK_PYTHON # Add the PySpark classes to the Python path: export PYTHONPATH="${SPARK_HOME}/python/:$PYTHONPATH" -export PYTHONPATH="${SPARK_HOME}/python/lib/py4j-0.10.4-src.zip:$PYTHONPATH" +export PYTHONPATH="${SPARK_HOME}/python/lib/py4j-0.10.6-src.zip:$PYTHONPATH" # Load the PySpark shell.py script when ./pyspark is used interactively: export OLD_PYTHONSTARTUP="$PYTHONSTARTUP" diff --git a/bin/pyspark2.cmd b/bin/pyspark2.cmd index f211c0873ad2..46d4d5c883cf 100644 --- a/bin/pyspark2.cmd +++ b/bin/pyspark2.cmd @@ -30,7 +30,7 @@ if "x%PYSPARK_DRIVER_PYTHON%"=="x" ( ) set PYTHONPATH=%SPARK_HOME%\python;%PYTHONPATH% -set PYTHONPATH=%SPARK_HOME%\python\lib\py4j-0.10.4-src.zip;%PYTHONPATH% +set PYTHONPATH=%SPARK_HOME%\python\lib\py4j-0.10.6-src.zip;%PYTHONPATH% set OLD_PYTHONSTARTUP=%PYTHONSTARTUP% set PYTHONSTARTUP=%SPARK_HOME%\python\pyspark\shell.py diff --git a/core/pom.xml b/core/pom.xml index 326dde4f274b..91ee94147149 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -335,7 +335,7 @@ net.sf.py4j py4j - 0.10.4 + 0.10.6 org.apache.spark diff --git a/core/src/main/scala/org/apache/spark/api/python/PythonUtils.scala b/core/src/main/scala/org/apache/spark/api/python/PythonUtils.scala index c4e55b5e8902..92e228a9dd10 100644 --- a/core/src/main/scala/org/apache/spark/api/python/PythonUtils.scala +++ b/core/src/main/scala/org/apache/spark/api/python/PythonUtils.scala @@ -32,7 +32,7 @@ private[spark] object PythonUtils { val pythonPath = new ArrayBuffer[String] for (sparkHome <- sys.env.get("SPARK_HOME")) { pythonPath += Seq(sparkHome, "python", "lib", "pyspark.zip").mkString(File.separator) - pythonPath += Seq(sparkHome, "python", "lib", "py4j-0.10.4-src.zip").mkString(File.separator) + pythonPath += Seq(sparkHome, "python", "lib", 
"py4j-0.10.6-src.zip").mkString(File.separator) } pythonPath ++= SparkContext.jarOfObject(this) pythonPath.mkString(File.pathSeparator) diff --git a/dev/deps/spark-deps-hadoop-2.6 b/dev/deps/spark-deps-hadoop-2.6 index 9287bd47cf11..c1325318d52f 100644 --- a/dev/deps/spark-deps-hadoop-2.6 +++ b/dev/deps/spark-deps-hadoop-2.6 @@ -156,7 +156,7 @@ parquet-jackson-1.8.2.jar pmml-model-1.2.15.jar pmml-schema-1.2.15.jar protobuf-java-2.5.0.jar -py4j-0.10.4.jar +py4j-0.10.6.jar pyrolite-4.13.jar scala-compiler-2.11.8.jar scala-library-2.11.8.jar diff --git a/dev/deps/spark-deps-hadoop-2.7 b/dev/deps/spark-deps-hadoop-2.7 index 9127413ab6c2..ac5abd21807b 100644 --- a/dev/deps/spark-deps-hadoop-2.7 +++ b/dev/deps/spark-deps-hadoop-2.7 @@ -157,7 +157,7 @@ parquet-jackson-1.8.2.jar pmml-model-1.2.15.jar pmml-schema-1.2.15.jar protobuf-java-2.5.0.jar -py4j-0.10.4.jar +py4j-0.10.6.jar pyrolite-4.13.jar scala-compiler-2.11.8.jar scala-library-2.11.8.jar diff --git a/python/README.md b/python/README.md index 0a5c8010b848..84ec88141cb0 100644 --- a/python/README.md +++ b/python/README.md @@ -29,4 +29,4 @@ The Python packaging for Spark is not intended to replace all of the other use c ## Python Requirements -At its core PySpark depends on Py4J (currently version 0.10.4), but additional sub-packages have their own requirements (including numpy and pandas). \ No newline at end of file +At its core PySpark depends on Py4J (currently version 0.10.6), but additional sub-packages have their own requirements (including numpy and pandas). diff --git a/python/docs/Makefile b/python/docs/Makefile index 5e4cfb8ab6fe..09898f29950e 100644 --- a/python/docs/Makefile +++ b/python/docs/Makefile @@ -7,7 +7,7 @@ SPHINXBUILD ?= sphinx-build PAPER ?= BUILDDIR ?= _build -export PYTHONPATH=$(realpath ..):$(realpath ../lib/py4j-0.10.4-src.zip) +export PYTHONPATH=$(realpath ..):$(realpath ../lib/py4j-0.10.6-src.zip) # User-friendly check for sphinx-build ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1) diff --git a/python/lib/py4j-0.10.4-src.zip b/python/lib/py4j-0.10.4-src.zip deleted file mode 100644 index 8c3829e328726df4dfad7b848d6daaed03495760..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 74096 zcmY(qV~{R9&@DQ)?b&1Qv2EM7ZQI&o;~CqwZQHi(`+n!1b5Gs;NK#2xex$0qv)1Y; zNP~i*0sT))&s3EBUz7jcf&Vu;c(Pd0EBtR0C?LvzO%f{;IB_}u?IGX*0U`Y#6C*=o zYX^HL7di*eVGZ5NB?ctl+ghj(2W^e$w>;vSJ@$f#yuDzFf7e&r7MV+=%bWhUq_X^r zH#f`P&42VV$1|9dGMLYINZV=CIs65z8arM2I~)(ioa!ao@ltTF#5lo^S4-PblK zy-)^8!w`v0o|QoxZ99dFDMc75zp?k3fo2|d1Z5ZRo8&x)3LBju_h)PcAkPz;!weMI z{)}eE=T9UMfkS~x(jv?~6%`-Wl>*`w0Zcx<@i_P*VX4?uGh!yOxFp444%{297-k?m z73R}T0KhRE(Gd<_T1K7%!Q$a?xK0?PJ!+t3Os~N@G}+=JO!)L>@SVh~N)E#5XjaVA zRRDFHHyv^zAeZ2SfNR`pgV!L@{#q3~20qP;IFg(Lug1yL$V^9<3?>ECLu##>*(a?(5%JZX?j+I~RO3kc(>$>z<^^d3qMZcn#>Yyk>3K3x@ z41k6#Wfu>{KQP9Ch!`waWhjP4OnHeDZz?j5zfmZ*-&VpVG=IBJk}bN_0U->h(1&=Y zp~`^s;&XijeW46KegJUco0?Fqo%t~6*a47k(LUcZ?bpCjm|U>zb{nnFC_C=*zfTp~ z{PT<6x^kEuR+R|~$-&ES&H*Z5d-0<0iUrH`F1V~G9oquImm??3f(8A$X4u&kMt+q_ zM3ZcJW_!RqWM%)WpjL8jM{N$}ufkEtuO{oyk}uXA%2x4V&5?AxV8#2I!e8miKmH@m z?=@RFwUHG-LGB1rUTKaoJ^I&ya=SIlLYGdM^Y4Ah{#Dxc8KFh3uJhJMq&_YnLD@VdMa{Ec{;W{F%n!HV&j7gCSgph=&y;u>)}hhC|R z7tx;^DWaz6-v*>G65LUz(D0J_hWhps8Wk@_6) z9jV8Ps*QfoVZ)cr>*8}*4hem=l%?0QDO~1rkQ+kLzWVKlhV1*h0 z-SccbgI_x%J&EHeXW-iq0Xa{m%l3p)( zg(;{&Kw2i{#Ud&wy`v|b8$9xax-=MFPvZNXggoBAovLZJkK-%pv>f=jQgf3?&T7=+S9>@qnQ zRmjEhp3~G66(z%1Q*ip6#&qJ5z$ur6fMC1>*D0cEnph38HZ}}sekyF0RZ0eSp+3lr zG=`V;jB&z;OBL;T8F0{4cO@D?D$c@8Q8$N9gkA(YLMb-g6MbZo4^$dsg#gubMz7Sj 
diff --git a/python/lib/py4j-0.10.6-src.zip b/python/lib/py4j-0.10.6-src.zip
new file mode 100644
index 0000000000000000000000000000000000000000..2f8edcc0c0b886669460642aa650a1b642367439
GIT binary patch
literal 80352
[80352 bytes of base85-encoded zip data omitted]
zzks;u+q55ct(g4>p8kby<~YP%=coVxzPbPaDF22hBWnv2TW2Q|M^_Wae`CSF!0uo1 z-@xu#`y%P6HRZRq4`{)gA;nzTHCwqvc|~jGVl{f}!l+g2NTZH@q<}!kx?S`GNk;m;nclK2jw~ zLSc`rMZB#`+SWu@jR>*IxhIv>J77;TLtm|dR+R}ET4PTt10Lz#=z$QCuf$}BB%@tC zwatRA;3~_2F&aaf+Te*<7ur#?_x}v1l{r2iA|ziF@?!8en1 z0+yUUz;*&d6itxsh^nKNH%F=JwVFeo={PWkG99)Ej#hIr`S{v^z7Coe;@DNGtt95y z_)cN9NK%NtQ!>4}8q1?h5+aHZ`(SP9qpoAG zp|pO?h_NlcHeZ^L?@WKo@Xbs#0MhQ~q!?NYyOrcBY0sUy6RXp(QPdlD0?Ed>-U7(u zFB51^`qfQV+$imO-#`mYU4RnMoxcI>XkFPQ%(Bm#W;-?yH)njvkwS zb9KgUYiobmy1eW{%z_!8F=R9wi)_mqCoE8czP@cTC(LyM2ypy#DnqSW@i#M#x|%?EQ7L~}E!QUlDq)X4OqO7?x5 z9sCQHB{GA$z}DKWKFje3nu5Bv1+45-_3_2QJ*$^sIYjTKZZ~4;-}~~q*MQ$GjcOuX zC^TyH{mNsx$Nb4$j6J{;SX>b6lrm~!&7X~;@V+H~ZXsNRS;Ha*t1VXq#P^*s%|wt$ z6jU%>%TLBW&Kt)Yd~#`+FzrrSDP9@Xsh-AgS8)uRU={?oBCedNqePLlNcqXYzi$i7 z%2}`ZKc@EncP;Tu#(PgSNT<9E#W>*dMSoZQ^s-Rwp5Kd-h z-XWRydEzVVYG!ZK+8jO5_s~>wf+FX$_m29Zu+IP3U}LytElYKIfk-2acnT}62p3uw zv+t9RQ|gpS$?JU7w*C>bke}#Rn1cG=88$+ygmnt{ei-`8s9^}}LbWE5gb4F~l-KwB zCTSRa<462Kdc<>S(4K39~X>J1BqRJETL>by{2Hkqof4uyPm) z24-Q)&~AJc56Z?}XSFQG=bP#Vi7e;ErG}p2mu;LG@Dww*L zCwP;<%9KE8qz+f3AyB#6w(TMnJ|Vz?d9)(SU(op?8ead#DMy{;eYsSc;1F#x*lJ!C z2CR~rVu}{{HT-vnv;YQ6^+2Mb)9I^oz?f7b z17Iqfu6OUnRtXUWj{WcV6cbBNHwjTYWK4sh3atk=sFHQg>8KWmWO+&(HL#RK3nFJ6 z3(tj#_Yo-2Fa3HCuSEm&)CgMyfi%WHX0c?aqR$A)zb(S!ERUswrmB=4hb5?~Hkl0b;BN8)z0 zHg^Qs;|E>fZn)76JSB~DVuNgTmUfB#IqMeh z6vxngU2+c`kU4|#I`@W-iepJl3|@sN7P8zX-Pnj;akhHD2ZWwKWKU96v#dWF^cItU z%aS4PI>Gk_?AwvoAcbdU;83eb{q;5{LZ+Yez-lM#BO77aSw>%gSVP*LiSa=?`OhR6 z>Yx32=Jyj2=2ch8f%usJ%=l9_%I0$VyUwJ_TtFHV5AYsaL}fn%T zb7qzq5c42X49xd&DK~U-tqf0iQ@q`hQLK|iRzYzP!`ts*_GvTSFZ zH{BpjK;lBcWc4L{l?r4WXN~BNrG=KdD5fR;-mGYN8(J3Gr zt5B8|oMeIkWsEorBX$M%VcnsBgpR%0G_M0cSuf;tj*0v3B?XpXt zG}z7a+yG4#ehaCMCx%&G9LInwL!GSXQk3jY&)B|SKZEyM4ed6!dF|xK!SDgXrIBo9 zT$|HJa>vLydtmJHo^b?$>XSxd-4@FB}`L(=h^|tk}qSaat#V(=3aGd`WDp zy0f^*mHCrN0Ws>ML5{1X-EdZE7(sRG!86)6_!Cbtp+4iIeshH22(O&+#jA5)w4=V? zyBT#9VR+shhEWFwY1d8Ak#Cqfwy9yiYP(7%>IY9t;gjw){ukOIK1A&rr=9yx zBsm7GZiWRfdF^V7YaB!)d?i{Oem8?C2WirE z-_9scgUE^|FZ6M_n;rS}e#^bD;d4~B3jXFGye&AQSKut`<)c85S2i8nXB11FW%WU! zt|b#aI^_B|26Bohn`1G&kfxc{->1sIgOeH^uiU+YoJ(k>C+#*C->J)Afj^JXH1e#~ zTz~$kh4P+q44JH6tVhl<%&#yU@7t2mi=3FL2%5R@+;yr$s%pz{lWkhmD1D^J&+ww8 z-I6|{nD->^BDl%8SZM`#NI~_}IGeHAy!@LwVmS^6>nGe z3@cH_wsy`JZEuF&Tr7PzHLt*P{jASW1vXE8wMy5yfaF2L3{|XTd8<`hOQ7sfNA<{5k8_!YBj_7H`zuvV;uW7hTrtL_|DYLr2KQc#5WKD=BoA-g zFUPrd_}TR|9w&o8?OW(yB~dn~3p+=Hh~_l$graf_`UqbS;y>9%st6lM|RTj6uJGEp(Lhp}dmhT`Jz+OIkLg+f=*N8J$NAG`k?&t0&Rz^nd zPb|DWycx3q)@fAre`I=m-_s1n?e3(zt=;1Y`w+@PW+T39t&}UUZVfxE>mB{zw;Px4 zy;WQr6QfL#VYgzpi;5??@05ncs5S{rsao)2gM8KG&c0K~g^=9Z@yHYTqS)=T9Xs zvQi3j^>TD0jxz0zmngwhHUj{aoV@J-bSKE2Z)4UL8$>)8-%+zil)oOgez9fGUzBh! 
zB%(jAiHaXO9qZMibWBFJ4rd7?Gf6KY&T%)JR9RI9)RU>;2BEiy8mbh?YGxVJU&nl= zU}A!j$0kk9*4EV4?pI8_nY*}oj%m_?WQW6OC3JU?C0Xq1vgktW6;e<2clxCDd|zJ^ z#0FFM8IhF*3Wx>pY>t=Q34QyCe4QVh?R3Xit_kWO#N?1PSJk6+jBf}dA} zRCKBqRnN8Kg?>HHebxtOauS0t>aYxYoeUMsbpa19Tc0Z)Dr*FelymGA%mF$n#|wwN z=Y)g$@MQJ%^hVR0va^rXnHA+m*ds+*d_C+u-Rf4|UAumr?|g~7z_U@I^^IO!I%E!3 zGC?dBJW8|fCMcdH9X&d^U=MD)4>nz^V%q_(8n&o-)kD6-5^j$93@_nE?d~jm1#gG) z3?J~+8#$P})Wy~PLInnuDs4+!t)<2`43iKPp*L3-%q0nl&dpQD4 zVr)getD`+z5c($5Cs+&HJ$7viXN9o?zdj0fC+%UVAvxq78gZ=aIG1qh@c5g3@9%@6 zMk$G;^VXa-e&3)Hrl=>6~IE<&={1?0W&q7cV3WYMmHlZ?7wYpO$4TK0Us<66 zD!x|x=1%QiBAA$W_}eit8GG$AF-~5(S{+V-abENe)~KXw1K=B<%wv&uz+(qMwDjPz zfWOB5r8Xu}Gx|QiF0hk2SmCY^(#&GUOkV36XK9|xrqcMr1SSV$v*Y{y*0Wx6AAP+| zYQRKvJ{`IS1)7P6bL8uygFE_r=C;2Wlw-rkr-}^s<|WVDf>FdlmDCXkfHi^`{mIt- z{FdgAav&!}m4{g34Hx|2u`q!gKY=Vik;FDBCq(=$e(Y2bUq*<4Z1xoI1@_?%CX^S# zxr{KN-0kY2YxQ$c~tyfThY__XGrb&DzM|)YA(yI%w+a+YdZ#( z@9T%a?XiH8r~6tSsfJ5y=+MY_7{ptNMXx`c!s@fSz99Rd1+-zb+gYoQ-13DCSr0(swB40%H{K6nKqsta;wwM zB0BVw5CYXJE6ltzcbHf~h9aXS@;qy2AJgf!(k8&Uo{f?zQm3=AILmZECaOgR_A&T% zLUzUH&$s64jjze4Ekn_{L{5SIZY+|niknyt-37dQ&~;kVCA+YOoBg)h*yUCMcf#%R z=cv7gw^s(W_ciZ ziELhk>M-5fV}wPqe>P^Ddf8nqo-VGmwtJh(##=-2wlrO#6Yg&Q zXQFv(Dt~n=+|(;lEmvhdpk1FN^ZZ{dvov^Z*ez-0uNQx?jM^68nb(v)MP^@Mz<=TY zU)Go^*?s~87yuv<@*f59|FFh(HueVpM?3r<_~I7Tnb;);gx)hs(H*HyD#S~oU@A}b zf)HRNpnze(8Y1!x>z=I5ifij7^dnGr#!*+_EL-&W^MZ!H_tUYXIR`p|6ijqC>T>Cy zLm7swnLsy3Ev6x4U3(}0ZlsC>Mf35JK*_soBQQwYbz*V?fhIAOyLWvy#eSyBfC!k{ zLQbea)G7!S>UO&NQ25I=xrT(1Rg;O7Z9$x~lnRtl#ycCzlm^IhqG(EXD%Eq)df}-; zCJZl81QuyJ=ZFf6vX6Wp=_ZBoznPoydpHOnapf$uB&S4a^{sFV2!;KBU;^-~PH3yw z5a`hFJbR)1ed&Ze++FZVX?*Tn-e~p`c)Ue`?EwXE)cr$71Lkt4p&rZDleX#Y4rsOC z0WzMM+gA)$lMVc!Dx82C$Foic;;UvKSBAy(Z3aCh>xj)P(jz0dig#Ch=T+=%LW)!C zzKx8<(?xAi+)f9Kg)LulFs{vUj`ZUxN8qHei*lZ7u~*y|^$*40|N3H|5E9Gz*V=V5 z277Ko)JoWgMxY2GktT$~u9YPHu3-MYl9+POo~Yh?x5jdgTQCxLFc>hoq)`#SrCU>T z3CMclF?zer-eH0IRn!ZDZ(B=w%2o3qbz}_fbPw_P+0)EccJ6Nm=QRIHZuQjGI8yfc zM4w#c60a!Y%Z0ZniPg=*Wl|{OS0wzzk#gj3v^WcLmjIYGX7L$uo_MqUlIL(9>QOy+ zK>RATPvBdw4nXrC)YUyVOY|hqvA#wC#EiP-jB3%|f@$Je`mxXVdS%$9n}$YO+fr_h zadYV20oZ|_p}*tlpoeT<-5la8h;Q~UqDTQ_4$*Glzeq2{NCP2m3)>|6 zb{(!`9dazIA49j3u-VL=lssbJ=p);~<`wtDv0lzAiL&4mP?=!Pa*JneH`cqd`VJ4p zC_WW1K`AT>a`LN@R@$#Kc7l{Idn~K?YCFyUNy78KbO5@-cM16rxWoQWef`Tcp!87` zy!20fEg}E_ApX0)Of75;tSvnMBgy;^u7PW{ov1BQ1iv*sMs0jcbgg32c_NN;ohbnV z3&I}WltV2kVk@UAorbQ+qu{UGobPzjjfb6%wJ^a#axa#=zV{iJ53tldT0%9@v@DpP z?jDVwW0HxH5s?Q`)EfSJ)FyfT2~sS5`yo=~FJjAaP-ZZeOi_uUwPH;wnP2v8_2pxP zbQ*E!jAA~C4NS5LdaR5QpyXh7>7h92w}X*a`a}|=nwsR<{Y(q3f__i%zD8Kd(HOak z<;u{NN^6@chZZ1Rc1bOB(lR7_QVvklK^p&>7^IpJm3i^Nk{XJ7>K112PtB9pdy^)w zce)vs9r=!X){ZKfM3RC6$$Q+cOQIC;3n{d-eiX@NI!MM9UfB;Tl#D ze-*E8{iwgYT?^`kq8Z4!Il#coxrUN19xVZrsaui7e~=Al9zH!X-SOK{MPLHj<5XIK zT2Unjv!m0SO;NI+8~9U}IUrIz%y>m-U3Q^LKiA1z9PWZo)-AQu12XNXppo{HRz}K0*);O zph^{MDEO(Q4C(>VF7bq9CbzLrOkms^mjX9JQmva{58$@p@&nb-`rk%mnSu^^UPFuA}Jk2SHU+sCt1nZO;JHqxEK^rXX)% zVSfIzB1Ur{n3GP>ppnTyD?JO|kl6C*$ zLXb&c`d?T%#wEQV{ES5PLAKLv0*_l%uXu5T-6AlRv4TZ=S7d+IoIW<%0?Y}{(azb7 zJWfJ~)gT0#8Y}AwbR`J#Ag47e%uF*Lv1>xXBTio1?-UsV(FjN;d3gwNq08wDPq^8T zSc473#czC0?W;&Em1uQqxq}B;=wrRE4BZAN?>YuWDWph7?qCROlG=_S7c|)Zy@|I3 zE+ZCmxQK0mWr>u!stw+sc|IdN*G(4lD-R0!V*laS3FKasDe7kB)7E*|@N6%9XU z^mJ90I{Cy?h6E#%-fuD=8?<5GQlMfWi$6*KL*=_rc6;UH^|yBf_s@gdi%-_@&l9TG zaUiZ&=b*HHLT)IJ{fR~9?|<{|+%}|ns=xvO0Eq$sVElV^urzQr&@-~Lwl**@PwBM)bb!17sU4vUPClTMJd_zr(4rZClsb zCQB+r>U76UQYPG?w@ozQhfkWvpbYhEUztOh?J4Bhcqo|p{(Z&Z`n0aa4#H3Q>5z!W1)wA)Csb}b5HAFY2Fl2B(#IE zXk=@Oan0JlPYrw%Xi~aw#08;uc7U9vhiGqS+7E@}17Hm}bx)n4cdt-e#5RB z$2@GONPYgX1DM!=A#1pTZ}R&I59H@JL{mYItp|XR{`!DslA|*FWCU^1aZecY+9F+R 
za03z3MIHM1+VA-K1tb!#z|?@9&BD~x%_^KGe}bv1F=PXIVWO-ng88R?^ntF2Y)^?u z1ylz?R0T2z3?=G>YqLb~f$>WECj-*smfFJ$`6G>2jX>uXiWxZ+K6x@p8pLOw?v*x+ z`&ipdLl`Hsfxun5;2~ukpd|pOX}k)?#$T#fI-B*0cP=~zi_A| z7(B&mB4T5bQGKu|sxwL2lK@O70}0`{e&bH?Iegcw>|9tGtasyb<8qH>_GR4XT!h)9xO)w-enyzW0DaR9|N5DI!O z&sz~X5y`SPkLV=S`QYNBw^d;!10@$ovQ&x-O}&?b>JmjHcSDrM=y7KQQb20+Ket7( zjla{D)`Xr}f!p~Dk1Z)A_Ul%(03^>8ZzcsSMWzQrj5-C#R|N`xeq#dz%Ln94_6R?^ z6_j9iP6`0uxVi?*8SZrZN|^fZ&~;Tey<%Jw#G)96=@8j_>JUbVvkQvpjs7ABj4`23 zfVD(WqDTGJTxRJ-F8}BU{3C?$0^9#4G^DC_$C09yJ{|ufR(+L9eIrjC2^Fb+x&bPUHE(%vVSoybxsasJ|7Ai*`YCY7))oTGFazTEg3%fB7!Lr)D*RVzhKWW zsDo@C`#tSzOav%TG+AF|0^r61Bv2++6o3khwjr6= zu+t_A$(zGg2-56U)f~@v*PW2*Y58RN&dO9yhaRTSzq1jJTW&5t5@SI_&b>EYMwmZY zk(Grx;IVvcJ166XQf;x>w_O99z11AhRxaRLRck!9K2S^G!Md3Md!Vwtg_Mg}54w*Pr3p3}y#nCS12~LxFY= z4I7qvXcO7^@OpI^d}5! zw4!L#`vEczEn6u>JyACk(aGev`CJl|A)4oax*|C+Kwm_@3xq&+HCm2hO6&BVi0;se zqCLNS5>)M!shC~kCjw?pseaUxk|1Sm_2XmyoYsFNah7bcqTPiVoA5^%Og2iBza+o(ceM%ENcj z)Kn2);Ssx;nb=vM$eEZK@XME+oK@P5oVft!Zv>}2q8w(r`cvr#r;>k zYar73#j=a9Z8Bc}yl6;N(NsdoaYZ*`2Wv2Xnh&Ku1TD?{?^=I_r#CW1hRsNJ-me%_ z1i2{}$kh=11&1a35xksSEO?xeF9;N>YkD3P&$s%{F~Q*%SErd!4saA#YP|poj=!dg z+VRvRScT09M9{~FntlS6wJ>N4#nfFsE8ds>A@LJqX{G4j0|%X2aQNL$3!!{OYj`aG zWiVsY76AL99E4C5wRp!@^R_^ctyHoW8T=Ul&ixl$kvrHqib9HNQ$olJ^n-!QHqmhu z9+w}Gu2yD+2f7m-ed{m!dSPD-BZwpnze=h{WpEq;N*Zf*8g)L9x`GmbAmel5HX1C4(7tciiCf} z>&UdHzv82O;fEjjt?j7VU$NOx^p+@Z7Y{%%9m-}filb^48B{`|LLJI&8OZsd9r_W0 zO(V*fYfK$uwB`D7%QgPixX0p;mKMt}{0XD@ZVS`}@BoHU$Tu6>U74&jY!mxw!H1)Z zZtVP=xY>N11Ux+Z_EgJCcS?_t+EO`}x>N6M4zC>H#5b1`I9{M!`Rym`#W%zZtqou* zY4pR-w!iyhuY_F@2FZ{*V$9v|y0tO}QiaSea<3n+%qkpkE%_L?F{+7!@MIW4E&A5k zVcIbhCK}ju|0)Rc4)Axy}V+TtX)i;s(m4nwy$rqAxm5FA|4DXKQqtHMVXm;H$wgtbtM`w}w4es~`EAXwq_1mGEWPABXYRpB33mwlG#kd#t+< zdG-FnBF7>lx_s@`yOb-X82q^8`jIS^D8$?Miyq`e2SgQEAf{pvfiTk-c6t5zJP zu-Y$E1u46oUs^b)YeaN;RAQ3?a8oUfzr8N$sn6zWhdRJL#Q`r`C+yeQSt0BymRf0D zZySZjE5_K9y4TG!CG@_n)PDDmcSz~&_ysVda!mmiWP4KwqmFMsVlQRnLknmnj0qdS zpSNevdvjO#XWc(mG?6p#!9I{f?8``R8+2V`a5)D_s;-Nnk9=|+>tj|#)Q zM&b2!8rk?9$|5&I%Ltwc2frt$KFA#0PPUASx;2yA8T}_!=B{9NlGb%uQaK7R4)S3| zc_kD32`NNqnoKM%h8}5Z)-h=@q(X7AXR2Z=HlXd8>oU&^%9^w0NVb!nW#U|rcCs|S z3s$UC&}hH9Y-;W1@O^lwL4SGUshhfAOqY4wY7erebY|Y%-Ob2D?UGIux%F*p0GVS#1Zt;^8KweJAqgtvpq*K6X5<)C(?Jql zE7Lr*aqqXCIeKqh>jVB|vRh-}qt;E?D|zK_b!RlCh%|cKwT|zw8$()rSngJ}VxvtP z#+<+RG{xtm=bo>P>Nv{u5HG3m0o3Ry=%YF;{o+r?l36*?-qjf~=BFLwNqrFgh|NT(*6uKlx_c%~+G3 z3%&*~wd;2M(xyBIyo|l$<`0W4D;K;s6P7k!%&zvXm-FWt9b3hn-+xG)e_3mJYA4Ce z! 
zD$#2jTmYU@p|5=n$e{|JMjUczXuAtDjoDLq*mf5arEniAPA66Gb^NCNmA$Wn5_ zsnS=iEafNB_>NP|rq`6%b#i6F#)#{w8iWY4ue=5JEVX}+fa=zO&FDcPkR7=m(M5b8OY zfK^A&nI1HqbkrG>0q0@o%ppwfP4e1ibRfJ;dChGTA+!io%&W zf!Sq=$4REv393fQ01br3gIQdmkDAeoFGD$~JiFXyB?oZ!|5?!4M9&peF-;>D{%F}|=u)yiOsV&tPrVmH_*vI@v1 zg3x!56aSFhigdY&HmJU96J5=)VddU|9GtOc4a&JN^bG}^l`$8s5QefN6#jt*_Pdpv z;GdjKQ(n*;!DQDbSiZo%@YDMI+vCn1ixyq&KJSgbNHFr>l`!48*WZ`5Rfpe`h35+f zPEOw5OL(xm_V1?+zu?ciy`@$kFAoRSV6{2lx1Z13ouRz2I{Mk7sj8?v-xn=zH({@y zpA!bG-0Wxi+Jd1Ag}UF5!>4w?kfU=|+vMGai;JkD-MEE(zh|_!YPT>tzGgZ<-ybrp zFLy`GXF6HA&z^&ckw1(*&mvh{@O8BcyNW6UpD&sz*R|$#_j!>&l&Y2AfXz!(Du79G zp{b4Zk&{$YPWEA8o3{G7{TugY`Dufbx)d-Fn-}0b86-x3IxfNi5s_mt*30Lg`WJem zkn`&&AB|;V{a74n#fS-`-l4QTRzz-b%l2oMB9Jzu(*ZqOSrM>-0QXHJt$IluOEXRf z`+lum)TL1&%bYxlfQ=4!YdXfS_SpxiH4-FZ4t>UUC581j{0G8#pQ6dT=;Qs zk^6ov3Lol{KCaG|Wscb}-P6OS36nmd1bSUjJ4;^AAQFMemGgqDkQ zz>h-~x2;V708BJCezX=6qOxr9DU_p)ItkK%gh7s)17OTVS*M7xyD_+9o+H5Ld{{vC zs|k7G0LuF@p`vM}8Wt9?k|6-Bp)*u_NU;d07PD#(hxeldddY{1(w|s1@|&}9c(^$7 z9xDTT0w1RwJNF|we;{DRVxZ3eP={53r2nFaCt9qy<_DYjPkgk%9#zO$BF!wm0J{3i zXUs)?F}J8lRJ6VjMf8izApM~)C<>Y|xUKYK`73tz?G_I2=}%LrriQ+jmZu>;>YbUo z>L#IXuTlvC0AJn+6Rm;h?p6ubtp#OC=`59Sg5z$Q3^7pF#R2+B*&<8-H;Asn!X8oQ zxenY1gs1dYabnrnw5mAr5yU36ItbGrGnq00n;LK`?vaK>t$Sly`sMhi`61Q z*9TB<5~w*$0aKrj6itE+Ebo!QSb_%+M(_&9(#O;BfZG+61TIB_v=gWX%LnGzAj}4V zD&Qms2W#DT+t1!jA24~fkIf-2zbVD!j~K<*)sNW|do1Z4yHY*> zU6FoAh^A@V-%#nx$E&5eYro}Cc)(#AlNOQGS+uk})T7WsM#oRA)aSZ6UT9jKH~m>l zO;ff%0#FBP{8+a{Rk%fBRr=(RJV;}jKm;GMXq3_l{NL`Clof~8jBoB;(Y8h|YbzJT z{3(O9$;h6gfaKs>DNfyoBl!vUn$g;?h?WqZ2#ERg@xA;GQuzhVVqD`3 zhas51{QBX{d+|W}is2aQBZu9p5A|{^_}x_S+x5}`8qmrd@LeS4?Ujcl+#$lO_l#}?if zH1xi@yfAaJ4d}37YxP(*IfNX6N0rXEe#ACI^$mIS2(2lt3V#Ze!8mJHP_Ju}l*DDP6Hax(>OAHlcp5mMRjKRJnrQ(0%@)9j7c0 z4pI#ei7h9}19J5sRQ#8qM)VrQL2w-`$T4fKI;F;5!ohd_^pipuP2=GU#Qu>5PzBvr z*`U{`>lx;xAmzy|UAI0B`LNiN0WJlyO1b(xL6qce*a8Wn_J-m#vN!3M+WSXN4l6tW z_sIxNM1(OzulZS^hRDD87$aJLKy1ch?Jx)`diw`Q<3rOv)$4Ow8-OOT*5R0x*DTGV zCVLauCPQdY8tP>bTM%)_l2jD@0dG;M6dIMEbM^FFw1?URXAb51`@f6$kUX-P+rtQi zQ@h)V4lo*RE7NtL&?X$g3LN1Html6@gNLs{xr5-(qs${!$-2 zkKM>6y2Dr-q&_YsQMS+VT(e1;uGUS5yl-L)E*MtDEEZow)=YVipgFw05a6NpDO~#T zK=#E{;yzBUJGmGZLO6(ZfkIRF@enlQRS`_0Q?5tUs>5jlS-) zLCYLd<@M43+p6H(HNS351NF`T>lUAB0R#>}2Moz%iRVjh6`(X29A(tq%wKyMY!06;3J0DWe!gm5~4?Wt*V;Huf0Ne_Um26YPJRTkMY92ywM zKjp^F44rbke`KnU)u|dO$I3C<;@FP?hR6`VKQf$x#DZxf(eFbMzrk`;>TvO32faj1 zrBfn!0^Kq^BfW(72p1CpLvyy`v45AGYqCu-)K;KsRyl8M(jaP9@STtQA)7jyVMG^# z!8^0aQ>`px!f3)0>VYE7?YzdydI;Vo35y9S<;$joVK*`kI)}K*NwEZQkBZJ4-m_Vs z3Uy7e7TjzbHqxvof%|ISRY3Wo3SO?Efx>~w3@EB9u9Hq^ANgtamURx&a+I$t9;VztpKu; zWTZ(nlXzN#!p=z5S(*|m#LHmM?L;?&i`I|UygZK`#jmE!DNEl?chq>`Lck`oLVH6I zdI&U?;FkXR5E+_c^QoksE9j^^>=A<9r1)j1Q_?(}IN9v}2GBtOE{GOof>ahM?Im>W z>;?t9dsG!7YWGDBWrKs4)lXNzZbY7eH6o|0S`VbUZ~G(=w79BdE!QS$(*atenHC>0 zj>O|N1X2+k?HZ0sQOl_Wxop;15vmAMy3YH$F zfqIE-Wu+IPB_ib1&&%oQg~HRvVbzZ*Rw=NNL+++Q$VV9)R4F~MMjbkeS}ZC-ZYS*q zKwvnI8LZi-joj=$PA}>11zEjpaxcAp3lc9F5bh2;YYdE1n3{v!-|3d%oTEQb3-*w- z8thqu?orsqg77V%ObvBH_cmg;v3+XsmWo$NrTSrVfg|IBQ)q^JAjYv_H}O#bj~e+2 zKs0fRHl{3SU~?=-H+VLMfj>@kBrfFXR%{P3X>eK5phZS9;(cn;%UUjSW;6g z$JAmKj}vN>2kO;0OTDwvg^J-(@0D+>GofIaQ96fHT`_ZquO7qhF*2ZXgjrRjWKu&a zv^>r0A~L8-wYSX@5cg)o)wlO0;6Y*(@2)}j9PN+?|8Svn3>)_vUetn@)4?|(tPM1+ zhsV4XJl{dvn4M~8>PWlADm$~(L`B|lqe5l2%(6QZoby$X!h`@`z>T=N&Q@u)`EoU3 zJvc=D6XW5N)7-yxoP2B0o2N8?N+|EIO=N~``yCMalE9K%ZUx&YKVPLAy?%TWk($}Vc;wp+6B(R>0rYxcCvMl*dZJc<{z0^|M`q5=bmAqY*z&q78miz z2j#f&x{8m`P*#p=tM(23v`W~49KdN%lsSpeDsQNnAaI+8D}g)G%A~h>Ra5r4Hz)Bs%u)l=T$qjMf4 z2Irx???)idJ|`>m=q=@qBKT$yEdCHn5e=a6d8c;FBI1MA_EZa`#>tdJJ?A7{L60J= 
zle&uAm8yyul892L0t&+|w3v#KYd#YC7_{O%T2mbPr&&ySM%PMatAx)1jpVAzn47~H z1^`{sGeIU}Z%Dw8$u@RB-FS@L>=__k%`(Y94dM56{BuV-d!ex&3rcAx%U{EK&F#pS zC;ijo?VFMO>Q$fiB6n^@IvK)3=lvaTS88fR+mU1ngNwFQP2IwC4N*BaOM!1v%{!Z4+}pSg9$@_8x*lJK69Jmp8i21%c%z5bNmt*@>|v*R=C0}M^8J>PJGDu!*9ROY{*Aoa z^)6)i&|`UPox!JL`Mj4*7|ni3A4H4Axt|zV9Ss?5#Y_!XThm|LQ4z~kS^1Mu)B1*j z+x1k$9RK)uy9}wi1#V1V{0xWdTwUXF!?--rVqVOGxG{2BF5wjcE;+C}89@W=X-8N0hc}pX0n^dx& zWJRIMJWHpv11g*BZ*xw*0nuk{t__n_=BJGKFWyUb+WI@bH92cZXGgh-@fleDqYEgD zBD+J4;J&zbW69Hp>3Se8TwgrKF(T@tkqIBo1!%|IC{8^R$9&^iR+U=jw3ETMjG@F#K4ij>ouB>-80i5@aIfWgNd-A zzHspRdKg_Dzq|UnsiWISsLz_Zcg-Xwe&`_p70B)PRt6W*>B;%4{uRMC+S%LZ1P4)< zxEc)UfHer9VOv&A;;~zcW=G7sZYlM_T|8l5DPs`&IK&LMU{D%7OT>qB7IxzVE10ZV z1UpiL9=am#E;1Pd1E{ArYrXS8m_h<|bNvJ9FXej7dLxyYL^G~j*^;3ip5`&)411Tp zNGU>ji12mOOQI~zW-|zCPxW`!H9Xkol;ysAGWXt*;W=@DjQ>PHh*J@azl-DVx@PpQ z9kyd;eX>xK8}E{-o%=3nb@K*I4?E~BS-XPitb&}bc@icnzEyZN!2EduDzn7~((6rA zo-`m|3M%7PI&?`f&j73mpgbO(tBNYa7R@x7mF6?n5=KfbGof-PfFp5uNW7 z=w8OWP=>~t52v03eP+)+;eKV6>7(eDTHP%CSvPl=_QU=C{ii0i!|+)uYbG^6;=Q$m zK>fGDvzp`3Zw#*+u8!||8RYOOrxPcC@xY6Y)b!UG5^wx%+}Ete*5e$fhhp4rH=&#L zTtR!t2S?0m+BRuz3!Zegfk)|rWu|AJpyh!zFPjDYaMLEu{Q=Zef%oDk0$7pT- zn4h9;;!yP0I`lPGeK&=mW4g7{Tjz@{1)gE5p|6V6t3$CUK;)c2t5#Xaj70{4&ehbZWuXAI{eB zkp5tX_xlA-1RjE+G8`q4@KlG%A)hiN7VsF4tiU5dG?`0`j>HE3r5HTAY7%pZE*UqZ z3L(?+Ghvg}clpQ)e>ul<)Q{T@IAIOKJFP-xM_RKnN?c2@?eRN3Dxlz!B8S!iKd|SV z*lzHQ5Kz}AmMdLp(2UpfKr_xRIyLU~E+HV zQ6z_JcJbm(zjtpn5`Tb$us(jU?Wz?1Y*bsd!FImb(An2U+k5su$hH3FdsviZHx&#i z__tDP@APOec#u|M(EoRu)O9&kv^%uk8?TWuU(rCo^J<%$%=oiWAT~YRX<#}bO1aG~ z-(r(q2=Gpen4X6;0j7)&?#{Nr{`}TT(#TWl(X>7r_k`A3RCOQgvDJiC{rk7qaJ2X- z>O&VAgdZVAEkp{dAfkc-MgQI2w>GzNBnf`^ujqj`!hjcor3vrF26yOPnxbWnMN&&r z_Bb>Opb0d|wm<+z1EP68|M#nWRb{;zASKzJ-9gM)BvF<1%F4=jBC&dp@I-T>U75bt zRDrx?2WTJ>BE=~MQn^y_V2_DU?M%%fP$5WHkDQoZLiW#1!E03 z4S=~|TX#XXtDwD`vHf#oc}gxl==e~vm!aFdY@|@2G2&=^`XnXd=!leTUpl% zM}{K-JH@1$8qNXG7zVMTuM?rbsmH$YqRE5U0(9|4ebNFSZ_ry%c*@(^{FVeAj4hG; zeMbedE?^d==Q-RH7QI=yW3xomH#gO!E|J-0x`pY0#^$2aV{RSK`)vFur?GRC439T57B?| zyrG6*)x*?g=8r@0?i~JP1H{@Ohwh5oK&FI@3bK8_9o!D;nL77`d^u5P=vKkUh{0<+ z>AfD<5V#Nm&UPCJVVGP9=yI|R6cwxI04&!t?5lcV9N-YMVXiO{8T{B)=wUqq`yk{y z6a!fmBTS4a;RD&U*In=N!6~{+7&_8zzJ(G{AkK(U+#kjagaIBKFDrTQiQ0WQt((>6 z`x?yto)?x~O5q=k*4`Za&3#mSFHX5*SS4JqmDrmb3K42-2&oxa5kizi186^jEzQW} ztl_#D#7F7P#?03H-8PTC@{^rkziCBIyDI}oX4lQobK5A4@X5A9k3@~g&bBj4Ow?aU z(o|*fmtQ!S0H{0HmQaIn-!I{YOWGBANa(;YEr1ISrZ36X3>rJs=FlrUpIz2h;L%VP zBW39xwcvGh9DPX=wHsvJ?4zgfuFkgEFO**f=?gq~_$_VlJgpHr^V>(?8QDUXN2;e*Lg%r`s;bKvbxI= z#q^SAJXCMcfpiU<&rZ9Bxm(qj6xO^V?9eN(r?P+|^lu zScP%F^ZDELg4~80jMlxgS~vn(qbV6wgctr63FB(LlzVv85_!uYEikhNb5!*h4Hhu0 z3WkKwdI2{J$Ga=8YqX<~CbJPGp>kzH+tpjwCYo7OOfh)((i}9(VCh}OTNyBkP0GHn)~IOvj)d3|jW<-%!$rAFH|3-XG6&$4U^g9QjW;v%Wau#F z3a*Zgns4e!#d$!zk(A?0*K=;PiU+hbYcM@P5WWk+Q^EH_y0B0Wo#M^%t&nEkUlva= zZn)uCZVZtEPGyX4IOXL^Y5w5j=(JehS95bU{FK1ferF>l>_P^~w-Y%dd6;(Pi3+$i z3M;bVN3NeG@X*!%l4wM*ofR3j!3yB=Zj8@h{Snz>&BA=qIi2!Zam09FRC0CMK5 zP%=}7I0Vm!%o3VfN=qVk!1Xa6@g9L zgJ<^a3TB9}V;bBNoZn}4bDd(aG#hak@Or_LK)%o$>R%oXDYb6zo+Gto7Vq=M0~GgW zf7*EB2|xIssEt5@gtIKk)YUO0m`m?UZ~bNl;f#VuGNz5Ojo(f;8yLi}Ka^DfyZUdkq#fdoP%`rTr94QWPl7Z{rD2|pP-J%4 z0`Ster|{-?<5XZCn5mXc-NLX${MyH2UhHKw-`bxuO*CQA&4(#M@*(*Ypo-o-0LQG& zuUn{56eE?3BiAXEZP*T+$zU)DJD)r|DNfN^=?bb8U~Gc)0c6Ichav_UfWiS<9`0iI zKIya_3WPv{{MlaPZJ4%a>zkW9tghsRBg=5&U<=j(Dg*{}iqLJZ*UDumizmxVddLh!fF zi*Pq$i&|IBCVTj+-pEp{DcALEwfp#Kx`!TE+eNz2tGDH5F3WL+#U(tK646Q|oJsVC z42D9xr^m|R<4oM<5uN)R3@4j9QCrz9?-)&s~g3OYceHN6v!fnymw)3+F~ zOBWIh+xWOCFG<&;VF9}%nyLBh@ub51Fbr=QbOm0< z4eVfDav&$=I<&mh;NzGj+KmulICMf9jI9o~lNF-35cA?rJ1qGitFHudamsb#cgV?j 
zCml0g;i7gd$hTFsP+1^FY1P1P~m6A(%1rq=49F_WJZCE|X;9(iL$yjRTGBY05!d#NeqDELU z9IR?*ypdybWS?87DSzTt1lND7dpU^qyB8ykj!^f$2v`A^V^d*LNJ^3jB~;Q?I}PHZ zaOplO^<&MRSi+Wa?Uv!0t0Qmx(!$iX@#Hvg*D3pmjc;R6m zWt=I2)+!w4O@7 zcjF@NUYchMecHL=fE4gfDU<>GLN^GHvv>M-mpnO46uAn#s6sshBoJd+ew2JwY!wgO zVwJbrA`M4taS-`L`5Sz9RCHn4+=B2#O(w#a9&OXrnsR?96dn}6(9CV{qK6eDI(%$J zhyM}ir3&HJq%#|vrsl`E8NHOYkQzbm=Qx`o?o9Hjh7V`$(Y5Kj0<#y78tDp&!UZ5h zJtKU0%Ti283mXvgj!LsU|A5UodT6h_`0ahD*nL09-wbmKZvJK?1hHy39;GD z!q(9V)6S4BaERoti~sF=cY85Qore_Yg!e3w8iexQ+f3T+j~}l`5H%LE>`TY58~wA* z;mb6wKUi8|+q{6*8ME+udU|r|=udD@+L(F1DlI%Q1IyStqK;}X?|3EnCw0i7W>Ly- z%}5;%2#U(oU2na^lVc-qA3t*=IwyCZ|KP|q;8mR#n_|*37In%Hj43x+M~@j&$Xmne z=_QMBX`U+c*j1O8pb|0!-|W?uV>?`)orqCZL+(+%Cr|MYhf^AENO&RUr)gA_O+9xH zC$=`?(+d`AE zoq#JJFVG-I(A$M!__W*YA~TP;0a^D@#W_esUujOF7xNpol$LVZwAy8qXuhK&dc{!* z>HDieTL4Zy(#L%7dxt-Mj_{7G*y-6mD9ZI{pu+>@6>c!1@7V(^G2r zC4h%-l^(k9z(R6W^S?k%OGswn&_7*|!MS+i_ftXYOHFngOQLr>2r+9F!GBwI{4GQq-d5#Xb0r|{V!{yU z%l_rf#iT55m;Ep{cvtO)zu_&>-i0TowzrL9hP93Wm2wHS6a2jig|7=oP-j;*Hw&ND z$i|P4;C76XS3j+&V;pAK-Ry|Tlw>yxyQQVOYb0s(3$iJkY@Ju-)vEPrsa0V;6ScuD%`<*@nVk_c2YoDhp_ zra*PXZSEwGiZk>UupWaK)g{D@a*jngKu)vB2!!RYXki*w7{{NQK@`nNAln)Q5b_l8 zx6&u4;_x1*Pj*UV4eTNMWTiybpj!3G_q7fMW+9n@I|)%Jx?R1Yq;LW&qI(x9zQ4MiOmQ_kNO(&h72ji~S0rO7ykBw=K28r?OBz?oC;`@W^O)U%19T%5cN{QM z?4zfi_wIG=o45J?i+U-0rMF;Uc#~KWr|nRzSZG(iW!QhyScbZ~aI_xvEC z^t4l_A6`G*c@rSTH)x=4IEd4?$xtnalBz)%?u3XX_=nCY+iU=fM`K<_#U-dPS2&YF zc74lw(d~i4PNziOaPYFFc$=MD5I9xB90NrtD+y`D4}!IHlphIiE$}4fo*wKU9DaA; zypUBTi`WHOk`p?Y;`XoKy9oD%>jVR&iY<_DWG$ck6eUJXX%V>fPtnY78%` zB=c2JFw?6`fNe+?5+W{tB5otlO7r6JB3c3p{I%vYL}n17&raHs|1q!$dQlTO)c`_Y=0qw%p0y2%#r{lK0{B2#jc~-(tkDMG&$4 ziRK|C6 zE-Kx-IZY|itY`t#tUwaYjh$X;u*@(%$Q9BEM)AbF;Jp7SH0O5ueWYeM4@htADUO1G zEmQ~@re8RcKm+**(ktVZAV)^23IRGiD!`keYa911nt8z$!P1rz1I|`ZGjp@C`T(Ss zdU0soY>L(GyjZN43ov|#b9&m7gD$#oc~E!T{AUkS!9dm=tRGmU@U zwxR;?FQGNvGiO&{tXDMRC(*f)jmv*MB7H_Te4-NSgf2LE!*_j;dGX~>DhE$oQwRXY z5f~YDMf8vX!ODQcEl4m^{stOKVD2m@r0R!Gr!Gms?R?q5A(20qZnz@aCk+!=z4RJ)u6rNYH^baR?z~x$XiZ;*Bvd&yH!_EB;@6-dcC!G zcpMA^L-B3wknsA+n=nd`j0T)g)>Q?APppgo*w-cIon(UK?CG&`UdFMXtVDpEvo8{m zBSccLVfcX$8bkAn*gyugu@rOVlxLr~nUTlBzyoU?+t7CbLu8@eHmTrH0Hi|OP@HVJw295k z=M}lm^*wOx0HJy+t4)nYR4Q6Mr%JH`%+?GLagm%v{GhGTkMC|qUKZg}WbS@Z4Xy@O ztuqpLThn9kqnX}dTF$NpbhtrPv4jHk@~7D*FAFSvxeG1dORK0o8#XGzV%LUYJF(?J^pWdTeIeFRA2bU#?yuiq;G4S=K{uz$` zx?F})XGk_JHHca3ZM;y2|5M#ihgzZY^N)MgRLgISu)Mck&7p$xv`Xfh!GFOBZ|(^M@L$iKjMl7o1kg~q=3P{ z(a*Mr`de2`+VIx~tp|KoBo97iApyGC@&>zlsoYL*jOR#YArKYD7L!PaNg1f@`Y$Tc zpDz`@f7$Vr#=H|bwK0F~DUN}flG>Ca|9KRpLWaM~UFMAmOK0RXz;?htv<~<|%3pK6 z-rWrANtjsb!)SsHsuB1$)g!mCIy0_)V2H@@@YyGsw)DeB^&!*j*J=7ZFi8340kXA` z;_?8K%7}FH*eB}4NbOsA#zmPb&Rq%`Ow1uyNGBx(j)YLc6WRtMw0&T;!K>qMk0C$B zCcS}7E3F#v5a&139;l-Tt+%WtFk|e0yZ8Jci7%Q8^77h+R9c^AYFk3kWd|^%!qUPX$sF+mU=W=&KPVotfWC%0w^AQjn_8 zt5@ljGjgv4>UH@I5#N4Szap?%cDZ;u*6k)=jGoTPB{xsKAJ&96{Rz`_`V(zQ zn#kYJtS1974fd`*B&(Qc*)2uxk$kI&1;kW0$E~_sLUC|{D%?b&it0Vp&W+5AI;&|YO3cC3Y2&#tRwO|dZn9H&|# zo7Q1*i(RnnyG@-4=n8W79P)`=PODTQ!AnedeYKt%whR-X2p^cCi@fo>YX5>VJNURt z>G>gukYr5TExLqbuix}GzQy-7j=5H_hldB>Z|BC7rSg&R-L|yXPu}!?2WOuTC=0h6 z(=Z>68Y_G241*0K-Io4qe@wULJoYBE$FN*;`hEHF>CPL22yGl>#@d)ctfz0R{qc8U zI<-pU9;ljO)RJ7?!rr_+k03tQo=zO)|&-ReTqlB>V)+FIjAjm@jq} zbNXPv*mU2CxIsD0jnb_1Rqtcb1|uYALg^@gJK%e~sK;DXBRet0?n$|!Oyk^Fo{EFF zEvMoTUXx0O8tMjRvay0?Ng?fvx%tiTovYglb3ie-^4*bUDN7Zp`dJZy7Nrz0u{tkd zX>$vxMid&$kX7kv296mC%p#ZI=vzY_4cc1NVr(}q!(OS}1_Olnk^lx%$xTA5@`i+D z_nnN)WbRGojA3m%576e=as5#VMbHrpEhj9FFrU_E(SfJ2Nq|U+4cPUFBXTLIja63| z{6;s?pA$JrnjfK1E2VyN$KoArpidlGb<|{jZiediMu@kbmkU9$EfqM`%X-Q{QnO__ 
zsUFWSFD;Uz`lWVhXoKh!3L7>Y)al~m~*;A9UN&*r%)vmPl;GBSy=WX4nvuvPXQ-?D7i3K3u zhwNeQh@N5z9BH#63g8#zto_ZwFCd%Xl8V%^dS6N&$W0T}@ZZN6Gum_YZ&{Hx4?9A$ z1Ow%6wdNhXi>H5yoo3X1bU%hQY4f-Ut_s;N!>wpR=|D<4p6jX(x7AW1Tr)Q;9m0c^ z5$*}(By48mu1tKTe$0wiX!_wQTR z5c6vYZ;ydu16U%erfyfzXcVllCP=ot~sGcO69X`8H zJct*mth6q=qsk?SFx8_L5aV<)CU8SKyE8)N&48ny&g+}%sGd&iCN5;3DfdAypH#$2 zq+xdxL`z=cbtdgKMDpB&quR`hSKDr7eXt6`J)fj}j2GKDblDH@mUmxm4 zpkvRIE@uU}fO9inVdO6-Q{2Njm$%70US@!Y3gWFo^9JTEw719ns>f-E z7`x>>+6rbQ$0bfQzQXNA>x~RQt<1?8_I$ox8o-bcjbB!{$)ZfMDRL7UhzucU<>tK; zbreG#aFBN-vzwdlzxHW}X5!1UZLF#0`7(CKEzfng?Z|^I$kIaar-H``m@4&@>~_ZM zW;MTohpQ8mb;IogWxeT(NmG#_Lr?YEP(vuIM9#&HVYpm_YSyd`JLhmwvR1R>^-VcJ zPe{1=*ufelK^Z%($17K5RrzPb2P`}@N&$jy;SK|yV^}@!5fhtD4!OzutMT zXZ25z2!-l>KtjPu-K^@FSv~+D&ogmKA}0vX8w);Nv>DE5-ROmkPXqQU9lUyys%gI$ zc}5v#8-^h>X6Z5g$O+kck=sQti(k)XFCgH1_D>HCt2Pu*pw?cIuMe^~zojP+vZB_r zDstp$^WBMKPLlj(V7c1BZlX7X*I)Sss>$|zih!9>m>gg$Jo|p}*~D;oGqATcIii4< z1p6cIL)6R2ff%82$UzxJm&moHg!j70O_1WnMFQ~K;w==1=Ia+^dXcf*JOJ$>J0Ns$ zfAza-`=>Y3==Ib7pWlqwbI+jHWIe_NGb8K>zCY&cWmKy>GZf7modsmS{?lX~Mgr4%>fVCH8GMhHm@B%ly~fkLf_33uoSffav<3Qlb@3 zcxV}~RI%1G)1#r9h%0sLJmsJ*+^hFbTKXK2hsONPz;E*9D87)vM0Y^U83Fq?;zdL? z7Nk&_ccfVIu}Kzu?3$79(7SeeNBjg8K5H_MSB7H_4>PQ%x~=#N4w=E4BOM?ywfZmV z@Lyf_Ecz1H<%*`Iwk8m*GDeR~*))%Rk9g|@YG(NFd!y?AB3#T#wdpV`un=PvRa7SM zt|#Q*dkcrLjP^eq$1Uz+12wm|$s3tuz=#bKs)CcAaTdJP4yeF+EmQ3pRzUEcJf(Tvu z#oMY{WZe6FMl(e{hU%)c& z5rlk4M=n@Dq2)*|cLzWTK}q2O1)BJqXATz;2OQ>BRJGdR<8<9XZFs*x&NjO@2q(I+ zhc{~~UT+#FwQmW+)>f@2w8}9733bfLt@Os<+ARl%7^DE^eOP?Ip3GfKdE{(yFEJuw z^9jVT({HT0hpHS*U+Y`^dvjF2rKgq@61+;&*+pui*@6QHwGFw_KFp{A@QSo^4i2uu ztn=csDDFRM5%+OcL{hyeRR+EfHk7OJfIDbPN6N}(#h1LEy_>(4iI8&w zTC3}u8pm1EH>>%)FtX(xuhhNAIx#`UryV|8EkhtkFOyg}JiB^>yO+zV>J>qxVla?8 zvy>JsOvwTmfcZ)3{NI&E7+#bsP^Nid7Cgm=XZzvG2)S}hfHR5_3?sZ8|vS`X!Ii_mUK1UpH%`O-Rf3a~pq z=HIN#@?|}R1YA^I5@QGQ8mrD#wfg&KU(e?qgBy2B+i-Me{@90~)W2Vql-D96;TM?v z*|fm;SD7DR;Z3<3Uz>TAS0!Kv8GdG~8tPBiSF_TLSNZmz^Eq`Q$BdN}3c=tFxf)>G z?9eaA=F9HiKkLN18)b`?MK^9rpnm zroHzdL85RdJ2&x0&4EiODe)_{80hZs6@0a&H)eHHAw%!{W5@Pp2Wp5BmN*xHG_q=~ zT0)@1b@5mUb{dWut`MavPT%Oj1u4!joYrIO>#c9Lz0^z<{tGXwswV?vVp^_e<7=yu z3Al--Pman@;1OAxF|Kzzoj%=q`x_9V`C`@VzJB9BpOd;8&S!?juWi(ZgwG5c^ER$m ziI@xB!U$JPuO_9T-Lv}tRzvxo=@wE!nhN|FJm74_1a19dIS0mWc(t6b7bzz41@1%; z;UxwZ zN0x%_w=e?$Wd*kZ2YW^#i@*>QEyo8O1|X|*L{<$NP~k?jLz)@WATjpM7VtZry6a>Fy91+IbiJYAjUk!?6~4~KsRMrz_&Jm+QsK!8v4!!bfLA zx|@DLw=19V!VNb2VvuNRxb{8^iy)j`R1UF_+kq0DK7rv(DzHKG_0_duaohp1t3ml7 z_RW7Hb+!NEC!?^?4k??KJ!NzL!$ilT?nD;O21PV4uHofgI#N>zra}NySCAw~Eq`FGzbRz}*~0)%QEPCt`yNE5-`;`^#JyQYl-kBS#_gRxRw z3`*QJb6_&f`%_~&HpmGGln)aMDoTjYkmrRD`4Pd*cRs1m$ygj2tsT1|i^38$j41sY z>2m6&Jf>7|cUG@CF4w?O1BOkia!M}T1AmQESix>_3M&aMyLB@ZW0Ps(*KpJgKsp$D ztt=_0V1|U{+6C_fmqOcPlsbS7Ka@bjLShmCL<-W~L59ZltlJxCno(X3(HnJ_-5mxH zgk{hP?fM!G`wp9-lBfA!fNq3R1jT`)f4V$OU;cb{vj6Sy?ELg#??`VexD*;}8S1LM^W(xe`@oK$bKA~Q``#=5S&;DP&@H)U2$<6YIHG2p@fjT%D zE&}en6=aVXx;qZg^rog-GbzEQPhHc{=>v4$fyc&817%4%d+a(x?lMc8_j_e?E!)s8 z_vUm`^{qSNMe)m%Uw3|a`fJZG7L(X*4+wjY*!$Zx6HG+dMR7gQu3z~1IgZM^i%N1j zLg+$6u?q(OjWoVq6zj!*`nbw%cg<~rAG%a2lpNO074e`y;}Qbq+9mEdb7{neZ$V}) zzn%mm!A+JxfRn9%{0V9y(QsBxOddC^pG>$rScqny9Km~LRjQ~(9Ra8jkyX(UKWVj~ zq_8!&6n(+-tC8FBumv5FggOb<#Xv`>-?YDA4B!kzT=PTMOKy*v>FM>nS!G3rYe1d{Rc$DyxOY*c62<-%h4&%^J^*Nk2Ot4IDu^~36nh#}C_d>_bZm}? 
z$%&E#!pOmzftgh!4e-OndOfIy#^Z142~GZc@`A_CYa1VMI(FybW?9UF_XbbRY;Dp9 z#d{3sV7vd8B!Fr6xy~8Aq7Em_=^H}Z8AMHm=-z+_*`xvvDe-FhFH)};PMVt1v7-Bb zI^9E`n}3Cy?c$UqFss`UxK~EZlDTgZl&h zlXgV3GlQ(q`2S?HjZ8LP$ss7LK_UA|1VlVSTUP@gs=1Xq$^z@t$codwX+MW@Px{3$ z&4kc=1O6kNYs-3Z+jzD4qG!QuAY~5sNgsyocaPEAm!Kqs%=I1IF3Sb)-G-@$ z2O#@iPpTmFz?<)Ev388H+p(MSZ3SxfZfAMXNq1FmKp1yzzsb)znlH{IJ<=w^X%}<3 z9GQbz76CpHP^lxba$J1=F8^E=cjdTj>5(%=tTwM zO};m|Y_@dWLLS(PR(TH~vfoU&Z%ed!Q8F8SG6DXk_aiM8FS-(V8Yy$oqV%Lb^8cg_ZPtr4=v* z-H1CHmi#w;4`!fmX4_FD0^#=@|@ z)K}q-E#}Q?>w6m-Zq*}4iZ`#yK-_A?*4nC3NvDAz2B8b>hFjmCKUuyZdM(A0ikx!d z$$eN>oXCi(D?qFl)v_D!0YAG6+ zPj|fFp#A-|&YCW@>Lt@}`o3mAAX^%GA>tJ^j6?(3EVa!QGh{}DCsx+sy9%#FOv_y6 zj#$Hmn2m}8a3}2cdarjLKYbH{m|y%zHQF8wVRNYKDQ&{U&KD}zSWgD1LHEnkl=coj ztij`+eV%VGe$NbM^IAB>7#u7ihYJ0506`85vxr0<75SvAy<@$5-e+6Kd+)^yb`sG^ zPtssH4+x`x(@_|_q1??p#l9j;uU(<@;Y`@H`KM zp%-maN{i8nVU$9CJN;Z1l=jR3k5r+X865glB|{8K*tIW5t6o0ZVq7?G7k531h3CW< z@Ld#jbT5rVSfS7W480D(t~tAsfc;1r7u`R|6pzWCSwiAOl#(8S^gLu}Ls(wtq}&t? z83{@O!Hor}001XLr6v=rEGM>+AD;|M8Gj{=sP_$XY5qXr#>N_4rJYNQJpmKsX>cPI zo_n4MUF4CX?Fv)(s^=|FK=cCTv7R|jeG8ohD`Wz8WlK&4c|Pkgh9zg|PuojN_VCw+IJfo=$l(<9DHI`@j3eCviZLX zQkGa%4n9UPt%^P!c45U#{y{+f83>`BNYCo;o5D|ZdW4@LUfR=s?Jxnf5BEeKb^(Hp ze^Qy_{LLeG*hJpQ&K*?0gx*O_bT5X|3`%p2%@xMq{00x0K5rv|wmOHfphBXoVbEk$ zQ2m4kw&3TwQf2C_BXTA~z#ewFqvw-Po2G#vFs>WQGT0=P7@u)jj+Otiz_=z%lr*Uf z2MqTUs*g7!J!r|H^z@ZL8M(1W>@$Otk_3_%0v?2YV`vh@NB08PV+#udVtB^N`vj{B zM!V>(wpm{QQ^bMglX|>zkSv0YkkS|`GwK&3;G*UC9{j;ppuqice`1~3sPEQ}#1_7U zMj#m>i5P91al`uU$Djh=Q06nN+S%qJitF7ie)-k^^Q`)LMIz}Fcimxy_8G!B4#O3U zt07lS_pJnb6DYgL7$bp3uqlxPyUvSYo0pS`-^+Hg**_`QuaR6KHG)Bey#tVJTa>O_ zwr$(CZQHhX*|zPfUADc;wr$(SF5NnPUq|25FFHCSXGTUw##)&fbB#H2{A2!KimBnO z17FP?LId~Rcuw0#l_SGMWwXGm7)2e_3v(SQ%qI=EqFT)-q*jk_6lw6NFQ$? zOkBpCzx}pobBEjUI8+4Wm?_f8(rPo1G9;-N89^G^RHoE~-L<#M8*Mg7uGB(=x?nRyOr9345_k0gBiQh|0ZW2R3k*Y9MG; zGNBwOLs11J*?_Xb5+?Y7*Z**Ff4sH38DaP^D)pzQQ|{T@ znC%E{9t{%0#>Mw<`Tuk*wAj|tBtqBz?=bLYZ0jFW`TGuEQ2F*68{5=>WAx=$l%C)-mVixa)jnWPhW3|D1+Xf;&SYd-D!U-R_YYk5V zf{b-{{6lS|dFKI%&bU;XDGBQ_Z>T^`g0q0xWJ{(d!IlYv5&==k1*7TVv+$E6-z~Bi z7Ghr{^S;Pp0#^iR)>)bpAq~OoMwSvn&4$$-F_zEhiqV!aUhT2Qqil&$rq6gmo4#QR z;;~GVK6SE1Op!^#kZv`vc(%Qd<;59LR;hlHRtIH}b^^^@pq}X8(LQ&rEZYD(sVeF+ z#LI-f5&_Mx@3AF-x}lT(BUQp0*W6`czT*~f+)3%t0ZVHhVLZIN+|5;VTfpT_(sY(k ziNQ2n@zhYq`GRI^yNnks5#cA*FK>LNx#EEqX+@lCBLTmw($T>jh`@kkxqIKB^GC7XnyA%asy zcgbbiz5%rJJLCwdXz(1x7LxIDT{)=ufR(_b72lGK_Xe&3)+2tS8Z^_&_cC(b%XcGw z0j33>w3uqvde(Yv%idJ8V@!>KdU(2ioxJJKd~tj$3xN?sdK3uwjp+u>JsGg1_Wk!)6X@A~XfItV`jb_P=AiHt-neV^0fD-+&_q^%M;P$za zldXentX{)8yaA?t@w`Pt>~{UbQ>(Y5p?>XhRyTG;xRqT=TH1iEy9=k?$~5Hi zJt%8ifmU}9v*d^KX{POf<#rzX_h@>t#dCoYr&9yK(EM75*G@3IugV+F_M(JcF>bL% ztaTvK+D1D`@B8<5RN??!H1p-uxNs1u-R9fGU5ZwO|M+UhFMB%OpHg~@UK}{-2iziy zTWQx@+u{NkM;9>F=C-}`+ltN_GNx#sd0Rlb?Vp9Sbmea&?UgRhGO23s5ayld_iLL_ zdGN)B4_7Xr2s365D7r6b-A8t&Ab{lecOGgs_xm<~<#-Olb|@4g^}qnN3Q?=mzx>4T zIQHZ8M3L7|Bkb(Fue^v+?wQDK*!3lKKqCazc@)n45kwF=8A~?-RD%H!&FJk<)DPH+YukiXt6=Ke&{YWug?cH&?$6csDEs#a4lHx5 zg6q<*9h8D>5Q#e5p$Z@ZvHyDRc9#S46VCc}q5Ra{&H zj8+4q8m>*JTQhe6GF*j8oD^)Ykz7>5Ji+aG1ee8D(`3DwHRJV}CjJC`sNdjVj&PEu zV|EehBZ&zC^}=!7>F@wMo?clz^APT^*yvu-lOaKG6&n=J?Bu2VtNr+b3zXDA)j3;* zO7u!twyWsNJns4O$QxNBN;x1Lot9Dy3N=wnnD!MqW8}MIPT6g zdFzLiuub~I`XP30rRvlIX?48V1S+XM*WoN_A&Hr-qSuA0c7&_=0PjKB2R(W4HH~fa z7-*{AS?-!+YXx9di_yXCew=MYjk=#Kw~4rs-MsWniL8QC7WVjlGo-GwNUoI>6;djy z^}?}B`YaTCwYTkheVj+fOlSI*uswRCg_<6x~dNaRB9mro#xsh=8JeMO6~Zs=zm1ONWm0saUBEy3%jK)`@G#eFTdLWsBTj zXmvaSFVU^sL80!_%aR!PbUC#nvmj2k=niC=Ru;O*V;Tm-kWGD-lH%7F*Z%S%Pzmjw zw&=KIYl{4{$Wg&<4M&#yx?z1UD$6xw z4ex--$tBQsHT;b`XjQdaD^aQiTSI0`te~O(wl7Y(BqBYBh;<-jTclj35mG;qKhc;b 
zEsqLdb0v}lFO-vR%y$4#?59I;y|b*4!rKymyf#He#G0&w{4 zNfaMZZ3HS3B49!Ryvf3PyAfj7M9bjE0KRuQkv}wm6a=9qwnOb5 z{fhJDJK+wcq+!%XM?Vfi=`69!P#V;DLMysU?I|8I7wOTQ4)u`089XHFKgVhGE0pLP3B|ZZIqI%%x7E8|-mJHb@Bs&rhVG%OV#gjTHUZyJkfsv*pnz@dTF04T;~qj@pWyaZh6JlrH~59riQ3`Xy~ zQ~^6DXB4T^;VxQ4wBYGWCbI&BA%Ka@&Azw8``R`%j#KSV{iU_ELf&R(qM$;Z`3qP| zH=9s9a}BBz-}9aUXQqR2Dezz#!U1v}NgMHK{c093)U_!m`{ zO2|&S0E;n=;P+LbN%Y)R1ZM9<F^r#I2CPPdY>`-T=2z;RW;X!^G{*;{ z;&1e_{g^&Q&6t1}`iV$dg)DHW=$Oww{%n!MQKS?VqNYTqrwlgioZHY5JnBjqEj zVr>FTmy0tCtobDlgDFRV@woxk5~(EoM=_!Oe*9R(_|h;D;cfw1t-3`u&UZ8+jtPN? zIVLvG6dMq=*(7RsjTg&a*vVBMJ{Nz#(g@=9RSNshUGlabAF~Jyn0V7ykASL> zB13n=kepo3Ujk_M*_nxh$4Hh4@3@j{1gI;_TdhL^DL4_ zLzX@KpJhuv)_UAh2q=-}X3@P$;CAi)J^b$$0B=4Z7A|OScy2z)B0Kz7zEyf^jpE># zC_XkLlXXVp1ID1Rd1pgF2wUSv=9PrMNFVdc#Tg{M{eJJKQRdymqvDUo+n4V2rzp=>Ok<2{MMVB@$7E@r)r3%VzJofZ+je+1%JPA|3@RXnWRppCjks^*ip z{Z06g?n0}D1<_i?bse1idhIP4@@fW>9cw7zgv}yJ+%PHY!&>Y7DD|#JK@eHy6OV^c zY6-}jP@-QuQAltX<&x5N1Z-Yb)TCN6FEk>b=>hs9Le00|A0C_Fh)gDJ(OYRT)YP;h zLI^rHS39Wgjzm{TCvW>2fW-v#C0cU0h;-MNDf1BiqhEZ+Uj`t??xG+|$p5bETt5*! zy>&Z%^;K^=)0*->`868N*r0JcQ|*9EL7QA#(Kr;S?vSXemXVxBb(NTV>iMU2pxnYx zK4ZZom!dm>GO1|$tT_gx*g0%)AsijJ zLTFOY8WRf7C@>b7KBJF8Ut8Wg59OpoK!C#>;9a{n+RD-Poe;I+H(iF*iZe^{2{Zv# zs79`p7R^$ecz%7J%3u4tKepM2sMG!Qag3<5C(wq$D4W_Y>BZDp?3-Re(v5Y~|Zk3j> zIfGX12`On=R_&yJD9)(e;V8gK`IeByh4(A%3b{!(=ASiE)7Mg6qOjajU({$Lh`=w< zZbc%2he#jC3vv^j^hpj^hl=2ULr?N&JoTuJ_po%Bw@eyN4)ql;WXZow-f=;YN#UJ6 z>Z-3OsM>m}1`tjixG*YS)z|(eh|%=O&(G|eL>?cIM-B0H=~d?5n8&W{Imi80L4P=b zxAA`kq~qWhMmD|sKClWug2;AT5(2;1c3B(Rr;zlD(K13$Q`eFCP~qJv8S~MB4*B>z zLvqZD^P$!LrA1VOsT8%Fk3mDS0*W6X=x_&;jc=WQPvQ46E{kEf(rI|^#{h8mF@rH< z3gy2g7>-}RLTY0w2*Qw??~0Ihk9mEBD_U7ix^9ezXqpAbIJA-j5EDuUR@j`WAS=TG zO8`jsVJ$Ri>u*S>GKw;hi(h@~w0LY!Dt|6a?c`u{@4&P4dsII9R@dR^KwVeffVZ4X zme}lL=*9HufOEUPT?*v_9hC~s7fGoz)N)Bx!RPY?b?Ptt%)%fmJ$HN}T77=~1(L;; zjd;tiOiFIBk(MX@|H38w!@Ly!gKygm3IG6s2mk>8pM?h}OYXRjbR!ZLlG9 zU8p0BiE43d<9SlRyv`EI)(d0;$T~s+(bFojH6oQKs<3^%_y{MUm~xMyRnw(PPOoIK z56n3|?;nF!x)3iJ4)AsQJ?*1wQReb`J(t=I%Q3he#=?sECB)Q4GgH3p%1+ubaFYI1S`sDJ&UFzLypB_AG16{LTHjFGqifzhxA2I0I=5SG>N^)P;rN^ zGp;Ux@wcE~mt|Ql;KuA8I&b<`UxXR0W6SaT>8-SfO}AOMafKI9V9hn;@`vZxjJvA1 zG*e>Wt&u&wG4YDEAqFH18gy-F&NX-FHXA)4eHxTgF-2J9Q*3woJiK3DlWveL*f$}q zQk=(XPi%Y4N_BllI|Nzg-D0;kz=b!Q70rlFq1xa;2nD5SNdQ=AVhT2+9#j~{Gy&xC zdAjUJDNKXCR;LdEa9n*`W#EC?iVB;1*2tMdruNq`8E-bc_7l;8%;dnB6lTIjNPwFK1xIEi>GIHSDJToJ!@9eO6tQ>38Psdx#Wc+D?>oFA|xN^taE|gNyo|P3QPP6g;PW5mg*H;2NHM!sbM1*13 z#e>sje~5|lVthi<%t`J=urHaNdV&`dY!ZM1AdV(ORsi-k`CN=Rr{K9P&v;D~c&8sDvpH8ur#x1P+x^QO#l?Y# zhF(5yQ$hUlWsPk6X+}D-S7@#BjtE)KA{|Y_4m)5l)>H0#QW^UnmO_a84$)-J3gtLJ zZ|JmcSW+);gbhcYvi$=uZdY6#fwNbwb==^>5?*!bdnPnFR`QV({^roBe&ULxvLWf4 zGC&$=S4zP+8z;h+EirKVI8WEi)rbRPE^4|Nx~ZDEctj~puq`L2vWh=GdCFfWaQB4G zf61TYOJ{R7c(w?{hd)IRcgljuA7aTu#`Ma*%(`a%#;T@R;05Y2yFMCf^2(QpBdIpm z0QFBzYZauVZ&*#H3Y&=3Z{&hj>$;Y=zK4$~vfhC~lZ53<+JhTv2Oao*QgRfI5}j3D zB&Es!5<0Qxe4aM@{C!<0wFXTr@>RfzIjym|?o_B&o9L#qoYf`^F z9!{F2yEw#kwO#IrSTZ2Y;^o5MKmB9m&>40qW<}1#@ zaF>vLA0uPPm+t(ygGR-^?!If^J7!yOq_I8Sj+?sTjH|W5BXZj|< z8ThqdMr>RmQW{I#M*&?vRoP)9TWJH@Om~Ti{gYZy{lr;LY0bp@`u?&%al&noY~KN+ zl}GkNqslvQeQu5y4!Iqe`Ba_);+oJQ@H)ZB}e9Iwk2U!u_W+ z@K$U1{!k`Xu8Yh!J%_!wk&}z=yX+z52u^_vSk*O)?`e3ZbEDxERI*dkrig!KV{7Os zce8EMj>98WcTH#WG3{mOSl=-6^!UORQn$Tg9GV!!n9G}U&ZJ<@F7l}&62UE=`U~)% z=nn<(&+Q*r&VM}l{~n<=w%AD}W`d@AsI4P7aobaM3kGt=GRNegRhOZ*nm+W~T; z5lW{bg9tboPM%%M)CsRfzVgCbyqC@hJczn^1O|``g1gk?=)8{-?qY_xsBsDVIuN~Y zZnjv*r_N+h+5m1eYyA)i%BM;sO{rNY?9O1?!de89E@a3>>Wc-$_5S<@Cd5OqjY5B7 z%e82UL8x`YOmW<}wW}3(wh~My4|~^yvmLP(M5ueapJe74$j6XY20h(+U8liFY$_tm 
z_JV;{=88v?j+sZXBt#25B{(lYRbk@8aIt@nx7B>{Rb!OaeV|t$!sSe%jMEkA+o$I9 z4_hqOo5+s>i&7<_l#z0gW3yFCBN%Kr3KJKBG>C(W22v$M#1NGT5fK~zxUtEUX0#mc z#ST=1Izw>E6Lgalv31wV)Rs_loM%neg&Ot6R5%+JZnH?2i=Y!}7U&#z?b@>&%7XjC zZf%t?ts69MuaKVt#=OcZujVQ`A?zfo1TE)IvK5@LK&mq)!21Hp79c{~NMH#pMAW@e zDByxiMKy^Mr_juGzZkzFj~|YlmWH7V4U4@0@!cVMiN9NIMw>v8igPN{@~=x63MPe< zHC13_z*sc8z5c+=o10~U7k_L;RIDi@Zla3<<8t*X)xSeS<7H9sK!%;Kh>C4(R_|Gr zNtc9l;RhBXi*_;#r0LvyAHi6uHfUGN9Cjb*wR7p-k^>%?h{HTUOaVP3k+nq1sK_;b z?+{q?VwG9SA=ORS0Z{MWm^twRZRlCCP4c83|8v`Jmlf6?kGbh_&k;e!g67Ce2&s9T zQxEwy&d|kt0OEt1D4vTgh8j{L6%>FS2FP6xemPVEVgHb2Ieb^2Y~s_G>^dHC8;V_P zYr065JvRAAoWuQmxW%iQr`%>I;Nw7=79>;!HU1X{1ZP5 zUb(FJ?%0F@i_KX}RVOBhEe2Tqz$a_?olQ?xM*Hw;KghKncw0`w#~Ve>0r2pxa`nie z`7o!K1w3uvs!vCFT)0P=e|^KPXIl@*wzT_xL1VFBD|JY>^<8RWgUQ>yOR5+{>612- z_i)<`d#dU*azp2){kV0oZlVY?n)C!65yL;L;=uOK((a1Pb&JGp@5aWz_PH6`EbRMS zqjTwf8a+hsq4VZ;$EK5;x4U!Qi@yDtzQMwC1N%<8(Gh}DO1_#2H=>jTUo_ogiZMwF zA&)WH0U^DQ1fp}mC=b`rIf?WwOLGTgQy7K*SFJ_}dGz@ZaWJ^FB$3?j_&@xM=tOZ= zs)d1nclJ*{_rmy%7vYsz*jf2^ZEC`Lt_RKof_8{g?K42gIa+`eJ|8;j|Q)6<@!D@Dc2=)jL;^`Mm zDM6H5acyx4YnhwnQR^P96?PvR5~0KIFf(y7yBRJ6XBIAZ<8E)X^x~v&{Q~}X{4B(# zkohNktNg@Jynn}NBM)a&eJ5u}OFMJ@ek2=7C&qJ#uAmxhr>bC@d#QGECzrk}*pL{2SEl{F1#qZY3|3He3~QM@Ey zNj3y8h*$U?iy<3YLS?KL{T)-9NSF>u_d$-yozxrp)4&M7268(Cwa~&AOck+Pj+RcUsGtp>P?#+>QLXewa*x|T>3GOcjNKSXYd0jAKem2}i%>#a$05l&I1wf6Rp^i#DhcCmsDy8?qISqs_TEtM^A@Nohp%>=D z^SVOS!==qVC5E1^EG&9Z7Geuo?)*aW!^k{dgx^k&fiGEHQHrJG+=iOoTR6Eg8^ZIt z5UWHZxlHes;{_haxec~l$gta!mJXSVA1nmR_3lU3zuVIo+WNymer?1zp4H7Oxk;PB zktCI|R-EcyKSl}7-CO&+5n5i0T6<zLTk=tEuC^rqlm` z+q>eicG}=b-1(%gyI0L|uB;;=Z*)uHwa*#(g>;g*ifV1f#G0g}h(be{4?zd0HK)7f zw`0d50YpNoD3_hlg{6vz1i^y!c@5TU{Cv8DS1&Ow+TcF!o6h6+h`;LSoHd5ZLZ0cj zT%XQZFJ)R1m8zN^44sR+wk#_0sqRrFnmHCu4SE|XxU#79sP;zlr2WWrKxm}Eau_k2 zN$n%zurQSrCx`-_YcbTwor@a(E)K(Yay%B}!DLd~-*L+|*^j!yqb%&&H7A{+LJJIj zEPndq`;}QcAPe{a`pJ_f*-uPsa!&-`_!bYziW)I*h;&!piQAw*wb|0jz1J9ZV$R>0 zD^pv$Et{>+FR!Q9m7~|w;rZR@ahL~qILnmyOR{b%NgZ92tTF#BlDdI^iE8rEc{;#w z6VSg&sVS2p`al!IC(}q9@b;cG&e-FR)WNf0D#$iQ!PV1@yq>In&Zj$D7hhH)gfB7LzZ^nC1y4uI1!(12P1+Qn%-;b?V6Vq;S$`5sI zd)7gkVY#9}YyrAsWa3tHOROjg5vxbOSB(nNDyxd2$cc}a$cLWC3P8Iw>_H!)vKtK| z$gLZ5Qxh0CA$P=2ItSlh-M|-WP+^u_%@EHM9$1w;K6t~C@EaoYd=)XEo_Wjj!qSn( z&tIQj&zF&1U72+!>Nk2iGOLG?pD#xjD>nD{fdRVQJ#Bvs(`Ke-#)hUqJoPuzzu5V+ zf9vb;F#C&C+VRjTN~~G#G5npi?|V86XlG%lbEo;KVHoE60Br%-kGas#!p>3WQEvlo z>(MOqI$ti(8<$_bwBxo;VXk*%!!Zm++Xpp8?*CD2 zhEIoU_QaP=9N+9z@B{P)s#u~|Y60-e2_ti$cngNNlhP~a^78812BEZfMo9#Q zD-SR_n6ntP&_ooG#A22*f?!kzCG{U8LtS-ZX5rC52nFph8D0how>{Kh6ui`rc*CKf zw3ntxx2~5mWTXZgY1E6fkOY9iB9MVRNRQ1gcPpXZ6CpJvurzVX=%ryP1-fckv)ird z{^@Bq+2-vxE@ zLHWKj_x%*8olz~c#eRYD>h3u3Qc<=`}s9*v#!eliGVr_&p1fw`V88S^LOrzDx z&Zvu#RCYp>2)C}1i=IM6(_H8QDlDJS10u5B&NN1s0+c}8D6AG62S)2w0ajRDEw-F9 zT{v$7K8D;T^oHbRsC4NRuhHqS|IR&*QGnRHQ8BYNkYa|(J+S2wHeRVI!=RX%T$|Za zaoKJ8YvMErnQ|$C^KE=|u^^l8kd;wh#~bWD)uF)@*6ZZH59Y!^yG)?`nc}qu@{^r> z;2_q7!_2(Q*Ch($SS!%H@UfuWpEkWaCFk_DO>89AZl1$y$MTJ0n(g#iNa8SPTivBV zH24F@n?x&qI)Q0T+A>B)v;(^y=~sHbA^1eNV8K0EL3$g=T{6I*0GBOR=|(_Pp-J88 z3P-%*+y#S+dfCnMRa05Gbws@X=|lCL`R9 zQt*+@Nqd|zM%=rHZOBSZoD)tCYw;|rz53lE1Rc%^JHldebwq+a631O4Uabc%th)q&w^RefcCaUAaAWG0zy}YW%j@NM?4}Y>QQIC(3!?~7nFZP2&W1(}BU-HNF*G7R?^@Tynje#h!%M;- zHtgK=`EMbJtUh?}DYgZbND*-8nw|Tn)S{1c45N96=Ww6FI?NY|35sMnxYrNNgIMhT zW|nnZ7%b>V`QTRSz*d~yJ@?i1DV$>{B*GvI%)s;^DtB;3s-OG^dupbD*yL@GaGo<~w^nKVv~(8~yi--b8e5 zx`b-CkrSryV3mU`0jcny+@3>@a3VW;xdNgUZ(N8Sr56iIsmm}B@wY&irn>(hL3~`N zIZFnNP4S!UIYPIL50EDQiDvI#UmMhfsgb2o;c!YV6h$wa`Yuo^@^=vZUm&1y*ZRUM zzQ{m$*VJ(@H=OGMcT$X%ylY|n7g7%RgQJ@_FaEX*H@8|Z5r5;>4$s$6y@Ey-C;OPW zw1dT`$GwmcR03Iw2(MeFv;`Dh53 
z2Yu}dqT@L?a+D;lGBAy(WUvmAW%N=lGrfzPQvLzDc|FM5Wp9$u?GL0)Nzm;J;KqaJ z?B6{{TIA~xE=3ICt^QlOUB{CaM44!LNj%$!#}U)~?*IMQnhCFKt#j?;Y~`@F%gkUl z{jq4Fimmq1QZ<)zy6(qvQIN`X6Na1$mcg)=k~aJsFxOwZ1cEvh0eOaEj5apLLt71` z3`f#s1&m(~FPM=0-QwXD+9GH~@v*&ZThIKS+0K$;T$w<8&#hmC^J(227N&WD0ye#| ziN`MaEaR}F{|gJk!#uUKo*@1hClqh z_&d-}s||+x>u_8sL~TjNe<@Vh6Oh%2 zTrY&Q|NOj~D%Y-r_zyoi!S?lYdJH$$*Gi)CK^yFfVARoVg04tFh&KC8OXCU#mZmQP z)H2##%O-NIjg8YlMZ?8WF2=7vH3DrpF>N=?=2+ZCHCqZyS*I8Gv-o9HM&fBeRF78i zsYdlIY=ARdN^i!)=tPqK7=3abdSlBwmr^&zUD$5p4DVf<`&(#(xz`;n$9-X&T2@1r zj{uQZvJ8CAfzRAJQv$UCA^de;#{gc)lP@l(3t}$n?2bOVjo)qCewR}1IlA3}<@)HE=*AW?trdlepzK0sG8HOxMFSCHRFTaBBk>iK;;{k(0dlTJ~3 zwJbIVgH~$uSD0Kke_gi4wknAF3AD>VMbSo3Hv8WJ9V`@m-Z6HB%BTb?GQTHp9Jn%I zq_>$_fCS+1!95~E?L9MT?kqaWX6Po5(Zp=5yf{&O9-Ppg0rS^nbDg@ zmL=^GRE2v!Z8u|eIf9`eon~9*m3USF3E9wvwzpqNS65&%3=Bj9xx9y7tjk7`^**;d zyp>a|uxa|7?9*f*%l1Ycpt9sQ`4a@l($b6uAK`mnxy6Cp^s1C$4G2TE+ zNyrlVmO2^D?kY^<2(JS|%%Qn!pu=o%84h)M^usKx4>r3KxAl?hIvS)g)*GUOq)n+n z&3pzZ0Dz?^xM!D}-h*trkrtwL?7x(0r>4>!0FAa0#~TL`yuu-f&kH3dtDY%OiV7?c zKh3bH1R1)7E`^k@Rc_Y^D`#t%Dm&M%PzMY0 zw>?$;N^`|7VY`5zbrYVbFCV!D(ONtvuMp3aNxa<@D72}QqF>>uomQ|RBmWPI*+*fy z(8@wex&EM8q!t0X0dy34k^=8R{n$d1-_V9vI^ViFt*z1~#7@c7s3;T+6_QW+s#84a^N7j;cM!D_JGQC>;b zMciX1#(N?M*QZ>qAmp0$3s~ot(MXiSJJDo-IJ-O;R+Dme7WRlm0L34%iAFs;DJ?Kt zinfmmT&4U!M!|5QnSTz{jlh!M^1&u zYzFIy4GRMY^?q_fOup?Lfq+5O@{OcCqt$CSe<-ARj@pU`~xWfgW9G+nu={2OoN8NcV{3ROX~CKJ7x0#Sn(e&Tb#Fa~Tbe z^M?u)iO;hZqfS8NP4$WcZ}*2ggGNyVS;q{S#HM~Afkjs@jK*fFJP7P+!K*f#u23cy zb8YHw)5hxWyyd$VGViQ0(mETm=Uld*CavLuY zJ?sv(KG}Qkh&;)XsKS)-P-`!)#LQ3k17|THJfhUj4KT?D(`M z(IJXW0){DQZ>@urTFttxL(NbzzyF))LSs|Y`~GvG^`im+VEy|-v$XqBTwH8_YUQRT z|BK+Vr6KL~(`MEEpl+}Uonq*5KE}*P(lSpaOC!IJGY<_$l*VM6XaF!8`Q;-3Kmv%6 zm?9<1y%cTAgVwRb7ogf_5yPaV8Xu9!hLhqVogu*Ky(M5mrcEFZ6Iu4VwPOcwAyrZE zNK%AD>7eY?y}YZ6DjyMNmANk07>Drfj~e&y#c5VsELyf-%gB$hJjh*%c^x_Kc`@4= znoM}4uzJXV+6@mT@fBJLmt)7z9b_``n2(!NkFj%T>8qrX0ceyf-B;HX)x*MU2E-;h zjeU^2fu&mr=Aj78qioS>GyZ$nnR3J}c9gdEi+%oL~My~tw z<=0Ic$g=J`xS3{CNXSXW;I+ax1t(yU0QfG|%amTQsnr|CkXuJOk5cQJMZ!-o_sTwFK(blIxXzY2W>p zBcu#4lP4$wz?_x^lYChQQo^6CJ~L*H*EwcV+7vRnziZu9kF|l#_$t?)Lb0^8g zwv!M32-C0D4DKB%3;&Y0c2*0yw4-v>s$A&ZsPffA!%gEc#FsPHcS%mF9P31^dP>6& z-LrW>8f{-o&XE~*l_hb-BdagHtf%B~ox9`UVLiHB{Qib{RSbu$HtIB*~4icv0a^quEp+LR< z-SSPY)_nL84Yyc6txWrY_pn+2sejJFTkYGn^*Hlw?=^Mv?{youJwPi02mqiA0ssK_ z-{Hi{(A7}i(9zM*<6oNF|FCe!sL9%|3m|m8t5Mv}54N}D}oocILJ<2bs^ z@C7k}qon7oLOoLuF6GKk=8Q8Bztyl53r%ZgqW)p3Na)F<@Qj~_`5yQ8ofEe+M zM+M?U0i{>z7OWPYd0+!-GK-VyJ7Z@IT9&uzp%pZC*N?FIHNuj~FPf)#nK>Yp(>)C8 zm2QU|jJrw}*DOF2sOgRUEV@1jZ>K@z_6ZxPbtKMd7Z94?pn^n;fgbym&!Ro?o9*y% zz9e(JP7`?+%(Yn&%Vl3QR|@%N?ql#k8P+*7U?~f^ajvdcU#!?s-wlf(iSIs(MOfp= zKNp|s;jtA2k%ov&0nDZF7U*^ukjya23+0R+4VvGolb`$ox}X)7#v`{DcPv)Q!yDih zoDotsr2`s`=FAxFdYD#%=l(n!tNH2uheJSKezI^!H|2F{fIfyZ3Zs zUnf&*uPHO+G+`xKQvVDT4m#?iQ>$DnoYgUM4g;P8+5X*cI6|$nqCRNx7RrB?kl>n1 z7t^$6yfQ@kdgb-{V1o;nJ4i*ZKlqxos#bgkxbj5!G48B0x9yy>&0ZnPY|^CHvi_;QGi~{w(+V|H(^pDh{NG zWFt}5e|pnyIsSjL@_)>4V?!Goqo3}ue@*dxLxq%R6C;0(6j_yrqgib;VG)UK zl<^o3KOo?braYjC@lNv`g!GoffJ>z7j|v8(B##Z`P~g_Z`gI=x{zbHt!3+5d#3Vfs zYQg`Vv5-y@iNAeZkbW^K=+ZoqjtQeVs#btPKIr!Z_!`jwou^xvNr-oeRZueylDw88>KC$LZaht+*?@Kg*M?)`+ZmmxpA9%sGSo!FeHyAM%xQg_L0lN86eZLwQs0+MSY zH&K651|1Qd>xD8W2ZuV~71thi3_iqOfN&%@Jg~nbhByGQTGl~y+sxwC0`9LKZv~^GjkJeWkQ7{ zh_?G3XmZ8@30ho@sJ!3u)78FsQL}X*#I)Mjp141hI&(+jgE>qwgPd+3T;*b)_YQSm z&&CWeuBVnLNw`{qS(59UBS>*Z2hc8kHMp72Wa{5u3H*T_`9Gu!5cBC z)?jgLqCMa;EaxWlN&)uG46f^0w5Sy^E&=(E2m}daSGY`ea zsIki91#-OgWjZ4_1s8Pfb54pS#uS0bW5wn9!X&X262HQMo=gSo@mVczPfPc1CBI7x zb(^aDwq*(56l*B=Ig7WuJZ+p4r}+qP}n 
z&aCv4wry0}wr$(isonjh-_v_vSO15I6>CO}Id0N>8}KhsY?ln07Ro>Gk}5YAR6hY+ zvdN-9ChP@oF_;X=W#zB3*pbQQ?Od{zfS57TG?L6_x~Tkvd6i1Gml#^IAEuYlKfNij zBOyo}zyhC)7^m<&IgKjP{Q<=pWFbR7hSiB#y%^k_az7 zmwC7ChL^!4=#&%Ve9s(VRp*Wdy<+N>P7N$QFSBE(c`>)1Xb9g|7pB_8Fqe>&=%>Qz zVnee~sUXTC0O@Z6^rb<(mSey!K6i1A;2(!>DD`G$na2`kN)Pe_<@1u9K{Zk+3u~)p zJ*arZIs0Vr9Po7VJ-95^4=3GuPC!7ZK$RSM-vtI2nP-fv;=DuF+ti5W^pp?y#I1aR zcaw0j(e>+_@T2)pEkC5cChx2rlqj-9w#&6wuXL7{TBX}eKcJP{O_*Ipni((`_o@|Z zhUuuB@=747x1SN5_es*0?Re@o{u+<4OS(@4r6FWNl#Dne_D(uEsN+x|WlcdhY)~ls zz}T_Cs$jx3ieyoNX-Q+%=Xmj;YJH-1BgS14Y?A5yCLx2^CCu_`iGQDZvUqj07;bSg zxEVFYBfWZxSy*)c@dXv~xh@(6c@6u;2`0gXEAwTQiq1sEMbY-+2+8aPttmTmxOCG((3r7EyZaB$UJ~5zSeYXeojdCtskqKh*AzIcNB$o5;MO=Q?<_icVi&vv-zvOC%(^ZC) z-mlMJi+jQ}5mJQtYo}d(s&QPj=cM;cp@gpg@)6mr6aX2-6dXEV@KM3-LTFM-!%!Ou zqcpOgEXi=+JHStLp?88xhEk$dN8s6;%1ONAxusGLRbSiLRnI+wP5miC8%0Ctwbqf< zzg(!@wN8DkUH@EUanuA$Lb@w08>+8z@*7EgiEz(&M3LXJ|h2wwGFH&FU7i_Yt> z*ASUu)T-%tDsHc?evDJ(DGH2f`-6oxN4`loJt?t?&+3k zSX1hN!n7oGV}-c}7MLH14`@jlZcO+53{^|0E9L~^jN?(sR!fM)5l=S!kFaE5rK{Sg z;q<$mq7WyvY4&)a<%~<~R40~GOWY8av3;@pvvE$>%vo9Hr&=+yF`dfMk~;L3brWA{ z-}n9{4BU^d_zDh2GW44_Zfgn&s)I8@*aG z-m}e@)<(CtFo*G;uwY${@rJ&>ipP{CojOO&B~IJ<=Luj;%}RyI}cs?4|V$@(aT{y5le=sRlo ziL;9b1*eG+&6y-T#7-zLyol{yx%-( zJfZME;QyHkSFts;SN~lxF(U&3;rut5(8TgTDa`*N5ngF%%U`k~`OVbU^-CAjsBHmt z;YLD633O#UQ)MrN!w^l7CJpWmmS0^q{B(DdN@tk_7KC=QAVy%#_&5{bIQ3D7IBr*{ z0S!l}!DGNQv&woxq$z?2F|-LBsyQuF^~w=LITfzi$X}3XYRp zFo4epTKwg*?}((+egG?0)LGEW6geHgcpfjhf=)OI@BL)^7c-GMPUdsL#GMLFk!2_a z2ZEg>m_QKVV|53mn~oWth)~^d&+Q;i&gXfVM8W5Ip0soIX#zBbhNk3OjOJ6b*22vx z(zc7b*!DsWnN`&1Esn^{$UiH(b9b2z3s8Zm5CCKZxTR)${Q!(C_elg?Dv;`%`NQ{gY7j9?P+&ZL0ae-pfu2HaxV(Fr%vM5@#6|qYcdm>_X zJcY5~GrM5~7CJpW8*}b+OJ{t)#7XD{&lS^HTM(*VV&YGTH<84)vq|?3A*;ThlVPlR z|7Q@b+;yRWmJJjfi@dFyGG^8LWw596P(eBJDqPZS*D&2NUSkF;46-Ykp_*}0i!GN> z1%#`W?{s2PPfhW(iYU80#Z*E#Kd}mUj7=8!Q4Q`uG5I%3@P^&F>cS4hmwO0bqIWPQ zP&u-KKEmO6;O3##0*=W===!nj`Z3pLX0^l4^6G+MYrOLp*c+nO>5rJljhMur`@jhs zM%(I}<)!&xYm>m#VbqV)kxBTRHq6>8V<>O^kwuHC2HlEPtaP$YSZ@kXW ztNYqagHNj#FW4`eJyn9Kd9TKEE%P{it>=!?dhU%cdr6Onuu*yN zGASx1jO{y|u$3P-o6S4#ztr5zGV=`s-Jxdf6D?0B@m>^Gw2l&xaAnzK@HR097s8)e zNo;!`J}NkV4g~5moR%76)Fhf+5>%1Fb7E{0EqT@*@zlw4JdnW6Lcdr1l@`Z$1P;FS zT;X#b)XL^vU&l^!FfT~E5T;c3ROAK{v>z7gvE#&ly4+;YQHttd_m07@NpFs(&s=hF z61S5;5Y`J8D7P^A{eN`=4Sk~=1}IFZDL#XA`v0$@)_>Kqpn(2MGUk^Ctcphn1VmQ> z1Vr%Pd@RfjT}<8oe^*HVgLCYX*V1`ItYPPgdU&=H=2QU+SgNJa%d0s`X6^o5OVO>t zv`vAKv&bMAC|H>3A@14zl3?RH9hf0F8p(BV2FHGxJTNEcI;Z>ibdP+^t6OxqZ}7>y zVR+SWqH6O-^DrX~jimQZH4bYkLL<$KN^el?%#!Y!v#y`h4y!ku@!> zFKhLgB{C^~oz3winlMIkzhf4-nU}pJ|3FlCKc=8p+Q`Z|0)E>G*8?AK#BNvIasm2_ zH~&nVzeL}72CtZYD5>p$n;F}w3>vS)c|=-J@1YiF|H3BAFN6Dz)t*tlb5%E_FHP(h z6p^jxbZ*hD;9!2C!Ds03@7eEsUl*>wmx+68eLGisM?(fbMbrlKDT+*5Wq*yh^ksDA zZ_)IF(Ou#O1 zz`jFb`3rUT#R^SjvKd@bUQO#m1fM=$X`%+`#`Lsw_rbrbCf2`6@}aLJ@MLs%?l$=? z%I5i9N*)<}$pZb|>~Osyyj(A6R4caoGTA>xQ++w}AAW%gnX$B#(mj!X9%O-JqsaXU z;J?*5A0d3hO?(=-bG9)roXOJ}ExUSGynHpbf>Z{dYD3`)$PUXZfQ&uKuM21C%+LQk zei;n48`eV#gwsbvc-bM)Uq3O&NX5mNXUJEETpY)TC2e8GUl6r9Fo1wYN|?uczHIdSXT_27gU0wwmxjaEKBB@ed>zKZeyKL|)qj$|9= z`R)DApIg;b*eGo^Xy&_3e=QcD!>!sm#^lDq!5Qcq3hx6?0$U?lmh0Ks`HTB++$Y8I z@Ed>Y&e{n^@Ic|t<-^xR>n(8voWh*7@qQSegr}YvC&30g8jGM&j6n_ zPslO>7OZ9FOyI@E5FbbCIKdQy-}txio?ZT^KQ>tn`&%S*ju`(G-pUG>t`OT8jSIO! 
zecUqI`X-4x+B~I1|J}kZ!2P)_Ye)|2I8jpZk=fm@><4O#kXk!#YlRZ$A1lYshsMIk zo*F?{RO6H+plq`j{Fq~?#SH})KzdK9dwOXPQMbZem91x{K zBL77N4XsoQ?E~=7LKt9t=5S6Rkc@cpJr+4~Lqo`ek-<8v>m5#zq0G6A%{f416>WnRtVVhq&?Z#4KE|+<& zJ;f%-Nc<6;jilymOj7sp{1_n$$bdA`)o7ZRZEkoUs-mEJj!f~(v83>dup{xTFbl1#bu(7EREGm@yrHksV|lxcpW9QfRR|qm7 zX1>E5(86hDc`uxfyfc47C=C!G^V^bZCyKscvLPbb_Y#kFaSqdlwM6Xj*dZDm6Nd*X z7q2a(c8?Yvd7(y?cX1`CUlmOnD7y!T%;SOZ>v)ki^Xc4tg8$ zPBj!N=Hu49hS-{3KW!pzAd?#(vk5IntG>i`z93N88eq`eRYJwqLLC@%z&HKNfQJD~ zcIdB-tgdvF&J(aSFo$F~*T@#K$K^>=Vfo9GcfOAt=MXuWOP^{Mv?Vct;^d9mY-2ap zGQ@6w{mC4{lVHwrKfw}->7ds&x~jG>SlA5t6D7r->1c?c#ffn@E83N`zoK1b`+$y zcv$+^wJuc1tioWDHe{)9|AXO=7=E;h{A8V-M3?AYKs5u}$HDuMjnA$3zDN6WikF@P znto^xL_VZbXZt)AdYKA_x{qdP(ZboMWEYi@gt&!7D2Ur)1aTPTzXy0MZqfD0u_vb-6_%SxbL_h2xop=NiK>^Jw(#E=elbc=E7$}@1!zYLhX*h?{)C+R zTz+KRP=Q6VkuTz3`v;J5{_FCK!Aj{sMOiZebD3W8)bD;$T+62|B|Urk#1T1uo#|$j zpisQIa>>)3KrYeg1`hC7lLOQ2nSYqlY|+O{^r*dFD0^wUtzkJH1xPYce5i8l`<+K{ zQzP|4QBIxjy>xjZ6<7D-Yq1FvKf=w=b6a%TMCR*8UqNhUVP++e5a2}?s8egpcJ)TO z!Y@i_Z`B!nw+(|eg`i`=xxj-=jutv2$E=B16&%GEOOF3KOmB>@maUnjC9?L=ml}AZ z$v2%`Crw>p(7Gkq2sdCBV|z1X7FmYTXHsEM##BuEnWf3>b4>)=d}8Lz;S+4HY{-yg zC?$#_V@yfx>Dhl=7s>P|o?G_UXMwV8g8GDF{)QRxFYQezph`4GI}2X##(491B%LAyHz27LzY z57X@jvlNu@m)}&#$tu4fK`SsTCvZoCIjTI}KKYvHBF_JdFvhVr^5Z0=Ri^7!Kr$8U zv{cF6OkNs01fOYU*4TO`r@-}>LZ%C!e5q2G1;hA<#dK7G3;>uCIifArs zb5&?HmM@KxdY=^@y?(D|OgdC7ICfXO|Oh!1*(sF($uojeg7}3X-G948j7Zrp`)z58#jXz{>rsSlI;q z>t|wCR|Flhlcei!hIV7w7rYYfWd{5YNyd}0<|^NyhL|^*D(&VuFNvqOG95hPDw1#{ z#pRS9jzAK@*s`qhb|a=^ra7Tnt(0{d1dA0+IHjeyXs`=oh`af5G{w@ueFmKn*Kkk} zH=KgvW*pH~Q116n{y!rcn;!N}w%!$Ho1&MNU~V+Y)7#y9K@S&v&SpFUQS#=-KRx?+ zVU>e-cnde}dX(x&j!Fs8Rfh;lmo=Vg$Dd-ROVt)x#Lsp`olxzf7fmo#DWZEhH?NHO zky?)w$Rlbl1Au3w^aVZ;lsZp|{pt0g==UFeG&ojS3DrW9_$&9PYuM}aq%4HacgB>Dnn-Qa^# zXn5DjU&XGw<&@5FtIe2kw{-g2*{hp^{_g_;JYyBO_U93>tWI{+Rro> ztZs;fq`}d4U})39rO|vY%@i?Tqk^+&!6aKM!tn*q~_%mD@0h_DNy2Ck`rPiaJy@V)wzi^ z-P=OcEiTF5$hHQh>_yhd?ra4-j>JWV_svoW)|BPY^%jZsE_BF|`4EC04kIL;2aBNN z_UvKmQvl%#6#d3>u@}Z528%cftl_#1)wgvs7fv2?n$u$_YeyMjp4g~Kk>a|!RK?bv zMcX;d#Fw68qLyzJ^H?^Cd#u#RgX85*U{Etvduo<+mG49rM6Rms{+_WtJR_hUvD;rf zBiTzW%fE}z+$X?!4FK_T!P(|t&nZE%p#E+u&kirzC;$X7gKGVS_mLJ5+w=|kR{bY^ zn;xqjGivKc=MOQP6P4@ge6rvzwK&6<4Yy{HCKz___(f9pWl8|={LZdk^P#&!S*PAo^i0O+?F{mOW%$9oBU18t+d z%}pmIapJc9!;2_BV*NPbi=(L6+n42c6K8)n0y3sQs8d$N{Y<;tp?tlc$6AwcSqevy zuPc7u3jsluckc8uvEZDQ?FGW4%@|nZb+icQ_m-*Cd6wGCG#q@b5X&YM!-XZreLHjy z`fLt^jz;g-Jj2IVcIt+j{YHb{qu!G$3>Lku#Uk1y;BhRJ7sf^Xyi$5HdtS3%@pj4g zbayx#BA^i)UgCFaF{^|l&y}|{8k=+a@IpKMdV`E(=Th{aZGC*T%PRXs<{fS1&O1`0 zbW;7lB)-rEEC`>AL`8)fVV?Il{)`1t*RnC;t9^6{K3;B|0h1b+G|@OxjY87~8G7}j zH%wDNgJ>o9Y!BJ#Gev(sOf!Nh$nMS6-Y<84#&a8_v1I+|Ko=3k_Ja=skHpNxL+hyw zs!CKq?#%?|O&_qfrHzjRcT?c(SOX!6PNUxHU!b)d4qrXNJD(i6gUxf9VR0I~`McRc zCzKI(pi>7-CL?}xRWFOHvUySs#wXDFdHw==z=;a@#kerBYoDe?#7DOshUZRJvt_$+ zHZ0@5<-^WU=eJ&}XQI;^0{<|gpTfwyiwLXj7(MtSdcIT{W7A3P+JDImex8iXS7R6+KM*Y5(HN$xK&F~2 zkveULMj5e>k&;we@r0z&ut#g_GxSlA3u!=`J7u;aURBgB(q{2nnMMbT>htA%v<|~; zwpls4ndEphFl>!>=v|zrH`;tmZ(qN*eQl)r!h{q%r zH%6ISo%4J>W6CyRJ#2Y7YPl9$`vpgv`HiJ%+s^J!kgY}6!YWsidj)|zz_nc0nln|4 zZNftu*{R&JF`)Il+_XUzj;yPzcw2#O3~Im`H-dZ3bqebb07kq8giaIe+&A?x^!H%hK*e>-5b2k*>pTP2b6iFFni0im5%vCaX)mc63tO(8%*KP^MEjuOxc z&99C@#U5P{pr6OB(LuX;`&T4NFUM76G`|Q3^-JPuB4kwyqY&OZgiM(ZKdJM~MY9aO zAb<0Hzea|5>~j?OT~cn4XL7Kdre#DCI_W}$5_4395`^`h#1)9G)qv^AMkAY>qfY!g zHl}ZD1l_iX(7{QdGtF*J=sKc6lNg-5WqsYM3A97Sd}LUB)UZ?ue$A|uIm8BiJ^T?q zU~@tDqC;;v56EvKA#+(f%fI2QG={z&kA{$9LaQ_#hF_mqQ#p~w2W)_7(sQ^MQYV-G zdrg^k%^}yQca9)W*-6L)(C?LgzDV4|>SV~@w$Mh^BH@d8GUS-xLhgMXzUq_rNGklM z!+=d?xjI{lchxc4IDWC*usbV}^H8#tut%WsG;%4G!9f3cTmMt%>4&fGP7%7$8M+@A 
zhSzu51U0VHJxG^`kPt)pcRn2@_BKkf9z!0$l?mc01CFFDZ0H_3seJ=QHQ$-1?8vce zn3c37MSH%~QlR=2x*bQJVCWfwxoaP!sy7t zZ!)^8|4&{&=MHMGO&TMWX^C#Xqa>7Uo538jDY135A&)b-C8q$2CHfA{9kzL=6ymJF z7R8+d(@Q;&v7Z$Zq()Zw2FqozCROKup~()tE^xR_+S%W8?-h8xom-KJeE) zY~recJ3^F7o{B}UU&2|0t#XP6E$1FFCnBFjWjF+VxCU>My>qnonO>$+7}=)DQ?bnc zblyIpB)<%$omxez5Rm{OgHkkK$UfHS1CT$f=$B_NaXNt<-`~d;i7a{hJI4gBP9fYu zn;iob+wPE=M0(_&3BvUnWw{9kN)r8E&BYzodTQd`@ee}ec)#oVqD!g3y$5bKC(r{V zrG&5tW-{gYen~Gj*)BgzGKfn>5pa@kZ0pLqMRn0)&{4+*lhFKRp#mC6;F?jmO1|)> z<1nIO!S%=h8m5|<_g;A^Zx>T`XAifMJE#_;e8f3Th=5cW6W`o0fa&zq@nnY@Z6_X9 zzcLnu^e4H6)!Nxc-`!oh>Se2ME@$iT82?ppj^f~KXx8xpZ2(}{r*zpm3C zV>giKP@z7)#@&4ivkiJZ^BJDMshWhI3~9N ztI};GS-j8kw8zNd$hM?pay}Y?ML$RqGDqT6WH8n_FV;DkL)z#{EP%L=p^>59cw+eO zs-SwU-bXB%bL5a_Jnl&T?h?aj>XeO4l_e@+R|wrRU_w0-#qwr$s4~oJ>XN&Y*NYqL ze-EszugYuQ?wNjQ7$TZuq#+!48|C)>gyn8c)n!VZy!0a`{7B@V^}NE5KbAM=ej6=; zuOHtqg1%{gJj0UMu*?WSm*Kab=dTM_VQfP4m!+zP8*n9Kf5Cn1xc6}TMJ$FN#Wi`S z=@&>G-g$I#&6{2ri(Lf=IXqF6GIjf;giTGm_|Cqu=1_1>CDk^jW}04! zEfH0j$iU8F&yef;vt|jMd`bLZa6RBFy5OBmgcGVm4LXOv+GZN9dLG;kPAn0Iu)Hb0 zY$0?B(Z$P;;QU}|eKth0Kf!mSA+Rb zY?gEi6_T9Lvbu+|X`gwrSvnb=NtW|Hl{vO zgd8g)fv`PK>xcv?f&%4T0pBixup^gV+~*g34tl-- zc7pWw@)EE}Yk%A3#Wj#HT^B-~URNp6!%RirIrDp4rK>BUD5$fXCrFQtRL zA2E&4g*z#1i4$>Ix=N1H8Vc*A)EQ2~|BPY&(~e;uw8T|>%?4c^LiP|AZ@v1IJ$B4` zKrS@bgwQJ_T6a`s!l?#AeaA6%=Xz#DG`6iPH!j@GKqkRzGa-QNK>)n-*8ZypyD5I# zaY#LA6fKOGs)-V#W=8ygwyU1U-dp&F%NsO|!_qzn%znEQx?8uF1UHVBKul1{PY=w$ zI$qhG?xLKqyYy~ME#xEWweohW9Bdj9YS*`M>;gxG_B^?9UyJH&z8UP`HNg#qt&z%h zRj2p7S}SlWN~>c@XYpB+n!C~Gi$rQ%-6U4bG-bo)SWM8{yO73~!|YMk>H(OfJa>!O zf$$yScU}2t(YXrc)CD&Ni&)`qFyfBd^Mlt%-PuEvpzvP}H8-SWYlBg#~j3gOGrpfCQ-LIT^S$f+HJREKU_U-SwcYxZg+0X3{RkDC9ex& z`Ej8Aw5_?&54ORoN3MCf&UcegDZZPAB}i;Cv}1)ApLgX!-9ENzYwQ~9pZ3g__d(K< zyYrEenL^IgXHQPJNWTrX%cWQzD7##toP(I^ksBo}UU>Al z)$9RBa4exyoLIT~GVT5N^}`(DfX{7_v5NjL9Sjn+(XPCrrgHUBj@YCRjV02H zkFTqHL;tsA5kE;j4B*9@N3PQ#GwA&BvvPJ6fFV?xhL34*>Juho@za^cRAl3tHR)>G%x~=LfJamcH3E6 zYu)jq~dv~iaMBUAN*gWI_hA=~`!ymSPi zoed?l>eSz^7LqU8iR2TM;a({skBWONq-^EDjA#O?1IxJv%w5RWIZl`VO4piHm7b|r z!%=RurWvfepH|Jk+bxMLEbHoN%@dE}Tw&FoC!6flQGRrYVX?%NlG@}8iPJ~oc;Ion zKChU9HdgX5N;QMoK-avl-cH?I>o)3^PuIS(&8Ryr*mlKRo7^u_NnD~Wi%WE@*xI*- z*60#nV4pV3=LeD1S~3>;aJMhwZ@}) zsc;aWR>_T@vA{+NPk! zlin&g>VN9nMSVeNn#Qz9z8F1JV-}SUJ{}csw>HS_%YFZ0t`FZH7BQ?YvTb|q8z7~u zs{GOp>mZ14YjCBQdp0SFrfiv^AWN6M`?RXB82^bVvAw7V-TzY$+QI!_b<_Wa^;?+Q zIQ$3F|35HFODTPJ8w_w^H=Z!WU6i5*d7^!NjD$^7@i) zO5Cz7xGlp=vh(s$pE6QX?a^lEv2i? 
zJp%|oTR&}F`qlGG#xoona8rDbeVCwjnqbc1q2K&*z2h)BuVl%588i{Yx*QRTnsk-9 z{6E;pdQR(J^G|LUx(ampBlYb27d}M+*q<}l<3%7V=DLHrgBh;#TEbS9wJEHOjjj^m z8F_k`xEAO*!P~ysdqIBaL_F1S^>SQ!MX9Xt5WdtjX4jpPdTxbKLj{c>dq$%CadG2h zP7YeKt6lf%-*#aEr-cuAi6ue;8nIcqql0S%U95Us0)joa;$TxOAV=`+g@rk56y+KE}Kw z2p^?BT_YFo0VvW}*-bnQ3TP<>x&4VEz4T){MBwI|cUpGYpWP<89vyrJO&>uZhM8p@PZ zjD9{6l1-FR%l}%EuSggqyx`y8-s(ja9C66qz=Ka31WE*L_v6CrBPi^C^gMd?9As5P zW#?JTtic$qUV0%w()?b_W6(LKDMbpSTrUb?N~W00q9KeSfj}Ku1KPWY8RQJEyFGnE z$cYggrZ!PNyY{)78Y3T)v6il{y`x32=a}Q}tnB2wZFd)40s>&9rtQN!K%|4n{Wdg_ z+lTRJDdEIyMV@2CNsLr-G@9;G|waiE1(on{DK)%pH1_#y2iN<~iC zr7f)mEM=1g?|^C~VXU%|(sPteUDR<(+EVTu?8q!pgf;oo(_JFr(xw45mPf^hc)Ytv z)Q1r=hZiWs?jxQ+o#Lc!U!+fJW0qWIp{F|^f{FFsl~x?AFTLr4Rd3PX+mhc@r6c%b z6#aTPn4{A%_3MPGB7-RLD{oHFiyeg=uC?yO0U{`(>tF|t`Yu{#e=vtE7m7O+FuJ1d z^inQ%5C=a-QZV0F(T|YN91{pN|4z@&IfzLIj=c(9oL_4dBYR>HFK%KYtM`9^lGcz_ z?11dHH9|&M<0&}g_kywnLu8wM{mhgBQ&Ns^1TQ)&Byum}!XjYr?t0v6HeN8Vz2N_# zenp<>&#_KA<^UU2QVM?}lC*&&6l2;YZ<;(KuzPf@i0SN9PLXq!(brH5Sm%JfP{5GJ z+-Ks-3q9u*CA0{z<`85mUcgkO&7c8H7f9^r-~IKTRYzby+M8-G+`Pf!7LoAr&<03Y zBgB2Vr_3!(jLS0G$=Ub2CWMl*7^Y zHB@J^HOe6?oT;E*FO&yKWK%`_$>G1Neza;)p5unqdMzJDfI4@4M&Dl;S_3gsj)*)j zEWDUgqts2GiOUV7*8(V3RW<2x8t=G6jC%ICAKT8^oLfbnETFGOV993x z!jT#Law0!{Dtc*HF6thxqc-x|Uv!tVg)Clw;iH$pqnC%) z4Nhuhkk3Oq;D+#9_-CI3(ldtBlC2PTdh~#eyZTAe@@vz%oFeW<{ws+bX ze%4d9ekGoEybCI{cqU1nY3koL3f1S0A1rOySJ~pszPxt^6N7(U)|M+2SI$un+`^bz zCN$}!!K=VYTRN$#s8R)dor%$9!s)YPyIC!>2`3UF&db{W>EjeB{RUEp?gE4LLa)1%vR(fYnD-TFu^h@*#&4WfP0 zlIPd*7p8mWhOTT?cl$%Rp_&RL%U(O)K6>HIusbeB`Ks3|h{2V5+Sh1*P6)HJgK(KD z+C95j4indwH2^ZRVVc3ptMS?l?$#~YM*cc-6CP@nwst{3yP*_QS2rIfz#BK-n3c~x z{utOUe?M49hGtvALajWaw&@i)tvm6z_F{I%?8jF2c_~^z-lBQN3&7!`c8G3@?2Lka z#7|c!tdu2(f#@MDPNQjQjC@Mr%6z;-*+(Zh_NJUId1sizRg;?gX6FPCE-3q4Uepi1 z!sv2LXnJ(l_v>tI)*C#PKXYVoxSvYVSM+gmmdO{qR$9Eeq{z5mC|*Z=Zx$n2vTUHt3wVnhG;?6$4p|I>fbklK#@!aohijJ{#NG?l7iX1}y3 z+3c`@goJlMsa3<~aXY!J(SfrX#QM+lnxuBe6`QQYZYtNw$K96uiSq$REFB9JC}$FU zM-aF-x5=6XPgKE60jqWSKfPp=rZD)EFKW`a)I^43x0P&VptIVdl-0m`iN-jthbrc5 zK|c`+Y_h6#fIGIL%Pb2-JlHg#-Cwz+_%F(5ii6G2*Yp8KqVKtg@UnePC_1YG*kepJ zGn|eC_4=D6u^23Tao=8PnJTyrWh^u%xFxyQn-Dl)@SV>x7Px~)FH^KmxzUBR-I zTcq?OkuCK-AFKg~?~-+x-?1@o+qNwN*T)usIuUQ>$qsoT~m|7{Qf_1 z=KtD13)u3Mw*U6e6YPJ#f1FMKpZ*88)OMmX1^#JlYB4XPh2j^eeL0|-rT{5IXMzi* zg)~VVlBlk&9oF-oFL}60mIydu&v;GF1fvJzl(1(`E z*y7Is`~CBL1o;`%Fd>&ED`?S;ITS*ulTMa2)o-HtOhzQ%a7;4^8}G_m9?76<&$~@) zcC~!vaZSy}`zvEaSb~RLsvlvEt3ev;rpE;aoz-gJes()c4>nB%x<8iZpqFtWGvO&5 z)+Z~`WNw!_>n6_Bt?SX|cboPGyLH&G5X|I(mCHog&7c|T(KpxQtwW&AN5vq_5^fzg z`w3bm(YywjJ%rk{3Sk@IdSCbBt@g(jlc$5;3_#c2v^EZFPDzOz_Z=9#>)qR*V;$u- z9aqFv-t{b)*F;h!LJHV;7}RGji=~oTH$EBli?O(Tqz_k#yKbvicwniH+MLSo-eSLD zY;t0eis~kY^5Yl45@jUYNjKdna^rt}V-$YT_BVAgmeZ7X$fdZV}XXJv^7B2 ze&`ypJG7pV>G!%i;Psxs|Dcuy%N>HA%_mRL{-Nai-s>T~3Y<;FQubhDA{jOZYSSgW zO8Mr7H8DB)nFHZ`4I|0f)0n2+`ofS*2H?0|IbJRt|_II_0P!9@GlzwckLY)8|VL*L{qG${a=a3 zZ>5%SF?=+wgiNpl1e}G&wUJeD*`vPE&;ll?B-Vxt_1`Ei+?$mf1q|_)tm1)BqbQ!w zY4>Zm0R~JQ0|cf5v^VhMLl*MV5ej~YzL_c~Y1{3Df7O*F{?vX!qyis1mwzfIYIUr< zOW<<#T8T0$IGBH`$Gx>PQW9g5)AZ{X>Lh3yG;U*V;@g!>6o9E0UF4$Z%ue_pxK0PGIaKAD(;>EsX6nlQ%}S?oDrt{% ztruLUyLJ=n&Ac$*?ua<&Pk94PvQoz>V^XkaS5R;I?itX9b^Qg`INqs`(GklW$+2R0 zyH2O9+Vu+YWMrs&l&in?lniDmxw7t;H_mcCNPalm+H!Q`YHKMoz8sjmfCs|W4{@{% zlx^;YPuQ8goWDP=hNj-aQ)?c`3{2^pUAGNH($Zn{+DTa_EGjk5OP*~8(_duQIlX5- zOH#gb--KhrU?6F9x=k?W<-BF2WR{!RG`$w9W1mUpSzFLsH(OMTo^@LB$z|i@xJ>?9 z_fhvZOd2fo0f$z%8N|{@md2q0WvQZ%V#mV8bJ6)ZxaQY$bLa-|i}@p|r&DxieoIf_>7ACCt`j5(w+k~b>@Cj*oPcKX`8N@i!Dc-Gg_0>> zNxd`L>*=z=7z6NDij+Sw)GS1XkHoj%e0`w~&@ zoVEY1SYfr>9Zz*8?ci|g&T;0XdydoK7**~kGTw9beVvsT)@DBZ^tNvBYXd``Z)nZKi2XoVA%Qrce9Gr)N14-?^ 
zqaq(?Cnw~;t-L;VBMqq*hP?CB)?7Fgr%nlB{F&?#&mVgpc=AdxVEB27|;9-LmG^xqUv{O@^F|GR{d7$4zvKGI^Zyz{ zVryz^@8qd(V`})Hf{*{w8hjI*}}rwU!pfft#av_FOD=?_wkTlKK?2W&)zA5FIFxG z_m-P`WiYqTHg4}`(zPa@IrAe#)ZHw_T$W)C`bN%1(+~acWID>uaB@&**?e0k3ZSHK ziN}}X7I&Dzlv`=ir`~;`yWWo0*@)4{oR(N{7$N9_mdBHpph@9%ZL=R;Pb)b#mvDxLl2RfJ5*Rw5Ou}1YOE3=T{({; zZz4>apH?yY79{IS-&Qz&xK&&(m+`5l_4#(v)q1>fz#ljDa4?sN-cY*wZPt(Jv1%53 z(~RWGm~(P;A6J66-bcEM0Oe{|AK9bb*cM%=wr#NT6b`pOBk901F2NAIz2~S^uKZ2P z3Q;PxRyV`&PAhUdm_O^dK)#CnE>)=e7$^RvPV3_2<@WjgNdJ8&{4=n--}q+6;K2?D zUeRKEeR&yk$67E+JgMRuT}=@_Vu2sUhD>1X%#5i&sO!i2cM6Ag(Zh-JqkHFUdeGb( z;fLv1t^%r~th!~1j$r1b#yA>*tvkh{qJ7Tzv>dxh!9%Iik2iJ3s1CIDvPU z-wy!k@D#um)4?MyuIR;>w+q%-Sj;z%+FUPvSK+kyD_QB-R&g?ZM}#FTx3M03n>-DC zP}rH_&7XNJ8LnTqWJB^dIVHn6@Pkzy?fxJwb8f(y;w@? z414TEfp=RPW%8V1frZkomPj5fhqHc(56ZZ|(G3hp{B-ig;XS+Gt-s%pM9ozmUi-5{ZWbhmVOBi%@ebSmB5DBWGsEhXLEA>6}PKhW>~ z^xng>pXdB>c;DG;*38~Bv(|d)6D8yX$Tr1`w&%dx=eR7)7Ul+5$6kr_uXQP-S_%?H zPRs(onLB49nq21noW^QeYl#WG8yXrgjO1|~#BD_iep`r>tcWysA1)DeNC|V+IsxaL zJo2r!A>ADNt86HtJ8}U|T;XZ58f>~Qr({_77xjL)LnMM=bOr=dt=d?;hLF7|2$$Cs zB+ehV$25ST=Gc#6(BUkKOuC+@H<>E6!;7n|amIZn1Zuy zqstW0@=NrT^HnoPYM^-DPosi*`FerrcO+p2sPnbq!$G;Hsy%Dy5~>p1ZK;HM*2zXb z{fVjKU5Lmm#Y@JI_1?xT_vk!KQ4C}L2uaXgAV~d*IxF$Zq$u^4HzobNY-0FCg%ONw zZ%eR|xGhesd4ug{yZHLbgtV@lwAh)I1c1Td#i{O4v1y!Mu&dv`G7V(zwG`%aO0aNi z_>wHsB<6Hscd%CBkM?XZPsg-W9(~zHjT+3+1_|D z4m1Qf-x$=LQsR)%6&u-!`@1=sdRP<_W_KDDX03Nnp9)8% zN7STZ*DEC0V~2$fz}c$Crz=z~TvXrDxfN#ie_E~O^H{gL?^ilnTU%L@$W;Jh6t9rw zS2fR|y(VuWk=!m#z)13-Lw?D7aynB zV#$BGG=9&vB2BFGt>%Jo-x(ZsuXseiyF{#5?JMw}iQZXcOzBvR))45lHQL>z>5JaV zfzpgth1o_5yCZWfJq$Rw=ExS?{a^#2G5taIe&lH0THeo(-hct4onE9zLaKg)x(N)rX+>f_{W-lxd zc7%22&ysUVtT$l=Zzm2U?!%oDY6^5vx>6o^yHX`|+k#`hs&P}Yvv@@8eo1_f0ZcPx zT&u_}tv^wX=_&fMtd}V-Qotilg#nv9c%%sAkSw zRBdoTcMjbYl^TUAPUyV0v_1`_M$Q6I1Da$Q8$aTjCzRYQ45hFWr(J6gI+pGvTL=}h zI|!DOLU#%X%PqGJR8EgP84Cd}M8;R@D1)0%SbiQXd*2L;xa-xjDlB^&-RP?trg-so zq`5Ow>}9S>BE+sdQClMG&hjbfnz6zMs`20zUbQ1H*k^sLP&Lpu6ydBe;kr=xq6(1n z0p^XtAK3s8F?&{NzZ9qqbb5;i6rUxuFJ0Bw<{eoaCsSC@tY5(Xt#YektA@zQw6n_s zcCEn6<=P=0$b_p=s2FAJ>RSI@6(4#vD6Hq218wCTy=#h&n#!kMpAZuozGE9lNKw$R zv0f--2bR;?X#FlcIQKQfSZlJ?+v}?WbKM~VYr0*b%EmbRQ7v&|v z$Q$$q`*Z!%qIQhHq-gC7rF6Cl7l8yY$NhY~gQODXWOf~=zis;gZR^P>RFcE6$>jyL z4ZT8b6T6Oo5ke~~Gi9Db82>h412BR|@&$OcRU{@Di+T!lFOB_m`Lq{FA*NU^u)|jh zyH;GcwPfba2ffV(%jT1acxrP~818!!_Osz#ccc&U!K*f)QVW#6#Ek3qeu_A?R>qvT zlsONi3t?**RC^AA;9ST?w$2~ON?J-)#fiydxTD9*?qVW%5MQX<>3d|0Cq4&DIyG)H zp@4TIwk>^+8geb=U%AQVHD9mAgPM6=m_jh8jdpO(;qR1>L1d%P5Nqs{-5DENMnKX-1t*of?aYM0_A~?aI?_FiWK)WfCdf@^mOF!^-g@Ujmq^w=exql_E3prwfgQs$fb?tyaI@%)D>|xQPe5Hv>_Gmx>P>959M6FQLoRpl@Hterv4>7 z`Kr~Rj|TaOTc_vb$_~xjUs(1wST|OB-IU5tc)PW|Obdnz5G~!RwGiaHyVWMpy-b(> zVt-!t0unFhCQ{>`uj61%IA~ET_J~SV&AGOT#DomDd^^~3w!enGW-596^u1N}J+td>oq71_kA$Ppy4_xH?Z zS>PYg2)c7O9T1LE6{qsr%*Dy#UX27RO=-uaZY}O!O8`>{2HY3NcP$@I!gwbEslDcq z8j@w^M8(qF8&?V(fHZ%ep$z&4?h6~^BEiS%i@nc!QDxgSW^68by@FEso;1fc;rixm z(bH<=az~x#3F5a_iWyqUP^GEfShze8EY9a7@%$R#70`}WOrc&_J8`Wb8Ql8spN>=%i zatt&tMxs$j*O9{ZzupK&l-S@EIUw00l|zlh0iENQRuVekAG_PZ$J00~g>vaP=TKQs z7_6~<6Cqn7>i4!SFN#@tyo9|46D?c0Yorc2zRyQ&sl~?BEoP`FsuC2PSX8!-3;m1G z*)UA*0MJa1ExZLzu}Y>;LcaW)sV*EX-j~LXtpr?dFA*e~%=GtdcG;gf5PN9VqYT~3 z(!7W6?0){a|69-pHINlG7E474bU0P$G!y|*^4d!K_h0KvqdjqPNvA^j0ztN*KDWNM z;78}JqpyNLl#$Kiw(T{VY|bvuw56d)Jk2S2tCv_N8vsLH@R9=GG3PcjG*4r0Adw!w zA3}&)E^?u?$d`G*Oh3&|L9XqUD3wJLDR&Sypd1T?Z}c+(pA$qdIAkboda!Q@#qcs} zLKPcVuYeOehG4lGHYdaGt|n7zod$~>d+lT8k8U$RVp%EMuEr>gC_~9$nOx8mj15Sa1l$WNZ4zqlTBcY9yOR;}w8ncp=g%DBl6b2V|CtT%4B;LoDK z1Y?59xk(yEa!;?nMJj;H;5uIznb)-J z3upu|DlqPfNU@E^uJF1Im}#1C^N1Ccr63Hc20s(Z>x9&)$wbOJRjbKDw4JS19gK}- 
z6{UJ?SyfP`1{V+LORDGNe!kjL3*7pHs2hFRYq-mpJcBeuXd4S@Y!2w!lWRlPqvh;Y z*9*?P7#R1HdCjj+H(XhlvNo(Bh1ryr(eao(5j5e=T#BvoUv!t0Y`s-Gl_GZ~h!JUfSi?DHyDWhK|ite+L z>)Rg6HZYwjpMt`3SGq{N3}?okbt^=}I;4 zQjVx zvEhcM>Nyvp{dWF0LCWmx7-a0tY}9nlomVCJ!co!N2Q;cx(GVHYuZ@h1jj_#9?U$Oe z>0a1XZQ#rBc*`c2<7=(#a)rko_GHf;UmN?LzjGX*Q`sD;%b+ytP4;y-gv?y@JINb~ zGn&=g!$1%MKH4h-?(#!VDIZO@h-0J=hQicN80!^e10DqV>OA5XiGRkBmyGuH!zwQk z*uD-9#D#&azi5#QD}CCY+=$e>CN#FjTu}M#sD`u{h=e4$=lk^A0~Wze)!02Se8&gT z{rF-KtD;f8A?>vWrrPr;Uk`ubky(o)>Y zfo!cKi0{s4KK+9G&8Y1y-Y(=I4Xx;!kp{@I(rS}A(;FUfG|wEdz{0~OP0lyz=iw+uu4Q0Ep{2R_8a(I#g>Za4z2E+uFTM5L8N^>kwfA70sMC@6A9pX|Gz~ zH$uDlzAn=Nn=Mgxq(}>N5|JV!+y0F-Hns&h8_q{W>BO=72>-Tti9m2OQldMfGpDsd zN;GvAWqmugt*O<$6mCLZ@0pa_%1iKhB zbkXmX($Yhsf^=0PT`bbmL0u=p)DQ%8Qzq0Z<=4w5^9v8k8K`eFJ&1eE(@NyHiBi|@ z3T&0%!9WTY?@aHUCG|o7Ooib4PM1||gJZ`~xCb!lu);|y zuqM~&C-~>{sG3EdVbo_jF_Mvm9|%^zqDY;GUjVbI5WuZ|P&xSH+4p;6-ZEUu959Cl z9NeJ^P+$azb<@*<33FiXvYH$SYp!(|-nBR=w{3criawuC6ToOJ#rGi0aHW!rMTj77 zIHsUgK{%XdrO8e)&GVo(pH|4{^|Hl8gZU;76VaWU$u-I`BfZHI`b|(eyRF{D`cqbn zla6Ip%cQ_8h#F3hh`DX?%ViA9dwEwTmz!2DdcFAs$!-Vr;P81PySqxPzW5dXgT9uF z(`?iI-kP27(4^3O!dGl`-K90}|O;iK34OE*jl|Hr4;HpR037|wD zF4j*~xBjrt8&hL|=*n*$^zFTtvIc~}Hg2exY<2Ey=z}|A!mOg&s|OZSmhG}Wd%H~u z(49M`tyR7Mo`*1Nw@d1{UBb<}NmMD;BogGBW%qVadPufo!?vQ~yDyxU@LUL;#YpEPy4xmjh%TWLc!!^f@Q6v8(319&CPNmNIw!QzOud);nW@RA%T9lhrw>i$tA0T?D!W~)25(`vQzX-O!AYvjk6BdGEckLG$a2BoTgHE$x!|`3%bFjuP5v?rq>}43r~hte z&?ya%x(>SZut7=H;pwTTU`6(H5Vim{YxF1D+!UtoTE0@n+a?IiRCEwcUH}pB+_Qt~atY4#`>Q(insX zgcxd7=WS5kA>L_?yc0NE9H)F0AGi)i3=U9~#qzO2WI);U__!LC_u=y z*qL!bd7v07@L@CYRgs0TMQ-Eat#s?5VzyucA0wDKp>VJ3VQi#0K6G^zy0!VWwG=0K zH|!;Y%Q4G4UU5@=!5LCc(>l!MT?p|7;Zejr6O;izQ`S4wV{n`AZx z$!Nz?l-hbd0fsHOVwaJ^72z3u1;|~cI%5k640DnxKcGDowB4!z3M9S5q)!I-Y_7Rz z-=#f(%xc6aNU4C=bme6;u?kglKoxP(&b|=JRuQ6{`24Yx4o3L}(>KMP;mCro#adx_ z{Z1;=TE$AK^se>hCHYgw_l)z;xbH6H!61)n8GwBcT>}bVT5Ex7Kn1KYcd+R)vX@$B zbl!tW_3ORb7MaFeZ{`|(0YrnxA`aR8A=8h9WZ4YTpeC`cp6Iz$AmN(Y-J^1!@qiNf zPN6hkSpeZ;{BS@(SWh3{ZwxJr9;OmQfS!z@rII2v5NHOVF8;&yLv8%VxEc6D(Wr+& z`o%FaN5*U}IV}N`YYw9`^DDy&1x8BpPyZwIr!suS&Ds|Y}OCyPrYQ6KUrPDDM@p4%R zWhE8HAl&0T(d*e_Yvetm)IzAGHYy?ixTiv0#{GBVcMH}TgviZDxcA{Wttbtmu(Tq< zZLw)YqLBws@)vutd1i9U^o)7UI z<^q90bkh4Je0D{WTCOO}AYIbwa1tS?oLX;(JZjKFQ}KwfFEaDp@Cyk;pM2NlA>i&R zx>0>ItC-Xkp{2qSIOE8mElhM->L8I6PQ?p_i92=SiYp+BR)g$PtqgJgwij)i9oIhg zD|C}B_t*7{BF%ob*Y?N0HL+Gv-rdI2?Ly>tNqJ-t(ffr(I-8gwBikT~bCxSd!tl_pneEWtc}vYq_=roYA}jOt zmB?QlODh?^W`;m8~5ctB-lB7uOW+u|46zJ+TX#9H|=}zbfxMs#U*g zD>IWQcb&Bra`PGbr7}&#Way#$>2u}9A$_1Mx=_neU^ZM)p~0h z5EN*-AaNI!d9qkGi1!d;8H2k-@fKj98lK&yqQP2HJdU!U>g)xY5?cW`uhf$7E^A)+ zWkpnb?5!O*pMW4@?z6nbtHq2qAVQ9tw|v+2R+zKb8{K>y_4Cr1`{I;FZ?Vg~(`fy3 z)5NOTL3EFPY{v4V@~BnnY#5cBxZ0h@-dnwd!<%Yg+by&g(m^d2tGRb@FQ{o7F9^#P zP{@+IeJO}t_)}dR4+lSeiC0KJSaiYSV1nO6=*V~D`_|Y^*mDDqA6umx=Qv*mx^Pg# z#(kND$BOKskLjJ0h`)(m%j{8xf4sm4t|yS3yg7;&<@FR;t9xg_S1Bg)?u~}B%n_p*WSTaxTPTueJMu3U z@$qws7$bugnM{WAL0i5Oi<^2IgCbYeDJdLZ*i+Ft0=I-1BQEgBjgqx1PDSUK)$%@ZO~N`m#unzLoNT zN!low6rK}u@a>W#K>~VmngR{U>gTEa#qU!zC~sE85y`!V?0f~ohs09s+b_9s+1!gu zz+vM=m7ydM#`dhr2Z5NZnP?Q8^Wa5pp%}Bbt(<}WyxVtyg8^EUsde#_{ znxI|7>1#1+8t#?|pjMR88EzKnI25|1j%6Jo}CLM*8dK#Zb6#=D2a z)_V93S$)&db zE3Ho=C}pyCUb`XVsGLi5Yvnjw`6PMS(C%>E*|Wo~;v`h>mc7N*5-h*ZO9_(|<#V-? 
zb!oA0T6G3C({C`kWi7ukY=qJ)4T@xMn5xUek@V3P1zmgUdIo-K)e$fX47nTc(-wHxb3An8NNhl-T`yxezG2(nym7jRj|}lEoJ3C zKS&#yi=nQ`PT{iD`0n(5I;Ft+36IGl*)%GA%zG1S@j2+0)xLEFHin>iS$NJdI4BM4 z8Xc6r*HCyO$kjZ4qZbWXubOU&DZQZX{rkOE;OmamXI$e=M{h zzU&3FHP1!mC{!$%46$ii8IbHbTp7q~)xB)horFMF%C;O|wwl!8@H67@0OuHFWXiI` z0N1h2#PEt$9rSL-PYxZkC*|pX<^tq!r@OT%^)faQE{cZI=F^J9P&LCh& z@sIze96hyol8~FrtE_OEJTqLIbwIL9J;Zw@~z%csf z1|5nw*r0lReW-*HDm=bzZ(p!sv}&H|R7Ne;TE0ZZK_mEHiL`)wuQ`Zd4Tp&3@>MUT zcSv-@lrdw^t|49XsG}e;HF#)9!ixh*9nY?Wg*7F|s9XI8{QE1>Xac~|my|DslRod~ z2&xrJI{qu4kDe#zyxJ_ItPK~Jyp`iGtw)>}iU+<92h232-+HTxlTcQSN=|xo>(OoLj(#_sRF>ULnZhVy-Ja^%*QgLL$rf%zQ=*XDE=rHo zfe2rEVnnp#=;*{-D37sC*R9PQvvk#$eoT-_KEi#C#IzD-bXOE4CQ-|(8sz!5>A1kx z+muXCD$md+q1`pO&(&EL1U4Glb|qY^mZ5rPp0rGH5%sxrdRjDtNCq)PC%8;P;6c2a zw%k(aWnzD?LrDOoMiZAb9O6Zfgf@~Qs{R)n{}uk5%bk^X{adFm8WBEJxw>7-I`j)d zr+UD|F?p=ZTawRoIpvU8mE!lZJ-5ZlRKmF+LPfJ#Hw6^?@HEhi=LqZoi)cii`k?bl zfd$vMENyDELw3E9Xh(TB`%?qk*@Bj@T4VGx6a~+kCbVRg847P7ao$o{|1sWVaJU@q zg`N?xD?+P-#t~*x)O)Ak`|!|R?WZd`Ph{4nD!`tXGazj9hZXPtVKLXJ+zZHOL~Pul zffy8WL?fy2pnP^2K@LA`zI=)d20|B#l2efvp{Sm@+5BN#oKCi=CLcaPXz-Zx08Yf% zhK~y}6`a*ot?m2z`t2vJa>t~-))U7dNlsV%VUI60exw5YnIRLW-2A>|k-m&4}1+*9P*R*mO5+bqH%);~Ob8rvS@OnC<93fLX z2#yd?Wd2CdBM;P!IT|@EQS3ytGWM_3L3}(S1X~EJCCL-Ra`w_vdjKaK~d5`p<<`*IP;jYkI#oXdEQ z252Ch4xK`gu*mBQQ=*U9?R4!J+iFT>r*v_$nuJ6|90gh~$SHQGlImj=m7(0-0sBZo z0nFUHG0ykQmPLxm>9hOqg(A_7RlKsibDMPK(s8}~wk*DmcibO2;Ca9`l5d;AC}-m7 zXYx12iz}ZsIjPyP;*-qA9V)XEDb}5tXDC+K5O(yi%c2H#I&0(}C%`lZN=5I*v5tk{ zq@=}DEd>Q5a^!VU6 z0eT2jyCZ&ojU=@mGw1so=xNLbUn6loe{qang#Po;!@(3vKuz`p0^*P8nb&DseXr*U zuX6)=oxFMG@Lg{ISdRb>=>Q*U)irTD1X+Cy!1sp>;qUc`qak49(Aw(nO>c~V9yFD%jd=4P}5q0u!)CbWr8*4lJ-$Z>dtO+hj{U$&*)Kd!x2=z%@7XT@+f84gd zg(*Nj^o^k%z>a?-|H29n$kZ`5y_kUQs3^cP68;G?D&F77)|NK9|FrfOm=9r{3fGx_ zV*t!iL?9rfCz$qxe_{TkEc|ycBskn2Y%_`d?M?|3?0WIUeflZys)rGXPA%#PXv{ z9K8Jt*~ZS=-df+<;&0T4GW)-9G{IT|%SwQwK>=i&5uRAXB>FGZzn(sSgZ{53{`cVp z`xod#qxblY;{O7DICX#Xi$V4$;Dalmd0&D*47rC3{Yfhep920rhFXbrujf7>UBV3E zM}I&_c+yxsLo0m~OI@UcfHr9a{Lb8u__Il7;){KEep`rxU+_hT0I$|o%U z49X`A|Et^l$5v03jUO{~)I4GMN$2?2c6zvjr&6bn@lhI|;QulHb)O2K{+i-vwaLd6 zC(S=o{BwZ+PYeoxhnjVh{6o-B$_(|adK=KHp`)6c7KBxRWqNi~rkBKx;e@*mfRLSo#J&m+@ z%#?-pUzq+i?&9|tp9Ts%W>mrWFO2^lHt>6tPY;qGQx;(TO!7a#r)>^W^E)!(;d)%BS!@o+|&h=Ktq?@acl?F@6rs hU-*xgcL30TeIEe_EK-1g$N+yGfcJL@+K0FH{{dOjytV)U literal 0 HcmV?d00001 diff --git a/python/setup.py b/python/setup.py index 2644d3e79dea..cfc83c68e3df 100644 --- a/python/setup.py +++ b/python/setup.py @@ -194,7 +194,7 @@ def _supports_symlinks(): 'pyspark.examples.src.main.python': ['*.py', '*/*.py']}, scripts=scripts, license='http://www.apache.org/licenses/LICENSE-2.0', - install_requires=['py4j==0.10.4'], + install_requires=['py4j==0.10.6'], setup_requires=['pypandoc'], extras_require={ 'ml': ['numpy>=1.7'], diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala index e5131e636dc0..1dd071591804 100644 --- a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala +++ b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala @@ -1124,7 +1124,7 @@ private[spark] class Client( val pyArchivesFile = new File(pyLibPath, "pyspark.zip") require(pyArchivesFile.exists(), s"$pyArchivesFile not found; cannot run pyspark application in YARN mode.") - val py4jFile = new File(pyLibPath, "py4j-0.10.4-src.zip") + val py4jFile = new File(pyLibPath, "py4j-0.10.6-src.zip") require(py4jFile.exists(), s"$py4jFile not found; cannot run pyspark 
      s"$py4jFile not found; cannot run pyspark application in YARN mode.")
     Seq(pyArchivesFile.getAbsolutePath(), py4jFile.getAbsolutePath())
diff --git a/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnClusterSuite.scala b/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnClusterSuite.scala
index 59adb7e22d18..fc78bc488b11 100644
--- a/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnClusterSuite.scala
+++ b/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnClusterSuite.scala
@@ -249,7 +249,7 @@ class YarnClusterSuite extends BaseYarnClusterSuite {
     // needed locations.
     val sparkHome = sys.props("spark.test.home")
     val pythonPath = Seq(
-      s"$sparkHome/python/lib/py4j-0.10.4-src.zip",
+      s"$sparkHome/python/lib/py4j-0.10.6-src.zip",
       s"$sparkHome/python")
     val extraEnvVars = Map(
       "PYSPARK_ARCHIVES_PATH" -> pythonPath.map("local:" + _).mkString(File.pathSeparator),
diff --git a/sbin/spark-config.sh b/sbin/spark-config.sh
index f2d9e6b568a9..bac154e10ae6 100755
--- a/sbin/spark-config.sh
+++ b/sbin/spark-config.sh
@@ -28,6 +28,6 @@ export SPARK_CONF_DIR="${SPARK_CONF_DIR:-"${SPARK_HOME}/conf"}"
 # Add the PySpark classes to the PYTHONPATH:
 if [ -z "${PYSPARK_PYTHONPATH_SET}" ]; then
   export PYTHONPATH="${SPARK_HOME}/python:${PYTHONPATH}"
-  export PYTHONPATH="${SPARK_HOME}/python/lib/py4j-0.10.4-src.zip:${PYTHONPATH}"
+  export PYTHONPATH="${SPARK_HOME}/python/lib/py4j-0.10.6-src.zip:${PYTHONPATH}"
   export PYSPARK_PYTHONPATH_SET=1
 fi

From ab866f117378e64dba483ead51b769ae7be31d4d Mon Sep 17 00:00:00 2001
From: Shixiong Zhu
Date: Wed, 5 Jul 2017 18:26:28 -0700
Subject: [PATCH 047/125] [SPARK-21248][SS] The clean up codes in
 StreamExecution should not be interrupted

## What changes were proposed in this pull request?

This PR uses `runUninterruptibly` to ensure that the clean-up code in StreamExecution is not interrupted. It also removes an optimization in `runUninterruptibly` so that this method never throws `InterruptedException`.

## How was this patch tested?

Jenkins

Author: Shixiong Zhu

Closes #18461 from zsxwing/SPARK-21248.
---
 .../org/apache/spark/util/UninterruptibleThread.scala      | 10 +---------
 .../apache/spark/util/UninterruptibleThreadSuite.scala     |  5 ++---
 .../sql/execution/streaming/StreamExecution.scala          |  6 +++++-
 3 files changed, 8 insertions(+), 13 deletions(-)

diff --git a/core/src/main/scala/org/apache/spark/util/UninterruptibleThread.scala b/core/src/main/scala/org/apache/spark/util/UninterruptibleThread.scala
index 27922b31949b..6a58ec142dd7 100644
--- a/core/src/main/scala/org/apache/spark/util/UninterruptibleThread.scala
+++ b/core/src/main/scala/org/apache/spark/util/UninterruptibleThread.scala
@@ -55,9 +55,6 @@ private[spark] class UninterruptibleThread(
    * Run `f` uninterruptibly in `this` thread. The thread won't be interrupted before returning
    * from `f`.
    *
-   * If this method finds that `interrupt` is called before calling `f` and it's not inside another
-   * `runUninterruptibly`, it will throw `InterruptedException`.
-   *
    * Note: this method should be called only in `this` thread.
    */
   def runUninterruptibly[T](f: => T): T = {
@@ -73,12 +70,7 @@ private[spark] class UninterruptibleThread(
     uninterruptibleLock.synchronized {
       // Clear the interrupted status if it's set.
-      if (Thread.interrupted() || shouldInterruptThread) {
-        shouldInterruptThread = false
-        // Since it's interrupted, we don't need to run `f` which may be a long computation.
-        // Throw InterruptedException as we don't have a T to return.
- throw new InterruptedException() - } + shouldInterruptThread = Thread.interrupted() || shouldInterruptThread uninterruptible = true } try { diff --git a/core/src/test/scala/org/apache/spark/util/UninterruptibleThreadSuite.scala b/core/src/test/scala/org/apache/spark/util/UninterruptibleThreadSuite.scala index 39b31f8ddeab..6a190f63ac9d 100644 --- a/core/src/test/scala/org/apache/spark/util/UninterruptibleThreadSuite.scala +++ b/core/src/test/scala/org/apache/spark/util/UninterruptibleThreadSuite.scala @@ -68,7 +68,6 @@ class UninterruptibleThreadSuite extends SparkFunSuite { Uninterruptibles.awaitUninterruptibly(interruptLatch, 10, TimeUnit.SECONDS) try { runUninterruptibly { - assert(false, "Should not reach here") } } catch { case _: InterruptedException => hasInterruptedException = true @@ -80,8 +79,8 @@ class UninterruptibleThreadSuite extends SparkFunSuite { t.interrupt() interruptLatch.countDown() t.join() - assert(hasInterruptedException === true) - assert(interruptStatusBeforeExit === false) + assert(hasInterruptedException === false) + assert(interruptStatusBeforeExit === true) } test("nested runUninterruptibly") { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala index d5f8d2acba92..10c42a7338e8 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala @@ -357,7 +357,11 @@ class StreamExecution( if (!NonFatal(e)) { throw e } - } finally { + } finally microBatchThread.runUninterruptibly { + // The whole `finally` block must run inside `runUninterruptibly` to avoid being interrupted + // when a query is stopped by the user. We need to make sure the following codes finish + // otherwise it may throw `InterruptedException` to `UncaughtExceptionHandler` (SPARK-21248). + // Release latches to unblock the user codes since exception can happen in any place and we // may not get a chance to release them startLatch.countDown() From 75b168fd30bb9a52ae223b6f1df73da4b1316f2e Mon Sep 17 00:00:00 2001 From: gatorsmile Date: Thu, 6 Jul 2017 14:18:50 +0800 Subject: [PATCH 048/125] [SPARK-21308][SQL] Remove SQLConf parameters from the optimizer ### What changes were proposed in this pull request? This PR removes SQLConf parameters from the optimizer rules ### How was this patch tested? The existing test cases Author: gatorsmile Closes #18533 from gatorsmile/rmSQLConfOptimizer. 
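To make the shape of this refactoring concrete before the file-by-file diff, here is a minimal sketch. It is illustrative only (`MyRule` is a hypothetical rule name, not part of the patch): a rule goes from a case class that captures a `SQLConf` at construction time to a singleton object that reads the active session's conf through `SQLConf.get`.

```scala
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
import org.apache.spark.sql.catalyst.rules.Rule
import org.apache.spark.sql.internal.SQLConf

// Before: callers had to thread a conf instance through the constructor.
// case class MyRule(conf: SQLConf) extends Rule[LogicalPlan] { ... }

// After: a parameterless object; the conf is resolved lazily, so every
// invocation sees the configuration of the session running the query.
object MyRule extends Rule[LogicalPlan] {
  private def conf = SQLConf.get

  def apply(plan: LogicalPlan): LogicalPlan = {
    // Placeholder logic: gate an (identity) rewrite on a conf flag, the way
    // CostBasedJoinReorder checks conf.cboEnabled in the diff below.
    if (!conf.cboEnabled) plan else plan.transform { case p => p }
  }
}
```

A consequence visible in the test changes below: suites can no longer hand a copied `SQLConf` to a rule's constructor, so they now set and restore the relevant conf entries around each run (see the `beforeAll`/`afterAll` blocks added to `JoinReorderSuite` and the star-join suites).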
--- .../optimizer/CostBasedJoinReorder.scala | 7 ++-- .../sql/catalyst/optimizer/Optimizer.scala | 36 +++++++++---------- .../optimizer/StarSchemaDetection.scala | 4 ++- .../sql/catalyst/optimizer/expressions.scala | 14 ++++---- .../spark/sql/catalyst/optimizer/joins.scala | 6 ++-- .../BinaryComparisonSimplificationSuite.scala | 2 +- .../BooleanSimplificationSuite.scala | 2 +- .../optimizer/CombiningLimitsSuite.scala | 2 +- .../optimizer/ConstantFoldingSuite.scala | 2 +- .../optimizer/DecimalAggregatesSuite.scala | 2 +- .../optimizer/EliminateMapObjectsSuite.scala | 2 +- .../optimizer/JoinOptimizationSuite.scala | 2 +- .../catalyst/optimizer/JoinReorderSuite.scala | 27 +++++++++++--- .../optimizer/LimitPushdownSuite.scala | 2 +- .../optimizer/OptimizeCodegenSuite.scala | 2 +- .../catalyst/optimizer/OptimizeInSuite.scala | 24 +++++++------ .../StarJoinCostBasedReorderSuite.scala | 36 ++++++++++++++----- .../optimizer/StarJoinReorderSuite.scala | 25 ++++++++++--- .../optimizer/complexTypesSuite.scala | 2 +- .../spark/sql/catalyst/plans/PlanTest.scala | 4 +-- .../execution/OptimizeMetadataOnlyQuery.scala | 8 ++--- .../spark/sql/execution/SparkOptimizer.scala | 6 ++-- .../internal/BaseSessionStateBuilder.scala | 2 +- 23 files changed, 137 insertions(+), 82 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/CostBasedJoinReorder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/CostBasedJoinReorder.scala index 3a7543e2141e..db7baf6e9bc7 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/CostBasedJoinReorder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/CostBasedJoinReorder.scala @@ -32,7 +32,10 @@ import org.apache.spark.sql.internal.SQLConf * We may have several join reorder algorithms in the future. This class is the entry of these * algorithms, and chooses which one to use. */ -case class CostBasedJoinReorder(conf: SQLConf) extends Rule[LogicalPlan] with PredicateHelper { +object CostBasedJoinReorder extends Rule[LogicalPlan] with PredicateHelper { + + private def conf = SQLConf.get + def apply(plan: LogicalPlan): LogicalPlan = { if (!conf.cboEnabled || !conf.joinReorderEnabled) { plan @@ -379,7 +382,7 @@ object JoinReorderDPFilters extends PredicateHelper { if (conf.joinReorderDPStarFilter) { // Compute the tables in a star-schema relationship. - val starJoin = StarSchemaDetection(conf).findStarJoins(items, conditions.toSeq) + val starJoin = StarSchemaDetection.findStarJoins(items, conditions.toSeq) val nonStarJoin = items.filterNot(starJoin.contains(_)) if (starJoin.nonEmpty && nonStarJoin.nonEmpty) { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala index 946fa7bae019..d82af94dbffb 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala @@ -34,10 +34,10 @@ import org.apache.spark.sql.types._ * Abstract class all optimizers should inherit of, contains the standard batches (extending * Optimizers can override this. 
*/ -abstract class Optimizer(sessionCatalog: SessionCatalog, conf: SQLConf) +abstract class Optimizer(sessionCatalog: SessionCatalog) extends RuleExecutor[LogicalPlan] { - protected val fixedPoint = FixedPoint(conf.optimizerMaxIterations) + protected def fixedPoint = FixedPoint(SQLConf.get.optimizerMaxIterations) def batches: Seq[Batch] = { Batch("Eliminate Distinct", Once, EliminateDistinct) :: @@ -77,11 +77,11 @@ abstract class Optimizer(sessionCatalog: SessionCatalog, conf: SQLConf) Batch("Operator Optimizations", fixedPoint, Seq( // Operator push down PushProjectionThroughUnion, - ReorderJoin(conf), + ReorderJoin, EliminateOuterJoin, PushPredicateThroughJoin, PushDownPredicate, - LimitPushDown(conf), + LimitPushDown, ColumnPruning, InferFiltersFromConstraints, // Operator combine @@ -92,10 +92,10 @@ abstract class Optimizer(sessionCatalog: SessionCatalog, conf: SQLConf) CombineLimits, CombineUnions, // Constant folding and strength reduction - NullPropagation(conf), + NullPropagation, ConstantPropagation, FoldablePropagation, - OptimizeIn(conf), + OptimizeIn, ConstantFolding, ReorderAssociativeOperator, LikeSimplification, @@ -117,11 +117,11 @@ abstract class Optimizer(sessionCatalog: SessionCatalog, conf: SQLConf) CombineConcats) ++ extendedOperatorOptimizationRules: _*) :: Batch("Check Cartesian Products", Once, - CheckCartesianProducts(conf)) :: + CheckCartesianProducts) :: Batch("Join Reorder", Once, - CostBasedJoinReorder(conf)) :: + CostBasedJoinReorder) :: Batch("Decimal Optimizations", fixedPoint, - DecimalAggregates(conf)) :: + DecimalAggregates) :: Batch("Object Expressions Optimization", fixedPoint, EliminateMapObjects, CombineTypedFilters) :: @@ -129,7 +129,7 @@ abstract class Optimizer(sessionCatalog: SessionCatalog, conf: SQLConf) ConvertToLocalRelation, PropagateEmptyRelation) :: Batch("OptimizeCodegen", Once, - OptimizeCodegen(conf)) :: + OptimizeCodegen) :: Batch("RewriteSubquery", Once, RewritePredicateSubquery, CollapseProject) :: Nil @@ -178,8 +178,7 @@ class SimpleTestOptimizer extends Optimizer( new SessionCatalog( new InMemoryCatalog, EmptyFunctionRegistry, - new SQLConf().copy(SQLConf.CASE_SENSITIVE -> true)), - new SQLConf().copy(SQLConf.CASE_SENSITIVE -> true)) + new SQLConf().copy(SQLConf.CASE_SENSITIVE -> true))) /** * Remove redundant aliases from a query plan. A redundant alias is an alias that does not change @@ -288,7 +287,7 @@ object RemoveRedundantProject extends Rule[LogicalPlan] { /** * Pushes down [[LocalLimit]] beneath UNION ALL and beneath the streamed inputs of outer joins. */ -case class LimitPushDown(conf: SQLConf) extends Rule[LogicalPlan] { +object LimitPushDown extends Rule[LogicalPlan] { private def stripGlobalLimitIfPresent(plan: LogicalPlan): LogicalPlan = { plan match { @@ -1077,8 +1076,7 @@ object CombineLimits extends Rule[LogicalPlan] { * the join between R and S is not a cartesian product and therefore should be allowed. * The predicate R.r = S.s is not recognized as a join condition until the ReorderJoin rule. */ -case class CheckCartesianProducts(conf: SQLConf) - extends Rule[LogicalPlan] with PredicateHelper { +object CheckCartesianProducts extends Rule[LogicalPlan] with PredicateHelper { /** * Check if a join is a cartesian product. Returns true if * there are no join conditions involving references from both left and right. 
@@ -1090,7 +1088,7 @@ case class CheckCartesianProducts(conf: SQLConf) } def apply(plan: LogicalPlan): LogicalPlan = - if (conf.crossJoinEnabled) { + if (SQLConf.get.crossJoinEnabled) { plan } else plan transform { case j @ Join(left, right, Inner | LeftOuter | RightOuter | FullOuter, condition) @@ -1112,7 +1110,7 @@ case class CheckCartesianProducts(conf: SQLConf) * This uses the same rules for increasing the precision and scale of the output as * [[org.apache.spark.sql.catalyst.analysis.DecimalPrecision]]. */ -case class DecimalAggregates(conf: SQLConf) extends Rule[LogicalPlan] { +object DecimalAggregates extends Rule[LogicalPlan] { import Decimal.MAX_LONG_DIGITS /** Maximum number of decimal digits representable precisely in a Double */ @@ -1130,7 +1128,7 @@ case class DecimalAggregates(conf: SQLConf) extends Rule[LogicalPlan] { we.copy(windowFunction = ae.copy(aggregateFunction = Average(UnscaledValue(e)))) Cast( Divide(newAggExpr, Literal.create(math.pow(10.0, scale), DoubleType)), - DecimalType(prec + 4, scale + 4), Option(conf.sessionLocalTimeZone)) + DecimalType(prec + 4, scale + 4), Option(SQLConf.get.sessionLocalTimeZone)) case _ => we } @@ -1142,7 +1140,7 @@ case class DecimalAggregates(conf: SQLConf) extends Rule[LogicalPlan] { val newAggExpr = ae.copy(aggregateFunction = Average(UnscaledValue(e))) Cast( Divide(newAggExpr, Literal.create(math.pow(10.0, scale), DoubleType)), - DecimalType(prec + 4, scale + 4), Option(conf.sessionLocalTimeZone)) + DecimalType(prec + 4, scale + 4), Option(SQLConf.get.sessionLocalTimeZone)) case _ => ae } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/StarSchemaDetection.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/StarSchemaDetection.scala index ca729127e7d1..1f20b7661489 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/StarSchemaDetection.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/StarSchemaDetection.scala @@ -28,7 +28,9 @@ import org.apache.spark.sql.internal.SQLConf /** * Encapsulates star-schema detection logic. */ -case class StarSchemaDetection(conf: SQLConf) extends PredicateHelper { +object StarSchemaDetection extends PredicateHelper { + + private def conf = SQLConf.get /** * Star schema consists of one or more fact tables referencing a number of dimension diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala index 66b8ca62e5e4..6c83f4790004 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala @@ -173,12 +173,12 @@ object ReorderAssociativeOperator extends Rule[LogicalPlan] { * 2. Replaces [[In (value, seq[Literal])]] with optimized version * [[InSet (value, HashSet[Literal])]] which is much faster. 
*/ -case class OptimizeIn(conf: SQLConf) extends Rule[LogicalPlan] { +object OptimizeIn extends Rule[LogicalPlan] { def apply(plan: LogicalPlan): LogicalPlan = plan transform { case q: LogicalPlan => q transformExpressionsDown { case expr @ In(v, list) if expr.inSetConvertible => val newList = ExpressionSet(list).toSeq - if (newList.size > conf.optimizerInSetConversionThreshold) { + if (newList.size > SQLConf.get.optimizerInSetConversionThreshold) { val hSet = newList.map(e => e.eval(EmptyRow)) InSet(v, HashSet() ++ hSet) } else if (newList.size < list.size) { @@ -414,7 +414,7 @@ object LikeSimplification extends Rule[LogicalPlan] { * equivalent [[Literal]] values. This rule is more specific with * Null value propagation from bottom to top of the expression tree. */ -case class NullPropagation(conf: SQLConf) extends Rule[LogicalPlan] { +object NullPropagation extends Rule[LogicalPlan] { private def isNullLiteral(e: Expression): Boolean = e match { case Literal(null, _) => true case _ => false @@ -423,9 +423,9 @@ case class NullPropagation(conf: SQLConf) extends Rule[LogicalPlan] { def apply(plan: LogicalPlan): LogicalPlan = plan transform { case q: LogicalPlan => q transformExpressionsUp { case e @ WindowExpression(Cast(Literal(0L, _), _, _), _) => - Cast(Literal(0L), e.dataType, Option(conf.sessionLocalTimeZone)) + Cast(Literal(0L), e.dataType, Option(SQLConf.get.sessionLocalTimeZone)) case e @ AggregateExpression(Count(exprs), _, _, _) if exprs.forall(isNullLiteral) => - Cast(Literal(0L), e.dataType, Option(conf.sessionLocalTimeZone)) + Cast(Literal(0L), e.dataType, Option(SQLConf.get.sessionLocalTimeZone)) case ae @ AggregateExpression(Count(exprs), _, false, _) if !exprs.exists(_.nullable) => // This rule should be only triggered when isDistinct field is false. ae.copy(aggregateFunction = Count(Literal(1))) @@ -552,14 +552,14 @@ object FoldablePropagation extends Rule[LogicalPlan] { /** * Optimizes expressions by replacing according to CodeGen configuration. */ -case class OptimizeCodegen(conf: SQLConf) extends Rule[LogicalPlan] { +object OptimizeCodegen extends Rule[LogicalPlan] { def apply(plan: LogicalPlan): LogicalPlan = plan transformAllExpressions { case e: CaseWhen if canCodegen(e) => e.toCodegen() } private def canCodegen(e: CaseWhen): Boolean = { val numBranches = e.branches.size + e.elseValue.size - numBranches <= conf.maxCaseBranchesForCodegen + numBranches <= SQLConf.get.maxCaseBranchesForCodegen } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/joins.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/joins.scala index bb97e2c808b9..edbeaf273fd6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/joins.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/joins.scala @@ -34,7 +34,7 @@ import org.apache.spark.sql.internal.SQLConf * * If star schema detection is enabled, reorder the star join plans based on heuristics. */ -case class ReorderJoin(conf: SQLConf) extends Rule[LogicalPlan] with PredicateHelper { +object ReorderJoin extends Rule[LogicalPlan] with PredicateHelper { /** * Join a list of plans together and push down the conditions into them. 
* @@ -87,8 +87,8 @@ case class ReorderJoin(conf: SQLConf) extends Rule[LogicalPlan] with PredicateHe def apply(plan: LogicalPlan): LogicalPlan = plan transform { case ExtractFiltersAndInnerJoins(input, conditions) if input.size > 2 && conditions.nonEmpty => - if (conf.starSchemaDetection && !conf.cboEnabled) { - val starJoinPlan = StarSchemaDetection(conf).reorderStarJoins(input, conditions) + if (SQLConf.get.starSchemaDetection && !SQLConf.get.cboEnabled) { + val starJoinPlan = StarSchemaDetection.reorderStarJoins(input, conditions) if (starJoinPlan.nonEmpty) { val rest = input.filterNot(starJoinPlan.contains(_)) createOrderedJoin(starJoinPlan ++ rest, conditions) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/BinaryComparisonSimplificationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/BinaryComparisonSimplificationSuite.scala index 2a04bd588dc1..a313681eeb8f 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/BinaryComparisonSimplificationSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/BinaryComparisonSimplificationSuite.scala @@ -33,7 +33,7 @@ class BinaryComparisonSimplificationSuite extends PlanTest with PredicateHelper Batch("AnalysisNodes", Once, EliminateSubqueryAliases) :: Batch("Constant Folding", FixedPoint(50), - NullPropagation(conf), + NullPropagation, ConstantFolding, BooleanSimplification, SimplifyBinaryComparison, diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/BooleanSimplificationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/BooleanSimplificationSuite.scala index c6345b60b744..56399f4831a6 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/BooleanSimplificationSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/BooleanSimplificationSuite.scala @@ -35,7 +35,7 @@ class BooleanSimplificationSuite extends PlanTest with PredicateHelper { Batch("AnalysisNodes", Once, EliminateSubqueryAliases) :: Batch("Constant Folding", FixedPoint(50), - NullPropagation(conf), + NullPropagation, ConstantFolding, BooleanSimplification, PruneFilters) :: Nil diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/CombiningLimitsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/CombiningLimitsSuite.scala index ac71887c16f9..87ad81db11b6 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/CombiningLimitsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/CombiningLimitsSuite.scala @@ -32,7 +32,7 @@ class CombiningLimitsSuite extends PlanTest { Batch("Combine Limit", FixedPoint(10), CombineLimits) :: Batch("Constant Folding", FixedPoint(10), - NullPropagation(conf), + NullPropagation, ConstantFolding, BooleanSimplification, SimplifyConditionals) :: Nil diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ConstantFoldingSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ConstantFoldingSuite.scala index 25c592b9c1dd..641c89873dcc 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ConstantFoldingSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ConstantFoldingSuite.scala @@ -33,7 +33,7 @@ class ConstantFoldingSuite extends PlanTest { Batch("AnalysisNodes", Once, EliminateSubqueryAliases) :: 
Batch("ConstantFolding", Once, - OptimizeIn(conf), + OptimizeIn, ConstantFolding, BooleanSimplification) :: Nil } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/DecimalAggregatesSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/DecimalAggregatesSuite.scala index cc4fb3a244a9..711294ed6192 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/DecimalAggregatesSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/DecimalAggregatesSuite.scala @@ -29,7 +29,7 @@ class DecimalAggregatesSuite extends PlanTest { object Optimize extends RuleExecutor[LogicalPlan] { val batches = Batch("Decimal Optimizations", FixedPoint(100), - DecimalAggregates(conf)) :: Nil + DecimalAggregates) :: Nil } val testRelation = LocalRelation('a.decimal(2, 1), 'b.decimal(12, 1)) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/EliminateMapObjectsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/EliminateMapObjectsSuite.scala index d4f37e2a5e87..157472c2293f 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/EliminateMapObjectsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/EliminateMapObjectsSuite.scala @@ -31,7 +31,7 @@ class EliminateMapObjectsSuite extends PlanTest { object Optimize extends RuleExecutor[LogicalPlan] { val batches = { Batch("EliminateMapObjects", FixedPoint(50), - NullPropagation(conf), + NullPropagation, SimplifyCasts, EliminateMapObjects) :: Nil } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/JoinOptimizationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/JoinOptimizationSuite.scala index a6584aa5fbba..2f30a78f0321 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/JoinOptimizationSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/JoinOptimizationSuite.scala @@ -37,7 +37,7 @@ class JoinOptimizationSuite extends PlanTest { CombineFilters, PushDownPredicate, BooleanSimplification, - ReorderJoin(conf), + ReorderJoin, PushPredicateThroughJoin, ColumnPruning, CollapseProject) :: Nil diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/JoinReorderSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/JoinReorderSuite.scala index 71db4e2e0ec4..2fb587d50a4c 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/JoinReorderSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/JoinReorderSuite.scala @@ -24,25 +24,42 @@ import org.apache.spark.sql.catalyst.plans.{Inner, PlanTest} import org.apache.spark.sql.catalyst.plans.logical.{ColumnStat, LogicalPlan} import org.apache.spark.sql.catalyst.rules.RuleExecutor import org.apache.spark.sql.catalyst.statsEstimation.{StatsEstimationTestBase, StatsTestPlan} -import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.internal.SQLConf.{CBO_ENABLED, JOIN_REORDER_ENABLED} class JoinReorderSuite extends PlanTest with StatsEstimationTestBase { - override val conf = new SQLConf().copy(CBO_ENABLED -> true, JOIN_REORDER_ENABLED -> true) - object Optimize extends RuleExecutor[LogicalPlan] { val batches = Batch("Operator Optimizations", FixedPoint(100), CombineFilters, PushDownPredicate, - ReorderJoin(conf), + ReorderJoin, PushPredicateThroughJoin, ColumnPruning, 
CollapseProject) :: Batch("Join Reorder", Once, - CostBasedJoinReorder(conf)) :: Nil + CostBasedJoinReorder) :: Nil + } + + var originalConfCBOEnabled = false + var originalConfJoinReorderEnabled = false + + override def beforeAll(): Unit = { + super.beforeAll() + originalConfCBOEnabled = conf.cboEnabled + originalConfJoinReorderEnabled = conf.joinReorderEnabled + conf.setConf(CBO_ENABLED, true) + conf.setConf(JOIN_REORDER_ENABLED, true) + } + + override def afterAll(): Unit = { + try { + conf.setConf(CBO_ENABLED, originalConfCBOEnabled) + conf.setConf(JOIN_REORDER_ENABLED, originalConfJoinReorderEnabled) + } finally { + super.afterAll() + } } /** Set up tables and columns for testing */ diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/LimitPushdownSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/LimitPushdownSuite.scala index d8302dfc9462..f50e2e86516f 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/LimitPushdownSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/LimitPushdownSuite.scala @@ -32,7 +32,7 @@ class LimitPushdownSuite extends PlanTest { Batch("Subqueries", Once, EliminateSubqueryAliases) :: Batch("Limit pushdown", FixedPoint(100), - LimitPushDown(conf), + LimitPushDown, CombineLimits, ConstantFolding, BooleanSimplification) :: Nil diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeCodegenSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeCodegenSuite.scala index 9dc6738ba04b..b71067c0af3a 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeCodegenSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeCodegenSuite.scala @@ -28,7 +28,7 @@ import org.apache.spark.sql.catalyst.rules._ class OptimizeCodegenSuite extends PlanTest { object Optimize extends RuleExecutor[LogicalPlan] { - val batches = Batch("OptimizeCodegen", Once, OptimizeCodegen(conf)) :: Nil + val batches = Batch("OptimizeCodegen", Once, OptimizeCodegen) :: Nil } protected def assertEquivalent(e1: Expression, e2: Expression): Unit = { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeInSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeInSuite.scala index d8937321ecb9..6a77580b29a2 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeInSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeInSuite.scala @@ -34,10 +34,10 @@ class OptimizeInSuite extends PlanTest { Batch("AnalysisNodes", Once, EliminateSubqueryAliases) :: Batch("ConstantFolding", FixedPoint(10), - NullPropagation(conf), + NullPropagation, ConstantFolding, BooleanSimplification, - OptimizeIn(conf)) :: Nil + OptimizeIn) :: Nil } val testRelation = LocalRelation('a.int, 'b.int, 'c.int) @@ -159,16 +159,20 @@ class OptimizeInSuite extends PlanTest { .where(In(UnresolvedAttribute("a"), Seq(Literal(1), Literal(2), Literal(3)))) .analyze - val notOptimizedPlan = OptimizeIn(conf)(plan) - comparePlans(notOptimizedPlan, plan) + withSQLConf(OPTIMIZER_INSET_CONVERSION_THRESHOLD.key -> "10") { + val notOptimizedPlan = OptimizeIn(plan) + comparePlans(notOptimizedPlan, plan) + } // Reduce the threshold to turning into InSet. 
- val optimizedPlan = OptimizeIn(conf.copy(OPTIMIZER_INSET_CONVERSION_THRESHOLD -> 2))(plan) - optimizedPlan match { - case Filter(cond, _) - if cond.isInstanceOf[InSet] && cond.asInstanceOf[InSet].getHSet().size == 3 => - // pass - case _ => fail("Unexpected result for OptimizedIn") + withSQLConf(OPTIMIZER_INSET_CONVERSION_THRESHOLD.key -> "2") { + val optimizedPlan = OptimizeIn(plan) + optimizedPlan match { + case Filter(cond, _) + if cond.isInstanceOf[InSet] && cond.asInstanceOf[InSet].getHSet().size == 3 => + // pass + case _ => fail("Unexpected result for OptimizedIn") + } } } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/StarJoinCostBasedReorderSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/StarJoinCostBasedReorderSuite.scala index a23d6266b284..ada6e2a43ea0 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/StarJoinCostBasedReorderSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/StarJoinCostBasedReorderSuite.scala @@ -24,28 +24,46 @@ import org.apache.spark.sql.catalyst.plans.{Inner, PlanTest} import org.apache.spark.sql.catalyst.plans.logical.{ColumnStat, LogicalPlan} import org.apache.spark.sql.catalyst.rules.RuleExecutor import org.apache.spark.sql.catalyst.statsEstimation.{StatsEstimationTestBase, StatsTestPlan} -import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.internal.SQLConf._ class StarJoinCostBasedReorderSuite extends PlanTest with StatsEstimationTestBase { - override val conf = new SQLConf().copy( - CBO_ENABLED -> true, - JOIN_REORDER_ENABLED -> true, - JOIN_REORDER_DP_STAR_FILTER -> true) - object Optimize extends RuleExecutor[LogicalPlan] { val batches = Batch("Operator Optimizations", FixedPoint(100), CombineFilters, PushDownPredicate, - ReorderJoin(conf), + ReorderJoin, PushPredicateThroughJoin, ColumnPruning, CollapseProject) :: - Batch("Join Reorder", Once, - CostBasedJoinReorder(conf)) :: Nil + Batch("Join Reorder", Once, + CostBasedJoinReorder) :: Nil + } + + var originalConfCBOEnabled = false + var originalConfJoinReorderEnabled = false + var originalConfJoinReorderDPStarFilter = false + + override def beforeAll(): Unit = { + super.beforeAll() + originalConfCBOEnabled = conf.cboEnabled + originalConfJoinReorderEnabled = conf.joinReorderEnabled + originalConfJoinReorderDPStarFilter = conf.joinReorderDPStarFilter + conf.setConf(CBO_ENABLED, true) + conf.setConf(JOIN_REORDER_ENABLED, true) + conf.setConf(JOIN_REORDER_DP_STAR_FILTER, true) + } + + override def afterAll(): Unit = { + try { + conf.setConf(CBO_ENABLED, originalConfCBOEnabled) + conf.setConf(JOIN_REORDER_ENABLED, originalConfJoinReorderEnabled) + conf.setConf(JOIN_REORDER_DP_STAR_FILTER, originalConfJoinReorderDPStarFilter) + } finally { + super.afterAll() + } } private val columnInfo: AttributeMap[ColumnStat] = AttributeMap(Seq( diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/StarJoinReorderSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/StarJoinReorderSuite.scala index 605c01b7220d..777c5637201e 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/StarJoinReorderSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/StarJoinReorderSuite.scala @@ -24,19 +24,36 @@ import org.apache.spark.sql.catalyst.plans.{Inner, PlanTest} import org.apache.spark.sql.catalyst.plans.logical.{ColumnStat, LocalRelation, LogicalPlan} import 
org.apache.spark.sql.catalyst.rules.RuleExecutor import org.apache.spark.sql.catalyst.statsEstimation.{StatsEstimationTestBase, StatsTestPlan} -import org.apache.spark.sql.internal.SQLConf -import org.apache.spark.sql.internal.SQLConf.{CASE_SENSITIVE, STARSCHEMA_DETECTION} +import org.apache.spark.sql.internal.SQLConf._ class StarJoinReorderSuite extends PlanTest with StatsEstimationTestBase { - override val conf = new SQLConf().copy(CASE_SENSITIVE -> true, STARSCHEMA_DETECTION -> true) + var originalConfStarSchemaDetection = false + var originalConfCBOEnabled = true + + override def beforeAll(): Unit = { + super.beforeAll() + originalConfStarSchemaDetection = conf.starSchemaDetection + originalConfCBOEnabled = conf.cboEnabled + conf.setConf(STARSCHEMA_DETECTION, true) + conf.setConf(CBO_ENABLED, false) + } + + override def afterAll(): Unit = { + try { + conf.setConf(STARSCHEMA_DETECTION, originalConfStarSchemaDetection) + conf.setConf(CBO_ENABLED, originalConfCBOEnabled) + } finally { + super.afterAll() + } + } object Optimize extends RuleExecutor[LogicalPlan] { val batches = Batch("Operator Optimizations", FixedPoint(100), CombineFilters, PushDownPredicate, - ReorderJoin(conf), + ReorderJoin, PushPredicateThroughJoin, ColumnPruning, CollapseProject) :: Nil diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/complexTypesSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/complexTypesSuite.scala index 0a18858350e1..3634accf1ec2 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/complexTypesSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/complexTypesSuite.scala @@ -37,7 +37,7 @@ class ComplexTypesSuite extends PlanTest{ Batch("collapse projections", FixedPoint(10), CollapseProject) :: Batch("Constant Folding", FixedPoint(10), - NullPropagation(conf), + NullPropagation, ConstantFolding, BooleanSimplification, SimplifyConditionals, diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/PlanTest.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/PlanTest.scala index e9679d336150..5389bf3389da 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/PlanTest.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/PlanTest.scala @@ -31,8 +31,8 @@ import org.apache.spark.sql.internal.SQLConf */ trait PlanTest extends SparkFunSuite with PredicateHelper { - // TODO(gatorsmile): remove this from PlanTest and all the analyzer/optimizer rules - protected val conf = new SQLConf().copy(SQLConf.CASE_SENSITIVE -> true) + // TODO(gatorsmile): remove this from PlanTest and all the analyzer rules + protected def conf = SQLConf.get /** * Since attribute references are given globally unique ids during analysis, diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/OptimizeMetadataOnlyQuery.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/OptimizeMetadataOnlyQuery.scala index 3c046ce49428..5cfad9126986 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/OptimizeMetadataOnlyQuery.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/OptimizeMetadataOnlyQuery.scala @@ -38,12 +38,10 @@ import org.apache.spark.sql.internal.SQLConf * 3. aggregate function on partition columns which have same result w or w/o DISTINCT keyword. * e.g. SELECT col1, Max(col2) FROM tbl GROUP BY col1. 
*/ -case class OptimizeMetadataOnlyQuery( - catalog: SessionCatalog, - conf: SQLConf) extends Rule[LogicalPlan] { +case class OptimizeMetadataOnlyQuery(catalog: SessionCatalog) extends Rule[LogicalPlan] { def apply(plan: LogicalPlan): LogicalPlan = { - if (!conf.optimizerMetadataOnly) { + if (!SQLConf.get.optimizerMetadataOnly) { return plan } @@ -106,7 +104,7 @@ case class OptimizeMetadataOnlyQuery( val caseInsensitiveProperties = CaseInsensitiveMap(relation.tableMeta.storage.properties) val timeZoneId = caseInsensitiveProperties.get(DateTimeUtils.TIMEZONE_OPTION) - .getOrElse(conf.sessionLocalTimeZone) + .getOrElse(SQLConf.get.sessionLocalTimeZone) val partitionData = catalog.listPartitions(relation.tableMeta.identifier).map { p => InternalRow.fromSeq(partAttrs.map { attr => Cast(Literal(p.spec(attr.name)), attr.dataType, Option(timeZoneId)).eval()
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkOptimizer.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkOptimizer.scala index 1de4f508b89a..00ff4c8ac310 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkOptimizer.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkOptimizer.scala @@ -22,16 +22,14 @@ import org.apache.spark.sql.catalyst.catalog.SessionCatalog import org.apache.spark.sql.catalyst.optimizer.Optimizer import org.apache.spark.sql.execution.datasources.PruneFileSourcePartitions import org.apache.spark.sql.execution.python.ExtractPythonUDFFromAggregate -import org.apache.spark.sql.internal.SQLConf class SparkOptimizer( catalog: SessionCatalog, - conf: SQLConf, experimentalMethods: ExperimentalMethods) - extends Optimizer(catalog, conf) { + extends Optimizer(catalog) { override def batches: Seq[Batch] = (preOptimizationBatches ++ super.batches :+ - Batch("Optimize Metadata Only Query", Once, OptimizeMetadataOnlyQuery(catalog, conf)) :+ + Batch("Optimize Metadata Only Query", Once, OptimizeMetadataOnlyQuery(catalog)) :+ Batch("Extract Python UDF from Aggregate", Once, ExtractPythonUDFFromAggregate) :+ Batch("Prune File Source Table Partitions", Once, PruneFileSourcePartitions)) ++ postHocOptimizationBatches :+
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/BaseSessionStateBuilder.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/BaseSessionStateBuilder.scala index 9d0148117fad..72d0ddc62303 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/BaseSessionStateBuilder.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/BaseSessionStateBuilder.scala @@ -208,7 +208,7 @@ abstract class BaseSessionStateBuilder( * Note: this depends on the `conf`, `catalog` and `experimentalMethods` fields. */ protected def optimizer: Optimizer = { - new SparkOptimizer(catalog, conf, experimentalMethods) { + new SparkOptimizer(catalog, experimentalMethods) { override def extendedOperatorOptimizationRules: Seq[Rule[LogicalPlan]] = super.extendedOperatorOptimizationRules ++ customOperatorOptimizationRules }
From 14a3bb3a008c302aac908d7deaf0942a98c63be7 Mon Sep 17 00:00:00 2001 From: Sumedh Wale Date: Thu, 6 Jul 2017 14:47:22 +0800 Subject: [PATCH 049/125] [SPARK-21312][SQL] correct offsetInBytes in UnsafeRow.writeToStream ## What changes were proposed in this pull request? Corrects the offsetInBytes calculation in UnsafeRow.writeToStream. Known failures include writes to some DataSources that have their own SparkPlan implementations and cause an EXCHANGE in writes. ## How was this patch tested?
Extended UnsafeRowSuite.writeToStream to include an UnsafeRow over a byte array with a non-zero offset. Author: Sumedh Wale Closes #18535 from sumwale/SPARK-21312. --- .../spark/sql/catalyst/expressions/UnsafeRow.java | 2 +- .../scala/org/apache/spark/sql/UnsafeRowSuite.scala | 13 +++++++++++++ 2 files changed, 14 insertions(+), 1 deletion(-)
diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeRow.java b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeRow.java index 86de90984ca0..56994fafe064 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeRow.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeRow.java @@ -550,7 +550,7 @@ public void copyFrom(UnsafeRow row) { */ public void writeToStream(OutputStream out, byte[] writeBuffer) throws IOException { if (baseObject instanceof byte[]) { - int offsetInByteArray = (int) (Platform.BYTE_ARRAY_OFFSET - baseOffset); + int offsetInByteArray = (int) (baseOffset - Platform.BYTE_ARRAY_OFFSET); out.write((byte[]) baseObject, offsetInByteArray, sizeInBytes); } else { int dataRemaining = sizeInBytes;
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/UnsafeRowSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/UnsafeRowSuite.scala index a32763db054f..a5f904c621e6 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/UnsafeRowSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/UnsafeRowSuite.scala @@ -101,9 +101,22 @@ class UnsafeRowSuite extends SparkFunSuite { MemoryAllocator.UNSAFE.free(offheapRowPage) } } + val (bytesFromArrayBackedRowWithOffset, field0StringFromArrayBackedRowWithOffset) = { + val baos = new ByteArrayOutputStream() + val numBytes = arrayBackedUnsafeRow.getSizeInBytes + val bytesWithOffset = new Array[Byte](numBytes + 100) + System.arraycopy(arrayBackedUnsafeRow.getBaseObject.asInstanceOf[Array[Byte]], 0, + bytesWithOffset, 100, numBytes) + val arrayBackedRow = new UnsafeRow(arrayBackedUnsafeRow.numFields()) + arrayBackedRow.pointTo(bytesWithOffset, Platform.BYTE_ARRAY_OFFSET + 100, numBytes) + arrayBackedRow.writeToStream(baos, null) + (baos.toByteArray, arrayBackedRow.getString(0)) + } assert(bytesFromArrayBackedRow === bytesFromOffheapRow) assert(field0StringFromArrayBackedRow === field0StringFromOffheapRow) + assert(bytesFromArrayBackedRow === bytesFromArrayBackedRowWithOffset) + assert(field0StringFromArrayBackedRow === field0StringFromArrayBackedRowWithOffset) } test("calling getDouble() and getFloat() on null columns") {
From 60043f22458668ac7ecba94fa78953f23a6bdcec Mon Sep 17 00:00:00 2001 From: Tathagata Das Date: Thu, 6 Jul 2017 00:20:26 -0700 Subject: [PATCH 050/125] [SS][MINOR] Fix flaky test in DatastreamReaderWriterSuite. temp checkpoint dir should be deleted ## What changes were proposed in this pull request? Stopping a query while it is being initialized can throw an InterruptedException, in which case the temporary checkpoint directories will not be deleted and the test will fail. Author: Tathagata Das Closes #18442 from tdas/DatastreamReaderWriterSuite-fix.
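The race being fixed: `stop()` interrupts the stream-execution thread, and if that interrupt lands while the query is still initializing, the temporary checkpoint directory is never cleaned up. Below is a condensed, hypothetical version of the fixed test body (the real one-line change follows in the diff; this sketch assumes the implicits of the `StreamTest` harness and an active `SparkSession`):

```scala
import org.apache.spark.sql.execution.streaming.MemoryStream

val query = MemoryStream[Int].toDS.writeStream.format("console").start()

// Block until the query has fully initialized and processed all pending
// data. Without this call, stop() can interrupt the still-initializing
// stream and leak the temporary checkpoint directory.
query.processAllAvailable()
query.stop() // cleanup of the temp checkpoint dir now runs to completion
```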
--- .../spark/sql/streaming/test/DataStreamReaderWriterSuite.scala | 1 + 1 file changed, 1 insertion(+)
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/test/DataStreamReaderWriterSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/test/DataStreamReaderWriterSuite.scala index 3de0ae67a389..e8a6202b8adc 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/test/DataStreamReaderWriterSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/test/DataStreamReaderWriterSuite.scala @@ -641,6 +641,7 @@ class DataStreamReaderWriterSuite extends StreamTest with BeforeAndAfter { test("temp checkpoint dir should be deleted if a query is stopped without errors") { import testImplicits._ val query = MemoryStream[Int].toDS.writeStream.format("console").start() + query.processAllAvailable() val checkpointDir = new Path( query.asInstanceOf[StreamingQueryWrapper].streamingQuery.resolvedCheckpointRoot) val fs = checkpointDir.getFileSystem(spark.sessionState.newHadoopConf())
From 5800144a54f5c0180ccf67392f32c3e8a51119b1 Mon Sep 17 00:00:00 2001 From: jerryshao Date: Thu, 6 Jul 2017 15:32:49 +0800 Subject: [PATCH 051/125] [SPARK-21012][SUBMIT] Add glob support for resources adding to Spark Currently "--jars (spark.jars)", "--files (spark.files)", "--py-files (spark.submit.pyFiles)" and "--archives (spark.yarn.dist.archives)" only support non-glob paths. This is fine for most cases, but when a user needs to add many jars or files to Spark, listing them one by one is too verbose. This patch therefore adds glob path support for these resources, and also improves the resource-downloading code. ## How was this patch tested? Unit tests added; also verified manually in a local cluster. Author: jerryshao Closes #18235 from jerryshao/SPARK-21012. --- .../org/apache/spark/deploy/SparkSubmit.scala | 166 ++++++++++++++---- .../spark/deploy/SparkSubmitArguments.scala | 2 +- .../spark/deploy/SparkSubmitSuite.scala | 68 ++++++- docs/configuration.md | 6 +- 4 files changed, 196 insertions(+), 46 deletions(-)
diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala index d13fb4193970..abde04062c4b 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala @@ -17,17 +17,21 @@ package org.apache.spark.deploy -import java.io.{File, IOException} +import java.io._ import java.lang.reflect.{InvocationTargetException, Modifier, UndeclaredThrowableException} import java.net.URL import java.nio.file.Files -import java.security.PrivilegedExceptionAction +import java.security.{KeyStore, PrivilegedExceptionAction} +import java.security.cert.X509Certificate import java.text.ParseException +import javax.net.ssl._ import scala.annotation.tailrec import scala.collection.mutable.{ArrayBuffer, HashMap, Map} import scala.util.Properties +import com.google.common.io.ByteStreams +import org.apache.commons.io.FileUtils import org.apache.commons.lang3.StringUtils import org.apache.hadoop.conf.{Configuration => HadoopConfiguration} import org.apache.hadoop.fs.{FileSystem, Path} @@ -310,33 +314,33 @@ object SparkSubmit extends CommandLineUtils { RPackageUtils.checkAndBuildRPackage(args.jars, printStream, args.verbose) } - // In client mode, download remote files.
- if (deployMode == CLIENT) { - val hadoopConf = new HadoopConfiguration() - args.primaryResource = Option(args.primaryResource).map(downloadFile(_, hadoopConf)).orNull - args.jars = Option(args.jars).map(downloadFileList(_, hadoopConf)).orNull - args.pyFiles = Option(args.pyFiles).map(downloadFileList(_, hadoopConf)).orNull - args.files = Option(args.files).map(downloadFileList(_, hadoopConf)).orNull - } - - // Require all python files to be local, so we can add them to the PYTHONPATH - // In YARN cluster mode, python files are distributed as regular files, which can be non-local. - // In Mesos cluster mode, non-local python files are automatically downloaded by Mesos. - if (args.isPython && !isYarnCluster && !isMesosCluster) { - if (Utils.nonLocalPaths(args.primaryResource).nonEmpty) { - printErrorAndExit(s"Only local python files are supported: ${args.primaryResource}") + val hadoopConf = new HadoopConfiguration() + val targetDir = Files.createTempDirectory("tmp").toFile + // scalastyle:off runtimeaddshutdownhook + Runtime.getRuntime.addShutdownHook(new Thread() { + override def run(): Unit = { + FileUtils.deleteQuietly(targetDir) } - val nonLocalPyFiles = Utils.nonLocalPaths(args.pyFiles).mkString(",") - if (nonLocalPyFiles.nonEmpty) { - printErrorAndExit(s"Only local additional python files are supported: $nonLocalPyFiles") - } - } + }) + // scalastyle:on runtimeaddshutdownhook - // Require all R files to be local - if (args.isR && !isYarnCluster && !isMesosCluster) { - if (Utils.nonLocalPaths(args.primaryResource).nonEmpty) { - printErrorAndExit(s"Only local R files are supported: ${args.primaryResource}") - } + // Resolve glob path for different resources. + args.jars = Option(args.jars).map(resolveGlobPaths(_, hadoopConf)).orNull + args.files = Option(args.files).map(resolveGlobPaths(_, hadoopConf)).orNull + args.pyFiles = Option(args.pyFiles).map(resolveGlobPaths(_, hadoopConf)).orNull + args.archives = Option(args.archives).map(resolveGlobPaths(_, hadoopConf)).orNull + + // In client mode, download remote files. + if (deployMode == CLIENT) { + args.primaryResource = Option(args.primaryResource).map { + downloadFile(_, targetDir, args.sparkProperties, hadoopConf) + }.orNull + args.jars = Option(args.jars).map { + downloadFileList(_, targetDir, args.sparkProperties, hadoopConf) + }.orNull + args.pyFiles = Option(args.pyFiles).map { + downloadFileList(_, targetDir, args.sparkProperties, hadoopConf) + }.orNull } // The following modes are not supported or applicable @@ -841,36 +845,132 @@ object SparkSubmit extends CommandLineUtils { * Download a list of remote files to temp local files. If the file is local, the original file * will be returned. * @param fileList A comma separated file list. + * @param targetDir A temporary directory for which downloaded files + * @param sparkProperties Spark properties * @return A comma separated local files list. */ private[deploy] def downloadFileList( fileList: String, + targetDir: File, + sparkProperties: Map[String, String], hadoopConf: HadoopConfiguration): String = { require(fileList != null, "fileList cannot be null.") - fileList.split(",").map(downloadFile(_, hadoopConf)).mkString(",") + fileList.split(",") + .map(downloadFile(_, targetDir, sparkProperties, hadoopConf)) + .mkString(",") } /** * Download a file from the remote to a local temporary directory. If the input path points to * a local path, returns it with no operation. + * @param path A file path from where the files will be downloaded. 
+ * @param targetDir A temporary directory for which downloaded files + * @param sparkProperties Spark properties + * @return A comma separated local files list. */ - private[deploy] def downloadFile(path: String, hadoopConf: HadoopConfiguration): String = { + private[deploy] def downloadFile( + path: String, + targetDir: File, + sparkProperties: Map[String, String], + hadoopConf: HadoopConfiguration): String = { require(path != null, "path cannot be null.") val uri = Utils.resolveURI(path) uri.getScheme match { - case "file" | "local" => - path + case "file" | "local" => path + case "http" | "https" | "ftp" => + val uc = uri.toURL.openConnection() + uc match { + case https: HttpsURLConnection => + val trustStore = sparkProperties.get("spark.ssl.fs.trustStore") + .orElse(sparkProperties.get("spark.ssl.trustStore")) + val trustStorePwd = sparkProperties.get("spark.ssl.fs.trustStorePassword") + .orElse(sparkProperties.get("spark.ssl.trustStorePassword")) + .map(_.toCharArray) + .orNull + val protocol = sparkProperties.get("spark.ssl.fs.protocol") + .orElse(sparkProperties.get("spark.ssl.protocol")) + if (protocol.isEmpty) { + printErrorAndExit("spark ssl protocol is required when enabling SSL connection.") + } + + val trustStoreManagers = trustStore.map { t => + var input: InputStream = null + try { + input = new FileInputStream(new File(t)) + val ks = KeyStore.getInstance(KeyStore.getDefaultType) + ks.load(input, trustStorePwd) + val tmf = TrustManagerFactory.getInstance(TrustManagerFactory.getDefaultAlgorithm) + tmf.init(ks) + tmf.getTrustManagers + } finally { + if (input != null) { + input.close() + input = null + } + } + }.getOrElse { + Array({ + new X509TrustManager { + override def getAcceptedIssuers: Array[X509Certificate] = null + override def checkClientTrusted( + x509Certificates: Array[X509Certificate], s: String) {} + override def checkServerTrusted( + x509Certificates: Array[X509Certificate], s: String) {} + }: TrustManager + }) + } + val sslContext = SSLContext.getInstance(protocol.get) + sslContext.init(null, trustStoreManagers, null) + https.setSSLSocketFactory(sslContext.getSocketFactory) + https.setHostnameVerifier(new HostnameVerifier { + override def verify(s: String, sslSession: SSLSession): Boolean = false + }) + + case _ => + } + uc.setConnectTimeout(60 * 1000) + uc.setReadTimeout(60 * 1000) + uc.connect() + val in = uc.getInputStream + val fileName = new Path(uri).getName + val tempFile = new File(targetDir, fileName) + val out = new FileOutputStream(tempFile) + // scalastyle:off println + printStream.println(s"Downloading ${uri.toString} to ${tempFile.getAbsolutePath}.") + // scalastyle:on println + try { + ByteStreams.copy(in, out) + } finally { + in.close() + out.close() + } + tempFile.toURI.toString case _ => val fs = FileSystem.get(uri, hadoopConf) - val tmpFile = new File(Files.createTempDirectory("tmp").toFile, uri.getPath) + val tmpFile = new File(targetDir, new Path(uri).getName) // scalastyle:off println printStream.println(s"Downloading ${uri.toString} to ${tmpFile.getAbsolutePath}.") // scalastyle:on println fs.copyToLocalFile(new Path(uri), new Path(tmpFile.getAbsolutePath)) - Utils.resolveURI(tmpFile.getAbsolutePath).toString + tmpFile.toURI.toString } } + + private def resolveGlobPaths(paths: String, hadoopConf: HadoopConfiguration): String = { + require(paths != null, "paths cannot be null.") + paths.split(",").map(_.trim).filter(_.nonEmpty).flatMap { path => + val uri = Utils.resolveURI(path) + uri.getScheme match { + case "local" | "http" | "https" 
| "ftp" => Array(path) + case _ => + val fs = FileSystem.get(uri, hadoopConf) + Option(fs.globStatus(new Path(uri))).map { status => + status.filter(_.isFile).map(_.getPath.toUri.toString) + }.getOrElse(Array(path)) + } + }.mkString(",") + } } /** Provides utility functions to be used inside SparkSubmit. */ diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala index 7800d3d624e3..fd1521193fde 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala @@ -520,7 +520,7 @@ private[deploy] class SparkSubmitArguments(args: Seq[String], env: Map[String, S | (Default: client). | --class CLASS_NAME Your application's main class (for Java / Scala apps). | --name NAME A name of your application. - | --jars JARS Comma-separated list of local jars to include on the driver + | --jars JARS Comma-separated list of jars to include on the driver | and executor classpaths. | --packages Comma-separated list of maven coordinates of jars to include | on the driver and executor classpaths. Will search the local diff --git a/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala b/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala index b089357e7b86..97357cdbb608 100644 --- a/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala @@ -20,12 +20,14 @@ package org.apache.spark.deploy import java.io._ import java.net.URI import java.nio.charset.StandardCharsets +import java.nio.file.Files +import scala.collection.mutable import scala.collection.mutable.ArrayBuffer import scala.io.Source import com.google.common.io.ByteStreams -import org.apache.commons.io.{FilenameUtils, FileUtils} +import org.apache.commons.io.FileUtils import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.Path import org.scalatest.{BeforeAndAfterEach, Matchers} @@ -42,7 +44,6 @@ import org.apache.spark.TestUtils.JavaSourceFromString import org.apache.spark.scheduler.EventLoggingListener import org.apache.spark.util.{CommandLineUtils, ResetSystemProperties, Utils} - trait TestPrematureExit { suite: SparkFunSuite => @@ -726,6 +727,47 @@ class SparkSubmitSuite Utils.unionFileLists(None, Option("/tmp/a.jar")) should be (Set("/tmp/a.jar")) Utils.unionFileLists(Option("/tmp/a.jar"), None) should be (Set("/tmp/a.jar")) } + + test("support glob path") { + val tmpJarDir = Utils.createTempDir() + val jar1 = TestUtils.createJarWithFiles(Map("test.resource" -> "1"), tmpJarDir) + val jar2 = TestUtils.createJarWithFiles(Map("test.resource" -> "USER"), tmpJarDir) + + val tmpFileDir = Utils.createTempDir() + val file1 = File.createTempFile("tmpFile1", "", tmpFileDir) + val file2 = File.createTempFile("tmpFile2", "", tmpFileDir) + + val tmpPyFileDir = Utils.createTempDir() + val pyFile1 = File.createTempFile("tmpPy1", ".py", tmpPyFileDir) + val pyFile2 = File.createTempFile("tmpPy2", ".egg", tmpPyFileDir) + + val tmpArchiveDir = Utils.createTempDir() + val archive1 = File.createTempFile("archive1", ".zip", tmpArchiveDir) + val archive2 = File.createTempFile("archive2", ".zip", tmpArchiveDir) + + val args = Seq( + "--class", UserClasspathFirstTest.getClass.getName.stripPrefix("$"), + "--name", "testApp", + "--master", "yarn", + "--deploy-mode", "client", + "--jars", s"${tmpJarDir.getAbsolutePath}/*.jar", + "--files", 
s"${tmpFileDir.getAbsolutePath}/tmpFile*", + "--py-files", s"${tmpPyFileDir.getAbsolutePath}/tmpPy*", + "--archives", s"${tmpArchiveDir.getAbsolutePath}/*.zip", + jar2.toString) + + val appArgs = new SparkSubmitArguments(args) + val sysProps = SparkSubmit.prepareSubmitEnvironment(appArgs)._3 + sysProps("spark.yarn.dist.jars").split(",").toSet should be + (Set(jar1.toURI.toString, jar2.toURI.toString)) + sysProps("spark.yarn.dist.files").split(",").toSet should be + (Set(file1.toURI.toString, file2.toURI.toString)) + sysProps("spark.submit.pyFiles").split(",").toSet should be + (Set(pyFile1.getAbsolutePath, pyFile2.getAbsolutePath)) + sysProps("spark.yarn.dist.archives").split(",").toSet should be + (Set(archive1.toURI.toString, archive2.toURI.toString)) + } + // scalastyle:on println private def checkDownloadedFile(sourcePath: String, outputPath: String): Unit = { @@ -738,7 +780,7 @@ class SparkSubmitSuite assert(outputUri.getScheme === "file") // The path and filename are preserved. - assert(outputUri.getPath.endsWith(sourceUri.getPath)) + assert(outputUri.getPath.endsWith(new Path(sourceUri).getName)) assert(FileUtils.readFileToString(new File(outputUri.getPath)) === FileUtils.readFileToString(new File(sourceUri.getPath))) } @@ -752,25 +794,29 @@ class SparkSubmitSuite test("downloadFile - invalid url") { intercept[IOException] { - SparkSubmit.downloadFile("abc:/my/file", new Configuration()) + SparkSubmit.downloadFile( + "abc:/my/file", Utils.createTempDir(), mutable.Map.empty, new Configuration()) } } test("downloadFile - file doesn't exist") { val hadoopConf = new Configuration() + val tmpDir = Utils.createTempDir() // Set s3a implementation to local file system for testing. hadoopConf.set("fs.s3a.impl", "org.apache.spark.deploy.TestFileSystem") // Disable file system impl cache to make sure the test file system is picked up. hadoopConf.set("fs.s3a.impl.disable.cache", "true") intercept[FileNotFoundException] { - SparkSubmit.downloadFile("s3a:/no/such/file", hadoopConf) + SparkSubmit.downloadFile("s3a:/no/such/file", tmpDir, mutable.Map.empty, hadoopConf) } } test("downloadFile does not download local file") { // empty path is considered as local file. - assert(SparkSubmit.downloadFile("", new Configuration()) === "") - assert(SparkSubmit.downloadFile("/local/file", new Configuration()) === "/local/file") + val tmpDir = Files.createTempDirectory("tmp").toFile + assert(SparkSubmit.downloadFile("", tmpDir, mutable.Map.empty, new Configuration()) === "") + assert(SparkSubmit.downloadFile("/local/file", tmpDir, mutable.Map.empty, + new Configuration()) === "/local/file") } test("download one file to local") { @@ -779,12 +825,14 @@ class SparkSubmitSuite val content = "hello, world" FileUtils.write(jarFile, content) val hadoopConf = new Configuration() + val tmpDir = Files.createTempDirectory("tmp").toFile // Set s3a implementation to local file system for testing. hadoopConf.set("fs.s3a.impl", "org.apache.spark.deploy.TestFileSystem") // Disable file system impl cache to make sure the test file system is picked up. 
hadoopConf.set("fs.s3a.impl.disable.cache", "true") val sourcePath = s"s3a://${jarFile.getAbsolutePath}" - val outputPath = SparkSubmit.downloadFile(sourcePath, hadoopConf) + val outputPath = + SparkSubmit.downloadFile(sourcePath, tmpDir, mutable.Map.empty, hadoopConf) checkDownloadedFile(sourcePath, outputPath) deleteTempOutputFile(outputPath) } @@ -795,12 +843,14 @@ class SparkSubmitSuite val content = "hello, world" FileUtils.write(jarFile, content) val hadoopConf = new Configuration() + val tmpDir = Files.createTempDirectory("tmp").toFile // Set s3a implementation to local file system for testing. hadoopConf.set("fs.s3a.impl", "org.apache.spark.deploy.TestFileSystem") // Disable file system impl cache to make sure the test file system is picked up. hadoopConf.set("fs.s3a.impl.disable.cache", "true") val sourcePaths = Seq("/local/file", s"s3a://${jarFile.getAbsolutePath}") - val outputPaths = SparkSubmit.downloadFileList(sourcePaths.mkString(","), hadoopConf).split(",") + val outputPaths = SparkSubmit.downloadFileList( + sourcePaths.mkString(","), tmpDir, mutable.Map.empty, hadoopConf).split(",") assert(outputPaths.length === sourcePaths.length) sourcePaths.zip(outputPaths).foreach { case (sourcePath, outputPath) =>
diff --git a/docs/configuration.md b/docs/configuration.md index c785a664c67b..7dc23e441a7b 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -422,21 +422,21 @@ Apart from these, the following properties are also available, and may be useful spark.files - Comma-separated list of files to be placed in the working directory of each executor. + Comma-separated list of files to be placed in the working directory of each executor. Globs are allowed. spark.submit.pyFiles - Comma-separated list of .zip, .egg, or .py files to place on the PYTHONPATH for Python apps. + Comma-separated list of .zip, .egg, or .py files to place on the PYTHONPATH for Python apps. Globs are allowed. spark.jars - Comma-separated list of local jars to include on the driver and executor classpaths. + Comma-separated list of jars to include on the driver and executor classpaths. Globs are allowed.
From 6ff05a66fe83e721063efe5c28d2ffeb850fecc7 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Thu, 6 Jul 2017 15:47:09 +0800 Subject: [PATCH 052/125] [SPARK-20703][SQL] Associate metrics with data writes onto DataFrameWriter operations ## What changes were proposed in this pull request? Right now in the UI, after SPARK-20213, we can show the operations that write data out. However, there is no way to associate metrics with data writes. We should show the relevant metrics for these operations. #### Supported commands This change supports updating metrics for file-based data writing operations, including `InsertIntoHadoopFsRelationCommand` and `InsertIntoHiveTable`. Supported metrics: * number of written files * number of dynamic partitions * total bytes of written data * total number of output rows * average writing data out time (ms) * (TODO) min/med/max number of output rows per file/partition * (TODO) min/med/max bytes of written data per file/partition #### Commands not supported `InsertIntoDataSourceCommand`, `SaveIntoDataSourceCommand`: These two commands use DataSource APIs to write data out, i.e., the logic of writing data out is delegated to the DataSource implementations, such as `InsertableRelation.insert` and `CreatableRelationProvider.createRelation`. So we can't obtain metrics from the delegated methods for now.
`CreateHiveTableAsSelectCommand`, `CreateDataSourceTableAsSelectCommand`: These two commands invoke other commands to write data out. The invoked commands can even write to non-file-based data sources. We leave them as a future TODO. #### How to update metrics of writing files out A `RunnableCommand` that wants to update metrics needs to override its `metrics` and provide the metrics data structure to `ExecutedCommandExec`. The metrics are prepared during the execution of `FileFormatWriter`. The callback function passed to `FileFormatWriter` will accept the metrics and update them accordingly. There is a metrics-updating function in `RunnableCommand`. At runtime, the function is bound to the Spark context and the `metrics` of `ExecutedCommandExec`, and is passed to `FileFormatWriter`. ## How was this patch tested? Updated unit tests. Author: Liang-Chi Hsieh Closes #18159 from viirya/SPARK-20703-2. --- .../scala/org/apache/spark/util/Utils.scala | 9 ++ .../command/DataWritingCommand.scala | 75 ++++++++++ .../sql/execution/command/commands.scala | 12 ++ .../datasources/FileFormatWriter.scala | 121 ++++++++++++--- .../InsertIntoHadoopFsRelationCommand.scala | 18 ++- .../sql/sources/PartitionedWriteSuite.scala | 21 +-- .../hive/execution/InsertIntoHiveTable.scala | 8 +- .../sql/hive/execution/SQLMetricsSuite.scala | 139 ++++++++++++++++++ 8 files changed, 362 insertions(+), 41 deletions(-) create mode 100644 sql/core/src/main/scala/org/apache/spark/sql/execution/command/DataWritingCommand.scala create mode 100644 sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLMetricsSuite.scala diff --git a/core/src/main/scala/org/apache/spark/util/Utils.scala b/core/src/main/scala/org/apache/spark/util/Utils.scala index 26f61e25da4d..b4caf68f0afa 100644 --- a/core/src/main/scala/org/apache/spark/util/Utils.scala +++ b/core/src/main/scala/org/apache/spark/util/Utils.scala @@ -1002,6 +1002,15 @@ private[spark] object Utils extends Logging { } } + /** + * Lists files recursively. + */ + def recursiveList(f: File): Array[File] = { + require(f.isDirectory) + val current = f.listFiles + current ++ current.filter(_.isDirectory).flatMap(recursiveList) + } + /** * Delete a file or directory and its contents recursively. * Don't follow directories if they are symlinks. diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/DataWritingCommand.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/DataWritingCommand.scala new file mode 100644 index 000000000000..0c381a2c0298 --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/DataWritingCommand.scala @@ -0,0 +1,75 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +package org.apache.spark.sql.execution.command + +import org.apache.spark.SparkContext +import org.apache.spark.sql.execution.SQLExecution +import org.apache.spark.sql.execution.datasources.ExecutedWriteSummary +import org.apache.spark.sql.execution.metric.{SQLMetric, SQLMetrics} + +/** + * A special `RunnableCommand` which writes data out and updates metrics. + */ +trait DataWritingCommand extends RunnableCommand { + + override lazy val metrics: Map[String, SQLMetric] = { + val sparkContext = SparkContext.getActive.get + Map( + "avgTime" -> SQLMetrics.createMetric(sparkContext, "average writing time (ms)"), + "numFiles" -> SQLMetrics.createMetric(sparkContext, "number of written files"), + "numOutputBytes" -> SQLMetrics.createMetric(sparkContext, "bytes of written output"), + "numOutputRows" -> SQLMetrics.createMetric(sparkContext, "number of output rows"), + "numParts" -> SQLMetrics.createMetric(sparkContext, "number of dynamic part") + ) + } + + /** + * Callback function that updates metrics collected from the writing operation. + */ + protected def updateWritingMetrics(writeSummaries: Seq[ExecutedWriteSummary]): Unit = { + val sparkContext = SparkContext.getActive.get + var numPartitions = 0 + var numFiles = 0 + var totalNumBytes: Long = 0L + var totalNumOutput: Long = 0L + var totalWritingTime: Long = 0L + + writeSummaries.foreach { summary => + numPartitions += summary.updatedPartitions.size + numFiles += summary.numOutputFile + totalNumBytes += summary.numOutputBytes + totalNumOutput += summary.numOutputRows + totalWritingTime += summary.totalWritingTime + } + + val avgWritingTime = if (numFiles > 0) { + (totalWritingTime / numFiles).toLong + } else { + 0L + } + + metrics("avgTime").add(avgWritingTime) + metrics("numFiles").add(numFiles) + metrics("numOutputBytes").add(totalNumBytes) + metrics("numOutputRows").add(totalNumOutput) + metrics("numParts").add(numPartitions) + + val executionId = sparkContext.getLocalProperty(SQLExecution.EXECUTION_ID_KEY) + SQLMetrics.postDriverMetricUpdates(sparkContext, executionId, metrics.values.toList) + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/commands.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/commands.scala index 81bc93e7ebcf..7cd4baef89e7 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/commands.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/commands.scala @@ -28,6 +28,7 @@ import org.apache.spark.sql.catalyst.plans.{logical, QueryPlan} import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.execution.SparkPlan import org.apache.spark.sql.execution.debug._ +import org.apache.spark.sql.execution.metric.SQLMetric import org.apache.spark.sql.execution.streaming.{IncrementalExecution, OffsetSeqMetadata} import org.apache.spark.sql.streaming.OutputMode import org.apache.spark.sql.types._ @@ -37,6 +38,11 @@ import org.apache.spark.sql.types._ * wrapped in `ExecutedCommand` during execution. */ trait RunnableCommand extends logical.Command { + + // The map used to record the metrics of running the command. This will be passed to + // `ExecutedCommand` during query planning.
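+ // Commands that write data out override this with actual write metrics (see `DataWritingCommand`).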
+ lazy val metrics: Map[String, SQLMetric] = Map.empty + def run(sparkSession: SparkSession, children: Seq[SparkPlan]): Seq[Row] = { throw new NotImplementedError } @@ -49,8 +55,14 @@ trait RunnableCommand extends logical.Command { /** * A physical operator that executes the run method of a `RunnableCommand` and * saves the result to prevent multiple executions. + * + * @param cmd the `RunnableCommand` this operator will run. + * @param children the children physical plans run by the `RunnableCommand`. */ case class ExecutedCommandExec(cmd: RunnableCommand, children: Seq[SparkPlan]) extends SparkPlan { + + override lazy val metrics: Map[String, SQLMetric] = cmd.metrics + /** * A concrete command should override this lazy field to wrap up any side effects caused by the * command or any other computation that should be evaluated exactly once. The value of this field diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileFormatWriter.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileFormatWriter.scala index 0daffa93b474..64866630623a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileFormatWriter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileFormatWriter.scala @@ -22,7 +22,7 @@ import java.util.{Date, UUID} import scala.collection.mutable import org.apache.hadoop.conf.Configuration -import org.apache.hadoop.fs.Path +import org.apache.hadoop.fs.{FileSystem, Path} import org.apache.hadoop.mapreduce._ import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl @@ -82,7 +82,7 @@ object FileFormatWriter extends Logging { } /** The result of a successful write task. */ - private case class WriteTaskResult(commitMsg: TaskCommitMessage, updatedPartitions: Set[String]) + private case class WriteTaskResult(commitMsg: TaskCommitMessage, summary: ExecutedWriteSummary) /** * Basic work flow of this command is: @@ -104,7 +104,7 @@ object FileFormatWriter extends Logging { hadoopConf: Configuration, partitionColumns: Seq[Attribute], bucketSpec: Option[BucketSpec], - refreshFunction: (Seq[TablePartitionSpec]) => Unit, + refreshFunction: (Seq[ExecutedWriteSummary]) => Unit, options: Map[String, String]): Unit = { val job = Job.getInstance(hadoopConf) @@ -196,12 +196,10 @@ object FileFormatWriter extends Logging { }) val commitMsgs = ret.map(_.commitMsg) - val updatedPartitions = ret.flatMap(_.updatedPartitions) - .distinct.map(PartitioningUtils.parsePathFragment) committer.commitJob(job, commitMsgs) logInfo(s"Job ${job.getJobID} committed.") - refreshFunction(updatedPartitions) + refreshFunction(ret.map(_.summary)) } catch { case cause: Throwable => logError(s"Aborting job ${job.getJobID}.", cause) committer.abortJob(job) @@ -247,9 +245,9 @@ object FileFormatWriter extends Logging { try { Utils.tryWithSafeFinallyAndFailureCallbacks(block = { // Execute the task to write rows out and commit the task. - val outputPartitions = writeTask.execute(iterator) + val summary = writeTask.execute(iterator) writeTask.releaseResources() - WriteTaskResult(committer.commitTask(taskAttemptContext), outputPartitions) + WriteTaskResult(committer.commitTask(taskAttemptContext), summary) })(catchBlock = { // If there is an error, release resource and then abort the task try { @@ -273,12 +271,36 @@ object FileFormatWriter extends Logging { * automatically trigger task aborts.
*/ private trait ExecuteWriteTask { + /** - * Writes data out to files, and then returns the list of partition strings written out. - * The list of partitions is sent back to the driver and used to update the catalog. + * The data structures used to measure metrics during writing. */ - def execute(iterator: Iterator[InternalRow]): Set[String] + protected var totalWritingTime: Long = 0L + protected var timeOnCurrentFile: Long = 0L + protected var numOutputRows: Long = 0L + protected var numOutputBytes: Long = 0L + + /** + * Writes data out to files, and then returns a summary of relevant information, which + * includes the list of partition strings written out. The list of partitions is sent back + * to the driver and used to update the catalog. Other information will be sent back to the + * driver too and used to update the metrics in the UI. + */ + def execute(iterator: Iterator[InternalRow]): ExecutedWriteSummary def releaseResources(): Unit + + /** + * A helper function used to determine the size in bytes of a written file. + */ + protected def getFileSize(conf: Configuration, filePath: String): Long = { + if (filePath != null) { + val path = new Path(filePath) + val fs = path.getFileSystem(conf) + fs.getFileStatus(path).getLen() + } else { + 0L + } + } } /** Writes data to a single directory (used for non-dynamic-partition writes). */ @@ -288,24 +310,26 @@ object FileFormatWriter extends Logging { committer: FileCommitProtocol) extends ExecuteWriteTask { private[this] var currentWriter: OutputWriter = _ + private[this] var currentPath: String = _ private def newOutputWriter(fileCounter: Int): Unit = { val ext = description.outputWriterFactory.getFileExtension(taskAttemptContext) - val tmpFilePath = committer.newTaskTempFile( + currentPath = committer.newTaskTempFile( taskAttemptContext, None, f"-c$fileCounter%03d" + ext) currentWriter = description.outputWriterFactory.newInstance( - path = tmpFilePath, + path = currentPath, dataSchema = description.dataColumns.toStructType, context = taskAttemptContext) } - override def execute(iter: Iterator[InternalRow]): Set[String] = { + override def execute(iter: Iterator[InternalRow]): ExecutedWriteSummary = { var fileCounter = 0 var recordsInFile: Long = 0L newOutputWriter(fileCounter) + while (iter.hasNext) { if (description.maxRecordsPerFile > 0 && recordsInFile >= description.maxRecordsPerFile) { fileCounter += 1 @@ -314,21 +338,35 @@ recordsInFile = 0 releaseResources() + numOutputRows += recordsInFile newOutputWriter(fileCounter) } val internalRow = iter.next() + val startTime = System.nanoTime() currentWriter.write(internalRow) + timeOnCurrentFile += (System.nanoTime() - startTime) recordsInFile += 1 } releaseResources() - Set.empty + numOutputRows += recordsInFile + + ExecutedWriteSummary( + updatedPartitions = Set.empty, + numOutputFile = fileCounter + 1, + numOutputBytes = numOutputBytes, + numOutputRows = numOutputRows, + totalWritingTime = totalWritingTime) } override def releaseResources(): Unit = { if (currentWriter != null) { try { + val startTime = System.nanoTime() currentWriter.close() + totalWritingTime += (timeOnCurrentFile + System.nanoTime() - startTime) / 1000 / 1000 + timeOnCurrentFile = 0 + numOutputBytes += getFileSize(taskAttemptContext.getConfiguration, currentPath) } finally { currentWriter = null } @@ -348,6 +386,8 @@ object FileFormatWriter extends Logging { // currentWriter is initialized whenever we see a new key private var currentWriter: OutputWriter = _ + private var
currentPath: String = _ + /** Expressions that given partition columns build a path string like: col1=val/col2=val/... */ private def partitionPathExpression: Seq[Expression] = { desc.partitionColumns.zipWithIndex.flatMap { case (c, i) => @@ -403,19 +443,19 @@ object FileFormatWriter extends Logging { case _ => None } - val path = if (customPath.isDefined) { + currentPath = if (customPath.isDefined) { committer.newTaskTempFileAbsPath(taskAttemptContext, customPath.get, ext) } else { committer.newTaskTempFile(taskAttemptContext, partDir, ext) } currentWriter = desc.outputWriterFactory.newInstance( - path = path, + path = currentPath, dataSchema = desc.dataColumns.toStructType, context = taskAttemptContext) } - override def execute(iter: Iterator[InternalRow]): Set[String] = { + override def execute(iter: Iterator[InternalRow]): ExecutedWriteSummary = { val getPartitionColsAndBucketId = UnsafeProjection.create( desc.partitionColumns ++ desc.bucketIdExpression, desc.allColumns) @@ -429,15 +469,22 @@ object FileFormatWriter extends Logging { // If anything below fails, we should abort the task. var recordsInFile: Long = 0L var fileCounter = 0 + var totalFileCounter = 0 var currentPartColsAndBucketId: UnsafeRow = null val updatedPartitions = mutable.Set[String]() + for (row <- iter) { val nextPartColsAndBucketId = getPartitionColsAndBucketId(row) if (currentPartColsAndBucketId != nextPartColsAndBucketId) { + if (currentPartColsAndBucketId != null) { + totalFileCounter += (fileCounter + 1) + } + // See a new partition or bucket - write to a new partition dir (or a new bucket file). currentPartColsAndBucketId = nextPartColsAndBucketId.copy() logDebug(s"Writing partition: $currentPartColsAndBucketId") + numOutputRows += recordsInFile recordsInFile = 0 fileCounter = 0 @@ -447,6 +494,8 @@ object FileFormatWriter extends Logging { recordsInFile >= desc.maxRecordsPerFile) { // Exceeded the threshold in terms of the number of records per file. // Create a new file by increasing the file counter. + + numOutputRows += recordsInFile recordsInFile = 0 fileCounter += 1 assert(fileCounter < MAX_FILE_COUNTER, @@ -455,18 +504,33 @@ object FileFormatWriter extends Logging { releaseResources() newOutputWriter(currentPartColsAndBucketId, getPartPath, fileCounter, updatedPartitions) } - + val startTime = System.nanoTime() currentWriter.write(getOutputRow(row)) + timeOnCurrentFile += (System.nanoTime() - startTime) recordsInFile += 1 } + if (currentPartColsAndBucketId != null) { + totalFileCounter += (fileCounter + 1) + } releaseResources() - updatedPartitions.toSet + numOutputRows += recordsInFile + + ExecutedWriteSummary( + updatedPartitions = updatedPartitions.toSet, + numOutputFile = totalFileCounter, + numOutputBytes = numOutputBytes, + numOutputRows = numOutputRows, + totalWritingTime = totalWritingTime) } override def releaseResources(): Unit = { if (currentWriter != null) { try { + val startTime = System.nanoTime() currentWriter.close() + totalWritingTime += (timeOnCurrentFile + System.nanoTime() - startTime) / 1000 / 1000 + timeOnCurrentFile = 0 + numOutputBytes += getFileSize(taskAttemptContext.getConfiguration, currentPath) } finally { currentWriter = null } @@ -474,3 +538,20 @@ object FileFormatWriter extends Logging { } } } + +/** + * Wrapper class for the metrics of writing data out. + * + * @param updatedPartitions the partitions updated during writing data out. Only valid + * for dynamic partition. + * @param numOutputFile the total number of files. 
+ * @param numOutputRows the number of output rows. + * @param numOutputBytes the bytes of output data. + * @param totalWritingTime the total writing time in ms. + */ +case class ExecutedWriteSummary( + updatedPartitions: Set[String], + numOutputFile: Int, + numOutputRows: Long, + numOutputBytes: Long, + totalWritingTime: Long) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/InsertIntoHadoopFsRelationCommand.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/InsertIntoHadoopFsRelationCommand.scala index ab26f2affbce..0031567d3d28 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/InsertIntoHadoopFsRelationCommand.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/InsertIntoHadoopFsRelationCommand.scala @@ -21,6 +21,7 @@ import java.io.IOException import org.apache.hadoop.fs.{FileSystem, Path} +import org.apache.spark.SparkContext import org.apache.spark.internal.io.FileCommitProtocol import org.apache.spark.sql._ import org.apache.spark.sql.catalyst.catalog.{BucketSpec, CatalogTable, CatalogTablePartition} @@ -29,6 +30,7 @@ import org.apache.spark.sql.catalyst.expressions.Attribute import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.execution.SparkPlan import org.apache.spark.sql.execution.command._ +import org.apache.spark.sql.execution.metric.{SQLMetric, SQLMetrics} /** * A command for writing data to a [[HadoopFsRelation]]. Supports both overwriting and appending. @@ -53,7 +55,7 @@ case class InsertIntoHadoopFsRelationCommand( mode: SaveMode, catalogTable: Option[CatalogTable], fileIndex: Option[FileIndex]) - extends RunnableCommand { + extends DataWritingCommand { import org.apache.spark.sql.catalyst.catalog.ExternalCatalogUtils.escapePathName override def children: Seq[LogicalPlan] = query :: Nil @@ -123,8 +125,16 @@ case class InsertIntoHadoopFsRelationCommand( if (doInsertion) { - // Callback for updating metastore partition metadata after the insertion job completes. - def refreshPartitionsCallback(updatedPartitions: Seq[TablePartitionSpec]): Unit = { + // Callback for updating metrics and metastore partition metadata + // after the insertion job completes. + def refreshCallback(summary: Seq[ExecutedWriteSummary]): Unit = { + val updatedPartitions = summary.flatMap(_.updatedPartitions) + .distinct.map(PartitioningUtils.parsePathFragment) + + // Updating metrics. + updateWritingMetrics(summary) + + // Updating metastore partition metadata.
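+ // (Registering newly written partitions below is only needed when the catalog tracks partitions for this table.)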
if (partitionsTrackedByCatalog) { val newPartitions = updatedPartitions.toSet -- initialMatchingPartitions if (newPartitions.nonEmpty) { @@ -154,7 +164,7 @@ case class InsertIntoHadoopFsRelationCommand( hadoopConf = hadoopConf, partitionColumns = partitionColumns, bucketSpec = bucketSpec, - refreshFunction = refreshPartitionsCallback, + refreshFunction = refreshCallback, options = options) // refresh cached files in FileIndex diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/PartitionedWriteSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/PartitionedWriteSuite.scala index a2f3afe3ce23..6f998aa60faf 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/sources/PartitionedWriteSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/PartitionedWriteSuite.scala @@ -91,15 +91,15 @@ class PartitionedWriteSuite extends QueryTest with SharedSQLContext { withTempDir { f => spark.range(start = 0, end = 4, step = 1, numPartitions = 1) .write.option("maxRecordsPerFile", 1).mode("overwrite").parquet(f.getAbsolutePath) - assert(recursiveList(f).count(_.getAbsolutePath.endsWith("parquet")) == 4) + assert(Utils.recursiveList(f).count(_.getAbsolutePath.endsWith("parquet")) == 4) spark.range(start = 0, end = 4, step = 1, numPartitions = 1) .write.option("maxRecordsPerFile", 2).mode("overwrite").parquet(f.getAbsolutePath) - assert(recursiveList(f).count(_.getAbsolutePath.endsWith("parquet")) == 2) + assert(Utils.recursiveList(f).count(_.getAbsolutePath.endsWith("parquet")) == 2) spark.range(start = 0, end = 4, step = 1, numPartitions = 1) .write.option("maxRecordsPerFile", -1).mode("overwrite").parquet(f.getAbsolutePath) - assert(recursiveList(f).count(_.getAbsolutePath.endsWith("parquet")) == 1) + assert(Utils.recursiveList(f).count(_.getAbsolutePath.endsWith("parquet")) == 1) } } @@ -111,7 +111,7 @@ class PartitionedWriteSuite extends QueryTest with SharedSQLContext { .option("maxRecordsPerFile", 1) .mode("overwrite") .parquet(f.getAbsolutePath) - assert(recursiveList(f).count(_.getAbsolutePath.endsWith("parquet")) == 4) + assert(Utils.recursiveList(f).count(_.getAbsolutePath.endsWith("parquet")) == 4) } } @@ -138,14 +138,14 @@ class PartitionedWriteSuite extends QueryTest with SharedSQLContext { val df = Seq((1, ts)).toDF("i", "ts") withTempPath { f => df.write.partitionBy("ts").parquet(f.getAbsolutePath) - val files = recursiveList(f).filter(_.getAbsolutePath.endsWith("parquet")) + val files = Utils.recursiveList(f).filter(_.getAbsolutePath.endsWith("parquet")) assert(files.length == 1) checkPartitionValues(files.head, "2016-12-01 00:00:00") } withTempPath { f => df.write.option(DateTimeUtils.TIMEZONE_OPTION, "GMT") .partitionBy("ts").parquet(f.getAbsolutePath) - val files = recursiveList(f).filter(_.getAbsolutePath.endsWith("parquet")) + val files = Utils.recursiveList(f).filter(_.getAbsolutePath.endsWith("parquet")) assert(files.length == 1) // use timeZone option "GMT" to format partition value. checkPartitionValues(files.head, "2016-12-01 08:00:00") @@ -153,18 +153,11 @@ class PartitionedWriteSuite extends QueryTest with SharedSQLContext { withTempPath { f => withSQLConf(SQLConf.SESSION_LOCAL_TIMEZONE.key -> "GMT") { df.write.partitionBy("ts").parquet(f.getAbsolutePath) - val files = recursiveList(f).filter(_.getAbsolutePath.endsWith("parquet")) + val files = Utils.recursiveList(f).filter(_.getAbsolutePath.endsWith("parquet")) assert(files.length == 1) // if there isn't timeZone option, then use session local timezone. 
checkPartitionValues(files.head, "2016-12-01 08:00:00") } } } - - /** Lists files recursively. */ - private def recursiveList(f: File): Array[File] = { - require(f.isDirectory) - val current = f.listFiles - current ++ current.filter(_.isDirectory).flatMap(recursiveList) - } } diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveTable.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveTable.scala index 223d37523239..cd263e8b6df8 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveTable.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveTable.scala @@ -31,14 +31,16 @@ import org.apache.hadoop.hive.ql.exec.TaskRunner import org.apache.hadoop.hive.ql.ErrorMsg import org.apache.hadoop.hive.ql.plan.TableDesc +import org.apache.spark.SparkContext import org.apache.spark.internal.io.FileCommitProtocol import org.apache.spark.sql.{AnalysisException, Row, SparkSession} import org.apache.spark.sql.catalyst.catalog.CatalogTable import org.apache.spark.sql.catalyst.expressions.Attribute import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.execution.SparkPlan -import org.apache.spark.sql.execution.command.{CommandUtils, RunnableCommand} +import org.apache.spark.sql.execution.command.{CommandUtils, DataWritingCommand} import org.apache.spark.sql.execution.datasources.FileFormatWriter +import org.apache.spark.sql.execution.metric.{SQLMetric, SQLMetrics} import org.apache.spark.sql.hive._ import org.apache.spark.sql.hive.HiveShim.{ShimFileSinkDesc => FileSinkDesc} import org.apache.spark.sql.hive.client.{HiveClientImpl, HiveVersion} @@ -80,7 +82,7 @@ case class InsertIntoHiveTable( partition: Map[String, Option[String]], query: LogicalPlan, overwrite: Boolean, - ifPartitionNotExists: Boolean) extends RunnableCommand { + ifPartitionNotExists: Boolean) extends DataWritingCommand { override def children: Seq[LogicalPlan] = query :: Nil @@ -354,7 +356,7 @@ case class InsertIntoHiveTable( hadoopConf = hadoopConf, partitionColumns = partitionAttributes, bucketSpec = None, - refreshFunction = _ => (), + refreshFunction = updateWritingMetrics, options = Map.empty) if (partition.nonEmpty) { diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLMetricsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLMetricsSuite.scala new file mode 100644 index 000000000000..1ef1988d4c60 --- /dev/null +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLMetricsSuite.scala @@ -0,0 +1,139 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.hive.execution + +import java.io.File + +import org.apache.spark.sql.catalyst.TableIdentifier +import org.apache.spark.sql.functions._ +import org.apache.spark.sql.hive.test.TestHiveSingleton +import org.apache.spark.sql.test.SQLTestUtils +import org.apache.spark.util.Utils + +class SQLMetricsSuite extends SQLTestUtils with TestHiveSingleton { + import spark.implicits._ + + /** + * Get execution metrics for the SQL execution and verify the metric values. + * + * @param metricsValues the expected metric values (numFiles, numPartitions, numOutputRows). + * @param func the function that can produce an execution id after running. + */ + private def verifyWriteDataMetrics(metricsValues: Seq[Int])(func: => Unit): Unit = { + val previousExecutionIds = spark.sharedState.listener.executionIdToData.keySet + // Run the given function to trigger query execution. + func + spark.sparkContext.listenerBus.waitUntilEmpty(10000) + val executionIds = + spark.sharedState.listener.executionIdToData.keySet.diff(previousExecutionIds) + assert(executionIds.size == 1) + val executionId = executionIds.head + + val executionData = spark.sharedState.listener.getExecution(executionId).get + val executedNode = executionData.physicalPlanGraph.nodes.head + + val metricsNames = Seq( + "number of written files", + "number of dynamic part", + "number of output rows") + + val metrics = spark.sharedState.listener.getExecutionMetrics(executionId) + + metricsNames.zip(metricsValues).foreach { case (metricsName, expected) => + val sqlMetric = executedNode.metrics.find(_.name == metricsName) + assert(sqlMetric.isDefined) + val accumulatorId = sqlMetric.get.accumulatorId + val metricValue = metrics(accumulatorId).replaceAll(",", "").toInt + assert(metricValue == expected) + } + + val totalNumBytesMetric = executedNode.metrics.find(_.name == "bytes of written output").get + val totalNumBytes = metrics(totalNumBytesMetric.accumulatorId).replaceAll(",", "").toInt + assert(totalNumBytes > 0) + val writingTimeMetric = executedNode.metrics.find(_.name == "average writing time (ms)").get + val writingTime = metrics(writingTimeMetric.accumulatorId).replaceAll(",", "").toInt + assert(writingTime >= 0) + } + + private def testMetricsNonDynamicPartition( + dataFormat: String, + tableName: String): Unit = { + withTable(tableName) { + Seq((1, 2)).toDF("i", "j") + .write.format(dataFormat).mode("overwrite").saveAsTable(tableName) + + val tableLocation = + new File(spark.sessionState.catalog.getTableMetadata(TableIdentifier(tableName)).location) + + // 2 files, 100 rows, 0 dynamic partitions. + verifyWriteDataMetrics(Seq(2, 0, 100)) { + (0 until 100).map(i => (i, i + 1)).toDF("i", "j").repartition(2) + .write.format(dataFormat).mode("overwrite").insertInto(tableName) + } + assert(Utils.recursiveList(tableLocation).count(_.getName.startsWith("part-")) == 2) + } + } + + private def testMetricsDynamicPartition( + provider: String, + dataFormat: String, + tableName: String): Unit = { + withTempPath { dir => + spark.sql( + s""" + |CREATE TABLE $tableName(a int, b int) + |USING $provider + |PARTITIONED BY(a) + |LOCATION '${dir.toURI}' + """.stripMargin) + val table = spark.sessionState.catalog.getTableMetadata(TableIdentifier(tableName)) + assert(table.location == makeQualifiedPath(dir.getAbsolutePath)) + + val df = spark.range(start = 0, end = 40, step = 1, numPartitions = 1) + .selectExpr("id a", "id b") + + // 40 files, 80 rows, 40 dynamic partitions.
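+ // (df.union(df) doubles the 40 distinct ids to 80 rows; each of the 40 values of 'a' becomes one dynamic partition, written as a single file.)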
+ verifyWriteDataMetrics(Seq(40, 40, 80)) { + df.union(df).repartition(2, $"a") + .write + .format(dataFormat) + .mode("overwrite") + .insertInto(tableName) + } + assert(Utils.recursiveList(dir).count(_.getName.startsWith("part-")) == 40) + } + } + + test("writing data out metrics: parquet") { + testMetricsNonDynamicPartition("parquet", "t1") + } + + test("writing data out metrics with dynamic partition: parquet") { + testMetricsDynamicPartition("parquet", "parquet", "t1") + } + + test("writing data out metrics: hive") { + testMetricsNonDynamicPartition("hive", "t1") + } + + test("writing data out metrics dynamic partition: hive") { + withSQLConf(("hive.exec.dynamic.partition.mode", "nonstrict")) { + testMetricsDynamicPartition("hive", "hive", "t1") + } + } +} From b8e4d567a7d6c2ff277700d4e7707e57e87c7808 Mon Sep 17 00:00:00 2001 From: wangzhenhua Date: Thu, 6 Jul 2017 16:00:31 +0800 Subject: [PATCH 053/125] [SPARK-21324][TEST] Improve statistics test suites ## What changes were proposed in this pull request? 1. move `StatisticsCollectionTestBase` to a separate file. 2. move some test cases to `StatisticsCollectionSuite` so that `hive/StatisticsSuite` only keeps tests that need hive support. 3. clear up some test cases. ## How was this patch tested? Existing tests. Author: wangzhenhua Author: Zhenhua Wang Closes #18545 from wzhfy/cleanStatSuites. --- .../spark/sql/StatisticsCollectionSuite.scala | 193 +++--------------- .../sql/StatisticsCollectionTestBase.scala | 192 +++++++++++++++++ .../spark/sql/hive/StatisticsSuite.scala | 124 +++-------- 3 files changed, 258 insertions(+), 251 deletions(-) create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionTestBase.scala diff --git a/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala index d9392de37a81..843ced7f0e69 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala @@ -17,19 +17,12 @@ package org.apache.spark.sql -import java.{lang => jl} -import java.sql.{Date, Timestamp} - import scala.collection.mutable -import scala.util.Random import org.apache.spark.sql.catalyst.TableIdentifier -import org.apache.spark.sql.catalyst.catalog.{CatalogRelation, CatalogStatistics} import org.apache.spark.sql.catalyst.plans.logical._ -import org.apache.spark.sql.catalyst.util.DateTimeUtils -import org.apache.spark.sql.execution.datasources.LogicalRelation -import org.apache.spark.sql.internal.{SQLConf, StaticSQLConf} -import org.apache.spark.sql.test.{SharedSQLContext, SQLTestUtils} +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.test.SharedSQLContext import org.apache.spark.sql.test.SQLTestData.ArrayData import org.apache.spark.sql.types._ @@ -58,6 +51,37 @@ class StatisticsCollectionSuite extends StatisticsCollectionTestBase with Shared } } + test("analyzing views is not supported") { + def assertAnalyzeUnsupported(analyzeCommand: String): Unit = { + val err = intercept[AnalysisException] { + sql(analyzeCommand) + } + assert(err.message.contains("ANALYZE TABLE is not supported")) + } + + val tableName = "tbl" + withTable(tableName) { + spark.range(10).write.saveAsTable(tableName) + val viewName = "view" + withView(viewName) { + sql(s"CREATE VIEW $viewName AS SELECT * FROM $tableName") + assertAnalyzeUnsupported(s"ANALYZE TABLE $viewName COMPUTE STATISTICS") + 
assertAnalyzeUnsupported(s"ANALYZE TABLE $viewName COMPUTE STATISTICS FOR COLUMNS id") + } + } + } + + test("statistics collection of a table with zero column") { + val table_no_cols = "table_no_cols" + withTable(table_no_cols) { + val rddNoCols = sparkContext.parallelize(1 to 10).map(_ => Row.empty) + val dfNoCols = spark.createDataFrame(rddNoCols, StructType(Seq.empty)) + dfNoCols.write.format("json").saveAsTable(table_no_cols) + sql(s"ANALYZE TABLE $table_no_cols COMPUTE STATISTICS") + checkTableStats(table_no_cols, hasSizeInBytes = true, expectedRowCounts = Some(10)) + } + } + test("analyze column command - unsupported types and invalid columns") { val tableName = "column_stats_test1" withTable(tableName) { @@ -239,154 +263,3 @@ class StatisticsCollectionSuite extends StatisticsCollectionTestBase with Shared } } - - -/** - * The base for test cases that we want to include in both the hive module (for verifying behavior - * when using the Hive external catalog) as well as in the sql/core module. - */ -abstract class StatisticsCollectionTestBase extends QueryTest with SQLTestUtils { - import testImplicits._ - - private val dec1 = new java.math.BigDecimal("1.000000000000000000") - private val dec2 = new java.math.BigDecimal("8.000000000000000000") - private val d1 = Date.valueOf("2016-05-08") - private val d2 = Date.valueOf("2016-05-09") - private val t1 = Timestamp.valueOf("2016-05-08 00:00:01") - private val t2 = Timestamp.valueOf("2016-05-09 00:00:02") - - /** - * Define a very simple 3 row table used for testing column serialization. - * Note: last column is seq[int] which doesn't support stats collection. - */ - protected val data = Seq[ - (jl.Boolean, jl.Byte, jl.Short, jl.Integer, jl.Long, - jl.Double, jl.Float, java.math.BigDecimal, - String, Array[Byte], Date, Timestamp, - Seq[Int])]( - (false, 1.toByte, 1.toShort, 1, 1L, 1.0, 1.0f, dec1, "s1", "b1".getBytes, d1, t1, null), - (true, 2.toByte, 3.toShort, 4, 5L, 6.0, 7.0f, dec2, "ss9", "bb0".getBytes, d2, t2, null), - (null, null, null, null, null, null, null, null, null, null, null, null, null) - ) - - /** A mapping from column to the stats collected. 
*/ - protected val stats = mutable.LinkedHashMap( - "cbool" -> ColumnStat(2, Some(false), Some(true), 1, 1, 1), - "cbyte" -> ColumnStat(2, Some(1.toByte), Some(2.toByte), 1, 1, 1), - "cshort" -> ColumnStat(2, Some(1.toShort), Some(3.toShort), 1, 2, 2), - "cint" -> ColumnStat(2, Some(1), Some(4), 1, 4, 4), - "clong" -> ColumnStat(2, Some(1L), Some(5L), 1, 8, 8), - "cdouble" -> ColumnStat(2, Some(1.0), Some(6.0), 1, 8, 8), - "cfloat" -> ColumnStat(2, Some(1.0f), Some(7.0f), 1, 4, 4), - "cdecimal" -> ColumnStat(2, Some(Decimal(dec1)), Some(Decimal(dec2)), 1, 16, 16), - "cstring" -> ColumnStat(2, None, None, 1, 3, 3), - "cbinary" -> ColumnStat(2, None, None, 1, 3, 3), - "cdate" -> ColumnStat(2, Some(DateTimeUtils.fromJavaDate(d1)), - Some(DateTimeUtils.fromJavaDate(d2)), 1, 4, 4), - "ctimestamp" -> ColumnStat(2, Some(DateTimeUtils.fromJavaTimestamp(t1)), - Some(DateTimeUtils.fromJavaTimestamp(t2)), 1, 8, 8) - ) - - private val randomName = new Random(31) - - def checkTableStats( - tableName: String, - hasSizeInBytes: Boolean, - expectedRowCounts: Option[Int]): Option[CatalogStatistics] = { - val stats = spark.sessionState.catalog.getTableMetadata(TableIdentifier(tableName)).stats - if (hasSizeInBytes || expectedRowCounts.nonEmpty) { - assert(stats.isDefined) - assert(stats.get.sizeInBytes >= 0) - assert(stats.get.rowCount === expectedRowCounts) - } else { - assert(stats.isEmpty) - } - - stats - } - - /** - * Compute column stats for the given DataFrame and compare it with colStats. - */ - def checkColStats( - df: DataFrame, - colStats: mutable.LinkedHashMap[String, ColumnStat]): Unit = { - val tableName = "column_stats_test_" + randomName.nextInt(1000) - withTable(tableName) { - df.write.saveAsTable(tableName) - - // Collect statistics - sql(s"analyze table $tableName compute STATISTICS FOR COLUMNS " + - colStats.keys.mkString(", ")) - - // Validate statistics - val table = spark.sessionState.catalog.getTableMetadata(TableIdentifier(tableName)) - assert(table.stats.isDefined) - assert(table.stats.get.colStats.size == colStats.size) - - colStats.foreach { case (k, v) => - withClue(s"column $k") { - assert(table.stats.get.colStats(k) == v) - } - } - } - } - - // This test will be run twice: with and without Hive support - test("SPARK-18856: non-empty partitioned table should not report zero size") { - withTable("ds_tbl", "hive_tbl") { - spark.range(100).select($"id", $"id" % 5 as "p").write.partitionBy("p").saveAsTable("ds_tbl") - val stats = spark.table("ds_tbl").queryExecution.optimizedPlan.stats - assert(stats.sizeInBytes > 0, "non-empty partitioned table should not report zero size.") - - if (spark.conf.get(StaticSQLConf.CATALOG_IMPLEMENTATION) == "hive") { - sql("CREATE TABLE hive_tbl(i int) PARTITIONED BY (j int)") - sql("INSERT INTO hive_tbl PARTITION(j=1) SELECT 1") - val stats2 = spark.table("hive_tbl").queryExecution.optimizedPlan.stats - assert(stats2.sizeInBytes > 0, "non-empty partitioned table should not report zero size.") - } - } - } - - // This test will be run twice: with and without Hive support - test("conversion from CatalogStatistics to Statistics") { - withTable("ds_tbl", "hive_tbl") { - // Test data source table - checkStatsConversion(tableName = "ds_tbl", isDatasourceTable = true) - // Test hive serde table - if (spark.conf.get(StaticSQLConf.CATALOG_IMPLEMENTATION) == "hive") { - checkStatsConversion(tableName = "hive_tbl", isDatasourceTable = false) - } - } - } - - private def checkStatsConversion(tableName: String, isDatasourceTable: Boolean): Unit = { - // Create an 
empty table and run analyze command on it. - val createTableSql = if (isDatasourceTable) { - s"CREATE TABLE $tableName (c1 INT, c2 STRING) USING PARQUET" - } else { - s"CREATE TABLE $tableName (c1 INT, c2 STRING)" - } - sql(createTableSql) - // Analyze only one column. - sql(s"ANALYZE TABLE $tableName COMPUTE STATISTICS FOR COLUMNS c1") - val (relation, catalogTable) = spark.table(tableName).queryExecution.analyzed.collect { - case catalogRel: CatalogRelation => (catalogRel, catalogRel.tableMeta) - case logicalRel: LogicalRelation => (logicalRel, logicalRel.catalogTable.get) - }.head - val emptyColStat = ColumnStat(0, None, None, 0, 4, 4) - // Check catalog statistics - assert(catalogTable.stats.isDefined) - assert(catalogTable.stats.get.sizeInBytes == 0) - assert(catalogTable.stats.get.rowCount == Some(0)) - assert(catalogTable.stats.get.colStats == Map("c1" -> emptyColStat)) - - // Check relation statistics - assert(relation.stats.sizeInBytes == 0) - assert(relation.stats.rowCount == Some(0)) - assert(relation.stats.attributeStats.size == 1) - val (attribute, colStat) = relation.stats.attributeStats.head - assert(attribute.name == "c1") - assert(colStat == emptyColStat) - } -} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionTestBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionTestBase.scala new file mode 100644 index 000000000000..41569762d3c5 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionTestBase.scala @@ -0,0 +1,192 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql + +import java.{lang => jl} +import java.sql.{Date, Timestamp} + +import scala.collection.mutable +import scala.util.Random + +import org.apache.spark.sql.catalyst.TableIdentifier +import org.apache.spark.sql.catalyst.catalog.{CatalogRelation, CatalogStatistics, CatalogTable} +import org.apache.spark.sql.catalyst.plans.logical.ColumnStat +import org.apache.spark.sql.catalyst.util.DateTimeUtils +import org.apache.spark.sql.execution.datasources.LogicalRelation +import org.apache.spark.sql.internal.StaticSQLConf +import org.apache.spark.sql.test.SQLTestUtils +import org.apache.spark.sql.types.Decimal + + +/** + * The base for statistics test cases that we want to include in both the hive module (for + * verifying behavior when using the Hive external catalog) as well as in the sql/core module. 
+ */ +abstract class StatisticsCollectionTestBase extends QueryTest with SQLTestUtils { + import testImplicits._ + + private val dec1 = new java.math.BigDecimal("1.000000000000000000") + private val dec2 = new java.math.BigDecimal("8.000000000000000000") + private val d1 = Date.valueOf("2016-05-08") + private val d2 = Date.valueOf("2016-05-09") + private val t1 = Timestamp.valueOf("2016-05-08 00:00:01") + private val t2 = Timestamp.valueOf("2016-05-09 00:00:02") + + /** + * Define a very simple 3 row table used for testing column serialization. + * Note: last column is seq[int] which doesn't support stats collection. + */ + protected val data = Seq[ + (jl.Boolean, jl.Byte, jl.Short, jl.Integer, jl.Long, + jl.Double, jl.Float, java.math.BigDecimal, + String, Array[Byte], Date, Timestamp, + Seq[Int])]( + (false, 1.toByte, 1.toShort, 1, 1L, 1.0, 1.0f, dec1, "s1", "b1".getBytes, d1, t1, null), + (true, 2.toByte, 3.toShort, 4, 5L, 6.0, 7.0f, dec2, "ss9", "bb0".getBytes, d2, t2, null), + (null, null, null, null, null, null, null, null, null, null, null, null, null) + ) + + /** A mapping from column to the stats collected. */ + protected val stats = mutable.LinkedHashMap( + "cbool" -> ColumnStat(2, Some(false), Some(true), 1, 1, 1), + "cbyte" -> ColumnStat(2, Some(1.toByte), Some(2.toByte), 1, 1, 1), + "cshort" -> ColumnStat(2, Some(1.toShort), Some(3.toShort), 1, 2, 2), + "cint" -> ColumnStat(2, Some(1), Some(4), 1, 4, 4), + "clong" -> ColumnStat(2, Some(1L), Some(5L), 1, 8, 8), + "cdouble" -> ColumnStat(2, Some(1.0), Some(6.0), 1, 8, 8), + "cfloat" -> ColumnStat(2, Some(1.0f), Some(7.0f), 1, 4, 4), + "cdecimal" -> ColumnStat(2, Some(Decimal(dec1)), Some(Decimal(dec2)), 1, 16, 16), + "cstring" -> ColumnStat(2, None, None, 1, 3, 3), + "cbinary" -> ColumnStat(2, None, None, 1, 3, 3), + "cdate" -> ColumnStat(2, Some(DateTimeUtils.fromJavaDate(d1)), + Some(DateTimeUtils.fromJavaDate(d2)), 1, 4, 4), + "ctimestamp" -> ColumnStat(2, Some(DateTimeUtils.fromJavaTimestamp(t1)), + Some(DateTimeUtils.fromJavaTimestamp(t2)), 1, 8, 8) + ) + + private val randomName = new Random(31) + + def getCatalogTable(tableName: String): CatalogTable = { + spark.sessionState.catalog.getTableMetadata(TableIdentifier(tableName)) + } + + def getCatalogStatistics(tableName: String): CatalogStatistics = { + getCatalogTable(tableName).stats.get + } + + def checkTableStats( + tableName: String, + hasSizeInBytes: Boolean, + expectedRowCounts: Option[Int]): Option[CatalogStatistics] = { + val stats = getCatalogTable(tableName).stats + if (hasSizeInBytes || expectedRowCounts.nonEmpty) { + assert(stats.isDefined) + assert(stats.get.sizeInBytes >= 0) + assert(stats.get.rowCount === expectedRowCounts) + } else { + assert(stats.isEmpty) + } + + stats + } + + /** + * Compute column stats for the given DataFrame and compare it with colStats. 
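+ * Note: colStats is a LinkedHashMap, so the columns are analyzed and validated in a deterministic order.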
+ */ + def checkColStats( + df: DataFrame, + colStats: mutable.LinkedHashMap[String, ColumnStat]): Unit = { + val tableName = "column_stats_test_" + randomName.nextInt(1000) + withTable(tableName) { + df.write.saveAsTable(tableName) + + // Collect statistics + sql(s"analyze table $tableName compute STATISTICS FOR COLUMNS " + + colStats.keys.mkString(", ")) + + // Validate statistics + val table = getCatalogTable(tableName) + assert(table.stats.isDefined) + assert(table.stats.get.colStats.size == colStats.size) + + colStats.foreach { case (k, v) => + withClue(s"column $k") { + assert(table.stats.get.colStats(k) == v) + } + } + } + } + + // This test will be run twice: with and without Hive support + test("SPARK-18856: non-empty partitioned table should not report zero size") { + withTable("ds_tbl", "hive_tbl") { + spark.range(100).select($"id", $"id" % 5 as "p").write.partitionBy("p").saveAsTable("ds_tbl") + val stats = spark.table("ds_tbl").queryExecution.optimizedPlan.stats + assert(stats.sizeInBytes > 0, "non-empty partitioned table should not report zero size.") + + if (spark.conf.get(StaticSQLConf.CATALOG_IMPLEMENTATION) == "hive") { + sql("CREATE TABLE hive_tbl(i int) PARTITIONED BY (j int)") + sql("INSERT INTO hive_tbl PARTITION(j=1) SELECT 1") + val stats2 = spark.table("hive_tbl").queryExecution.optimizedPlan.stats + assert(stats2.sizeInBytes > 0, "non-empty partitioned table should not report zero size.") + } + } + } + + // This test will be run twice: with and without Hive support + test("conversion from CatalogStatistics to Statistics") { + withTable("ds_tbl", "hive_tbl") { + // Test data source table + checkStatsConversion(tableName = "ds_tbl", isDatasourceTable = true) + // Test hive serde table + if (spark.conf.get(StaticSQLConf.CATALOG_IMPLEMENTATION) == "hive") { + checkStatsConversion(tableName = "hive_tbl", isDatasourceTable = false) + } + } + } + + private def checkStatsConversion(tableName: String, isDatasourceTable: Boolean): Unit = { + // Create an empty table and run analyze command on it. + val createTableSql = if (isDatasourceTable) { + s"CREATE TABLE $tableName (c1 INT, c2 STRING) USING PARQUET" + } else { + s"CREATE TABLE $tableName (c1 INT, c2 STRING)" + } + sql(createTableSql) + // Analyze only one column. 
+ sql(s"ANALYZE TABLE $tableName COMPUTE STATISTICS FOR COLUMNS c1") + val (relation, catalogTable) = spark.table(tableName).queryExecution.analyzed.collect { + case catalogRel: CatalogRelation => (catalogRel, catalogRel.tableMeta) + case logicalRel: LogicalRelation => (logicalRel, logicalRel.catalogTable.get) + }.head + val emptyColStat = ColumnStat(0, None, None, 0, 4, 4) + // Check catalog statistics + assert(catalogTable.stats.isDefined) + assert(catalogTable.stats.get.sizeInBytes == 0) + assert(catalogTable.stats.get.rowCount == Some(0)) + assert(catalogTable.stats.get.colStats == Map("c1" -> emptyColStat)) + + // Check relation statistics + assert(relation.stats.sizeInBytes == 0) + assert(relation.stats.rowCount == Some(0)) + assert(relation.stats.attributeStats.size == 1) + val (attribute, colStat) = relation.stats.attributeStats.head + assert(attribute.name == "c1") + assert(colStat == emptyColStat) + } +} diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala index c601038a2b0a..e00fa64e9f2c 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala @@ -25,7 +25,7 @@ import scala.util.matching.Regex import org.apache.spark.sql._ import org.apache.spark.sql.catalyst.TableIdentifier -import org.apache.spark.sql.catalyst.catalog.{CatalogRelation, CatalogStatistics, CatalogTable} +import org.apache.spark.sql.catalyst.catalog.{CatalogRelation, CatalogStatistics} import org.apache.spark.sql.catalyst.util.StringUtils import org.apache.spark.sql.execution.command.DDLUtils import org.apache.spark.sql.execution.datasources.LogicalRelation @@ -33,7 +33,6 @@ import org.apache.spark.sql.execution.joins._ import org.apache.spark.sql.hive.HiveExternalCatalog._ import org.apache.spark.sql.hive.test.TestHiveSingleton import org.apache.spark.sql.internal.SQLConf -import org.apache.spark.sql.types._ class StatisticsSuite extends StatisticsCollectionTestBase with TestHiveSingleton { @@ -82,58 +81,42 @@ class StatisticsSuite extends StatisticsCollectionTestBase with TestHiveSingleto spark.table(tableName).queryExecution.analyzed.stats.sizeInBytes // Non-partitioned table - sql("CREATE TABLE analyzeTable (key STRING, value STRING)").collect() - sql("INSERT INTO TABLE analyzeTable SELECT * FROM src").collect() - sql("INSERT INTO TABLE analyzeTable SELECT * FROM src").collect() + val nonPartTable = "non_part_table" + withTable(nonPartTable) { + sql(s"CREATE TABLE $nonPartTable (key STRING, value STRING)") + sql(s"INSERT INTO TABLE $nonPartTable SELECT * FROM src") + sql(s"INSERT INTO TABLE $nonPartTable SELECT * FROM src") - sql("ANALYZE TABLE analyzeTable COMPUTE STATISTICS noscan") + sql(s"ANALYZE TABLE $nonPartTable COMPUTE STATISTICS noscan") - assert(queryTotalSize("analyzeTable") === BigInt(11624)) - - sql("DROP TABLE analyzeTable").collect() + assert(queryTotalSize(nonPartTable) === BigInt(11624)) + } // Partitioned table - sql( - """ - |CREATE TABLE analyzeTable_part (key STRING, value STRING) PARTITIONED BY (ds STRING) - """.stripMargin).collect() - sql( - """ - |INSERT INTO TABLE analyzeTable_part PARTITION (ds='2010-01-01') - |SELECT * FROM src - """.stripMargin).collect() - sql( - """ - |INSERT INTO TABLE analyzeTable_part PARTITION (ds='2010-01-02') - |SELECT * FROM src - """.stripMargin).collect() - sql( - """ - |INSERT INTO TABLE analyzeTable_part PARTITION (ds='2010-01-03') - |SELECT 
* FROM src - """.stripMargin).collect() + val partTable = "part_table" + withTable(partTable) { + sql(s"CREATE TABLE $partTable (key STRING, value STRING) PARTITIONED BY (ds STRING)") + sql(s"INSERT INTO TABLE $partTable PARTITION (ds='2010-01-01') SELECT * FROM src") + sql(s"INSERT INTO TABLE $partTable PARTITION (ds='2010-01-02') SELECT * FROM src") + sql(s"INSERT INTO TABLE $partTable PARTITION (ds='2010-01-03') SELECT * FROM src") - assert(queryTotalSize("analyzeTable_part") === spark.sessionState.conf.defaultSizeInBytes) + assert(queryTotalSize(partTable) === spark.sessionState.conf.defaultSizeInBytes) - sql("ANALYZE TABLE analyzeTable_part COMPUTE STATISTICS noscan") + sql(s"ANALYZE TABLE $partTable COMPUTE STATISTICS noscan") - assert(queryTotalSize("analyzeTable_part") === BigInt(17436)) - - sql("DROP TABLE analyzeTable_part").collect() + assert(queryTotalSize(partTable) === BigInt(17436)) + } // Try to analyze a temp table - sql("""SELECT * FROM src""").createOrReplaceTempView("tempTable") - intercept[AnalysisException] { - sql("ANALYZE TABLE tempTable COMPUTE STATISTICS") + withView("tempTable") { + sql("""SELECT * FROM src""").createOrReplaceTempView("tempTable") + intercept[AnalysisException] { + sql("ANALYZE TABLE tempTable COMPUTE STATISTICS") + } } - spark.sessionState.catalog.dropTable( - TableIdentifier("tempTable"), ignoreIfNotExists = true, purge = false) } test("SPARK-21079 - analyze table with location different than that of individual partitions") { - def queryTotalSize(tableName: String): BigInt = - spark.table(tableName).queryExecution.analyzed.stats.sizeInBytes - val tableName = "analyzeTable_part" withTable(tableName) { withTempPath { path => @@ -148,15 +131,12 @@ class StatisticsSuite extends StatisticsCollectionTestBase with TestHiveSingleto sql(s"ANALYZE TABLE $tableName COMPUTE STATISTICS noscan") - assert(queryTotalSize(tableName) === BigInt(17436)) + assert(getCatalogStatistics(tableName).sizeInBytes === BigInt(17436)) } } } test("SPARK-21079 - analyze partitioned table with only a subset of partitions visible") { - def queryTotalSize(tableName: String): BigInt = - spark.table(tableName).queryExecution.analyzed.stats.sizeInBytes - val sourceTableName = "analyzeTable_part" val tableName = "analyzeTable_part_vis" withTable(sourceTableName, tableName) { @@ -188,39 +168,19 @@ class StatisticsSuite extends StatisticsCollectionTestBase with TestHiveSingleto // Register only one of the partitions found on disk val ds = partitionDates.head - sql(s"ALTER TABLE $tableName ADD PARTITION (ds='$ds')").collect() + sql(s"ALTER TABLE $tableName ADD PARTITION (ds='$ds')") // Analyze original table - expect 3 partitions sql(s"ANALYZE TABLE $sourceTableName COMPUTE STATISTICS noscan") - assert(queryTotalSize(sourceTableName) === BigInt(3 * 5812)) + assert(getCatalogStatistics(sourceTableName).sizeInBytes === BigInt(3 * 5812)) // Analyze partial-copy table - expect only 1 partition sql(s"ANALYZE TABLE $tableName COMPUTE STATISTICS noscan") - assert(queryTotalSize(tableName) === BigInt(5812)) + assert(getCatalogStatistics(tableName).sizeInBytes === BigInt(5812)) } } } - test("analyzing views is not supported") { - def assertAnalyzeUnsupported(analyzeCommand: String): Unit = { - val err = intercept[AnalysisException] { - sql(analyzeCommand) - } - assert(err.message.contains("ANALYZE TABLE is not supported")) - } - - val tableName = "tbl" - withTable(tableName) { - spark.range(10).write.saveAsTable(tableName) - val viewName = "view" - withView(viewName) { - sql(s"CREATE VIEW 
$viewName AS SELECT * FROM $tableName") - assertAnalyzeUnsupported(s"ANALYZE TABLE $viewName COMPUTE STATISTICS") - assertAnalyzeUnsupported(s"ANALYZE TABLE $viewName COMPUTE STATISTICS FOR COLUMNS id") - } - } - } - test("test table-level statistics for hive tables created in HiveExternalCatalog") { val textTable = "textTable" withTable(textTable) { @@ -290,8 +250,7 @@ class StatisticsSuite extends StatisticsCollectionTestBase with TestHiveSingleto if (analyzedByHive) hiveClient.runSqlHive(s"ANALYZE TABLE $tabName COMPUTE STATISTICS") val describeResult1 = hiveClient.runSqlHive(s"DESCRIBE FORMATTED $tabName") - val tableMetadata = - spark.sessionState.catalog.getTableMetadata(TableIdentifier(tabName)).properties + val tableMetadata = getCatalogTable(tabName).properties // statistics info is not contained in the metadata of the original table assert(Seq(StatsSetupConst.COLUMN_STATS_ACCURATE, StatsSetupConst.NUM_FILES, @@ -327,8 +286,7 @@ class StatisticsSuite extends StatisticsCollectionTestBase with TestHiveSingleto val tabName = "tab1" withTable(tabName) { createNonPartitionedTable(tabName, analyzedByHive = false, analyzedBySpark = false) - checkTableStats( - tabName, hasSizeInBytes = true, expectedRowCounts = None) + checkTableStats(tabName, hasSizeInBytes = true, expectedRowCounts = None) // ALTER TABLE SET TBLPROPERTIES invalidates some contents of Hive specific statistics // This is triggered by the Hive alterTable API @@ -370,10 +328,6 @@ class StatisticsSuite extends StatisticsCollectionTestBase with TestHiveSingleto } test("alter table should not have the side effect to store statistics in Spark side") { - def getCatalogTable(tableName: String): CatalogTable = { - spark.sessionState.catalog.getTableMetadata(TableIdentifier(tableName)) - } - val table = "alter_table_side_effect" withTable(table) { sql(s"CREATE TABLE $table (i string, j string)") @@ -637,12 +591,12 @@ class StatisticsSuite extends StatisticsCollectionTestBase with TestHiveSingleto // the default value for `spark.sql.hive.convertMetastoreParquet` is true, here we just set it // for robustness - withSQLConf("spark.sql.hive.convertMetastoreParquet" -> "true") { + withSQLConf(HiveUtils.CONVERT_METASTORE_PARQUET.key -> "true") { checkTableStats(parquetTable, hasSizeInBytes = false, expectedRowCounts = None) sql(s"ANALYZE TABLE $parquetTable COMPUTE STATISTICS") checkTableStats(parquetTable, hasSizeInBytes = true, expectedRowCounts = Some(500)) } - withSQLConf("spark.sql.hive.convertMetastoreOrc" -> "true") { + withSQLConf(HiveUtils.CONVERT_METASTORE_ORC.key -> "true") { // We still can get tableSize from Hive before Analyze checkTableStats(orcTable, hasSizeInBytes = true, expectedRowCounts = None) sql(s"ANALYZE TABLE $orcTable COMPUTE STATISTICS") @@ -759,8 +713,7 @@ class StatisticsSuite extends StatisticsCollectionTestBase with TestHiveSingleto val parquetTable = "parquetTable" withTable(parquetTable) { sql(createTableCmd) - val catalogTable = spark.sessionState.catalog.getTableMetadata( - TableIdentifier(parquetTable)) + val catalogTable = getCatalogTable(parquetTable) assert(DDLUtils.isDatasourceTable(catalogTable)) // Add a filter to avoid creating too many partitions @@ -795,17 +748,6 @@ class StatisticsSuite extends StatisticsCollectionTestBase with TestHiveSingleto "partitioned data source table", "CREATE TABLE parquetTable (key STRING, value STRING) USING PARQUET PARTITIONED BY (key)") - test("statistics collection of a table with zero column") { - val table_no_cols = "table_no_cols" - withTable(table_no_cols) { 
- val rddNoCols = sparkContext.parallelize(1 to 10).map(_ => Row.empty) - val dfNoCols = spark.createDataFrame(rddNoCols, StructType(Seq.empty)) - dfNoCols.write.format("json").saveAsTable(table_no_cols) - sql(s"ANALYZE TABLE $table_no_cols COMPUTE STATISTICS") - checkTableStats(table_no_cols, hasSizeInBytes = true, expectedRowCounts = Some(10)) - } - } - /** Used to test refreshing cached metadata once table stats are updated. */ private def getStatsBeforeAfterUpdate(isAnalyzeColumns: Boolean) : (CatalogStatistics, CatalogStatistics) = { From d540dfbff33aa2f8571e0de149dfa3f4e7321113 Mon Sep 17 00:00:00 2001 From: Wang Gengliang Date: Thu, 6 Jul 2017 19:12:15 +0800 Subject: [PATCH 054/125] [SPARK-21273][SQL][FOLLOW-UP] Add missing test cases back and revise code style ## What changes were proposed in this pull request? Add missing test cases back and revise code style Follow up the previous PR: https://github.com/apache/spark/pull/18479 ## How was this patch tested? Unit test Please review http://spark.apache.org/contributing.html before opening a pull request. Author: Wang Gengliang Closes #18548 from gengliangwang/stat_propagation_revise. --- .../plans/logical/LogicalPlanVisitor.scala | 2 +- .../BasicStatsEstimationSuite.scala | 45 +++++++++++++++++++ 2 files changed, 46 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlanVisitor.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlanVisitor.scala index b23045810a4f..2652f6d72730 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlanVisitor.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlanVisitor.scala @@ -38,10 +38,10 @@ trait LogicalPlanVisitor[T] { case p: Range => visitRange(p) case p: Repartition => visitRepartition(p) case p: RepartitionByExpression => visitRepartitionByExpr(p) + case p: ResolvedHint => visitHint(p) case p: Sample => visitSample(p) case p: ScriptTransformation => visitScriptTransform(p) case p: Union => visitUnion(p) - case p: ResolvedHint => visitHint(p) case p: LogicalPlan => default(p) } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/BasicStatsEstimationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/BasicStatsEstimationSuite.scala index 5fd21a06a109..913be6d1ff07 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/BasicStatsEstimationSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/BasicStatsEstimationSuite.scala @@ -78,6 +78,37 @@ class BasicStatsEstimationSuite extends PlanTest with StatsEstimationTestBase { checkStats(globalLimit, stats) } + test("sample estimation") { + val sample = Sample(0.0, 0.5, withReplacement = false, (math.random * 1000).toLong, plan) + checkStats(sample, Statistics(sizeInBytes = 60, rowCount = Some(5))) + + // Child doesn't have rowCount in stats + val childStats = Statistics(sizeInBytes = 120) + val childPlan = DummyLogicalPlan(childStats, childStats) + val sample2 = + Sample(0.0, 0.11, withReplacement = false, (math.random * 1000).toLong, childPlan) + checkStats(sample2, Statistics(sizeInBytes = 14)) + } + + test("estimate statistics when the conf changes") { + val expectedDefaultStats = + Statistics( + sizeInBytes = 40, + rowCount = Some(10), + attributeStats = AttributeMap(Seq( + AttributeReference("c1", IntegerType)() -> 
ColumnStat(10, Some(1), Some(10), 0, 4, 4)))) + val expectedCboStats = + Statistics( + sizeInBytes = 4, + rowCount = Some(1), + attributeStats = AttributeMap(Seq( + AttributeReference("c1", IntegerType)() -> ColumnStat(1, Some(5), Some(5), 0, 4, 4)))) + + val plan = DummyLogicalPlan(defaultStats = expectedDefaultStats, cboStats = expectedCboStats) + checkStats( + plan, expectedStatsCboOn = expectedCboStats, expectedStatsCboOff = expectedDefaultStats) + } + /** Check estimated stats when cbo is turned on/off. */ private def checkStats( plan: LogicalPlan, @@ -99,3 +130,17 @@ class BasicStatsEstimationSuite extends PlanTest with StatsEstimationTestBase { private def checkStats(plan: LogicalPlan, expectedStats: Statistics): Unit = checkStats(plan, expectedStats, expectedStats) } + +/** + * This class is used for unit-testing the cbo switch; it mimics a logical plan that computes + * either simple statistics or cbo-estimated statistics, depending on the conf. + */ +private case class DummyLogicalPlan( + defaultStats: Statistics, + cboStats: Statistics) + extends LeafNode { + + override def output: Seq[Attribute] = Nil + + override def computeStats(): Statistics = if (conf.cboEnabled) cboStats else defaultStats +} From 565e7a8d4ae7879ee704fb94ae9b3da31e202d7e Mon Sep 17 00:00:00 2001 From: caoxuewen Date: Thu, 6 Jul 2017 19:49:34 +0800 Subject: [PATCH 055/125] [SPARK-20950][CORE] Add a new config for diskWriteBufferSize, which was hard-coded before ## What changes were proposed in this pull request? This PR improves two things: 1. Make the disk write buffer size of ShuffleExternalSorter configurable through spark.shuffle.spill.diskWriteBufferSize. When varying diskWriteBufferSize to test `forceSorterToSpill`, the average performance over 10 runs is as follows (unit: ms): ``` diskWriteBufferSize: 1M 512K 256K 128K 64K 32K 16K 8K 4K --------------------------------------------------------------------------------------- RecordSize = 2.5M 742 722 694 686 667 668 671 669 683 RecordSize = 1M 294 293 292 287 283 285 281 279 285 ``` 2. Move the initialization of outputBufferSizeInBytes and inputBufferSizeInBytes out of the mergeSpillsWithFileStream function. ## How was this patch tested? Unit tests. Author: caoxuewen Closes #18174 from heary-cao/buffersize. --- .../shuffle/sort/ShuffleExternalSorter.java | 11 +++++--- .../shuffle/sort/UnsafeShuffleWriter.java | 14 +++++++--- .../unsafe/sort/UnsafeSorterSpillWriter.java | 24 ++++++++++------- .../spark/internal/config/package.scala | 27 +++++++++++++++++++ 4 files changed, 60 insertions(+), 16 deletions(-) diff --git a/core/src/main/java/org/apache/spark/shuffle/sort/ShuffleExternalSorter.java b/core/src/main/java/org/apache/spark/shuffle/sort/ShuffleExternalSorter.java index c33d1e33f030..338faaadb33d 100644 --- a/core/src/main/java/org/apache/spark/shuffle/sort/ShuffleExternalSorter.java +++ b/core/src/main/java/org/apache/spark/shuffle/sort/ShuffleExternalSorter.java @@ -43,6 +43,7 @@ import org.apache.spark.unsafe.array.LongArray; import org.apache.spark.unsafe.memory.MemoryBlock; import org.apache.spark.util.Utils; +import org.apache.spark.internal.config.package$; /** * An external sorter that is specialized for sort-based shuffle.
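All three buffer sizes this patch touches go through the same typed `ConfigBuilder` pattern, defined near the end of the diff in config/package.scala. Below is a minimal, self-contained sketch of that pattern, not code from the patch: the package and entry name are hypothetical, and the code must live under the org.apache.spark package because `ConfigBuilder` is `private[spark]`.

```scala
package org.apache.spark.example // hypothetical package, needed for private[spark] access

import org.apache.spark.SparkConf
import org.apache.spark.internal.config.ConfigBuilder
import org.apache.spark.network.util.ByteUnit

object ExampleConfigs {
  // Mirrors SHUFFLE_DISK_WRITE_BUFFER_SIZE below; the entry name is illustrative only.
  val EXAMPLE_DISK_WRITE_BUFFER_SIZE =
    ConfigBuilder("spark.example.diskWriteBufferSize")
      .doc("The buffer size to use when writing sorted records to an on-disk file.")
      .bytesConf(ByteUnit.BYTE) // accepts "1m", "512k", or a plain byte count
      .checkValue(v => v > 0 && v <= Int.MaxValue,
        "The buffer size must be positive and representable as an Int.")
      .createWithDefault(1024 * 1024) // 1 MiB, the previously hard-coded value

  // bytesConf yields a ConfigEntry[Long], which is why the Java call sites in this
  // patch narrow the value with the (int) (long) double cast.
  def diskWriteBufferSize(conf: SparkConf): Int =
    conf.get(EXAMPLE_DISK_WRITE_BUFFER_SIZE).toInt
}
```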
@@ -82,6 +83,9 @@ final class ShuffleExternalSorter extends MemoryConsumer { /** The buffer size to use when writing spills using DiskBlockObjectWriter */ private final int fileBufferSizeBytes; + /** The buffer size to use when writing the sorted records to an on-disk file */ + private final int diskWriteBufferSize; + /** * Memory pages that hold the records being sorted. The pages in this list are freed when * spilling, although in principle we could recycle these pages across spills (on the other hand, @@ -116,13 +120,14 @@ final class ShuffleExternalSorter extends MemoryConsumer { this.taskContext = taskContext; this.numPartitions = numPartitions; // Use getSizeAsKb (not bytes) to maintain backwards compatibility if no units are provided - this.fileBufferSizeBytes = (int) conf.getSizeAsKb("spark.shuffle.file.buffer", "32k") * 1024; + this.fileBufferSizeBytes = (int) (long) conf.get(package$.MODULE$.SHUFFLE_FILE_BUFFER_SIZE()) * 1024; this.numElementsForSpillThreshold = conf.getLong("spark.shuffle.spill.numElementsForceSpillThreshold", 1024 * 1024 * 1024); this.writeMetrics = writeMetrics; this.inMemSorter = new ShuffleInMemorySorter( this, initialSize, conf.getBoolean("spark.shuffle.sort.useRadixSort", true)); this.peakMemoryUsedBytes = getMemoryUsage(); + this.diskWriteBufferSize = (int) (long) conf.get(package$.MODULE$.SHUFFLE_DISK_WRITE_BUFFER_SIZE()); } /** @@ -155,7 +160,7 @@ private void writeSortedFile(boolean isLastFile) throws IOException { // be an API to directly transfer bytes from managed memory to the disk writer, we buffer // data through a byte array. This array does not need to be large enough to hold a single // record; - final byte[] writeBuffer = new byte[DISK_WRITE_BUFFER_SIZE]; + final byte[] writeBuffer = new byte[diskWriteBufferSize]; // Because this output will be read during shuffle, its compression codec must be controlled by // spark.shuffle.compress instead of spark.shuffle.spill.compress, so we need to use @@ -195,7 +200,7 @@ private void writeSortedFile(boolean isLastFile) throws IOException { int dataRemaining = Platform.getInt(recordPage, recordOffsetInPage); long recordReadPosition = recordOffsetInPage + 4; // skip over record length while (dataRemaining > 0) { - final int toTransfer = Math.min(DISK_WRITE_BUFFER_SIZE, dataRemaining); + final int toTransfer = Math.min(diskWriteBufferSize, dataRemaining); Platform.copyMemory( recordPage, recordReadPosition, writeBuffer, Platform.BYTE_ARRAY_OFFSET, toTransfer); writer.write(writeBuffer, 0, toTransfer); diff --git a/core/src/main/java/org/apache/spark/shuffle/sort/UnsafeShuffleWriter.java b/core/src/main/java/org/apache/spark/shuffle/sort/UnsafeShuffleWriter.java index 34c179990214..1b578491b81d 100644 --- a/core/src/main/java/org/apache/spark/shuffle/sort/UnsafeShuffleWriter.java +++ b/core/src/main/java/org/apache/spark/shuffle/sort/UnsafeShuffleWriter.java @@ -55,6 +55,7 @@ import org.apache.spark.storage.TimeTrackingOutputStream; import org.apache.spark.unsafe.Platform; import org.apache.spark.util.Utils; +import org.apache.spark.internal.config.package$; @Private public class UnsafeShuffleWriter extends ShuffleWriter { @@ -65,6 +66,7 @@ public class UnsafeShuffleWriter extends ShuffleWriter { @VisibleForTesting static final int DEFAULT_INITIAL_SORT_BUFFER_SIZE = 4096; + static final int DEFAULT_INITIAL_SER_BUFFER_SIZE = 1024 * 1024; private final BlockManager blockManager; private final IndexShuffleBlockResolver shuffleBlockResolver; @@ -78,6 +80,8 @@ public class UnsafeShuffleWriter extends 
ShuffleWriter { private final SparkConf sparkConf; private final boolean transferToEnabled; private final int initialSortBufferSize; + private final int inputBufferSizeInBytes; + private final int outputBufferSizeInBytes; @Nullable private MapStatus mapStatus; @Nullable private ShuffleExternalSorter sorter; @@ -140,6 +144,10 @@ public UnsafeShuffleWriter( this.transferToEnabled = sparkConf.getBoolean("spark.file.transferTo", true); this.initialSortBufferSize = sparkConf.getInt("spark.shuffle.sort.initialBufferSize", DEFAULT_INITIAL_SORT_BUFFER_SIZE); + this.inputBufferSizeInBytes = + (int) (long) sparkConf.get(package$.MODULE$.SHUFFLE_FILE_BUFFER_SIZE()) * 1024; + this.outputBufferSizeInBytes = + (int) (long) sparkConf.get(package$.MODULE$.SHUFFLE_UNSAFE_FILE_OUTPUT_BUFFER_SIZE()) * 1024; open(); } @@ -209,7 +217,7 @@ private void open() throws IOException { partitioner.numPartitions(), sparkConf, writeMetrics); - serBuffer = new MyByteArrayOutputStream(1024 * 1024); + serBuffer = new MyByteArrayOutputStream(DEFAULT_INITIAL_SER_BUFFER_SIZE); serOutputStream = serializer.serializeStream(serBuffer); } @@ -360,12 +368,10 @@ private long[] mergeSpillsWithFileStream( final OutputStream bos = new BufferedOutputStream( new FileOutputStream(outputFile), - (int) sparkConf.getSizeAsKb("spark.shuffle.unsafe.file.output.buffer", "32k") * 1024); + outputBufferSizeInBytes); // Use a counting output stream to avoid having to close the underlying file and ask // the file system for its size after each partition is written. final CountingOutputStream mergedFileOutputStream = new CountingOutputStream(bos); - final int inputBufferSizeInBytes = - (int) sparkConf.getSizeAsKb("spark.shuffle.file.buffer", "32k") * 1024; boolean threwException = true; try { diff --git a/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeSorterSpillWriter.java b/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeSorterSpillWriter.java index 164b9d70b79d..f9b549375544 100644 --- a/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeSorterSpillWriter.java +++ b/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeSorterSpillWriter.java @@ -20,9 +20,10 @@ import java.io.File; import java.io.IOException; -import org.apache.spark.serializer.SerializerManager; import scala.Tuple2; +import org.apache.spark.SparkConf; +import org.apache.spark.serializer.SerializerManager; import org.apache.spark.executor.ShuffleWriteMetrics; import org.apache.spark.serializer.DummySerializerInstance; import org.apache.spark.storage.BlockId; @@ -30,6 +31,7 @@ import org.apache.spark.storage.DiskBlockObjectWriter; import org.apache.spark.storage.TempLocalBlockId; import org.apache.spark.unsafe.Platform; +import org.apache.spark.internal.config.package$; /** * Spills a list of sorted records to disk. Spill files have the following format: @@ -38,12 +40,16 @@ */ public final class UnsafeSorterSpillWriter { - static final int DISK_WRITE_BUFFER_SIZE = 1024 * 1024; + private final SparkConf conf = new SparkConf(); + + /** The buffer size to use when writing the sorted records to an on-disk file */ + private final int diskWriteBufferSize = + (int) (long) conf.get(package$.MODULE$.SHUFFLE_DISK_WRITE_BUFFER_SIZE()); // Small writes to DiskBlockObjectWriter will be fairly inefficient. Since there doesn't seem to // be an API to directly transfer bytes from managed memory to the disk writer, we buffer // data through a byte array. 
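Both UnsafeSorterSpillWriter.write and ShuffleExternalSorter.writeSortedFile rely on the copy discipline that the comment above describes: a record living in a memory page is drained through a fixed-size on-heap buffer, diskWriteBufferSize bytes at a time. A self-contained sketch of that loop, with illustrative names rather than the patch's own code:

```scala
import org.apache.spark.unsafe.Platform

// Drains one record from a memory page into a sink through a fixed-size buffer.
// `write` stands in for DiskBlockObjectWriter.write(byte[], int, int).
def drainRecord(
    page: AnyRef,
    recordOffset: Long,
    recordLength: Int,
    buffer: Array[Byte], // sized to diskWriteBufferSize
    write: (Array[Byte], Int, Int) => Unit): Unit = {
  var remaining = recordLength
  var readPos = recordOffset
  while (remaining > 0) {
    // Never move more than one buffer's worth per iteration.
    val toTransfer = math.min(buffer.length, remaining)
    Platform.copyMemory(page, readPos, buffer, Platform.BYTE_ARRAY_OFFSET, toTransfer)
    write(buffer, 0, toTransfer)
    readPos += toTransfer
    remaining -= toTransfer
  }
}
```

Making the buffer size configurable only changes `buffer.length` here; the loop itself is unchanged.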
- private byte[] writeBuffer = new byte[DISK_WRITE_BUFFER_SIZE]; + private byte[] writeBuffer = new byte[diskWriteBufferSize]; private final File file; private final BlockId blockId; @@ -114,7 +120,7 @@ public void write( writeIntToBuffer(recordLength, 0); writeLongToBuffer(keyPrefix, 4); int dataRemaining = recordLength; - int freeSpaceInWriteBuffer = DISK_WRITE_BUFFER_SIZE - 4 - 8; // space used by prefix + len + int freeSpaceInWriteBuffer = diskWriteBufferSize - 4 - 8; // space used by prefix + len long recordReadPosition = baseOffset; while (dataRemaining > 0) { final int toTransfer = Math.min(freeSpaceInWriteBuffer, dataRemaining); @@ -122,15 +128,15 @@ public void write( baseObject, recordReadPosition, writeBuffer, - Platform.BYTE_ARRAY_OFFSET + (DISK_WRITE_BUFFER_SIZE - freeSpaceInWriteBuffer), + Platform.BYTE_ARRAY_OFFSET + (diskWriteBufferSize - freeSpaceInWriteBuffer), toTransfer); - writer.write(writeBuffer, 0, (DISK_WRITE_BUFFER_SIZE - freeSpaceInWriteBuffer) + toTransfer); + writer.write(writeBuffer, 0, (diskWriteBufferSize - freeSpaceInWriteBuffer) + toTransfer); recordReadPosition += toTransfer; dataRemaining -= toTransfer; - freeSpaceInWriteBuffer = DISK_WRITE_BUFFER_SIZE; + freeSpaceInWriteBuffer = diskWriteBufferSize; } - if (freeSpaceInWriteBuffer < DISK_WRITE_BUFFER_SIZE) { - writer.write(writeBuffer, 0, (DISK_WRITE_BUFFER_SIZE - freeSpaceInWriteBuffer)); + if (freeSpaceInWriteBuffer < diskWriteBufferSize) { + writer.write(writeBuffer, 0, (diskWriteBufferSize - freeSpaceInWriteBuffer)); } writer.recordWritten(); } diff --git a/core/src/main/scala/org/apache/spark/internal/config/package.scala b/core/src/main/scala/org/apache/spark/internal/config/package.scala index 8dee0d970c4c..a629810bf093 100644 --- a/core/src/main/scala/org/apache/spark/internal/config/package.scala +++ b/core/src/main/scala/org/apache/spark/internal/config/package.scala @@ -336,4 +336,31 @@ package object config { "spark.") .booleanConf .createWithDefault(false) + + private[spark] val SHUFFLE_FILE_BUFFER_SIZE = + ConfigBuilder("spark.shuffle.file.buffer") + .doc("Size of the in-memory buffer for each shuffle file output stream. 
" + + "These buffers reduce the number of disk seeks and system calls made " + + "in creating intermediate shuffle files.") + .bytesConf(ByteUnit.KiB) + .checkValue(v => v > 0 && v <= Int.MaxValue / 1024, + s"The file buffer size must be greater than 0 and less than ${Int.MaxValue / 1024}.") + .createWithDefaultString("32k") + + private[spark] val SHUFFLE_UNSAFE_FILE_OUTPUT_BUFFER_SIZE = + ConfigBuilder("spark.shuffle.unsafe.file.output.buffer") + .doc("The file system for this buffer size after each partition " + + "is written in unsafe shuffle writer.") + .bytesConf(ByteUnit.KiB) + .checkValue(v => v > 0 && v <= Int.MaxValue / 1024, + s"The buffer size must be greater than 0 and less than ${Int.MaxValue / 1024}.") + .createWithDefaultString("32k") + + private[spark] val SHUFFLE_DISK_WRITE_BUFFER_SIZE = + ConfigBuilder("spark.shuffle.spill.diskWriteBufferSize") + .doc("The buffer size to use when writing the sorted records to an on-disk file.") + .bytesConf(ByteUnit.BYTE) + .checkValue(v => v > 0 && v <= Int.MaxValue, + s"The buffer size must be greater than 0 and less than ${Int.MaxValue}.") + .createWithDefault(1024 * 1024) } From 26ac085debb54d0104762d1cd4187cdf73f301ba Mon Sep 17 00:00:00 2001 From: Bogdan Raducanu Date: Fri, 7 Jul 2017 01:04:57 +0800 Subject: [PATCH 056/125] [SPARK-21228][SQL] InSet incorrect handling of structs ## What changes were proposed in this pull request? When data type is struct, InSet now uses TypeUtils.getInterpretedOrdering (similar to EqualTo) to build a TreeSet. In other cases it will use a HashSet as before (which should be faster). Similarly, In.eval uses Ordering.equiv instead of equals. ## How was this patch tested? New test in SQLQuerySuite. Author: Bogdan Raducanu Closes #18455 from bogdanrdc/SPARK-21228. --- .../sql/catalyst/expressions/predicates.scala | 57 ++++++++++++------- .../catalyst/expressions/PredicateSuite.scala | 31 +++++----- .../catalyst/optimizer/OptimizeInSuite.scala | 2 +- .../org/apache/spark/sql/SQLQuerySuite.scala | 22 +++++++ 4 files changed, 78 insertions(+), 34 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala index f3fe58caa6fe..7bf10f199f1c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala @@ -17,10 +17,11 @@ package org.apache.spark.sql.catalyst.expressions +import scala.collection.immutable.TreeSet + import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.analysis.TypeCheckResult -import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, ExprCode} -import org.apache.spark.sql.catalyst.expressions.codegen.{Predicate => BasePredicate} +import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, ExprCode, GenerateSafeProjection, GenerateUnsafeProjection, Predicate => BasePredicate} import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.catalyst.util.TypeUtils import org.apache.spark.sql.types._ @@ -175,20 +176,23 @@ case class In(value: Expression, list: Seq[Expression]) extends Predicate { |[${sub.output.map(_.dataType.catalogString).mkString(", ")}]. 
""".stripMargin) } else { - TypeCheckResult.TypeCheckSuccess + TypeUtils.checkForOrderingExpr(value.dataType, s"function $prettyName") } } case _ => - if (list.exists(l => l.dataType != value.dataType)) { - TypeCheckResult.TypeCheckFailure("Arguments must be same type") + val mismatchOpt = list.find(l => l.dataType != value.dataType) + if (mismatchOpt.isDefined) { + TypeCheckResult.TypeCheckFailure(s"Arguments must be same type but were: " + + s"${value.dataType} != ${mismatchOpt.get.dataType}") } else { - TypeCheckResult.TypeCheckSuccess + TypeUtils.checkForOrderingExpr(value.dataType, s"function $prettyName") } } } override def children: Seq[Expression] = value +: list lazy val inSetConvertible = list.forall(_.isInstanceOf[Literal]) + private lazy val ordering = TypeUtils.getInterpretedOrdering(value.dataType) override def nullable: Boolean = children.exists(_.nullable) override def foldable: Boolean = children.forall(_.foldable) @@ -203,10 +207,10 @@ case class In(value: Expression, list: Seq[Expression]) extends Predicate { var hasNull = false list.foreach { e => val v = e.eval(input) - if (v == evaluatedValue) { - return true - } else if (v == null) { + if (v == null) { hasNull = true + } else if (ordering.equiv(v, evaluatedValue)) { + return true } } if (hasNull) { @@ -265,7 +269,7 @@ case class InSet(child: Expression, hset: Set[Any]) extends UnaryExpression with override def nullable: Boolean = child.nullable || hasNull protected override def nullSafeEval(value: Any): Any = { - if (hset.contains(value)) { + if (set.contains(value)) { true } else if (hasNull) { null @@ -274,27 +278,40 @@ case class InSet(child: Expression, hset: Set[Any]) extends UnaryExpression with } } - def getHSet(): Set[Any] = hset + @transient private[this] lazy val set = child.dataType match { + case _: AtomicType => hset + case _: NullType => hset + case _ => + // for structs use interpreted ordering to be able to compare UnsafeRows with non-UnsafeRows + TreeSet.empty(TypeUtils.getInterpretedOrdering(child.dataType)) ++ hset + } + + def getSet(): Set[Any] = set override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { val setName = classOf[Set[Any]].getName val InSetName = classOf[InSet].getName val childGen = child.genCode(ctx) ctx.references += this - val hsetTerm = ctx.freshName("hset") - val hasNullTerm = ctx.freshName("hasNull") - ctx.addMutableState(setName, hsetTerm, - s"$hsetTerm = (($InSetName)references[${ctx.references.size - 1}]).getHSet();") - ctx.addMutableState("boolean", hasNullTerm, s"$hasNullTerm = $hsetTerm.contains(null);") + val setTerm = ctx.freshName("set") + val setNull = if (hasNull) { + s""" + |if (!${ev.value}) { + | ${ev.isNull} = true; + |} + """.stripMargin + } else { + "" + } + ctx.addMutableState(setName, setTerm, + s"$setTerm = (($InSetName)references[${ctx.references.size - 1}]).getSet();") ev.copy(code = s""" ${childGen.code} boolean ${ev.isNull} = ${childGen.isNull}; boolean ${ev.value} = false; if (!${ev.isNull}) { - ${ev.value} = $hsetTerm.contains(${childGen.value}); - if (!${ev.value} && $hasNullTerm) { - ${ev.isNull} = true; - } + ${ev.value} = $setTerm.contains(${childGen.value}); + $setNull } """) } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/PredicateSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/PredicateSuite.scala index 6fe295c3dd93..ef510a95ef44 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/PredicateSuite.scala +++ 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/PredicateSuite.scala @@ -35,7 +35,8 @@ class PredicateSuite extends SparkFunSuite with ExpressionEvalHelper { test(s"3VL $name") { truthTable.foreach { case (l, r, answer) => - val expr = op(NonFoldableLiteral(l, BooleanType), NonFoldableLiteral(r, BooleanType)) + val expr = op(NonFoldableLiteral.create(l, BooleanType), + NonFoldableLiteral.create(r, BooleanType)) checkEvaluation(expr, answer) } } @@ -72,7 +73,7 @@ class PredicateSuite extends SparkFunSuite with ExpressionEvalHelper { (false, true) :: (null, null) :: Nil notTrueTable.foreach { case (v, answer) => - checkEvaluation(Not(NonFoldableLiteral(v, BooleanType)), answer) + checkEvaluation(Not(NonFoldableLiteral.create(v, BooleanType)), answer) } checkConsistencyBetweenInterpretedAndCodegen(Not, BooleanType) } @@ -120,22 +121,26 @@ class PredicateSuite extends SparkFunSuite with ExpressionEvalHelper { (null, null, null) :: Nil) test("IN") { - checkEvaluation(In(NonFoldableLiteral(null, IntegerType), Seq(Literal(1), Literal(2))), null) - checkEvaluation(In(NonFoldableLiteral(null, IntegerType), - Seq(NonFoldableLiteral(null, IntegerType))), null) - checkEvaluation(In(NonFoldableLiteral(null, IntegerType), Seq.empty), null) + checkEvaluation(In(NonFoldableLiteral.create(null, IntegerType), Seq(Literal(1), + Literal(2))), null) + checkEvaluation(In(NonFoldableLiteral.create(null, IntegerType), + Seq(NonFoldableLiteral.create(null, IntegerType))), null) + checkEvaluation(In(NonFoldableLiteral.create(null, IntegerType), Seq.empty), null) checkEvaluation(In(Literal(1), Seq.empty), false) - checkEvaluation(In(Literal(1), Seq(NonFoldableLiteral(null, IntegerType))), null) - checkEvaluation(In(Literal(1), Seq(Literal(1), NonFoldableLiteral(null, IntegerType))), true) - checkEvaluation(In(Literal(2), Seq(Literal(1), NonFoldableLiteral(null, IntegerType))), null) + checkEvaluation(In(Literal(1), Seq(NonFoldableLiteral.create(null, IntegerType))), null) + checkEvaluation(In(Literal(1), Seq(Literal(1), NonFoldableLiteral.create(null, IntegerType))), + true) + checkEvaluation(In(Literal(2), Seq(Literal(1), NonFoldableLiteral.create(null, IntegerType))), + null) checkEvaluation(In(Literal(1), Seq(Literal(1), Literal(2))), true) checkEvaluation(In(Literal(2), Seq(Literal(1), Literal(2))), true) checkEvaluation(In(Literal(3), Seq(Literal(1), Literal(2))), false) checkEvaluation( - And(In(Literal(1), Seq(Literal(1), Literal(2))), In(Literal(2), Seq(Literal(1), Literal(2)))), + And(In(Literal(1), Seq(Literal(1), Literal(2))), In(Literal(2), Seq(Literal(1), + Literal(2)))), true) - val ns = NonFoldableLiteral(null, StringType) + val ns = NonFoldableLiteral.create(null, StringType) checkEvaluation(In(ns, Seq(Literal("1"), Literal("2"))), null) checkEvaluation(In(ns, Seq(ns)), null) checkEvaluation(In(Literal("a"), Seq(ns)), null) @@ -155,7 +160,7 @@ class PredicateSuite extends SparkFunSuite with ExpressionEvalHelper { case _ => value } } - val input = inputData.map(NonFoldableLiteral(_, t)) + val input = inputData.map(NonFoldableLiteral.create(_, t)) val expected = if (inputData(0) == null) { null } else if (inputData.slice(1, 10).contains(inputData(0))) { @@ -279,7 +284,7 @@ class PredicateSuite extends SparkFunSuite with ExpressionEvalHelper { test("BinaryComparison: null test") { // Use -1 (default value for codegen) which can trigger some weird bugs, e.g. 
SPARK-14757 val normalInt = Literal(-1) - val nullInt = NonFoldableLiteral(null, IntegerType) + val nullInt = NonFoldableLiteral.create(null, IntegerType) def nullTest(op: (Expression, Expression) => Expression): Unit = { checkEvaluation(op(normalInt, nullInt), null) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeInSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeInSuite.scala index 6a77580b29a2..28bf7b6f8434 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeInSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeInSuite.scala @@ -169,7 +169,7 @@ class OptimizeInSuite extends PlanTest { val optimizedPlan = OptimizeIn(plan) optimizedPlan match { case Filter(cond, _) - if cond.isInstanceOf[InSet] && cond.asInstanceOf[InSet].getHSet().size == 3 => + if cond.isInstanceOf[InSet] && cond.asInstanceOf[InSet].getSet().size == 3 => // pass case _ => fail("Unexpected result for OptimizedIn") } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index 68f61cfab6d2..5171aaebc990 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -2616,4 +2616,26 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext { val e = intercept[AnalysisException](sql("SELECT nvl(1, 2, 3)")) assert(e.message.contains("Invalid number of arguments")) } + + test("SPARK-21228: InSet incorrect handling of structs") { + withTempView("A") { + // reduce this from the default of 10 so the repro query text is not too long + withSQLConf((SQLConf.OPTIMIZER_INSET_CONVERSION_THRESHOLD.key -> "3")) { + // a relation that has 1 column of struct type with values (1,1), ..., (9, 9) + spark.range(1, 10).selectExpr("named_struct('a', id, 'b', id) as a") + .createOrReplaceTempView("A") + val df = sql( + """ + |SELECT * from + | (SELECT MIN(a) as minA FROM A) AA -- this Aggregate will return UnsafeRows + | -- the IN will become InSet with a Set of GenericInternalRows + | -- a GenericInternalRow is never equal to an UnsafeRow so the query would + | -- returns 0 results, which is incorrect + | WHERE minA IN (NAMED_STRUCT('a', 1L, 'b', 1L), NAMED_STRUCT('a', 2L, 'b', 2L), + | NAMED_STRUCT('a', 3L, 'b', 3L)) + """.stripMargin) + checkAnswer(df, Row(Row(1, 1))) + } + } + } } From 48e44b24a7663142176102ac4c6bf4242f103804 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Fri, 7 Jul 2017 01:07:45 +0800 Subject: [PATCH 057/125] [SPARK-21204][SQL] Add support for Scala Set collection types in serialization ## What changes were proposed in this pull request? Currently we can't produce a `Dataset` containing `Set` in SparkSQL. This PR tries to support serialization/deserialization of `Set`. Because there's no corresponding internal data type in SparkSQL for a `Set`, the most proper choice for serializing a set should be an array. ## How was this patch tested? Added unit tests. Author: Liang-Chi Hsieh Closes #18416 from viirya/SPARK-21204. 
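Since the serialized form is a plain Catalyst array, round-tripping an array column that contains duplicates through the new encoder de-duplicates the elements. A small usage sketch, assuming a `SparkSession` is in scope as `spark`:

```scala
import org.apache.spark.sql.Dataset
import spark.implicits._ // assumes a SparkSession named `spark`

// A Set is serialized to a Catalyst array<int>; reading an array column back as
// Dataset[Set[Int]] de-duplicates repeated elements on deserialization.
val ds: Dataset[Set[Int]] = Seq(Seq(1, 1, 2), Seq(3)).toDF("c").as[Set[Int]]
ds.collect() // Array(Set(1, 2), Set(3))
```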
--- .../spark/sql/catalyst/ScalaReflection.scala | 28 +++++++++++++++-- .../expressions/objects/objects.scala | 5 +-- .../org/apache/spark/sql/SQLImplicits.scala | 10 ++++++ .../spark/sql/DataFrameAggregateSuite.scala | 10 ++++++ .../spark/sql/DatasetPrimitiveSuite.scala | 31 +++++++++++++++++++ 5 files changed, 79 insertions(+), 5 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala index 814f2c10b909..4d5401f30d39 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala @@ -309,7 +309,10 @@ object ScalaReflection extends ScalaReflection { Invoke(arrayData, primitiveMethod, arrayCls, returnNullable = false) } - case t if t <:< localTypeOf[Seq[_]] => + // We serialize a `Set` to Catalyst array. When we deserialize a Catalyst array + // to a `Set`, if there are duplicated elements, the elements will be de-duplicated. + case t if t <:< localTypeOf[Seq[_]] || + t <:< localTypeOf[scala.collection.Set[_]] => val TypeRef(_, _, Seq(elementType)) = t val Schema(dataType, elementNullable) = schemaFor(elementType) val className = getClassNameFromType(elementType) @@ -327,8 +330,10 @@ object ScalaReflection extends ScalaReflection { } val companion = t.normalize.typeSymbol.companionSymbol.typeSignature - val cls = companion.declaration(newTermName("newBuilder")) match { - case NoSymbol => classOf[Seq[_]] + val cls = companion.member(newTermName("newBuilder")) match { + case NoSymbol if t <:< localTypeOf[Seq[_]] => classOf[Seq[_]] + case NoSymbol if t <:< localTypeOf[scala.collection.Set[_]] => + classOf[scala.collection.Set[_]] case _ => mirror.runtimeClass(t.typeSymbol.asClass) } UnresolvedMapObjects(mapFunction, getPath, Some(cls)) @@ -502,6 +507,19 @@ object ScalaReflection extends ScalaReflection { serializerFor(_, valueType, valuePath, seenTypeSet), valueNullable = !valueType.typeSymbol.asClass.isPrimitive) + case t if t <:< localTypeOf[scala.collection.Set[_]] => + val TypeRef(_, _, Seq(elementType)) = t + + // There's no corresponding Catalyst type for `Set`, we serialize a `Set` to Catalyst array. + // Note that the property of `Set` is only kept when manipulating the data as domain object. 
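The schema side of this mapping (the `schemaFor` case added further below) is easy to observe directly: a set's schema comes back as an array of the element schema. A sketch of the expected behavior, not taken from the patch's tests:

```scala
import org.apache.spark.sql.catalyst.ScalaReflection

// A Scala Set has no Catalyst counterpart, so its schema maps to an ArrayType of
// the element schema; Int is primitive, hence containsNull = false.
val schema = ScalaReflection.schemaFor[Set[Int]]
// => Schema(ArrayType(IntegerType, containsNull = false), nullable = true)
```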
+ val newInput = + Invoke( + inputObject, + "toSeq", + ObjectType(classOf[Seq[_]])) + + toCatalystArray(newInput, elementType) + case t if t <:< localTypeOf[String] => StaticInvoke( classOf[UTF8String], @@ -713,6 +731,10 @@ object ScalaReflection extends ScalaReflection { val Schema(valueDataType, valueNullable) = schemaFor(valueType) Schema(MapType(schemaFor(keyType).dataType, valueDataType, valueContainsNull = valueNullable), nullable = true) + case t if t <:< localTypeOf[Set[_]] => + val TypeRef(_, _, Seq(elementType)) = t + val Schema(dataType, nullable) = schemaFor(elementType) + Schema(ArrayType(dataType, containsNull = nullable), nullable = true) case t if t <:< localTypeOf[String] => Schema(StringType, nullable = true) case t if t <:< localTypeOf[java.sql.Timestamp] => Schema(TimestampType, nullable = true) case t if t <:< localTypeOf[java.sql.Date] => Schema(DateType, nullable = true) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala index 24c06d8b14b5..9b28a18035b1 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala @@ -627,8 +627,9 @@ case class MapObjects private( val (initCollection, addElement, getResult): (String, String => String, String) = customCollectionCls match { - case Some(cls) if classOf[Seq[_]].isAssignableFrom(cls) => - // Scala sequence + case Some(cls) if classOf[Seq[_]].isAssignableFrom(cls) || + classOf[scala.collection.Set[_]].isAssignableFrom(cls) => + // Scala sequence or set val getBuilder = s"${cls.getName}$$.MODULE$$.newBuilder()" val builder = ctx.freshName("collectionBuilder") ( diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SQLImplicits.scala b/sql/core/src/main/scala/org/apache/spark/sql/SQLImplicits.scala index 86574e2f71d9..05db292bd41b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/SQLImplicits.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/SQLImplicits.scala @@ -171,6 +171,16 @@ abstract class SQLImplicits extends LowPrioritySQLImplicits { /** @since 2.3.0 */ implicit def newMapEncoder[T <: Map[_, _] : TypeTag]: Encoder[T] = ExpressionEncoder() + /** + * Notice that we serialize `Set` to Catalyst array. The set property is only kept when + * manipulating the domain objects. The serialization format doesn't keep the set property. + * When we have a Catalyst array which contains duplicated elements and convert it to + * `Dataset[Set[T]]` by using the encoder, the elements will be de-duplicated. 
+ * + * @since 2.3.0 + */ + implicit def newSetEncoder[T <: Set[_] : TypeTag]: Encoder[T] = ExpressionEncoder() + // Arrays /** @since 1.6.1 */ diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala index 5db354d79bb6..b52d50b195bc 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala @@ -460,6 +460,16 @@ class DataFrameAggregateSuite extends QueryTest with SharedSQLContext { df.select(collect_set($"a"), collect_set($"b")), Seq(Row(Seq(1, 2, 3), Seq(2, 4))) ) + + checkDataset( + df.select(collect_set($"a").as("aSet")).as[Set[Int]], + Set(1, 2, 3)) + checkDataset( + df.select(collect_set($"b").as("bSet")).as[Set[Int]], + Set(2, 4)) + checkDataset( + df.select(collect_set($"a"), collect_set($"b")).as[(Set[Int], Set[Int])], + Seq(Set(1, 2, 3) -> Set(2, 4)): _*) } test("collect functions structs") { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DatasetPrimitiveSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DatasetPrimitiveSuite.scala index a6847dcfbffc..f62f9e23db66 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DatasetPrimitiveSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DatasetPrimitiveSuite.scala @@ -17,6 +17,7 @@ package org.apache.spark.sql +import scala.collection.immutable.{HashSet => HSet} import scala.collection.immutable.Queue import scala.collection.mutable.{LinkedHashMap => LHMap} import scala.collection.mutable.ArrayBuffer @@ -342,6 +343,31 @@ class DatasetPrimitiveSuite extends QueryTest with SharedSQLContext { LHMapClass(LHMap(1 -> 2)) -> LHMap("test" -> MapClass(Map(3 -> 4)))) } + test("arbitrary sets") { + checkDataset(Seq(Set(1, 2, 3, 4)).toDS(), Set(1, 2, 3, 4)) + checkDataset(Seq(Set(1.toLong, 2.toLong)).toDS(), Set(1.toLong, 2.toLong)) + checkDataset(Seq(Set(1.toDouble, 2.toDouble)).toDS(), Set(1.toDouble, 2.toDouble)) + checkDataset(Seq(Set(1.toFloat, 2.toFloat)).toDS(), Set(1.toFloat, 2.toFloat)) + checkDataset(Seq(Set(1.toByte, 2.toByte)).toDS(), Set(1.toByte, 2.toByte)) + checkDataset(Seq(Set(1.toShort, 2.toShort)).toDS(), Set(1.toShort, 2.toShort)) + checkDataset(Seq(Set(true, false)).toDS(), Set(true, false)) + checkDataset(Seq(Set("test1", "test2")).toDS(), Set("test1", "test2")) + checkDataset(Seq(Set(Tuple1(1), Tuple1(2))).toDS(), Set(Tuple1(1), Tuple1(2))) + + checkDataset(Seq(HSet(1, 2)).toDS(), HSet(1, 2)) + checkDataset(Seq(HSet(1.toLong, 2.toLong)).toDS(), HSet(1.toLong, 2.toLong)) + checkDataset(Seq(HSet(1.toDouble, 2.toDouble)).toDS(), HSet(1.toDouble, 2.toDouble)) + checkDataset(Seq(HSet(1.toFloat, 2.toFloat)).toDS(), HSet(1.toFloat, 2.toFloat)) + checkDataset(Seq(HSet(1.toByte, 2.toByte)).toDS(), HSet(1.toByte, 2.toByte)) + checkDataset(Seq(HSet(1.toShort, 2.toShort)).toDS(), HSet(1.toShort, 2.toShort)) + checkDataset(Seq(HSet(true, false)).toDS(), HSet(true, false)) + checkDataset(Seq(HSet("test1", "test2")).toDS(), HSet("test1", "test2")) + checkDataset(Seq(HSet(Tuple1(1), Tuple1(2))).toDS(), HSet(Tuple1(1), Tuple1(2))) + + checkDataset(Seq(Seq(Some(1), None), Seq(Some(2))).toDF("c").as[Set[Integer]], + Seq(Set[Integer](1, null), Set[Integer](2)): _*) + } + test("nested sequences") { checkDataset(Seq(Seq(Seq(1))).toDS(), Seq(Seq(1))) checkDataset(Seq(List(Queue(1))).toDS(), List(Queue(1))) @@ -352,6 +378,11 @@ class DatasetPrimitiveSuite extends QueryTest with SharedSQLContext { 
checkDataset(Seq(LHMap(Map(1 -> 2) -> 3)).toDS(), LHMap(Map(1 -> 2) -> 3)) } + test("nested set") { + checkDataset(Seq(Set(HSet(1, 2), HSet(3, 4))).toDS(), Set(HSet(1, 2), HSet(3, 4))) + checkDataset(Seq(HSet(Set(1, 2), Set(3, 4))).toDS(), HSet(Set(1, 2), Set(3, 4))) + } + test("package objects") { import packageobject._ checkDataset(Seq(PackageClass(1)).toDS(), PackageClass(1)) From bf66335acab3c0c188f6c378eb8aa6948a259cb2 Mon Sep 17 00:00:00 2001 From: Wang Gengliang Date: Thu, 6 Jul 2017 13:58:27 -0700 Subject: [PATCH 058/125] [SPARK-21323][SQL] Rename plans.logical.statsEstimation.Range to ValueInterval ## What changes were proposed in this pull request? Rename org.apache.spark.sql.catalyst.plans.logical.statsEstimation.Range to ValueInterval. The current naming is identical to logical operator "range". Refactoring it to ValueInterval is more accurate. ## How was this patch tested? unit test Please review http://spark.apache.org/contributing.html before opening a pull request. Author: Wang Gengliang Closes #18549 from gengliangwang/ValueInterval. --- .../statsEstimation/FilterEstimation.scala | 36 ++++++++-------- .../statsEstimation/JoinEstimation.scala | 14 +++---- .../{Range.scala => ValueInterval.scala} | 41 ++++++++++--------- 3 files changed, 48 insertions(+), 43 deletions(-) rename sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/{Range.scala => ValueInterval.scala} (65%) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/FilterEstimation.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/FilterEstimation.scala index 5a3bee7b9e44..e13db85c7a76 100755 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/FilterEstimation.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/FilterEstimation.scala @@ -316,8 +316,8 @@ case class FilterEstimation(plan: Filter) extends Logging { // decide if the value is in [min, max] of the column. // We currently don't store min/max for binary/string type. // Hence, we assume it is in boundary for binary/string type. 
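The rename below does not change the underlying math: two numeric intervals intersect exactly when each minimum is at most the other's maximum, and their intersection keeps the larger minimum and the smaller maximum. A self-contained sketch of that logic with illustrative names (the patch's own classes carry Option-typed endpoints and Spark's Decimal instead):

```scala
// Decimal-like endpoints are unified as BigDecimal in this sketch.
final case class Interval(min: BigDecimal, max: BigDecimal) {
  def contains(v: BigDecimal): Boolean = min <= v && v <= max
}

// Two intervals overlap iff each min is at most the other's max.
def isIntersected(a: Interval, b: Interval): Boolean =
  a.min <= b.max && a.max >= b.min

// For overlapping intervals, keep the larger min and the smaller max.
def intersect(a: Interval, b: Interval): Option[Interval] =
  if (isIntersected(a, b)) Some(Interval(a.min.max(b.min), a.max.min(b.max)))
  else None
```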
- val statsRange = Range(colStat.min, colStat.max, attr.dataType) - if (statsRange.contains(literal)) { + val statsInterval = ValueInterval(colStat.min, colStat.max, attr.dataType) + if (statsInterval.contains(literal)) { if (update) { // We update ColumnStat structure after apply this equality predicate: // Set distinctCount to 1, nullCount to 0, and min/max values (if exist) to the literal @@ -388,9 +388,10 @@ case class FilterEstimation(plan: Filter) extends Logging { // use [min, max] to filter the original hSet dataType match { case _: NumericType | BooleanType | DateType | TimestampType => - val statsRange = Range(colStat.min, colStat.max, dataType).asInstanceOf[NumericRange] + val statsInterval = + ValueInterval(colStat.min, colStat.max, dataType).asInstanceOf[NumericValueInterval] val validQuerySet = hSet.filter { v => - v != null && statsRange.contains(Literal(v, dataType)) + v != null && statsInterval.contains(Literal(v, dataType)) } if (validQuerySet.isEmpty) { @@ -440,12 +441,13 @@ case class FilterEstimation(plan: Filter) extends Logging { update: Boolean): Option[BigDecimal] = { val colStat = colStatsMap(attr) - val statsRange = Range(colStat.min, colStat.max, attr.dataType).asInstanceOf[NumericRange] - val max = statsRange.max.toBigDecimal - val min = statsRange.min.toBigDecimal + val statsInterval = + ValueInterval(colStat.min, colStat.max, attr.dataType).asInstanceOf[NumericValueInterval] + val max = statsInterval.max.toBigDecimal + val min = statsInterval.min.toBigDecimal val ndv = BigDecimal(colStat.distinctCount) - // determine the overlapping degree between predicate range and column's range + // determine the overlapping degree between predicate interval and column's interval val numericLiteral = if (literal.dataType == BooleanType) { if (literal.value.asInstanceOf[Boolean]) BigDecimal(1) else BigDecimal(0) } else { @@ -566,18 +568,18 @@ case class FilterEstimation(plan: Filter) extends Logging { } val colStatLeft = colStatsMap(attrLeft) - val statsRangeLeft = Range(colStatLeft.min, colStatLeft.max, attrLeft.dataType) - .asInstanceOf[NumericRange] - val maxLeft = statsRangeLeft.max - val minLeft = statsRangeLeft.min + val statsIntervalLeft = ValueInterval(colStatLeft.min, colStatLeft.max, attrLeft.dataType) + .asInstanceOf[NumericValueInterval] + val maxLeft = statsIntervalLeft.max + val minLeft = statsIntervalLeft.min val colStatRight = colStatsMap(attrRight) - val statsRangeRight = Range(colStatRight.min, colStatRight.max, attrRight.dataType) - .asInstanceOf[NumericRange] - val maxRight = statsRangeRight.max - val minRight = statsRangeRight.min + val statsIntervalRight = ValueInterval(colStatRight.min, colStatRight.max, attrRight.dataType) + .asInstanceOf[NumericValueInterval] + val maxRight = statsIntervalRight.max + val minRight = statsIntervalRight.min - // determine the overlapping degree between predicate range and column's range + // determine the overlapping degree between predicate interval and column's interval val allNotNull = (colStatLeft.nullCount == 0) && (colStatRight.nullCount == 0) val (noOverlap: Boolean, completeOverlap: Boolean) = op match { // Left < Right or Left <= Right diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/JoinEstimation.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/JoinEstimation.scala index f48196997a24..dcbe36da91df 100644 --- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/JoinEstimation.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/JoinEstimation.scala @@ -175,9 +175,9 @@ case class InnerOuterEstimation(join: Join) extends Logging { // Check if the two sides are disjoint val leftKeyStats = leftStats.attributeStats(leftKey) val rightKeyStats = rightStats.attributeStats(rightKey) - val lRange = Range(leftKeyStats.min, leftKeyStats.max, leftKey.dataType) - val rRange = Range(rightKeyStats.min, rightKeyStats.max, rightKey.dataType) - if (Range.isIntersected(lRange, rRange)) { + val lInterval = ValueInterval(leftKeyStats.min, leftKeyStats.max, leftKey.dataType) + val rInterval = ValueInterval(rightKeyStats.min, rightKeyStats.max, rightKey.dataType) + if (ValueInterval.isIntersected(lInterval, rInterval)) { // Get the largest ndv among pairs of join keys val maxNdv = leftKeyStats.distinctCount.max(rightKeyStats.distinctCount) if (maxNdv > ndvDenom) ndvDenom = maxNdv @@ -239,16 +239,16 @@ case class InnerOuterEstimation(join: Join) extends Logging { joinKeyPairs.foreach { case (leftKey, rightKey) => val leftKeyStats = leftStats.attributeStats(leftKey) val rightKeyStats = rightStats.attributeStats(rightKey) - val lRange = Range(leftKeyStats.min, leftKeyStats.max, leftKey.dataType) - val rRange = Range(rightKeyStats.min, rightKeyStats.max, rightKey.dataType) + val lInterval = ValueInterval(leftKeyStats.min, leftKeyStats.max, leftKey.dataType) + val rInterval = ValueInterval(rightKeyStats.min, rightKeyStats.max, rightKey.dataType) // When we reach here, join selectivity is not zero, so each pair of join keys should be // intersected. - assert(Range.isIntersected(lRange, rRange)) + assert(ValueInterval.isIntersected(lInterval, rInterval)) // Update intersected column stats assert(leftKey.dataType.sameType(rightKey.dataType)) val newNdv = leftKeyStats.distinctCount.min(rightKeyStats.distinctCount) - val (newMin, newMax) = Range.intersect(lRange, rRange, leftKey.dataType) + val (newMin, newMax) = ValueInterval.intersect(lInterval, rInterval, leftKey.dataType) val newMaxLen = math.min(leftKeyStats.maxLen, rightKeyStats.maxLen) val newAvgLen = (leftKeyStats.avgLen + rightKeyStats.avgLen) / 2 val newStats = ColumnStat(newNdv, newMin, newMax, 0, newAvgLen, newMaxLen) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/Range.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/ValueInterval.scala similarity index 65% rename from sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/Range.scala rename to sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/ValueInterval.scala index 4ac5ba5689f8..0caaf796a3b6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/Range.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/ValueInterval.scala @@ -22,12 +22,12 @@ import org.apache.spark.sql.types._ /** Value range of a column. */ -trait Range { +trait ValueInterval { def contains(l: Literal): Boolean } -/** For simplicity we use decimal to unify operations of numeric ranges. */ -case class NumericRange(min: Decimal, max: Decimal) extends Range { +/** For simplicity we use decimal to unify operations of numeric intervals. 
*/ +case class NumericValueInterval(min: Decimal, max: Decimal) extends ValueInterval { override def contains(l: Literal): Boolean = { val lit = EstimationUtils.toDecimal(l.value, l.dataType) min <= lit && max >= lit @@ -38,46 +38,49 @@ case class NumericRange(min: Decimal, max: Decimal) extends Range { * This version of Spark does not have min/max for binary/string types, we define their default * behaviors by this class. */ -class DefaultRange extends Range { +class DefaultValueInterval extends ValueInterval { override def contains(l: Literal): Boolean = true } /** This is for columns with only null values. */ -class NullRange extends Range { +class NullValueInterval extends ValueInterval { override def contains(l: Literal): Boolean = false } -object Range { - def apply(min: Option[Any], max: Option[Any], dataType: DataType): Range = dataType match { - case StringType | BinaryType => new DefaultRange() - case _ if min.isEmpty || max.isEmpty => new NullRange() +object ValueInterval { + def apply( + min: Option[Any], + max: Option[Any], + dataType: DataType): ValueInterval = dataType match { + case StringType | BinaryType => new DefaultValueInterval() + case _ if min.isEmpty || max.isEmpty => new NullValueInterval() case _ => - NumericRange( + NumericValueInterval( min = EstimationUtils.toDecimal(min.get, dataType), max = EstimationUtils.toDecimal(max.get, dataType)) } - def isIntersected(r1: Range, r2: Range): Boolean = (r1, r2) match { - case (_, _: DefaultRange) | (_: DefaultRange, _) => - // The DefaultRange represents string/binary types which do not have max/min stats, + def isIntersected(r1: ValueInterval, r2: ValueInterval): Boolean = (r1, r2) match { + case (_, _: DefaultValueInterval) | (_: DefaultValueInterval, _) => + // The DefaultValueInterval represents string/binary types which do not have max/min stats, // we assume they are intersected to be conservative on estimation true - case (_, _: NullRange) | (_: NullRange, _) => + case (_, _: NullValueInterval) | (_: NullValueInterval, _) => false - case (n1: NumericRange, n2: NumericRange) => + case (n1: NumericValueInterval, n2: NumericValueInterval) => n1.min.compareTo(n2.max) <= 0 && n1.max.compareTo(n2.min) >= 0 } /** - * Intersected results of two ranges. This is only for two overlapped ranges. + * Intersected results of two intervals. This is only for two overlapped intervals. * The outputs are the intersected min/max values. */ - def intersect(r1: Range, r2: Range, dt: DataType): (Option[Any], Option[Any]) = { + def intersect(r1: ValueInterval, r2: ValueInterval, dt: DataType): (Option[Any], Option[Any]) = { (r1, r2) match { - case (_, _: DefaultRange) | (_: DefaultRange, _) => + case (_, _: DefaultValueInterval) | (_: DefaultValueInterval, _) => // binary/string types don't support intersecting. (None, None) - case (n1: NumericRange, n2: NumericRange) => + case (n1: NumericValueInterval, n2: NumericValueInterval) => // Choose the maximum of two min values, and the minimum of two max values. val newMin = if (n1.min <= n2.min) n2.min else n1.min val newMax = if (n1.max <= n2.max) n1.max else n2.max From 0217dfd26f89133f146197359b556c9bf5aca172 Mon Sep 17 00:00:00 2001 From: Tathagata Das Date: Thu, 6 Jul 2017 17:28:20 -0700 Subject: [PATCH 059/125] [SPARK-21267][SS][DOCS] Update Structured Streaming Documentation ## What changes were proposed in this pull request? 
A few changes to the Structured Streaming documentation: - Clarify that the entire stream input table is not materialized - Add information for Ganglia - Add Kafka Sink to the main docs - Removed a couple of leftover experimental tags - Added more associated reading material and talk videos. In addition, https://github.com/apache/spark/pull/16856 broke the link to the RDD programming guide in several places while renaming the page. This PR fixes those links. cc sameeragarwal, cloud-fan. - Added a redirection to avoid breaking internal and possible external links. - Removed unnecessary redirection pages that had been there since the separate Scala, Java, and Python programming guides were merged together in 2013 or 2014. ## How was this patch tested? Please review http://spark.apache.org/contributing.html before opening a pull request. Author: Tathagata Das Closes #18485 from tdas/SPARK-21267. --- docs/_layouts/global.html | 7 +- docs/index.md | 13 +- docs/java-programming-guide.md | 7 - docs/programming-guide.md | 7 + docs/python-programming-guide.md | 7 - docs/rdd-programming-guide.md | 2 +- docs/scala-programming-guide.md | 7 - docs/sql-programming-guide.md | 16 +- .../structured-streaming-programming-guide.md | 172 +++++++++++++++--- .../scala/org/apache/spark/sql/Dataset.scala | 3 - 10 files changed, 169 insertions(+), 72 deletions(-) delete mode 100644 docs/java-programming-guide.md create mode 100644 docs/programming-guide.md delete mode 100644 docs/python-programming-guide.md delete mode 100644 docs/scala-programming-guide.md diff --git a/docs/_layouts/global.html b/docs/_layouts/global.html index c00d0db63cd1..570483c0b04e 100755 --- a/docs/_layouts/global.html +++ b/docs/_layouts/global.html @@ -69,11 +69,10 @@ Programming Guides