Commit 3a845d3

marmbrus authored and JoshRosen committed
[SQL] Update Hive test harness for Hive 12 and 13
As part of the upgrade I also copy the newest version of the query tests, and whitelist a bunch of new ones that are now passing.

Author: Michael Armbrust <[email protected]>

Closes #2936 from marmbrus/fix13tests and squashes the following commits:

d9cbdab [Michael Armbrust] Remove user specific tests
65801cd [Michael Armbrust] style and rat
8f6b09a [Michael Armbrust] Update test harness to work with both Hive 12 and 13.
f044843 [Michael Armbrust] Update Hive query tests and golden files to 0.13
1 parent 898b22a commit 3a845d3

File tree

8,166 files changed: +38,307 −47,487 lines


.rat-excludes

Lines changed: 1 addition & 0 deletions
@@ -48,6 +48,7 @@ sbt-launch-lib.bash
 plugins.sbt
 work
 .*\.q
+.*\.qv
 golden
 test.out/*
 .*iml

dev/run-tests

Lines changed: 1 addition & 1 deletion
@@ -167,7 +167,7 @@ CURRENT_BLOCK=$BLOCK_SPARK_UNIT_TESTS
 # If the Spark SQL tests are enabled, run the tests with the Hive profiles enabled.
 # This must be a single argument, as it is.
 if [ -n "$_RUN_SQL_TESTS" ]; then
-    SBT_MAVEN_PROFILES_ARGS="$SBT_MAVEN_PROFILES_ARGS -Phive -Phive-0.12.0"
+    SBT_MAVEN_PROFILES_ARGS="$SBT_MAVEN_PROFILES_ARGS -Phive"
 fi

 if [ -n "$_SQL_TESTS_ONLY" ]; then

project/SparkBuild.scala

Lines changed: 5 additions & 1 deletion
@@ -253,7 +253,11 @@ object Hive {
       |import org.apache.spark.sql.hive._
       |import org.apache.spark.sql.hive.test.TestHive._
       |import org.apache.spark.sql.parquet.ParquetTestData""".stripMargin,
-    cleanupCommands in console := "sparkContext.stop()"
+    cleanupCommands in console := "sparkContext.stop()",
+    // Some of our log4j jars make it impossible to submit jobs from this JVM to Hive Map/Reduce
+    // in order to generate golden files. This is only required for developers who are adding
+    // new query tests.
+    fullClasspath in Test := (fullClasspath in Test).value.filterNot { f => f.toString.contains("jcl-over") }
   )
 }
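
The classpath filter above relies on a standard sbt idiom: fullClasspath in Test is a Seq[Attributed[File]], so individual jars can be dropped by matching on their file path. A minimal sketch of the same idea, assuming sbt 0.13 syntax as in the build file (the "jcl-over" substring is what matches the jcl-over-slf4j bridge jar named in the comment):

// Drop any test-classpath entry whose path mentions "jcl-over",
// e.g. jcl-over-slf4j-<version>.jar; all other entries pass through.
fullClasspath in Test := (fullClasspath in Test).value.filterNot { entry =>
  entry.data.getPath.contains("jcl-over")
}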

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala

Lines changed: 13 additions & 0 deletions
@@ -52,6 +52,8 @@ object HiveTypeCoercion {
   */
 trait HiveTypeCoercion {

+  import HiveTypeCoercion._
+
   val typeCoercionRules =
     PropagateTypes ::
     ConvertNaNs ::
@@ -340,6 +342,13 @@ trait HiveTypeCoercion {
     // Skip nodes who's children have not been resolved yet.
     case e if !e.childrenResolved => e

+    case a @ CreateArray(children) if !a.resolved =>
+      val commonType = a.childTypes.reduce(
+        (a,b) =>
+        findTightestCommonType(a,b).getOrElse(StringType))
+      CreateArray(
+        children.map(c => if (c.dataType == commonType) c else Cast(c, commonType)))
+
     // Promote SUM, SUM DISTINCT and AVERAGE to largest types to prevent overflows.
     case s @ Sum(e @ DecimalType()) => s // Decimal is already the biggest.
     case Sum(e @ IntegralType()) if e.dataType != LongType => Sum(Cast(e, LongType))
@@ -356,6 +365,10 @@ trait HiveTypeCoercion {
       Average(Cast(e, LongType))
     case Average(e @ FractionalType()) if e.dataType != DoubleType =>
       Average(Cast(e, DoubleType))
+
+    // Hive lets you do aggregation of timestamps... for some reason
+    case Sum(e @ TimestampType()) => Sum(Cast(e, DoubleType))
+    case Average(e @ TimestampType()) => Average(Cast(e, DoubleType))
   }
 }
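
The CreateArray rule above widens mixed element types to their tightest common type, falling back to StringType when no promotion applies, so that childTypes.distinct collapses to a single entry and the expression resolves. A self-contained sketch of that reduction (IntType, DoubleType, StringType, and tightest are simplified stand-ins, not the Catalyst types):

sealed trait DataType
case object IntType extends DataType
case object DoubleType extends DataType
case object StringType extends DataType

// Simplified stand-in for HiveTypeCoercion.findTightestCommonType.
def tightest(a: DataType, b: DataType): Option[DataType] = (a, b) match {
  case (x, y) if x == y => Some(x)
  case (IntType, DoubleType) | (DoubleType, IntType) => Some(DoubleType)
  case _ => None
}

// Fold the element types pairwise, defaulting to StringType on a miss,
// exactly the shape of the reduce in the rule above.
def commonElementType(types: Seq[DataType]): DataType =
  types.reduce((a, b) => tightest(a, b).getOrElse(StringType))

commonElementType(Seq(IntType, DoubleType)) // DoubleType: numeric widening
commonElementType(Seq(IntType, StringType)) // StringType: the fallback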

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypes.scala

Lines changed: 25 additions & 0 deletions
@@ -101,3 +101,28 @@ case class GetField(child: Expression, fieldName: String) extends UnaryExpression

   override def toString = s"$child.$fieldName"
 }
+
+/**
+ * Returns an Array containing the evaluation of all children expressions.
+ */
+case class CreateArray(children: Seq[Expression]) extends Expression {
+  override type EvaluatedType = Any
+
+  lazy val childTypes = children.map(_.dataType).distinct
+
+  override lazy val resolved =
+    childrenResolved && childTypes.size <= 1
+
+  override def dataType: DataType = {
+    assert(resolved, s"Invalid dataType of mixed ArrayType ${childTypes.mkString(",")}")
+    ArrayType(childTypes.headOption.getOrElse(NullType))
+  }
+
+  override def nullable: Boolean = false
+
+  override def eval(input: Row): Any = {
+    children.map(_.eval(input))
+  }
+
+  override def toString = s"Array(${children.mkString(",")})"
+}
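
CreateArray.eval is a left-to-right evaluation of the children collected into a Seq, which is how Catalyst represents an array value at runtime. A self-contained sketch of that contract, using simplified Expr and row stand-ins rather than the Catalyst classes:

// Simplified stand-ins: a row is modeled here as a name -> value map.
trait Expr { def eval(input: Map[String, Any]): Any }
case class Lit(v: Any) extends Expr {
  def eval(input: Map[String, Any]): Any = v
}
case class Attr(name: String) extends Expr {
  def eval(input: Map[String, Any]): Any = input(name)
}
// Mirrors CreateArray.eval: evaluate each child in order, collect into a Seq.
case class MkArray(children: Seq[Expr]) extends Expr {
  def eval(input: Map[String, Any]): Seq[Any] = children.map(_.eval(input))
}

MkArray(Seq(Lit(1), Attr("x"))).eval(Map("x" -> 2)) // Seq(1, 2)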

sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala

Lines changed: 0 additions & 1 deletion
@@ -33,7 +33,6 @@ import org.apache.spark.sql.catalyst.optimizer.{Optimizer, DefaultOptimizer}
 import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
 import org.apache.spark.sql.catalyst.rules.RuleExecutor
 import org.apache.spark.sql.catalyst.types.DataType
-import org.apache.spark.sql.columnar.InMemoryRelation
 import org.apache.spark.sql.execution.{SparkStrategies, _}
 import org.apache.spark.sql.json._
 import org.apache.spark.sql.parquet.ParquetRelation
