Skip to content

Commit 8f6b09a

Browse files
committed
Update test harness to work with both Hive 12 and 13.
1 parent f044843 commit 8f6b09a

File tree

21 files changed

+184
-76
lines changed

21 files changed

+184
-76
lines changed

dev/run-tests

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -167,7 +167,7 @@ CURRENT_BLOCK=$BLOCK_SPARK_UNIT_TESTS
167167
# If the Spark SQL tests are enabled, run the tests with the Hive profiles enabled.
168168
# This must be a single argument, as it is.
169169
if [ -n "$_RUN_SQL_TESTS" ]; then
170-
SBT_MAVEN_PROFILES_ARGS="$SBT_MAVEN_PROFILES_ARGS -Phive -Phive-0.12.0"
170+
SBT_MAVEN_PROFILES_ARGS="$SBT_MAVEN_PROFILES_ARGS -Phive"
171171
fi
172172

173173
if [ -n "$_SQL_TESTS_ONLY" ]; then

project/SparkBuild.scala

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -253,7 +253,11 @@ object Hive {
253253
|import org.apache.spark.sql.hive._
254254
|import org.apache.spark.sql.hive.test.TestHive._
255255
|import org.apache.spark.sql.parquet.ParquetTestData""".stripMargin,
256-
cleanupCommands in console := "sparkContext.stop()"
256+
cleanupCommands in console := "sparkContext.stop()",
257+
// Some of our log4j jars make it impossible to submit jobs from this JVM to Hive Map/Reduce
258+
// in order to generate golden files. This is only required for developers who are adding new
259+
// query tests.
260+
fullClasspath in Test := (fullClasspath in Test).value.filterNot { f => f.toString.contains("jcl-over") }
257261
)
258262

259263
}

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,8 @@ object HiveTypeCoercion {
5252
*/
5353
trait HiveTypeCoercion {
5454

55+
import HiveTypeCoercion._
56+
5557
val typeCoercionRules =
5658
PropagateTypes ::
5759
ConvertNaNs ::
@@ -340,6 +342,13 @@ trait HiveTypeCoercion {
340342
// Skip nodes whose children have not been resolved yet.
341343
case e if !e.childrenResolved => e
342344

345+
case a @ CreateArray(children) if !a.resolved =>
346+
val commonType = a.childTypes.reduce(
347+
(a,b) =>
348+
findTightestCommonType(a,b).getOrElse(StringType))
349+
CreateArray(
350+
children.map(c => if (c.dataType == commonType) c else Cast(c, commonType)))
351+
343352
// Promote SUM, SUM DISTINCT and AVERAGE to largest types to prevent overflows.
344353
case s @ Sum(e @ DecimalType()) => s // Decimal is already the biggest.
345354
case Sum(e @ IntegralType()) if e.dataType != LongType => Sum(Cast(e, LongType))
@@ -356,6 +365,10 @@ trait HiveTypeCoercion {
356365
Average(Cast(e, LongType))
357366
case Average(e @ FractionalType()) if e.dataType != DoubleType =>
358367
Average(Cast(e, DoubleType))
368+
369+
// Hive lets you do aggregation of timestamps... for some reason
370+
case Sum(e @ TimestampType()) => Sum(Cast(e, DoubleType))
371+
case Average(e @ TimestampType()) => Average(Cast(e, DoubleType))
359372
}
360373
}
361374

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypes.scala

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,3 +101,28 @@ case class GetField(child: Expression, fieldName: String) extends UnaryExpressio
101101

102102
override def toString = s"$child.$fieldName"
103103
}
104+
105+
/**
106+
* Returns an Array containing the evaluation of all children expressions.
107+
*/
108+
case class CreateArray(children: Seq[Expression]) extends Expression {
109+
override type EvaluatedType = Any
110+
111+
lazy val childTypes = children.map(_.dataType).distinct
112+
113+
override lazy val resolved =
114+
childrenResolved && childTypes.size <= 1
115+
116+
override def dataType: DataType = {
117+
assert(resolved, s"Invalid dataType of mixed ArrayType ${childTypes.mkString(",")}")
118+
ArrayType(childTypes.headOption.getOrElse(NullType))
119+
}
120+
121+
override def nullable: Boolean = false
122+
123+
override def eval(input: Row): Any = {
124+
children.map(_.eval(input))
125+
}
126+
127+
override def toString = s"Array(${children.mkString(",")})"
128+
}

sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,6 @@ import org.apache.spark.sql.catalyst.optimizer.{Optimizer, DefaultOptimizer}
3333
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
3434
import org.apache.spark.sql.catalyst.rules.RuleExecutor
3535
import org.apache.spark.sql.catalyst.types.DataType
36-
import org.apache.spark.sql.columnar.InMemoryRelation
3736
import org.apache.spark.sql.execution.{SparkStrategies, _}
3837
import org.apache.spark.sql.json._
3938
import org.apache.spark.sql.parquet.ParquetRelation

sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala

Lines changed: 68 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ import java.util.{Locale, TimeZone}
2323
import org.scalatest.BeforeAndAfter
2424

2525
import org.apache.spark.sql.SQLConf
26+
import org.apache.spark.sql.hive.HiveShim
2627
import org.apache.spark.sql.hive.test.TestHive
2728

2829
/**
@@ -135,6 +136,9 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
135136
"stats20",
136137
"alter_merge_stats",
137138
"columnstats.*",
139+
"annotate_stats.*",
140+
"database_drop",
141+
"index_serde",
138142

139143

140144
// Hive seems to think 1.0 > NaN = true && 1.0 < NaN = false... which is wrong.
@@ -211,32 +215,27 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
211215
"describe_comment_indent",
212216

213217
// Limit clause without an ordering, which causes failure.
214-
"orc_predicate_pushdown"
215-
)
218+
"orc_predicate_pushdown",
219+
220+
// Requires precision decimal support:
221+
"decimal_1",
222+
"udf_pmod",
223+
"udf_when",
224+
"udf_case",
225+
"udf_to_double",
226+
"udf_to_float",
227+
228+
// Needs constant object inspectors
229+
"udf_round",
230+
"udf7"
231+
) ++ HiveShim.compatibilityBlackList
216232

217233
/**
218234
* The set of tests that are believed to be working in catalyst. Tests not on whiteList or
219235
* blacklist are implicitly marked as ignored.
220236
*/
221237
override def whiteList = Seq(
222238
"add_part_exist",
223-
"dynamic_partition_skip_default",
224-
"infer_bucket_sort_dyn_part",
225-
"load_dyn_part1",
226-
"load_dyn_part2",
227-
"load_dyn_part3",
228-
"load_dyn_part4",
229-
"load_dyn_part5",
230-
"load_dyn_part6",
231-
"load_dyn_part7",
232-
"load_dyn_part8",
233-
"load_dyn_part9",
234-
"load_dyn_part10",
235-
"load_dyn_part11",
236-
"load_dyn_part12",
237-
"load_dyn_part13",
238-
"load_dyn_part14",
239-
"load_dyn_part14_win",
240239
"add_part_multiple",
241240
"add_partition_no_whitelist",
242241
"add_partition_with_whitelist",
@@ -256,6 +255,11 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
256255
"alter_varchar2",
257256
"alter_view_as_select",
258257
"ambiguous_col",
258+
"annotate_stats_join",
259+
"annotate_stats_limit",
260+
"annotate_stats_part",
261+
"annotate_stats_table",
262+
"annotate_stats_union",
259263
"auto_join0",
260264
"auto_join1",
261265
"auto_join10",
@@ -299,6 +303,7 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
299303
"auto_sortmerge_join_13",
300304
"auto_sortmerge_join_14",
301305
"auto_sortmerge_join_15",
306+
"auto_sortmerge_join_16",
302307
"auto_sortmerge_join_2",
303308
"auto_sortmerge_join_3",
304309
"auto_sortmerge_join_4",
@@ -340,7 +345,10 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
340345
"create_skewed_table1",
341346
"create_struct_table",
342347
"cross_join",
348+
"cross_product_check_1",
349+
"cross_product_check_2",
343350
"ct_case_insensitive",
351+
"database_drop",
344352
"database_location",
345353
"database_properties",
346354
"date_2",
@@ -360,32 +368,42 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
360368
"diff_part_input_formats",
361369
"disable_file_format_check",
362370
"disallow_incompatible_type_change_off",
371+
"distinct_stats",
372+
"drop_database_removes_partition_dirs",
363373
"drop_function",
364374
"drop_index",
375+
"drop_index_removes_partition_dirs",
365376
"drop_multi_partitions",
366377
"drop_partitions_filter",
367378
"drop_partitions_filter2",
368379
"drop_partitions_filter3",
369380
"drop_partitions_ignore_protection",
370381
"drop_table",
371382
"drop_table2",
383+
"drop_table_removes_partition_dirs",
372384
"drop_view",
385+
"dynamic_partition_skip_default",
373386
"escape_clusterby1",
374387
"escape_distributeby1",
375388
"escape_orderby1",
376389
"escape_sortby1",
390+
"explain_rearrange",
377391
"fetch_aggregation",
392+
"fileformat_mix",
378393
"fileformat_sequencefile",
379394
"fileformat_text",
380395
"filter_join_breaktask",
381396
"filter_join_breaktask2",
382397
"groupby1",
383398
"groupby11",
399+
"groupby12",
400+
"groupby1_limit",
384401
"groupby1_map",
385402
"groupby1_map_nomap",
386403
"groupby1_map_skew",
387404
"groupby1_noskew",
388405
"groupby2",
406+
"groupby2_limit",
389407
"groupby2_map",
390408
"groupby2_map_skew",
391409
"groupby2_noskew",
@@ -406,6 +424,7 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
406424
"groupby7_map_multi_single_reducer",
407425
"groupby7_map_skew",
408426
"groupby7_noskew",
427+
"groupby7_noskew_multi_single_reducer",
409428
"groupby8",
410429
"groupby8_map",
411430
"groupby8_map_skew",
@@ -432,6 +451,8 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
432451
"groupby_sort_test_1",
433452
"having",
434453
"implicit_cast1",
454+
"index_serde",
455+
"infer_bucket_sort_dyn_part",
435456
"innerjoin",
436457
"inoutdriver",
437458
"input",
@@ -502,7 +523,6 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
502523
"join17",
503524
"join18",
504525
"join19",
505-
"join_1to1",
506526
"join2",
507527
"join20",
508528
"join21",
@@ -534,6 +554,7 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
534554
"join7",
535555
"join8",
536556
"join9",
557+
"join_1to1",
537558
"join_array",
538559
"join_casesensitive",
539560
"join_empty",
@@ -557,7 +578,21 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
557578
"literal_double",
558579
"literal_ints",
559580
"literal_string",
581+
"load_dyn_part1",
582+
"load_dyn_part10",
583+
"load_dyn_part11",
584+
"load_dyn_part12",
585+
"load_dyn_part13",
586+
"load_dyn_part14",
587+
"load_dyn_part14_win",
588+
"load_dyn_part2",
589+
"load_dyn_part3",
590+
"load_dyn_part4",
591+
"load_dyn_part5",
592+
"load_dyn_part6",
560593
"load_dyn_part7",
594+
"load_dyn_part8",
595+
"load_dyn_part9",
561596
"load_file_with_space_in_the_name",
562597
"loadpart1",
563598
"louter_join_ppr",
@@ -578,13 +613,13 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
578613
"merge1",
579614
"merge2",
580615
"mergejoins",
581-
"multigroupby_singlemr",
616+
"multiMapJoin1",
617+
"multiMapJoin2",
582618
"multi_insert_gby",
583619
"multi_insert_gby3",
584620
"multi_insert_lateral_view",
585621
"multi_join_union",
586-
"multiMapJoin1",
587-
"multiMapJoin2",
622+
"multigroupby_singlemr",
588623
"noalias_subq1",
589624
"nomore_ambiguous_table_col",
590625
"nonblock_op_deduplicate",
@@ -607,10 +642,10 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
607642
"outer_join_ppr",
608643
"parallel",
609644
"parenthesis_star_by",
610-
"partcols1",
611645
"part_inherit_tbl_props",
612646
"part_inherit_tbl_props_empty",
613647
"part_inherit_tbl_props_with_star",
648+
"partcols1",
614649
"partition_date",
615650
"partition_schema1",
616651
"partition_serde_format",
@@ -641,7 +676,6 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
641676
"ppd_outer_join5",
642677
"ppd_random",
643678
"ppd_repeated_alias",
644-
"ppd_transform",
645679
"ppd_udf_col",
646680
"ppd_union",
647681
"ppr_allchildsarenull",
@@ -674,15 +708,15 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
674708
"serde_regex",
675709
"serde_reported_schema",
676710
"set_variable_sub",
677-
"show_create_table_partitioned",
678-
"show_create_table_delimited",
711+
"show_columns",
679712
"show_create_table_alter",
680-
"show_create_table_view",
681-
"show_create_table_serde",
682713
"show_create_table_db_table",
714+
"show_create_table_delimited",
683715
"show_create_table_does_not_exist",
684716
"show_create_table_index",
685-
"show_columns",
717+
"show_create_table_partitioned",
718+
"show_create_table_serde",
719+
"show_create_table_view",
686720
"show_describe_func_quotes",
687721
"show_functions",
688722
"show_partitions",
@@ -738,12 +772,14 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
738772
"udaf_covar_pop",
739773
"udaf_covar_samp",
740774
"udaf_histogram_numeric",
741-
"udf_10_trims",
742775
"udf2",
743776
"udf6",
744777
"udf7",
745778
"udf8",
746779
"udf9",
780+
"udf_10_trims",
781+
"udf_E",
782+
"udf_PI",
747783
"udf_abs",
748784
"udf_acos",
749785
"udf_add",
@@ -774,14 +810,13 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
774810
"udf_cos",
775811
"udf_count",
776812
"udf_date_add",
777-
"udf_datediff",
778813
"udf_date_sub",
814+
"udf_datediff",
779815
"udf_day",
780816
"udf_dayofmonth",
781817
"udf_degrees",
782818
"udf_div",
783819
"udf_double",
784-
"udf_E",
785820
"udf_elt",
786821
"udf_equal",
787822
"udf_exp",
@@ -826,7 +861,6 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
826861
"udf_nvl",
827862
"udf_or",
828863
"udf_parse_url",
829-
"udf_PI",
830864
"udf_pmod",
831865
"udf_positive",
832866
"udf_pow",

0 commit comments

Comments
 (0)