From 9ce8846dfc3678649cc041dc762fcc6ed8038527 Mon Sep 17 00:00:00 2001
From: hyukjinkwon
Date: Sun, 15 Jan 2017 23:11:45 +0900
Subject: [PATCH 1/2] Fix flaky, newly introduced and missed test failures on Windows

---
 .../org/apache/spark/scheduler/SparkListenerSuite.scala    | 2 +-
 .../org/apache/spark/sql/hive/execution/HiveDDLSuite.scala | 6 +++---
 .../apache/spark/sql/hive/execution/HiveQuerySuite.scala   | 6 ++++--
 .../apache/spark/sql/hive/execution/HiveSerDeSuite.scala   | 2 +-
 4 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/core/src/test/scala/org/apache/spark/scheduler/SparkListenerSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/SparkListenerSuite.scala
index e8a88d4909a8..fefa8072e61f 100644
--- a/core/src/test/scala/org/apache/spark/scheduler/SparkListenerSuite.scala
+++ b/core/src/test/scala/org/apache/spark/scheduler/SparkListenerSuite.scala
@@ -229,7 +229,7 @@ class SparkListenerSuite extends SparkFunSuite with LocalSparkContext with Match
     }
 
     val numSlices = 16
-    val d = sc.parallelize(0 to 1e3.toInt, numSlices).map(w)
+    val d = sc.parallelize(0 to 1e4.toInt, numSlices).map(w)
     d.count()
     sc.listenerBus.waitUntilEmpty(WAIT_TIMEOUT_MILLIS)
     listener.stageInfos.size should be (1)
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
index e3f166724968..99b67f5da23d 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
@@ -221,8 +221,8 @@ class HiveDDLSuite
     sql(
       s"""
         |ALTER TABLE $tab ADD
-        |PARTITION (ds='2008-04-08', hr=11) LOCATION '$part1Path'
-        |PARTITION (ds='2008-04-08', hr=12) LOCATION '$part2Path'
+        |PARTITION (ds='2008-04-08', hr=11) LOCATION '${part1Path.toURI}'
+        |PARTITION (ds='2008-04-08', hr=12) LOCATION '${part2Path.toURI}'
      """.stripMargin)
 
     assert(dirSet.forall(dir => dir.listFiles == null || dir.listFiles.isEmpty))
@@ -1252,7 +1252,7 @@ class HiveDDLSuite
       s"""
         |CREATE TABLE t(id int) USING hive
         |OPTIONS(fileFormat 'orc', compression 'Zlib')
-        |LOCATION '${path.getCanonicalPath}'
+        |LOCATION '${path.toURI}'
      """.stripMargin)
     val table = spark.sessionState.catalog.getTableMetadata(TableIdentifier("t"))
     assert(DDLUtils.isHiveTable(table))
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
index 2ae66d1b2f8a..b6c4e6b9d9b8 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
@@ -461,7 +461,8 @@ class HiveQuerySuite extends HiveComparisonTest with SQLTestUtils with BeforeAnd
      |('serialization.last.column.takes.rest'='true') USING 'cat' AS (tKey, tValue)
      |ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
      |WITH SERDEPROPERTIES ('serialization.last.column.takes.rest'='true') FROM src;
-    """.stripMargin.replaceAll(System.lineSeparator(), " "))
+    """.stripMargin.replaceAll(System.lineSeparator(), " "),
+    skip = !TestUtils.testCommandAvailable("/bin/bash"))
 
   createQueryTest("transform with SerDe4",
     """
      |SELECT TRANSFORM (*) ROW FORMAT SERDE
      |'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' WITH SERDEPROPERTIES
      |('serialization.last.column.takes.rest'='true') FROM src;
-    """.stripMargin.replaceAll(System.lineSeparator(), " "))
+    """.stripMargin.replaceAll(System.lineSeparator(), " "),
+    skip = !TestUtils.testCommandAvailable("/bin/bash"))
 
   createQueryTest("LIKE",
     "SELECT * FROM src WHERE value LIKE '%1%'")
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveSerDeSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveSerDeSuite.scala
index b8af0b39c839..ec620c2403e3 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveSerDeSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveSerDeSuite.scala
@@ -34,7 +34,7 @@ class HiveSerDeSuite extends HiveComparisonTest with BeforeAndAfterAll {
        |ROW FORMAT SERDE '${classOf[RegexSerDe].getCanonicalName}'
        |WITH SERDEPROPERTIES ("input.regex" = "([^ ]*)\t([^ ]*)")
      """.stripMargin)
-    sql(s"LOAD DATA LOCAL INPATH '${getHiveFile("data/files/sales.txt")}' INTO TABLE sales")
+    sql(s"LOAD DATA LOCAL INPATH '${getHiveFile("data/files/sales.txt").toURI}' INTO TABLE sales")
   }
 
   // table sales is not a cache table, and will be clear after reset

From 68595694cf6462414d9c9ca5c4a300f42ca2e3ab Mon Sep 17 00:00:00 2001
From: hyukjinkwon
Date: Wed, 18 Jan 2017 19:15:45 +0900
Subject: [PATCH 2/2] Address comments and fix another one

---
 .../scala/org/apache/spark/scheduler/SparkListenerSuite.scala | 2 +-
 .../org/apache/spark/sql/hive/execution/HiveQuerySuite.scala  | 2 ++
 .../test/scala/org/apache/spark/sql/hive/parquetSuites.scala  | 4 ++--
 3 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/core/src/test/scala/org/apache/spark/scheduler/SparkListenerSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/SparkListenerSuite.scala
index fefa8072e61f..f5575ce1e157 100644
--- a/core/src/test/scala/org/apache/spark/scheduler/SparkListenerSuite.scala
+++ b/core/src/test/scala/org/apache/spark/scheduler/SparkListenerSuite.scala
@@ -229,7 +229,7 @@ class SparkListenerSuite extends SparkFunSuite with LocalSparkContext with Match
     }
 
     val numSlices = 16
-    val d = sc.parallelize(0 to 1e4.toInt, numSlices).map(w)
+    val d = sc.parallelize(0 to 10000, numSlices).map(w)
     d.count()
     sc.listenerBus.waitUntilEmpty(WAIT_TIMEOUT_MILLIS)
     listener.stageInfos.size should be (1)
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
index b6c4e6b9d9b8..6a3e80bcf566 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
@@ -388,6 +388,8 @@ class HiveQuerySuite extends HiveComparisonTest with SQLTestUtils with BeforeAnd
     }
   }
 
+  // Some tests using script transformation are skipped as they require `/bin/bash`,
+  // which can be missing or located elsewhere.
createQueryTest("transform", "SELECT TRANSFORM (key) USING 'cat' AS (tKey) FROM src", skip = !TestUtils.testCommandAvailable("/bin/bash")) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/parquetSuites.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/parquetSuites.scala index aa4a150a4b80..e9239ea56f1f 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/parquetSuites.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/parquetSuites.scala @@ -612,7 +612,7 @@ class ParquetMetastoreSuite extends ParquetPartitioningTest { test("Explicitly added partitions should be readable after load") { withTable("test_added_partitions") { withTempDir { src => - val newPartitionDir = src.getCanonicalPath + val newPartitionDir = src.toURI.toString spark.range(2).selectExpr("cast(id as string)").toDF("a").write .mode("overwrite") .parquet(newPartitionDir) @@ -645,7 +645,7 @@ class ParquetMetastoreSuite extends ParquetPartitioningTest { test("Non-partitioned table readable after load") { withTable("tab") { withTempDir { src => - val newPartitionDir = src.getCanonicalPath + val newPartitionDir = src.toURI.toString spark.range(2).selectExpr("cast(id as string)").toDF("a").write .mode("overwrite") .parquet(newPartitionDir)