From 2d7ffa65be68a34a1c80daea207b26ffbb008afd Mon Sep 17 00:00:00 2001
From: Dilip Biswal
Date: Fri, 5 Oct 2018 02:57:46 -0700
Subject: [PATCH 1/5] [SPARK-25611][SPARK-25622] Improve test run time of
 CompressionCodecSuite

---
 .../org/apache/spark/sql/hive/CompressionCodecSuite.scala | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/CompressionCodecSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/CompressionCodecSuite.scala
index 1bd7e52c88ec..ca8f83bee33e 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/CompressionCodecSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/CompressionCodecSuite.scala
@@ -21,6 +21,7 @@ import java.io.File
 import java.util.Locale
 
 import scala.collection.JavaConverters._
+import scala.util.Random
 
 import org.apache.hadoop.fs.Path
 import org.apache.orc.OrcConf.COMPRESS
@@ -229,8 +230,8 @@ class CompressionCodecSuite extends TestHiveSingleton with ParquetTest with Befo
       tableCompressionCodecs: List[String])
       (assertionCompressionCodec: (Option[String], String, String, Long) => Unit): Unit = {
     withSQLConf(getConvertMetastoreConfName(format) -> convertMetastore.toString) {
-      tableCompressionCodecs.foreach { tableCompression =>
-        compressionCodecs.foreach { sessionCompressionCodec =>
+      Random.shuffle(tableCompressionCodecs).take(1).foreach { tableCompression =>
+        Random.shuffle(compressionCodecs).take(1).foreach { sessionCompressionCodec =>
           withSQLConf(getSparkCompressionConfName(format) -> sessionCompressionCodec) {
             // 'tableCompression = null' means no table-level compression
             val compression = Option(tableCompression)

From 01f1f97114892174cf52996c297e14ae6800628b Mon Sep 17 00:00:00 2001
From: Dilip Biswal
Date: Sat, 6 Oct 2018 14:58:23 -0700
Subject: [PATCH 2/5] Remove redundant loops

---
 .../org/apache/spark/sql/hive/CompressionCodecSuite.scala | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/CompressionCodecSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/CompressionCodecSuite.scala
index ca8f83bee33e..d96fcf74b497 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/CompressionCodecSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/CompressionCodecSuite.scala
@@ -21,7 +21,6 @@ import java.io.File
 import java.util.Locale
 
 import scala.collection.JavaConverters._
-import scala.util.Random
 
 import org.apache.hadoop.fs.Path
 import org.apache.orc.OrcConf.COMPRESS
@@ -230,8 +229,8 @@ class CompressionCodecSuite extends TestHiveSingleton with ParquetTest with Befo
       tableCompressionCodecs: List[String])
       (assertionCompressionCodec: (Option[String], String, String, Long) => Unit): Unit = {
     withSQLConf(getConvertMetastoreConfName(format) -> convertMetastore.toString) {
-      Random.shuffle(tableCompressionCodecs).take(1).foreach { tableCompression =>
-        Random.shuffle(compressionCodecs).take(1).foreach { sessionCompressionCodec =>
+      tableCompressionCodecs.foreach { tableCompression =>
+        compressionCodecs.filterNot(_ == tableCompression).foreach { sessionCompressionCodec =>
           withSQLConf(getSparkCompressionConfName(format) -> sessionCompressionCodec) {
             // 'tableCompression = null' means no table-level compression
             val compression = Option(tableCompression)
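Patches 1 and 2 cut the run time in different ways: patch 1 samples a single random table/session codec pair per run (fast, but non-deterministic coverage), while patch 2 restores deterministic iteration and instead drops only the pairs where both levels name the same codec, presumably because such a pair cannot show which setting took effect. A standalone sketch of the resulting combination counts (illustrative only, not part of the patch series; the object name is hypothetical and the codec list is the suite's Parquet list):

object CodecPairCounts {
  def main(args: Array[String]): Unit = {
    val codecs = List("UNCOMPRESSED", "SNAPPY", "GZIP")

    // Original suite: full cross product of table-level x session-level codecs.
    val crossProduct = for (t <- codecs; s <- codecs) yield (t, s)

    // Patch 2: keep every table codec, but skip the pairs where the session
    // codec equals the table codec.
    val filtered = for (t <- codecs; s <- codecs if s != t) yield (t, s)

    println(s"cross product: ${crossProduct.size} pairs") // 9
    println(s"filterNot:     ${filtered.size} pairs")     // 6
  }
}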
From 361f5f756af55e8c874f6a1661947824dbf75d1a Mon Sep 17 00:00:00 2001
From: Dilip Biswal
Date: Sun, 7 Oct 2018 03:02:28 -0700
Subject: [PATCH 3/5] Improve time further - reduce codec combinations to 3

---
 .../sql/hive/CompressionCodecSuite.scala      | 54 ++++++++-----------
 1 file changed, 21 insertions(+), 33 deletions(-)

diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/CompressionCodecSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/CompressionCodecSuite.scala
index d96fcf74b497..63a701bb20f5 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/CompressionCodecSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/CompressionCodecSuite.scala
@@ -229,8 +229,8 @@ class CompressionCodecSuite extends TestHiveSingleton with ParquetTest with Befo
       tableCompressionCodecs: List[String])
       (assertionCompressionCodec: (Option[String], String, String, Long) => Unit): Unit = {
     withSQLConf(getConvertMetastoreConfName(format) -> convertMetastore.toString) {
-      tableCompressionCodecs.foreach { tableCompression =>
-        compressionCodecs.filterNot(_ == tableCompression).foreach { sessionCompressionCodec =>
+      tableCompressionCodecs.zipAll(compressionCodecs, null, "SNAPPY").foreach {
+        case (tableCompression, sessionCompressionCodec) =>
           withSQLConf(getSparkCompressionConfName(format) -> sessionCompressionCodec) {
             // 'tableCompression = null' means no table-level compression
             val compression = Option(tableCompression)
@@ -240,7 +240,6 @@ class CompressionCodecSuite extends TestHiveSingleton with ParquetTest with Befo
                   compression, sessionCompressionCodec, realCompressionCodec, tableSize)
             }
           }
-        }
       }
     }
   }
@@ -262,7 +261,10 @@ class CompressionCodecSuite extends TestHiveSingleton with ParquetTest with Befo
     }
   }
 
-  def checkForTableWithCompressProp(format: String, compressCodecs: List[String]): Unit = {
+  def checkForTableWithCompressProp(
+    format: String,
+    tableCompressCodecs: List[String],
+    sessionCompressCodecs: List[String]): Unit = {
     Seq(true, false).foreach { isPartitioned =>
       Seq(true, false).foreach { convertMetastore =>
         Seq(true, false).foreach { usingCTAS =>
@@ -271,10 +273,10 @@ class CompressionCodecSuite extends TestHiveSingleton with ParquetTest with Befo
             isPartitioned,
             convertMetastore,
             usingCTAS,
-            compressionCodecs = compressCodecs,
-            tableCompressionCodecs = compressCodecs) {
+            compressionCodecs = sessionCompressCodecs,
+            tableCompressionCodecs = tableCompressCodecs) {
             case (tableCodec, sessionCodec, realCodec, tableSize) =>
-              val expectCodec = tableCodec.get
+              val expectCodec = if (tableCodec.isDefined) tableCodec.get else sessionCodec
               assert(expectCodec == realCodec)
               assert(checkTableSize(
                 format, expectCodec, isPartitioned, convertMetastore, usingCTAS, tableSize))
@@ -284,36 +286,22 @@ class CompressionCodecSuite extends TestHiveSingleton with ParquetTest with Befo
     }
   }
 
-  def checkForTableWithoutCompressProp(format: String, compressCodecs: List[String]): Unit = {
-    Seq(true, false).foreach { isPartitioned =>
-      Seq(true, false).foreach { convertMetastore =>
-        Seq(true, false).foreach { usingCTAS =>
-          checkTableCompressionCodecForCodecs(
-            format,
-            isPartitioned,
-            convertMetastore,
-            usingCTAS,
-            compressionCodecs = compressCodecs,
-            tableCompressionCodecs = List(null)) {
-            case (tableCodec, sessionCodec, realCodec, tableSize) =>
-              // Always expect session-level take effect
-              assert(sessionCodec == realCodec)
-              assert(checkTableSize(
-                format, sessionCodec, isPartitioned, convertMetastore, usingCTAS, tableSize))
-          }
-        }
-      }
-    }
-  }
-
   test("both table-level and session-level compression are set") {
-    checkForTableWithCompressProp("parquet", List("UNCOMPRESSED", "SNAPPY", "GZIP"))
-    checkForTableWithCompressProp("orc", List("NONE", "SNAPPY", "ZLIB"))
+    checkForTableWithCompressProp("parquet",
+      tableCompressCodecs = List("UNCOMPRESSED", "SNAPPY", "GZIP"),
+      sessionCompressCodecs = List("SNAPPY", "GZIP", "SNAPPY"))
+    checkForTableWithCompressProp("orc",
+      tableCompressCodecs = List("NONE", "SNAPPY", "ZLIB"),
+      sessionCompressCodecs = List("SNAPPY", "ZLIB", "SNAPPY"))
   }
 
   test("table-level compression is not set but session-level compressions is set ") {
-    checkForTableWithoutCompressProp("parquet", List("UNCOMPRESSED", "SNAPPY", "GZIP"))
-    checkForTableWithoutCompressProp("orc", List("NONE", "SNAPPY", "ZLIB"))
+    checkForTableWithCompressProp("parquet",
+      tableCompressCodecs = List.empty,
+      sessionCompressCodecs = List("UNCOMPRESSED", "SNAPPY", "GZIP"))
+    checkForTableWithCompressProp("orc",
+      tableCompressCodecs = List.empty,
+      sessionCompressCodecs = List("NONE", "SNAPPY", "ZLIB"))
   }
 
   def checkTableWriteWithCompressionCodecs(format: String, compressCodecs: List[String]): Unit = {
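Patch 3 removes the nested loops entirely: the two codec lists are walked in lockstep with zipAll, so the number of combinations falls from the product of the list lengths to the length of the longer list (three per format in the rewritten tests), and an empty table-codec list degenerates to session-only testing. A standalone sketch of that pairing behavior (illustrative only, not part of the patch series; the object name is hypothetical):

object ZipAllDemo {
  def main(args: Array[String]): Unit = {
    val tableCodecs = List("UNCOMPRESSED", "SNAPPY", "GZIP")
    val sessionCodecs = List("SNAPPY", "GZIP", "SNAPPY")

    // Equal-length lists pair positionally: 3 combinations instead of 9.
    println(tableCodecs.zipAll(sessionCodecs, null, "SNAPPY"))
    // List((UNCOMPRESSED,SNAPPY), (SNAPPY,GZIP), (GZIP,SNAPPY))

    // An empty table list is padded with null, which the suite reads as
    // "no table-level compression", so only the session codec applies.
    println(List.empty[String].zipAll(sessionCodecs, null, "SNAPPY"))
    // List((null,SNAPPY), (null,GZIP), (null,SNAPPY))
  }
}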
checkForTableWithCompressProp("parquet", + tableCompressCodecs = List("UNCOMPRESSED", "SNAPPY", "GZIP"), + sessionCompressCodecs = List("SNAPPY", "GZIP", "SNAPPY")) + checkForTableWithCompressProp("orc", + tableCompressCodecs = List("NONE", "SNAPPY", "ZLIB"), + sessionCompressCodecs = List("SNAPPY", "ZLIB", "SNAPPY")) } test("table-level compression is not set but session-level compressions is set ") { - checkForTableWithoutCompressProp("parquet", List("UNCOMPRESSED", "SNAPPY", "GZIP")) - checkForTableWithoutCompressProp("orc", List("NONE", "SNAPPY", "ZLIB")) + checkForTableWithCompressProp("parquet", + tableCompressCodecs = List.empty, + sessionCompressCodecs = List("UNCOMPRESSED", "SNAPPY", "GZIP")) + checkForTableWithCompressProp("orc", + tableCompressCodecs = List.empty, + sessionCompressCodecs = List("NONE", "SNAPPY", "ZLIB")) } def checkTableWriteWithCompressionCodecs(format: String, compressCodecs: List[String]): Unit = { From 3dbc5dceb9c2e3f727f7044de6463a809a8a9518 Mon Sep 17 00:00:00 2001 From: Dilip Biswal Date: Sun, 7 Oct 2018 10:00:18 -0700 Subject: [PATCH 4/5] fix1 --- .../scala/org/apache/spark/sql/hive/CompressionCodecSuite.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/CompressionCodecSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/CompressionCodecSuite.scala index 63a701bb20f5..60e01b575101 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/CompressionCodecSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/CompressionCodecSuite.scala @@ -276,7 +276,7 @@ class CompressionCodecSuite extends TestHiveSingleton with ParquetTest with Befo compressionCodecs = sessionCompressCodecs, tableCompressionCodecs = tableCompressCodecs) { case (tableCodec, sessionCodec, realCodec, tableSize) => - val expectCodec = if (tableCodec.isDefined) tableCodec.get else sessionCodec + val expectCodec = tableCodec.getOrElse(sessionCodec) assert(expectCodec == realCodec) assert(checkTableSize( format, expectCodec, isPartitioned, convertMetastore, usingCTAS, tableSize)) From fdab980be523a81c24a01bd8688ae272a8b3d87a Mon Sep 17 00:00:00 2001 From: Dilip Biswal Date: Mon, 8 Oct 2018 08:22:06 -0700 Subject: [PATCH 5/5] Code review --- .../org/apache/spark/sql/hive/CompressionCodecSuite.scala | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/CompressionCodecSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/CompressionCodecSuite.scala index 60e01b575101..398f4d2efbbf 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/CompressionCodecSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/CompressionCodecSuite.scala @@ -262,9 +262,9 @@ class CompressionCodecSuite extends TestHiveSingleton with ParquetTest with Befo } def checkForTableWithCompressProp( - format: String, - tableCompressCodecs: List[String], - sessionCompressCodecs: List[String]): Unit = { + format: String, + tableCompressCodecs: List[String], + sessionCompressCodecs: List[String]): Unit = { Seq(true, false).foreach { isPartitioned => Seq(true, false).foreach { convertMetastore => Seq(true, false).foreach { usingCTAS =>