Skip to content
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -229,8 +229,8 @@ class CompressionCodecSuite extends TestHiveSingleton with ParquetTest with Befo
tableCompressionCodecs: List[String])
(assertionCompressionCodec: (Option[String], String, String, Long) => Unit): Unit = {
withSQLConf(getConvertMetastoreConfName(format) -> convertMetastore.toString) {
tableCompressionCodecs.foreach { tableCompression =>
compressionCodecs.foreach { sessionCompressionCodec =>
tableCompressionCodecs.zipAll(compressionCodecs, null, "SNAPPY").foreach {
case (tableCompression, sessionCompressionCodec) =>
withSQLConf(getSparkCompressionConfName(format) -> sessionCompressionCodec) {
// 'tableCompression = null' means no table-level compression
val compression = Option(tableCompression)
Expand All @@ -240,7 +240,6 @@ class CompressionCodecSuite extends TestHiveSingleton with ParquetTest with Befo
compression, sessionCompressionCodec, realCompressionCodec, tableSize)
}
}
}
}
}
}
Expand All @@ -262,7 +261,10 @@ class CompressionCodecSuite extends TestHiveSingleton with ParquetTest with Befo
}
}

def checkForTableWithCompressProp(format: String, compressCodecs: List[String]): Unit = {
def checkForTableWithCompressProp(
format: String,
tableCompressCodecs: List[String],
sessionCompressCodecs: List[String]): Unit = {
Seq(true, false).foreach { isPartitioned =>
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's see, the two tests before were taking 2:20 and 0:47. Now they take about 0:43 each. That's clearly a win in the first case, not much in the second, as expected. I'm OK with this as an improvement, myself.

I wonder if we need all 8 combinations in this triply nested loop though. Incidentally it could be written more easily as such, but I know this was existing code:

for (isPartitioned <- Seq(true, false); convertMetastore <- Seq(true, false); usingCTAS <- Seq(true, false)) {

@fjh100456 what do you think? is it important to test all combinations, or could we get away with setting all true and all false without much of any loss here?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This combination provides different test scenarios,they are not quite the same on writing data and table property passing, all of which have the potential to affect the test results. So I think it's necessary to keep.

Seq(true, false).foreach { convertMetastore =>
Seq(true, false).foreach { usingCTAS =>
Expand All @@ -271,10 +273,10 @@ class CompressionCodecSuite extends TestHiveSingleton with ParquetTest with Befo
isPartitioned,
convertMetastore,
usingCTAS,
compressionCodecs = compressCodecs,
tableCompressionCodecs = compressCodecs) {
compressionCodecs = sessionCompressCodecs,
tableCompressionCodecs = tableCompressCodecs) {
case (tableCodec, sessionCodec, realCodec, tableSize) =>
val expectCodec = tableCodec.get
val expectCodec = tableCodec.getOrElse(sessionCodec)
assert(expectCodec == realCodec)
assert(checkTableSize(
format, expectCodec, isPartitioned, convertMetastore, usingCTAS, tableSize))
Expand All @@ -284,36 +286,22 @@ class CompressionCodecSuite extends TestHiveSingleton with ParquetTest with Befo
}
}

def checkForTableWithoutCompressProp(format: String, compressCodecs: List[String]): Unit = {
Seq(true, false).foreach { isPartitioned =>
Seq(true, false).foreach { convertMetastore =>
Seq(true, false).foreach { usingCTAS =>
checkTableCompressionCodecForCodecs(
format,
isPartitioned,
convertMetastore,
usingCTAS,
compressionCodecs = compressCodecs,
tableCompressionCodecs = List(null)) {
case (tableCodec, sessionCodec, realCodec, tableSize) =>
// Always expect session-level take effect
assert(sessionCodec == realCodec)
assert(checkTableSize(
format, sessionCodec, isPartitioned, convertMetastore, usingCTAS, tableSize))
}
}
}
}
}

test("both table-level and session-level compression are set") {
checkForTableWithCompressProp("parquet", List("UNCOMPRESSED", "SNAPPY", "GZIP"))
checkForTableWithCompressProp("orc", List("NONE", "SNAPPY", "ZLIB"))
checkForTableWithCompressProp("parquet",
tableCompressCodecs = List("UNCOMPRESSED", "SNAPPY", "GZIP"),
sessionCompressCodecs = List("SNAPPY", "GZIP", "SNAPPY"))
checkForTableWithCompressProp("orc",
tableCompressCodecs = List("NONE", "SNAPPY", "ZLIB"),
sessionCompressCodecs = List("SNAPPY", "ZLIB", "SNAPPY"))
}

test("table-level compression is not set but session-level compressions is set ") {
checkForTableWithoutCompressProp("parquet", List("UNCOMPRESSED", "SNAPPY", "GZIP"))
checkForTableWithoutCompressProp("orc", List("NONE", "SNAPPY", "ZLIB"))
checkForTableWithCompressProp("parquet",
tableCompressCodecs = List.empty,
sessionCompressCodecs = List("UNCOMPRESSED", "SNAPPY", "GZIP"))
checkForTableWithCompressProp("orc",
tableCompressCodecs = List.empty,
sessionCompressCodecs = List("NONE", "SNAPPY", "ZLIB"))
}

def checkTableWriteWithCompressionCodecs(format: String, compressCodecs: List[String]): Unit = {
Expand Down