From 1126a898598ad1d49b9a1e23a781a0127e6fe4c3 Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Thu, 8 Jun 2023 12:15:23 +0800 Subject: [PATCH 1/3] lz4raw --- .../main/scala/org/apache/spark/sql/internal/SQLConf.scala | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 8d1e73cb86f52..47b8474953bb2 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -984,11 +984,12 @@ object SQLConf { "`parquet.compression` is specified in the table-specific options/properties, the " + "precedence would be `compression`, `parquet.compression`, " + "`spark.sql.parquet.compression.codec`. Acceptable values include: none, uncompressed, " + - "snappy, gzip, lzo, brotli, lz4, zstd.") + "snappy, gzip, lzo, brotli, lz4, lz4raw, zstd.") .version("1.1.1") .stringConf .transform(_.toLowerCase(Locale.ROOT)) - .checkValues(Set("none", "uncompressed", "snappy", "gzip", "lzo", "lz4", "brotli", "zstd")) + .checkValues( + Set("none", "uncompressed", "snappy", "gzip", "lzo", "brotli", "lz4", "lz4raw", "zstd")) .createWithDefault("snappy") val PARQUET_FILTER_PUSHDOWN_ENABLED = buildConf("spark.sql.parquet.filterPushdown") From 1eb56a6d2b1fb373c08365136ad93aca348d1c5d Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Thu, 8 Jun 2023 12:16:57 +0800 Subject: [PATCH 2/3] "lz4raw" -> CompressionCodecName.LZ4_RAW, --- .../sql/execution/datasources/parquet/ParquetOptions.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetOptions.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetOptions.scala index d20edbde00be5..023d2460959cd 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetOptions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetOptions.scala @@ -94,8 +94,9 @@ object ParquetOptions extends DataSourceOptions { "snappy" -> CompressionCodecName.SNAPPY, "gzip" -> CompressionCodecName.GZIP, "lzo" -> CompressionCodecName.LZO, - "lz4" -> CompressionCodecName.LZ4, "brotli" -> CompressionCodecName.BROTLI, + "lz4" -> CompressionCodecName.LZ4, + "lz4raw" -> CompressionCodecName.LZ4_RAW, "zstd" -> CompressionCodecName.ZSTD) def getParquetCompressionCodecName(name: String): String = { From 4e78ff22d9cdcc8936a52ac98b594055123c6244 Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Thu, 8 Jun 2023 13:37:52 +0800 Subject: [PATCH 3/3] Update FileSourceCodecSuite.scala --- .../spark/sql/execution/datasources/FileSourceCodecSuite.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileSourceCodecSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileSourceCodecSuite.scala index 8d0bae515210f..09a348cd29451 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileSourceCodecSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileSourceCodecSuite.scala @@ -59,7 +59,7 @@ class ParquetCodecSuite extends FileSourceCodecSuite { // Exclude "brotli" because the com.github.rdblue:brotli-codec dependency is not available // on Maven Central. override protected def availableCodecs: Seq[String] = { - Seq("none", "uncompressed", "snappy", "gzip", "zstd", "lz4") + Seq("none", "uncompressed", "snappy", "gzip", "zstd", "lz4", "lz4raw") } }