diff --git a/testing/trino-product-tests/src/main/java/io/trino/tests/product/iceberg/TestIcebergSparkCompatibility.java b/testing/trino-product-tests/src/main/java/io/trino/tests/product/iceberg/TestIcebergSparkCompatibility.java index a8c8ecaf75c8..7fb120604d82 100644 --- a/testing/trino-product-tests/src/main/java/io/trino/tests/product/iceberg/TestIcebergSparkCompatibility.java +++ b/testing/trino-product-tests/src/main/java/io/trino/tests/product/iceberg/TestIcebergSparkCompatibility.java @@ -62,6 +62,7 @@ import static io.trino.tests.product.iceberg.TestIcebergSparkCompatibility.CreateMode.CREATE_TABLE_AS_SELECT; import static io.trino.tests.product.iceberg.TestIcebergSparkCompatibility.CreateMode.CREATE_TABLE_WITH_NO_DATA_AND_INSERT; import static io.trino.tests.product.iceberg.TestIcebergSparkCompatibility.StorageFormat.AVRO; +import static io.trino.tests.product.iceberg.util.IcebergTestUtils.getTableLocation; import static io.trino.tests.product.utils.QueryExecutors.onSpark; import static io.trino.tests.product.utils.QueryExecutors.onTrino; import static java.lang.String.format; @@ -2087,6 +2088,40 @@ public void testHandlingPartitionSchemaEvolutionInPartitionMetadata() ImmutableMap.of("old_partition_key", "3", "new_partition_key", "null", "value_day", "null", "value_month", "null"))); } + @Test(groups = {ICEBERG, PROFILE_SPECIFIC_TESTS}) + public void testMetadataCompressionCodecGzip() + { + // Verify that Trino can read and write to a table created by Spark + String baseTableName = "test_metadata_compression_codec_gzip" + randomTableSuffix(); + String trinoTableName = trinoTableName(baseTableName); + String sparkTableName = sparkTableName(baseTableName); + + onSpark().executeQuery("CREATE TABLE " + sparkTableName + "(col int) USING iceberg TBLPROPERTIES ('write.metadata.compression-codec'='gzip')"); + onSpark().executeQuery("INSERT INTO " + sparkTableName + " VALUES (1)"); + onTrino().executeQuery("INSERT INTO " + trinoTableName + " VALUES (2)"); + + 
assertThat(onTrino().executeQuery("SELECT * FROM " + trinoTableName)).containsOnly(row(1), row(2)); + + // Verify that all metadata files are compressed with gzip + String tableLocation = getTableLocation(trinoTableName); + List<String> metadataFiles = hdfsClient.listDirectory(tableLocation + "/metadata").stream() + .filter(file -> file.endsWith("metadata.json")) + .collect(toImmutableList()); + Assertions.assertThat(metadataFiles) + .isNotEmpty() + .filteredOn(file -> file.endsWith("gz.metadata.json")) + .isEqualTo(metadataFiles); + + // Change 'write.metadata.compression-codec' to none and insert and select the table in Trino + onSpark().executeQuery("ALTER TABLE " + sparkTableName + " SET TBLPROPERTIES ('write.metadata.compression-codec'='none')"); + assertThat(onTrino().executeQuery("SELECT * FROM " + trinoTableName)).containsOnly(row(1), row(2)); + + onTrino().executeQuery("INSERT INTO " + trinoTableName + " VALUES (3)"); + assertThat(onTrino().executeQuery("SELECT * FROM " + trinoTableName)).containsOnly(row(1), row(2), row(3)); + + onSpark().executeQuery("DROP TABLE " + sparkTableName); + } + private void validatePartitioning(String baseTableName, String sparkTableName, List<Map<String, String>> expectedValues) { List trinoResult = expectedValues.stream().map(m -> diff --git a/testing/trino-product-tests/src/main/java/io/trino/tests/product/iceberg/TestIcebergSparkDropTableCompatibility.java b/testing/trino-product-tests/src/main/java/io/trino/tests/product/iceberg/TestIcebergSparkDropTableCompatibility.java index b7fd73a2f053..a11c5599ef49 100644 --- a/testing/trino-product-tests/src/main/java/io/trino/tests/product/iceberg/TestIcebergSparkDropTableCompatibility.java +++ b/testing/trino-product-tests/src/main/java/io/trino/tests/product/iceberg/TestIcebergSparkDropTableCompatibility.java @@ -26,16 +26,14 @@ import java.net.URI; import java.net.URISyntaxException; import java.util.List; -import java.util.regex.Matcher; -import java.util.regex.Pattern; import java.util.stream.Collectors; 
-import static com.google.common.base.Verify.verify; import static io.trino.tests.product.TestGroups.ICEBERG; import static io.trino.tests.product.TestGroups.PROFILE_SPECIFIC_TESTS; import static io.trino.tests.product.hive.Engine.SPARK; import static io.trino.tests.product.hive.Engine.TRINO; import static io.trino.tests.product.hive.util.TemporaryHiveTable.randomTableSuffix; +import static io.trino.tests.product.iceberg.util.IcebergTestUtils.getTableLocation; import static io.trino.tests.product.utils.QueryExecutors.onSpark; import static io.trino.tests.product.utils.QueryExecutors.onTrino; import static java.lang.String.format; @@ -86,18 +84,6 @@ public void testCleanupOnDropTable(Engine tableCreatorEngine, Engine tableDroppe dataFilePaths.forEach(dataFilePath -> assertFileExistence(dataFilePath, expectExists, format("The data file %s removed after dropping the table", dataFilePath))); } - private String getTableLocation(String tableName) - { - Pattern locationPattern = Pattern.compile(".*location = 'hdfs://hadoop-master:9000(.*?)'.*", Pattern.DOTALL); - Matcher m = locationPattern.matcher((String) onTrino().executeQuery("SHOW CREATE TABLE " + tableName).row(0).get(0)); - if (m.find()) { - String location = m.group(1); - verify(!m.find(), "Unexpected second match"); - return location; - } - throw new IllegalStateException("Location not found in SHOW CREATE TABLE result"); - } - private void assertFileExistence(String path, boolean exists, String description) { Assertions.assertThat(hdfsClient.exist(path)).as(description).isEqualTo(exists); diff --git a/testing/trino-product-tests/src/main/java/io/trino/tests/product/iceberg/util/IcebergTestUtils.java b/testing/trino-product-tests/src/main/java/io/trino/tests/product/iceberg/util/IcebergTestUtils.java new file mode 100644 index 000000000000..f6716a5aa4b1 --- /dev/null +++ b/testing/trino-product-tests/src/main/java/io/trino/tests/product/iceberg/util/IcebergTestUtils.java @@ -0,0 +1,37 @@ +/* + * Licensed under 
the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.trino.tests.product.iceberg.util; + +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import static com.google.common.base.Verify.verify; +import static io.trino.tests.product.utils.QueryExecutors.onTrino; + +public final class IcebergTestUtils +{ + private IcebergTestUtils() {} + + public static String getTableLocation(String tableName) + { + Pattern locationPattern = Pattern.compile(".*location = 'hdfs://hadoop-master:9000(.*?)'.*", Pattern.DOTALL); + Matcher m = locationPattern.matcher((String) onTrino().executeQuery("SHOW CREATE TABLE " + tableName).row(0).get(0)); + if (m.find()) { + String location = m.group(1); + verify(!m.find(), "Unexpected second match"); + return location; + } + throw new IllegalStateException("Location not found in SHOW CREATE TABLE result"); + } +}