diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f1c7beee5cc2..6c32bdb7bc38 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -719,7 +719,6 @@ jobs: (env.CI_SKIP_SECRETS_PRESENCE_CHECKS != '' || env.AWS_ACCESS_KEY_ID != '' || env.AWS_SECRET_ACCESS_KEY != '' || env.GCP_CREDENTIALS_KEY != '') run: | $MAVEN test ${MAVEN_TEST} -pl :trino-iceberg ${{ format('-P {0}', matrix.profile) }} \ - -Ds3.bucket=${S3_BUCKET} \ -Dtesting.gcp-storage-bucket="${GCP_STORAGE_BUCKET}" \ -Dtesting.gcp-credentials-key="${GCP_CREDENTIALS_KEY}" \ -Dhive.hadoop2.azure-abfs-container="${ABFS_CONTAINER}" \ diff --git a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeMetadata.java b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeMetadata.java index 691c2ae0426c..2b13a018dc94 100644 --- a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeMetadata.java +++ b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeMetadata.java @@ -168,7 +168,6 @@ import static com.google.common.collect.Sets.difference; import static com.google.common.primitives.Ints.max; import static io.trino.filesystem.Locations.appendPath; -import static io.trino.filesystem.Locations.getParent; import static io.trino.plugin.deltalake.DataFileInfo.DataFileType.DATA; import static io.trino.plugin.deltalake.DeltaLakeAnalyzeProperties.AnalyzeMode.FULL_REFRESH; import static io.trino.plugin.deltalake.DeltaLakeAnalyzeProperties.AnalyzeMode.INCREMENTAL; @@ -1903,7 +1902,7 @@ private void checkWriteAllowed(ConnectorSession session, DeltaLakeTableHandle ta private boolean allowWrite(ConnectorSession session, DeltaLakeTableHandle tableHandle) { try { - String tableMetadataDirectory = appendPath(getParent(tableHandle.getLocation()), tableHandle.getTableName()); + String tableMetadataDirectory = getTransactionLogDir(tableHandle.getLocation()); boolean requiresOptIn = transactionLogWriterFactory.newWriter(session, tableMetadataDirectory).isUnsafe(); return !requiresOptIn || unsafeWritesEnabled; } diff --git a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/metastore/glue/TestDeltaS3AndGlueMetastoreTest.java b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/metastore/glue/TestDeltaS3AndGlueMetastoreTest.java index 533b87b749c2..cc12d940b2ee 100644 --- a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/metastore/glue/TestDeltaS3AndGlueMetastoreTest.java +++ b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/metastore/glue/TestDeltaS3AndGlueMetastoreTest.java @@ -14,7 +14,6 @@ package io.trino.plugin.deltalake.metastore.glue; import com.google.common.collect.ImmutableMap; -import io.trino.hdfs.TrinoFileSystemCache; import io.trino.plugin.deltalake.DeltaLakeQueryRunner; import io.trino.plugin.hive.BaseS3AndGlueMetastoreTest; import io.trino.testing.DistributedQueryRunner; @@ -42,8 +41,6 @@ public TestDeltaS3AndGlueMetastoreTest() protected QueryRunner createQueryRunner() throws Exception { - closeAfterClass(TrinoFileSystemCache.INSTANCE::closeAll); - metastore = createTestingGlueHiveMetastore(Path.of(schemaPath())); DistributedQueryRunner queryRunner = DeltaLakeQueryRunner.builder() .setCatalogName(DELTA_CATALOG) diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/BaseS3AndGlueMetastoreTest.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/BaseS3AndGlueMetastoreTest.java index 130852a5368e..49d6456d1a47 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/BaseS3AndGlueMetastoreTest.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/BaseS3AndGlueMetastoreTest.java @@ -17,11 +17,11 @@ import com.amazonaws.services.s3.AmazonS3ClientBuilder; import com.amazonaws.services.s3.model.ListObjectsV2Request; import com.amazonaws.services.s3.model.S3ObjectSummary; -import com.google.common.collect.ImmutableList; import io.trino.Session; import io.trino.plugin.hive.metastore.HiveMetastore; import io.trino.spi.connector.SchemaNotFoundException; import io.trino.testing.AbstractTestQueryFramework; +import org.intellij.lang.annotations.Language; import org.testng.annotations.AfterClass; import org.testng.annotations.BeforeClass; import org.testng.annotations.DataProvider; @@ -32,9 +32,11 @@ import java.util.regex.Matcher; import java.util.regex.Pattern; import java.util.stream.Collectors; +import java.util.stream.Stream; import static com.google.common.base.Verify.verify; import static com.google.common.collect.Sets.union; +import static io.trino.plugin.hive.BaseS3AndGlueMetastoreTest.LocationPattern.TWO_TRAILING_SLASHES; import static io.trino.plugin.hive.S3Assert.s3Path; import static io.trino.testing.DataProviders.cartesianProduct; import static io.trino.testing.DataProviders.toDataProvider; @@ -43,6 +45,7 @@ import static java.lang.String.format; import static java.util.Objects.requireNonNull; import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; public abstract class BaseS3AndGlueMetastoreTest extends AbstractTestQueryFramework @@ -85,154 +88,159 @@ public void tearDown() @DataProvider public Object[][] locationPatternsDataProvider() { - return cartesianProduct(trueFalse(), locationPatterns().stream().collect(toDataProvider())); - } - - protected List locationPatterns() - { - return ImmutableList.builder() - .add("s3://%s/%s/regular/%s") - .add("s3://%s/%s/trailing_slash/%s/") - .add("s3://%s/%s//double_slash/%s") - .add("s3://%s/%s/a%%percent/%s") - .add("s3://%s/%s/a whitespace/%s") - .add("s3://%s/%s/trailing_whitespace/%s ") - .build(); + return cartesianProduct(trueFalse(), Stream.of(LocationPattern.values()).collect(toDataProvider())); } @Test(dataProvider = "locationPatternsDataProvider") - public void testBasicOperationsWithProvidedTableLocation(boolean partitioned, String locationPattern) + public void testBasicOperationsWithProvidedTableLocation(boolean partitioned, LocationPattern locationPattern) { String tableName = "test_basic_operations_" + randomNameSuffix(); - String location = locationPattern.formatted(bucketName, schemaName, tableName); + String location = locationPattern.locationForTable(bucketName, schemaName, tableName); String partitionQueryPart = (partitioned ? "," + partitionByKeyword + " = ARRAY['col_str']" : ""); + String actualTableLocation; assertUpdate("CREATE TABLE " + tableName + "(col_str, col_int)" + "WITH (location = '" + location + "'" + partitionQueryPart + ") " + "AS VALUES ('str1', 1), ('str2', 2), ('str3', 3)", 3); - assertQuery("SELECT * FROM " + tableName, "VALUES ('str1', 1), ('str2', 2), ('str3', 3)"); - validateTableLocation(tableName, location); - - assertUpdate("INSERT INTO " + tableName + " VALUES ('str4', 4)", 1); - assertQuery("SELECT * FROM " + tableName, "VALUES ('str1', 1), ('str2', 2), ('str3', 3), ('str4', 4)"); - - assertUpdate("UPDATE " + tableName + " SET col_str = 'other' WHERE col_int = 2", 1); - assertQuery("SELECT * FROM " + tableName, "VALUES ('str1', 1), ('other', 2), ('str3', 3), ('str4', 4)"); - - assertUpdate("DELETE FROM " + tableName + " WHERE col_int = 3", 1); - assertQuery("SELECT * FROM " + tableName, "VALUES ('str1', 1), ('other', 2), ('str4', 4)"); - - assertThat(getTableFiles(location)).isNotEmpty(); - validateDataFiles(partitioned ? "col_str" : "", tableName, location); - validateMetadataFiles(location); - - assertUpdate("DROP TABLE " + tableName); - validateFilesAfterDrop(location); + try (UncheckedCloseable ignored = onClose("DROP TABLE " + tableName)) { + assertQuery("SELECT * FROM " + tableName, "VALUES ('str1', 1), ('str2', 2), ('str3', 3)"); + actualTableLocation = validateTableLocation(tableName, location); + + assertUpdate("INSERT INTO " + tableName + " VALUES ('str4', 4)", 1); + assertQuery("SELECT * FROM " + tableName, "VALUES ('str1', 1), ('str2', 2), ('str3', 3), ('str4', 4)"); + + if (locationPattern == TWO_TRAILING_SLASHES && !partitioned && getClass().getName().contains(".deltalake.")) { + // TODO (https://github.com/trinodb/trino/issues/17966): updates fail when Delta table is declared with location ending with two slashes + assertThatThrownBy(() -> query("UPDATE " + tableName + " SET col_str = 'other' WHERE col_int = 2")) + .hasMessageMatching("path \\[(s3://.*)/([-a-zA-Z0-9_]+)] must be a subdirectory of basePath \\[(\\1)//]"); + return; + } + assertUpdate("UPDATE " + tableName + " SET col_str = 'other' WHERE col_int = 2", 1); + assertQuery("SELECT * FROM " + tableName, "VALUES ('str1', 1), ('other', 2), ('str3', 3), ('str4', 4)"); + + assertUpdate("DELETE FROM " + tableName + " WHERE col_int = 3", 1); + assertQuery("SELECT * FROM " + tableName, "VALUES ('str1', 1), ('other', 2), ('str4', 4)"); + + assertThat(getTableFiles(actualTableLocation)).isNotEmpty(); + validateDataFiles(partitioned ? "col_str" : "", tableName, actualTableLocation); + validateMetadataFiles(actualTableLocation); + } + validateFilesAfterDrop(actualTableLocation); } @Test(dataProvider = "locationPatternsDataProvider") - public void testBasicOperationsWithProvidedSchemaLocation(boolean partitioned, String locationPattern) + public void testBasicOperationsWithProvidedSchemaLocation(boolean partitioned, LocationPattern locationPattern) { String schemaName = "test_basic_operations_schema_" + randomNameSuffix(); - String schemaLocation = locationPattern.formatted(bucketName, schemaName, schemaName); + String schemaLocation = locationPattern.locationForSchema(bucketName, schemaName); String tableName = "test_basic_operations_table_" + randomNameSuffix(); String qualifiedTableName = schemaName + "." + tableName; String partitionQueryPart = (partitioned ? "WITH (" + partitionByKeyword + " = ARRAY['col_str'])" : ""); + String actualTableLocation; assertUpdate("CREATE SCHEMA " + schemaName + " WITH (location = '" + schemaLocation + "')"); - assertThat(getSchemaLocation(schemaName)).isEqualTo(schemaLocation); - - assertUpdate("CREATE TABLE " + qualifiedTableName + "(col_int int, col_str varchar)" + partitionQueryPart); - // in case of regular CREATE TABLE, location has generated suffix - String expectedTableLocationPattern = (schemaLocation.endsWith("/") ? schemaLocation : schemaLocation + "/") + tableName + "-[a-z0-9]+"; - String actualTableLocation = getTableLocation(qualifiedTableName); - assertThat(actualTableLocation).matches(expectedTableLocationPattern); - - assertUpdate("INSERT INTO " + qualifiedTableName + " (col_str, col_int) VALUES ('str1', 1), ('str2', 2), ('str3', 3)", 3); - assertQuery("SELECT col_str, col_int FROM " + qualifiedTableName, "VALUES ('str1', 1), ('str2', 2), ('str3', 3)"); - - assertUpdate("UPDATE " + qualifiedTableName + " SET col_str = 'other' WHERE col_int = 2", 1); - assertQuery("SELECT col_str, col_int FROM " + qualifiedTableName, "VALUES ('str1', 1), ('other', 2), ('str3', 3)"); - - assertUpdate("DELETE FROM " + qualifiedTableName + " WHERE col_int = 3", 1); - assertQuery("SELECT col_str, col_int FROM " + qualifiedTableName, "VALUES ('str1', 1), ('other', 2)"); - - assertThat(getTableFiles(actualTableLocation)).isNotEmpty(); - validateDataFiles(partitioned ? "col_str" : "", qualifiedTableName, actualTableLocation); - validateMetadataFiles(actualTableLocation); - - assertUpdate("DROP TABLE " + qualifiedTableName); - assertThat(getTableFiles(actualTableLocation)).isEmpty(); - - assertUpdate("DROP SCHEMA " + schemaName); + try (UncheckedCloseable ignoredDropSchema = onClose("DROP SCHEMA " + schemaName)) { + assertThat(getSchemaLocation(schemaName)).isEqualTo(schemaLocation); + + assertUpdate("CREATE TABLE " + qualifiedTableName + "(col_int int, col_str varchar)" + partitionQueryPart); + try (UncheckedCloseable ignoredDropTable = onClose("DROP TABLE " + qualifiedTableName)) { + // in case of regular CREATE TABLE, location has generated suffix + String expectedTableLocationPattern = (schemaLocation.endsWith("/") ? schemaLocation : schemaLocation + "/") + tableName + "-[a-z0-9]+"; + actualTableLocation = getTableLocation(qualifiedTableName); + assertThat(actualTableLocation).matches(expectedTableLocationPattern); + + assertUpdate("INSERT INTO " + qualifiedTableName + " (col_str, col_int) VALUES ('str1', 1), ('str2', 2), ('str3', 3)", 3); + assertQuery("SELECT col_str, col_int FROM " + qualifiedTableName, "VALUES ('str1', 1), ('str2', 2), ('str3', 3)"); + + assertUpdate("UPDATE " + qualifiedTableName + " SET col_str = 'other' WHERE col_int = 2", 1); + assertQuery("SELECT col_str, col_int FROM " + qualifiedTableName, "VALUES ('str1', 1), ('other', 2), ('str3', 3)"); + + assertUpdate("DELETE FROM " + qualifiedTableName + " WHERE col_int = 3", 1); + assertQuery("SELECT col_str, col_int FROM " + qualifiedTableName, "VALUES ('str1', 1), ('other', 2)"); + + assertThat(getTableFiles(actualTableLocation)).isNotEmpty(); + validateDataFiles(partitioned ? "col_str" : "", qualifiedTableName, actualTableLocation); + validateMetadataFiles(actualTableLocation); + } + assertThat(getTableFiles(actualTableLocation)).isEmpty(); + } assertThat(getTableFiles(actualTableLocation)).isEmpty(); } @Test(dataProvider = "locationPatternsDataProvider") - public void testMergeWithProvidedTableLocation(boolean partitioned, String locationPattern) + public void testMergeWithProvidedTableLocation(boolean partitioned, LocationPattern locationPattern) { String tableName = "test_merge_" + randomNameSuffix(); - String location = locationPattern.formatted(bucketName, schemaName, tableName); + String location = locationPattern.locationForTable(bucketName, schemaName, tableName); String partitionQueryPart = (partitioned ? "," + partitionByKeyword + " = ARRAY['col_str']" : ""); + String actualTableLocation; assertUpdate("CREATE TABLE " + tableName + "(col_str, col_int)" + "WITH (location = '" + location + "'" + partitionQueryPart + ") " + "AS VALUES ('str1', 1), ('str2', 2), ('str3', 3)", 3); - assertQuery("SELECT * FROM " + tableName, "VALUES ('str1', 1), ('str2', 2), ('str3', 3)"); - - assertUpdate("MERGE INTO " + tableName + " USING (VALUES 1) t(x) ON false" + - " WHEN NOT MATCHED THEN INSERT VALUES ('str4', 4)", 1); - assertQuery("SELECT * FROM " + tableName, "VALUES ('str1', 1), ('str2', 2), ('str3', 3), ('str4', 4)"); - - assertUpdate("MERGE INTO " + tableName + " USING (VALUES 2) t(x) ON col_int = x" + - " WHEN MATCHED THEN UPDATE SET col_str = 'other'", 1); - assertQuery("SELECT * FROM " + tableName, "VALUES ('str1', 1), ('other', 2), ('str3', 3), ('str4', 4)"); - - assertUpdate("MERGE INTO " + tableName + " USING (VALUES 3) t(x) ON col_int = x" + - " WHEN MATCHED THEN DELETE", 1); - assertQuery("SELECT * FROM " + tableName, "VALUES ('str1', 1), ('other', 2), ('str4', 4)"); - - assertThat(getTableFiles(location)).isNotEmpty(); - validateDataFiles(partitioned ? "col_str" : "", tableName, location); - validateMetadataFiles(location); - - assertUpdate("DROP TABLE " + tableName); - validateFilesAfterDrop(location); + try (UncheckedCloseable ignored = onClose("DROP TABLE " + tableName)) { + actualTableLocation = validateTableLocation(tableName, location); + assertQuery("SELECT * FROM " + tableName, "VALUES ('str1', 1), ('str2', 2), ('str3', 3)"); + + assertUpdate("MERGE INTO " + tableName + " USING (VALUES 1) t(x) ON false" + + " WHEN NOT MATCHED THEN INSERT VALUES ('str4', 4)", 1); + assertQuery("SELECT * FROM " + tableName, "VALUES ('str1', 1), ('str2', 2), ('str3', 3), ('str4', 4)"); + + if (locationPattern == TWO_TRAILING_SLASHES && !partitioned && getClass().getName().contains(".deltalake.")) { + // TODO (https://github.com/trinodb/trino/issues/17966): merge fails when Delta table is declared with location ending with two slashes + assertThatThrownBy(() -> query("MERGE INTO " + tableName + " USING (VALUES 2) t(x) ON col_int = x" + + " WHEN MATCHED THEN UPDATE SET col_str = 'other'")) + .hasMessageMatching("path \\[(s3://.*)/([-a-zA-Z0-9_]+)] must be a subdirectory of basePath \\[(\\1)//]"); + return; + } + assertUpdate("MERGE INTO " + tableName + " USING (VALUES 2) t(x) ON col_int = x" + + " WHEN MATCHED THEN UPDATE SET col_str = 'other'", 1); + assertQuery("SELECT * FROM " + tableName, "VALUES ('str1', 1), ('other', 2), ('str3', 3), ('str4', 4)"); + + assertUpdate("MERGE INTO " + tableName + " USING (VALUES 3) t(x) ON col_int = x" + + " WHEN MATCHED THEN DELETE", 1); + assertQuery("SELECT * FROM " + tableName, "VALUES ('str1', 1), ('other', 2), ('str4', 4)"); + + assertThat(getTableFiles(actualTableLocation)).isNotEmpty(); + validateDataFiles(partitioned ? "col_str" : "", tableName, actualTableLocation); + validateMetadataFiles(actualTableLocation); + } + validateFilesAfterDrop(actualTableLocation); } @Test(dataProvider = "locationPatternsDataProvider") - public void testOptimizeWithProvidedTableLocation(boolean partitioned, String locationPattern) + public void testOptimizeWithProvidedTableLocation(boolean partitioned, LocationPattern locationPattern) { String tableName = "test_optimize_" + randomNameSuffix(); - String location = locationPattern.formatted(bucketName, schemaName, tableName); + String location = locationPattern.locationForTable(bucketName, schemaName, tableName); String partitionQueryPart = (partitioned ? "," + partitionByKeyword + " = ARRAY['value']" : ""); String locationQueryPart = locationKeyword + "= '" + location + "'"; assertUpdate("CREATE TABLE " + tableName + " (key integer, value varchar) " + "WITH (" + locationQueryPart + partitionQueryPart + ")"); - try { + try (UncheckedCloseable ignored = onClose("DROP TABLE " + tableName)) { // create multiple data files, INSERT with multiple values would create only one file (if not partitioned) assertUpdate("INSERT INTO " + tableName + " VALUES (1, 'one')", 1); assertUpdate("INSERT INTO " + tableName + " VALUES (2, 'a//double_slash')", 1); assertUpdate("INSERT INTO " + tableName + " VALUES (3, 'a%percent')", 1); assertUpdate("INSERT INTO " + tableName + " VALUES (4, 'a//double_slash')", 1); + assertUpdate("INSERT INTO " + tableName + " VALUES (5, 'a///triple_slash')", 1); + assertUpdate("INSERT INTO " + tableName + " VALUES (6, 'trailing_slash/')", 1); + assertUpdate("INSERT INTO " + tableName + " VALUES (7, 'two_trailing_slashes//')", 1); assertUpdate("INSERT INTO " + tableName + " VALUES (11, 'one')", 1); Set initialFiles = getActiveFiles(tableName); - assertThat(initialFiles).hasSize(5); + assertThat(initialFiles).hasSize(8); Session session = sessionForOptimize(); computeActual(session, "ALTER TABLE " + tableName + " EXECUTE OPTIMIZE"); assertThat(query("SELECT sum(key), listagg(value, ' ') WITHIN GROUP (ORDER BY value) FROM " + tableName)) - .matches("VALUES (BIGINT '21', VARCHAR 'a%percent a//double_slash a//double_slash one one')"); + .matches("VALUES (BIGINT '39', VARCHAR 'a%percent a///triple_slash a//double_slash a//double_slash one one trailing_slash/ two_trailing_slashes//')"); Set updatedFiles = getActiveFiles(tableName); validateFilesAfterOptimize(getTableLocation(tableName), initialFiles, updatedFiles); } - finally { - assertUpdate("DROP TABLE " + tableName); - } } protected Session sessionForOptimize() @@ -250,9 +258,11 @@ protected void validateFilesAfterOptimize(String location, Set initialFi protected abstract void validateMetadataFiles(String location); - protected void validateTableLocation(String tableName, String location) + protected String validateTableLocation(String tableName, String expectedLocation) { - assertThat(getTableLocation(tableName)).isEqualTo(location); + String actualTableLocation = getTableLocation(tableName); + assertThat(actualTableLocation).isEqualTo(expectedLocation); + return actualTableLocation; } protected void validateFilesAfterDrop(String location) @@ -304,6 +314,12 @@ protected List getTableFiles(String location) .toList(); } + protected UncheckedCloseable onClose(@Language("SQL") String sql) + { + requireNonNull(sql, "sql is null"); + return () -> assertUpdate(sql); + } + protected String schemaPath() { return "s3://%s/%s".formatted(bucketName, schemaName); @@ -313,4 +329,41 @@ protected void verifyPathExist(String path) { assertThat(s3Path(s3, path)).exists(); } + + protected enum LocationPattern + { + REGULAR("s3://%s/%s/regular/%s"), + TRAILING_SLASH("s3://%s/%s/trailing_slash/%s/"), + TWO_TRAILING_SLASHES("s3://%s/%s/two_trailing_slashes/%s//"), + DOUBLE_SLASH("s3://%s/%s//double_slash/%s"), + TRIPLE_SLASH("s3://%s/%s///triple_slash/%s"), + PERCENT("s3://%s/%s/a%%percent/%s"), + WHITESPACE("s3://%s/%s/a whitespace/%s"), + TRAILING_WHITESPACE("s3://%s/%s/trailing_whitespace/%s "), + /**/; + + private final String locationPattern; + + LocationPattern(String locationPattern) + { + this.locationPattern = requireNonNull(locationPattern, "locationPattern is null"); + } + + public String locationForSchema(String bucketName, String schemaName) + { + return locationPattern.formatted(bucketName, "warehouse", schemaName); + } + + public String locationForTable(String bucketName, String schemaName, String tableName) + { + return locationPattern.formatted(bucketName, schemaName, tableName); + } + } + + protected interface UncheckedCloseable + extends AutoCloseable + { + @Override + void close(); + } } diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHiveS3AndGlueMetastoreTest.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHiveS3AndGlueMetastoreTest.java index 4df2b89396ac..5f22419b9204 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHiveS3AndGlueMetastoreTest.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHiveS3AndGlueMetastoreTest.java @@ -15,7 +15,6 @@ import com.google.common.collect.ImmutableMap; import io.trino.Session; -import io.trino.hdfs.TrinoFileSystemCache; import io.trino.spi.security.Identity; import io.trino.spi.security.SelectedRole; import io.trino.testing.DistributedQueryRunner; @@ -27,6 +26,9 @@ import java.util.Optional; import java.util.Set; +import static io.trino.plugin.hive.BaseS3AndGlueMetastoreTest.LocationPattern.DOUBLE_SLASH; +import static io.trino.plugin.hive.BaseS3AndGlueMetastoreTest.LocationPattern.TRIPLE_SLASH; +import static io.trino.plugin.hive.BaseS3AndGlueMetastoreTest.LocationPattern.TWO_TRAILING_SLASHES; import static io.trino.plugin.hive.metastore.glue.GlueHiveMetastore.createTestingGlueHiveMetastore; import static io.trino.spi.security.SelectedRole.Type.ROLE; import static io.trino.testing.TestingNames.randomNameSuffix; @@ -47,8 +49,6 @@ public TestHiveS3AndGlueMetastoreTest() protected QueryRunner createQueryRunner() throws Exception { - closeAfterClass(TrinoFileSystemCache.INSTANCE::closeAll); - metastore = createTestingGlueHiveMetastore(Path.of(schemaPath())); Session session = createSession(Optional.of(new SelectedRole(ROLE, Optional.of("admin")))); @@ -115,79 +115,80 @@ protected void validateFilesAfterOptimize(String location, Set initialFi @Override // Row-level modifications are not supported for Hive tables @Test(dataProvider = "locationPatternsDataProvider") - public void testBasicOperationsWithProvidedTableLocation(boolean partitioned, String locationPattern) + public void testBasicOperationsWithProvidedTableLocation(boolean partitioned, LocationPattern locationPattern) { String tableName = "test_basic_operations_" + randomNameSuffix(); - String location = locationPattern.formatted(bucketName, schemaName, tableName); + String location = locationPattern.locationForTable(bucketName, schemaName, tableName); String partitionQueryPart = (partitioned ? ",partitioned_by = ARRAY['col_int']" : ""); String create = "CREATE TABLE " + tableName + "(col_str, col_int)" + "WITH (external_location = '" + location + "'" + partitionQueryPart + ") " + "AS VALUES ('str1', 1), ('str2', 2), ('str3', 3)"; - if (locationPattern.contains("double_slash")) { + if (locationPattern == DOUBLE_SLASH || locationPattern == TRIPLE_SLASH || locationPattern == TWO_TRAILING_SLASHES) { assertQueryFails(create, "\\QUnsupported location that cannot be internally represented: " + location); return; } assertUpdate(create, 3); - assertQuery("SELECT * FROM " + tableName, "VALUES ('str1', 1), ('str2', 2), ('str3', 3)"); - - String actualTableLocation = getTableLocation(tableName); - assertThat(actualTableLocation).isEqualTo(location); + try (UncheckedCloseable ignored = onClose("DROP TABLE " + tableName)) { + assertQuery("SELECT * FROM " + tableName, "VALUES ('str1', 1), ('str2', 2), ('str3', 3)"); - assertUpdate("INSERT INTO " + tableName + " VALUES ('str4', 4)", 1); - assertQuery("SELECT * FROM " + tableName, "VALUES ('str1', 1), ('str2', 2), ('str3', 3), ('str4', 4)"); + String actualTableLocation = getTableLocation(tableName); + assertThat(actualTableLocation).isEqualTo(location); - assertThat(getTableFiles(actualTableLocation)).isNotEmpty(); - validateDataFiles(partitioned ? "col_int" : "", tableName, actualTableLocation); + assertUpdate("INSERT INTO " + tableName + " VALUES ('str4', 4)", 1); + assertQuery("SELECT * FROM " + tableName, "VALUES ('str1', 1), ('str2', 2), ('str3', 3), ('str4', 4)"); - assertUpdate("DROP TABLE " + tableName); + assertThat(getTableFiles(actualTableLocation)).isNotEmpty(); + validateDataFiles(partitioned ? "col_int" : "", tableName, actualTableLocation); + } } @Override // Row-level modifications are not supported for Hive tables @Test(dataProvider = "locationPatternsDataProvider") - public void testBasicOperationsWithProvidedSchemaLocation(boolean partitioned, String locationPattern) + public void testBasicOperationsWithProvidedSchemaLocation(boolean partitioned, LocationPattern locationPattern) { String schemaName = "test_basic_operations_schema_" + randomNameSuffix(); - String schemaLocation = locationPattern.formatted(bucketName, schemaName, schemaName); + String schemaLocation = locationPattern.locationForSchema(bucketName, schemaName); String tableName = "test_basic_operations_table_" + randomNameSuffix(); String qualifiedTableName = schemaName + "." + tableName; String partitionQueryPart = (partitioned ? " WITH (partitioned_by = ARRAY['col_int'])" : ""); + String actualTableLocation; assertUpdate("CREATE SCHEMA " + schemaName + " WITH (location = '" + schemaLocation + "')"); - assertThat(getSchemaLocation(schemaName)).isEqualTo(schemaLocation); - - assertUpdate("CREATE TABLE " + qualifiedTableName + "(col_str varchar, col_int int)" + partitionQueryPart); - String expectedTableLocation = ((schemaLocation.endsWith("/") ? schemaLocation : schemaLocation + "/") + tableName) - // Hive normalizes double slash - .replaceAll("(? super.testOptimizeWithProvidedTableLocation(partitioned, locationPattern)) .hasMessageStartingWith("Unsupported location that cannot be internally represented: ") .hasStackTraceContaining("SQL: CREATE TABLE test_optimize_"); @@ -197,59 +198,58 @@ public void testOptimizeWithProvidedTableLocation(boolean partitioned, String lo } @Test(dataProvider = "locationPatternsDataProvider") - public void testAnalyzeWithProvidedTableLocation(boolean partitioned, String locationPattern) + public void testAnalyzeWithProvidedTableLocation(boolean partitioned, LocationPattern locationPattern) { String tableName = "test_analyze_" + randomNameSuffix(); - String location = locationPattern.formatted(bucketName, schemaName, tableName); + String location = locationPattern.locationForTable(bucketName, schemaName, tableName); String partitionQueryPart = (partitioned ? ",partitioned_by = ARRAY['col_int']" : ""); String create = "CREATE TABLE " + tableName + "(col_str, col_int)" + "WITH (external_location = '" + location + "'" + partitionQueryPart + ") " + "AS VALUES ('str1', 1), ('str2', 2), ('str3', 3)"; - if (locationPattern.contains("double_slash")) { + if (locationPattern == DOUBLE_SLASH || locationPattern == TRIPLE_SLASH || locationPattern == TWO_TRAILING_SLASHES) { assertQueryFails(create, "\\QUnsupported location that cannot be internally represented: " + location); return; } assertUpdate(create, 3); - - assertUpdate("INSERT INTO " + tableName + " VALUES ('str4', 4)", 1); - assertQuery("SELECT * FROM " + tableName, "VALUES ('str1', 1), ('str2', 2), ('str3', 3), ('str4', 4)"); - - //Check statistics collection on write - if (partitioned) { - assertQuery("SHOW STATS FOR " + tableName, """ - VALUES - ('col_str', 0.0, 1.0, 0.0, null, null, null), - ('col_int', null, 4.0, 0.0, null, 1, 4), - (null, null, null, null, 4.0, null, null)"""); - } - else { - assertQuery("SHOW STATS FOR " + tableName, """ - VALUES - ('col_str', 16.0, 3.0, 0.0, null, null, null), - ('col_int', null, 3.0, 0.0, null, 1, 4), - (null, null, null, null, 4.0, null, null)"""); + try (UncheckedCloseable ignored = onClose("DROP TABLE " + tableName)) { + assertUpdate("INSERT INTO " + tableName + " VALUES ('str4', 4)", 1); + assertQuery("SELECT * FROM " + tableName, "VALUES ('str1', 1), ('str2', 2), ('str3', 3), ('str4', 4)"); + + // Check statistics collection on write + if (partitioned) { + assertQuery("SHOW STATS FOR " + tableName, """ + VALUES + ('col_str', 0.0, 1.0, 0.0, null, null, null), + ('col_int', null, 4.0, 0.0, null, 1, 4), + (null, null, null, null, 4.0, null, null)"""); + } + else { + assertQuery("SHOW STATS FOR " + tableName, """ + VALUES + ('col_str', 16.0, 3.0, 0.0, null, null, null), + ('col_int', null, 3.0, 0.0, null, 1, 4), + (null, null, null, null, 4.0, null, null)"""); + } + + // Check statistics collection explicitly + assertUpdate("ANALYZE " + tableName, 4); + + if (partitioned) { + assertQuery("SHOW STATS FOR " + tableName, """ + VALUES + ('col_str', 16.0, 1.0, 0.0, null, null, null), + ('col_int', null, 4.0, 0.0, null, 1, 4), + (null, null, null, null, 4.0, null, null)"""); + } + else { + assertQuery("SHOW STATS FOR " + tableName, """ + VALUES + ('col_str', 16.0, 4.0, 0.0, null, null, null), + ('col_int', null, 4.0, 0.0, null, 1, 4), + (null, null, null, null, 4.0, null, null)"""); + } } - - //Check statistics collection explicitly - assertUpdate("ANALYZE " + tableName, 4); - - if (partitioned) { - assertQuery("SHOW STATS FOR " + tableName, """ - VALUES - ('col_str', 16.0, 1.0, 0.0, null, null, null), - ('col_int', null, 4.0, 0.0, null, 1, 4), - (null, null, null, null, 4.0, null, null)"""); - } - else { - assertQuery("SHOW STATS FOR " + tableName, """ - VALUES - ('col_str', 16.0, 4.0, 0.0, null, null, null), - ('col_int', null, 4.0, 0.0, null, 1, 4), - (null, null, null, null, 4.0, null, null)"""); - } - - assertUpdate("DROP TABLE " + tableName); } @Test @@ -285,15 +285,15 @@ public void testCreateSchemaWithIncorrectLocation() String qualifiedTableName = schemaName + "." + tableName; assertUpdate("CREATE SCHEMA " + schemaName + " WITH (location = '" + schemaLocation + "')"); - assertThat(getSchemaLocation(schemaName)).isEqualTo(schemaLocation); + try (UncheckedCloseable ignored = onClose("DROP SCHEMA " + schemaName)) { + assertThat(getSchemaLocation(schemaName)).isEqualTo(schemaLocation); - assertThatThrownBy(() -> assertUpdate("CREATE TABLE " + qualifiedTableName + "(col_str, col_int) AS VALUES ('str1', 1)")) - .hasMessageContaining("Fragment is not allowed in a file system location"); + assertThatThrownBy(() -> assertUpdate("CREATE TABLE " + qualifiedTableName + "(col_str, col_int) AS VALUES ('str1', 1)")) + .hasMessageContaining("Fragment is not allowed in a file system location"); - assertThatThrownBy(() -> assertUpdate("CREATE TABLE " + qualifiedTableName + "(col_str varchar, col_int integer)")) - .hasMessageContaining("Fragment is not allowed in a file system location"); - - assertUpdate("DROP SCHEMA " + schemaName); + assertThatThrownBy(() -> assertUpdate("CREATE TABLE " + qualifiedTableName + "(col_str varchar, col_int integer)")) + .hasMessageContaining("Fragment is not allowed in a file system location"); + } } @Test @@ -304,8 +304,8 @@ public void testSchemaNameEscape() String tableName = "test_table_schema_escaped_" + randomNameSuffix(); assertUpdate("CREATE SCHEMA \"%2$s\" WITH (location = 's3://%1$s/%2$s')".formatted(bucketName, schemaName)); - assertQueryFails("CREATE TABLE \"" + schemaName + "\"." + tableName + " (col) AS VALUES 1", "Failed checking path: .*"); - - assertUpdate("DROP SCHEMA \"" + schemaName + "\""); + try (UncheckedCloseable ignored = onClose("DROP SCHEMA \"" + schemaName + "\"")) { + assertQueryFails("CREATE TABLE \"" + schemaName + "\"." + tableName + " (col) AS VALUES 1", "Failed checking path: .*"); + } } } diff --git a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/catalog/glue/TestIcebergGlueCatalogConnectorSmokeTest.java b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/catalog/glue/TestIcebergGlueCatalogConnectorSmokeTest.java index 76daa56560ff..247be6409f18 100644 --- a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/catalog/glue/TestIcebergGlueCatalogConnectorSmokeTest.java +++ b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/catalog/glue/TestIcebergGlueCatalogConnectorSmokeTest.java @@ -43,7 +43,6 @@ import io.trino.testing.QueryRunner; import org.apache.iceberg.FileFormat; import org.testng.annotations.AfterClass; -import org.testng.annotations.Parameters; import org.testng.annotations.Test; import java.util.List; @@ -72,11 +71,10 @@ public class TestIcebergGlueCatalogConnectorSmokeTest private final AWSGlueAsync glueClient; private final TrinoFileSystemFactory fileSystemFactory; - @Parameters("s3.bucket") - public TestIcebergGlueCatalogConnectorSmokeTest(String bucketName) + public TestIcebergGlueCatalogConnectorSmokeTest() { super(FileFormat.PARQUET); - this.bucketName = requireNonNull(bucketName, "bucketName is null"); + this.bucketName = requireNonNull(System.getenv("S3_BUCKET"), "Environment S3_BUCKET was not set"); this.schemaName = "test_iceberg_smoke_" + randomNameSuffix(); glueClient = AWSGlueAsyncClientBuilder.defaultClient(); diff --git a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/catalog/glue/TestIcebergS3AndGlueMetastoreTest.java b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/catalog/glue/TestIcebergS3AndGlueMetastoreTest.java index aae36ebc3095..df087ed452c0 100644 --- a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/catalog/glue/TestIcebergS3AndGlueMetastoreTest.java +++ b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/catalog/glue/TestIcebergS3AndGlueMetastoreTest.java @@ -14,12 +14,10 @@ package io.trino.plugin.iceberg.catalog.glue; import com.google.common.collect.ImmutableMap; -import io.trino.hdfs.TrinoFileSystemCache; import io.trino.plugin.hive.BaseS3AndGlueMetastoreTest; import io.trino.plugin.iceberg.IcebergQueryRunner; import io.trino.testing.DistributedQueryRunner; import io.trino.testing.QueryRunner; -import org.testng.annotations.Parameters; import org.testng.annotations.Test; import java.nio.file.Path; @@ -28,24 +26,22 @@ import static io.trino.plugin.hive.metastore.glue.GlueHiveMetastore.createTestingGlueHiveMetastore; import static io.trino.testing.TestingNames.randomNameSuffix; +import static java.util.Objects.requireNonNull; import static org.assertj.core.api.Assertions.assertThat; import static org.assertj.core.api.Assertions.assertThatThrownBy; public class TestIcebergS3AndGlueMetastoreTest extends BaseS3AndGlueMetastoreTest { - @Parameters("s3.bucket") - public TestIcebergS3AndGlueMetastoreTest(String bucketName) + public TestIcebergS3AndGlueMetastoreTest() { - super("partitioning", "location", bucketName); + super("partitioning", "location", requireNonNull(System.getenv("S3_BUCKET"), "Environment S3_BUCKET was not set")); } @Override protected QueryRunner createQueryRunner() throws Exception { - closeAfterClass(TrinoFileSystemCache.INSTANCE::closeAll); - metastore = createTestingGlueHiveMetastore(Path.of(schemaPath())); DistributedQueryRunner queryRunner = IcebergQueryRunner.builder() .setIcebergProperties(ImmutableMap.builder() @@ -81,15 +77,15 @@ protected void validateMetadataFiles(String location) } @Override - protected void validateTableLocation(String tableName, String location) + protected String validateTableLocation(String tableName, String expectedLocation) { - if (location.endsWith("/")) { - //Iceberg removes trailing slash from location, and it's expected. - assertThat(getTableLocation(tableName) + "/").isEqualTo(location); - } - else { - assertThat(getTableLocation(tableName)).isEqualTo(location); + // Iceberg removes trailing slashes from location, and it's expected. + if (expectedLocation.endsWith("/")) { + expectedLocation = expectedLocation.replaceFirst("/+$", ""); } + String actualTableLocation = getTableLocation(tableName); + assertThat(actualTableLocation).isEqualTo(expectedLocation); + return actualTableLocation; } private Set getAllMetadataDataFilesFromTableDirectory(String tableLocation) @@ -108,35 +104,34 @@ protected Set getAllDataFilesFromTableDirectory(String tableLocation) } @Test(dataProvider = "locationPatternsDataProvider") - public void testAnalyzeWithProvidedTableLocation(boolean partitioned, String locationPattern) + public void testAnalyzeWithProvidedTableLocation(boolean partitioned, LocationPattern locationPattern) { String tableName = "test_analyze_" + randomNameSuffix(); - String location = locationPattern.formatted(bucketName, schemaName, tableName); + String location = locationPattern.locationForTable(bucketName, schemaName, tableName); String partitionQueryPart = (partitioned ? ",partitioning = ARRAY['col_str']" : ""); assertUpdate("CREATE TABLE " + tableName + "(col_str, col_int)" + "WITH (location = '" + location + "'" + partitionQueryPart + ") " + "AS VALUES ('str1', 1), ('str2', 2), ('str3', 3)", 3); - - assertUpdate("INSERT INTO " + tableName + " VALUES ('str4', 4)", 1); - assertQuery("SELECT * FROM " + tableName, "VALUES ('str1', 1), ('str2', 2), ('str3', 3), ('str4', 4)"); - - String expectedStatistics = """ - VALUES - ('col_str', null, 4.0, 0.0, null, null, null), - ('col_int', null, 4.0, 0.0, null, 1, 4), - (null, null, null, null, 4.0, null, null)"""; - - //Check extended statistics collection on write - assertQuery("SHOW STATS FOR " + tableName, expectedStatistics); - - // drop stats - assertUpdate("ALTER TABLE " + tableName + " EXECUTE DROP_EXTENDED_STATS"); - //Check extended statistics collection explicitly - assertUpdate("ANALYZE " + tableName); - assertQuery("SHOW STATS FOR " + tableName, expectedStatistics); - - assertUpdate("DROP TABLE " + tableName); + try (UncheckedCloseable ignored = onClose("DROP TABLE " + tableName)) { + assertUpdate("INSERT INTO " + tableName + " VALUES ('str4', 4)", 1); + assertQuery("SELECT * FROM " + tableName, "VALUES ('str1', 1), ('str2', 2), ('str3', 3), ('str4', 4)"); + + String expectedStatistics = """ + VALUES + ('col_str', null, 4.0, 0.0, null, null, null), + ('col_int', null, 4.0, 0.0, null, 1, 4), + (null, null, null, null, 4.0, null, null)"""; + + // Check extended statistics collection on write + assertQuery("SHOW STATS FOR " + tableName, expectedStatistics); + + // drop stats + assertUpdate("ALTER TABLE " + tableName + " EXECUTE DROP_EXTENDED_STATS"); + // Check extended statistics collection explicitly + assertUpdate("ANALYZE " + tableName); + assertQuery("SHOW STATS FOR " + tableName, expectedStatistics); + } } @Test @@ -170,14 +165,14 @@ public void testCreateSchemaWithIncorrectLocation() String qualifiedTableName = schemaName + "." + tableName; assertUpdate("CREATE SCHEMA " + schemaName + " WITH (location = '" + schemaLocation + "')"); - assertThat(getSchemaLocation(schemaName)).isEqualTo(schemaLocation); + try (UncheckedCloseable ignored = onClose("DROP SCHEMA " + schemaName)) { + assertThat(getSchemaLocation(schemaName)).isEqualTo(schemaLocation); - assertThatThrownBy(() -> assertUpdate("CREATE TABLE " + qualifiedTableName + "(col_str varchar, col_int int)")) - .hasMessageContaining("location contains a fragment"); + assertThatThrownBy(() -> assertUpdate("CREATE TABLE " + qualifiedTableName + "(col_str varchar, col_int int)")) + .hasMessageContaining("location contains a fragment"); - assertThatThrownBy(() -> assertUpdate("CREATE TABLE " + qualifiedTableName + " AS SELECT * FROM tpch.tiny.nation")) - .hasMessageContaining("location contains a fragment"); - - assertUpdate("DROP SCHEMA " + schemaName); + assertThatThrownBy(() -> assertUpdate("CREATE TABLE " + qualifiedTableName + " AS SELECT * FROM tpch.tiny.nation")) + .hasMessageContaining("location contains a fragment"); + } } } diff --git a/testing/trino-testing/src/main/java/io/trino/testing/sql/TestTable.java b/testing/trino-testing/src/main/java/io/trino/testing/sql/TestTable.java index 8f80948ab500..4e125cdd4ef9 100644 --- a/testing/trino-testing/src/main/java/io/trino/testing/sql/TestTable.java +++ b/testing/trino-testing/src/main/java/io/trino/testing/sql/TestTable.java @@ -44,13 +44,6 @@ public TestTable(SqlExecutor sqlExecutor, String namePrefix, String tableDefinit createAndInsert(rowsToInsert); } - public TestTable(SqlExecutor sqlExecutor, String namePrefix) - { - this.sqlExecutor = sqlExecutor; - this.name = namePrefix + randomNameSuffix(); - this.tableDefinition = null; - } - public void createAndInsert(List rowsToInsert) { sqlExecutor.execute(format("CREATE TABLE %s %s", name, tableDefinition));