diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveConfig.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveConfig.java index 34f17235e817..b9eba5275bb2 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveConfig.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveConfig.java @@ -93,6 +93,10 @@ public class HiveConfig private boolean immutablePartitions; private Optional insertExistingPartitionsBehavior = Optional.empty(); private boolean createEmptyBucketFiles; + // This is meant to protect users who are misusing schema locations (by + // putting schemas in locations with extraneous files), so default to false + // to avoid deleting those files if Trino is unable to check. + private boolean deleteSchemaLocationsFallback; private int maxPartitionsPerWriter = 100; private int maxOpenSortFiles = 50; private int writeValidationThreads = 16; @@ -522,6 +526,19 @@ public HiveConfig setCreateEmptyBucketFiles(boolean createEmptyBucketFiles) return this; } + public boolean isDeleteSchemaLocationsFallback() + { + return this.deleteSchemaLocationsFallback; + } + + @Config("hive.delete-schema-locations-fallback") + @ConfigDescription("Whether schema locations should be deleted when Trino can't determine whether they contain external files.") + public HiveConfig setDeleteSchemaLocationsFallback(boolean deleteSchemaLocationsFallback) + { + this.deleteSchemaLocationsFallback = deleteSchemaLocationsFallback; + return this; + } + @Min(1) public int getMaxPartitionsPerWriter() { diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveMetadata.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveMetadata.java index 7854b233900c..78fb6d4e7f3f 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveMetadata.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveMetadata.java @@ -812,7 +812,7 @@ public void createSchema(ConnectorSession session, String schemaName, Map 
hiveTransactionHeartbeatInterval, @@ -152,6 +155,7 @@ public HiveMetadataFactory( this.skipTargetCleanupOnRollback = skipTargetCleanupOnRollback; this.writesToNonManagedTablesEnabled = writesToNonManagedTablesEnabled; this.createsOfNonManagedTablesEnabled = createsOfNonManagedTablesEnabled; + this.deleteSchemaLocationsFallback = deleteSchemaLocationsFallback; this.translateHiveViews = translateHiveViews; this.hideDeltaLakeTables = hideDeltaLakeTables; this.perTransactionCacheMaximumSize = perTransactionCacheMaximumSize; @@ -197,6 +201,7 @@ public TransactionalMetadata create(boolean autoCommit) updateExecutor, skipDeletionForAlter, skipTargetCleanupOnRollback, + deleteSchemaLocationsFallback, hiveTransactionHeartbeatInterval, heartbeatService); diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveMetastoreClosure.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveMetastoreClosure.java index 0de71207fe39..cae8aaa17d76 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveMetastoreClosure.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveMetastoreClosure.java @@ -131,9 +131,9 @@ public void createDatabase(HiveIdentity identity, Database database) delegate.createDatabase(identity, database); } - public void dropDatabase(HiveIdentity identity, String databaseName) + public void dropDatabase(HiveIdentity identity, String databaseName, boolean deleteData) { - delegate.dropDatabase(identity, databaseName); + delegate.dropDatabase(identity, databaseName, deleteData); } public void renameDatabase(HiveIdentity identity, String databaseName, String newDatabaseName) diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/HiveMetastore.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/HiveMetastore.java index 025ee381956b..6a57dc39f887 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/HiveMetastore.java +++ 
b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/HiveMetastore.java @@ -66,7 +66,7 @@ default void updatePartitionStatistics(HiveIdentity identity, Table table, Strin void createDatabase(HiveIdentity identity, Database database); - void dropDatabase(HiveIdentity identity, String databaseName); + void dropDatabase(HiveIdentity identity, String databaseName, boolean deleteData); void renameDatabase(HiveIdentity identity, String databaseName, String newDatabaseName); diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/RecordingHiveMetastore.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/RecordingHiveMetastore.java index 4c640c5a166c..da3ae85a8344 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/RecordingHiveMetastore.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/RecordingHiveMetastore.java @@ -296,10 +296,10 @@ public void createDatabase(HiveIdentity identity, Database database) } @Override - public void dropDatabase(HiveIdentity identity, String databaseName) + public void dropDatabase(HiveIdentity identity, String databaseName, boolean deleteData) { verifyRecordingMode(); - delegate.dropDatabase(identity, databaseName); + delegate.dropDatabase(identity, databaseName, deleteData); } @Override diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/SemiTransactionalHiveMetastore.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/SemiTransactionalHiveMetastore.java index f273609dcdfb..5138ec2132cb 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/SemiTransactionalHiveMetastore.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/SemiTransactionalHiveMetastore.java @@ -42,6 +42,7 @@ import io.trino.plugin.hive.security.SqlStandardAccessControlMetadataMetastore; import io.trino.spi.TrinoException; import io.trino.spi.connector.ConnectorSession; +import 
io.trino.spi.connector.SchemaNotFoundException; import io.trino.spi.connector.SchemaTableName; import io.trino.spi.connector.TableNotFoundException; import io.trino.spi.predicate.TupleDomain; @@ -133,6 +134,7 @@ public class SemiTransactionalHiveMetastore private final Executor updateExecutor; private final boolean skipDeletionForAlter; private final boolean skipTargetCleanupOnRollback; + private final boolean deleteSchemaLocationsFallback; private final ScheduledExecutorService heartbeatExecutor; private final Optional configuredTransactionHeartbeatInterval; @@ -169,6 +171,7 @@ public SemiTransactionalHiveMetastore( Executor updateExecutor, boolean skipDeletionForAlter, boolean skipTargetCleanupOnRollback, + boolean deleteSchemaLocationsFallback, Optional hiveTransactionHeartbeatInterval, ScheduledExecutorService heartbeatService) { @@ -179,6 +182,7 @@ public SemiTransactionalHiveMetastore( this.updateExecutor = requireNonNull(updateExecutor, "updateExecutor is null"); this.skipDeletionForAlter = skipDeletionForAlter; this.skipTargetCleanupOnRollback = skipTargetCleanupOnRollback; + this.deleteSchemaLocationsFallback = deleteSchemaLocationsFallback; this.heartbeatExecutor = heartbeatService; this.configuredTransactionHeartbeatInterval = requireNonNull(hiveTransactionHeartbeatInterval, "hiveTransactionHeartbeatInterval is null"); } @@ -365,9 +369,32 @@ public synchronized void createDatabase(HiveIdentity identity, Database database setExclusive((delegate, hdfsEnvironment) -> delegate.createDatabase(identity, database)); } - public synchronized void dropDatabase(HiveIdentity identity, String schemaName) + public synchronized void dropDatabase(ConnectorSession session, String schemaName) { - setExclusive((delegate, hdfsEnvironment) -> delegate.dropDatabase(identity, schemaName)); + Optional location = delegate.getDatabase(schemaName) + .orElseThrow(() -> new SchemaNotFoundException(schemaName)) + .getLocation() + .map(Path::new); + + setExclusive((delegate, 
hdfsEnvironment) -> { + HiveIdentity identity = new HiveIdentity(session); + + // If we see files in the schema location, don't delete it. + // If we see no files, request deletion. + // If we fail to check the schema location, behave according to fallback. + boolean deleteData = location.map(path -> { + HdfsContext context = new HdfsContext(session); + try (FileSystem fs = hdfsEnvironment.getFileSystem(context, path)) { + return !fs.listFiles(path, false).hasNext(); + } + catch (IOException | RuntimeException e) { + log.warn(e, "Could not check schema directory '%s'", path); + return deleteSchemaLocationsFallback; + } + }).orElse(deleteSchemaLocationsFallback); + + delegate.dropDatabase(identity, schemaName, deleteData); + }); } public synchronized void renameDatabase(HiveIdentity identity, String source, String target) diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/alluxio/AlluxioHiveMetastore.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/alluxio/AlluxioHiveMetastore.java index 715c9167cb09..4e58588be5b4 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/alluxio/AlluxioHiveMetastore.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/alluxio/AlluxioHiveMetastore.java @@ -260,7 +260,7 @@ public void createDatabase(HiveIdentity identity, Database database) } @Override - public void dropDatabase(HiveIdentity identity, String databaseName) + public void dropDatabase(HiveIdentity identity, String databaseName, boolean deleteData) { throw new TrinoException(NOT_SUPPORTED, "dropDatabase"); } diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/cache/CachingHiveMetastore.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/cache/CachingHiveMetastore.java index 24c51c051af7..caa7e0793a5c 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/cache/CachingHiveMetastore.java +++ 
b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/cache/CachingHiveMetastore.java @@ -476,11 +476,11 @@ public void createDatabase(HiveIdentity identity, Database database) } @Override - public void dropDatabase(HiveIdentity identity, String databaseName) + public void dropDatabase(HiveIdentity identity, String databaseName, boolean deleteData) { identity = updateIdentity(identity); try { - delegate.dropDatabase(identity, databaseName); + delegate.dropDatabase(identity, databaseName, deleteData); } finally { invalidateDatabase(databaseName); diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/file/FileHiveMetastore.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/file/FileHiveMetastore.java index 7dd8cc471e13..79ef11d3ba08 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/file/FileHiveMetastore.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/file/FileHiveMetastore.java @@ -136,7 +136,7 @@ public class FileHiveMetastore { private static final String PUBLIC_ROLE_NAME = "public"; private static final String ADMIN_ROLE_NAME = "admin"; - private static final String TRINO_SCHEMA_FILE_NAME = ".trinoSchema"; + private static final String TRINO_SCHEMA_FILE_NAME_SUFFIX = ".trinoSchema"; private static final String TRINO_PERMISSIONS_DIRECTORY_NAME = ".trinoPermissions"; public static final String ROLES_FILE_NAME = ".roles"; public static final String ROLE_GRANTS_FILE_NAME = ".roleGrants"; @@ -204,10 +204,16 @@ public synchronized void createDatabase(HiveIdentity identity, Database database Path databaseMetadataDirectory = getDatabaseMetadataDirectory(database.getDatabaseName()); writeSchemaFile(DATABASE, databaseMetadataDirectory, databaseCodec, new DatabaseMetadata(currentVersion, database), false); + try { + metadataFileSystem.mkdirs(databaseMetadataDirectory); + } + catch (IOException e) { + throw new TrinoException(HIVE_METASTORE_ERROR, "Could not write 
database", e); + } } @Override - public synchronized void dropDatabase(HiveIdentity identity, String databaseName) + public synchronized void dropDatabase(HiveIdentity identity, String databaseName, boolean deleteData) { requireNonNull(databaseName, "databaseName is null"); @@ -216,7 +222,13 @@ public synchronized void dropDatabase(HiveIdentity identity, String databaseName throw new TrinoException(HIVE_METASTORE_ERROR, "Database " + databaseName + " is not empty"); } - deleteMetadataDirectory(getDatabaseMetadataDirectory(databaseName)); + // Either delete the entire database directory or just its metadata files + if (deleteData) { + deleteDirectoryAndSchema(DATABASE, getDatabaseMetadataDirectory(databaseName)); + } + else { + deleteSchemaFile(DATABASE, getDatabaseMetadataDirectory(databaseName)); + } } @Override @@ -280,7 +292,7 @@ private void verifyDatabaseNotExists(String databaseName) @Override public synchronized List getAllDatabases() { - List databases = getChildSchemaDirectories(catalogDirectory).stream() + List databases = getChildSchemaDirectories(DATABASE, catalogDirectory).stream() .map(Path::getName) .collect(toList()); return ImmutableList.copyOf(databases); @@ -492,7 +504,7 @@ private List listAllTables(String databaseName) } Path databaseMetadataDirectory = getDatabaseMetadataDirectory(databaseName); - List tables = getChildSchemaDirectories(databaseMetadataDirectory).stream() + List tables = getChildSchemaDirectories(TABLE, databaseMetadataDirectory).stream() .map(Path::getName) .collect(toImmutableList()); return tables; @@ -522,10 +534,10 @@ public synchronized void dropTable(HiveIdentity identity, String databaseName, S // It is safe to delete the whole meta directory for external tables and views if (!table.getTableType().equals(MANAGED_TABLE.name()) || deleteData) { - deleteMetadataDirectory(tableMetadataDirectory); + deleteDirectoryAndSchema(TABLE, tableMetadataDirectory); } else { - // in this case we only wan to delete the metadata of a 
managed table + // in this case we only want to delete the metadata of a managed table deleteSchemaFile(TABLE, tableMetadataDirectory); deleteTablePrivileges(table); } @@ -576,7 +588,7 @@ public synchronized void renameTable(HiveIdentity identity, String databaseName, throw new TrinoException(HIVE_METASTORE_ERROR, "Could not create new table directory"); } // Iceberg metadata references files in old path, so these cannot be moved. Moving table description (metadata from metastore perspective) only. - if (!metadataFileSystem.rename(getSchemaPath(oldPath), getSchemaPath(newPath))) { + if (!metadataFileSystem.rename(getSchemaPath(TABLE, oldPath), getSchemaPath(TABLE, newPath))) { throw new TrinoException(HIVE_METASTORE_ERROR, "Could not rename table schema file"); } // TODO drop data files when table is being dropped @@ -734,7 +746,7 @@ public synchronized void addPartitions(HiveIdentity identity, String databaseNam Partition partition = partitionWithStatistics.getPartition(); verifiedPartition(table, partition); Path partitionMetadataDirectory = getPartitionMetadataDirectory(table, partition.getValues()); - Path schemaPath = getSchemaPath(partitionMetadataDirectory); + Path schemaPath = getSchemaPath(PARTITION, partitionMetadataDirectory); if (metadataFileSystem.exists(schemaPath)) { throw new TrinoException(HIVE_METASTORE_ERROR, "Partition already exists"); } @@ -814,7 +826,7 @@ public synchronized void dropPartition(HiveIdentity identity, String databaseNam Path partitionMetadataDirectory = getPartitionMetadataDirectory(table, partitionValues); if (deleteData) { - deleteMetadataDirectory(partitionMetadataDirectory); + deleteDirectoryAndSchema(PARTITION, partitionMetadataDirectory); } else { deleteSchemaFile(PARTITION, partitionMetadataDirectory); @@ -1015,7 +1027,7 @@ private synchronized Optional> getAllPartitionNames(HiveIdentity id private boolean isValidPartition(Table table, String partitionName) { try { - return 
metadataFileSystem.exists(getSchemaPath(getPartitionMetadataDirectory(table, partitionName))); + return metadataFileSystem.exists(getSchemaPath(PARTITION, getPartitionMetadataDirectory(table, partitionName))); } catch (IOException e) { return false; @@ -1185,7 +1197,7 @@ private synchronized void deleteTablePrivileges(Table table) } } - private List getChildSchemaDirectories(Path metadataDirectory) + private List getChildSchemaDirectories(SchemaType type, Path metadataDirectory) { try { if (!metadataFileSystem.isDirectory(metadataDirectory)) { @@ -1201,7 +1213,7 @@ private List getChildSchemaDirectories(Path metadataDirectory) if (childPath.getName().startsWith(".")) { continue; } - if (metadataFileSystem.isFile(getSchemaPath(childPath))) { + if (metadataFileSystem.isFile(getSchemaPath(type, childPath))) { childSchemaDirectories.add(childPath); } } @@ -1233,15 +1245,19 @@ private Set readAllPermissions(Path permissionsDirectory) } } - private void deleteMetadataDirectory(Path metadataDirectory) + private void deleteDirectoryAndSchema(SchemaType type, Path metadataDirectory) { try { - Path schemaPath = getSchemaPath(metadataDirectory); + Path schemaPath = getSchemaPath(type, metadataDirectory); if (!metadataFileSystem.isFile(schemaPath)) { // if there is no schema file, assume this is not a database, partition or table return; } + // Delete the schema file first, so it can never exist after the directory is deleted. + // (For cases when the schema file isn't in the metadata directory.) 
+ deleteSchemaFile(type, metadataDirectory); + if (!metadataFileSystem.delete(metadataDirectory, true)) { throw new TrinoException(HIVE_METASTORE_ERROR, "Could not delete metadata directory"); } @@ -1273,7 +1289,7 @@ private void checkVersion(Optional writerVersion) private Optional readSchemaFile(SchemaType type, Path metadataDirectory, JsonCodec codec) { - return readFile(type + " schema", getSchemaPath(metadataDirectory), codec); + return readFile(type + " schema", getSchemaPath(type, metadataDirectory), codec); } private Optional readFile(String type, Path path, JsonCodec codec) @@ -1295,7 +1311,7 @@ private Optional readFile(String type, Path path, JsonCodec codec) private void writeSchemaFile(SchemaType type, Path directory, JsonCodec codec, T value, boolean overwrite) { - writeFile(type + " schema", getSchemaPath(directory), codec, value, overwrite); + writeFile(type + " schema", getSchemaPath(type, directory), codec, value, overwrite); } private void writeFile(String type, Path path, JsonCodec codec, T value, boolean overwrite) @@ -1324,7 +1340,7 @@ private void writeFile(String type, Path path, JsonCodec codec, T value, private void deleteSchemaFile(SchemaType type, Path metadataDirectory) { try { - if (!metadataFileSystem.delete(getSchemaPath(metadataDirectory), false)) { + if (!metadataFileSystem.delete(getSchemaPath(type, metadataDirectory), false)) { throw new TrinoException(HIVE_METASTORE_ERROR, "Could not delete " + type + " schema"); } } @@ -1380,9 +1396,14 @@ private Path getRoleGrantsFile() return new Path(catalogDirectory, ROLE_GRANTS_FILE_NAME); } - private static Path getSchemaPath(Path metadataDirectory) + private static Path getSchemaPath(SchemaType type, Path metadataDirectory) { - return new Path(metadataDirectory, TRINO_SCHEMA_FILE_NAME); + if (type == DATABASE) { + return new Path( + requireNonNull(metadataDirectory.getParent(), "Can't use root directory as database path"), + format(".%s%s", metadataDirectory.getName(), 
TRINO_SCHEMA_FILE_NAME_SUFFIX)); + } + return new Path(metadataDirectory, TRINO_SCHEMA_FILE_NAME_SUFFIX); } private static boolean isChildDirectory(Path parentDirectory, Path childDirectory) diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/GlueHiveMetastore.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/GlueHiveMetastore.java index c74ed116f6e3..8c3dc39d61bf 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/GlueHiveMetastore.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/GlueHiveMetastore.java @@ -542,9 +542,17 @@ public void createDatabase(HiveIdentity identity, Database database) } } + // TODO: respect deleteData @Override - public void dropDatabase(HiveIdentity identity, String databaseName) + public void dropDatabase(HiveIdentity identity, String databaseName, boolean deleteData) { + Optional location = Optional.empty(); + if (deleteData) { + location = getDatabase(databaseName) + .orElseThrow(() -> new SchemaNotFoundException(databaseName)) + .getLocation(); + } + try { stats.getDropDatabase().call(() -> glueClient.deleteDatabase(new DeleteDatabaseRequest().withCatalogId(catalogId).withName(databaseName))); @@ -555,6 +563,10 @@ public void dropDatabase(HiveIdentity identity, String databaseName) catch (AmazonServiceException e) { throw new TrinoException(HIVE_METASTORE_ERROR, e); } + + if (deleteData) { + location.ifPresent(path -> deleteDir(hdfsContext, hdfsEnvironment, new Path(path), true)); + } } @Override diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/thrift/BridgingHiveMetastore.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/thrift/BridgingHiveMetastore.java index c08b0d8f4c57..79752d51aec8 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/thrift/BridgingHiveMetastore.java +++ 
b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/thrift/BridgingHiveMetastore.java @@ -165,9 +165,9 @@ public void createDatabase(HiveIdentity identity, Database database) } @Override - public void dropDatabase(HiveIdentity identity, String databaseName) + public void dropDatabase(HiveIdentity identity, String databaseName, boolean deleteData) { - delegate.dropDatabase(identity, databaseName); + delegate.dropDatabase(identity, databaseName, deleteData); } @Override diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/thrift/ThriftHiveMetastore.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/thrift/ThriftHiveMetastore.java index 772c877ac6c9..4aeccf49e079 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/thrift/ThriftHiveMetastore.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/thrift/ThriftHiveMetastore.java @@ -1010,7 +1010,7 @@ public void createDatabase(HiveIdentity identity, Database database) } @Override - public void dropDatabase(HiveIdentity identity, String databaseName) + public void dropDatabase(HiveIdentity identity, String databaseName, boolean deleteData) { try { retry() @@ -1018,7 +1018,7 @@ public void dropDatabase(HiveIdentity identity, String databaseName) .stopOnIllegalExceptions() .run("dropDatabase", stats.getDropDatabase().wrap(() -> { try (ThriftMetastoreClient client = createMetastoreClient(identity)) { - client.dropDatabase(databaseName, true, false); + client.dropDatabase(databaseName, deleteData, false); } return null; })); diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/thrift/ThriftMetastore.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/thrift/ThriftMetastore.java index 3658b2b91b7a..2b8a615684af 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/thrift/ThriftMetastore.java +++ 
b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/thrift/ThriftMetastore.java @@ -48,7 +48,7 @@ public interface ThriftMetastore { void createDatabase(HiveIdentity identity, Database database); - void dropDatabase(HiveIdentity identity, String databaseName); + void dropDatabase(HiveIdentity identity, String databaseName, boolean deleteData); void alterDatabase(HiveIdentity identity, String databaseName, Database database); diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/AbstractTestHive.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/AbstractTestHive.java index ad5faff986f9..c331802241cd 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/AbstractTestHive.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/AbstractTestHive.java @@ -815,6 +815,7 @@ protected final void setup(String databaseName, HiveConfig hiveConfig, HiveMetas false, false, true, + true, false, 1000, Optional.empty(), diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/AbstractTestHiveLocal.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/AbstractTestHiveLocal.java index ceb82f67301d..a2bf1e4cd5d0 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/AbstractTestHiveLocal.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/AbstractTestHiveLocal.java @@ -114,7 +114,7 @@ public void cleanup() throws IOException { try { - getMetastoreClient().dropDatabase(HIVE_IDENTITY, testDbName); + getMetastoreClient().dropDatabase(HIVE_IDENTITY, testDbName, true); } finally { deleteRecursively(tempDir.toPath(), ALLOW_INSECURE); diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHiveConfig.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHiveConfig.java index 75866f6a4998..5ce35ab89f68 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHiveConfig.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHiveConfig.java @@ -63,6 +63,7 
@@ public void testDefaults() .setImmutablePartitions(false) .setInsertExistingPartitionsBehavior(APPEND) .setCreateEmptyBucketFiles(false) + .setDeleteSchemaLocationsFallback(false) .setSortedWritingEnabled(true) .setPropagateTableScanSortingProperties(false) .setMaxPartitionsPerWriter(100) @@ -137,6 +138,7 @@ public void testExplicitPropertyMappings() .put("hive.immutable-partitions", "true") .put("hive.insert-existing-partitions-behavior", "OVERWRITE") .put("hive.create-empty-bucket-files", "true") + .put("hive.delete-schema-locations-fallback", "true") .put("hive.max-partitions-per-writers", "222") .put("hive.max-open-sort-files", "333") .put("hive.write-validation-threads", "11") @@ -216,6 +218,7 @@ public void testExplicitPropertyMappings() .setImmutablePartitions(true) .setInsertExistingPartitionsBehavior(OVERWRITE) .setCreateEmptyBucketFiles(true) + .setDeleteSchemaLocationsFallback(true) .setMaxPartitionsPerWriter(222) .setMaxOpenSortFiles(333) .setWriteValidationThreads(11) diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/TestRecordingHiveMetastore.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/TestRecordingHiveMetastore.java index d06114059cea..34c9a584f6f3 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/TestRecordingHiveMetastore.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/TestRecordingHiveMetastore.java @@ -132,7 +132,7 @@ public void testRecordingHiveMetastore() JsonCodec jsonCodec = createJsonCodec(); RecordingHiveMetastore recordingHiveMetastore = new RecordingHiveMetastore(new TestingHiveMetastore(), recordingConfig, jsonCodec); validateMetadata(recordingHiveMetastore); - recordingHiveMetastore.dropDatabase(HIVE_CONTEXT, "other_database"); + recordingHiveMetastore.dropDatabase(HIVE_CONTEXT, "other_database", true); recordingHiveMetastore.writeRecording(); RecordingMetastoreConfig replayingConfig = recordingConfig @@ -264,7 +264,7 @@ public 
List getAllViews(String databaseName) } @Override - public void dropDatabase(HiveIdentity identity, String databaseName) + public void dropDatabase(HiveIdentity identity, String databaseName, boolean deleteData) { // noop for test purpose } diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/TestSemiTransactionalHiveMetastore.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/TestSemiTransactionalHiveMetastore.java index a39faa8b4682..dec6d353d922 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/TestSemiTransactionalHiveMetastore.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/TestSemiTransactionalHiveMetastore.java @@ -85,6 +85,7 @@ private SemiTransactionalHiveMetastore getSemiTransactionalHiveMetastoreWithDrop directExecutor(), false, false, + true, Optional.empty(), newScheduledThreadPool(1)); } @@ -122,6 +123,7 @@ private SemiTransactionalHiveMetastore getSemiTransactionalHiveMetastoreWithUpda updateExecutor, false, false, + true, Optional.empty(), newScheduledThreadPool(1)); } diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/UnimplementedHiveMetastore.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/UnimplementedHiveMetastore.java index fdcb82715f75..f63437d14a18 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/UnimplementedHiveMetastore.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/UnimplementedHiveMetastore.java @@ -105,7 +105,7 @@ public void createDatabase(HiveIdentity identity, Database database) } @Override - public void dropDatabase(HiveIdentity identity, String databaseName) + public void dropDatabase(HiveIdentity identity, String databaseName, boolean deleteData) { throw new UnsupportedOperationException(); } diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/thrift/InMemoryThriftMetastore.java 
b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/thrift/InMemoryThriftMetastore.java index 579991dea773..cd0302eea866 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/thrift/InMemoryThriftMetastore.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/thrift/InMemoryThriftMetastore.java @@ -130,8 +130,9 @@ public synchronized void createDatabase(HiveIdentity identity, Database database } } + // TODO: respect deleteData @Override - public synchronized void dropDatabase(HiveIdentity identity, String databaseName) + public synchronized void dropDatabase(HiveIdentity identity, String databaseName, boolean deleteData) { if (!databases.containsKey(databaseName)) { throw new SchemaNotFoundException(databaseName); diff --git a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/TrinoCatalogFactory.java b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/TrinoCatalogFactory.java index 060898f56f40..02f8805bc203 100644 --- a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/TrinoCatalogFactory.java +++ b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/TrinoCatalogFactory.java @@ -15,6 +15,7 @@ import io.trino.plugin.base.CatalogName; import io.trino.plugin.hive.HdfsEnvironment; +import io.trino.plugin.hive.HiveConfig; import io.trino.plugin.hive.NodeVersion; import io.trino.plugin.hive.metastore.HiveMetastore; import io.trino.plugin.iceberg.catalog.IcebergTableOperationsProvider; @@ -39,6 +40,7 @@ public class TrinoCatalogFactory private final CatalogType catalogType; private final boolean isUniqueTableLocation; private final boolean isUsingSystemSecurity; + private final boolean deleteSchemaLocationsFallback; @Inject public TrinoCatalogFactory( @@ -49,7 +51,8 @@ public TrinoCatalogFactory( TypeManager typeManager, IcebergTableOperationsProvider tableOperationsProvider, NodeVersion nodeVersion, - IcebergSecurityConfig securityConfig) + IcebergSecurityConfig securityConfig, + 
HiveConfig hiveConfig) { this.catalogName = requireNonNull(catalogName, "catalogName is null"); this.metastore = requireNonNull(metastore, "metastore is null"); @@ -61,6 +64,7 @@ public TrinoCatalogFactory( this.catalogType = config.getCatalogType(); this.isUniqueTableLocation = config.isUniqueTableLocation(); this.isUsingSystemSecurity = securityConfig.getSecuritySystem() == SYSTEM; + this.deleteSchemaLocationsFallback = requireNonNull(hiveConfig).isDeleteSchemaLocationsFallback(); } public TrinoCatalog create() @@ -76,7 +80,8 @@ public TrinoCatalog create() tableOperationsProvider, trinoVersion, isUniqueTableLocation, - isUsingSystemSecurity); + isUsingSystemSecurity, + deleteSchemaLocationsFallback); case GLUE: // TODO not supported yet throw new TrinoException(NOT_SUPPORTED, "Unknown Trino Iceberg catalog type"); diff --git a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/TrinoHiveCatalog.java b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/TrinoHiveCatalog.java index c36209e1551a..93b1ebc53832 100644 --- a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/TrinoHiveCatalog.java +++ b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/TrinoHiveCatalog.java @@ -47,6 +47,7 @@ import io.trino.spi.connector.ViewNotFoundException; import io.trino.spi.security.TrinoPrincipal; import io.trino.spi.type.TypeManager; +import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.iceberg.BaseTable; import org.apache.iceberg.PartitionSpec; @@ -130,6 +131,7 @@ class TrinoHiveCatalog private final String trinoVersion; private final boolean useUniqueTableLocation; private final boolean isUsingSystemSecurity; + private final boolean deleteSchemaLocationsFallback; private final Map tableMetadataCache = new ConcurrentHashMap<>(); private final ViewReaderUtil.PrestoViewReader viewReader = new ViewReaderUtil.PrestoViewReader(); @@ -142,7 +144,8 @@ public TrinoHiveCatalog( IcebergTableOperationsProvider 
tableOperationsProvider, String trinoVersion, boolean useUniqueTableLocation, - boolean isUsingSystemSecurity) + boolean isUsingSystemSecurity, + boolean deleteSchemaLocationsFallback) { this.catalogName = requireNonNull(catalogName, "catalogName is null"); this.metastore = requireNonNull(metastore, "metastore is null"); @@ -152,6 +155,7 @@ public TrinoHiveCatalog( this.trinoVersion = requireNonNull(trinoVersion, "trinoVersion is null"); this.useUniqueTableLocation = useUniqueTableLocation; this.isUsingSystemSecurity = isUsingSystemSecurity; + this.deleteSchemaLocationsFallback = deleteSchemaLocationsFallback; } @Override @@ -215,7 +219,27 @@ public boolean dropNamespace(ConnectorSession session, String namespace) !listViews(session, Optional.of(namespace)).isEmpty()) { throw new TrinoException(SCHEMA_NOT_EMPTY, "Schema not empty: " + namespace); } - metastore.dropDatabase(new HiveIdentity(session), namespace); + + Optional location = metastore.getDatabase(namespace) + .orElseThrow(() -> new SchemaNotFoundException(namespace)) + .getLocation() + .map(Path::new); + + // If we see files in the schema location, don't delete it. + // If we see no files, request deletion. + // If we fail to check the schema location, behave according to fallback. 
+ boolean deleteData = location.map(path -> { + HdfsContext context = new HdfsContext(session); + try (FileSystem fs = hdfsEnvironment.getFileSystem(context, path)) { + return !fs.listFiles(path, false).hasNext(); + } + catch (IOException e) { + log.warn(e, "Could not check schema directory '%s'", path); + return deleteSchemaLocationsFallback; + } + }).orElse(deleteSchemaLocationsFallback); + + metastore.dropDatabase(new HiveIdentity(session), namespace, deleteData); return true; } diff --git a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/CountingAccessFileHiveMetastore.java b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/CountingAccessFileHiveMetastore.java index 23fe74fedd22..be279f7c388c 100644 --- a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/CountingAccessFileHiveMetastore.java +++ b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/CountingAccessFileHiveMetastore.java @@ -124,7 +124,7 @@ public void createDatabase(HiveIdentity identity, Database database) } @Override - public void dropDatabase(HiveIdentity identity, String databaseName) + public void dropDatabase(HiveIdentity identity, String databaseName, boolean deleteData) { throw new UnsupportedOperationException(); } diff --git a/testing/trino-product-tests-launcher/src/main/java/io/trino/tests/product/launcher/env/environment/EnvSinglenodeSparkHive.java b/testing/trino-product-tests-launcher/src/main/java/io/trino/tests/product/launcher/env/environment/EnvSinglenodeSparkHive.java index 8d602ef5d4dc..3b3a8acbfc8c 100644 --- a/testing/trino-product-tests-launcher/src/main/java/io/trino/tests/product/launcher/env/environment/EnvSinglenodeSparkHive.java +++ b/testing/trino-product-tests-launcher/src/main/java/io/trino/tests/product/launcher/env/environment/EnvSinglenodeSparkHive.java @@ -30,8 +30,10 @@ import static io.trino.tests.product.launcher.docker.ContainerUtil.forSelectedPorts; import static 
io.trino.tests.product.launcher.env.EnvironmentContainers.COORDINATOR; import static io.trino.tests.product.launcher.env.EnvironmentContainers.HADOOP; +import static io.trino.tests.product.launcher.env.EnvironmentContainers.TESTS; import static io.trino.tests.product.launcher.env.common.Hadoop.CONTAINER_HADOOP_INIT_D; import static io.trino.tests.product.launcher.env.common.Standard.CONTAINER_PRESTO_ETC; +import static io.trino.tests.product.launcher.env.common.Standard.CONTAINER_TEMPTO_PROFILE_CONFIG; import static java.util.Objects.requireNonNull; import static org.testcontainers.utility.MountableFile.forHostPath; @@ -57,13 +59,21 @@ public EnvSinglenodeSparkHive(Standard standard, Hadoop hadoop, DockerFiles dock @Override public void extendEnvironment(Environment.Builder builder) { + String dockerImageName = "ghcr.io/trinodb/testing/hdp3.1-hive:" + hadoopImagesVersion; + builder.configureContainer(HADOOP, dockerContainer -> { - dockerContainer.setDockerImageName("ghcr.io/trinodb/testing/hdp3.1-hive:" + hadoopImagesVersion); + dockerContainer.setDockerImageName(dockerImageName); dockerContainer.withCopyFileToContainer( forHostPath(dockerFiles.getDockerFilesHostPath("conf/environment/singlenode-hdp3/apply-hdp3-config.sh")), CONTAINER_HADOOP_INIT_D + "apply-hdp3-config.sh"); }); + builder.configureContainer(TESTS, dockerContainer -> { + dockerContainer.withCopyFileToContainer( + forHostPath(dockerFiles.getDockerFilesHostPath("conf/tempto/tempto-configuration-for-hive3.yaml")), + CONTAINER_TEMPTO_PROFILE_CONFIG); + }); + builder.configureContainer(COORDINATOR, container -> container .withCopyFileToContainer( forHostPath(dockerFiles.getDockerFilesHostPath("conf/environment/singlenode-spark-hive/hive.properties")), diff --git a/testing/trino-product-tests-launcher/src/main/java/io/trino/tests/product/launcher/env/environment/EnvSinglenodeSparkIceberg.java 
b/testing/trino-product-tests-launcher/src/main/java/io/trino/tests/product/launcher/env/environment/EnvSinglenodeSparkIceberg.java index 30e488d0d8e2..ee0468db0fd4 100644 --- a/testing/trino-product-tests-launcher/src/main/java/io/trino/tests/product/launcher/env/environment/EnvSinglenodeSparkIceberg.java +++ b/testing/trino-product-tests-launcher/src/main/java/io/trino/tests/product/launcher/env/environment/EnvSinglenodeSparkIceberg.java @@ -30,8 +30,10 @@ import static io.trino.tests.product.launcher.docker.ContainerUtil.forSelectedPorts; import static io.trino.tests.product.launcher.env.EnvironmentContainers.COORDINATOR; import static io.trino.tests.product.launcher.env.EnvironmentContainers.HADOOP; +import static io.trino.tests.product.launcher.env.EnvironmentContainers.TESTS; import static io.trino.tests.product.launcher.env.common.Hadoop.CONTAINER_HADOOP_INIT_D; import static io.trino.tests.product.launcher.env.common.Standard.CONTAINER_PRESTO_ETC; +import static io.trino.tests.product.launcher.env.common.Standard.CONTAINER_TEMPTO_PROFILE_CONFIG; import static java.util.Objects.requireNonNull; import static org.testcontainers.utility.MountableFile.forHostPath; @@ -57,13 +59,21 @@ public EnvSinglenodeSparkIceberg(Standard standard, Hadoop hadoop, DockerFiles d @Override public void extendEnvironment(Environment.Builder builder) { + String dockerImageName = "ghcr.io/trinodb/testing/hdp3.1-hive:" + hadoopImagesVersion; + builder.configureContainer(HADOOP, container -> { - container.setDockerImageName("ghcr.io/trinodb/testing/hdp3.1-hive:" + hadoopImagesVersion); + container.setDockerImageName(dockerImageName); container.withCopyFileToContainer( forHostPath(dockerFiles.getDockerFilesHostPath("conf/environment/singlenode-spark-iceberg/apply-hive-config-for-iceberg.sh")), CONTAINER_HADOOP_INIT_D + "/apply-hive-config-for-iceberg.sh"); }); + builder.configureContainer(TESTS, dockerContainer -> { + dockerContainer.withCopyFileToContainer( + 
forHostPath(dockerFiles.getDockerFilesHostPath("conf/tempto/tempto-configuration-for-hive3.yaml")), + CONTAINER_TEMPTO_PROFILE_CONFIG); + }); + builder.configureContainer(COORDINATOR, container -> container .withCopyFileToContainer( forHostPath(dockerFiles.getDockerFilesHostPath("conf/environment/singlenode-spark-iceberg/iceberg.properties")), diff --git a/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/TestCreateDropSchema.java b/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/TestCreateDropSchema.java index c1f94d260054..b9f86fad53a0 100644 --- a/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/TestCreateDropSchema.java +++ b/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/TestCreateDropSchema.java @@ -20,11 +20,10 @@ import org.testng.annotations.Test; import static io.trino.tempto.assertions.QueryAssert.assertQueryFailure; -import static io.trino.tempto.query.QueryExecutor.query; -import static io.trino.tests.product.utils.QueryExecutors.onHive; +import static io.trino.tests.product.hive.util.TemporaryHiveTable.randomTableSuffix; import static io.trino.tests.product.utils.QueryExecutors.onTrino; -import static org.testng.Assert.assertFalse; -import static org.testng.Assert.assertTrue; +import static java.lang.String.format; +import static org.assertj.core.api.Assertions.assertThat; public class TestCreateDropSchema extends ProductTest @@ -39,18 +38,120 @@ public class TestCreateDropSchema @Test public void testCreateDropSchema() { - onHive().executeQuery("DROP DATABASE IF EXISTS test_drop_schema CASCADE"); + String schemaName = "test_drop_schema_" + randomTableSuffix(); + String schemaDir = format("%s/%s.db", warehouseDirectory, schemaName); - onTrino().executeQuery("CREATE SCHEMA test_drop_schema"); - assertTrue(hdfsClient.exist(warehouseDirectory + "/test_drop_schema.db")); + onTrino().executeQuery("CREATE SCHEMA " + schemaName); + assertFileExistence(schemaDir, true, 
"schema directory exists after creating schema"); - onTrino().executeQuery("CREATE TABLE test_drop_schema.test_drop (col1 int)"); - assertQueryFailure(() -> query("DROP SCHEMA test_drop_schema")) - .hasMessageContaining("line 1:1: Cannot drop non-empty schema 'test_drop_schema'"); + onTrino().executeQuery(format("CREATE TABLE %s.test_drop (col1 int)", schemaName)); + assertQueryFailure(() -> onTrino().executeQuery("DROP SCHEMA " + schemaName)) + .hasMessageContaining(format("line 1:1: Cannot drop non-empty schema '%s'", schemaName)); - onTrino().executeQuery("DROP TABLE test_drop_schema.test_drop"); + onTrino().executeQuery(format("DROP TABLE %s.test_drop", schemaName)); + onTrino().executeQuery("DROP SCHEMA " + schemaName); + assertFileExistence(schemaDir, false, "schema directory exists after dropping schema"); + } + + @Test + public void testDropSchemaWithLocationWithoutExternalFiles() + { + String schemaName = "schema_with_empty_location_" + randomTableSuffix(); + String schemaDir = warehouseDirectory + "/schema-with-empty-location/"; + + onTrino().executeQuery(format("CREATE SCHEMA %s WITH (location = '%s')", schemaName, schemaDir)); + assertFileExistence(schemaDir, true, "schema directory exists after creating schema"); + onTrino().executeQuery("DROP SCHEMA " + schemaName); + assertFileExistence(schemaDir, false, "schema directory exists after dropping schema"); + } + + @Test + public void testDropSchemaFilesWithoutLocation() + { + String schemaName = "schema_without_location_" + randomTableSuffix(); + String schemaDir = format("%s/%s.db/", warehouseDirectory, schemaName); + + onTrino().executeQuery(format("CREATE SCHEMA %s", schemaName)); + assertFileExistence(schemaDir, true, "schema directory exists after creating schema"); + onTrino().executeQuery("DROP SCHEMA " + schemaName); + assertFileExistence(schemaDir, false, "schema directory exists after dropping schema"); + } + + @Test + public void testDropSchemaFilesWithLocationWithExternalFile() + { + String 
schemaName = "schema_with_nonempty_location_" + randomTableSuffix(); + String schemaDir = warehouseDirectory + "/schema-with-nonempty-location/"; + + // Create file in schema directory before creating schema + String externalFile = schemaDir + "external-file"; + hdfsClient.createDirectory(schemaDir); + hdfsClient.saveFile(externalFile, ""); + + onTrino().executeQuery(format("CREATE SCHEMA %s WITH (location = '%s')", schemaName, schemaDir)); + assertFileExistence(schemaDir, true, "schema directory exists after creating schema"); + onTrino().executeQuery("DROP SCHEMA " + schemaName); + assertFileExistence(schemaDir, true, "schema directory exists after dropping schema"); + assertFileExistence(externalFile, true, "external file exists after dropping schema"); + + hdfsClient.delete(externalFile); + } - onTrino().executeQuery("DROP SCHEMA test_drop_schema"); - assertFalse(hdfsClient.exist(warehouseDirectory + "/test_drop_schema.db")); + // Tests create/drop schema transactions with default schema location + @Test + public void testDropSchemaFilesTransactions() + { + String schemaName = "schema_directory_transactions_" + randomTableSuffix(); + String schemaDir = format("%s/%s.db/", warehouseDirectory, schemaName); + + onTrino().executeQuery(format("CREATE SCHEMA %s", schemaName)); + assertFileExistence(schemaDir, true, "schema directory exists after creating schema"); + + onTrino().executeQuery("START TRANSACTION"); + onTrino().executeQuery("DROP SCHEMA " + schemaName); + onTrino().executeQuery("ROLLBACK"); + assertFileExistence(schemaDir, true, "schema directory exists after rollback"); + + // Sanity check: schema is still working + onTrino().executeQuery(format("CREATE TABLE %s.test_table (i integer)", schemaName)); + onTrino().executeQuery(format("DROP TABLE %s.test_table", schemaName)); + + onTrino().executeQuery("START TRANSACTION"); + onTrino().executeQuery("DROP SCHEMA " + schemaName); + onTrino().executeQuery("COMMIT"); + assertFileExistence(schemaDir, false, 
"schema directory exists after dropping schema"); + } + + @Test + public void testDropSchemaFilesTransactionsWithExternalFile() + { + String schemaName = "schema_transactions_with_external_files_" + randomTableSuffix(); + String schemaDir = warehouseDirectory + "/schema-transactions-with-external-files/"; + + // Create file in schema directory before creating schema + String externalFile = schemaDir + "external-file"; + hdfsClient.createDirectory(schemaDir); + hdfsClient.saveFile(externalFile, ""); + + onTrino().executeQuery(format("CREATE SCHEMA %s WITH (location = '%s')", schemaName, schemaDir)); + + onTrino().executeQuery("START TRANSACTION"); + onTrino().executeQuery("DROP SCHEMA " + schemaName); + onTrino().executeQuery("ROLLBACK"); + assertFileExistence(externalFile, true, "external file exists after rolling back drop schema"); + + // Sanity check: schema is still working + onTrino().executeQuery(format("CREATE TABLE %s.test_table (i integer)", schemaName)); + onTrino().executeQuery(format("DROP TABLE %s.test_table", schemaName)); + + onTrino().executeQuery("START TRANSACTION"); + onTrino().executeQuery("DROP SCHEMA " + schemaName); + onTrino().executeQuery("COMMIT"); + assertFileExistence(externalFile, true, "external file exists after committing drop schema"); + } + + private void assertFileExistence(String path, boolean exists, String description) + { + assertThat(hdfsClient.exist(path)).as("%s (%s)", description, path).isEqualTo(exists); } } diff --git a/testing/trino-product-tests/src/main/java/io/trino/tests/product/iceberg/TestCreateDropSchema.java b/testing/trino-product-tests/src/main/java/io/trino/tests/product/iceberg/TestCreateDropSchema.java new file mode 100644 index 000000000000..b7f9fdefd194 --- /dev/null +++ b/testing/trino-product-tests/src/main/java/io/trino/tests/product/iceberg/TestCreateDropSchema.java @@ -0,0 +1,128 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in
compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.trino.tests.product.iceberg; + +import com.google.inject.name.Named; +import io.trino.tempto.BeforeTestWithContext; +import io.trino.tempto.ProductTest; +import io.trino.tempto.hadoop.hdfs.HdfsClient; +import io.trino.tempto.query.QueryExecutionException; +import org.testng.annotations.Test; + +import javax.inject.Inject; + +import static io.trino.tests.product.TestGroups.ICEBERG; +import static io.trino.tests.product.hive.util.TemporaryHiveTable.randomTableSuffix; +import static io.trino.tests.product.utils.QueryExecutors.onTrino; +import static java.lang.String.format; +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.fail; + +public class TestCreateDropSchema + extends ProductTest +{ + @Inject + private HdfsClient hdfsClient; + + @Inject + @Named("databases.hive.warehouse_directory_path") + private String warehouseDirectory; + + @BeforeTestWithContext + public void useIceberg() + { + onTrino().executeQuery("USE iceberg.default"); + } + + @Test(groups = ICEBERG) + public void testDropSchemaWithLocationWithoutExternalFiles() + { + String schemaName = "schema_with_empty_location_" + randomTableSuffix(); + String schemaDir = warehouseDirectory + "/schema-with-empty-location/"; + + onTrino().executeQuery(format("CREATE SCHEMA %s WITH (location = '%s')", schemaName, schemaDir)); + assertFileExistence(schemaDir, true, "schema directory exists after creating schema"); + onTrino().executeQuery("DROP SCHEMA " + schemaName); + 
assertFileExistence(schemaDir, false, "schema directory exists after dropping schema"); + } + + @Test(groups = ICEBERG) + public void testDropSchemaFilesWithoutLocation() + { + String schemaName = "schema_without_location_" + randomTableSuffix(); + String schemaDir = format("%s/%s.db/", warehouseDirectory, schemaName); + + onTrino().executeQuery(format("CREATE SCHEMA %s", schemaName)); + assertFileExistence(schemaDir, true, "schema directory exists after creating schema"); + onTrino().executeQuery("DROP SCHEMA " + schemaName); + assertFileExistence(schemaDir, false, "schema directory exists after dropping schema"); + } + + @Test(groups = ICEBERG) + public void testDropSchemaFilesWithLocationWithExternalFile() + { + String schemaName = "schema_with_nonempty_location_" + randomTableSuffix(); + String schemaDir = warehouseDirectory + "/schema-with-nonempty-location/"; + + // Create file in schema directory before creating schema + String externalFile = schemaDir + "external-file"; + hdfsClient.createDirectory(schemaDir); + hdfsClient.saveFile(externalFile, ""); + + onTrino().executeQuery(format("CREATE SCHEMA %s WITH (location = '%s')", schemaName, schemaDir)); + assertFileExistence(schemaDir, true, "schema directory exists after creating schema"); + onTrino().executeQuery("DROP SCHEMA " + schemaName); + assertFileExistence(schemaDir, true, "schema directory exists after dropping schema"); + assertFileExistence(externalFile, true, "external file exists after dropping schema"); + + hdfsClient.delete(externalFile); + hdfsClient.delete(schemaDir); + } + + @Test(groups = ICEBERG) + public void testDropSchemaWithExternalFileWithoutLocation() + { + String schemaName = "schema_with_files_in_default_location_" + randomTableSuffix(); + String schemaDir = format("%s/%s.db/", warehouseDirectory, schemaName); + + // Create file in schema directory before creating schema + String externalFile = schemaDir + "external-file"; + hdfsClient.createDirectory(schemaDir); + 
hdfsClient.saveFile(externalFile, ""); + + onTrino().executeQuery("CREATE SCHEMA " + schemaName); + assertFileExistence(schemaDir, true, "schema directory exists after creating schema"); + assertQuerySucceeds("DROP SCHEMA " + schemaName); + assertFileExistence(schemaDir, true, "schema directory exists after dropping schema"); + assertFileExistence(externalFile, true, "external file exists after dropping schema"); + + hdfsClient.delete(externalFile); + hdfsClient.delete(schemaDir); + } + + private void assertFileExistence(String path, boolean exists, String description) + { + assertThat(hdfsClient.exist(path)).as(description).isEqualTo(exists); + } + + private static void assertQuerySucceeds(String query) + { + try { + onTrino().executeQuery(query); + } + catch (QueryExecutionException e) { + fail(format("Expected query to succeed: %s", query), e.getCause()); + } + } +}