diff --git a/presto-docs/src/main/sphinx/connector/iceberg.rst b/presto-docs/src/main/sphinx/connector/iceberg.rst index 919c75d995e25..fc930ab127234 100644 --- a/presto-docs/src/main/sphinx/connector/iceberg.rst +++ b/presto-docs/src/main/sphinx/connector/iceberg.rst @@ -265,6 +265,22 @@ Property Name Description Otherwise, it will be ignored. ======================================================= ============================================================= ============ +Configure the `Amazon S3 `_ +properties to specify a S3 location as the warehouse directory for the Hadoop catalog. This way, both metadata and data +of Iceberg tables are stored in S3. An example configuration includes: + +.. code-block:: none + + connector.name=iceberg + iceberg.catalog.type=hadoop + iceberg.catalog.warehouse=s3://iceberg_bucket/warehouse + + hive.s3.use-instance-credentials=false + hive.s3.aws-access-key=accesskey + hive.s3.aws-secret-key=secretkey + hive.s3.endpoint=http://192.168.0.103:9878 + hive.s3.path-style-access=true + Configuration Properties ------------------------ diff --git a/presto-hive-metastore/src/main/java/com/facebook/presto/hive/metastore/file/FileHiveMetastore.java b/presto-hive-metastore/src/main/java/com/facebook/presto/hive/metastore/file/FileHiveMetastore.java index 560bf7dd8f847..a44b52a43b2d6 100644 --- a/presto-hive-metastore/src/main/java/com/facebook/presto/hive/metastore/file/FileHiveMetastore.java +++ b/presto-hive-metastore/src/main/java/com/facebook/presto/hive/metastore/file/FileHiveMetastore.java @@ -68,7 +68,6 @@ import javax.annotation.concurrent.ThreadSafe; import javax.inject.Inject; -import java.io.File; import java.io.IOException; import java.io.OutputStream; import java.util.ArrayDeque; @@ -287,9 +286,19 @@ else if (table.getTableType().equals(EXTERNAL_TABLE)) { } if (!table.getTableType().equals(VIRTUAL_VIEW)) { - File location = new File(new Path(table.getStorage().getLocation()).toUri()); - checkArgument(location.isDirectory(), "Table location is not a directory: %s", location); - checkArgument(location.exists(), "Table directory does not exist: %s", location); + try { + Path tableLocation = new Path(table.getStorage().getLocation()); + FileSystem fileSystem = hdfsEnvironment.getFileSystem(hdfsContext, tableLocation); + if (!fileSystem.isDirectory(tableLocation)) { + throw new PrestoException(HIVE_METASTORE_ERROR, "Table location is not a directory: " + tableLocation); + } + if (!fileSystem.exists(tableLocation)) { + throw new PrestoException(HIVE_METASTORE_ERROR, "Table directory does not exist: " + tableLocation); + } + } + catch (IOException e) { + throw new PrestoException(HIVE_METASTORE_ERROR, "Could not validate table location", e); + } } writeSchemaFile("table", tableMetadataDirectory, tableCodec, new TableMetadata(table), false); @@ -1168,25 +1177,13 @@ private synchronized void setTablePrivileges( requireNonNull(tableName, "tableName is null"); requireNonNull(privileges, "privileges is null"); - try { - Table table = getRequiredTable(metastoreContext, databaseName, tableName); - - Path permissionsDirectory = getPermissionsDirectory(table); - - metadataFileSystem.mkdirs(permissionsDirectory); - if (!metadataFileSystem.isDirectory(permissionsDirectory)) { - throw new PrestoException(HIVE_METASTORE_ERROR, "Could not create permissions directory"); - } + Table table = getRequiredTable(metastoreContext, databaseName, tableName); - Path permissionFilePath = getPermissionsPath(permissionsDirectory, grantee); - List permissions = privileges.stream() - .map(PermissionMetadata::new) - .collect(toList()); - writeFile("permissions", permissionFilePath, permissionsCodec, permissions, true); - } - catch (IOException e) { - throw new PrestoException(HIVE_METASTORE_ERROR, e); - } + Path permissionFilePath = getPermissionsPath(getPermissionsDirectory(table), grantee); + List permissions = privileges.stream() + .map(PermissionMetadata::new) + .collect(toList()); + writeFile("permissions", permissionFilePath, permissionsCodec, permissions, true); } private Set readConstraintsFile(String databaseName, String tableName) diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/s3/HiveS3Module.java b/presto-hive/src/main/java/com/facebook/presto/hive/s3/HiveS3Module.java index 7b671f8bc4989..8e186c52ee387 100644 --- a/presto-hive/src/main/java/com/facebook/presto/hive/s3/HiveS3Module.java +++ b/presto-hive/src/main/java/com/facebook/presto/hive/s3/HiveS3Module.java @@ -39,7 +39,7 @@ public class HiveS3Module { private static final String EMR_FS_CLASS_NAME = "com.amazon.ws.emr.hadoop.fs.EmrFileSystem"; - private final String connectorId; + protected final String connectorId; public HiveS3Module(String connectorId) { @@ -80,7 +80,7 @@ public AWSSecurityMappingsSupplier provideAWSSecurityMappingsSupplier(AWSSecurit return new AWSSecurityMappingsSupplier(config.getConfigFile(), config.getRefreshPeriod()); } - private void bindSecurityMapping(Binder binder) + protected void bindSecurityMapping(Binder binder) { if (buildConfigObject(AWSSecurityMappingConfig.class).getConfigFile().isPresent() && buildConfigObject(AWSSecurityMappingConfig.class).getMappingType().equals(S3)) { @@ -89,7 +89,7 @@ private void bindSecurityMapping(Binder binder) } } - private static void validateEmrFsClass() + protected static void validateEmrFsClass() { // verify that the class exists try { diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/s3/PrestoS3FileSystem.java b/presto-hive/src/main/java/com/facebook/presto/hive/s3/PrestoS3FileSystem.java index 2cda2c392e956..2fdae74784623 100644 --- a/presto-hive/src/main/java/com/facebook/presto/hive/s3/PrestoS3FileSystem.java +++ b/presto-hive/src/main/java/com/facebook/presto/hive/s3/PrestoS3FileSystem.java @@ -168,16 +168,16 @@ public class PrestoS3FileSystem private static final String DIRECTORY_SUFFIX = "_$folder$"; private static final DataSize BLOCK_SIZE = new DataSize(32, MEGABYTE); private static final DataSize MAX_SKIP_SIZE = new DataSize(1, MEGABYTE); - private static final String PATH_SEPARATOR = "/"; + protected static final String PATH_SEPARATOR = "/"; private static final Duration BACKOFF_MIN_SLEEP = new Duration(1, SECONDS); private static final int HTTP_RANGE_NOT_SATISFIABLE = 416; private static final MediaType X_DIRECTORY_MEDIA_TYPE = MediaType.create("application", "x-directory"); private static final MediaType OCTET_STREAM_MEDIA_TYPE = MediaType.create("application", "octet-stream"); private static final Set GLACIER_STORAGE_CLASSES = ImmutableSet.of(Glacier.toString(), DeepArchive.toString()); - private URI uri; + protected URI uri; private Path workingDirectory; - private AmazonS3 s3; + protected AmazonS3 s3; private AWSCredentialsProvider credentialsProvider; private File stagingDirectory; private int maxAttempts; @@ -396,8 +396,7 @@ private static boolean isDirectory(PrestoS3ObjectMetadata metadata) } return mediaType.is(X_DIRECTORY_MEDIA_TYPE) || - (mediaType.is(OCTET_STREAM_MEDIA_TYPE) - && metadata.isKeyNeedsPathSeparator() + (metadata.isKeyNeedsPathSeparator() && objectMetadata.getContentLength() == 0); } diff --git a/presto-hive/src/test/java/com/facebook/presto/hive/BaseTestHiveInsertOverwrite.java b/presto-hive/src/test/java/com/facebook/presto/hive/BaseTestHiveInsertOverwrite.java index 4312db7760bb5..0d8bf37cb647b 100644 --- a/presto-hive/src/test/java/com/facebook/presto/hive/BaseTestHiveInsertOverwrite.java +++ b/presto-hive/src/test/java/com/facebook/presto/hive/BaseTestHiveInsertOverwrite.java @@ -27,6 +27,7 @@ import java.util.List; import static com.facebook.airlift.testing.Closeables.closeAllRuntimeException; +import static com.facebook.presto.hive.containers.HiveMinIODataLake.EMPTY_DIR; import static com.facebook.presto.tests.sql.TestTable.randomTableSuffix; import static java.lang.String.format; import static java.util.Objects.requireNonNull; @@ -134,6 +135,21 @@ public void testInsertOverwritePartitionedAndBucketedExternalTable() assertOverwritePartition(externalTableName); } + @Test + public void testCreateExternalTableOnEmptyS3Directory() + { + String testTable = getTestTableName(); + String tableName = testTable.substring(testTable.lastIndexOf('.') + 1); + computeActual(getCreateTableStatement( + tableName, + "partitioned_by=ARRAY['regionkey']", + "bucketed_by = ARRAY['nationkey']", + "bucket_count = 3", + format("external_location = 's3a://%s/%s'", this.bucketName, EMPTY_DIR))); + MaterializedResult materializedRows = computeActual("select * from " + tableName); + assertEquals(materializedRows.getRowCount(), 0); + } + protected void assertOverwritePartition(String testTable) { computeActual(format( diff --git a/presto-hive/src/test/java/com/facebook/presto/hive/containers/HiveMinIODataLake.java b/presto-hive/src/test/java/com/facebook/presto/hive/containers/HiveMinIODataLake.java index 59deb44ebe492..ce30207783bb3 100644 --- a/presto-hive/src/test/java/com/facebook/presto/hive/containers/HiveMinIODataLake.java +++ b/presto-hive/src/test/java/com/facebook/presto/hive/containers/HiveMinIODataLake.java @@ -29,12 +29,14 @@ import java.util.concurrent.atomic.AtomicBoolean; import static com.facebook.presto.hive.containers.HiveHadoopContainer.HIVE3_IMAGE; +import static com.facebook.presto.tests.sql.TestTable.randomTableSuffix; import static java.util.Objects.requireNonNull; import static org.testcontainers.containers.Network.newNetwork; public class HiveMinIODataLake implements Closeable { + public static final String EMPTY_DIR = "test-empty-dir-" + randomTableSuffix() + "/"; public static final String ACCESS_KEY = "accesskey"; public static final String SECRET_KEY = "secretkey"; @@ -99,6 +101,7 @@ public void start() new BasicAWSCredentials(ACCESS_KEY, SECRET_KEY))) .build(); s3Client.createBucket(this.bucketName); + s3Client.putObject(this.bucketName, EMPTY_DIR, ""); } finally { isStarted.set(true); diff --git a/presto-iceberg/pom.xml b/presto-iceberg/pom.xml index 36cbe08db3b03..eddcec456b9fe 100644 --- a/presto-iceberg/pom.xml +++ b/presto-iceberg/pom.xml @@ -598,6 +598,28 @@ test + + org.testcontainers + testcontainers + test + + + org.slf4j + slf4j-api + + + + + + com.amazonaws + aws-java-sdk-core + + + + com.amazonaws + aws-java-sdk-s3 + + org.apache.iceberg iceberg-core diff --git a/presto-iceberg/src/main/java/com/facebook/presto/iceberg/InternalIcebergConnectorFactory.java b/presto-iceberg/src/main/java/com/facebook/presto/iceberg/InternalIcebergConnectorFactory.java index bab2cb74a63d3..67126ed660667 100644 --- a/presto-iceberg/src/main/java/com/facebook/presto/iceberg/InternalIcebergConnectorFactory.java +++ b/presto-iceberg/src/main/java/com/facebook/presto/iceberg/InternalIcebergConnectorFactory.java @@ -26,7 +26,7 @@ import com.facebook.presto.hive.authentication.HiveAuthenticationModule; import com.facebook.presto.hive.gcs.HiveGcsModule; import com.facebook.presto.hive.metastore.ExtendedHiveMetastore; -import com.facebook.presto.hive.s3.HiveS3Module; +import com.facebook.presto.iceberg.s3.IcebergS3Module; import com.facebook.presto.plugin.base.security.AllowAllAccessControl; import com.facebook.presto.spi.NodeManager; import com.facebook.presto.spi.PageIndexerFactory; @@ -81,7 +81,7 @@ public static Connector createConnector( new JsonModule(), new IcebergCommonModule(catalogName), new IcebergCatalogModule(catalogName, metastore), - new HiveS3Module(catalogName), + new IcebergS3Module(catalogName), new HiveGcsModule(), new HiveAuthenticationModule(), new CachingModule(), diff --git a/presto-iceberg/src/main/java/com/facebook/presto/iceberg/s3/IcebergS3Module.java b/presto-iceberg/src/main/java/com/facebook/presto/iceberg/s3/IcebergS3Module.java new file mode 100644 index 0000000000000..6c8cb67d91f20 --- /dev/null +++ b/presto-iceberg/src/main/java/com/facebook/presto/iceberg/s3/IcebergS3Module.java @@ -0,0 +1,63 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.facebook.presto.iceberg.s3; + +import com.facebook.presto.hive.HiveClientConfig; +import com.facebook.presto.hive.s3.HiveS3Config; +import com.facebook.presto.hive.s3.HiveS3Module; +import com.facebook.presto.hive.s3.PrestoS3FileSystem; +import com.facebook.presto.hive.s3.PrestoS3FileSystemStats; +import com.facebook.presto.hive.s3.S3ConfigurationUpdater; +import com.facebook.presto.hive.s3.S3FileSystemType; +import com.google.inject.Binder; +import com.google.inject.Scopes; + +import static com.facebook.airlift.configuration.ConfigBinder.configBinder; +import static org.weakref.jmx.ObjectNames.generatedNameOf; +import static org.weakref.jmx.guice.ExportBinder.newExporter; + +public class IcebergS3Module + extends HiveS3Module +{ + public IcebergS3Module(String connectorId) + { + super(connectorId); + } + + @Override + protected void setup(Binder binder) + { + S3FileSystemType type = buildConfigObject(HiveClientConfig.class).getS3FileSystemType(); + if (type == S3FileSystemType.PRESTO) { + bindSecurityMapping(binder); + + binder.bind(S3ConfigurationUpdater.class).to(PrestoIcebergS3ConfigurationUpdater.class).in(Scopes.SINGLETON); + configBinder(binder).bindConfig(HiveS3Config.class); + + binder.bind(PrestoS3FileSystemStats.class).toInstance(PrestoS3FileSystem.getFileSystemStats()); + newExporter(binder).export(PrestoS3FileSystemStats.class).as(generatedNameOf(PrestoS3FileSystem.class, connectorId)); + } + else if (type == S3FileSystemType.EMRFS) { + validateEmrFsClass(); + binder.bind(S3ConfigurationUpdater.class).to(EmrFsS3ConfigurationUpdater.class).in(Scopes.SINGLETON); + } + else if (type == S3FileSystemType.HADOOP_DEFAULT) { + // configuration is done using Hadoop configuration files + binder.bind(S3ConfigurationUpdater.class).to(HadoopDefaultConfigurationUpdater.class).in(Scopes.SINGLETON); + } + else { + throw new RuntimeException("Unknown file system type: " + type); + } + } +} diff --git a/presto-iceberg/src/main/java/com/facebook/presto/iceberg/s3/PrestoIcebergNativeS3FileSystem.java b/presto-iceberg/src/main/java/com/facebook/presto/iceberg/s3/PrestoIcebergNativeS3FileSystem.java new file mode 100644 index 0000000000000..e93e323546554 --- /dev/null +++ b/presto-iceberg/src/main/java/com/facebook/presto/iceberg/s3/PrestoIcebergNativeS3FileSystem.java @@ -0,0 +1,35 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.facebook.presto.iceberg.s3; + +import com.amazonaws.AmazonClientException; +import com.facebook.presto.hive.s3.PrestoS3FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.permission.FsPermission; + +public class PrestoIcebergNativeS3FileSystem + extends PrestoS3FileSystem +{ + @Override + public boolean mkdirs(Path f, FsPermission permission) + { + try { + s3.putObject(getBucketName(uri), keyFromPath(f) + PATH_SEPARATOR, ""); + return true; + } + catch (AmazonClientException e) { + return false; + } + } +} diff --git a/presto-iceberg/src/main/java/com/facebook/presto/iceberg/s3/PrestoIcebergS3ConfigurationUpdater.java b/presto-iceberg/src/main/java/com/facebook/presto/iceberg/s3/PrestoIcebergS3ConfigurationUpdater.java new file mode 100644 index 0000000000000..331b7b2186528 --- /dev/null +++ b/presto-iceberg/src/main/java/com/facebook/presto/iceberg/s3/PrestoIcebergS3ConfigurationUpdater.java @@ -0,0 +1,49 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.facebook.presto.iceberg.s3; + +import com.facebook.presto.hive.s3.HiveS3Config; +import com.facebook.presto.hive.s3.PrestoS3ConfigurationUpdater; +import com.facebook.presto.iceberg.IcebergConfig; +import org.apache.hadoop.conf.Configuration; + +import javax.inject.Inject; + +import static com.facebook.presto.iceberg.CatalogType.HADOOP; + +public class PrestoIcebergS3ConfigurationUpdater + extends PrestoS3ConfigurationUpdater +{ + private final IcebergConfig icebergConfig; + + @Inject + public PrestoIcebergS3ConfigurationUpdater(HiveS3Config config, IcebergConfig icebergConfig) + { + super(config); + this.icebergConfig = icebergConfig; + } + + @Override + public void updateConfiguration(Configuration config) + { + super.updateConfiguration(config); + + if (this.icebergConfig.getCatalogType().equals(HADOOP)) { + // re-map filesystem schemes to match Amazon Elastic MapReduce + config.set("fs.s3.impl", PrestoIcebergNativeS3FileSystem.class.getName()); + config.set("fs.s3a.impl", PrestoIcebergNativeS3FileSystem.class.getName()); + config.set("fs.s3n.impl", PrestoIcebergNativeS3FileSystem.class.getName()); + } + } +} diff --git a/presto-iceberg/src/test/java/com/facebook/presto/iceberg/IcebergDistributedTestBase.java b/presto-iceberg/src/test/java/com/facebook/presto/iceberg/IcebergDistributedTestBase.java index 5bdaa3db84ae4..c926d60dc9cc1 100644 --- a/presto-iceberg/src/test/java/com/facebook/presto/iceberg/IcebergDistributedTestBase.java +++ b/presto-iceberg/src/test/java/com/facebook/presto/iceberg/IcebergDistributedTestBase.java @@ -32,6 +32,9 @@ import com.facebook.presto.hive.HiveHdfsConfiguration; import com.facebook.presto.hive.MetastoreClientConfig; import com.facebook.presto.hive.authentication.NoHdfsAuthentication; +import com.facebook.presto.hive.s3.HiveS3Config; +import com.facebook.presto.hive.s3.PrestoS3ConfigurationUpdater; +import com.facebook.presto.hive.s3.S3ConfigurationUpdater; import com.facebook.presto.iceberg.delete.DeleteFile; import com.facebook.presto.metadata.CatalogMetadata; import com.facebook.presto.metadata.Metadata; @@ -63,6 +66,7 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; import org.apache.iceberg.BaseTable; import org.apache.iceberg.CatalogUtil; import org.apache.iceberg.FileScanTask; @@ -96,7 +100,6 @@ import java.lang.reflect.Field; import java.net.URI; import java.nio.ByteBuffer; -import java.nio.file.Path; import java.time.LocalDateTime; import java.time.LocalTime; import java.time.format.DateTimeFormatter; @@ -1679,14 +1682,14 @@ public void testMetadataVersionsMaintainingProperties() // Table `test_table_with_default_setting_properties`'s current metadata record all 5 previous metadata files assertEquals(defaultTableMetadata.previousFiles().size(), 5); - FileSystem fileSystem = getHdfsEnvironment().getFileSystem(new HdfsContext(SESSION), new org.apache.hadoop.fs.Path(settingTable.location())); + FileSystem fileSystem = getHdfsEnvironment().getFileSystem(new HdfsContext(SESSION), new Path(settingTable.location())); // Table `test_table_with_setting_properties`'s all existing metadata files count is 2 - FileStatus[] settingTableFiles = fileSystem.listStatus(new org.apache.hadoop.fs.Path(settingTable.location(), "metadata"), name -> name.getName().contains(METADATA_FILE_EXTENSION)); + FileStatus[] settingTableFiles = fileSystem.listStatus(new Path(settingTable.location(), "metadata"), name -> name.getName().contains(METADATA_FILE_EXTENSION)); assertEquals(settingTableFiles.length, 2); // Table `test_table_with_default_setting_properties`'s all existing metadata files count is 6 - FileStatus[] defaultTableFiles = fileSystem.listStatus(new org.apache.hadoop.fs.Path(defaultTable.location(), "metadata"), name -> name.getName().contains(METADATA_FILE_EXTENSION)); + FileStatus[] defaultTableFiles = fileSystem.listStatus(new Path(defaultTable.location(), "metadata"), name -> name.getName().contains(METADATA_FILE_EXTENSION)); assertEquals(defaultTableFiles.length, 6); } finally { @@ -2238,12 +2241,12 @@ private void testCheckDeleteFiles(Table icebergTable, int expectedSize, List writer = Parquet.writeDeletes(HadoopOutputFile.fromPath(path, fs)) .createWriterFunc(GenericParquetWriter::buildWriter) .forTable(icebergTable) @@ -2270,13 +2273,13 @@ private void writeEqualityDeleteToNationTable(Table icebergTable, Map overwriteValues, Map partitionValues) throws Exception { - Path dataDirectory = getDistributedQueryRunner().getCoordinator().getDataDirectory(); + java.nio.file.Path dataDirectory = getDistributedQueryRunner().getCoordinator().getDataDirectory(); File metastoreDir = getIcebergDataDirectoryPath(dataDirectory, catalogType.name(), new IcebergConfig().getFileFormat(), false).toFile(); - org.apache.hadoop.fs.Path metadataDir = new org.apache.hadoop.fs.Path(metastoreDir.toURI()); + Path metadataDir = new Path(metastoreDir.toURI()); String deleteFileName = "delete_file_" + randomUUID(); FileSystem fs = getHdfsEnvironment().getFileSystem(new HdfsContext(SESSION), metadataDir); Schema deleteRowSchema = icebergTable.schema().select(overwriteValues.keySet()); - Parquet.DeleteWriteBuilder writerBuilder = Parquet.writeDeletes(HadoopOutputFile.fromPath(new org.apache.hadoop.fs.Path(metadataDir, deleteFileName), fs)) + Parquet.DeleteWriteBuilder writerBuilder = Parquet.writeDeletes(HadoopOutputFile.fromPath(new Path(metadataDir, deleteFileName), fs)) .forTable(icebergTable) .rowSchema(deleteRowSchema) .createWriterFunc(GenericParquetWriter::buildWriter) @@ -2297,13 +2300,19 @@ private void writeEqualityDeleteToNationTable(Table icebergTable, Map {}), + ImmutableSet.of(), hiveClientConfig); return new HdfsEnvironment(hdfsConfiguration, metastoreClientConfig, new NoHdfsAuthentication()); } @@ -2325,18 +2334,18 @@ protected Table loadTable(String tableName) protected Map getProperties() { - File metastoreDir = getCatalogDirectory(); + Path metastoreDir = getCatalogDirectory(); return ImmutableMap.of("warehouse", metastoreDir.toString()); } - protected File getCatalogDirectory() + protected Path getCatalogDirectory() { - Path dataDirectory = getDistributedQueryRunner().getCoordinator().getDataDirectory(); + java.nio.file.Path dataDirectory = getDistributedQueryRunner().getCoordinator().getDataDirectory(); switch (catalogType) { case HIVE: case HADOOP: case NESSIE: - return getIcebergDataDirectoryPath(dataDirectory, catalogType.name(), new IcebergConfig().getFileFormat(), false).toFile(); + return new Path(getIcebergDataDirectoryPath(dataDirectory, catalogType.name(), new IcebergConfig().getFileFormat(), false).toFile().toURI()); } throw new PrestoException(NOT_SUPPORTED, "Unsupported Presto Iceberg catalog type " + catalogType); diff --git a/presto-iceberg/src/test/java/com/facebook/presto/iceberg/IcebergQueryRunner.java b/presto-iceberg/src/test/java/com/facebook/presto/iceberg/IcebergQueryRunner.java index 6e37c8cb41bac..aae51883aa8c4 100644 --- a/presto-iceberg/src/test/java/com/facebook/presto/iceberg/IcebergQueryRunner.java +++ b/presto-iceberg/src/test/java/com/facebook/presto/iceberg/IcebergQueryRunner.java @@ -43,6 +43,7 @@ import java.nio.file.Files; import java.nio.file.Path; import java.util.Collections; +import java.util.HashMap; import java.util.Map; import java.util.Optional; import java.util.OptionalInt; @@ -202,13 +203,11 @@ public static DistributedQueryRunner createIcebergQueryRunner( String catalogType = extraConnectorProperties.getOrDefault("iceberg.catalog.type", HIVE.name()); Path icebergDataDirectory = getIcebergDataDirectoryPath(queryRunner.getCoordinator().getDataDirectory(), catalogType, format, addStorageFormatToPath); - Map icebergProperties = ImmutableMap.builder() - .put("iceberg.file-format", format.name()) - .putAll(getConnectorProperties(CatalogType.valueOf(catalogType), icebergDataDirectory)) - .putAll(extraConnectorProperties) - .build(); - - queryRunner.createCatalog(ICEBERG_CATALOG, "iceberg", icebergProperties); + Map icebergProperties = new HashMap<>(); + icebergProperties.put("iceberg.file-format", format.name()); + icebergProperties.putAll(getConnectorProperties(CatalogType.valueOf(catalogType), icebergDataDirectory)); + icebergProperties.putAll(extraConnectorProperties); + queryRunner.createCatalog(ICEBERG_CATALOG, "iceberg", ImmutableMap.copyOf(icebergProperties)); if (addJmxPlugin) { queryRunner.installPlugin(new JmxPlugin()); diff --git a/presto-iceberg/src/test/java/com/facebook/presto/iceberg/TestIcebergFileWriter.java b/presto-iceberg/src/test/java/com/facebook/presto/iceberg/TestIcebergFileWriter.java index 734d47172ded8..44fc80cf2a01b 100644 --- a/presto-iceberg/src/test/java/com/facebook/presto/iceberg/TestIcebergFileWriter.java +++ b/presto-iceberg/src/test/java/com/facebook/presto/iceberg/TestIcebergFileWriter.java @@ -25,10 +25,13 @@ import com.facebook.presto.hive.FileFormatDataSourceStats; import com.facebook.presto.hive.HdfsContext; import com.facebook.presto.hive.HdfsEnvironment; +import com.facebook.presto.hive.HiveClientConfig; import com.facebook.presto.hive.HiveCompressionCodec; import com.facebook.presto.hive.HiveDwrfEncryptionProvider; +import com.facebook.presto.hive.MetastoreClientConfig; import com.facebook.presto.hive.NodeVersion; import com.facebook.presto.hive.OrcFileWriterConfig; +import com.facebook.presto.hive.s3.HiveS3Config; import com.facebook.presto.metadata.SessionPropertyManager; import com.facebook.presto.parquet.FileParquetDataSource; import com.facebook.presto.parquet.cache.MetadataReader; @@ -61,6 +64,7 @@ import static com.facebook.presto.common.type.VarbinaryType.VARBINARY; import static com.facebook.presto.common.type.VarcharType.VARCHAR; import static com.facebook.presto.iceberg.IcebergAbstractMetadata.toIcebergSchema; +import static com.facebook.presto.iceberg.IcebergDistributedTestBase.getHdfsEnvironment; import static com.facebook.presto.iceberg.IcebergQueryRunner.ICEBERG_CATALOG; import static com.facebook.presto.iceberg.IcebergSessionProperties.dataSizeSessionProperty; import static com.facebook.presto.metadata.SessionPropertyManager.createTestingSessionPropertyManager; @@ -113,7 +117,7 @@ public void setup() throws Exception this.connectorSession = session.toConnectorSession(connectorId); TypeManager typeManager = new TestingTypeManager(); this.hdfsContext = new HdfsContext(connectorSession); - HdfsEnvironment hdfsEnvironment = IcebergDistributedTestBase.getHdfsEnvironment(); + HdfsEnvironment hdfsEnvironment = getHdfsEnvironment(new HiveClientConfig(), new MetastoreClientConfig(), new HiveS3Config()); this.icebergFileWriterFactory = new IcebergFileWriterFactory(hdfsEnvironment, typeManager, new FileFormatDataSourceStats(), new NodeVersion("test"), new OrcFileWriterConfig(), HiveDwrfEncryptionProvider.NO_ENCRYPTION); } diff --git a/presto-iceberg/src/test/java/com/facebook/presto/iceberg/container/IcebergMinIODataLake.java b/presto-iceberg/src/test/java/com/facebook/presto/iceberg/container/IcebergMinIODataLake.java new file mode 100644 index 0000000000000..3bd6b55a20460 --- /dev/null +++ b/presto-iceberg/src/test/java/com/facebook/presto/iceberg/container/IcebergMinIODataLake.java @@ -0,0 +1,117 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.facebook.presto.iceberg.container; + +import com.amazonaws.auth.AWSStaticCredentialsProvider; +import com.amazonaws.auth.BasicAWSCredentials; +import com.amazonaws.client.builder.AwsClientBuilder; +import com.amazonaws.services.s3.AmazonS3; +import com.amazonaws.services.s3.AmazonS3ClientBuilder; +import com.facebook.presto.testing.containers.MinIOContainer; +import com.facebook.presto.util.AutoCloseableCloser; +import com.google.common.collect.ImmutableMap; +import org.testcontainers.containers.Network; + +import java.io.Closeable; +import java.io.IOException; +import java.util.concurrent.atomic.AtomicBoolean; + +import static java.util.Objects.requireNonNull; +import static org.testcontainers.containers.Network.newNetwork; + +public class IcebergMinIODataLake + implements Closeable +{ + public static final String ACCESS_KEY = "accesskey"; + public static final String SECRET_KEY = "secretkey"; + + private final String bucketName; + private final String warehouseDir; + private final MinIOContainer minIOContainer; + + private final AtomicBoolean isStarted = new AtomicBoolean(false); + private final AutoCloseableCloser closer = AutoCloseableCloser.create(); + + public IcebergMinIODataLake(String bucketName, String warehouseDir) + { + this.bucketName = requireNonNull(bucketName, "bucketName is null"); + this.warehouseDir = requireNonNull(warehouseDir, "warehouseDir is null"); + Network network = closer.register(newNetwork()); + this.minIOContainer = closer.register( + MinIOContainer.builder() + .withNetwork(network) + .withEnvVars(ImmutableMap.builder() + .put("MINIO_ACCESS_KEY", ACCESS_KEY) + .put("MINIO_SECRET_KEY", SECRET_KEY) + .build()) + .build()); + } + + public void start() + { + if (isStarted()) { + return; + } + try { + this.minIOContainer.start(); + AmazonS3 s3Client = AmazonS3ClientBuilder + .standard() + .withEndpointConfiguration(new AwsClientBuilder.EndpointConfiguration( + "http://localhost:" + minIOContainer.getMinioApiEndpoint().getPort(), + "us-east-1")) + .withPathStyleAccessEnabled(true) + .withCredentials(new AWSStaticCredentialsProvider( + new BasicAWSCredentials(ACCESS_KEY, SECRET_KEY))) + .build(); + s3Client.createBucket(this.bucketName); + s3Client.putObject(this.bucketName, this.warehouseDir, ""); + } + finally { + isStarted.set(true); + } + } + + public boolean isStarted() + { + return isStarted.get(); + } + + public void stop() + { + if (!isStarted()) { + return; + } + try { + closer.close(); + } + catch (Exception e) { + throw new RuntimeException("Failed to stop IcebergMinioDataLake", e); + } + finally { + isStarted.set(false); + } + } + + public MinIOContainer getMinio() + { + return minIOContainer; + } + + @Override + public void close() + throws IOException + { + stop(); + } +} diff --git a/presto-iceberg/src/test/java/com/facebook/presto/iceberg/hadoop/TestIcebergDistributedOnS3Hadoop.java b/presto-iceberg/src/test/java/com/facebook/presto/iceberg/hadoop/TestIcebergDistributedOnS3Hadoop.java new file mode 100644 index 0000000000000..5e38761eec72a --- /dev/null +++ b/presto-iceberg/src/test/java/com/facebook/presto/iceberg/hadoop/TestIcebergDistributedOnS3Hadoop.java @@ -0,0 +1,124 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.facebook.presto.iceberg.hadoop; + +import com.facebook.presto.hive.HdfsContext; +import com.facebook.presto.hive.HdfsEnvironment; +import com.facebook.presto.hive.HiveClientConfig; +import com.facebook.presto.hive.MetastoreClientConfig; +import com.facebook.presto.hive.s3.HiveS3Config; +import com.facebook.presto.iceberg.IcebergDistributedTestBase; +import com.facebook.presto.iceberg.IcebergQueryRunner; +import com.facebook.presto.iceberg.container.IcebergMinIODataLake; +import com.facebook.presto.testing.QueryRunner; +import com.google.common.collect.ImmutableMap; +import com.google.common.net.HostAndPort; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.apache.iceberg.CatalogUtil; +import org.apache.iceberg.Table; +import org.apache.iceberg.catalog.Catalog; +import org.apache.iceberg.catalog.TableIdentifier; +import org.testng.annotations.AfterClass; +import org.testng.annotations.BeforeClass; + +import java.net.URI; + +import static com.facebook.presto.iceberg.CatalogType.HADOOP; +import static com.facebook.presto.iceberg.container.IcebergMinIODataLake.ACCESS_KEY; +import static com.facebook.presto.iceberg.container.IcebergMinIODataLake.SECRET_KEY; +import static com.facebook.presto.testing.TestingConnectorSession.SESSION; +import static com.facebook.presto.tests.sql.TestTable.randomTableSuffix; +import static com.google.common.io.Files.createTempDir; +import static java.lang.String.format; + +public class TestIcebergDistributedOnS3Hadoop + extends IcebergDistributedTestBase +{ + static final String WAREHOUSE_DIR = "warehouse/"; + String bucketName = "test-iceberg-hadoop-s3-" + randomTableSuffix(); + private IcebergMinIODataLake dockerizedS3DataLake; + HostAndPort hostAndPort; + + public TestIcebergDistributedOnS3Hadoop() + { + super(HADOOP); + } + + protected QueryRunner createQueryRunner() + throws Exception + { + return IcebergQueryRunner.createIcebergQueryRunner(ImmutableMap.of(), HADOOP, + ImmutableMap.of( + "iceberg.catalog.warehouse", format("s3://%s/%s", bucketName, WAREHOUSE_DIR), + "hive.s3.use-instance-credentials", "false", + "hive.s3.aws-access-key", ACCESS_KEY, + "hive.s3.aws-secret-key", SECRET_KEY, + "hive.s3.endpoint", format("http://%s:%s", hostAndPort.getHost(), hostAndPort.getPort()), + "hive.s3.path-style-access", "true")); + } + + @BeforeClass + @Override + public void init() + throws Exception + { + this.dockerizedS3DataLake = new IcebergMinIODataLake(bucketName, WAREHOUSE_DIR); + this.dockerizedS3DataLake.start(); + hostAndPort = this.dockerizedS3DataLake.getMinio().getMinioApiEndpoint(); + super.init(); + } + + @AfterClass + public void tearDown() + { + if (dockerizedS3DataLake != null) { + dockerizedS3DataLake.stop(); + } + } + + @Override + public void testCreateTableWithCustomLocation() + { + String tableName = "test_hadoop_table_with_custom_location"; + URI tableTargetURI = createTempDir().toURI(); + assertQueryFails(format("create table %s (a int, b varchar)" + " with (location = '%s')", tableName, tableTargetURI.toString()), + "Cannot set a custom location for a path-based table.*"); + } + + protected Path getCatalogDirectory() + { + return new Path(URI.create(format("s3://%s/%s", bucketName, WAREHOUSE_DIR))); + } + + protected HdfsEnvironment getHdfsEnvironment() + { + HiveClientConfig hiveClientConfig = new HiveClientConfig(); + MetastoreClientConfig metastoreClientConfig = new MetastoreClientConfig(); + HiveS3Config hiveS3Config = new HiveS3Config() + .setS3AwsAccessKey(ACCESS_KEY) + .setS3AwsSecretKey(SECRET_KEY) + .setS3PathStyleAccess(true) + .setS3Endpoint(format("http://%s:%s", hostAndPort.getHost(), hostAndPort.getPort())) + .setS3UseInstanceCredentials(false); + return getHdfsEnvironment(hiveClientConfig, metastoreClientConfig, hiveS3Config); + } + + protected Table loadTable(String tableName) + { + Configuration configuration = getHdfsEnvironment().getConfiguration(new HdfsContext(SESSION), getCatalogDirectory()); + Catalog catalog = CatalogUtil.loadCatalog(HADOOP.getCatalogImpl(), "test-hive", getProperties(), configuration); + return catalog.loadTable(TableIdentifier.of("tpch", tableName)); + } +} diff --git a/presto-iceberg/src/test/java/com/facebook/presto/iceberg/hadoop/TestIcebergHadoopCatalogOnS3DistributedQueries.java b/presto-iceberg/src/test/java/com/facebook/presto/iceberg/hadoop/TestIcebergHadoopCatalogOnS3DistributedQueries.java new file mode 100644 index 0000000000000..185bb81dd882e --- /dev/null +++ b/presto-iceberg/src/test/java/com/facebook/presto/iceberg/hadoop/TestIcebergHadoopCatalogOnS3DistributedQueries.java @@ -0,0 +1,86 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.facebook.presto.iceberg.hadoop; + +import com.facebook.presto.iceberg.IcebergQueryRunner; +import com.facebook.presto.iceberg.TestIcebergDistributedQueries; +import com.facebook.presto.iceberg.container.IcebergMinIODataLake; +import com.facebook.presto.testing.QueryRunner; +import com.google.common.collect.ImmutableMap; +import com.google.common.net.HostAndPort; +import org.testng.annotations.AfterClass; +import org.testng.annotations.BeforeClass; + +import static com.facebook.presto.iceberg.CatalogType.HADOOP; +import static com.facebook.presto.iceberg.container.IcebergMinIODataLake.ACCESS_KEY; +import static com.facebook.presto.iceberg.container.IcebergMinIODataLake.SECRET_KEY; +import static com.facebook.presto.tests.sql.TestTable.randomTableSuffix; +import static java.lang.String.format; + +public class TestIcebergHadoopCatalogOnS3DistributedQueries + extends TestIcebergDistributedQueries +{ + static final String WAREHOUSE_DIR = "warehouse/"; + String bucketName = "test-iceberg-hadoop-s3-" + randomTableSuffix(); + private IcebergMinIODataLake dockerizedS3DataLake; + HostAndPort hostAndPort; + + public TestIcebergHadoopCatalogOnS3DistributedQueries() + { + super(HADOOP); + } + + @BeforeClass + @Override + public void init() + throws Exception + { + this.dockerizedS3DataLake = new IcebergMinIODataLake(bucketName, WAREHOUSE_DIR); + this.dockerizedS3DataLake.start(); + hostAndPort = this.dockerizedS3DataLake.getMinio().getMinioApiEndpoint(); + super.init(); + } + + @AfterClass + public void tearDown() + { + if (dockerizedS3DataLake != null) { + dockerizedS3DataLake.stop(); + } + } + + protected boolean supportsViews() + { + return false; + } + + @Override + public void testRenameTable() + { + // Rename table are not supported by hadoop catalog + } + + protected QueryRunner createQueryRunner() + throws Exception + { + return IcebergQueryRunner.createIcebergQueryRunner(ImmutableMap.of(), HADOOP, + ImmutableMap.of( + "iceberg.catalog.warehouse", format("s3://%s/%s", bucketName, WAREHOUSE_DIR), + "hive.s3.use-instance-credentials", "false", + "hive.s3.aws-access-key", ACCESS_KEY, + "hive.s3.aws-secret-key", SECRET_KEY, + "hive.s3.endpoint", format("http://%s:%s", hostAndPort.getHost(), hostAndPort.getPort()), + "hive.s3.path-style-access", "true")); + } +} diff --git a/presto-iceberg/src/test/java/com/facebook/presto/iceberg/hive/TestIcebergDistributedHive.java b/presto-iceberg/src/test/java/com/facebook/presto/iceberg/hive/TestIcebergDistributedHive.java index 3954524b959b2..fdb1f7b487152 100644 --- a/presto-iceberg/src/test/java/com/facebook/presto/iceberg/hive/TestIcebergDistributedHive.java +++ b/presto-iceberg/src/test/java/com/facebook/presto/iceberg/hive/TestIcebergDistributedHive.java @@ -83,7 +83,7 @@ protected Table loadTable(String tableName) protected ExtendedHiveMetastore getFileHiveMetastore() { FileHiveMetastore fileHiveMetastore = new FileHiveMetastore(getHdfsEnvironment(), - getCatalogDirectory().getPath(), + getCatalogDirectory().toString(), "test"); return memoizeMetastore(fileHiveMetastore, false, 1000, 0); } diff --git a/presto-iceberg/src/test/java/com/facebook/presto/iceberg/hive/TestIcebergDistributedOnS3Hive.java b/presto-iceberg/src/test/java/com/facebook/presto/iceberg/hive/TestIcebergDistributedOnS3Hive.java new file mode 100644 index 0000000000000..fcff66983701f --- /dev/null +++ b/presto-iceberg/src/test/java/com/facebook/presto/iceberg/hive/TestIcebergDistributedOnS3Hive.java @@ -0,0 +1,99 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.facebook.presto.iceberg.hive; + +import com.facebook.presto.hive.HdfsEnvironment; +import com.facebook.presto.hive.HiveClientConfig; +import com.facebook.presto.hive.MetastoreClientConfig; +import com.facebook.presto.hive.s3.HiveS3Config; +import com.facebook.presto.iceberg.IcebergQueryRunner; +import com.facebook.presto.iceberg.container.IcebergMinIODataLake; +import com.facebook.presto.testing.QueryRunner; +import com.google.common.base.Joiner; +import com.google.common.collect.ImmutableMap; +import com.google.common.net.HostAndPort; +import org.apache.hadoop.fs.Path; +import org.testng.annotations.AfterClass; +import org.testng.annotations.BeforeClass; + +import java.net.URI; + +import static com.facebook.presto.iceberg.CatalogType.HIVE; +import static com.facebook.presto.iceberg.container.IcebergMinIODataLake.ACCESS_KEY; +import static com.facebook.presto.iceberg.container.IcebergMinIODataLake.SECRET_KEY; +import static com.facebook.presto.spi.statistics.ColumnStatisticType.NUMBER_OF_DISTINCT_VALUES; +import static com.facebook.presto.spi.statistics.ColumnStatisticType.TOTAL_SIZE_IN_BYTES; +import static com.facebook.presto.tests.sql.TestTable.randomTableSuffix; +import static java.lang.String.format; + +public class TestIcebergDistributedOnS3Hive + extends TestIcebergDistributedHive +{ + static final String WAREHOUSE_DIR = "warehouse/"; + String bucketName = "test-iceberg-hadoop-s3-" + randomTableSuffix(); + private IcebergMinIODataLake dockerizedS3DataLake; + HostAndPort hostAndPort; + + protected QueryRunner createQueryRunner() + throws Exception + { + return IcebergQueryRunner.createIcebergQueryRunner(ImmutableMap.of(), HIVE, + ImmutableMap.of( + "iceberg.hive-statistics-merge-strategy", Joiner.on(",").join(NUMBER_OF_DISTINCT_VALUES.name(), TOTAL_SIZE_IN_BYTES.name()), + "hive.metastore", "file", + "hive.metastore.catalog.dir", format("s3://%s/%s", bucketName, WAREHOUSE_DIR), + "hive.s3.use-instance-credentials", "false", + "hive.s3.aws-access-key", ACCESS_KEY, + "hive.s3.aws-secret-key", SECRET_KEY, + "hive.s3.endpoint", format("http://%s:%s", hostAndPort.getHost(), hostAndPort.getPort()), + "hive.s3.path-style-access", "true")); + } + + @BeforeClass + @Override + public void init() + throws Exception + { + this.dockerizedS3DataLake = new IcebergMinIODataLake(bucketName, WAREHOUSE_DIR); + this.dockerizedS3DataLake.start(); + hostAndPort = this.dockerizedS3DataLake.getMinio().getMinioApiEndpoint(); + super.init(); + } + + @AfterClass + public void tearDown() + { + if (dockerizedS3DataLake != null) { + dockerizedS3DataLake.stop(); + } + } + + protected Path getCatalogDirectory() + { + return new Path(URI.create(format("s3://%s/%s", bucketName, WAREHOUSE_DIR))); + } + + protected HdfsEnvironment getHdfsEnvironment() + { + HiveClientConfig hiveClientConfig = new HiveClientConfig(); + MetastoreClientConfig metastoreClientConfig = new MetastoreClientConfig(); + HiveS3Config hiveS3Config = new HiveS3Config() + .setS3AwsAccessKey(ACCESS_KEY) + .setS3AwsSecretKey(SECRET_KEY) + .setS3PathStyleAccess(true) + .setS3Endpoint(format("http://%s:%s", hostAndPort.getHost(), hostAndPort.getPort())) + .setS3UseInstanceCredentials(false); + return getHdfsEnvironment(hiveClientConfig, metastoreClientConfig, hiveS3Config); + } +} diff --git a/presto-iceberg/src/test/java/com/facebook/presto/iceberg/hive/TestIcebergHiveCatalogOnS3DistributedQueries.java b/presto-iceberg/src/test/java/com/facebook/presto/iceberg/hive/TestIcebergHiveCatalogOnS3DistributedQueries.java new file mode 100644 index 0000000000000..42f01120578a2 --- /dev/null +++ b/presto-iceberg/src/test/java/com/facebook/presto/iceberg/hive/TestIcebergHiveCatalogOnS3DistributedQueries.java @@ -0,0 +1,112 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.facebook.presto.iceberg.hive; + +import com.facebook.presto.hive.HdfsEnvironment; +import com.facebook.presto.hive.HiveClientConfig; +import com.facebook.presto.hive.MetastoreClientConfig; +import com.facebook.presto.hive.s3.HiveS3Config; +import com.facebook.presto.iceberg.IcebergDistributedTestBase; +import com.facebook.presto.iceberg.IcebergQueryRunner; +import com.facebook.presto.iceberg.TestIcebergDistributedQueries; +import com.facebook.presto.iceberg.container.IcebergMinIODataLake; +import com.facebook.presto.testing.QueryRunner; +import com.google.common.base.Joiner; +import com.google.common.collect.ImmutableMap; +import com.google.common.net.HostAndPort; +import org.apache.hadoop.fs.Path; +import org.testng.annotations.AfterClass; +import org.testng.annotations.BeforeClass; + +import java.net.URI; + +import static com.facebook.presto.iceberg.CatalogType.HIVE; +import static com.facebook.presto.iceberg.container.IcebergMinIODataLake.ACCESS_KEY; +import static com.facebook.presto.iceberg.container.IcebergMinIODataLake.SECRET_KEY; +import static com.facebook.presto.spi.statistics.ColumnStatisticType.NUMBER_OF_DISTINCT_VALUES; +import static com.facebook.presto.spi.statistics.ColumnStatisticType.TOTAL_SIZE_IN_BYTES; +import static com.facebook.presto.tests.sql.TestTable.randomTableSuffix; +import static java.lang.String.format; + +public class TestIcebergHiveCatalogOnS3DistributedQueries + extends TestIcebergDistributedQueries +{ + static final String WAREHOUSE_DIR = "warehouse/"; + String bucketName = "test-iceberg-hadoop-s3-" + randomTableSuffix(); + private IcebergMinIODataLake dockerizedS3DataLake; + HostAndPort hostAndPort; + + public TestIcebergHiveCatalogOnS3DistributedQueries() + { + super(HIVE, ImmutableMap.of("iceberg.hive-statistics-merge-strategy", Joiner.on(",").join(NUMBER_OF_DISTINCT_VALUES.name(), TOTAL_SIZE_IN_BYTES.name()))); + } + + protected QueryRunner createQueryRunner() + throws Exception + { + return IcebergQueryRunner.createIcebergQueryRunner(ImmutableMap.of(), HIVE, + ImmutableMap.of( + "iceberg.hive-statistics-merge-strategy", Joiner.on(",").join(NUMBER_OF_DISTINCT_VALUES.name(), TOTAL_SIZE_IN_BYTES.name()), + "hive.metastore", "file", + "hive.metastore.catalog.dir", format("s3://%s/%s", bucketName, WAREHOUSE_DIR), + "hive.s3.use-instance-credentials", "false", + "hive.s3.aws-access-key", ACCESS_KEY, + "hive.s3.aws-secret-key", SECRET_KEY, + "hive.s3.endpoint", format("http://%s:%s", hostAndPort.getHost(), hostAndPort.getPort()), + "hive.s3.path-style-access", "true")); + } + + @BeforeClass + @Override + public void init() + throws Exception + { + this.dockerizedS3DataLake = new IcebergMinIODataLake(bucketName, WAREHOUSE_DIR); + this.dockerizedS3DataLake.start(); + hostAndPort = this.dockerizedS3DataLake.getMinio().getMinioApiEndpoint(); + super.init(); + } + + @AfterClass + public void tearDown() + { + if (dockerizedS3DataLake != null) { + dockerizedS3DataLake.stop(); + } + } + + protected Path getCatalogDirectory() + { + return new Path(URI.create(format("s3://%s/%s", bucketName, WAREHOUSE_DIR))); + } + + protected HdfsEnvironment getHdfsEnvironment() + { + HiveClientConfig hiveClientConfig = new HiveClientConfig(); + MetastoreClientConfig metastoreClientConfig = new MetastoreClientConfig(); + HiveS3Config hiveS3Config = new HiveS3Config() + .setS3AwsAccessKey(ACCESS_KEY) + .setS3AwsSecretKey(SECRET_KEY) + .setS3PathStyleAccess(true) + .setS3Endpoint(format("http://%s:%s", hostAndPort.getHost(), hostAndPort.getPort())) + .setS3UseInstanceCredentials(false); + return IcebergDistributedTestBase.getHdfsEnvironment(hiveClientConfig, metastoreClientConfig, hiveS3Config); + } + + @Override + public void testRenameTable() + { + // Rename table are not supported by hive catalog + } +} diff --git a/presto-iceberg/src/test/java/com/facebook/presto/iceberg/nessie/TestIcebergDistributedNessie.java b/presto-iceberg/src/test/java/com/facebook/presto/iceberg/nessie/TestIcebergDistributedNessie.java index d9a5884ad52c2..c72612e799fa7 100644 --- a/presto-iceberg/src/test/java/com/facebook/presto/iceberg/nessie/TestIcebergDistributedNessie.java +++ b/presto-iceberg/src/test/java/com/facebook/presto/iceberg/nessie/TestIcebergDistributedNessie.java @@ -18,11 +18,11 @@ import com.facebook.presto.testing.QueryRunner; import com.facebook.presto.testing.containers.NessieContainer; import com.google.common.collect.ImmutableMap; +import org.apache.hadoop.fs.Path; import org.testng.annotations.AfterClass; import org.testng.annotations.BeforeClass; import org.testng.annotations.Test; -import java.io.File; import java.util.Map; import static com.facebook.presto.iceberg.CatalogType.NESSIE; @@ -43,7 +43,7 @@ protected TestIcebergDistributedNessie() @Override protected Map getProperties() { - File metastoreDir = getCatalogDirectory(); + Path metastoreDir = getCatalogDirectory(); return ImmutableMap.of("warehouse", metastoreDir.toString(), "uri", nessieContainer.getRestApiUri()); } diff --git a/presto-iceberg/src/test/java/com/facebook/presto/iceberg/rest/IcebergRestTestUtil.java b/presto-iceberg/src/test/java/com/facebook/presto/iceberg/rest/IcebergRestTestUtil.java index 4baf4560b28c6..193fc0a22975d 100644 --- a/presto-iceberg/src/test/java/com/facebook/presto/iceberg/rest/IcebergRestTestUtil.java +++ b/presto-iceberg/src/test/java/com/facebook/presto/iceberg/rest/IcebergRestTestUtil.java @@ -20,6 +20,9 @@ import com.facebook.airlift.node.NodeInfo; import com.facebook.presto.hive.HdfsContext; import com.facebook.presto.hive.HdfsEnvironment; +import com.facebook.presto.hive.HiveClientConfig; +import com.facebook.presto.hive.MetastoreClientConfig; +import com.facebook.presto.hive.s3.HiveS3Config; import com.facebook.presto.spi.ConnectorSession; import com.facebook.presto.testing.TestingConnectorSession; import com.google.common.collect.ImmutableList; @@ -62,7 +65,7 @@ public static Map restConnectorProperties(String serverUri) public static TestingHttpServer getRestServer(String location) { JdbcCatalog backingCatalog = new JdbcCatalog(); - HdfsEnvironment hdfsEnvironment = getHdfsEnvironment(); + HdfsEnvironment hdfsEnvironment = getHdfsEnvironment(new HiveClientConfig(), new MetastoreClientConfig(), new HiveS3Config()); backingCatalog.setConf(hdfsEnvironment.getConfiguration(new HdfsContext(SESSION), new Path(location))); Map properties = ImmutableMap.builder()