From feec16ce874a3d7ef38657db0483ca630d0f6ca6 Mon Sep 17 00:00:00 2001 From: Yuya Ebihara Date: Mon, 29 Aug 2022 14:36:02 +0900 Subject: [PATCH] Add test for hive.metastore.thrift.delete-files-on-drop config property --- .../bin/run_hive_s3_tests.sh | 1 + plugin/trino-hive-hadoop2/pom.xml | 8 ++ .../hive/TestHiveThriftMetastoreWithS3.java | 126 ++++++++++++++++++ .../resources/s3/hive-core-site.template.xml | 43 ++++++ .../io/trino/plugin/hive/HiveQueryRunner.java | 11 +- .../plugin/hive/s3/S3HiveQueryRunner.java | 13 +- 6 files changed, 200 insertions(+), 2 deletions(-) create mode 100644 plugin/trino-hive-hadoop2/src/test/java/io/trino/plugin/hive/TestHiveThriftMetastoreWithS3.java create mode 100644 plugin/trino-hive-hadoop2/src/test/resources/s3/hive-core-site.template.xml diff --git a/plugin/trino-hive-hadoop2/bin/run_hive_s3_tests.sh b/plugin/trino-hive-hadoop2/bin/run_hive_s3_tests.sh index 6ad38ab66de5..57c3c090bf75 100755 --- a/plugin/trino-hive-hadoop2/bin/run_hive_s3_tests.sh +++ b/plugin/trino-hive-hadoop2/bin/run_hive_s3_tests.sh @@ -92,6 +92,7 @@ set +e -Dhive.hadoop2.metastoreHost=localhost \ -Dhive.hadoop2.metastorePort=9083 \ -Dhive.hadoop2.databaseName=default \ + -Dhive.hadoop2.s3.endpoint="${S3_BUCKET_ENDPOINT}" \ -Dhive.hadoop2.s3.awsAccessKey="${AWS_ACCESS_KEY_ID}" \ -Dhive.hadoop2.s3.awsSecretKey="${AWS_SECRET_ACCESS_KEY}" \ -Dhive.hadoop2.s3.writableBucket="${S3_BUCKET}" \ diff --git a/plugin/trino-hive-hadoop2/pom.xml b/plugin/trino-hive-hadoop2/pom.xml index a46caf1cb549..94b78972aabd 100644 --- a/plugin/trino-hive-hadoop2/pom.xml +++ b/plugin/trino-hive-hadoop2/pom.xml @@ -146,6 +146,12 @@ test + + io.trino + trino-testing-containers + test + + io.airlift testing @@ -180,6 +186,7 @@ **/TestHive.java **/TestHiveAlluxioMetastore.java + **/TestHiveThriftMetastoreWithS3.java **/TestHiveFileSystemS3.java **/TestHiveFileSystemS3SelectPushdown.java **/TestHiveFileSystemS3SelectJsonPushdown.java @@ -220,6 +227,7 @@ maven-surefire-plugin + **/TestHiveThriftMetastoreWithS3.java **/TestHiveFileSystemS3.java **/TestHiveFileSystemS3SelectPushdown.java **/TestHiveFileSystemS3SelectPushdownWithSplits.java diff --git a/plugin/trino-hive-hadoop2/src/test/java/io/trino/plugin/hive/TestHiveThriftMetastoreWithS3.java b/plugin/trino-hive-hadoop2/src/test/java/io/trino/plugin/hive/TestHiveThriftMetastoreWithS3.java new file mode 100644 index 000000000000..fee023194326 --- /dev/null +++ b/plugin/trino-hive-hadoop2/src/test/java/io/trino/plugin/hive/TestHiveThriftMetastoreWithS3.java @@ -0,0 +1,126 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.trino.plugin.hive; + +import com.google.common.collect.ImmutableMap; +import com.google.common.io.Resources; +import io.trino.plugin.hive.containers.HiveHadoop; +import io.trino.plugin.hive.metastore.thrift.ThriftMetastoreConfig; +import io.trino.plugin.hive.s3.S3HiveQueryRunner; +import io.trino.testing.AbstractTestQueryFramework; +import io.trino.testing.QueryRunner; +import org.testng.annotations.AfterClass; +import org.testng.annotations.BeforeClass; +import org.testng.annotations.Parameters; +import org.testng.annotations.Test; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.attribute.PosixFilePermissions; + +import static io.trino.testing.sql.TestTable.randomTableSuffix; +import static java.nio.charset.StandardCharsets.UTF_8; +import static java.util.Objects.requireNonNull; + +public class TestHiveThriftMetastoreWithS3 + extends AbstractTestQueryFramework +{ + private final String awsS3endpoint; + private final String awsAccessKey; + private final String awsSecretKey; + private final String writableBucket; + private final String schemaName; + private final Path hadoopCoreSiteXmlTempFile; + + @Parameters({ + "hive.hadoop2.s3.endpoint", + "hive.hadoop2.s3.awsAccessKey", + "hive.hadoop2.s3.awsSecretKey", + "hive.hadoop2.s3.writableBucket", + }) + public TestHiveThriftMetastoreWithS3( + String awsS3endpoint, + String awsAccessKey, + String awsSecretKey, + String writableBucket) + throws IOException + { + this.awsS3endpoint = requireNonNull(awsS3endpoint, "awsS3endpoint is null"); + this.awsAccessKey = requireNonNull(awsAccessKey, "awsAccessKey is null"); + this.awsSecretKey = requireNonNull(awsSecretKey, "awsSecretKey is null"); + this.writableBucket = requireNonNull(writableBucket, "writableBucket is null"); + this.schemaName = "test_thrift_s3_" + randomTableSuffix(); + + String coreSiteXmlContent = Resources.toString(Resources.getResource("s3/hive-core-site.template.xml"), UTF_8) + .replace("%S3_BUCKET_ENDPOINT%", awsS3endpoint) + .replace("%AWS_ACCESS_KEY_ID%", awsAccessKey) + .replace("%AWS_SECRET_ACCESS_KEY%", awsSecretKey); + + hadoopCoreSiteXmlTempFile = Files.createTempFile("core-site", ".xml", PosixFilePermissions.asFileAttribute(PosixFilePermissions.fromString("rw-r--r--"))); + hadoopCoreSiteXmlTempFile.toFile().deleteOnExit(); + Files.writeString(hadoopCoreSiteXmlTempFile, coreSiteXmlContent); + } + + @Override + protected QueryRunner createQueryRunner() + throws Exception + { + HiveHadoop hiveHadoop = HiveHadoop.builder() + .withFilesToMount(ImmutableMap.of("/etc/hadoop/conf/core-site.xml", hadoopCoreSiteXmlTempFile.normalize().toAbsolutePath().toString())) + .build(); + hiveHadoop.start(); + + return S3HiveQueryRunner.builder() + .setHiveMetastoreEndpoint(hiveHadoop.getHiveMetastoreEndpoint()) + .setS3Endpoint(awsS3endpoint) + .setS3AccessKey(awsAccessKey) + .setS3SecretKey(awsSecretKey) + .setBucketName(writableBucket) + .setPopulateTpchData(false) + .setThriftMetastoreConfig(new ThriftMetastoreConfig().setDeleteFilesOnDrop(true)) + .build(); + } + + @BeforeClass + public void setUp() + { + String schemaLocation = "s3a://%s/%s".formatted(writableBucket, schemaName); + assertUpdate("CREATE SCHEMA " + schemaName + " WITH (location = '" + schemaLocation + "')"); + } + + @AfterClass(alwaysRun = true) + public void tearDown() + { + assertUpdate("DROP SCHEMA IF EXISTS " + schemaName); + } + + @Test + public void testRecreateTable() + { + String tableName = "%s.test_recreate_table_%s".formatted(schemaName, randomTableSuffix()); + assertUpdate("CREATE TABLE " + tableName + "(col int)"); + try { + assertUpdate("INSERT INTO " + tableName + " VALUES (1)", 1); + assertUpdate("DROP TABLE " + tableName); + + // Recreating a table throws "Target directory for table 'xxx' already exists" in real S3 (not MinIO) + // when 'hive.metastore.thrift.delete-files-on-drop' config property is false + assertUpdate("CREATE TABLE " + tableName + "(col int)"); + } + finally { + assertUpdate("DROP TABLE IF EXISTS " + tableName); + } + } +} diff --git a/plugin/trino-hive-hadoop2/src/test/resources/s3/hive-core-site.template.xml b/plugin/trino-hive-hadoop2/src/test/resources/s3/hive-core-site.template.xml new file mode 100644 index 000000000000..a3dc6ad47d4b --- /dev/null +++ b/plugin/trino-hive-hadoop2/src/test/resources/s3/hive-core-site.template.xml @@ -0,0 +1,43 @@ + + + + fs.defaultFS + hdfs://hadoop-master:9000 + + + + fs.s3a.endpoint + %S3_BUCKET_ENDPOINT% + + + + fs.s3.awsAccessKeyId + %AWS_ACCESS_KEY_ID% + + + + fs.s3.awsSecretAccessKey + %AWS_SECRET_ACCESS_KEY% + + + + fs.s3a.access.key + %AWS_ACCESS_KEY_ID% + + + + fs.s3a.secret.key + %AWS_SECRET_ACCESS_KEY% + + + + + hadoop.proxyuser.hive.hosts + * + + + + hadoop.proxyuser.hive.groups + * + + diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/HiveQueryRunner.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/HiveQueryRunner.java index 3a12ac6d7090..ef45edf9989a 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/HiveQueryRunner.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/HiveQueryRunner.java @@ -117,6 +117,7 @@ public static class Builder> private Optional directoryLister = Optional.empty(); private boolean tpcdsCatalogEnabled; private String security = SQL_STANDARD; + private boolean populateTpchData = true; private ColumnNaming tpchColumnNaming = SIMPLIFIED; private DecimalTypeMapping tpchDecimalTypeMapping = DOUBLE; @@ -197,6 +198,12 @@ public SELF setSecurity(String security) return self(); } + public SELF setPopulateTpchData(boolean populateTpchData) + { + this.populateTpchData = populateTpchData; + return self(); + } + public SELF setTpchColumnNaming(ColumnNaming tpchColumnNaming) { this.tpchColumnNaming = requireNonNull(tpchColumnNaming, "tpchColumnNaming is null"); @@ -255,7 +262,9 @@ public DistributedQueryRunner build() queryRunner.createCatalog(HIVE_CATALOG, "hive", hiveProperties); queryRunner.createCatalog(HIVE_BUCKETED_CATALOG, "hive", hiveBucketedProperties); - populateData(queryRunner, metastore); + if (populateTpchData) { + populateData(queryRunner, metastore); + } return queryRunner; } diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/s3/S3HiveQueryRunner.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/s3/S3HiveQueryRunner.java index 2e676c70a683..5537aae7e0de 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/s3/S3HiveQueryRunner.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/s3/S3HiveQueryRunner.java @@ -21,6 +21,7 @@ import io.trino.plugin.hive.containers.HiveMinioDataLake; import io.trino.plugin.hive.metastore.thrift.BridgingHiveMetastore; import io.trino.plugin.hive.metastore.thrift.TestingMetastoreLocator; +import io.trino.plugin.hive.metastore.thrift.ThriftMetastoreConfig; import io.trino.testing.DistributedQueryRunner; import io.trino.tpch.TpchTable; @@ -85,6 +86,7 @@ public static class Builder { private HostAndPort hiveMetastoreEndpoint; private Duration thriftMetastoreTimeout = TestingMetastoreLocator.TIMEOUT; + private ThriftMetastoreConfig thriftMetastoreConfig = new ThriftMetastoreConfig(); private String s3Endpoint; private String s3AccessKey; private String s3SecretKey; @@ -102,6 +104,12 @@ public Builder setThriftMetastoreTimeout(Duration thriftMetastoreTimeout) return this; } + public Builder setThriftMetastoreConfig(ThriftMetastoreConfig thriftMetastoreConfig) + { + this.thriftMetastoreConfig = requireNonNull(thriftMetastoreConfig, "thriftMetastoreConfig is null"); + return this; + } + public Builder setS3Endpoint(String s3Endpoint) { this.s3Endpoint = requireNonNull(s3Endpoint, "s3Endpoint is null"); @@ -136,7 +144,9 @@ public DistributedQueryRunner build() requireNonNull(s3SecretKey, "s3SecretKey is null"); requireNonNull(bucketName, "bucketName is null"); String lowerCaseS3Endpoint = s3Endpoint.toLowerCase(Locale.ENGLISH); - checkArgument(lowerCaseS3Endpoint.startsWith("http://") || lowerCaseS3Endpoint.startsWith("https://"), "Expected http URI for S3 endpoint; got %s", s3Endpoint); + checkArgument( + lowerCaseS3Endpoint.matches("s3.*\\.amazonaws\\.com") || lowerCaseS3Endpoint.startsWith("http://") || lowerCaseS3Endpoint.startsWith("https://"), + "Expected S3 hostname or http URI for S3 endpoint; got %s", s3Endpoint); addHiveProperty("hive.s3.endpoint", s3Endpoint); addHiveProperty("hive.s3.aws-access-key", s3AccessKey); @@ -145,6 +155,7 @@ public DistributedQueryRunner build() setMetastore(distributedQueryRunner -> new BridgingHiveMetastore( testingThriftHiveMetastoreBuilder() .metastoreClient(hiveMetastoreEndpoint, thriftMetastoreTimeout) + .thriftMetastoreConfig(thriftMetastoreConfig) .build())); setInitialSchemasLocationBase("s3a://" + bucketName); // cannot use s3:// as Hive metastore is not configured to accept it return super.build();