From 7884267a0fe448ed524cfa56e959f86b9cfc6867 Mon Sep 17 00:00:00 2001 From: Marius Grama Date: Mon, 17 Oct 2022 22:40:24 +0200 Subject: [PATCH] Add `singlenode-hudi` product test environment --- .../env/environment/EnvSinglenodeHudi.java | 146 ++++++++++++++++++ .../launcher/suite/suites/SuiteHudi.java | 37 +++++ .../singlenode-hudi/hive.properties | 9 ++ .../singlenode-hudi/hudi.properties | 7 + .../singlenode-hudi/spark-defaults.conf | 19 +++ .../io/trino/tests/product/TestGroups.java | 1 + .../product/hudi/TestHudiCompatibility.java | 73 +++++++++ .../tests/product/utils/QueryExecutors.java | 27 ++++ .../main/resources/tempto-configuration.yaml | 10 ++ 9 files changed, 329 insertions(+) create mode 100644 testing/trino-product-tests-launcher/src/main/java/io/trino/tests/product/launcher/env/environment/EnvSinglenodeHudi.java create mode 100644 testing/trino-product-tests-launcher/src/main/java/io/trino/tests/product/launcher/suite/suites/SuiteHudi.java create mode 100644 testing/trino-product-tests-launcher/src/main/resources/docker/presto-product-tests/conf/environment/singlenode-hudi/hive.properties create mode 100644 testing/trino-product-tests-launcher/src/main/resources/docker/presto-product-tests/conf/environment/singlenode-hudi/hudi.properties create mode 100644 testing/trino-product-tests-launcher/src/main/resources/docker/presto-product-tests/conf/environment/singlenode-hudi/spark-defaults.conf create mode 100644 testing/trino-product-tests/src/main/java/io/trino/tests/product/hudi/TestHudiCompatibility.java diff --git a/testing/trino-product-tests-launcher/src/main/java/io/trino/tests/product/launcher/env/environment/EnvSinglenodeHudi.java b/testing/trino-product-tests-launcher/src/main/java/io/trino/tests/product/launcher/env/environment/EnvSinglenodeHudi.java new file mode 100644 index 000000000000..0e549521e17e --- /dev/null +++ 
b/testing/trino-product-tests-launcher/src/main/java/io/trino/tests/product/launcher/env/environment/EnvSinglenodeHudi.java
@@ -0,0 +1,160 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.trino.tests.product.launcher.env.environment;
+
+import com.google.common.collect.ImmutableList;
+import io.trino.tests.product.launcher.docker.DockerFiles;
+import io.trino.tests.product.launcher.env.DockerContainer;
+import io.trino.tests.product.launcher.env.Environment;
+import io.trino.tests.product.launcher.env.EnvironmentConfig;
+import io.trino.tests.product.launcher.env.EnvironmentProvider;
+import io.trino.tests.product.launcher.env.common.Hadoop;
+import io.trino.tests.product.launcher.env.common.Minio;
+import io.trino.tests.product.launcher.env.common.Standard;
+import io.trino.tests.product.launcher.env.common.TestsEnvironment;
+import io.trino.tests.product.launcher.testcontainers.PortBinder;
+
+import javax.inject.Inject;
+
+import java.io.IOException;
+import java.io.UncheckedIOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.attribute.FileAttribute;
+import java.nio.file.attribute.PosixFilePermission;
+import java.nio.file.attribute.PosixFilePermissions;
+import java.util.Set;
+
+import static io.trino.tests.product.launcher.docker.ContainerUtil.forSelectedPorts;
+import static io.trino.tests.product.launcher.env.EnvironmentContainers.HADOOP;
+import static io.trino.tests.product.launcher.env.EnvironmentContainers.TESTS;
+import static io.trino.tests.product.launcher.env.common.Minio.MINIO_CONTAINER_NAME;
+import static io.trino.tests.product.launcher.env.common.Standard.CONTAINER_TEMPTO_PROFILE_CONFIG;
+import static io.trino.tests.product.launcher.env.common.Standard.CONTAINER_TRINO_ETC;
+import static java.util.Objects.requireNonNull;
+import static org.testcontainers.utility.MountableFile.forHostPath;
+
+/**
+ * EnvSinglenodeHudi test environment consists of:
+ * - Hive (used for metastore) (HDP 3.1)
+ * - Spark with Hudi
+ * - MinIO S3-compatible storage to store table data
+ */
+@TestsEnvironment
+public class EnvSinglenodeHudi
+        extends EnvironmentProvider
+{
+    // Spark Thrift server port; the "hudi" database in tempto-configuration.yaml connects to it
+    private static final int SPARK_THRIFT_PORT = 10213;
+
+    private static final String SPARK_CONTAINER_NAME = "spark";
+
+    // Bucket name used when the S3_BUCKET environment variable is not set
+    private static final String DEFAULT_S3_BUCKET_NAME = "trino-ci-test";
+
+    private final DockerFiles dockerFiles;
+    private final PortBinder portBinder;
+    private final String hadoopImagesVersion;
+    private final DockerFiles.ResourceProvider configDir;
+
+    @Inject
+    public EnvSinglenodeHudi(
+            Standard standard,
+            Hadoop hadoop,
+            DockerFiles dockerFiles,
+            EnvironmentConfig config,
+            PortBinder portBinder,
+            Minio minio)
+    {
+        super(ImmutableList.of(standard, hadoop, minio));
+        this.dockerFiles = requireNonNull(dockerFiles, "dockerFiles is null");
+        this.portBinder = requireNonNull(portBinder, "portBinder is null");
+        this.hadoopImagesVersion = config.getHadoopImagesVersion();
+        this.configDir = dockerFiles.getDockerFilesHostDirectory("conf/environment/singlenode-hudi");
+    }
+
+    /**
+     * Switches the Hadoop container to the HDP 3.1 Hive image, adds the {@code hive} and
+     * {@code hudi} connectors, hands the bucket name to the tests container, adds the Spark
+     * container and copies an empty temporary directory into MinIO as {@code /data/<bucket>}.
+     */
+    @Override
+    public void extendEnvironment(Environment.Builder builder)
+    {
+        String s3Bucket = getS3Bucket();
+
+        // Using hdp3.1 so we are using Hive metastore with version close to versions of hive-*.jars Spark uses
+        builder.configureContainer(HADOOP, container -> {
+            container.setDockerImageName("ghcr.io/trinodb/testing/hdp3.1-hive:" + hadoopImagesVersion);
+        });
+
+        builder.addConnector("hive", forHostPath(configDir.getPath("hive.properties")));
+        builder.addConnector(
+                "hudi",
+                forHostPath(configDir.getPath("hudi.properties")),
+                CONTAINER_TRINO_ETC + "/catalog/hudi.properties");
+
+        builder.configureContainer(TESTS, dockerContainer -> {
+            dockerContainer.withEnv("S3_BUCKET", s3Bucket)
+                    .withCopyFileToContainer(
+                            forHostPath(dockerFiles.getDockerFilesHostPath("conf/tempto/tempto-configuration-for-hive3.yaml")),
+                            CONTAINER_TEMPTO_PROFILE_CONFIG);
+        });
+
+        builder.addContainer(createSparkContainer())
+                // Ensure Hive metastore is up; Spark needs to access it during startup
+                .containerDependsOn(SPARK_CONTAINER_NAME, HADOOP);
+
+        // Initialize buckets in Minio
+        FileAttribute<Set<PosixFilePermission>> posixFilePermissions = PosixFilePermissions.asFileAttribute(PosixFilePermissions.fromString("rw-r--r--"));
+        Path minioBucketDirectory;
+        try {
+            minioBucketDirectory = Files.createTempDirectory("trino-ci-test", posixFilePermissions);
+            minioBucketDirectory.toFile().deleteOnExit();
+        }
+        catch (IOException e) {
+            throw new UncheckedIOException(e);
+        }
+        builder.configureContainer(MINIO_CONTAINER_NAME, container ->
+                container.withCopyFileToContainer(forHostPath(minioBucketDirectory), "/data/" + s3Bucket));
+    }
+
+    /**
+     * Builds the Spark container from the {@code spark3-hudi} image, copying in this environment's
+     * {@code spark-defaults.conf} and configuring it to wait on the Thrift port.
+     */
+    @SuppressWarnings("resource")
+    private DockerContainer createSparkContainer()
+    {
+        DockerContainer container = new DockerContainer("ghcr.io/trinodb/testing/spark3-hudi:" + hadoopImagesVersion, SPARK_CONTAINER_NAME)
+                .withCopyFileToContainer(forHostPath(configDir.getPath("spark-defaults.conf")), "/spark/conf/spark-defaults.conf")
+                .waitingFor(forSelectedPorts(SPARK_THRIFT_PORT));
+
+        portBinder.exposePort(container, SPARK_THRIFT_PORT);
+
+        return container;
+    }
+
+    /**
+     * Returns the {@code S3_BUCKET} environment variable, or {@link #DEFAULT_S3_BUCKET_NAME} when it is unset.
+     */
+    private String getS3Bucket()
+    {
+        String s3Bucket = System.getenv("S3_BUCKET");
+        if (s3Bucket == null) {
+            s3Bucket = DEFAULT_S3_BUCKET_NAME;
+        }
+        return s3Bucket;
+    }
+}
diff --git
a/testing/trino-product-tests-launcher/src/main/java/io/trino/tests/product/launcher/suite/suites/SuiteHudi.java b/testing/trino-product-tests-launcher/src/main/java/io/trino/tests/product/launcher/suite/suites/SuiteHudi.java
new file mode 100644
index 000000000000..e01c64cd11c5
--- /dev/null
+++ b/testing/trino-product-tests-launcher/src/main/java/io/trino/tests/product/launcher/suite/suites/SuiteHudi.java
@@ -0,0 +1,41 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.trino.tests.product.launcher.suite.suites;
+
+import com.google.common.collect.ImmutableList;
+import io.trino.tests.product.launcher.env.EnvironmentConfig;
+import io.trino.tests.product.launcher.env.environment.EnvSinglenodeHudi;
+import io.trino.tests.product.launcher.suite.Suite;
+import io.trino.tests.product.launcher.suite.SuiteTestRun;
+
+import java.util.List;
+
+import static io.trino.tests.product.launcher.suite.SuiteTestRun.testOnEnvironment;
+
+/**
+ * Product test suite with a single test run: the {@code configured_features} and {@code hudi}
+ * test groups on the {@link EnvSinglenodeHudi} environment.
+ */
+public class SuiteHudi
+        extends Suite
+{
+    @Override
+    public List<SuiteTestRun> getTestRuns(EnvironmentConfig config)
+    {
+        return ImmutableList.of(
+                testOnEnvironment(EnvSinglenodeHudi.class)
+                        .withGroups("configured_features", "hudi")
+                        .build());
+    }
+}
diff --git a/testing/trino-product-tests-launcher/src/main/resources/docker/presto-product-tests/conf/environment/singlenode-hudi/hive.properties b/testing/trino-product-tests-launcher/src/main/resources/docker/presto-product-tests/conf/environment/singlenode-hudi/hive.properties
new file mode 100644 index 000000000000..d3659681a8a4 --- /dev/null +++ b/testing/trino-product-tests-launcher/src/main/resources/docker/presto-product-tests/conf/environment/singlenode-hudi/hive.properties @@ -0,0 +1,9 @@ +connector.name=hive +hive.metastore.uri=thrift://hadoop-master:9083 +hive.non-managed-table-writes-enabled=true +hive.s3.aws-access-key=minio-access-key +hive.s3.aws-secret-key=minio-secret-key +hive.s3.endpoint=http://minio:9080/ +hive.s3.path-style-access=true +hive.s3.ssl.enabled=false +hive.security=allow-all diff --git a/testing/trino-product-tests-launcher/src/main/resources/docker/presto-product-tests/conf/environment/singlenode-hudi/hudi.properties b/testing/trino-product-tests-launcher/src/main/resources/docker/presto-product-tests/conf/environment/singlenode-hudi/hudi.properties new file mode 100644 index 000000000000..20ee1e582e64 --- /dev/null +++ b/testing/trino-product-tests-launcher/src/main/resources/docker/presto-product-tests/conf/environment/singlenode-hudi/hudi.properties @@ -0,0 +1,7 @@ +connector.name=hudi +hive.metastore.uri=thrift://hadoop-master:9083 +hive.s3.aws-access-key=minio-access-key +hive.s3.aws-secret-key=minio-secret-key +hive.s3.endpoint=http://minio:9080/ +hive.s3.path-style-access=true +hive.s3.ssl.enabled=false diff --git a/testing/trino-product-tests-launcher/src/main/resources/docker/presto-product-tests/conf/environment/singlenode-hudi/spark-defaults.conf b/testing/trino-product-tests-launcher/src/main/resources/docker/presto-product-tests/conf/environment/singlenode-hudi/spark-defaults.conf new file mode 100644 index 000000000000..824ec3b2627e --- /dev/null +++ b/testing/trino-product-tests-launcher/src/main/resources/docker/presto-product-tests/conf/environment/singlenode-hudi/spark-defaults.conf @@ -0,0 +1,19 @@ +spark.sql.catalogImplementation=hive +spark.sql.warehouse.dir=hdfs://hadoop-master:9000/user/hive/warehouse +spark.sql.hive.thriftServer.singleSession=false + 
+spark.sql.extensions=org.apache.spark.sql.hudi.HoodieSparkSessionExtension
+spark.sql.catalog.spark_catalog=org.apache.spark.sql.hudi.catalog.HoodieCatalog
+spark.serializer=org.apache.spark.serializer.KryoSerializer
+
+spark.hadoop.fs.defaultFS=hdfs://hadoop-master:9000
+spark.hive.metastore.uris=thrift://hadoop-master:9083
+spark.hive.metastore.warehouse.dir=hdfs://hadoop-master:9000/user/hive/warehouse
+spark.hive.metastore.schema.verification=false
+
+spark.hadoop.fs.s3.impl=org.apache.hadoop.fs.s3a.S3AFileSystem
+spark.hadoop.fs.s3n.impl=org.apache.hadoop.fs.s3a.S3AFileSystem
+spark.hadoop.fs.s3a.endpoint=http://minio:9080
+spark.hadoop.fs.s3a.path.style.access=true
+spark.hadoop.fs.s3a.access.key=minio-access-key
+spark.hadoop.fs.s3a.secret.key=minio-secret-key
diff --git a/testing/trino-product-tests/src/main/java/io/trino/tests/product/TestGroups.java b/testing/trino-product-tests/src/main/java/io/trino/tests/product/TestGroups.java
index 606ea898c4a8..b596a0908c5a 100644
--- a/testing/trino-product-tests/src/main/java/io/trino/tests/product/TestGroups.java
+++ b/testing/trino-product-tests/src/main/java/io/trino/tests/product/TestGroups.java
@@ -81,6 +81,7 @@ public final class TestGroups
     public static final String DELTA_LAKE_DATABRICKS = "delta-lake-databricks";
     public static final String DELTA_LAKE_EXCLUDE_73 = "delta-lake-exclude-73";
     public static final String DELTA_LAKE_EXCLUDE_91 = "delta-lake-exclude-91";
+    public static final String HUDI = "hudi";
     public static final String PARQUET = "parquet";
     private TestGroups() {}
 }
diff --git a/testing/trino-product-tests/src/main/java/io/trino/tests/product/hudi/TestHudiCompatibility.java b/testing/trino-product-tests/src/main/java/io/trino/tests/product/hudi/TestHudiCompatibility.java
new file mode 100644
index 000000000000..282956afbbbc
--- /dev/null
+++ b/testing/trino-product-tests/src/main/java/io/trino/tests/product/hudi/TestHudiCompatibility.java
@@ -0,0 +1,81 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.trino.tests.product.hudi;
+
+import com.google.common.collect.ImmutableList;
+import io.trino.tempto.BeforeTestWithContext;
+import io.trino.tempto.ProductTest;
+import io.trino.tempto.assertions.QueryAssert;
+import org.testng.annotations.Test;
+
+import java.util.List;
+
+import static io.trino.tempto.assertions.QueryAssert.Row.row;
+import static io.trino.tempto.assertions.QueryAssert.assertThat;
+import static io.trino.tests.product.TestGroups.HUDI;
+import static io.trino.tests.product.TestGroups.PROFILE_SPECIFIC_TESTS;
+import static io.trino.tests.product.hive.util.TemporaryHiveTable.randomTableSuffix;
+import static io.trino.tests.product.utils.QueryExecutors.onHudi;
+import static io.trino.tests.product.utils.QueryExecutors.onTrino;
+import static java.lang.String.format;
+import static java.util.Objects.requireNonNull;
+
+/**
+ * Checks that a Hudi table created and populated through the {@code hudi} query executor
+ * returns the same rows when read through that executor and through Trino's {@code hudi} catalog.
+ */
+public class TestHudiCompatibility
+        extends ProductTest
+{
+    protected String bucketName;
+
+    /**
+     * Reads the bucket name from the {@code S3_BUCKET} environment variable; throws if it is not set.
+     */
+    @BeforeTestWithContext
+    public void setUp()
+    {
+        bucketName = requireNonNull(System.getenv("S3_BUCKET"), "Environment variable not set: S3_BUCKET");
+    }
+
+    @Test(groups = {HUDI, PROFILE_SPECIFIC_TESTS})
+    public void testDemo()
+    {
+        String tableName = "test_hudi_demo_" + randomTableSuffix();
+        String tableDirectory = "hudi-compatibility-test-" + tableName;
+
+        onHudi().executeQuery(format("CREATE TABLE default.%s (uuid int, col string) USING hudi LOCATION 's3://%s/%s'",
+                tableName,
+                bucketName,
+                tableDirectory));
+
+        // Everything after a successful CREATE TABLE runs inside try, so the table is dropped even when an insert fails
+        try {
+            onHudi().executeQuery("insert into default." + tableName + " select 1, 'Trino'");
+            onHudi().executeQuery("insert into default." + tableName + " select 2, 'rocks'");
+
+            List<QueryAssert.Row> expectedRows = ImmutableList.of(
+                    row(1, "Trino"),
+                    row(2, "rocks"));
+
+            assertThat(onHudi().executeQuery("SELECT uuid, col FROM default." + tableName))
+                    .containsOnly(expectedRows);
+            assertThat(onTrino().executeQuery("SELECT uuid, col FROM hudi.default." + tableName))
+                    .containsOnly(expectedRows);
+        }
+        finally {
+            onHudi().executeQuery("DROP TABLE default." + tableName);
+        }
+    }
+}
diff --git a/testing/trino-product-tests/src/main/java/io/trino/tests/product/utils/QueryExecutors.java b/testing/trino-product-tests/src/main/java/io/trino/tests/product/utils/QueryExecutors.java
index 2ba0d7cc0a68..f8345887acd8 100644
--- a/testing/trino-product-tests/src/main/java/io/trino/tests/product/utils/QueryExecutors.java
+++ b/testing/trino-product-tests/src/main/java/io/trino/tests/product/utils/QueryExecutors.java
@@ -155,5 +155,36 @@ public void close()
         };
     }
 
+    /**
+     * Returns a {@link QueryExecutor} that forwards every call to the executor obtained from the
+     * test context under the name {@code hudi}.
+     */
+    public static QueryExecutor onHudi()
+    {
+        return new QueryExecutor()
+        {
+            private final QueryExecutor delegate = testContext().getDependency(QueryExecutor.class, "hudi");
+
+            @Override
+            public QueryResult executeQuery(String sql, QueryParam... params)
+                    throws QueryExecutionException
+            {
+                return delegate.executeQuery(sql, params);
+            }
+
+            @Override
+            public Connection getConnection()
+            {
+                return delegate.getConnection();
+            }
+
+            @Override
+            public void close()
+            {
+                delegate.close();
+            }
+        };
+    }
+
     private QueryExecutors() {}
 }
diff --git a/testing/trino-product-tests/src/main/resources/tempto-configuration.yaml b/testing/trino-product-tests/src/main/resources/tempto-configuration.yaml
index 13743718a4e8..d23b38c784e0 100644
--- a/testing/trino-product-tests/src/main/resources/tempto-configuration.yaml
+++ b/testing/trino-product-tests/src/main/resources/tempto-configuration.yaml
@@ -174,6 +174,16 @@ databases:
     jdbc_user: hive
     jdbc_password: na
 
+  hudi:
+    jdbc_driver_class: org.apache.hive.jdbc.HiveDriver
+    schema: default
+    prepare_statement:
+      - USE ${databases.hudi.schema}
+    table_manager_type: jdbc
+    jdbc_url: jdbc:hive2://spark:10213
+    jdbc_user: hive
+    jdbc_password: na
+
 tests:
   hdfs:
     path: /tmp/product-test