Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -864,6 +864,9 @@ jobs:
# this suite is not meant to be run with different configs
- config: default
suite: suite-iceberg
# this suite is not meant to be run with different configs
- config: default
suite: suite-hudi
EOF
- name: Build PT matrix (all)
if: |
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
/*
 * Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.trino.tests.product.launcher.env.environment;

import com.google.common.collect.ImmutableList;
import io.trino.tests.product.launcher.docker.DockerFiles;
import io.trino.tests.product.launcher.env.DockerContainer;
import io.trino.tests.product.launcher.env.Environment;
import io.trino.tests.product.launcher.env.EnvironmentConfig;
import io.trino.tests.product.launcher.env.EnvironmentProvider;
import io.trino.tests.product.launcher.env.common.Hadoop;
import io.trino.tests.product.launcher.env.common.Minio;
import io.trino.tests.product.launcher.env.common.Standard;
import io.trino.tests.product.launcher.env.common.TestsEnvironment;
import io.trino.tests.product.launcher.testcontainers.PortBinder;

import javax.inject.Inject;

import java.io.IOException;
import java.io.UncheckedIOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.attribute.FileAttribute;
import java.nio.file.attribute.PosixFilePermission;
import java.nio.file.attribute.PosixFilePermissions;
import java.util.Set;

import static io.trino.tests.product.launcher.docker.ContainerUtil.forSelectedPorts;
import static io.trino.tests.product.launcher.env.EnvironmentContainers.HADOOP;
import static io.trino.tests.product.launcher.env.EnvironmentContainers.TESTS;
import static io.trino.tests.product.launcher.env.common.Minio.MINIO_CONTAINER_NAME;
import static io.trino.tests.product.launcher.env.common.Standard.CONTAINER_TRINO_ETC;
import static java.util.Objects.requireNonNull;
import static org.testcontainers.utility.MountableFile.forHostPath;

/**
* EnvSinglenodeHudi test environment consists of:
* - Hive (used for metastore) (HDP 3.1)
* - Spark with Hudi
* - MinIO S3-compatible storage to store table data
*/
@TestsEnvironment
public class EnvSinglenodeHudi
extends EnvironmentProvider
{
private static final int SPARK_THRIFT_PORT = 10213;

private static final String SPARK_CONTAINER_NAME = "spark";
private static final String S3_BUCKET_NAME = "trino-ci-test";

private final PortBinder portBinder;
private final String hadoopImagesVersion;
private final DockerFiles.ResourceProvider configDir;

@Inject
public EnvSinglenodeHudi(
Standard standard,
Hadoop hadoop,
DockerFiles dockerFiles,
EnvironmentConfig config,
PortBinder portBinder,
Minio minio)
{
super(ImmutableList.of(standard, hadoop, minio));
this.portBinder = requireNonNull(portBinder, "portBinder is null");
this.hadoopImagesVersion = config.getHadoopImagesVersion();
this.configDir = dockerFiles.getDockerFilesHostDirectory("conf/environment/singlenode-hudi");
}

@Override
public void extendEnvironment(Environment.Builder builder)
{
// Using hdp3.1 so we are using Hive metastore with version close to versions of hive-*.jars Spark uses
builder.configureContainer(HADOOP, container -> container.setDockerImageName("ghcr.io/trinodb/testing/hdp3.1-hive:" + hadoopImagesVersion));

builder.addConnector(
"hudi",
forHostPath(configDir.getPath("hudi.properties")),
CONTAINER_TRINO_ETC + "/catalog/hudi.properties");

builder.configureContainer(TESTS, dockerContainer -> dockerContainer.withEnv("S3_BUCKET", S3_BUCKET_NAME));

builder.addContainer(createSparkContainer())
Comment thread
codope marked this conversation as resolved.
Outdated
// Ensure Hive metastore is up; Spark needs to access it during startup
.containerDependsOn(SPARK_CONTAINER_NAME, HADOOP);

// Initialize buckets in Minio
FileAttribute<Set<PosixFilePermission>> posixFilePermissions = PosixFilePermissions.asFileAttribute(PosixFilePermissions.fromString("rw-r--r--"));
Path minioBucketDirectory;
try {
minioBucketDirectory = Files.createTempDirectory("trino-ci-test", posixFilePermissions);
minioBucketDirectory.toFile().deleteOnExit();
}
catch (IOException e) {
throw new UncheckedIOException(e);
}
builder.configureContainer(MINIO_CONTAINER_NAME, container ->
container.withCopyFileToContainer(forHostPath(minioBucketDirectory), "/data/" + S3_BUCKET_NAME));
}

@SuppressWarnings("resource")
private DockerContainer createSparkContainer()
{
DockerContainer container = new DockerContainer("ghcr.io/trinodb/testing/spark3-hudi:" + hadoopImagesVersion, SPARK_CONTAINER_NAME)
.withCopyFileToContainer(forHostPath(configDir.getPath("spark-defaults.conf")), "/spark/conf/spark-defaults.conf")
.withCopyFileToContainer(forHostPath(configDir.getPath("log4j.properties")), "/spark/conf/log4j.properties")
.waitingFor(forSelectedPorts(SPARK_THRIFT_PORT));

portBinder.exposePort(container, SPARK_THRIFT_PORT);

return container;
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.trino.tests.product.launcher.suite.suites;

import com.google.common.collect.ImmutableList;
import io.trino.tests.product.launcher.env.EnvironmentConfig;
import io.trino.tests.product.launcher.env.environment.EnvSinglenodeHudi;
import io.trino.tests.product.launcher.suite.Suite;
import io.trino.tests.product.launcher.suite.SuiteTestRun;

import java.util.List;

import static io.trino.tests.product.launcher.suite.SuiteTestRun.testOnEnvironment;

public class SuiteHudi
Comment thread
codope marked this conversation as resolved.
Outdated
extends Suite
{
@Override
public List<SuiteTestRun> getTestRuns(EnvironmentConfig config)
{
return ImmutableList.of(
testOnEnvironment(EnvSinglenodeHudi.class)
.withGroups("configured_features", "hudi")
.build());
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
connector.name=hudi
hive.metastore.uri=thrift://hadoop-master:9083
hive.s3.aws-access-key=minio-access-key
hive.s3.aws-secret-key=minio-secret-key
hive.s3.endpoint=http://minio:9080/
hive.s3.path-style-access=true
hive.s3.ssl.enabled=false
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Set everything to be logged to the console
log4j.rootCategory=WARN, console
log4j.appender.console=org.apache.log4j.ConsoleAppender
log4j.appender.console.target=System.err
log4j.appender.console.layout=org.apache.log4j.PatternLayout
log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n
# Settings to quiet third party logs that are too verbose
log4j.logger.org.sparkproject.jetty.util.component.AbstractLifeCycle=ERROR
log4j.logger.org.apache.parquet=ERROR
log4j.logger.parquet=ERROR
# SPARK-9183: Settings to avoid annoying messages when looking up nonexistent UDFs in SparkSQL with Hive support
log4j.logger.org.apache.hadoop.hive.metastore.RetryingHMSHandler=FATAL
log4j.logger.org.apache.hadoop.hive.ql.exec.FunctionRegistry=ERROR
# For deploying Spark ThriftServer
# SPARK-34128: Suppress undesirable TTransportException warnings involved in THRIFT-4805
log4j.appender.console.filter.1=org.apache.log4j.varia.StringMatchFilter
log4j.appender.console.filter.1.StringToMatch=Thrift error occurred during processing of message
log4j.appender.console.filter.1.AcceptOnMatch=false
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
spark.sql.catalogImplementation=hive
spark.sql.warehouse.dir=hdfs://hadoop-master:9000/user/hive/warehouse
spark.sql.hive.thriftServer.singleSession=false

spark.sql.extensions=org.apache.spark.sql.hudi.HoodieSparkSessionExtension
spark.sql.catalog.spark_catalog=org.apache.spark.sql.hudi.catalog.HoodieCatalog
spark.serializer=org.apache.spark.serializer.KryoSerializer

spark.hadoop.fs.defaultFS=hdfs://hadoop-master:9000
spark.hive.metastore.uris=thrift://hadoop-master:9083
spark.hive.metastore.warehouse.dir=hdfs://hadoop-master:9000/user/hive/warehouse
spark.hive.metastore.schema.verification=false

spark.hadoop.fs.s3.impl=org.apache.hadoop.fs.s3a.S3AFileSystem
spark.hadoop.fs.s3n.impl=org.apache.hadoop.fs.s3a.S3AFileSystem
spark.hadoop.fs.s3a.endpoint=http://minio:9080
spark.hadoop.fs.s3a.path.style.access=true
spark.hadoop.fs.s3a.access.key=minio-access-key
spark.hadoop.fs.s3a.secret.key=minio-secret-key
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ public final class TestGroups
public static final String DELTA_LAKE_EXCLUDE_73 = "delta-lake-exclude-73";
public static final String DELTA_LAKE_EXCLUDE_91 = "delta-lake-exclude-91";
public static final String DELTA_LAKE_EXCLUDE_113 = "delta-lake-exclude-113";
public static final String HUDI = "hudi";
public static final String PARQUET = "parquet";

private TestGroups() {}
Expand Down
Loading