Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -219,6 +219,10 @@ jobs:
if [ "${AWS_ACCESS_KEY_ID}" != "" ]; then
source plugin/trino-hive-hadoop2/conf/hive-tests-${{ matrix.config }}.sh &&
plugin/trino-hive-hadoop2/bin/run_hive_s3_tests.sh
if [ matrix.config == 'config-hdp3' ]; then
# JsonSerde class needed for the S3 Select JSON tests is only available on hdp3.
plugin/trino-hive-hadoop2/bin/run_hive_s3_select_json_tests.sh
fi
fi
- name: Run Hive Glue Tests
env:
Expand Down
58 changes: 58 additions & 0 deletions plugin/trino-hive-hadoop2/bin/run_hive_s3_select_json_tests.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
#!/usr/bin/env bash

# Similar to run_hive_s3_tests.sh, but has only Amazon S3 Select JSON tests. This is in a separate file as the JsonSerDe
# class is only available in Hadoop 3.1 version, and so we would only test JSON pushdown against the 3.1 version.

set -euo pipefail -x

. "${BASH_SOURCE%/*}/common.sh"

abort_if_not_gib_impacted

check_vars S3_BUCKET S3_BUCKET_ENDPOINT \
AWS_ACCESS_KEY_ID AWS_SECRET_ACCESS_KEY

cleanup_hadoop_docker_containers
start_hadoop_docker_containers

test_directory="$(date '+%Y%m%d-%H%M%S')-$(uuidgen | sha1sum | cut -b 1-6)-s3select-json"

# insert AWS credentials
deploy_core_site_xml core-site.xml.s3-template \
AWS_ACCESS_KEY_ID AWS_SECRET_ACCESS_KEY S3_BUCKET_ENDPOINT

# create test tables
# can't use create_test_tables because the first table is created with different commands
table_path="s3a://${S3_BUCKET}/${test_directory}/trino_s3select_test_external_fs_json/"
exec_in_hadoop_master_container hadoop fs -mkdir -p "${table_path}"
exec_in_hadoop_master_container /docker/files/hadoop-put.sh /docker/files/test_table.json{,.gz,.bz2} "${table_path}"
exec_in_hadoop_master_container sudo -Eu hive beeline -u jdbc:hive2://localhost:10000/default -n hive -e "
CREATE EXTERNAL TABLE trino_s3select_test_external_fs_json(col_1 bigint, col_2 bigint)
ROW FORMAT SERDE 'org.apache.hive.hcatalog.data.JsonSerDe'
LOCATION '${table_path}'"

stop_unnecessary_hadoop_services

# restart hive-metastore to apply S3 changes in core-site.xml
docker exec "$(hadoop_master_container)" supervisorctl restart hive-metastore
retry check_hadoop

# run product tests
pushd "${PROJECT_ROOT}"
set +e
./mvnw ${MAVEN_TEST:--B} -pl :trino-hive-hadoop2 test -P test-hive-hadoop2-s3-select-json \
-DHADOOP_USER_NAME=hive \
-Dhive.hadoop2.metastoreHost=localhost \
-Dhive.hadoop2.metastorePort=9083 \
-Dhive.hadoop2.databaseName=default \
-Dhive.hadoop2.s3.awsAccessKey="${AWS_ACCESS_KEY_ID}" \
-Dhive.hadoop2.s3.awsSecretKey="${AWS_SECRET_ACCESS_KEY}" \
-Dhive.hadoop2.s3.writableBucket="${S3_BUCKET}" \
-Dhive.hadoop2.s3.testDirectory="${test_directory}"
EXIT_CODE=$?
set -e
popd

cleanup_hadoop_docker_containers

exit "${EXIT_CODE}"
11 changes: 0 additions & 11 deletions plugin/trino-hive-hadoop2/bin/run_hive_s3_tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,6 @@ check_vars S3_BUCKET S3_BUCKET_ENDPOINT \
AWS_ACCESS_KEY_ID AWS_SECRET_ACCESS_KEY

cleanup_hadoop_docker_containers

# Use Hadoop version 3.1 for S3 tests as the JSON SerDe class is not available in lower versions.
export HADOOP_BASE_IMAGE="ghcr.io/trinodb/testing/hdp3.1-hive"
start_hadoop_docker_containers

test_directory="$(date '+%Y%m%d-%H%M%S')-$(uuidgen | sha1sum | cut -b 1-6)"
Expand Down Expand Up @@ -69,14 +66,6 @@ exec_in_hadoop_master_container /usr/bin/hive -e "
STORED AS TEXTFILE
LOCATION '${table_path}'"

table_path="s3a://${S3_BUCKET}/${test_directory}/trino_s3select_test_external_fs_json/"
exec_in_hadoop_master_container hadoop fs -mkdir -p "${table_path}"
exec_in_hadoop_master_container hadoop fs -put -f /docker/files/test_table.json{,.gz,.bz2} "${table_path}"
exec_in_hadoop_master_container /usr/bin/hive -e "
CREATE EXTERNAL TABLE trino_s3select_test_external_fs_json(col_1 bigint, col_2 bigint)
ROW FORMAT SERDE 'org.apache.hive.hcatalog.data.JsonSerDe'
LOCATION '${table_path}'"

stop_unnecessary_hadoop_services

# restart hive-metastore to apply S3 changes in core-site.xml
Expand Down
47 changes: 47 additions & 0 deletions plugin/trino-hive-hadoop2/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -34,18 +34,48 @@
</dependency>

<!-- used by tests but also needed transitively -->
<dependency>
<groupId>io.trino</groupId>
<artifactId>trino-hadoop-toolkit</artifactId>
<scope>runtime</scope>
</dependency>

<dependency>
<groupId>io.trino</groupId>
<artifactId>trino-hdfs</artifactId>
<scope>runtime</scope>
</dependency>

<dependency>
<groupId>io.trino</groupId>
<artifactId>trino-plugin-toolkit</artifactId>
<scope>runtime</scope>
</dependency>

<dependency>
<groupId>io.trino.hadoop</groupId>
<artifactId>hadoop-apache</artifactId>
<scope>runtime</scope>
</dependency>

<dependency>
<groupId>io.airlift</groupId>
<artifactId>concurrent</artifactId>
<scope>runtime</scope>
</dependency>

<dependency>
<groupId>io.airlift</groupId>
<artifactId>json</artifactId>
<scope>runtime</scope>
</dependency>

<dependency>
<groupId>io.airlift</groupId>
<artifactId>stats</artifactId>
<scope>runtime</scope>
</dependency>

<dependency>
<groupId>com.qubole.rubix</groupId>
<artifactId>rubix-presto-shaded</artifactId>
Expand Down Expand Up @@ -146,6 +176,7 @@
<exclude>**/TestHiveAlluxioMetastore.java</exclude>
<exclude>**/TestHiveFileSystemS3.java</exclude>
<exclude>**/TestHiveFileSystemS3SelectPushdown.java</exclude>
<exclude>**/TestHiveFileSystemS3SelectJsonPushdown.java</exclude>
<exclude>**/TestHiveFileSystemWasb.java</exclude>
<exclude>**/TestHiveFileSystemAbfsAccessKey.java</exclude>
<exclude>**/TestHiveFileSystemAbfsOAuth.java</exclude>
Expand Down Expand Up @@ -190,6 +221,22 @@
</plugins>
</build>
</profile>
<profile>
<id>test-hive-hadoop2-s3-select-json</id>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-surefire-plugin</artifactId>
<configuration>
<includes>
<include>**/TestHiveFileSystemS3SelectJsonPushdown.java</include>
</includes>
</configuration>
</plugin>
</plugins>
</build>
</profile>
<profile>
<id>test-hive-hadoop2-wasb</id>
<build>
Expand Down
Loading