Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 34 additions & 0 deletions .github/bin/redshift/delete-aws-redshift.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
#!/usr/bin/env bash

# Deletes the ephemeral Amazon Redshift CI cluster whose identifier was
# recorded by setup-aws-redshift.sh in the .cluster-identifier file that
# lives next to this script.
#
# Deliberately NOT using `set -e`: cleanup problems must never fail the build.
set -uo pipefail

# dirname handles every invocation form (./script, bash script, script on PATH),
# unlike ${BASH_SOURCE%/*} which returns the bare script name when there is no slash.
REDSHIFT_SCRIPTS_DIR="$(dirname "${BASH_SOURCE[0]}")"
CLUSTER_IDENTIFIER_FILE="${REDSHIFT_SCRIPTS_DIR}/.cluster-identifier"

if [[ ! -f "${CLUSTER_IDENTIFIER_FILE}" ]]; then
  # Nothing was provisioned (or it was already cleaned up) — nothing to do.
  echo "Missing file ${CLUSTER_IDENTIFIER_FILE}"
  exit 0
fi

REDSHIFT_CLUSTER_IDENTIFIER="$(< "${CLUSTER_IDENTIFIER_FILE}")"

echo "Deleting Amazon Redshift cluster ${REDSHIFT_CLUSTER_IDENTIFIER}"
REDSHIFT_DELETE_CLUSTER_OUTPUT="$(aws redshift delete-cluster --cluster-identifier "${REDSHIFT_CLUSTER_IDENTIFIER}" --skip-final-cluster-snapshot)"

if [[ -z "${REDSHIFT_DELETE_CLUSTER_OUTPUT}" ]]; then
  # delete-cluster prints a JSON cluster description on success; empty output
  # means the call failed (the CLI already wrote the error to stderr).
  echo "Failed to request deletion of Amazon Redshift cluster ${REDSHIFT_CLUSTER_IDENTIFIER}" >&2
  # Don't fail the build because of cleanup issues
  exit 0
fi

echo "Waiting for the Amazon Redshift cluster ${REDSHIFT_CLUSTER_IDENTIFIER} to be deleted"
if aws redshift wait cluster-deleted --cluster-identifier "${REDSHIFT_CLUSTER_IDENTIFIER}"; then
  echo "Amazon Redshift cluster ${REDSHIFT_CLUSTER_IDENTIFIER} has been deleted"
else
  echo "Amazon Redshift cluster ${REDSHIFT_CLUSTER_IDENTIFIER} deletion has timed out"
fi

# Remove the marker so a later cleanup run becomes a no-op.
rm -f -- "${CLUSTER_IDENTIFIER_FILE}"
exit 0
54 changes: 54 additions & 0 deletions .github/bin/redshift/setup-aws-redshift.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
#!/usr/bin/env bash

# Provisions an ephemeral, single-node Amazon Redshift cluster for CI tests.
# Intended to be *sourced* by the workflow step so that the REDSHIFT_*
# connection variables exported at the bottom are visible to the caller.
#
# Required environment variables:
#   AWS_REGION, REDSHIFT_SUBNET_GROUP_NAME,
#   REDSHIFT_VPC_SECURITY_GROUP_IDS, REDSHIFT_IAM_ROLES
set -euo pipefail

REDSHIFT_SCRIPTS_DIR="$(dirname "${BASH_SOURCE[0]}")"

# Redshift requires passwords containing at least a digit, a lower case letter
# and an upper case letter. Since there is no guarantee that the openssl output
# follows that password policy, explicitly append the string 'Red1!'.
REDSHIFT_PASSWORD="$(openssl rand -base64 16 | tr -dc 'a-zA-Z0-9')Red1!"

# Random suffix keeps concurrent CI runs from colliding on the identifier.
REDSHIFT_CLUSTER_IDENTIFIER="trino-redshift-ci-cluster-$(openssl rand -hex 8)"

# GNU date (Linux) and BSD date (macOS) spell relative dates differently; try both.
# The ttl tag lets an external reaper delete clusters leaked by cancelled runs.
REDSHIFT_CLUSTER_TTL=$(date -u -d "+2 hours" +"%Y-%m-%dT%H:%M:%SZ" 2>/dev/null || date -u -v "+2H" +"%Y-%m-%dT%H:%M:%SZ")

echo "Creating the Amazon Redshift cluster ${REDSHIFT_CLUSTER_IDENTIFIER} on the region ${AWS_REGION}."
REDSHIFT_CREATE_CLUSTER_OUTPUT=$(aws redshift create-cluster \
    --db-name testdb \
    --region "${AWS_REGION}" \
    --node-type dc2.large \
    --number-of-nodes 1 \
    --master-username admin \
    --master-user-password "${REDSHIFT_PASSWORD}" \
    --cluster-identifier "${REDSHIFT_CLUSTER_IDENTIFIER}" \
    --cluster-subnet-group-name "${REDSHIFT_SUBNET_GROUP_NAME}" \
    --cluster-type single-node \
    --vpc-security-group-ids "${REDSHIFT_VPC_SECURITY_GROUP_IDS}" \
    --iam-roles "${REDSHIFT_IAM_ROLES}" \
    --automated-snapshot-retention-period 0 \
    --publicly-accessible \
    --tags "Key=cloud,Value=aws" "Key=environment,Value=test" "Key=project,Value=trino-redshift" "Key=ttl,Value=${REDSHIFT_CLUSTER_TTL}")

if [[ -z "${REDSHIFT_CREATE_CLUSTER_OUTPUT}" ]]; then
  # create-cluster prints a JSON cluster description on success; empty output
  # means the call failed (the CLI already reported the error on stderr).
  echo "Failed to create the Amazon Redshift cluster ${REDSHIFT_CLUSTER_IDENTIFIER}" >&2
  exit 1
fi

# Record the identifier so delete-aws-redshift.sh can clean the cluster up later.
echo "${REDSHIFT_CLUSTER_IDENTIFIER}" > "${REDSHIFT_SCRIPTS_DIR}/.cluster-identifier"
echo "Waiting for the Amazon Redshift cluster ${REDSHIFT_CLUSTER_IDENTIFIER} on the region ${AWS_REGION} to be available."

# Wait for the cluster to become available
aws redshift wait cluster-available \
    --cluster-identifier "${REDSHIFT_CLUSTER_IDENTIFIER}"

echo "The Amazon Redshift cluster ${REDSHIFT_CLUSTER_IDENTIFIER} on the region ${AWS_REGION} is available for queries."

REDSHIFT_CLUSTER_DESCRIPTION=$(aws redshift describe-clusters --cluster-identifier "${REDSHIFT_CLUSTER_IDENTIFIER}")

# Extract the connection details consumed by the Maven test invocation.
# Assignment is kept separate from `export` so a failing jq still trips `set -e`.
REDSHIFT_ENDPOINT=$(jq -r '.Clusters[0].Endpoint.Address' <<< "${REDSHIFT_CLUSTER_DESCRIPTION}")
REDSHIFT_PORT=$(jq -r '.Clusters[0].Endpoint.Port' <<< "${REDSHIFT_CLUSTER_DESCRIPTION}")
REDSHIFT_CLUSTER_DATABASE_NAME=$(jq -r '.Clusters[0].DBName' <<< "${REDSHIFT_CLUSTER_DESCRIPTION}")
REDSHIFT_USER=$(jq -r '.Clusters[0].MasterUsername' <<< "${REDSHIFT_CLUSTER_DESCRIPTION}")
export REDSHIFT_ENDPOINT REDSHIFT_PORT REDSHIFT_CLUSTER_DATABASE_NAME REDSHIFT_USER REDSHIFT_PASSWORD
34 changes: 34 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -542,6 +542,7 @@ jobs:
- { modules: plugin/trino-postgresql }
- { modules: plugin/trino-raptor-legacy }
- { modules: plugin/trino-redis }
- { modules: plugin/trino-redshift, profile: cloud-tests }
- { modules: plugin/trino-singlestore }
- { modules: plugin/trino-sqlserver }
- { modules: testing/trino-faulttolerant-tests, profile: default }
Expand Down Expand Up @@ -601,6 +602,7 @@ jobs:
&& ! (contains(matrix.modules, 'trino-delta-lake') && contains(matrix.profile, 'gcs-tests'))
&& ! (contains(matrix.modules, 'trino-iceberg') && contains(matrix.profile, 'cloud-tests'))
&& ! (contains(matrix.modules, 'trino-bigquery') && contains(matrix.profile, 'cloud-tests-arrow'))
&& ! (contains(matrix.modules, 'trino-redshift') && contains(matrix.profile, 'cloud-tests'))
run: $MAVEN test ${MAVEN_TEST} -pl ${{ matrix.modules }} ${{ matrix.profile != '' && format('-P {0}', matrix.profile) || '' }}
# Additional tests for selected modules
- name: Cloud Delta Lake Tests
Expand Down Expand Up @@ -682,6 +684,38 @@ jobs:
-Dhive.hadoop2.azure-abfs-container="${ABFS_CONTAINER}" \
-Dhive.hadoop2.azure-abfs-account="${ABFS_ACCOUNT}" \
-Dhive.hadoop2.azure-abfs-access-key="${ABFS_ACCESS_KEY}"
- name: Cloud Redshift Tests
env:
AWS_REGION: ${{ vars.REDSHIFT_AWS_REGION }}
AWS_ACCESS_KEY_ID: ${{ secrets.REDSHIFT_AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.REDSHIFT_AWS_SECRET_ACCESS_KEY }}
REDSHIFT_SUBNET_GROUP_NAME: ${{ vars.REDSHIFT_SUBNET_GROUP_NAME }}
REDSHIFT_IAM_ROLES: ${{ vars.REDSHIFT_IAM_ROLES }}
REDSHIFT_VPC_SECURITY_GROUP_IDS: ${{ vars.REDSHIFT_VPC_SECURITY_GROUP_IDS }}
REDSHIFT_S3_TPCH_TABLES_ROOT: ${{ vars.REDSHIFT_S3_TPCH_TABLES_ROOT }}
if: >-
contains(matrix.modules, 'trino-redshift') && contains(matrix.profile, 'cloud-tests') &&
(env.AWS_ACCESS_KEY_ID != '' || env.REDSHIFT_SUBNET_GROUP_NAME != '')
run: |
source .github/bin/redshift/setup-aws-redshift.sh

$MAVEN test ${MAVEN_TEST} -pl :trino-redshift ${{ format('-P {0}', matrix.profile) }} \
-Dtest.redshift.jdbc.user="${REDSHIFT_USER}" \
-Dtest.redshift.jdbc.password="${REDSHIFT_PASSWORD}" \
-Dtest.redshift.jdbc.endpoint="${REDSHIFT_ENDPOINT}:${REDSHIFT_PORT}/" \
-Dtest.redshift.s3.tpch.tables.root="${REDSHIFT_S3_TPCH_TABLES_ROOT}" \
-Dtest.redshift.iam.role="${REDSHIFT_IAM_ROLES}" \
-Dtest.redshift.aws.region="${AWS_REGION}" \
-Dtest.redshift.aws.access-key="${AWS_ACCESS_KEY_ID}" \
-Dtest.redshift.aws.secret-key="${AWS_SECRET_ACCESS_KEY}"
- name: Cleanup ephemeral Redshift Cluster
env:
AWS_REGION: ${{ vars.REDSHIFT_AWS_REGION }}
AWS_ACCESS_KEY_ID: ${{ secrets.REDSHIFT_AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.REDSHIFT_AWS_SECRET_ACCESS_KEY }}
# Cancelled workflows may have left the ephemeral cluster running
if: always()
run: .github/bin/redshift/delete-aws-redshift.sh
- name: Sanitize artifact name
if: always()
run: |
Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -30,3 +30,4 @@ product-test-reports
/impacted-features.log
.github/test-matrix.yaml
.github/test-pt-matrix.yaml
.github/bin/redshift/.cluster-identifier
110 changes: 109 additions & 1 deletion plugin/trino-redshift/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ is a single dc2.large instance. Additionally, you will need a S3 bucket
containing TPCH tiny data in Parquet format. The files should be named:

```
s3://<your_bucket>/tpch/tiny/<table_name>.parquet
s3://<your_bucket>/tpch/tiny/<table_name>/*.parquet
```

To run the tests set the following system properties:
Expand All @@ -18,3 +18,111 @@ test.redshift.jdbc.password=<password>
test.redshift.s3.tpch.tables.root=<your_bucket>
test.redshift.iam.role=<your_iam_arn_to_access_bucket>
```

## Redshift Cluster CI Infrastructure setup

### AWS VPC setup
On _AWS VPC_ service create a VPC - `redshift-vpc`.
Key properties to configure on the VPC:

- `IPv4 CIDR`: `192.168.0.0/16`

Create for the `redshift-vpc` an Internet Gateway - `redshift-igw`.

Create a subnet for the VPC `redshift-public-subnet`.
In the route table of the subnet make sure to add the route
`Destination 0.0.0.0/0` to `Target` the previously created
internet gateway `redshift-igw`.

Create a Security Group `redshift-sg`.
Make the following adjustments in the security group to allow access to the
Redshift cluster from the general purpose Github CI runners:

- add an Inbound rule accepting `All traffic` from Source `0.0.0.0/0`
- add an Outbound rule for `All traffic` to destination `0.0.0.0/0`

### Amazon Redshift setup

Create a subnet group `cluster-subnet-group-trino-ci` associated with
the VPC `redshift-vpc` and the VPC subnet `redshift-public-subnet`.

### AWS IAM setup

Create the AWS IAM role `redshift-ci` and add to it
the `AmazonRedshiftAllCommandsFullAccess` policy.
This role will be passed to the ephemeral Redshift cluster to provide it with
the ability to execute `COPY` from AWS S3 bucket.

Ensure that the AWS IAM user used by the CI process does have the ability to
create ephemeral Amazon Redshift clusters:

```
{
"Version": "2012-10-17",
"Statement": [
{
"Sid": "PassRoleToRedshiftCluster",
"Effect": "Allow",
"Action": "iam:PassRole",
"Resource": "arn:aws:iam::894365193301:role/redshift-ci"
},
{
"Sid": "RedshiftClusterManagement",
"Effect": "Allow",
"Action": [
"redshift:DeleteTags",
"redshift:DeleteCluster",
"redshift:CreateTags",
"redshift:CreateCluster",
"redshift:DescribeClusters",
"redshift:DescribeLoggingStatus"
],
"Resource": "arn:aws:redshift:us-east-2:894365193301:cluster:trino-redshift-ci-cluster-*"
},
{
"Sid": "DescribeRedshiftVpcComponents",
"Effect": "Allow",
"Action": [
"ec2:DescribeInternetGateways",
"ec2:DescribeAddresses",
"ec2:DescribeAvailabilityZones",
"ec2:DescribeVpcs",
"ec2:DescribeAccountAttributes",
"ec2:DescribeSubnets",
"ec2:DescribeSecurityGroups"
],
"Resource": "*"
}
]
}
```

### AWS S3 setup

The `trino-redshift` tests rely on a Redshift cluster
having TPCH tables filled with data.
Create an AWS S3 bucket and add to it the parquet content
of `tpch` tables saved locally through the `trino-hive` connector
via commands like:

```
CREATE TABLE hive.tiny.table_name WITH (format = 'parquet') AS TABLE tpch.sf1.table_name
```

The content of the S3 bucket should look like this:

```
s3://<your_bucket>/tpch/tiny/<table_name>/*.parquet
```

where `table_name` is:

- `customer`
- `lineitem`
- `nation`
- `orders`
- `part`
- `partsupp`
- `region`
- `supplier`

24 changes: 24 additions & 0 deletions plugin/trino-redshift/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,7 @@
<configuration>
<excludes>
<exclude>**/TestRedshiftAutomaticJoinPushdown.java</exclude>
<exclude>**/TestRedshiftConnectorSmokeTest.java</exclude>
<exclude>**/TestRedshiftConnectorTest.java</exclude>
<exclude>**/TestRedshiftTableStatisticsReader.java</exclude>
<exclude>**/TestRedshiftTypeMapping.java</exclude>
Expand All @@ -195,5 +196,28 @@
</plugins>
</build>
</profile>

<profile>
<id>cloud-tests</id>
<activation>
<activeByDefault>false</activeByDefault>
</activation>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-surefire-plugin</artifactId>
<configuration>
<includes>
<!-- Run only the smoke tests of the connector on the CI environment due to unpredictable -->
<!-- locations of GitHub runners which can lead to increased client latency on the -->
<!-- JDBC operations performed on the ephemeral AWS Redshift cluster. -->
<include>**/TestRedshiftConnectorSmokeTest.java</include>
</includes>
</configuration>
</plugin>
</plugins>
</build>
</profile>
</profiles>
</project>
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ public static DistributedQueryRunner createRedshiftQueryRunner(
runner.installPlugin(new RedshiftPlugin());
runner.createCatalog(TEST_CATALOG, CONNECTOR_NAME, properties);

executeInRedshift("CREATE SCHEMA IF NOT EXISTS " + TEST_SCHEMA);
executeInRedshiftWithRetry("CREATE SCHEMA IF NOT EXISTS " + TEST_SCHEMA);
createUserIfNotExists(NON_GRANTED_USER, JDBC_PASSWORD);
createUserIfNotExists(GRANTED_USER, JDBC_PASSWORD);

Expand Down Expand Up @@ -197,7 +197,7 @@ private static synchronized void provisionTables(Session session, QueryRunner qu

private static void copyFromS3(QueryRunner queryRunner, Session session, String name)
{
String s3Path = format("%s/%s/%s.parquet", S3_TPCH_TABLES_ROOT, TPCH_CATALOG, name);
String s3Path = format("%s/%s/%s/%s/", S3_TPCH_TABLES_ROOT, TPCH_CATALOG, TINY_SCHEMA_NAME, name);
log.info("Creating table %s in Redshift copying from %s", name, s3Path);

// Create table in ephemeral Redshift cluster with no data
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.trino.plugin.redshift;

import com.google.common.collect.ImmutableMap;
import io.trino.plugin.jdbc.BaseJdbcConnectorSmokeTest;
import io.trino.testing.QueryRunner;
import io.trino.testing.TestingConnectorBehavior;

import static io.trino.plugin.redshift.RedshiftQueryRunner.createRedshiftQueryRunner;
import static io.trino.testing.TestingConnectorBehavior.SUPPORTS_RENAME_TABLE_ACROSS_SCHEMAS;

public class TestRedshiftConnectorSmokeTest
        extends BaseJdbcConnectorSmokeTest
{
    @Override
    protected boolean hasBehavior(TestingConnectorBehavior connectorBehavior)
    {
        // Redshift cannot move a table into a different schema; everything else
        // follows the base JDBC smoke-test defaults.
        if (connectorBehavior == SUPPORTS_RENAME_TABLE_ACROSS_SCHEMAS) {
            return false;
        }
        return super.hasBehavior(connectorBehavior);
    }

    @Override
    protected QueryRunner createQueryRunner()
            throws Exception
    {
        // No extra coordinator or connector properties are needed for the smoke run;
        // the runner provisions the required TPCH tables in the ephemeral cluster.
        return createRedshiftQueryRunner(ImmutableMap.of(), ImmutableMap.of(), REQUIRED_TPCH_TABLES);
    }
}