diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index b01d29f65306..c63e0bb139b5 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -476,7 +476,6 @@ jobs: - { modules: lib/trino-filesystem-azure, profile: cloud-tests } - { modules: lib/trino-filesystem-gcs, profile: cloud-tests } - { modules: lib/trino-filesystem-s3, profile: cloud-tests } - - { modules: lib/trino-hdfs, profile: cloud-tests } - { modules: plugin/trino-bigquery } - { modules: plugin/trino-bigquery, profile: cloud-tests-2 } - { modules: plugin/trino-cassandra } @@ -564,7 +563,6 @@ jobs: && ! (contains(matrix.modules, 'trino-filesystem-azure') && contains(matrix.profile, 'cloud-tests')) && ! (contains(matrix.modules, 'trino-filesystem-gcs') && contains(matrix.profile, 'cloud-tests')) && ! (contains(matrix.modules, 'trino-filesystem-s3') && contains(matrix.profile, 'cloud-tests')) - && ! (contains(matrix.modules, 'trino-hdfs') && contains(matrix.profile, 'cloud-tests')) run: $MAVEN test ${MAVEN_TEST} -pl ${{ matrix.modules }} ${{ matrix.profile != '' && format('-P {0}', matrix.profile) || '' }} # Additional tests for selected modules - name: HDFS file system cache isolated JVM tests @@ -572,19 +570,6 @@ jobs: if: contains(matrix.modules, 'trino-hdfs') run: | $MAVEN test ${MAVEN_TEST} -pl :trino-hdfs -P test-isolated-jvm-suites - - name: Hadoop FileSystem Cloud Tests - id: tests-hdfs - env: - AWS_ACCESS_KEY_ID: ${{ vars.TRINO_AWS_ACCESS_KEY_ID }} - AWS_SECRET_ACCESS_KEY: ${{ secrets.TRINO_AWS_SECRET_ACCESS_KEY }} - AWS_REGION: ${{ vars.TRINO_AWS_REGION }} - S3_BUCKET: ${{ vars.TRINO_S3_BUCKET }} - S3_BUCKET_ENDPOINT: "s3.${{ vars.TRINO_AWS_REGION }}.amazonaws.com" - if: >- - contains(matrix.modules, 'trino-hdfs') && contains(matrix.profile, 'cloud-tests') && - (env.CI_SKIP_SECRETS_PRESENCE_CHECKS != '' || env.AWS_ACCESS_KEY_ID != '' || env.AWS_SECRET_ACCESS_KEY != '') - run: | - $MAVEN test ${MAVEN_TEST} -pl :trino-hdfs -P cloud-tests - name: S3 FileSystem Cloud Tests id: tests-s3 env: @@ -802,7 +787,6 @@ jobs: has-failed-tests: >- ${{ steps.tests.outcome == 'failure' || steps.tests-hdfs-isolated.outcome == 'failure' - || steps.tests-hdfs.outcome == 'failure' || steps.tests-s3.outcome == 'failure' || steps.tests-azure.outcome == 'failure' || steps.tests-gcs.outcome == 'failure' diff --git a/docs/src/main/sphinx/connector/delta-lake.md b/docs/src/main/sphinx/connector/delta-lake.md index 481ed9f55d66..efaee7acfe08 100644 --- a/docs/src/main/sphinx/connector/delta-lake.md +++ b/docs/src/main/sphinx/connector/delta-lake.md @@ -67,8 +67,9 @@ The connector supports accessing the following file systems: * [](/object-storage/file-system-s3) * [](/object-storage/file-system-hdfs) -You must enable and configure the specific file system access. [Legacy -support](file-system-legacy) is not recommended and will be removed. +Enable and configure the file system that your catalog uses. Use +`fs.hadoop.enabled` only for HDFS; see [legacy file system +support](file-system-legacy) for migration details. ### Delta Lake general configuration properties diff --git a/docs/src/main/sphinx/connector/hive.md b/docs/src/main/sphinx/connector/hive.md index fac97796f954..d86491f5692c 100644 --- a/docs/src/main/sphinx/connector/hive.md +++ b/docs/src/main/sphinx/connector/hive.md @@ -342,8 +342,9 @@ The connector supports accessing the following file systems: * [](/object-storage/file-system-s3) * [](/object-storage/file-system-hdfs) -You must enable and configure the specific file system access. [Legacy -support](file-system-legacy) is not recommended and will be removed. +Enable and configure the file system that your catalog uses. Use +`fs.hadoop.enabled` only for HDFS; see [legacy file system +support](file-system-legacy) for migration details. (hive-fte-support)= ### Fault-tolerant execution support diff --git a/docs/src/main/sphinx/connector/hudi.md b/docs/src/main/sphinx/connector/hudi.md index bcffc01b5063..598e547f874d 100644 --- a/docs/src/main/sphinx/connector/hudi.md +++ b/docs/src/main/sphinx/connector/hudi.md @@ -114,8 +114,9 @@ The connector supports accessing the following file systems: * [](/object-storage/file-system-s3) * [](/object-storage/file-system-hdfs) -You must enable and configure the specific file system access. [Legacy -support](file-system-legacy) is not recommended and will be removed. +Enable and configure the file system that your catalog uses. Use +`fs.hadoop.enabled` only for HDFS; see [legacy file system +support](file-system-legacy) for migration details. ## SQL support diff --git a/docs/src/main/sphinx/connector/iceberg.md b/docs/src/main/sphinx/connector/iceberg.md index def1c4b8ed43..9bb3cd7e88ad 100644 --- a/docs/src/main/sphinx/connector/iceberg.md +++ b/docs/src/main/sphinx/connector/iceberg.md @@ -278,8 +278,9 @@ The connector supports accessing the following file systems: * [](/object-storage/file-system-s3) * [](/object-storage/file-system-hdfs) -You must enable and configure the specific file system access. [Legacy -support](file-system-legacy) is not recommended and will be removed. +Enable and configure the file system that your catalog uses. Use +`fs.hadoop.enabled` only for HDFS; see [legacy file system +support](file-system-legacy) for migration details. ## Type mapping diff --git a/docs/src/main/sphinx/connector/lakehouse.md b/docs/src/main/sphinx/connector/lakehouse.md index 1c65e0f33c3f..1e31796347c3 100644 --- a/docs/src/main/sphinx/connector/lakehouse.md +++ b/docs/src/main/sphinx/connector/lakehouse.md @@ -65,7 +65,9 @@ The connector supports accessing the following file systems: * [](/object-storage/file-system-s3) * [](/object-storage/file-system-hdfs) -You must enable and configure the specific file system access. +Enable and configure the file system that your catalog uses. Use +`fs.hadoop.enabled` only for HDFS; see [legacy file system +support](file-system-legacy) for migration details. ## Examples diff --git a/docs/src/main/sphinx/object-storage.md b/docs/src/main/sphinx/object-storage.md index b48d914c76d3..0b37cbcadfd3 100644 --- a/docs/src/main/sphinx/object-storage.md +++ b/docs/src/main/sphinx/object-storage.md @@ -45,9 +45,8 @@ system support. - Activate the [native implementation for S3 storage support](/object-storage/file-system-s3). Defaults to `false`. * - `fs.hadoop.enabled` - - Activate [support for HDFS](/object-storage/file-system-hdfs) and [legacy - support for other file systems](file-system-legacy) using the HDFS - libraries. Defaults to `false`. + - Activate [support for HDFS](/object-storage/file-system-hdfs) using the + HDFS libraries. Defaults to `false`. ::: (file-system-native)= @@ -68,21 +67,19 @@ for use. (file-system-legacy)= ## Legacy file system support -The default behavior uses legacy libraries that originate from the Hadoop -ecosystem. It should only be used for accessing the Hadoop Distributed File +The HDFS libraries are used for accessing the Hadoop Distributed File System (HDFS): - [](/object-storage/file-system-hdfs) -All four connectors can use the deprecated `hive.*` properties for access to -other object storage system as *legacy* support. These properties will be -removed in a future release. Additional documentation is available with the Hive -connector and relevant migration guides pages: +Legacy object storage support through `fs.hadoop.enabled` and deprecated +`hive.*` properties is no longer available. Use the native implementations for +Azure Storage, Google Cloud Storage, and S3. If you are migrating older catalog +configurations, refer to the following guides: -- [](/connector/hive) - [Azure Storage migration from hive.azure.* properties](fs-legacy-azure-migration) - [Google Cloud Storage migration from hive.gcs.* properties](fs-legacy-gcs-migration) -- [S3 migration from hive.s3.* properties](fs-legacy-s3-migration) +- [S3 migration from hive.s3.* properties](fs-legacy-s3-migration) (object-storage-other)= ## Other object storage support diff --git a/docs/src/main/sphinx/object-storage/file-system-azure.md b/docs/src/main/sphinx/object-storage/file-system-azure.md index 8ca7ec1da334..72627fb4da86 100644 --- a/docs/src/main/sphinx/object-storage/file-system-azure.md +++ b/docs/src/main/sphinx/object-storage/file-system-azure.md @@ -148,17 +148,19 @@ storage accounts: (fs-legacy-azure-migration)= ## Migration from legacy Azure Storage file system -Trino includes legacy Azure Storage support to use with a catalog using the -Delta Lake, Hive, Hudi, or Iceberg connectors. Upgrading existing deployments to -the current native implementation is recommended. Legacy support is deprecated -and will be removed. +Previous Trino releases included a legacy Azure Storage file system +implementation used by catalogs configured with `fs.hadoop.enabled` and +`hive.azure.*` properties. That legacy support has been removed. Use the +native Azure file system implementation. To migrate a catalog to use the native file system implementation for Azure, make the following edits to your catalog configuration: 1. Add the `fs.native-azure.enabled=true` catalog configuration property. -2. Configure the `azure.auth-type` catalog configuration property. -3. Refer to the following table to rename your existing legacy catalog +2. If your catalog enabled `fs.hadoop.enabled` only for legacy Azure Storage + access, remove that property. +3. Configure the `azure.auth-type` catalog configuration property. +4. Refer to the following table to rename your existing legacy catalog configuration properties to the corresponding native configuration properties. Supported configuration values are identical unless otherwise noted. @@ -186,7 +188,7 @@ make the following edits to your catalog configuration: - ::: -4. Remove the following legacy configuration properties if they exist in your +5. Remove the following legacy configuration properties if they exist in your catalog configuration: * `hive.azure.abfs-storage-account` diff --git a/docs/src/main/sphinx/object-storage/file-system-gcs.md b/docs/src/main/sphinx/object-storage/file-system-gcs.md index 195ab2782afd..25ecda690d61 100644 --- a/docs/src/main/sphinx/object-storage/file-system-gcs.md +++ b/docs/src/main/sphinx/object-storage/file-system-gcs.md @@ -91,16 +91,18 @@ Cloud Storage: (fs-legacy-gcs-migration)= ## Migration from legacy Google Cloud Storage file system -Trino includes legacy Google Cloud Storage support to use with a catalog using -the Delta Lake, Hive, Hudi, or Iceberg connectors. Upgrading existing -deployments to the current native implementation is recommended. Legacy support -is deprecated and will be removed. +Previous Trino releases included a legacy Google Cloud Storage file system +implementation used by catalogs configured with `fs.hadoop.enabled` and +`hive.gcs.*` properties. That legacy support has been removed. Use the native +Google Cloud Storage file system implementation. To migrate a catalog to use the native file system implementation for Google Cloud Storage, make the following edits to your catalog configuration: 1. Add the `fs.native-gcs.enabled=true` catalog configuration property. -2. Refer to the following table to rename your existing legacy catalog +2. If your catalog enabled `fs.hadoop.enabled` only for legacy Google Cloud + Storage access, remove that property. +3. Refer to the following table to rename your existing legacy catalog configuration properties to the corresponding native configuration properties. Supported configuration values are identical unless otherwise noted. diff --git a/docs/src/main/sphinx/object-storage/file-system-s3.md b/docs/src/main/sphinx/object-storage/file-system-s3.md index 12054a722e72..d53824de822f 100644 --- a/docs/src/main/sphinx/object-storage/file-system-s3.md +++ b/docs/src/main/sphinx/object-storage/file-system-s3.md @@ -290,16 +290,18 @@ Example JSON configuration: (fs-legacy-s3-migration)= ## Migration from legacy S3 file system -Trino includes legacy Amazon S3 support to use with a catalog using the Delta -Lake, Hive, Hudi, or Iceberg connectors. Upgrading existing deployments to the -current native implementation is recommended. Legacy support is deprecated and -will be removed. +Previous Trino releases included a legacy Amazon S3 file system implementation +used by catalogs configured with `fs.hadoop.enabled` and `hive.s3.*` +properties. That legacy support has been removed. Use the native S3 file +system implementation. To migrate a catalog to use the native file system implementation for S3, make the following edits to your catalog configuration: 1. Add the `fs.native-s3.enabled=true` catalog configuration property. -2. Refer to the following table to rename your existing legacy catalog +2. If your catalog enabled `fs.hadoop.enabled` only for legacy S3 access, + remove that property. +3. Refer to the following table to rename your existing legacy catalog configuration properties to the corresponding native configuration properties. Supported configuration values are identical unless otherwise noted. @@ -395,7 +397,7 @@ the following edits to your catalog configuration: - ::: -1. Remove the following legacy configuration properties if they exist in your +4. Remove the following legacy configuration properties if they exist in your catalog configuration: * `hive.s3.storage-class` diff --git a/lib/trino-filesystem-manager/src/main/java/io/trino/filesystem/manager/FileSystemModule.java b/lib/trino-filesystem-manager/src/main/java/io/trino/filesystem/manager/FileSystemModule.java index 411076795704..7ff36f3ddd9b 100644 --- a/lib/trino-filesystem-manager/src/main/java/io/trino/filesystem/manager/FileSystemModule.java +++ b/lib/trino-filesystem-manager/src/main/java/io/trino/filesystem/manager/FileSystemModule.java @@ -80,13 +80,7 @@ protected void setup(Binder binder) newOptionalBinder(binder, HdfsFileSystemLoader.class); if (config.isHadoopEnabled()) { - HdfsFileSystemLoader loader = new HdfsFileSystemLoader( - getProperties(), - !config.isNativeAzureEnabled(), - !config.isNativeGcsEnabled(), - !config.isNativeS3Enabled(), - catalogName, - context); + HdfsFileSystemLoader loader = new HdfsFileSystemLoader(getProperties(), catalogName, context); loader.configure().forEach((name, securitySensitive) -> consumeProperty(new ConfigPropertyMetadata(name, securitySensitive))); diff --git a/lib/trino-filesystem-manager/src/main/java/io/trino/filesystem/manager/HdfsFileSystemLoader.java b/lib/trino-filesystem-manager/src/main/java/io/trino/filesystem/manager/HdfsFileSystemLoader.java index 19b1c470eced..653f9ceaad84 100644 --- a/lib/trino-filesystem-manager/src/main/java/io/trino/filesystem/manager/HdfsFileSystemLoader.java +++ b/lib/trino-filesystem-manager/src/main/java/io/trino/filesystem/manager/HdfsFileSystemLoader.java @@ -39,13 +39,7 @@ final class HdfsFileSystemLoader private final HdfsClassLoader classLoader; private final Object manager; - public HdfsFileSystemLoader( - Map config, - boolean azureEnabled, - boolean gcsEnabled, - boolean s3Enabled, - String catalogName, - ConnectorContext context) + public HdfsFileSystemLoader(Map config, String catalogName, ConnectorContext context) { Class clazz = tryLoadExistingHdfsManager(); @@ -73,8 +67,8 @@ public HdfsFileSystemLoader( } try (var _ = new ThreadContextClassLoader(classLoader)) { - manager = clazz.getConstructor(Map.class, boolean.class, boolean.class, boolean.class, String.class, ConnectorContext.class) - .newInstance(config, azureEnabled, gcsEnabled, s3Enabled, catalogName, context); + manager = clazz.getConstructor(Map.class, String.class, ConnectorContext.class) + .newInstance(config, catalogName, context); } catch (ReflectiveOperationException e) { throw new RuntimeException(e); diff --git a/lib/trino-hdfs/pom.xml b/lib/trino-hdfs/pom.xml index 26114ba88dfb..cc765439644d 100644 --- a/lib/trino-hdfs/pom.xml +++ b/lib/trino-hdfs/pom.xml @@ -13,33 +13,12 @@ ${project.artifactId} Trino - Legacy HDFS file system support - - - com.amazonaws - aws-java-sdk-core - - - - com.amazonaws - aws-java-sdk-s3 - - - - com.amazonaws - aws-java-sdk-sts - - - - com.fasterxml.jackson.core - jackson-databind - - - - com.google.cloud.bigdataoss - gcs-connector - shaded - + + + true + + com.google.errorprone error_prone_annotations @@ -62,21 +41,11 @@ bootstrap - - io.airlift - concurrent - - io.airlift configuration - - io.airlift - http-client - - io.airlift log @@ -92,11 +61,6 @@ units - - io.opentelemetry.instrumentation - opentelemetry-aws-sdk-1.11 - - io.trino trino-filesystem @@ -137,30 +101,12 @@ jmxutils - - com.fasterxml.jackson.core - jackson-annotations - provided - - io.airlift slice provided - - io.opentelemetry - opentelemetry-api - provided - - - - io.opentelemetry - opentelemetry-context - provided - - io.trino trino-spi @@ -168,8 +114,8 @@ - com.adobe.testing - s3mock-testcontainers + io.airlift + concurrent test @@ -191,12 +137,6 @@ test - - io.trino - trino-client - test - - io.trino trino-filesystem @@ -295,16 +235,6 @@ org.apache.maven.plugins maven-enforcer-plugin - - - - - - com.amazonaws:*:* - - - - org.apache.maven.plugins @@ -312,7 +242,6 @@ **/TestFileSystemCache.java - **/TestTrinoS3FileSystemAwsS3.java @@ -339,21 +268,5 @@ - - cloud-tests - - - - org.apache.maven.plugins - maven-surefire-plugin - - - **/TestTrinoS3FileSystemAwsS3.java - - - - - - diff --git a/lib/trino-hdfs/src/main/java/com/google/cloud/hadoop/fs/gcs/TrinoGoogleHadoopFileSystemConfiguration.java b/lib/trino-hdfs/src/main/java/com/google/cloud/hadoop/fs/gcs/TrinoGoogleHadoopFileSystemConfiguration.java deleted file mode 100644 index b3026fa04488..000000000000 --- a/lib/trino-hdfs/src/main/java/com/google/cloud/hadoop/fs/gcs/TrinoGoogleHadoopFileSystemConfiguration.java +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package com.google.cloud.hadoop.fs.gcs; - -import com.google.cloud.hadoop.repackaged.gcs.com.google.cloud.hadoop.gcsio.GoogleCloudStorageOptions; -import org.apache.hadoop.conf.Configuration; - -/** - * Wrapper class used to access package-private methods which - * convert {@link Configuration} to gcs hadoop-connectors specific - * configuration instances. - */ -public final class TrinoGoogleHadoopFileSystemConfiguration -{ - private TrinoGoogleHadoopFileSystemConfiguration() {} - - public static GoogleCloudStorageOptions.Builder getGcsOptionsBuilder(Configuration configuration) - { - return GoogleHadoopFileSystemConfiguration.getGcsOptionsBuilder(configuration); - } -} diff --git a/lib/trino-hdfs/src/main/java/io/trino/filesystem/hdfs/HadoopPaths.java b/lib/trino-hdfs/src/main/java/io/trino/filesystem/hdfs/HadoopPaths.java index 28bc715c647e..8b2b8343696a 100644 --- a/lib/trino-hdfs/src/main/java/io/trino/filesystem/hdfs/HadoopPaths.java +++ b/lib/trino-hdfs/src/main/java/io/trino/filesystem/hdfs/HadoopPaths.java @@ -13,40 +13,15 @@ */ package io.trino.filesystem.hdfs; -import com.google.common.base.VerifyException; import io.trino.filesystem.Location; import org.apache.hadoop.fs.Path; -import java.net.URI; -import java.net.URISyntaxException; - public final class HadoopPaths { private HadoopPaths() {} public static Path hadoopPath(Location location) { - // hack to preserve the original path for S3 if necessary - String path = location.toString(); - Path hadoopPath = new Path(path); - if ("s3".equals(hadoopPath.toUri().getScheme()) && !path.equals(hadoopPath.toString())) { - return new Path(toPathEncodedUri(location)); - } - return hadoopPath; - } - - private static URI toPathEncodedUri(Location location) - { - try { - return new URI( - location.scheme().orElse(null), - location.host().orElse(null), - "/" + location.path(), - null, - location.path()); - } - catch (URISyntaxException e) { - throw new VerifyException("Failed to convert location to URI: " + location, e); - } + return new Path(location.toString()); } } diff --git a/lib/trino-hdfs/src/main/java/io/trino/filesystem/hdfs/HdfsFileSystem.java b/lib/trino-hdfs/src/main/java/io/trino/filesystem/hdfs/HdfsFileSystem.java index 0c0619ef0ddb..cf64f8e7c7ac 100644 --- a/lib/trino-hdfs/src/main/java/io/trino/filesystem/hdfs/HdfsFileSystem.java +++ b/lib/trino-hdfs/src/main/java/io/trino/filesystem/hdfs/HdfsFileSystem.java @@ -13,7 +13,6 @@ */ package io.trino.filesystem.hdfs; -import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableSet; import io.airlift.stats.TimeStat; import io.trino.filesystem.FileIterator; @@ -21,7 +20,6 @@ import io.trino.filesystem.TrinoFileSystem; import io.trino.filesystem.TrinoInputFile; import io.trino.filesystem.TrinoOutputFile; -import io.trino.hdfs.FileSystemWithBatchDelete; import io.trino.hdfs.HdfsContext; import io.trino.hdfs.HdfsEnvironment; import io.trino.hdfs.TrinoHdfsFileSystemStats; @@ -59,13 +57,6 @@ class HdfsFileSystem implements TrinoFileSystem { - private static final Map KNOWN_HIERARCHICAL_FILESYSTEMS = ImmutableMap.builder() - .put("s3", false) - .put("s3a", false) - .put("s3n", false) - .put("hdfs", true) - .buildOrThrow(); - private final HdfsEnvironment environment; private final HdfsContext context; private final TrinoHdfsFileSystemStats stats; @@ -142,19 +133,14 @@ public void deleteFiles(Collection locations) for (Entry> directoryWithPaths : pathsGroupedByDirectory.entrySet()) { FileSystem rawFileSystem = getRawFileSystem(environment.getFileSystem(context, directoryWithPaths.getKey())); environment.doAs(context.getIdentity(), () -> { - if (rawFileSystem instanceof FileSystemWithBatchDelete fileSystemWithBatchDelete) { - fileSystemWithBatchDelete.deleteFiles(directoryWithPaths.getValue()); - } - else { - for (Path path : directoryWithPaths.getValue()) { - stats.getDeleteFileCalls().newCall(); - try (TimeStat.BlockTimer _ = stats.getDeleteFileCalls().time()) { - rawFileSystem.delete(path, false); - } - catch (IOException e) { - stats.getDeleteFileCalls().recordException(e); - throw e; - } + for (Path path : directoryWithPaths.getValue()) { + stats.getDeleteFileCalls().newCall(); + try (TimeStat.BlockTimer _ = stats.getDeleteFileCalls().time()) { + rawFileSystem.delete(path, false); + } + catch (IOException e) { + stats.getDeleteFileCalls().recordException(e); + throw e; } } return null; @@ -437,9 +423,8 @@ public Optional createTemporaryDirectory(Location targetLocation, Stri private boolean hierarchical(FileSystem fileSystem, Location rootLocation) { - Boolean knownResult = KNOWN_HIERARCHICAL_FILESYSTEMS.get(fileSystem.getScheme()); - if (knownResult != null) { - return knownResult; + if ("hdfs".equals(fileSystem.getScheme())) { + return true; } Boolean cachedResult = hierarchicalFileSystemCache.get(fileSystem); diff --git a/lib/trino-hdfs/src/main/java/io/trino/filesystem/hdfs/HdfsFileSystemManager.java b/lib/trino-hdfs/src/main/java/io/trino/filesystem/hdfs/HdfsFileSystemManager.java index 44fff919c843..c7a947e16402 100644 --- a/lib/trino-hdfs/src/main/java/io/trino/filesystem/hdfs/HdfsFileSystemManager.java +++ b/lib/trino-hdfs/src/main/java/io/trino/filesystem/hdfs/HdfsFileSystemManager.java @@ -21,10 +21,6 @@ import io.trino.filesystem.TrinoFileSystemFactory; import io.trino.hdfs.HdfsModule; import io.trino.hdfs.authentication.HdfsAuthenticationModule; -import io.trino.hdfs.azure.HiveAzureModule; -import io.trino.hdfs.cos.HiveCosModule; -import io.trino.hdfs.gcs.HiveGcsModule; -import io.trino.hdfs.s3.HiveS3Module; import io.trino.plugin.base.ConnectorContextModule; import io.trino.plugin.base.jmx.ConnectorObjectNameGeneratorModule; import io.trino.plugin.base.jmx.MBeanServerModule; @@ -45,9 +41,6 @@ public final class HdfsFileSystemManager public HdfsFileSystemManager( Map config, - boolean azureEnabled, - boolean gcsEnabled, - boolean s3Enabled, String catalogName, ConnectorContext context) { @@ -60,19 +53,8 @@ public HdfsFileSystemManager( modules.add(new HdfsFileSystemModule()); modules.add(new HdfsModule()); modules.add(new HdfsAuthenticationModule()); - modules.add(new HiveCosModule()); modules.add(new ConnectorContextModule(catalogName, context)); - if (azureEnabled) { - modules.add(new HiveAzureModule()); - } - if (gcsEnabled) { - modules.add(new HiveGcsModule()); - } - if (s3Enabled) { - modules.add(new HiveS3Module()); - } - bootstrap = new Bootstrap("io.trino.bootstrap.catalog." + catalogName, modules) .doNotInitializeLogging() .setRequiredConfigurationProperties(Map.of()) diff --git a/lib/trino-hdfs/src/main/java/io/trino/filesystem/hdfs/HdfsOutputFile.java b/lib/trino-hdfs/src/main/java/io/trino/filesystem/hdfs/HdfsOutputFile.java index 37542f89e255..63496988dfad 100644 --- a/lib/trino-hdfs/src/main/java/io/trino/filesystem/hdfs/HdfsOutputFile.java +++ b/lib/trino-hdfs/src/main/java/io/trino/filesystem/hdfs/HdfsOutputFile.java @@ -13,16 +13,13 @@ */ package io.trino.filesystem.hdfs; -import com.google.cloud.hadoop.fs.gcs.GoogleHadoopFileSystem; import io.airlift.stats.TimeStat; import io.trino.filesystem.Location; import io.trino.filesystem.TrinoOutputFile; import io.trino.hdfs.CallStats; import io.trino.hdfs.HdfsContext; import io.trino.hdfs.HdfsEnvironment; -import io.trino.hdfs.MemoryAwareFileSystem; import io.trino.hdfs.authentication.HdfsAuthentication.ExceptionAction; -import io.trino.hdfs.gcs.GcsAtomicOutputStream; import io.trino.memory.context.AggregatedMemoryContext; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; @@ -34,8 +31,6 @@ import static io.trino.filesystem.hdfs.HadoopPaths.hadoopPath; import static io.trino.filesystem.hdfs.HdfsFileSystem.withCause; -import static io.trino.hdfs.FileSystemUtils.getRawFileSystem; -import static io.trino.memory.context.AggregatedMemoryContext.newSimpleAggregatedMemoryContext; import static java.util.Objects.requireNonNull; class HdfsOutputFile @@ -59,44 +54,27 @@ public HdfsOutputFile(Location location, HdfsEnvironment environment, HdfsContex public OutputStream create(AggregatedMemoryContext memoryContext) throws IOException { - return create(false, memoryContext); + requireNonNull(memoryContext, "memoryContext is null"); + // Hadoop output streams do not expose allocation details for memory tracking. + return create(false); } @Override public void createOrOverwrite(byte[] data) throws IOException { - try (OutputStream out = create(true, newSimpleAggregatedMemoryContext())) { + try (OutputStream out = create(true)) { out.write(data); } } - @Override - public void createExclusive(byte[] data) - throws IOException - { - Path file = hadoopPath(location); - FileSystem fileSystem = getRawFileSystem(environment.getFileSystem(context, file)); - if (fileSystem instanceof GoogleHadoopFileSystem) { - GcsAtomicOutputStream atomicOutputStream = new GcsAtomicOutputStream(environment, context, file); - atomicOutputStream.write(data); - atomicOutputStream.close(); - return; - } - throw new UnsupportedOperationException("createExclusive not supported for " + fileSystem); - } - - private OutputStream create(boolean overwrite, AggregatedMemoryContext memoryContext) + private OutputStream create(boolean overwrite) throws IOException { createFileCallStat.newCall(); Path file = hadoopPath(location); FileSystem fileSystem = environment.getFileSystem(context, file); - FileSystem rawFileSystem = getRawFileSystem(fileSystem); try (TimeStat.BlockTimer _ = createFileCallStat.time()) { - if (rawFileSystem instanceof MemoryAwareFileSystem memoryAwareFileSystem) { - return create(() -> memoryAwareFileSystem.create(file, memoryContext)); - } return create(() -> fileSystem.create(file, overwrite)); } catch (org.apache.hadoop.fs.FileAlreadyExistsException e) { diff --git a/lib/trino-hdfs/src/main/java/io/trino/hdfs/FileSystemWithBatchDelete.java b/lib/trino-hdfs/src/main/java/io/trino/hdfs/FileSystemWithBatchDelete.java deleted file mode 100644 index d3dad98afa6d..000000000000 --- a/lib/trino-hdfs/src/main/java/io/trino/hdfs/FileSystemWithBatchDelete.java +++ /dev/null @@ -1,25 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package io.trino.hdfs; - -import org.apache.hadoop.fs.Path; - -import java.io.IOException; -import java.util.Collection; - -public interface FileSystemWithBatchDelete -{ - void deleteFiles(Collection paths) - throws IOException; -} diff --git a/lib/trino-hdfs/src/main/java/io/trino/hdfs/HdfsEnvironment.java b/lib/trino-hdfs/src/main/java/io/trino/hdfs/HdfsEnvironment.java index 7a7c23c98970..7862f1194cff 100644 --- a/lib/trino-hdfs/src/main/java/io/trino/hdfs/HdfsEnvironment.java +++ b/lib/trino-hdfs/src/main/java/io/trino/hdfs/HdfsEnvironment.java @@ -13,15 +13,11 @@ */ package io.trino.hdfs; -import com.google.cloud.hadoop.repackaged.gcs.com.google.api.services.storage.Storage; -import com.google.common.annotations.VisibleForTesting; import com.google.inject.Inject; import io.airlift.log.Logger; -import io.opentelemetry.api.OpenTelemetry; import io.trino.hadoop.HadoopNative; import io.trino.hdfs.authentication.HdfsAuthentication; import io.trino.hdfs.authentication.HdfsAuthentication.ExceptionAction; -import io.trino.hdfs.gcs.GcsStorageFactory; import io.trino.spi.Plugin; import io.trino.spi.classloader.ThreadContextClassLoader; import io.trino.spi.security.ConnectorIdentity; @@ -36,7 +32,6 @@ import java.lang.reflect.Field; import java.util.Optional; -import static io.trino.hdfs.FileSystemUtils.getRawFileSystem; import static java.util.Objects.requireNonNull; public class HdfsEnvironment @@ -48,33 +43,18 @@ public class HdfsEnvironment private static final Logger log = Logger.get(HdfsEnvironment.class); - private final OpenTelemetry openTelemetry; private final HdfsConfiguration hdfsConfiguration; private final HdfsAuthentication hdfsAuthentication; private final Optional newDirectoryPermissions; private final boolean verifyChecksum; - private final Optional gcsStorageFactory; - - @VisibleForTesting - public HdfsEnvironment(HdfsConfiguration hdfsConfiguration, HdfsConfig config, HdfsAuthentication hdfsAuthentication) - { - this(OpenTelemetry.noop(), hdfsConfiguration, config, hdfsAuthentication, Optional.empty()); - } @Inject - public HdfsEnvironment( - OpenTelemetry openTelemetry, - HdfsConfiguration hdfsConfiguration, - HdfsConfig config, - HdfsAuthentication hdfsAuthentication, - Optional gcsStorageFactory) + public HdfsEnvironment(HdfsConfiguration hdfsConfiguration, HdfsConfig config, HdfsAuthentication hdfsAuthentication) { - this.openTelemetry = requireNonNull(openTelemetry, "openTelemetry is null"); this.hdfsConfiguration = requireNonNull(hdfsConfiguration, "hdfsConfiguration is null"); this.verifyChecksum = config.isVerifyChecksum(); this.hdfsAuthentication = requireNonNull(hdfsAuthentication, "hdfsAuthentication is null"); this.newDirectoryPermissions = config.getNewDirectoryFsPermissions(); - this.gcsStorageFactory = requireNonNull(gcsStorageFactory, "gcsStorageFactory is null"); } @PreDestroy @@ -107,9 +87,6 @@ public FileSystem getFileSystem(ConnectorIdentity identity, Path path, Configura return hdfsAuthentication.doAs(identity, () -> { FileSystem fileSystem = path.getFileSystem(configuration); fileSystem.setVerifyChecksum(verifyChecksum); - if (getRawFileSystem(fileSystem) instanceof OpenTelemetryAwareFileSystem fs) { - fs.setOpenTelemetry(openTelemetry); - } return fileSystem; }); } @@ -128,13 +105,6 @@ public T doAs(ConnectorIdentity identity, ExceptionAction action) } } - public Storage createGcsStorage(HdfsContext context, Path path) - { - return gcsStorageFactory - .orElseThrow(() -> new IllegalStateException("GcsStorageFactory not set")) - .create(this, context, path); - } - private static void stopFileSystemStatsThread() { try { diff --git a/lib/trino-hdfs/src/main/java/io/trino/hdfs/HdfsModule.java b/lib/trino-hdfs/src/main/java/io/trino/hdfs/HdfsModule.java index 24f1cd876d21..0dd78944aa58 100644 --- a/lib/trino-hdfs/src/main/java/io/trino/hdfs/HdfsModule.java +++ b/lib/trino-hdfs/src/main/java/io/trino/hdfs/HdfsModule.java @@ -16,10 +16,8 @@ import com.google.inject.Binder; import com.google.inject.Module; import com.google.inject.Scopes; -import io.trino.hdfs.gcs.GcsStorageFactory; import static com.google.inject.multibindings.Multibinder.newSetBinder; -import static com.google.inject.multibindings.OptionalBinder.newOptionalBinder; import static io.airlift.configuration.ConfigBinder.configBinder; import static org.weakref.jmx.guice.ExportBinder.newExporter; @@ -40,7 +38,5 @@ public void configure(Binder binder) binder.bind(HdfsConfigurationInitializer.class).in(Scopes.SINGLETON); newSetBinder(binder, ConfigurationInitializer.class); newSetBinder(binder, DynamicConfigurationProvider.class); - - newOptionalBinder(binder, GcsStorageFactory.class); } } diff --git a/lib/trino-hdfs/src/main/java/io/trino/hdfs/MemoryAwareFileSystem.java b/lib/trino-hdfs/src/main/java/io/trino/hdfs/MemoryAwareFileSystem.java deleted file mode 100644 index c16830d058e6..000000000000 --- a/lib/trino-hdfs/src/main/java/io/trino/hdfs/MemoryAwareFileSystem.java +++ /dev/null @@ -1,26 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package io.trino.hdfs; - -import io.trino.memory.context.AggregatedMemoryContext; -import org.apache.hadoop.fs.FSDataOutputStream; -import org.apache.hadoop.fs.Path; - -import java.io.IOException; - -public interface MemoryAwareFileSystem -{ - FSDataOutputStream create(Path f, AggregatedMemoryContext memoryContext) - throws IOException; -} diff --git a/lib/trino-hdfs/src/main/java/io/trino/hdfs/OpenTelemetryAwareFileSystem.java b/lib/trino-hdfs/src/main/java/io/trino/hdfs/OpenTelemetryAwareFileSystem.java deleted file mode 100644 index 479b631d0bf5..000000000000 --- a/lib/trino-hdfs/src/main/java/io/trino/hdfs/OpenTelemetryAwareFileSystem.java +++ /dev/null @@ -1,21 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package io.trino.hdfs; - -import io.opentelemetry.api.OpenTelemetry; - -public interface OpenTelemetryAwareFileSystem -{ - void setOpenTelemetry(OpenTelemetry openTelemetry); -} diff --git a/lib/trino-hdfs/src/main/java/io/trino/hdfs/azure/HiveAzureConfig.java b/lib/trino-hdfs/src/main/java/io/trino/hdfs/azure/HiveAzureConfig.java deleted file mode 100644 index f85e6edc28e0..000000000000 --- a/lib/trino-hdfs/src/main/java/io/trino/hdfs/azure/HiveAzureConfig.java +++ /dev/null @@ -1,199 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package io.trino.hdfs.azure; - -import com.google.common.net.HostAndPort; -import io.airlift.configuration.Config; -import io.airlift.configuration.ConfigSecuritySensitive; - -import java.util.Optional; - -public class HiveAzureConfig -{ - private String wasbStorageAccount; - private String wasbAccessKey; - private String abfsStorageAccount; - private String abfsAccessKey; - private String adlClientId; - private String adlCredential; - private String adlRefreshUrl; - private HostAndPort adlProxyHost; - private String abfsOAuthClientEndpoint; - private String abfsOAuthClientId; - private String abfsOAuthClientSecret; - - @Deprecated(forRemoval = true, since = "470") - public Optional getWasbStorageAccount() - { - return Optional.ofNullable(wasbStorageAccount); - } - - @ConfigSecuritySensitive - @Deprecated(forRemoval = true, since = "470") - @Config("hive.azure.wasb-storage-account") - public HiveAzureConfig setWasbStorageAccount(String wasbStorageAccount) - { - this.wasbStorageAccount = wasbStorageAccount; - return this; - } - - @Deprecated(forRemoval = true, since = "470") - public Optional getWasbAccessKey() - { - return Optional.ofNullable(wasbAccessKey); - } - - @ConfigSecuritySensitive - @Deprecated(forRemoval = true, since = "470") - @Config("hive.azure.wasb-access-key") - public HiveAzureConfig setWasbAccessKey(String wasbAccessKey) - { - this.wasbAccessKey = wasbAccessKey; - return this; - } - - @Deprecated - public Optional getAbfsStorageAccount() - { - return Optional.ofNullable(abfsStorageAccount); - } - - @ConfigSecuritySensitive - @Deprecated(forRemoval = true, since = "470") - @Config("hive.azure.abfs-storage-account") - public HiveAzureConfig setAbfsStorageAccount(String abfsStorageAccount) - { - this.abfsStorageAccount = abfsStorageAccount; - return this; - } - - @Deprecated(forRemoval = true, since = "470") - public Optional getAbfsAccessKey() - { - return Optional.ofNullable(abfsAccessKey); - } - - @ConfigSecuritySensitive - @Deprecated(forRemoval = true, since = "470") - @Config("hive.azure.abfs-access-key") - public HiveAzureConfig setAbfsAccessKey(String abfsAccessKey) - { - this.abfsAccessKey = abfsAccessKey; - return this; - } - - @ConfigSecuritySensitive - @Deprecated(forRemoval = true, since = "470") - @Config("hive.azure.adl-client-id") - public HiveAzureConfig setAdlClientId(String adlClientId) - { - this.adlClientId = adlClientId; - return this; - } - - @Deprecated(forRemoval = true, since = "470") - public Optional getAdlClientId() - { - return Optional.ofNullable(adlClientId); - } - - @ConfigSecuritySensitive - @Deprecated(forRemoval = true, since = "470") - @Config("hive.azure.adl-credential") - public HiveAzureConfig setAdlCredential(String adlCredential) - { - this.adlCredential = adlCredential; - return this; - } - - @Deprecated(forRemoval = true, since = "470") - public Optional getAdlCredential() - { - return Optional.ofNullable(adlCredential); - } - - @Deprecated(forRemoval = true, since = "470") - public Optional getAdlRefreshUrl() - { - return Optional.ofNullable(adlRefreshUrl); - } - - @ConfigSecuritySensitive - @Deprecated(forRemoval = true, since = "470") - @Config("hive.azure.adl-refresh-url") - public HiveAzureConfig setAdlRefreshUrl(String adlRefreshUrl) - { - this.adlRefreshUrl = adlRefreshUrl; - return this; - } - - @Deprecated(forRemoval = true, since = "470") - @Config("hive.azure.adl-proxy-host") - public HiveAzureConfig setAdlProxyHost(HostAndPort adlProxyHost) - { - this.adlProxyHost = adlProxyHost; - return this; - } - - @Deprecated(forRemoval = true, since = "470") - public Optional getAdlProxyHost() - { - return Optional.ofNullable(adlProxyHost); - } - - @ConfigSecuritySensitive - @Deprecated(forRemoval = true, since = "470") - @Config("hive.azure.abfs.oauth.endpoint") - public HiveAzureConfig setAbfsOAuthClientEndpoint(String endpoint) - { - abfsOAuthClientEndpoint = endpoint; - return this; - } - - @Deprecated(forRemoval = true, since = "470") - public Optional getAbfsOAuthClientEndpoint() - { - return Optional.ofNullable(abfsOAuthClientEndpoint); - } - - @ConfigSecuritySensitive - @Deprecated(forRemoval = true, since = "470") - @Config("hive.azure.abfs.oauth.client-id") - public HiveAzureConfig setAbfsOAuthClientId(String id) - { - abfsOAuthClientId = id; - return this; - } - - @Deprecated(forRemoval = true, since = "470") - public Optional getAbfsOAuthClientId() - { - return Optional.ofNullable(abfsOAuthClientId); - } - - @ConfigSecuritySensitive - @Deprecated(forRemoval = true, since = "470") - @Config("hive.azure.abfs.oauth.secret") - public HiveAzureConfig setAbfsOAuthClientSecret(String secret) - { - abfsOAuthClientSecret = secret; - return this; - } - - @Deprecated(forRemoval = true, since = "470") - public Optional getAbfsOAuthClientSecret() - { - return Optional.ofNullable(abfsOAuthClientSecret); - } -} diff --git a/lib/trino-hdfs/src/main/java/io/trino/hdfs/azure/HiveAzureModule.java b/lib/trino-hdfs/src/main/java/io/trino/hdfs/azure/HiveAzureModule.java deleted file mode 100644 index c5f9ec7776d8..000000000000 --- a/lib/trino-hdfs/src/main/java/io/trino/hdfs/azure/HiveAzureModule.java +++ /dev/null @@ -1,34 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package io.trino.hdfs.azure; - -import com.google.inject.Binder; -import com.google.inject.Scopes; -import io.airlift.configuration.AbstractConfigurationAwareModule; -import io.trino.hdfs.ConfigurationInitializer; - -import static com.google.inject.multibindings.Multibinder.newSetBinder; -import static io.airlift.configuration.ConfigBinder.configBinder; - -public class HiveAzureModule - extends AbstractConfigurationAwareModule -{ - @Override - protected void setup(Binder binder) - { - newSetBinder(binder, ConfigurationInitializer.class).addBinding() - .to(TrinoAzureConfigurationInitializer.class).in(Scopes.SINGLETON); - configBinder(binder).bindConfig(HiveAzureConfig.class); - } -} diff --git a/lib/trino-hdfs/src/main/java/io/trino/hdfs/azure/TrinoAzureConfigurationInitializer.java b/lib/trino-hdfs/src/main/java/io/trino/hdfs/azure/TrinoAzureConfigurationInitializer.java deleted file mode 100644 index 72cd595a31fb..000000000000 --- a/lib/trino-hdfs/src/main/java/io/trino/hdfs/azure/TrinoAzureConfigurationInitializer.java +++ /dev/null @@ -1,140 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package io.trino.hdfs.azure; - -import com.google.common.net.HostAndPort; -import com.google.inject.Inject; -import io.trino.hdfs.ConfigurationInitializer; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.adl.AdlFileSystem; -import org.apache.hadoop.fs.azurebfs.AzureBlobFileSystem; - -import java.net.InetSocketAddress; -import java.net.Proxy; -import java.net.Proxy.Type; -import java.util.Optional; - -import static com.google.common.base.Preconditions.checkArgument; -import static java.lang.String.format; -import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.DATA_BLOCKS_BUFFER; -import static org.apache.hadoop.fs.store.DataBlocks.DATA_BLOCKS_BUFFER_ARRAY; - -public class TrinoAzureConfigurationInitializer - implements ConfigurationInitializer -{ - private final Optional wasbAccessKey; - private final Optional wasbStorageAccount; - private final Optional adlClientId; - private final Optional adlCredential; - private final Optional adlRefreshUrl; - private final Optional abfsAccessKey; - private final Optional abfsStorageAccount; - private final Optional abfsOAuthClientEndpoint; - private final Optional abfsOAuthClientId; - private final Optional abfsOAuthClientSecret; - - @Inject - public TrinoAzureConfigurationInitializer(HiveAzureConfig config) - { - this.wasbAccessKey = dropEmpty(config.getWasbAccessKey()); - this.wasbStorageAccount = dropEmpty(config.getWasbStorageAccount()); - if (wasbAccessKey.isPresent() || wasbStorageAccount.isPresent()) { - checkArgument( - wasbAccessKey.isPresent() && wasbStorageAccount.isPresent(), - "If WASB storage account or access key is set, both must be set"); - } - - this.abfsAccessKey = dropEmpty(config.getAbfsAccessKey()); - this.abfsStorageAccount = dropEmpty(config.getAbfsStorageAccount()); - if (abfsAccessKey.isPresent() || abfsStorageAccount.isPresent()) { - checkArgument( - abfsStorageAccount.isPresent() && abfsAccessKey.isPresent(), - "If ABFS storage account or access key is set, both must be set"); - } - - this.adlClientId = dropEmpty(config.getAdlClientId()); - this.adlCredential = dropEmpty(config.getAdlCredential()); - this.adlRefreshUrl = dropEmpty(config.getAdlRefreshUrl()); - if (adlClientId.isPresent() || adlCredential.isPresent() || adlRefreshUrl.isPresent()) { - checkArgument( - adlClientId.isPresent() && adlCredential.isPresent() && adlRefreshUrl.isPresent(), - "If any of ADL client ID, credential, and refresh URL are set, all must be set"); - } - - this.abfsOAuthClientEndpoint = dropEmpty(config.getAbfsOAuthClientEndpoint()); - this.abfsOAuthClientId = dropEmpty(config.getAbfsOAuthClientId()); - this.abfsOAuthClientSecret = dropEmpty(config.getAbfsOAuthClientSecret()); - if (abfsOAuthClientEndpoint.isPresent() || abfsOAuthClientSecret.isPresent() || abfsOAuthClientId.isPresent()) { - checkArgument( - abfsOAuthClientEndpoint.isPresent() && abfsOAuthClientId.isPresent() && abfsOAuthClientSecret.isPresent(), - "If any of ABFS OAuth2 Client endpoint, ID, and secret are set, all must be set."); - } - - checkArgument( - !(abfsAccessKey.isPresent() && abfsOAuthClientSecret.isPresent()), - "Multiple ABFS authentication methods configured: access key and OAuth2"); - - config.getAdlProxyHost().ifPresent(proxyHost -> setConnectionProxy(proxyForHost(proxyHost))); - } - - @Override - public void initializeConfiguration(Configuration config) - { - if (wasbAccessKey.isPresent() && wasbStorageAccount.isPresent()) { - config.set(format("fs.azure.account.key.%s.blob.core.windows.net", wasbStorageAccount.get()), wasbAccessKey.get()); - } - - if (abfsAccessKey.isPresent() && abfsStorageAccount.isPresent()) { - config.set(format("fs.azure.account.key.%s.dfs.core.windows.net", abfsStorageAccount.get()), abfsAccessKey.get()); - config.set("fs.abfs.impl", AzureBlobFileSystem.class.getName()); - } - if (abfsOAuthClientEndpoint.isPresent() && abfsOAuthClientId.isPresent() && abfsOAuthClientSecret.isPresent()) { - config.set("fs.azure.account.auth.type", "OAuth"); - config.set("fs.azure.account.oauth.provider.type", "org.apache.hadoop.fs.azurebfs.oauth2.ClientCredsTokenProvider"); - config.set("fs.azure.account.oauth2.client.endpoint", abfsOAuthClientEndpoint.get()); - config.set("fs.azure.account.oauth2.client.id", abfsOAuthClientId.get()); - config.set("fs.azure.account.oauth2.client.secret", abfsOAuthClientSecret.get()); - } - - if (adlClientId.isPresent() && adlCredential.isPresent() && adlRefreshUrl.isPresent()) { - config.set("fs.adl.oauth2.access.token.provider.type", "ClientCredential"); - config.set("fs.adl.oauth2.client.id", adlClientId.get()); - config.set("fs.adl.oauth2.credential", adlCredential.get()); - config.set("fs.adl.oauth2.refresh.url", adlRefreshUrl.get()); - config.set("fs.adl.impl", AdlFileSystem.class.getName()); - } - - // do not rely on information returned from local system about users and groups - config.set("fs.azure.skipUserGroupMetadataDuringInitialization", "true"); - - // disable buffering Azure output streams to disk(default is DATA_BLOCKS_BUFFER_DISK) - config.set(DATA_BLOCKS_BUFFER, DATA_BLOCKS_BUFFER_ARRAY); - } - - private static Optional dropEmpty(Optional optional) - { - return optional.filter(value -> !value.isEmpty()); - } - - private static Proxy proxyForHost(HostAndPort address) - { - return new Proxy(Type.HTTP, new InetSocketAddress(address.getHost(), address.getPort())); - } - - @SuppressWarnings({"UnnecessarilyFullyQualified", "UnnecessaryFullyQualifiedName"}) - private static void setConnectionProxy(Proxy proxy) - { - io.trino.hadoop.$internal.com.microsoft.azure.datalake.store.HttpTransport.setConnectionProxy(proxy); - } -} diff --git a/lib/trino-hdfs/src/main/java/io/trino/hdfs/cos/CosConfigurationInitializer.java b/lib/trino-hdfs/src/main/java/io/trino/hdfs/cos/CosConfigurationInitializer.java deleted file mode 100644 index c3a3d352413c..000000000000 --- a/lib/trino-hdfs/src/main/java/io/trino/hdfs/cos/CosConfigurationInitializer.java +++ /dev/null @@ -1,27 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package io.trino.hdfs.cos; - -import io.trino.hdfs.ConfigurationInitializer; -import org.apache.hadoop.conf.Configuration; - -public class CosConfigurationInitializer - implements ConfigurationInitializer -{ - @Override - public void initializeConfiguration(Configuration config) - { - config.set("fs.cos.impl", TrinoCosFileSystem.class.getName()); - } -} diff --git a/lib/trino-hdfs/src/main/java/io/trino/hdfs/cos/CosServiceConfigurationProvider.java b/lib/trino-hdfs/src/main/java/io/trino/hdfs/cos/CosServiceConfigurationProvider.java deleted file mode 100644 index 5c6483afb4ce..000000000000 --- a/lib/trino-hdfs/src/main/java/io/trino/hdfs/cos/CosServiceConfigurationProvider.java +++ /dev/null @@ -1,65 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package io.trino.hdfs.cos; - -import com.google.common.base.Splitter; -import com.google.inject.Inject; -import io.trino.hdfs.DynamicConfigurationProvider; -import io.trino.hdfs.HdfsContext; -import org.apache.hadoop.conf.Configuration; - -import java.net.URI; -import java.util.List; -import java.util.Map; - -import static io.trino.hdfs.DynamicConfigurationProvider.setCacheKey; -import static io.trino.hdfs.s3.TrinoS3FileSystem.S3_ACCESS_KEY; -import static io.trino.hdfs.s3.TrinoS3FileSystem.S3_ENDPOINT; -import static io.trino.hdfs.s3.TrinoS3FileSystem.S3_SECRET_KEY; - -public class CosServiceConfigurationProvider - implements DynamicConfigurationProvider -{ - private final Map services; - - @Inject - public CosServiceConfigurationProvider(HiveCosServiceConfig config) - { - services = ServiceConfig.loadServiceConfigs(config.getServiceConfig()); - } - - @Override - public void updateConfiguration(Configuration configuration, HdfsContext context, URI uri) - { - if (!"cos".equals(uri.getScheme())) { - return; - } - - List parts = Splitter.on('.').limit(2).splitToList(uri.getHost()); - if (parts.size() != 2) { - return; - } - String serviceName = parts.get(1); - - ServiceConfig service = services.get(serviceName); - if (service == null) { - return; - } - - configuration.set(S3_ACCESS_KEY, service.getAccessKey()); - configuration.set(S3_SECRET_KEY, service.getSecretKey()); - service.getEndpoint().ifPresent(endpoint -> configuration.set(S3_ENDPOINT, endpoint)); - setCacheKey(configuration, serviceName); - } -} diff --git a/lib/trino-hdfs/src/main/java/io/trino/hdfs/cos/HiveCosModule.java b/lib/trino-hdfs/src/main/java/io/trino/hdfs/cos/HiveCosModule.java deleted file mode 100644 index 22a930258893..000000000000 --- a/lib/trino-hdfs/src/main/java/io/trino/hdfs/cos/HiveCosModule.java +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package io.trino.hdfs.cos; - -import com.google.inject.Binder; -import com.google.inject.Scopes; -import io.airlift.configuration.AbstractConfigurationAwareModule; -import io.trino.hdfs.ConfigurationInitializer; -import io.trino.hdfs.DynamicConfigurationProvider; - -import static com.google.inject.multibindings.Multibinder.newSetBinder; -import static io.airlift.configuration.ConfigBinder.configBinder; - -public class HiveCosModule - extends AbstractConfigurationAwareModule -{ - @Override - protected void setup(Binder binder) - { - configBinder(binder).bindConfig(HiveCosServiceConfig.class); - - newSetBinder(binder, ConfigurationInitializer.class).addBinding().to(CosConfigurationInitializer.class).in(Scopes.SINGLETON); - - if (buildConfigObject(HiveCosServiceConfig.class).getServiceConfig() != null) { - newSetBinder(binder, DynamicConfigurationProvider.class).addBinding().to(CosServiceConfigurationProvider.class).in(Scopes.SINGLETON); - } - } -} diff --git a/lib/trino-hdfs/src/main/java/io/trino/hdfs/cos/HiveCosServiceConfig.java b/lib/trino-hdfs/src/main/java/io/trino/hdfs/cos/HiveCosServiceConfig.java deleted file mode 100644 index dd0cb9b019d7..000000000000 --- a/lib/trino-hdfs/src/main/java/io/trino/hdfs/cos/HiveCosServiceConfig.java +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package io.trino.hdfs.cos; - -import io.airlift.configuration.Config; -import io.airlift.configuration.validation.FileExists; - -import java.io.File; - -public class HiveCosServiceConfig -{ - private File serviceConfig; - - @Deprecated(forRemoval = true, since = "470") - @FileExists - public File getServiceConfig() - { - return serviceConfig; - } - - @Deprecated(forRemoval = true, since = "470") - @Config("hive.cos.service-config") - public HiveCosServiceConfig setServiceConfig(File serviceConfig) - { - this.serviceConfig = serviceConfig; - return this; - } -} diff --git a/lib/trino-hdfs/src/main/java/io/trino/hdfs/cos/ServiceConfig.java b/lib/trino-hdfs/src/main/java/io/trino/hdfs/cos/ServiceConfig.java deleted file mode 100644 index 58ae8ed3e688..000000000000 --- a/lib/trino-hdfs/src/main/java/io/trino/hdfs/cos/ServiceConfig.java +++ /dev/null @@ -1,145 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package io.trino.hdfs.cos; - -import com.google.common.collect.ImmutableMap; -import com.google.common.collect.Sets; - -import java.io.File; -import java.io.IOException; -import java.io.InputStream; -import java.io.UncheckedIOException; -import java.util.HashSet; -import java.util.Map; -import java.util.Optional; -import java.util.Properties; -import java.util.Set; - -import static com.google.common.base.MoreObjects.toStringHelper; -import static com.google.common.base.Preconditions.checkArgument; -import static com.google.common.collect.ImmutableSet.toImmutableSet; -import static java.nio.file.Files.newInputStream; -import static java.util.Objects.requireNonNull; - -public class ServiceConfig -{ - private static final String ACCESS_KEY_SUFFIX = ".access-key"; - private static final String SECRET_KEY_SUFFIX = ".secret-key"; - private static final String ENDPOINT_SUFFIX = ".endpoint"; - private final String name; - private final String accessKey; - private final String secretKey; - private final Optional endpoint; - - public ServiceConfig(String name, String accessKey, String secretKey, Optional endpoint) - { - this.name = requireNonNull(name, "name is null"); - this.accessKey = requireNonNull(accessKey, "accessKey is null"); - this.secretKey = requireNonNull(secretKey, "secretKey is null"); - this.endpoint = requireNonNull(endpoint, "endpoint is null"); - } - - public String getName() - { - return name; - } - - public String getAccessKey() - { - return accessKey; - } - - public String getSecretKey() - { - return secretKey; - } - - public Optional getEndpoint() - { - return endpoint; - } - - @Override - public String toString() - { - return toStringHelper(this) - .add("name", name) - .toString(); - } - - public static Map loadServiceConfigs(File configFile) - { - if (configFile == null) { - return ImmutableMap.of(); - } - Properties properties = new Properties(); - try (InputStream inputStream = newInputStream(configFile.toPath())) { - properties.load(inputStream); - } - catch (IOException e) { - throw new UncheckedIOException(e); - } - Set serviceNames = properties.keySet().stream() - .map(String.class::cast) - .map(ServiceConfig::getServiceName) - .filter(Optional::isPresent) - .map(Optional::get) - .collect(toImmutableSet()); - - ImmutableMap.Builder configs = ImmutableMap.builder(); - Set usedProperties = new HashSet<>(); - for (String serviceName : serviceNames) { - String accessKey = getRequiredProperty(serviceName + ACCESS_KEY_SUFFIX, properties, configFile, usedProperties); - String secretKey = getRequiredProperty(serviceName + SECRET_KEY_SUFFIX, properties, configFile, usedProperties); - Optional endpoint = getOptionalProperty(serviceName + ENDPOINT_SUFFIX, properties, usedProperties); - configs.put(serviceName, new ServiceConfig(serviceName, accessKey, secretKey, endpoint)); - } - - Set unusedProperties = Sets.difference(properties.keySet(), usedProperties); - checkArgument(unusedProperties.isEmpty(), "Not all properties in file %s were used: %s", configFile, unusedProperties); - - return configs.buildOrThrow(); - } - - private static Optional getServiceName(String propertyName) - { - if (propertyName.endsWith(ACCESS_KEY_SUFFIX)) { - return Optional.of(propertyName.substring(0, propertyName.length() - ACCESS_KEY_SUFFIX.length())); - } - if (propertyName.endsWith(SECRET_KEY_SUFFIX)) { - return Optional.of(propertyName.substring(0, propertyName.length() - SECRET_KEY_SUFFIX.length())); - } - if (propertyName.endsWith(ENDPOINT_SUFFIX)) { - return Optional.of(propertyName.substring(0, propertyName.length() - ENDPOINT_SUFFIX.length())); - } - return Optional.empty(); - } - - private static String getRequiredProperty(String propertyName, Properties properties, File configFile, Set usedProperties) - { - String value = properties.getProperty(propertyName); - checkArgument(value != null, "%s bucket property not provided in file %s", propertyName, configFile); - usedProperties.add(propertyName); - return value; - } - - private static Optional getOptionalProperty(String propertyName, Properties properties, Set usedProperties) - { - String value = properties.getProperty(propertyName); - if (value != null) { - usedProperties.add(propertyName); - } - return Optional.ofNullable(value); - } -} diff --git a/lib/trino-hdfs/src/main/java/io/trino/hdfs/cos/TrinoCosFileSystem.java b/lib/trino-hdfs/src/main/java/io/trino/hdfs/cos/TrinoCosFileSystem.java deleted file mode 100644 index da09e0134450..000000000000 --- a/lib/trino-hdfs/src/main/java/io/trino/hdfs/cos/TrinoCosFileSystem.java +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package io.trino.hdfs.cos; - -import com.google.common.base.Splitter; -import io.trino.hdfs.s3.TrinoS3FileSystem; - -import java.net.URI; - -public class TrinoCosFileSystem - extends TrinoS3FileSystem -{ - @Override - protected String getBucketName(URI uri) - { - String fullName = extractBucketName(uri); - return Splitter.on(".").limit(2).splitToList(fullName).get(0); - } -} diff --git a/lib/trino-hdfs/src/main/java/io/trino/hdfs/gcs/GcsAccessTokenProvider.java b/lib/trino-hdfs/src/main/java/io/trino/hdfs/gcs/GcsAccessTokenProvider.java deleted file mode 100644 index 21b4877ff704..000000000000 --- a/lib/trino-hdfs/src/main/java/io/trino/hdfs/gcs/GcsAccessTokenProvider.java +++ /dev/null @@ -1,51 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package io.trino.hdfs.gcs; - -import com.google.cloud.hadoop.util.AccessTokenProvider; -import org.apache.hadoop.conf.Configuration; - -import java.time.Instant; - -import static com.google.common.base.Strings.nullToEmpty; -import static java.util.concurrent.TimeUnit.HOURS; - -public class GcsAccessTokenProvider - implements AccessTokenProvider -{ - public static final String GCS_ACCESS_TOKEN_CONF = "trino.gcs.oauth-access-token"; - public static final Long EXPIRATION_TIME_MILLISECONDS = HOURS.toMillis(1); - private Configuration config; - - @Override - public AccessToken getAccessToken() - { - return new AccessToken(nullToEmpty(config.get(GCS_ACCESS_TOKEN_CONF)), Instant.now().plusMillis(EXPIRATION_TIME_MILLISECONDS)); - } - - @Override - public void refresh() {} - - @Override - public void setConf(Configuration configuration) - { - this.config = configuration; - } - - @Override - public Configuration getConf() - { - return config; - } -} diff --git a/lib/trino-hdfs/src/main/java/io/trino/hdfs/gcs/GcsAtomicOutputStream.java b/lib/trino-hdfs/src/main/java/io/trino/hdfs/gcs/GcsAtomicOutputStream.java deleted file mode 100644 index 3ee56648304f..000000000000 --- a/lib/trino-hdfs/src/main/java/io/trino/hdfs/gcs/GcsAtomicOutputStream.java +++ /dev/null @@ -1,58 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package io.trino.hdfs.gcs; - -import com.google.cloud.hadoop.repackaged.gcs.com.google.api.client.http.ByteArrayContent; -import com.google.cloud.hadoop.repackaged.gcs.com.google.api.services.storage.Storage; -import com.google.cloud.hadoop.repackaged.gcs.com.google.api.services.storage.model.StorageObject; -import com.google.cloud.hadoop.repackaged.gcs.com.google.cloud.hadoop.gcsio.StorageResourceId; -import io.trino.hdfs.HdfsContext; -import io.trino.hdfs.HdfsEnvironment; -import org.apache.hadoop.fs.Path; - -import java.io.ByteArrayOutputStream; -import java.io.IOException; - -public class GcsAtomicOutputStream - extends ByteArrayOutputStream -{ - private final Storage storage; - private final Path path; - private boolean closed; - - public GcsAtomicOutputStream(HdfsEnvironment environment, HdfsContext context, Path path) - { - this.storage = environment.createGcsStorage(context, path); - this.path = path; - } - - @Override - public void close() - throws IOException - { - if (closed) { - return; - } - closed = true; - - StorageResourceId storageResourceId = StorageResourceId.fromStringPath(path.toString()); - Storage.Objects.Insert insert = storage.objects().insert( - storageResourceId.getBucketName(), - new StorageObject().setName(storageResourceId.getObjectName()), - new ByteArrayContent("application/octet-stream", buf, 0, count)); - insert.setIfGenerationMatch(0L); // fail if object already exists - insert.getMediaHttpUploader().setDirectUploadEnabled(true); - insert.execute(); - } -} diff --git a/lib/trino-hdfs/src/main/java/io/trino/hdfs/gcs/GcsConfigurationProvider.java b/lib/trino-hdfs/src/main/java/io/trino/hdfs/gcs/GcsConfigurationProvider.java deleted file mode 100644 index 202d148ada20..000000000000 --- a/lib/trino-hdfs/src/main/java/io/trino/hdfs/gcs/GcsConfigurationProvider.java +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package io.trino.hdfs.gcs; - -import io.trino.hdfs.DynamicConfigurationProvider; -import io.trino.hdfs.HdfsContext; -import org.apache.hadoop.conf.Configuration; - -import java.net.URI; - -import static com.google.cloud.hadoop.fs.gcs.GoogleHadoopFileSystem.SCHEME; -import static io.trino.hdfs.DynamicConfigurationProvider.setCacheKey; -import static io.trino.hdfs.gcs.GcsAccessTokenProvider.GCS_ACCESS_TOKEN_CONF; - -public class GcsConfigurationProvider - implements DynamicConfigurationProvider -{ - public static final String GCS_OAUTH_KEY = "hive.gcs.oauth"; - - @Override - public void updateConfiguration(Configuration configuration, HdfsContext context, URI uri) - { - if (!uri.getScheme().equals(SCHEME)) { - return; - } - - String accessToken = context.getIdentity().getExtraCredentials().get(GCS_OAUTH_KEY); - if (accessToken != null) { - configuration.set(GCS_ACCESS_TOKEN_CONF, accessToken); - setCacheKey(configuration, accessToken); - } - } -} diff --git a/lib/trino-hdfs/src/main/java/io/trino/hdfs/gcs/GcsStorageFactory.java b/lib/trino-hdfs/src/main/java/io/trino/hdfs/gcs/GcsStorageFactory.java deleted file mode 100644 index 3a7fb040bc8c..000000000000 --- a/lib/trino-hdfs/src/main/java/io/trino/hdfs/gcs/GcsStorageFactory.java +++ /dev/null @@ -1,101 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package io.trino.hdfs.gcs; - -import com.google.cloud.hadoop.repackaged.gcs.com.google.api.client.http.HttpTransport; -import com.google.cloud.hadoop.repackaged.gcs.com.google.api.client.json.jackson2.JacksonFactory; -import com.google.cloud.hadoop.repackaged.gcs.com.google.api.services.storage.Storage; -import com.google.cloud.hadoop.repackaged.gcs.com.google.auth.oauth2.GoogleCredentials; -import com.google.cloud.hadoop.repackaged.gcs.com.google.cloud.hadoop.gcsio.GoogleCloudStorageOptions; -import com.google.cloud.hadoop.repackaged.gcs.com.google.cloud.hadoop.util.HttpTransportFactory; -import com.google.cloud.hadoop.repackaged.gcs.com.google.cloud.hadoop.util.RetryHttpInitializer; -import com.google.cloud.hadoop.repackaged.gcs.com.google.cloud.hadoop.util.RetryHttpInitializerOptions; -import com.google.inject.Inject; -import io.trino.hdfs.HdfsContext; -import io.trino.hdfs.HdfsEnvironment; -import org.apache.hadoop.fs.Path; - -import java.io.ByteArrayInputStream; -import java.io.FileInputStream; -import java.io.IOException; -import java.io.InputStream; -import java.io.UncheckedIOException; -import java.util.Optional; - -import static com.google.cloud.hadoop.fs.gcs.TrinoGoogleHadoopFileSystemConfiguration.getGcsOptionsBuilder; -import static com.google.cloud.hadoop.repackaged.gcs.com.google.cloud.hadoop.util.HadoopCredentialsConfiguration.CLOUD_PLATFORM_SCOPE; -import static com.google.common.base.Strings.nullToEmpty; -import static io.trino.hdfs.gcs.GcsConfigurationProvider.GCS_OAUTH_KEY; -import static java.nio.charset.StandardCharsets.UTF_8; - -public class GcsStorageFactory -{ - private static final String APPLICATION_NAME = "Trino"; - - private final boolean useGcsAccessToken; - private final Optional jsonGoogleCredential; - - @Inject - public GcsStorageFactory(HiveGcsConfig hiveGcsConfig) - throws IOException - { - hiveGcsConfig.validate(); - this.useGcsAccessToken = hiveGcsConfig.isUseGcsAccessToken(); - String jsonKey = hiveGcsConfig.getJsonKey(); - String jsonKeyFilePath = hiveGcsConfig.getJsonKeyFilePath(); - if (jsonKey != null) { - try (InputStream inputStream = new ByteArrayInputStream(jsonKey.getBytes(UTF_8))) { - jsonGoogleCredential = Optional.of(GoogleCredentials.fromStream(inputStream).createScoped(CLOUD_PLATFORM_SCOPE)); - } - } - else if (jsonKeyFilePath != null) { - try (FileInputStream inputStream = new FileInputStream(jsonKeyFilePath)) { - jsonGoogleCredential = Optional.of(GoogleCredentials.fromStream(inputStream).createScoped(CLOUD_PLATFORM_SCOPE)); - } - } - else { - jsonGoogleCredential = Optional.empty(); - } - } - - public Storage create(HdfsEnvironment environment, HdfsContext context, Path path) - { - try { - GoogleCloudStorageOptions gcsOptions = getGcsOptionsBuilder(environment.getConfiguration(context, path)).build(); - HttpTransport httpTransport = HttpTransportFactory.createHttpTransport( - gcsOptions.getProxyAddress(), - gcsOptions.getProxyUsername(), - gcsOptions.getProxyPassword()); - GoogleCredentials credential; - if (useGcsAccessToken) { - String accessToken = nullToEmpty(context.getIdentity().getExtraCredentials().get(GCS_OAUTH_KEY)); - try (ByteArrayInputStream inputStream = new ByteArrayInputStream(accessToken.getBytes(UTF_8))) { - credential = GoogleCredentials.fromStream(inputStream).createScoped(CLOUD_PLATFORM_SCOPE); - } - } - else { - credential = jsonGoogleCredential.orElseThrow(() -> new IllegalStateException("GCS credentials not configured")); - } - return new Storage.Builder(httpTransport, JacksonFactory.getDefaultInstance(), new RetryHttpInitializer(credential, RetryHttpInitializerOptions.builder() - .setReadTimeout(gcsOptions.getHttpRequestReadTimeout()) - .setMaxRequestRetries(gcsOptions.getMaxHttpRequestRetries()) - .build())) - .setApplicationName(APPLICATION_NAME) - .build(); - } - catch (IOException e) { - throw new UncheckedIOException(e); - } - } -} diff --git a/lib/trino-hdfs/src/main/java/io/trino/hdfs/gcs/GoogleGcsConfigurationInitializer.java b/lib/trino-hdfs/src/main/java/io/trino/hdfs/gcs/GoogleGcsConfigurationInitializer.java deleted file mode 100644 index cc5db9332370..000000000000 --- a/lib/trino-hdfs/src/main/java/io/trino/hdfs/gcs/GoogleGcsConfigurationInitializer.java +++ /dev/null @@ -1,86 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package io.trino.hdfs.gcs; - -import com.google.cloud.hadoop.fs.gcs.GoogleHadoopFileSystem; -import com.google.cloud.hadoop.util.AccessTokenProvider; -import com.google.inject.Inject; -import io.trino.hdfs.ConfigurationInitializer; -import org.apache.hadoop.conf.Configuration; - -import java.io.IOException; -import java.io.UncheckedIOException; -import java.nio.charset.StandardCharsets; -import java.nio.file.Files; -import java.nio.file.Path; -import java.nio.file.attribute.PosixFilePermissions; -import java.util.EnumSet; -import java.util.Optional; - -import static com.google.cloud.hadoop.fs.gcs.GoogleHadoopFileSystemConfiguration.GCS_CONFIG_PREFIX; -import static com.google.cloud.hadoop.repackaged.gcs.com.google.cloud.hadoop.util.HadoopCredentialsConfiguration.ACCESS_TOKEN_PROVIDER_SUFFIX; -import static com.google.cloud.hadoop.repackaged.gcs.com.google.cloud.hadoop.util.HadoopCredentialsConfiguration.AUTHENTICATION_TYPE_SUFFIX; -import static com.google.cloud.hadoop.repackaged.gcs.com.google.cloud.hadoop.util.HadoopCredentialsConfiguration.AuthenticationType.ACCESS_TOKEN_PROVIDER; -import static com.google.cloud.hadoop.repackaged.gcs.com.google.cloud.hadoop.util.HadoopCredentialsConfiguration.AuthenticationType.SERVICE_ACCOUNT_JSON_KEYFILE; -import static com.google.cloud.hadoop.repackaged.gcs.com.google.cloud.hadoop.util.HadoopCredentialsConfiguration.SERVICE_ACCOUNT_JSON_KEYFILE_SUFFIX; -import static java.nio.file.attribute.PosixFilePermission.OWNER_READ; -import static java.nio.file.attribute.PosixFilePermission.OWNER_WRITE; - -public class GoogleGcsConfigurationInitializer - implements ConfigurationInitializer -{ - private final boolean useGcsAccessToken; - private final String jsonKeyFilePath; - - @Inject - public GoogleGcsConfigurationInitializer(HiveGcsConfig config) - { - config.validate(); - this.useGcsAccessToken = config.isUseGcsAccessToken(); - this.jsonKeyFilePath = Optional.ofNullable(config.getJsonKey()) - .map(GoogleGcsConfigurationInitializer::getJsonKeyFilePath) - .orElse(config.getJsonKeyFilePath()); - } - - private static String getJsonKeyFilePath(String jsonKey) - { - try { - // Just create a temporary json key file. - Path tempFile = Files.createTempFile("gcs-key-", ".json", PosixFilePermissions.asFileAttribute(EnumSet.of(OWNER_READ, OWNER_WRITE))); - tempFile.toFile().deleteOnExit(); - Files.writeString(tempFile, jsonKey, StandardCharsets.UTF_8); - return tempFile.toString(); - } - catch (IOException e) { - throw new UncheckedIOException("Failed to create a temp file for the GCS JSON key", e); - } - } - - @Override - public void initializeConfiguration(Configuration config) - { - config.set("fs.gs.impl", GoogleHadoopFileSystem.class.getName()); - - if (useGcsAccessToken) { - // use oauth token to authenticate with Google Cloud Storage - config.setEnum(GCS_CONFIG_PREFIX + AUTHENTICATION_TYPE_SUFFIX.getKey(), ACCESS_TOKEN_PROVIDER); - config.setClass(GCS_CONFIG_PREFIX + ACCESS_TOKEN_PROVIDER_SUFFIX.getKey(), GcsAccessTokenProvider.class, AccessTokenProvider.class); - } - else if (jsonKeyFilePath != null) { - // use service account key file - config.setEnum(GCS_CONFIG_PREFIX + AUTHENTICATION_TYPE_SUFFIX.getKey(), SERVICE_ACCOUNT_JSON_KEYFILE); - config.set(GCS_CONFIG_PREFIX + SERVICE_ACCOUNT_JSON_KEYFILE_SUFFIX.getKey(), jsonKeyFilePath); - } - } -} diff --git a/lib/trino-hdfs/src/main/java/io/trino/hdfs/gcs/HiveGcsConfig.java b/lib/trino-hdfs/src/main/java/io/trino/hdfs/gcs/HiveGcsConfig.java deleted file mode 100644 index 2fadde7d3d40..000000000000 --- a/lib/trino-hdfs/src/main/java/io/trino/hdfs/gcs/HiveGcsConfig.java +++ /dev/null @@ -1,88 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package io.trino.hdfs.gcs; - -import io.airlift.configuration.Config; -import io.airlift.configuration.ConfigDescription; -import io.airlift.configuration.ConfigSecuritySensitive; -import io.airlift.configuration.validation.FileExists; -import jakarta.annotation.Nullable; - -import static com.google.common.base.Preconditions.checkState; - -public class HiveGcsConfig -{ - private boolean useGcsAccessToken; - private String jsonKey; - private String jsonKeyFilePath; - - @Deprecated(forRemoval = true, since = "470") - public boolean isUseGcsAccessToken() - { - return useGcsAccessToken; - } - - @Deprecated(forRemoval = true, since = "470") - @Config("hive.gcs.use-access-token") - @ConfigDescription("Use client-provided OAuth token to access Google Cloud Storage") - public HiveGcsConfig setUseGcsAccessToken(boolean useGcsAccessToken) - { - this.useGcsAccessToken = useGcsAccessToken; - return this; - } - - @Deprecated(forRemoval = true, since = "470") - @Nullable - public String getJsonKey() - { - return jsonKey; - } - - @Deprecated(forRemoval = true, since = "470") - @Config("hive.gcs.json-key") - @ConfigSecuritySensitive - public HiveGcsConfig setJsonKey(String jsonKey) - { - this.jsonKey = jsonKey; - return this; - } - - @Deprecated(forRemoval = true, since = "470") - @Nullable - @FileExists - public String getJsonKeyFilePath() - { - return jsonKeyFilePath; - } - - @Deprecated(forRemoval = true, since = "470") - @Config("hive.gcs.json-key-file-path") - @ConfigDescription("JSON key file used to access Google Cloud Storage") - public HiveGcsConfig setJsonKeyFilePath(String jsonKeyFilePath) - { - this.jsonKeyFilePath = jsonKeyFilePath; - return this; - } - - public void validate() - { - // This cannot be normal validation, as it would make it impossible to write TestHiveGcsConfig.testExplicitPropertyMappings - - if (useGcsAccessToken) { - checkState(jsonKey == null, "Cannot specify 'hive.gcs.json-key' when 'hive.gcs.use-access-token' is set"); - checkState(jsonKeyFilePath == null, "Cannot specify 'hive.gcs.json-key-file-path' when 'hive.gcs.use-access-token' is set"); - } - checkState(jsonKey == null || jsonKeyFilePath == null, "'hive.gcs.json-key' and 'hive.gcs.json-key-file-path' cannot be both set"); - } -} diff --git a/lib/trino-hdfs/src/main/java/io/trino/hdfs/gcs/HiveGcsModule.java b/lib/trino-hdfs/src/main/java/io/trino/hdfs/gcs/HiveGcsModule.java deleted file mode 100644 index d74991f4a649..000000000000 --- a/lib/trino-hdfs/src/main/java/io/trino/hdfs/gcs/HiveGcsModule.java +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package io.trino.hdfs.gcs; - -import com.google.inject.Binder; -import com.google.inject.Scopes; -import io.airlift.configuration.AbstractConfigurationAwareModule; -import io.trino.hdfs.ConfigurationInitializer; -import io.trino.hdfs.DynamicConfigurationProvider; - -import static com.google.inject.multibindings.Multibinder.newSetBinder; -import static io.airlift.configuration.ConfigBinder.configBinder; - -public class HiveGcsModule - extends AbstractConfigurationAwareModule -{ - @Override - protected void setup(Binder binder) - { - configBinder(binder).bindConfig(HiveGcsConfig.class); - - newSetBinder(binder, ConfigurationInitializer.class).addBinding().to(GoogleGcsConfigurationInitializer.class).in(Scopes.SINGLETON); - - if (buildConfigObject(HiveGcsConfig.class).isUseGcsAccessToken()) { - newSetBinder(binder, DynamicConfigurationProvider.class).addBinding().to(GcsConfigurationProvider.class).in(Scopes.SINGLETON); - } - - binder.bind(GcsStorageFactory.class).in(Scopes.SINGLETON); - } -} diff --git a/lib/trino-hdfs/src/main/java/io/trino/hdfs/s3/AwsCurrentRegionHolder.java b/lib/trino-hdfs/src/main/java/io/trino/hdfs/s3/AwsCurrentRegionHolder.java deleted file mode 100644 index 4026970ed341..000000000000 --- a/lib/trino-hdfs/src/main/java/io/trino/hdfs/s3/AwsCurrentRegionHolder.java +++ /dev/null @@ -1,55 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package io.trino.hdfs.s3; - -import com.amazonaws.regions.Region; -import com.amazonaws.regions.Regions; -import com.google.common.base.Suppliers; - -import java.util.function.Supplier; - -/** - * Caches the result of calling {@link Regions#getCurrentRegion()} since accessing EC2 instance - * metadata repeatedly can result in being throttled and prevent other metadata accessing operations - * such as refreshing instance credentials from working normally - */ -public final class AwsCurrentRegionHolder -{ - private static final Supplier SUPPLIER = Suppliers.memoize(AwsCurrentRegionHolder::loadCurrentRegionOrThrowOnNull); - - private AwsCurrentRegionHolder() {} - - /** - * Attempts to resolve the current region from EC2's instance metadata through {@link Regions#getCurrentRegion()}. If - * no region is able to be resolved an exception is thrown - */ - public static Region getCurrentRegionFromEC2Metadata() - throws IllegalStateException - { - return SUPPLIER.get(); - } - - /** - * @throws IllegalStateException when no region is resolved to avoid memoizing a transient failure - */ - private static Region loadCurrentRegionOrThrowOnNull() - throws IllegalStateException - { - Region result = Regions.getCurrentRegion(); - if (result == null) { - throw new IllegalStateException("Failed to resolve current AWS region from EC2 metadata"); - } - return result; - } -} diff --git a/lib/trino-hdfs/src/main/java/io/trino/hdfs/s3/AwsSdkClientCoreStats.java b/lib/trino-hdfs/src/main/java/io/trino/hdfs/s3/AwsSdkClientCoreStats.java deleted file mode 100644 index af139c41d922..000000000000 --- a/lib/trino-hdfs/src/main/java/io/trino/hdfs/s3/AwsSdkClientCoreStats.java +++ /dev/null @@ -1,195 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package io.trino.hdfs.s3; - -import com.amazonaws.Request; -import com.amazonaws.Response; -import com.amazonaws.metrics.RequestMetricCollector; -import com.amazonaws.util.AWSRequestMetrics; -import com.amazonaws.util.TimingInfo; -import com.google.errorprone.annotations.ThreadSafe; -import io.airlift.stats.CounterStat; -import io.airlift.stats.TimeStat; -import org.weakref.jmx.Managed; -import org.weakref.jmx.Nested; - -import java.util.List; -import java.util.concurrent.atomic.AtomicLong; - -import static com.amazonaws.util.AWSRequestMetrics.Field.ClientExecuteTime; -import static com.amazonaws.util.AWSRequestMetrics.Field.HttpClientPoolAvailableCount; -import static com.amazonaws.util.AWSRequestMetrics.Field.HttpClientPoolLeasedCount; -import static com.amazonaws.util.AWSRequestMetrics.Field.HttpClientPoolPendingCount; -import static com.amazonaws.util.AWSRequestMetrics.Field.HttpClientRetryCount; -import static com.amazonaws.util.AWSRequestMetrics.Field.HttpRequestTime; -import static com.amazonaws.util.AWSRequestMetrics.Field.RequestCount; -import static com.amazonaws.util.AWSRequestMetrics.Field.RetryPauseTime; -import static com.amazonaws.util.AWSRequestMetrics.Field.ThrottleException; -import static java.util.Objects.requireNonNull; -import static java.util.concurrent.TimeUnit.MILLISECONDS; - -@ThreadSafe -public final class AwsSdkClientCoreStats -{ - private final CounterStat awsRequestCount = new CounterStat(); - private final CounterStat awsRetryCount = new CounterStat(); - private final CounterStat awsHttpClientRetryCount = new CounterStat(); - private final CounterStat awsThrottleExceptions = new CounterStat(); - private final TimeStat awsRequestTime = new TimeStat(MILLISECONDS); - private final TimeStat awsClientExecuteTime = new TimeStat(MILLISECONDS); - private final TimeStat awsClientRetryPauseTime = new TimeStat(MILLISECONDS); - private final AtomicLong awsHttpClientPoolAvailableCount = new AtomicLong(); - private final AtomicLong awsHttpClientPoolLeasedCount = new AtomicLong(); - private final AtomicLong awsHttpClientPoolPendingCount = new AtomicLong(); - - @Managed - @Nested - public CounterStat getAwsRequestCount() - { - return awsRequestCount; - } - - @Managed - @Nested - public CounterStat getAwsRetryCount() - { - return awsRetryCount; - } - - @Managed - @Nested - public CounterStat getAwsHttpClientRetryCount() - { - return awsHttpClientRetryCount; - } - - @Managed - @Nested - public CounterStat getAwsThrottleExceptions() - { - return awsThrottleExceptions; - } - - @Managed - @Nested - public TimeStat getAwsRequestTime() - { - return awsRequestTime; - } - - @Managed - @Nested - public TimeStat getAwsClientExecuteTime() - { - return awsClientExecuteTime; - } - - @Managed - @Nested - public TimeStat getAwsClientRetryPauseTime() - { - return awsClientRetryPauseTime; - } - - @Managed - public long getAwsHttpClientPoolAvailableCount() - { - return awsHttpClientPoolAvailableCount.get(); - } - - @Managed - public long getAwsHttpClientPoolLeasedCount() - { - return awsHttpClientPoolLeasedCount.get(); - } - - @Managed - public long getAwsHttpClientPoolPendingCount() - { - return awsHttpClientPoolPendingCount.get(); - } - - public AwsSdkClientCoreRequestMetricCollector newRequestMetricCollector() - { - return new AwsSdkClientCoreRequestMetricCollector(this); - } - - public static class AwsSdkClientCoreRequestMetricCollector - extends RequestMetricCollector - { - private final AwsSdkClientCoreStats stats; - - protected AwsSdkClientCoreRequestMetricCollector(AwsSdkClientCoreStats stats) - { - this.stats = requireNonNull(stats, "stats is null"); - } - - @Override - public void collectMetrics(Request request, Response response) - { - TimingInfo timingInfo = request.getAWSRequestMetrics().getTimingInfo(); - - Number requestCounts = timingInfo.getCounter(RequestCount.name()); - if (requestCounts != null) { - long count = requestCounts.longValue(); - stats.awsRequestCount.update(count); - if (count > 1) { - stats.awsRetryCount.update(count - 1); - } - } - - Number httpClientRetryCounts = timingInfo.getCounter(HttpClientRetryCount.name()); - if (httpClientRetryCounts != null) { - stats.awsHttpClientRetryCount.update(httpClientRetryCounts.longValue()); - } - - Number throttleExceptions = timingInfo.getCounter(ThrottleException.name()); - if (throttleExceptions != null) { - stats.awsThrottleExceptions.update(throttleExceptions.longValue()); - } - - Number httpClientPoolAvailableCount = timingInfo.getCounter(HttpClientPoolAvailableCount.name()); - if (httpClientPoolAvailableCount != null) { - stats.awsHttpClientPoolAvailableCount.set(httpClientPoolAvailableCount.longValue()); - } - - Number httpClientPoolLeasedCount = timingInfo.getCounter(HttpClientPoolLeasedCount.name()); - if (httpClientPoolLeasedCount != null) { - stats.awsHttpClientPoolLeasedCount.set(httpClientPoolLeasedCount.longValue()); - } - - Number httpClientPoolPendingCount = timingInfo.getCounter(HttpClientPoolPendingCount.name()); - if (httpClientPoolPendingCount != null) { - stats.awsHttpClientPoolPendingCount.set(httpClientPoolPendingCount.longValue()); - } - - recordSubTimingDurations(timingInfo, HttpRequestTime, stats.awsRequestTime); - recordSubTimingDurations(timingInfo, ClientExecuteTime, stats.awsClientExecuteTime); - recordSubTimingDurations(timingInfo, RetryPauseTime, stats.awsClientRetryPauseTime); - } - - private static void recordSubTimingDurations(TimingInfo timingInfo, AWSRequestMetrics.Field field, TimeStat timeStat) - { - List subTimings = timingInfo.getAllSubMeasurements(field.name()); - if (subTimings != null) { - for (TimingInfo subTiming : subTimings) { - Long endTimeNanos = subTiming.getEndTimeNanoIfKnown(); - if (endTimeNanos != null) { - timeStat.addNanos(endTimeNanos - subTiming.getStartTimeNano()); - } - } - } - } - } -} diff --git a/lib/trino-hdfs/src/main/java/io/trino/hdfs/s3/FileBasedS3SecurityMappingsProvider.java b/lib/trino-hdfs/src/main/java/io/trino/hdfs/s3/FileBasedS3SecurityMappingsProvider.java deleted file mode 100644 index 2074a65dc405..000000000000 --- a/lib/trino-hdfs/src/main/java/io/trino/hdfs/s3/FileBasedS3SecurityMappingsProvider.java +++ /dev/null @@ -1,59 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package io.trino.hdfs.s3; - -import com.google.inject.Inject; -import io.airlift.log.Logger; - -import java.io.File; -import java.io.IOException; -import java.io.UncheckedIOException; -import java.nio.file.Files; - -import static java.lang.String.format; - -public class FileBasedS3SecurityMappingsProvider - implements S3SecurityMappingsProvider -{ - private static final Logger log = Logger.get(FileBasedS3SecurityMappingsProvider.class); - private final File configFile; - private final S3SecurityMappingsParser parser; - - @Inject - public FileBasedS3SecurityMappingsProvider(S3SecurityMappingConfig config) - { - this.configFile = config.getConfigFilePath().map(File::new).orElseThrow(() -> new IllegalArgumentException("configFile not set")); - if (!configFile.exists()) { - throw new IllegalArgumentException(format("configFile %s does not exist", configFile.getAbsoluteFile())); - } - this.parser = new S3SecurityMappingsParser(config); - } - - private String getRawJsonString() - { - log.info("Retrieving config from file %s", configFile); - try { - return Files.readString(configFile.toPath()); - } - catch (IOException e) { - throw new UncheckedIOException("Failed to read file: " + configFile, e); - } - } - - @Override - public S3SecurityMappings get() - { - return parser.parseJSONString(getRawJsonString()); - } -} diff --git a/lib/trino-hdfs/src/main/java/io/trino/hdfs/s3/ForS3SecurityMapping.java b/lib/trino-hdfs/src/main/java/io/trino/hdfs/s3/ForS3SecurityMapping.java deleted file mode 100644 index 9168e632bc16..000000000000 --- a/lib/trino-hdfs/src/main/java/io/trino/hdfs/s3/ForS3SecurityMapping.java +++ /dev/null @@ -1,29 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package io.trino.hdfs.s3; - -import com.google.inject.BindingAnnotation; - -import java.lang.annotation.Retention; -import java.lang.annotation.Target; - -import static java.lang.annotation.ElementType.FIELD; -import static java.lang.annotation.ElementType.METHOD; -import static java.lang.annotation.ElementType.PARAMETER; -import static java.lang.annotation.RetentionPolicy.RUNTIME; - -@Retention(RUNTIME) -@Target({FIELD, PARAMETER, METHOD}) -@BindingAnnotation -public @interface ForS3SecurityMapping {} diff --git a/lib/trino-hdfs/src/main/java/io/trino/hdfs/s3/HiveS3Config.java b/lib/trino-hdfs/src/main/java/io/trino/hdfs/s3/HiveS3Config.java deleted file mode 100644 index 293b84cc01ec..000000000000 --- a/lib/trino-hdfs/src/main/java/io/trino/hdfs/s3/HiveS3Config.java +++ /dev/null @@ -1,731 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package io.trino.hdfs.s3; - -import com.google.common.base.StandardSystemProperty; -import com.google.common.collect.ImmutableList; -import io.airlift.configuration.Config; -import io.airlift.configuration.ConfigDescription; -import io.airlift.configuration.ConfigSecuritySensitive; -import io.airlift.configuration.DefunctConfig; -import io.airlift.configuration.validation.FileExists; -import io.airlift.units.DataSize; -import io.airlift.units.Duration; -import io.airlift.units.MaxDataSize; -import io.airlift.units.MinDataSize; -import io.airlift.units.MinDuration; -import jakarta.validation.constraints.Min; -import jakarta.validation.constraints.NotNull; - -import java.io.File; -import java.util.List; -import java.util.Optional; -import java.util.concurrent.TimeUnit; - -import static io.airlift.units.DataSize.Unit.MEGABYTE; -import static java.util.Locale.ENGLISH; - -@DefunctConfig("hive.s3.use-instance-credentials") -public class HiveS3Config -{ - private String s3AwsAccessKey; - private String s3AwsSecretKey; - private String s3Endpoint; - private String s3Region; - private TrinoS3StorageClass s3StorageClass = TrinoS3StorageClass.STANDARD; - private TrinoS3SignerType s3SignerType; - private String s3SignerClass; - private boolean s3PathStyleAccess; - private String s3IamRole; - private String s3ExternalId; - private boolean s3SslEnabled = true; - private boolean s3SseEnabled; - private TrinoS3SseType s3SseType = TrinoS3SseType.S3; - private String s3EncryptionMaterialsProvider; - private String s3KmsKeyId; - private String s3SseKmsKeyId; - private int s3MaxClientRetries = 5; - private int s3MaxErrorRetries = 10; - private Duration s3MaxBackoffTime = new Duration(10, TimeUnit.MINUTES); - private Duration s3MaxRetryTime = new Duration(10, TimeUnit.MINUTES); - private Duration s3ConnectTimeout = new Duration(5, TimeUnit.SECONDS); - private Optional s3ConnectTtl = Optional.empty(); - private Duration s3SocketTimeout = new Duration(5, TimeUnit.SECONDS); - private int s3MaxConnections = 500; - private File s3StagingDirectory = new File(StandardSystemProperty.JAVA_IO_TMPDIR.value()); - private DataSize s3MultipartMinFileSize = DataSize.of(16, MEGABYTE); - private DataSize s3MultipartMinPartSize = DataSize.of(5, MEGABYTE); - private boolean pinS3ClientToCurrentRegion; - private String s3UserAgentPrefix = ""; - private TrinoS3AclType s3AclType = TrinoS3AclType.PRIVATE; - private boolean skipGlacierObjects; - private boolean requesterPaysEnabled; - private boolean s3StreamingUploadEnabled = true; - private DataSize s3StreamingPartSize = DataSize.of(32, MEGABYTE); - private String s3proxyHost; - private Integer s3proxyPort = -1; - private TrinoS3Protocol s3ProxyProtocol = TrinoS3Protocol.HTTPS; - private List s3nonProxyHosts = ImmutableList.of(); - private String s3proxyUsername; - private String s3proxyPassword; - private boolean s3preemptiveBasicProxyAuth; - private String s3StsEndpoint; - private String s3StsRegion; - - @Deprecated(forRemoval = true, since = "470") - public String getS3AwsAccessKey() - { - return s3AwsAccessKey; - } - - @Deprecated(forRemoval = true, since = "470") - @Config("hive.s3.aws-access-key") - public HiveS3Config setS3AwsAccessKey(String s3AwsAccessKey) - { - this.s3AwsAccessKey = s3AwsAccessKey; - return this; - } - - @Deprecated(forRemoval = true, since = "470") - public String getS3AwsSecretKey() - { - return s3AwsSecretKey; - } - - @Deprecated(forRemoval = true, since = "470") - @Config("hive.s3.aws-secret-key") - @ConfigSecuritySensitive - public HiveS3Config setS3AwsSecretKey(String s3AwsSecretKey) - { - this.s3AwsSecretKey = s3AwsSecretKey; - return this; - } - - @Deprecated(forRemoval = true, since = "470") - public String getS3Endpoint() - { - return s3Endpoint; - } - - @Deprecated(forRemoval = true, since = "470") - @Config("hive.s3.endpoint") - public HiveS3Config setS3Endpoint(String s3Endpoint) - { - this.s3Endpoint = s3Endpoint; - return this; - } - - @Deprecated(forRemoval = true, since = "470") - public String getS3Region() - { - return s3Region; - } - - @Deprecated(forRemoval = true, since = "470") - @Config("hive.s3.region") - public HiveS3Config setS3Region(String s3Region) - { - this.s3Region = s3Region; - return this; - } - - @Deprecated(forRemoval = true, since = "470") - @NotNull - public TrinoS3StorageClass getS3StorageClass() - { - return s3StorageClass; - } - - @Deprecated(forRemoval = true, since = "470") - @Config("hive.s3.storage-class") - @ConfigDescription("AWS S3 storage class to use when writing the data") - public HiveS3Config setS3StorageClass(TrinoS3StorageClass s3StorageClass) - { - this.s3StorageClass = s3StorageClass; - return this; - } - - @Deprecated(forRemoval = true, since = "470") - public TrinoS3SignerType getS3SignerType() - { - return s3SignerType; - } - - @Deprecated(forRemoval = true, since = "470") - @Config("hive.s3.signer-type") - public HiveS3Config setS3SignerType(TrinoS3SignerType s3SignerType) - { - this.s3SignerType = s3SignerType; - return this; - } - - @Deprecated(forRemoval = true, since = "470") - public String getS3SignerClass() - { - return s3SignerClass; - } - - @Deprecated(forRemoval = true, since = "470") - @Config("hive.s3.signer-class") - public HiveS3Config setS3SignerClass(String s3SignerClass) - { - this.s3SignerClass = s3SignerClass; - return this; - } - - @Deprecated(forRemoval = true, since = "470") - public boolean isS3PathStyleAccess() - { - return s3PathStyleAccess; - } - - @Deprecated(forRemoval = true, since = "470") - @Config("hive.s3.path-style-access") - @ConfigDescription("Use path-style access for all request to S3") - public HiveS3Config setS3PathStyleAccess(boolean s3PathStyleAccess) - { - this.s3PathStyleAccess = s3PathStyleAccess; - return this; - } - - @Deprecated(forRemoval = true, since = "470") - public String getS3IamRole() - { - return s3IamRole; - } - - @Deprecated(forRemoval = true, since = "470") - @Config("hive.s3.iam-role") - @ConfigDescription("ARN of an IAM role to assume when connecting to S3") - public HiveS3Config setS3IamRole(String s3IamRole) - { - this.s3IamRole = s3IamRole; - return this; - } - - @Deprecated(forRemoval = true, since = "470") - public String getS3ExternalId() - { - return s3ExternalId; - } - - @Deprecated(forRemoval = true, since = "470") - @Config("hive.s3.external-id") - @ConfigDescription("External ID for the IAM role trust policy when connecting to S3") - public HiveS3Config setS3ExternalId(String s3ExternalId) - { - this.s3ExternalId = s3ExternalId; - return this; - } - - @Deprecated(forRemoval = true, since = "470") - public boolean isS3SslEnabled() - { - return s3SslEnabled; - } - - @Deprecated(forRemoval = true, since = "470") - @Config("hive.s3.ssl.enabled") - public HiveS3Config setS3SslEnabled(boolean s3SslEnabled) - { - this.s3SslEnabled = s3SslEnabled; - return this; - } - - @Deprecated(forRemoval = true, since = "470") - public String getS3EncryptionMaterialsProvider() - { - return s3EncryptionMaterialsProvider; - } - - @Deprecated(forRemoval = true, since = "470") - @Config("hive.s3.encryption-materials-provider") - @ConfigDescription("Use a custom encryption materials provider for S3 data encryption") - public HiveS3Config setS3EncryptionMaterialsProvider(String s3EncryptionMaterialsProvider) - { - this.s3EncryptionMaterialsProvider = s3EncryptionMaterialsProvider; - return this; - } - - @Deprecated(forRemoval = true, since = "470") - public String getS3KmsKeyId() - { - return s3KmsKeyId; - } - - @Deprecated(forRemoval = true, since = "470") - @Config("hive.s3.kms-key-id") - @ConfigDescription("Use an AWS KMS key for S3 data encryption") - public HiveS3Config setS3KmsKeyId(String s3KmsKeyId) - { - this.s3KmsKeyId = s3KmsKeyId; - return this; - } - - @Deprecated(forRemoval = true, since = "470") - public String getS3SseKmsKeyId() - { - return s3SseKmsKeyId; - } - - @Deprecated(forRemoval = true, since = "470") - @Config("hive.s3.sse.kms-key-id") - @ConfigDescription("KMS Key ID to use for S3 server-side encryption with KMS-managed key") - public HiveS3Config setS3SseKmsKeyId(String s3SseKmsKeyId) - { - this.s3SseKmsKeyId = s3SseKmsKeyId; - return this; - } - - @Deprecated(forRemoval = true, since = "470") - public boolean isS3SseEnabled() - { - return s3SseEnabled; - } - - @Deprecated(forRemoval = true, since = "470") - @Config("hive.s3.sse.enabled") - @ConfigDescription("Enable S3 server side encryption") - public HiveS3Config setS3SseEnabled(boolean s3SseEnabled) - { - this.s3SseEnabled = s3SseEnabled; - return this; - } - - @Deprecated(forRemoval = true, since = "470") - @NotNull - public TrinoS3SseType getS3SseType() - { - return s3SseType; - } - - @Deprecated(forRemoval = true, since = "470") - @Config("hive.s3.sse.type") - @ConfigDescription("Key management type for S3 server-side encryption (S3 or KMS)") - public HiveS3Config setS3SseType(TrinoS3SseType s3SseType) - { - this.s3SseType = s3SseType; - return this; - } - - @Deprecated(forRemoval = true, since = "470") - @Min(0) - public int getS3MaxClientRetries() - { - return s3MaxClientRetries; - } - - @Deprecated(forRemoval = true, since = "470") - @Config("hive.s3.max-client-retries") - public HiveS3Config setS3MaxClientRetries(int s3MaxClientRetries) - { - this.s3MaxClientRetries = s3MaxClientRetries; - return this; - } - - @Deprecated(forRemoval = true, since = "470") - @Min(0) - public int getS3MaxErrorRetries() - { - return s3MaxErrorRetries; - } - - @Deprecated(forRemoval = true, since = "470") - @Config("hive.s3.max-error-retries") - public HiveS3Config setS3MaxErrorRetries(int s3MaxErrorRetries) - { - this.s3MaxErrorRetries = s3MaxErrorRetries; - return this; - } - - @Deprecated(forRemoval = true, since = "470") - @MinDuration("1s") - @NotNull - public Duration getS3MaxBackoffTime() - { - return s3MaxBackoffTime; - } - - @Deprecated(forRemoval = true, since = "470") - @Config("hive.s3.max-backoff-time") - public HiveS3Config setS3MaxBackoffTime(Duration s3MaxBackoffTime) - { - this.s3MaxBackoffTime = s3MaxBackoffTime; - return this; - } - - @Deprecated(forRemoval = true, since = "470") - @MinDuration("1ms") - @NotNull - public Duration getS3MaxRetryTime() - { - return s3MaxRetryTime; - } - - @Deprecated(forRemoval = true, since = "470") - @Config("hive.s3.max-retry-time") - public HiveS3Config setS3MaxRetryTime(Duration s3MaxRetryTime) - { - this.s3MaxRetryTime = s3MaxRetryTime; - return this; - } - - @Deprecated(forRemoval = true, since = "470") - @MinDuration("1ms") - @NotNull - public Duration getS3ConnectTimeout() - { - return s3ConnectTimeout; - } - - @Deprecated(forRemoval = true, since = "470") - @Config("hive.s3.connect-timeout") - public HiveS3Config setS3ConnectTimeout(Duration s3ConnectTimeout) - { - this.s3ConnectTimeout = s3ConnectTimeout; - return this; - } - - @Deprecated(forRemoval = true, since = "470") - @NotNull - public Optional getS3ConnectTtl() - { - return s3ConnectTtl; - } - - @Deprecated(forRemoval = true, since = "470") - @Config("hive.s3.connect-ttl") - @ConfigDescription("TCP connect TTL in the client side, which affects connection reusage") - public HiveS3Config setS3ConnectTtl(Duration s3ConnectTtl) - { - this.s3ConnectTtl = Optional.ofNullable(s3ConnectTtl); - return this; - } - - @Deprecated(forRemoval = true, since = "470") - @MinDuration("1ms") - @NotNull - public Duration getS3SocketTimeout() - { - return s3SocketTimeout; - } - - @Deprecated(forRemoval = true, since = "470") - @Config("hive.s3.socket-timeout") - public HiveS3Config setS3SocketTimeout(Duration s3SocketTimeout) - { - this.s3SocketTimeout = s3SocketTimeout; - return this; - } - - @Deprecated(forRemoval = true, since = "470") - @Min(1) - public int getS3MaxConnections() - { - return s3MaxConnections; - } - - @Deprecated(forRemoval = true, since = "470") - @Config("hive.s3.max-connections") - public HiveS3Config setS3MaxConnections(int s3MaxConnections) - { - this.s3MaxConnections = s3MaxConnections; - return this; - } - - @Deprecated(forRemoval = true, since = "470") - @NotNull - @FileExists - public File getS3StagingDirectory() - { - return s3StagingDirectory; - } - - @Deprecated(forRemoval = true, since = "470") - @Config("hive.s3.staging-directory") - @ConfigDescription("Temporary directory for staging files before uploading to S3") - public HiveS3Config setS3StagingDirectory(File s3StagingDirectory) - { - this.s3StagingDirectory = s3StagingDirectory; - return this; - } - - @Deprecated(forRemoval = true, since = "470") - @NotNull - @MinDataSize("16MB") - public DataSize getS3MultipartMinFileSize() - { - return s3MultipartMinFileSize; - } - - @Deprecated(forRemoval = true, since = "470") - @Config("hive.s3.multipart.min-file-size") - @ConfigDescription("Minimum file size for an S3 multipart upload") - public HiveS3Config setS3MultipartMinFileSize(DataSize size) - { - this.s3MultipartMinFileSize = size; - return this; - } - - @Deprecated(forRemoval = true, since = "470") - @NotNull - @MinDataSize("5MB") - public DataSize getS3MultipartMinPartSize() - { - return s3MultipartMinPartSize; - } - - @Deprecated(forRemoval = true, since = "470") - @Config("hive.s3.multipart.min-part-size") - @ConfigDescription("Minimum part size for an S3 multipart upload") - public HiveS3Config setS3MultipartMinPartSize(DataSize size) - { - this.s3MultipartMinPartSize = size; - return this; - } - - @Deprecated(forRemoval = true, since = "470") - public boolean isPinS3ClientToCurrentRegion() - { - return pinS3ClientToCurrentRegion; - } - - @Deprecated(forRemoval = true, since = "470") - @Config("hive.s3.pin-client-to-current-region") - @ConfigDescription("Should the S3 client be pinned to the current EC2 region") - public HiveS3Config setPinS3ClientToCurrentRegion(boolean pinS3ClientToCurrentRegion) - { - this.pinS3ClientToCurrentRegion = pinS3ClientToCurrentRegion; - return this; - } - - @Deprecated(forRemoval = true, since = "470") - @NotNull - public String getS3UserAgentPrefix() - { - return s3UserAgentPrefix; - } - - @Deprecated(forRemoval = true, since = "470") - @Config("hive.s3.user-agent-prefix") - @ConfigDescription("The user agent prefix to use for S3 calls") - public HiveS3Config setS3UserAgentPrefix(String s3UserAgentPrefix) - { - this.s3UserAgentPrefix = s3UserAgentPrefix; - return this; - } - - @Deprecated(forRemoval = true, since = "470") - @NotNull - public TrinoS3AclType getS3AclType() - { - return s3AclType; - } - - @Deprecated(forRemoval = true, since = "470") - @Config("hive.s3.upload-acl-type") - @ConfigDescription("Canned ACL type for S3 uploads") - public HiveS3Config setS3AclType(TrinoS3AclType s3AclType) - { - this.s3AclType = s3AclType; - return this; - } - - @Deprecated(forRemoval = true, since = "470") - public boolean isSkipGlacierObjects() - { - return skipGlacierObjects; - } - - @Deprecated(forRemoval = true, since = "470") - @Config("hive.s3.skip-glacier-objects") - public HiveS3Config setSkipGlacierObjects(boolean skipGlacierObjects) - { - this.skipGlacierObjects = skipGlacierObjects; - return this; - } - - @Deprecated(forRemoval = true, since = "470") - public boolean isRequesterPaysEnabled() - { - return requesterPaysEnabled; - } - - @Deprecated(forRemoval = true, since = "470") - @Config("hive.s3.requester-pays.enabled") - public HiveS3Config setRequesterPaysEnabled(boolean requesterPaysEnabled) - { - this.requesterPaysEnabled = requesterPaysEnabled; - return this; - } - - @Deprecated(forRemoval = true, since = "470") - public boolean isS3StreamingUploadEnabled() - { - return s3StreamingUploadEnabled; - } - - @Deprecated(forRemoval = true, since = "470") - @Config("hive.s3.streaming.enabled") - public HiveS3Config setS3StreamingUploadEnabled(boolean s3StreamingUploadEnabled) - { - this.s3StreamingUploadEnabled = s3StreamingUploadEnabled; - return this; - } - - @Deprecated(forRemoval = true, since = "470") - @NotNull - @MinDataSize("5MB") - @MaxDataSize("256MB") - public DataSize getS3StreamingPartSize() - { - return s3StreamingPartSize; - } - - @Deprecated(forRemoval = true, since = "470") - @Config("hive.s3.streaming.part-size") - @ConfigDescription("Part size for S3 streaming upload") - public HiveS3Config setS3StreamingPartSize(DataSize s3StreamingPartSize) - { - this.s3StreamingPartSize = s3StreamingPartSize; - return this; - } - - @Deprecated(forRemoval = true, since = "470") - public String getS3ProxyHost() - { - return s3proxyHost; - } - - @Deprecated(forRemoval = true, since = "470") - @Config("hive.s3.proxy.host") - public HiveS3Config setS3ProxyHost(String s3proxyHost) - { - this.s3proxyHost = s3proxyHost; - return this; - } - - @Deprecated(forRemoval = true, since = "470") - public int getS3ProxyPort() - { - return s3proxyPort; - } - - @Deprecated(forRemoval = true, since = "470") - @Config("hive.s3.proxy.port") - public HiveS3Config setS3ProxyPort(int s3proxyPort) - { - this.s3proxyPort = s3proxyPort; - return this; - } - - @Deprecated(forRemoval = true, since = "470") - public TrinoS3Protocol getS3ProxyProtocol() - { - return s3ProxyProtocol; - } - - @Deprecated(forRemoval = true, since = "470") - @Config("hive.s3.proxy.protocol") - public HiveS3Config setS3ProxyProtocol(String s3ProxyProtocol) - { - this.s3ProxyProtocol = TrinoS3Protocol.valueOf(s3ProxyProtocol.toUpperCase(ENGLISH)); - return this; - } - - @Deprecated(forRemoval = true, since = "470") - public List getS3NonProxyHosts() - { - return s3nonProxyHosts; - } - - @Deprecated(forRemoval = true, since = "470") - @Config("hive.s3.proxy.non-proxy-hosts") - public HiveS3Config setS3NonProxyHosts(List s3nonProxyHosts) - { - this.s3nonProxyHosts = ImmutableList.copyOf(s3nonProxyHosts); - return this; - } - - @Deprecated(forRemoval = true, since = "470") - public String getS3ProxyUsername() - { - return s3proxyUsername; - } - - @Deprecated(forRemoval = true, since = "470") - @Config("hive.s3.proxy.username") - public HiveS3Config setS3ProxyUsername(String s3proxyUsername) - { - this.s3proxyUsername = s3proxyUsername; - return this; - } - - @Deprecated(forRemoval = true, since = "470") - public String getS3ProxyPassword() - { - return s3proxyPassword; - } - - @Deprecated(forRemoval = true, since = "470") - @Config("hive.s3.proxy.password") - @ConfigSecuritySensitive - public HiveS3Config setS3ProxyPassword(String s3proxyPassword) - { - this.s3proxyPassword = s3proxyPassword; - return this; - } - - @Deprecated(forRemoval = true, since = "470") - public boolean getS3PreemptiveBasicProxyAuth() - { - return s3preemptiveBasicProxyAuth; - } - - @Deprecated(forRemoval = true, since = "470") - @Config("hive.s3.proxy.preemptive-basic-auth") - public HiveS3Config setS3PreemptiveBasicProxyAuth(boolean s3preemptiveBasicProxyAuth) - { - this.s3preemptiveBasicProxyAuth = s3preemptiveBasicProxyAuth; - return this; - } - - @Deprecated(forRemoval = true, since = "470") - public String getS3StsEndpoint() - { - return s3StsEndpoint; - } - - @Deprecated(forRemoval = true, since = "470") - @Config("hive.s3.sts.endpoint") - public HiveS3Config setS3StsEndpoint(String s3StsEndpoint) - { - this.s3StsEndpoint = s3StsEndpoint; - return this; - } - - @Deprecated(forRemoval = true, since = "470") - public String getS3StsRegion() - { - return s3StsRegion; - } - - @Deprecated(forRemoval = true, since = "470") - @Config("hive.s3.sts.region") - public HiveS3Config setS3StsRegion(String s3StsRegion) - { - this.s3StsRegion = s3StsRegion; - return this; - } -} diff --git a/lib/trino-hdfs/src/main/java/io/trino/hdfs/s3/HiveS3Module.java b/lib/trino-hdfs/src/main/java/io/trino/hdfs/s3/HiveS3Module.java deleted file mode 100644 index 8932c49ccb2e..000000000000 --- a/lib/trino-hdfs/src/main/java/io/trino/hdfs/s3/HiveS3Module.java +++ /dev/null @@ -1,113 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package io.trino.hdfs.s3; - -import com.google.inject.Binder; -import com.google.inject.Scopes; -import io.airlift.configuration.AbstractConfigurationAwareModule; -import io.airlift.units.Duration; -import io.trino.hdfs.ConfigurationInitializer; -import io.trino.hdfs.DynamicConfigurationProvider; -import org.apache.hadoop.conf.Configuration; - -import java.util.concurrent.TimeUnit; - -import static com.google.inject.multibindings.Multibinder.newSetBinder; -import static io.airlift.configuration.ConfigBinder.configBinder; -import static io.airlift.http.client.HttpClientBinder.httpClientBinder; -import static org.weakref.jmx.guice.ExportBinder.newExporter; - -public class HiveS3Module - extends AbstractConfigurationAwareModule -{ - public static final String EMR_FS_CLASS_NAME = "com.amazon.ws.emr.hadoop.fs.EmrFileSystem"; - - @Override - protected void setup(Binder binder) - { - S3FileSystemType type = buildConfigObject(HiveS3TypeConfig.class).getS3FileSystemType(); - switch (type) { - case TRINO: - bindSecurityMapping(binder); - - newSetBinder(binder, ConfigurationInitializer.class).addBinding().to(TrinoS3ConfigurationInitializer.class).in(Scopes.SINGLETON); - configBinder(binder).bindConfig(HiveS3Config.class); - - binder.bind(TrinoS3FileSystemStats.class).toInstance(TrinoS3FileSystem.getFileSystemStats()); - newExporter(binder).export(TrinoS3FileSystemStats.class) - .as(generator -> generator.generatedNameOf(TrinoS3FileSystem.class)); - return; - case EMRFS: - validateEmrFsClass(); - newSetBinder(binder, ConfigurationInitializer.class).addBinding().to(EmrFsS3ConfigurationInitializer.class).in(Scopes.SINGLETON); - return; - case HADOOP_DEFAULT: - // configuration is done using Hadoop configuration files - return; - } - throw new RuntimeException("Unknown file system type: " + type); - } - - private void bindSecurityMapping(Binder binder) - { - S3SecurityMappingConfig configuration = buildConfigObject(S3SecurityMappingConfig.class); - if (configuration.getConfigFilePath().isEmpty()) { - return; - } - - if (isHttp(configuration)) { - binder.bind(S3SecurityMappingsProvider.class).to(UriBasedS3SecurityMappingsProvider.class).in(Scopes.SINGLETON); - httpClientBinder(binder).bindHttpClient("s3SecurityMapping", ForS3SecurityMapping.class) - .withConfigDefaults(config -> config - .setRequestTimeout(Duration.succinctDuration(10, TimeUnit.SECONDS)) - .setSelectorCount(1) - .setMinThreads(1)); - } - else { - binder.bind(S3SecurityMappingsProvider.class).to(FileBasedS3SecurityMappingsProvider.class).in(Scopes.SINGLETON); - } - - newSetBinder(binder, DynamicConfigurationProvider.class).addBinding() - .to(S3SecurityMappingConfigurationProvider.class).in(Scopes.SINGLETON); - } - - private static void validateEmrFsClass() - { - // verify that the class exists - try { - Class.forName(EMR_FS_CLASS_NAME); - } - catch (ClassNotFoundException e) { - throw new RuntimeException("EMR File System class not found: " + EMR_FS_CLASS_NAME, e); - } - } - - private static boolean isHttp(S3SecurityMappingConfig config) - { - return config.getConfigFilePath().map(configFile -> configFile.startsWith("https://") || configFile.startsWith("http://")).orElse(false); - } - - public static class EmrFsS3ConfigurationInitializer - implements ConfigurationInitializer - { - @Override - public void initializeConfiguration(Configuration config) - { - // re-map filesystem schemes to use the Amazon EMR file system - config.set("fs.s3.impl", EMR_FS_CLASS_NAME); - config.set("fs.s3a.impl", EMR_FS_CLASS_NAME); - config.set("fs.s3n.impl", EMR_FS_CLASS_NAME); - } - } -} diff --git a/lib/trino-hdfs/src/main/java/io/trino/hdfs/s3/HiveS3TypeConfig.java b/lib/trino-hdfs/src/main/java/io/trino/hdfs/s3/HiveS3TypeConfig.java deleted file mode 100644 index c15b9b14a4a7..000000000000 --- a/lib/trino-hdfs/src/main/java/io/trino/hdfs/s3/HiveS3TypeConfig.java +++ /dev/null @@ -1,37 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package io.trino.hdfs.s3; - -import io.airlift.configuration.Config; -import jakarta.validation.constraints.NotNull; - -public class HiveS3TypeConfig -{ - private S3FileSystemType s3FileSystemType = S3FileSystemType.TRINO; - - @Deprecated(forRemoval = true, since = "470") - @NotNull - public S3FileSystemType getS3FileSystemType() - { - return s3FileSystemType; - } - - @Deprecated(forRemoval = true, since = "470") - @Config("hive.s3-file-system-type") - public HiveS3TypeConfig setS3FileSystemType(S3FileSystemType s3FileSystemType) - { - this.s3FileSystemType = s3FileSystemType; - return this; - } -} diff --git a/lib/trino-hdfs/src/main/java/io/trino/hdfs/s3/RetryDriver.java b/lib/trino-hdfs/src/main/java/io/trino/hdfs/s3/RetryDriver.java deleted file mode 100644 index ec378c20a516..000000000000 --- a/lib/trino-hdfs/src/main/java/io/trino/hdfs/s3/RetryDriver.java +++ /dev/null @@ -1,170 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package io.trino.hdfs.s3; - -import com.google.common.collect.ImmutableList; -import io.airlift.log.Logger; -import io.airlift.units.Duration; - -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; -import java.util.Optional; -import java.util.concurrent.Callable; -import java.util.concurrent.ThreadLocalRandom; -import java.util.concurrent.TimeUnit; - -import static java.util.Objects.requireNonNull; -import static java.util.concurrent.TimeUnit.SECONDS; - -public class RetryDriver -{ - private static final Logger log = Logger.get(RetryDriver.class); - public static final int DEFAULT_MAX_ATTEMPTS = 10; - public static final Duration DEFAULT_SLEEP_TIME = new Duration(1, SECONDS); - public static final Duration DEFAULT_MAX_RETRY_TIME = new Duration(30, SECONDS); - public static final double DEFAULT_SCALE_FACTOR = 2.0; - - private final int maxAttempts; - private final Duration minSleepTime; - private final Duration maxSleepTime; - private final double scaleFactor; - private final Duration maxRetryTime; - private final List> stopOnExceptions; - private final Optional retryRunnable; - - private RetryDriver( - int maxAttempts, - Duration minSleepTime, - Duration maxSleepTime, - double scaleFactor, - Duration maxRetryTime, - List> stopOnExceptions, - Optional retryRunnable) - { - this.maxAttempts = maxAttempts; - this.minSleepTime = minSleepTime; - this.maxSleepTime = maxSleepTime; - this.scaleFactor = scaleFactor; - this.maxRetryTime = maxRetryTime; - this.stopOnExceptions = stopOnExceptions; - this.retryRunnable = retryRunnable; - } - - private RetryDriver() - { - this(DEFAULT_MAX_ATTEMPTS, - DEFAULT_SLEEP_TIME, - DEFAULT_SLEEP_TIME, - DEFAULT_SCALE_FACTOR, - DEFAULT_MAX_RETRY_TIME, - ImmutableList.of(), - Optional.empty()); - } - - public static RetryDriver retry() - { - return new RetryDriver(); - } - - public final RetryDriver maxAttempts(int maxAttempts) - { - return new RetryDriver(maxAttempts, minSleepTime, maxSleepTime, scaleFactor, maxRetryTime, stopOnExceptions, retryRunnable); - } - - public final RetryDriver exponentialBackoff(Duration minSleepTime, Duration maxSleepTime, Duration maxRetryTime, double scaleFactor) - { - return new RetryDriver(maxAttempts, minSleepTime, maxSleepTime, scaleFactor, maxRetryTime, stopOnExceptions, retryRunnable); - } - - public final RetryDriver onRetry(Runnable retryRunnable) - { - return new RetryDriver(maxAttempts, minSleepTime, maxSleepTime, scaleFactor, maxRetryTime, stopOnExceptions, Optional.ofNullable(retryRunnable)); - } - - @SafeVarargs - public final RetryDriver stopOn(Class... classes) - { - requireNonNull(classes, "classes is null"); - List> exceptions = ImmutableList.>builder() - .addAll(stopOnExceptions) - .addAll(Arrays.asList(classes)) - .build(); - - return new RetryDriver(maxAttempts, minSleepTime, maxSleepTime, scaleFactor, maxRetryTime, exceptions, retryRunnable); - } - - public V run(String callableName, Callable callable) - throws Exception - { - requireNonNull(callableName, "callableName is null"); - requireNonNull(callable, "callable is null"); - - List suppressedExceptions = new ArrayList<>(); - long startTime = System.nanoTime(); - int attempt = 0; - while (true) { - attempt++; - - if (attempt > 1) { - retryRunnable.ifPresent(Runnable::run); - } - - try { - return callable.call(); - } - catch (Exception e) { - // Immediately stop retry attempts once an interrupt has been received - if (e instanceof InterruptedException || Thread.currentThread().isInterrupted()) { - addSuppressed(e, suppressedExceptions); - throw e; - } - for (Class clazz : stopOnExceptions) { - if (clazz.isInstance(e)) { - addSuppressed(e, suppressedExceptions); - throw e; - } - } - if (attempt >= maxAttempts || Duration.nanosSince(startTime).compareTo(maxRetryTime) >= 0) { - addSuppressed(e, suppressedExceptions); - throw e; - } - log.debug("Failed on executing %s with attempt %d, will retry. Exception: %s", callableName, attempt, e.getMessage()); - - suppressedExceptions.add(e); - - int delayInMs = (int) Math.min(minSleepTime.toMillis() * Math.pow(scaleFactor, attempt - 1), maxSleepTime.toMillis()); - int jitter = ThreadLocalRandom.current().nextInt(Math.max(1, (int) (delayInMs * 0.1))); - try { - TimeUnit.MILLISECONDS.sleep(delayInMs + jitter); - } - catch (InterruptedException ie) { - Thread.currentThread().interrupt(); - Exception exception = new RuntimeException(ie); - addSuppressed(exception, suppressedExceptions); - throw exception; - } - } - } - } - - private static void addSuppressed(Exception exception, List suppressedExceptions) - { - for (Throwable suppressedException : suppressedExceptions) { - if (exception != suppressedException) { - exception.addSuppressed(suppressedException); - } - } - } -} diff --git a/lib/trino-hdfs/src/main/java/io/trino/hdfs/s3/S3FileSystemType.java b/lib/trino-hdfs/src/main/java/io/trino/hdfs/s3/S3FileSystemType.java deleted file mode 100644 index 87868ce69017..000000000000 --- a/lib/trino-hdfs/src/main/java/io/trino/hdfs/s3/S3FileSystemType.java +++ /dev/null @@ -1,26 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package io.trino.hdfs.s3; - -public enum S3FileSystemType -{ - TRINO, - /** - * @deprecated EMRFS was used to workaround S3's lack of strong consistency and is no longer needed - * per AWS's announcement. - */ - @Deprecated - EMRFS, - HADOOP_DEFAULT, -} diff --git a/lib/trino-hdfs/src/main/java/io/trino/hdfs/s3/S3SecurityMapping.java b/lib/trino-hdfs/src/main/java/io/trino/hdfs/s3/S3SecurityMapping.java deleted file mode 100644 index 25fad944921e..000000000000 --- a/lib/trino-hdfs/src/main/java/io/trino/hdfs/s3/S3SecurityMapping.java +++ /dev/null @@ -1,192 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package io.trino.hdfs.s3; - -import com.amazonaws.auth.BasicAWSCredentials; -import com.fasterxml.jackson.annotation.JsonCreator; -import com.fasterxml.jackson.annotation.JsonProperty; -import com.google.common.collect.ImmutableList; -import com.google.common.collect.ImmutableSet; -import io.trino.spi.security.ConnectorIdentity; - -import java.net.URI; -import java.util.Collection; -import java.util.List; -import java.util.Optional; -import java.util.Set; -import java.util.function.Predicate; -import java.util.regex.Pattern; - -import static com.google.common.base.MoreObjects.toStringHelper; -import static com.google.common.base.Preconditions.checkArgument; -import static io.trino.hdfs.s3.TrinoS3FileSystem.extractBucketName; -import static java.util.Objects.requireNonNull; - -public class S3SecurityMapping -{ - private final Predicate user; - private final Predicate> group; - private final Predicate prefix; - private final Optional iamRole; - private final Set allowedIamRoles; - private final Optional kmsKeyId; - private final Set allowedKmsKeyIds; - private final Optional credentials; - private final boolean useClusterDefault; - private final Optional endpoint; - private final Optional roleSessionName; - private final Optional region; - - @JsonCreator - public S3SecurityMapping( - @JsonProperty("user") Optional user, - @JsonProperty("group") Optional group, - @JsonProperty("prefix") Optional prefix, - @JsonProperty("iamRole") Optional iamRole, - @JsonProperty("roleSessionName") Optional roleSessionName, - @JsonProperty("allowedIamRoles") Optional> allowedIamRoles, - @JsonProperty("kmsKeyId") Optional kmsKeyId, - @JsonProperty("allowedKmsKeyIds") Optional> allowedKmsKeyIds, - @JsonProperty("accessKey") Optional accessKey, - @JsonProperty("secretKey") Optional secretKey, - @JsonProperty("useClusterDefault") Optional useClusterDefault, - @JsonProperty("endpoint") Optional endpoint, - @JsonProperty("region") Optional region) - { - this.user = user - .map(S3SecurityMapping::toPredicate) - .orElse(x -> true); - this.group = group - .map(S3SecurityMapping::toPredicate) - .map(S3SecurityMapping::anyMatch) - .orElse(x -> true); - this.prefix = prefix - .map(S3SecurityMapping::prefixPredicate) - .orElse(x -> true); - - this.iamRole = requireNonNull(iamRole, "iamRole is null"); - this.roleSessionName = requireNonNull(roleSessionName, "roleSessionName is null"); - checkArgument(!(iamRole.isEmpty() && roleSessionName.isPresent()), "iamRole must be provided when roleSessionName is provided"); - - this.allowedIamRoles = ImmutableSet.copyOf(allowedIamRoles.orElse(ImmutableList.of())); - - this.kmsKeyId = requireNonNull(kmsKeyId, "kmsKeyId is null"); - - this.allowedKmsKeyIds = ImmutableSet.copyOf(allowedKmsKeyIds.orElse(ImmutableList.of())); - - requireNonNull(accessKey, "accessKey is null"); - requireNonNull(secretKey, "secretKey is null"); - checkArgument(accessKey.isPresent() == secretKey.isPresent(), "accessKey and secretKey must be provided together"); - this.credentials = accessKey.map(access -> new BasicAWSCredentials(access, secretKey.get())); - - this.useClusterDefault = useClusterDefault.orElse(false); - boolean roleOrCredentialsArePresent = !this.allowedIamRoles.isEmpty() || iamRole.isPresent() || credentials.isPresent(); - checkArgument(this.useClusterDefault ^ roleOrCredentialsArePresent, "must either allow useClusterDefault role or provide role and/or credentials"); - - checkArgument(!(this.useClusterDefault && this.kmsKeyId.isPresent()), "KMS key ID cannot be provided together with useClusterDefault"); - - this.endpoint = requireNonNull(endpoint, "endpoint is null"); - this.region = requireNonNull(region, "region is null"); - } - - public boolean matches(ConnectorIdentity identity, URI uri) - { - return user.test(identity.getUser()) - && group.test(identity.getGroups()) - && prefix.test(uri); - } - - public Optional getIamRole() - { - return iamRole; - } - - public Set getAllowedIamRoles() - { - return allowedIamRoles; - } - - public Optional getKmsKeyId() - { - return kmsKeyId; - } - - public Set getAllowedKmsKeyIds() - { - return allowedKmsKeyIds; - } - - public Optional getCredentials() - { - return credentials; - } - - public boolean isUseClusterDefault() - { - return useClusterDefault; - } - - public Optional getEndpoint() - { - return endpoint; - } - - public Optional getRegion() - { - return region; - } - - public Optional getRoleSessionName() - { - return roleSessionName; - } - - @Override - public String toString() - { - return toStringHelper(this) - .add("user", user) - .add("group", group) - .add("prefix", prefix) - .add("iamRole", iamRole) - .add("roleSessionName", roleSessionName.orElse(null)) - .add("allowedIamRoles", allowedIamRoles) - .add("kmsKeyId", kmsKeyId) - .add("allowedKmsKeyIds", allowedKmsKeyIds) - .add("credentials", credentials) - .add("useClusterDefault", useClusterDefault) - .add("endpoint", endpoint.orElse(null)) - .add("region", region.orElse(null)) - .toString(); - } - - private static Predicate prefixPredicate(URI prefix) - { - checkArgument("s3".equals(prefix.getScheme()), "prefix URI scheme is not 's3': %s", prefix); - checkArgument(prefix.getQuery() == null, "prefix URI must not contain query: %s", prefix); - checkArgument(prefix.getFragment() == null, "prefix URI must not contain fragment: %s", prefix); - return value -> extractBucketName(prefix).equals(extractBucketName(value)) && - value.getPath().startsWith(prefix.getPath()); - } - - private static Predicate toPredicate(Pattern pattern) - { - return value -> pattern.matcher(value).matches(); - } - - private static Predicate> anyMatch(Predicate predicate) - { - return values -> values.stream().anyMatch(predicate); - } -} diff --git a/lib/trino-hdfs/src/main/java/io/trino/hdfs/s3/S3SecurityMappingConfig.java b/lib/trino-hdfs/src/main/java/io/trino/hdfs/s3/S3SecurityMappingConfig.java deleted file mode 100644 index 98c7093e263a..000000000000 --- a/lib/trino-hdfs/src/main/java/io/trino/hdfs/s3/S3SecurityMappingConfig.java +++ /dev/null @@ -1,122 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package io.trino.hdfs.s3; - -import io.airlift.configuration.Config; -import io.airlift.configuration.ConfigDescription; -import io.airlift.units.Duration; -import jakarta.validation.constraints.NotNull; - -import java.util.Optional; - -public class S3SecurityMappingConfig -{ - private String configFilePath; - private String jsonPointer = ""; - private String roleCredentialName; - private String kmsKeyIdCredentialName; - private Duration refreshPeriod; - private String colonReplacement; - - @Deprecated(forRemoval = true, since = "470") - public Optional getConfigFilePath() - { - return Optional.ofNullable(configFilePath); - } - - @Deprecated(forRemoval = true, since = "470") - @Config("hive.s3.security-mapping.config-file") - @ConfigDescription("JSON configuration file containing security mappings") - public S3SecurityMappingConfig setConfigFilePath(String configFilePath) - { - this.configFilePath = configFilePath; - return this; - } - - @Deprecated(forRemoval = true, since = "470") - @NotNull - public String getJsonPointer() - { - return jsonPointer; - } - - @Deprecated(forRemoval = true, since = "470") - @Config("hive.s3.security-mapping.json-pointer") - @ConfigDescription("JSON pointer (RFC 6901) to mappings inside JSON config") - public S3SecurityMappingConfig setJsonPointer(String jsonPointer) - { - this.jsonPointer = jsonPointer; - return this; - } - - @Deprecated(forRemoval = true, since = "470") - public Optional getRoleCredentialName() - { - return Optional.ofNullable(roleCredentialName); - } - - @Deprecated(forRemoval = true, since = "470") - @Config("hive.s3.security-mapping.iam-role-credential-name") - @ConfigDescription("Name of the extra credential used to provide IAM role") - public S3SecurityMappingConfig setRoleCredentialName(String roleCredentialName) - { - this.roleCredentialName = roleCredentialName; - return this; - } - - @Deprecated(forRemoval = true, since = "470") - public Optional getKmsKeyIdCredentialName() - { - return Optional.ofNullable(kmsKeyIdCredentialName); - } - - @Deprecated(forRemoval = true, since = "470") - @Config("hive.s3.security-mapping.kms-key-id-credential-name") - @ConfigDescription("Name of the extra credential used to provide KMS Key ID") - public S3SecurityMappingConfig setKmsKeyIdCredentialName(String kmsKeyIdCredentialName) - { - this.kmsKeyIdCredentialName = kmsKeyIdCredentialName; - return this; - } - - @Deprecated(forRemoval = true, since = "470") - public Optional getRefreshPeriod() - { - return Optional.ofNullable(refreshPeriod); - } - - @Deprecated(forRemoval = true, since = "470") - @Config("hive.s3.security-mapping.refresh-period") - @ConfigDescription("How often to refresh the security mapping configuration") - public S3SecurityMappingConfig setRefreshPeriod(Duration refreshPeriod) - { - this.refreshPeriod = refreshPeriod; - return this; - } - - @Deprecated(forRemoval = true, since = "470") - public Optional getColonReplacement() - { - return Optional.ofNullable(colonReplacement); - } - - @Deprecated(forRemoval = true, since = "470") - @Config("hive.s3.security-mapping.colon-replacement") - @ConfigDescription("Value used in place of colon for IAM role name in extra credentials") - public S3SecurityMappingConfig setColonReplacement(String colonReplacement) - { - this.colonReplacement = colonReplacement; - return this; - } -} diff --git a/lib/trino-hdfs/src/main/java/io/trino/hdfs/s3/S3SecurityMappingConfigurationProvider.java b/lib/trino-hdfs/src/main/java/io/trino/hdfs/s3/S3SecurityMappingConfigurationProvider.java deleted file mode 100644 index e1403e10f230..000000000000 --- a/lib/trino-hdfs/src/main/java/io/trino/hdfs/s3/S3SecurityMappingConfigurationProvider.java +++ /dev/null @@ -1,194 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package io.trino.hdfs.s3; - -import com.google.common.base.Suppliers; -import com.google.common.collect.ImmutableSet; -import com.google.common.hash.Hasher; -import com.google.common.hash.Hashing; -import com.google.inject.Inject; -import io.airlift.log.Logger; -import io.trino.hdfs.DynamicConfigurationProvider; -import io.trino.hdfs.HdfsContext; -import io.trino.spi.security.AccessDeniedException; -import org.apache.hadoop.conf.Configuration; - -import java.net.URI; -import java.util.Optional; -import java.util.Set; -import java.util.function.Supplier; - -import static com.google.common.base.Verify.verify; -import static io.trino.hdfs.DynamicConfigurationProvider.setCacheKey; -import static io.trino.hdfs.s3.TrinoS3FileSystem.S3_ACCESS_KEY; -import static io.trino.hdfs.s3.TrinoS3FileSystem.S3_ENDPOINT; -import static io.trino.hdfs.s3.TrinoS3FileSystem.S3_IAM_ROLE; -import static io.trino.hdfs.s3.TrinoS3FileSystem.S3_KMS_KEY_ID; -import static io.trino.hdfs.s3.TrinoS3FileSystem.S3_REGION; -import static io.trino.hdfs.s3.TrinoS3FileSystem.S3_ROLE_SESSION_NAME; -import static io.trino.hdfs.s3.TrinoS3FileSystem.S3_SECRET_KEY; -import static java.nio.charset.StandardCharsets.UTF_8; -import static java.util.Objects.requireNonNull; -import static java.util.concurrent.TimeUnit.MILLISECONDS; - -public class S3SecurityMappingConfigurationProvider - implements DynamicConfigurationProvider -{ - private static final Logger log = Logger.get(S3SecurityMappingConfigurationProvider.class); - - private static final Set SCHEMES = ImmutableSet.of("s3", "s3a", "s3n"); - private static final String ANY_KMS_KEY_ID = "*"; - - private final Supplier mappings; - private final Optional roleCredentialName; - private final Optional kmsKeyIdCredentialName; - private final Optional colonReplacement; - - @Inject - public S3SecurityMappingConfigurationProvider(S3SecurityMappingConfig config, S3SecurityMappingsProvider mappingsProvider) - { - this(getMappings(config, mappingsProvider), config.getRoleCredentialName(), config.getKmsKeyIdCredentialName(), config.getColonReplacement()); - } - - public S3SecurityMappingConfigurationProvider(Supplier mappings, Optional roleCredentialName, Optional kmsKeyIdCredentialName, Optional colonReplacement) - { - this.mappings = requireNonNull(mappings, "mappings is null"); - this.roleCredentialName = requireNonNull(roleCredentialName, "roleCredentialName is null"); - this.kmsKeyIdCredentialName = requireNonNull(kmsKeyIdCredentialName, "kmsKeyIdCredentialName is null"); - this.colonReplacement = requireNonNull(colonReplacement, "colonReplacement is null"); - } - - private static Supplier getMappings(S3SecurityMappingConfig config, S3SecurityMappingsProvider supplier) - { - String configFilePath = config.getConfigFilePath().orElseThrow(() -> new IllegalArgumentException("config file not set")); - if (config.getRefreshPeriod().isEmpty()) { - return Suppliers.memoize(supplier::get); - } - return Suppliers.memoizeWithExpiration( - () -> { - log.info("Refreshing S3 security mapping configuration from %s", configFilePath); - return supplier.get(); - }, - config.getRefreshPeriod().get().toMillis(), - MILLISECONDS); - } - - @Override - public void updateConfiguration(Configuration configuration, HdfsContext context, URI uri) - { - if (!SCHEMES.contains(uri.getScheme())) { - return; - } - - S3SecurityMapping mapping = mappings.get().getMapping(context.getIdentity(), uri) - .orElseThrow(() -> new AccessDeniedException("No matching S3 security mapping")); - if (mapping.isUseClusterDefault()) { - return; - } - - Hasher hasher = Hashing.sha256().newHasher(); - - mapping.getCredentials().ifPresent(credentials -> { - configuration.set(S3_ACCESS_KEY, credentials.getAWSAccessKeyId()); - configuration.set(S3_SECRET_KEY, credentials.getAWSSecretKey()); - hasher.putString(credentials.getAWSAccessKeyId(), UTF_8); - hasher.putString(credentials.getAWSSecretKey(), UTF_8); - }); - - selectRole(mapping, context).ifPresent(role -> { - configuration.set(S3_IAM_ROLE, role); - hasher.putString(role, UTF_8); - }); - - selectKmsKeyId(mapping, context).ifPresent(key -> { - configuration.set(S3_KMS_KEY_ID, key); - hasher.putString(S3_KMS_KEY_ID + ":" + key, UTF_8); - }); - - mapping.getEndpoint().ifPresent(endpoint -> { - configuration.set(S3_ENDPOINT, endpoint); - hasher.putString(endpoint, UTF_8); - }); - - mapping.getRoleSessionName().ifPresent(roleSessionName -> { - configuration.set(S3_ROLE_SESSION_NAME, roleSessionName.replace("${USER}", context.getIdentity().getUser())); - hasher.putString(roleSessionName, UTF_8); - }); - - mapping.getRegion().ifPresent(region -> { - configuration.set(S3_REGION, region); - hasher.putString(region, UTF_8); - }); - - setCacheKey(configuration, hasher.hash().toString()); - } - - private Optional selectRole(S3SecurityMapping mapping, HdfsContext context) - { - Optional optionalSelected = getRoleFromExtraCredential(context); - - if (optionalSelected.isEmpty()) { - if (!mapping.getAllowedIamRoles().isEmpty() && mapping.getIamRole().isEmpty()) { - throw new AccessDeniedException("No S3 role selected and mapping has no default role"); - } - verify(mapping.getIamRole().isPresent() || mapping.getCredentials().isPresent(), "mapping must have role or credential"); - return mapping.getIamRole(); - } - - String selected = optionalSelected.get(); - - // selected role must match default or be allowed - if (!selected.equals(mapping.getIamRole().orElse(null)) && - !mapping.getAllowedIamRoles().contains(selected)) { - throw new AccessDeniedException("Selected S3 role is not allowed: " + selected); - } - - return optionalSelected; - } - - private Optional getRoleFromExtraCredential(HdfsContext context) - { - Optional extraCredentialRole = roleCredentialName.map(name -> context.getIdentity().getExtraCredentials().get(name)); - - if (colonReplacement.isPresent()) { - return extraCredentialRole.map(role -> role.replace(colonReplacement.get(), ":")); - } - return extraCredentialRole; - } - - private Optional selectKmsKeyId(S3SecurityMapping mapping, HdfsContext context) - { - Optional userSelected = getKmsKeyIdFromExtraCredential(context); - - if (userSelected.isEmpty()) { - return mapping.getKmsKeyId(); - } - - String selected = userSelected.get(); - - // selected key id must match default or be allowed - if (!selected.equals(mapping.getKmsKeyId().orElse(null)) && - !mapping.getAllowedKmsKeyIds().contains(selected) && - !mapping.getAllowedKmsKeyIds().contains(ANY_KMS_KEY_ID)) { - throw new AccessDeniedException("Selected KMS Key ID is not allowed"); - } - - return userSelected; - } - - private Optional getKmsKeyIdFromExtraCredential(HdfsContext context) - { - return kmsKeyIdCredentialName.map(name -> context.getIdentity().getExtraCredentials().get(name)); - } -} diff --git a/lib/trino-hdfs/src/main/java/io/trino/hdfs/s3/S3SecurityMappings.java b/lib/trino-hdfs/src/main/java/io/trino/hdfs/s3/S3SecurityMappings.java deleted file mode 100644 index 8ca368962d3e..000000000000 --- a/lib/trino-hdfs/src/main/java/io/trino/hdfs/s3/S3SecurityMappings.java +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package io.trino.hdfs.s3; - -import com.fasterxml.jackson.annotation.JsonCreator; -import com.fasterxml.jackson.annotation.JsonProperty; -import com.google.common.collect.ImmutableList; -import io.trino.spi.security.ConnectorIdentity; - -import java.net.URI; -import java.util.List; -import java.util.Optional; - -import static java.util.Objects.requireNonNull; - -public class S3SecurityMappings -{ - private final List mappings; - - @JsonCreator - public S3SecurityMappings(@JsonProperty("mappings") List mappings) - { - this.mappings = ImmutableList.copyOf(requireNonNull(mappings, "mappings is null")); - } - - public Optional getMapping(ConnectorIdentity identity, URI uri) - { - return mappings.stream() - .filter(mapping -> mapping.matches(identity, uri)) - .findFirst(); - } -} diff --git a/lib/trino-hdfs/src/main/java/io/trino/hdfs/s3/S3SecurityMappingsParser.java b/lib/trino-hdfs/src/main/java/io/trino/hdfs/s3/S3SecurityMappingsParser.java deleted file mode 100644 index 5f9550fb5118..000000000000 --- a/lib/trino-hdfs/src/main/java/io/trino/hdfs/s3/S3SecurityMappingsParser.java +++ /dev/null @@ -1,37 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package io.trino.hdfs.s3; - -import com.fasterxml.jackson.databind.JsonNode; - -import static io.trino.plugin.base.util.JsonUtils.jsonTreeToValue; -import static io.trino.plugin.base.util.JsonUtils.parseJson; -import static java.util.Objects.requireNonNull; - -public class S3SecurityMappingsParser -{ - protected final String jsonPointer; - - protected S3SecurityMappingsParser(S3SecurityMappingConfig config) - { - this.jsonPointer = requireNonNull(config.getJsonPointer()); - } - - public S3SecurityMappings parseJSONString(String jsonString) - { - JsonNode node = parseJson(jsonString, JsonNode.class); - JsonNode mappingsNode = node.at(this.jsonPointer); - return jsonTreeToValue(mappingsNode, S3SecurityMappings.class); - } -} diff --git a/lib/trino-hdfs/src/main/java/io/trino/hdfs/s3/S3SecurityMappingsProvider.java b/lib/trino-hdfs/src/main/java/io/trino/hdfs/s3/S3SecurityMappingsProvider.java deleted file mode 100644 index 4cff2ac843cb..000000000000 --- a/lib/trino-hdfs/src/main/java/io/trino/hdfs/s3/S3SecurityMappingsProvider.java +++ /dev/null @@ -1,19 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package io.trino.hdfs.s3; - -import java.util.function.Supplier; - -public interface S3SecurityMappingsProvider - extends Supplier {} diff --git a/lib/trino-hdfs/src/main/java/io/trino/hdfs/s3/TrinoS3AclType.java b/lib/trino-hdfs/src/main/java/io/trino/hdfs/s3/TrinoS3AclType.java deleted file mode 100644 index 03199c556bde..000000000000 --- a/lib/trino-hdfs/src/main/java/io/trino/hdfs/s3/TrinoS3AclType.java +++ /dev/null @@ -1,42 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package io.trino.hdfs.s3; - -import com.amazonaws.services.s3.model.CannedAccessControlList; - -import static java.util.Objects.requireNonNull; - -public enum TrinoS3AclType -{ - AUTHENTICATED_READ(CannedAccessControlList.AuthenticatedRead), - AWS_EXEC_READ(CannedAccessControlList.AwsExecRead), - BUCKET_OWNER_FULL_CONTROL(CannedAccessControlList.BucketOwnerFullControl), - BUCKET_OWNER_READ(CannedAccessControlList.BucketOwnerRead), - LOG_DELIVERY_WRITE(CannedAccessControlList.LogDeliveryWrite), - PRIVATE(CannedAccessControlList.Private), - PUBLIC_READ(CannedAccessControlList.PublicRead), - PUBLIC_READ_WRITE(CannedAccessControlList.PublicReadWrite); - - private final CannedAccessControlList cannedACL; - - TrinoS3AclType(CannedAccessControlList cannedACL) - { - this.cannedACL = requireNonNull(cannedACL, "cannedACL is null"); - } - - CannedAccessControlList getCannedACL() - { - return cannedACL; - } -} diff --git a/lib/trino-hdfs/src/main/java/io/trino/hdfs/s3/TrinoS3ConfigurationInitializer.java b/lib/trino-hdfs/src/main/java/io/trino/hdfs/s3/TrinoS3ConfigurationInitializer.java deleted file mode 100644 index 94334708c6cb..000000000000 --- a/lib/trino-hdfs/src/main/java/io/trino/hdfs/s3/TrinoS3ConfigurationInitializer.java +++ /dev/null @@ -1,256 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package io.trino.hdfs.s3; - -import com.google.inject.Inject; -import io.airlift.units.DataSize; -import io.airlift.units.Duration; -import io.trino.hdfs.ConfigurationInitializer; -import org.apache.hadoop.conf.Configuration; - -import java.io.File; -import java.util.List; -import java.util.Optional; - -import static io.trino.hdfs.s3.TrinoS3FileSystem.S3_ACCESS_KEY; -import static io.trino.hdfs.s3.TrinoS3FileSystem.S3_ACL_TYPE; -import static io.trino.hdfs.s3.TrinoS3FileSystem.S3_CONNECT_TIMEOUT; -import static io.trino.hdfs.s3.TrinoS3FileSystem.S3_CONNECT_TTL; -import static io.trino.hdfs.s3.TrinoS3FileSystem.S3_ENCRYPTION_MATERIALS_PROVIDER; -import static io.trino.hdfs.s3.TrinoS3FileSystem.S3_ENDPOINT; -import static io.trino.hdfs.s3.TrinoS3FileSystem.S3_EXTERNAL_ID; -import static io.trino.hdfs.s3.TrinoS3FileSystem.S3_IAM_ROLE; -import static io.trino.hdfs.s3.TrinoS3FileSystem.S3_KMS_KEY_ID; -import static io.trino.hdfs.s3.TrinoS3FileSystem.S3_MAX_BACKOFF_TIME; -import static io.trino.hdfs.s3.TrinoS3FileSystem.S3_MAX_CLIENT_RETRIES; -import static io.trino.hdfs.s3.TrinoS3FileSystem.S3_MAX_CONNECTIONS; -import static io.trino.hdfs.s3.TrinoS3FileSystem.S3_MAX_ERROR_RETRIES; -import static io.trino.hdfs.s3.TrinoS3FileSystem.S3_MAX_RETRY_TIME; -import static io.trino.hdfs.s3.TrinoS3FileSystem.S3_MULTIPART_MIN_FILE_SIZE; -import static io.trino.hdfs.s3.TrinoS3FileSystem.S3_MULTIPART_MIN_PART_SIZE; -import static io.trino.hdfs.s3.TrinoS3FileSystem.S3_NON_PROXY_HOSTS; -import static io.trino.hdfs.s3.TrinoS3FileSystem.S3_PATH_STYLE_ACCESS; -import static io.trino.hdfs.s3.TrinoS3FileSystem.S3_PIN_CLIENT_TO_CURRENT_REGION; -import static io.trino.hdfs.s3.TrinoS3FileSystem.S3_PREEMPTIVE_BASIC_PROXY_AUTH; -import static io.trino.hdfs.s3.TrinoS3FileSystem.S3_PROXY_HOST; -import static io.trino.hdfs.s3.TrinoS3FileSystem.S3_PROXY_PASSWORD; -import static io.trino.hdfs.s3.TrinoS3FileSystem.S3_PROXY_PORT; -import static io.trino.hdfs.s3.TrinoS3FileSystem.S3_PROXY_PROTOCOL; -import static io.trino.hdfs.s3.TrinoS3FileSystem.S3_PROXY_USERNAME; -import static io.trino.hdfs.s3.TrinoS3FileSystem.S3_REGION; -import static io.trino.hdfs.s3.TrinoS3FileSystem.S3_REQUESTER_PAYS_ENABLED; -import static io.trino.hdfs.s3.TrinoS3FileSystem.S3_SECRET_KEY; -import static io.trino.hdfs.s3.TrinoS3FileSystem.S3_SIGNER_CLASS; -import static io.trino.hdfs.s3.TrinoS3FileSystem.S3_SIGNER_TYPE; -import static io.trino.hdfs.s3.TrinoS3FileSystem.S3_SKIP_GLACIER_OBJECTS; -import static io.trino.hdfs.s3.TrinoS3FileSystem.S3_SOCKET_TIMEOUT; -import static io.trino.hdfs.s3.TrinoS3FileSystem.S3_SSE_ENABLED; -import static io.trino.hdfs.s3.TrinoS3FileSystem.S3_SSE_KMS_KEY_ID; -import static io.trino.hdfs.s3.TrinoS3FileSystem.S3_SSE_TYPE; -import static io.trino.hdfs.s3.TrinoS3FileSystem.S3_SSL_ENABLED; -import static io.trino.hdfs.s3.TrinoS3FileSystem.S3_STAGING_DIRECTORY; -import static io.trino.hdfs.s3.TrinoS3FileSystem.S3_STORAGE_CLASS; -import static io.trino.hdfs.s3.TrinoS3FileSystem.S3_STREAMING_UPLOAD_ENABLED; -import static io.trino.hdfs.s3.TrinoS3FileSystem.S3_STREAMING_UPLOAD_PART_SIZE; -import static io.trino.hdfs.s3.TrinoS3FileSystem.S3_STS_ENDPOINT; -import static io.trino.hdfs.s3.TrinoS3FileSystem.S3_STS_REGION; -import static io.trino.hdfs.s3.TrinoS3FileSystem.S3_USER_AGENT_PREFIX; -import static java.util.stream.Collectors.joining; - -public class TrinoS3ConfigurationInitializer - implements ConfigurationInitializer -{ - private final String awsAccessKey; - private final String awsSecretKey; - private final String endpoint; - private final String region; - private final TrinoS3StorageClass s3StorageClass; - private final TrinoS3SignerType signerType; - private final boolean pathStyleAccess; - private final String iamRole; - private final String externalId; - private final boolean sslEnabled; - private final boolean sseEnabled; - private final TrinoS3SseType sseType; - private final String encryptionMaterialsProvider; - private final String kmsKeyId; - private final String sseKmsKeyId; - private final int maxClientRetries; - private final int maxErrorRetries; - private final Duration maxBackoffTime; - private final Duration maxRetryTime; - private final Duration connectTimeout; - private final Optional connectTtl; - private final Duration socketTimeout; - private final int maxConnections; - private final DataSize multipartMinFileSize; - private final DataSize multipartMinPartSize; - private final File stagingDirectory; - private final boolean pinClientToCurrentRegion; - private final String userAgentPrefix; - private final TrinoS3AclType aclType; - private final String signerClass; - private final boolean requesterPaysEnabled; - private final boolean skipGlacierObjects; - private final boolean s3StreamingUploadEnabled; - private final DataSize streamingPartSize; - private final String s3proxyHost; - private final int s3proxyPort; - private final TrinoS3Protocol s3ProxyProtocol; - private final List s3nonProxyHosts; - private final String s3proxyUsername; - private final String s3proxyPassword; - private final boolean s3preemptiveBasicProxyAuth; - private final String s3StsEndpoint; - private final String s3StsRegion; - - @Inject - public TrinoS3ConfigurationInitializer(HiveS3Config config) - { - this.awsAccessKey = config.getS3AwsAccessKey(); - this.awsSecretKey = config.getS3AwsSecretKey(); - this.endpoint = config.getS3Endpoint(); - this.region = config.getS3Region(); - this.s3StorageClass = config.getS3StorageClass(); - this.signerType = config.getS3SignerType(); - this.signerClass = config.getS3SignerClass(); - this.pathStyleAccess = config.isS3PathStyleAccess(); - this.iamRole = config.getS3IamRole(); - this.externalId = config.getS3ExternalId(); - this.sslEnabled = config.isS3SslEnabled(); - this.sseEnabled = config.isS3SseEnabled(); - this.sseType = config.getS3SseType(); - this.encryptionMaterialsProvider = config.getS3EncryptionMaterialsProvider(); - this.kmsKeyId = config.getS3KmsKeyId(); - this.sseKmsKeyId = config.getS3SseKmsKeyId(); - this.maxClientRetries = config.getS3MaxClientRetries(); - this.maxErrorRetries = config.getS3MaxErrorRetries(); - this.maxBackoffTime = config.getS3MaxBackoffTime(); - this.maxRetryTime = config.getS3MaxRetryTime(); - this.connectTimeout = config.getS3ConnectTimeout(); - this.connectTtl = config.getS3ConnectTtl(); - this.socketTimeout = config.getS3SocketTimeout(); - this.maxConnections = config.getS3MaxConnections(); - this.multipartMinFileSize = config.getS3MultipartMinFileSize(); - this.multipartMinPartSize = config.getS3MultipartMinPartSize(); - this.stagingDirectory = config.getS3StagingDirectory(); - this.pinClientToCurrentRegion = config.isPinS3ClientToCurrentRegion(); - this.userAgentPrefix = config.getS3UserAgentPrefix(); - this.aclType = config.getS3AclType(); - this.skipGlacierObjects = config.isSkipGlacierObjects(); - this.requesterPaysEnabled = config.isRequesterPaysEnabled(); - this.s3StreamingUploadEnabled = config.isS3StreamingUploadEnabled(); - this.streamingPartSize = config.getS3StreamingPartSize(); - this.s3proxyHost = config.getS3ProxyHost(); - this.s3proxyPort = config.getS3ProxyPort(); - this.s3ProxyProtocol = config.getS3ProxyProtocol(); - this.s3nonProxyHosts = config.getS3NonProxyHosts(); - this.s3proxyUsername = config.getS3ProxyUsername(); - this.s3proxyPassword = config.getS3ProxyPassword(); - this.s3preemptiveBasicProxyAuth = config.getS3PreemptiveBasicProxyAuth(); - this.s3StsEndpoint = config.getS3StsEndpoint(); - this.s3StsRegion = config.getS3StsRegion(); - } - - @Override - public void initializeConfiguration(Configuration config) - { - // re-map filesystem schemes to match Amazon Elastic MapReduce - config.set("fs.s3.impl", TrinoS3FileSystem.class.getName()); - config.set("fs.s3a.impl", TrinoS3FileSystem.class.getName()); - config.set("fs.s3n.impl", TrinoS3FileSystem.class.getName()); - - if (awsAccessKey != null) { - config.set(S3_ACCESS_KEY, awsAccessKey); - } - if (awsSecretKey != null) { - config.set(S3_SECRET_KEY, awsSecretKey); - } - if (endpoint != null) { - config.set(S3_ENDPOINT, endpoint); - } - if (region != null) { - config.set(S3_REGION, region); - } - config.set(S3_STORAGE_CLASS, s3StorageClass.name()); - if (signerType != null) { - config.set(S3_SIGNER_TYPE, signerType.name()); - } - if (signerClass != null) { - config.set(S3_SIGNER_CLASS, signerClass); - } - config.setBoolean(S3_PATH_STYLE_ACCESS, pathStyleAccess); - if (iamRole != null) { - config.set(S3_IAM_ROLE, iamRole); - } - if (externalId != null) { - config.set(S3_EXTERNAL_ID, externalId); - } - config.setBoolean(S3_SSL_ENABLED, sslEnabled); - config.setBoolean(S3_SSE_ENABLED, sseEnabled); - config.set(S3_SSE_TYPE, sseType.name()); - if (encryptionMaterialsProvider != null) { - config.set(S3_ENCRYPTION_MATERIALS_PROVIDER, encryptionMaterialsProvider); - } - if (kmsKeyId != null) { - config.set(S3_KMS_KEY_ID, kmsKeyId); - } - if (sseKmsKeyId != null) { - config.set(S3_SSE_KMS_KEY_ID, sseKmsKeyId); - } - config.setInt(S3_MAX_CLIENT_RETRIES, maxClientRetries); - config.setInt(S3_MAX_ERROR_RETRIES, maxErrorRetries); - config.set(S3_MAX_BACKOFF_TIME, maxBackoffTime.toString()); - config.set(S3_MAX_RETRY_TIME, maxRetryTime.toString()); - config.set(S3_CONNECT_TIMEOUT, connectTimeout.toString()); - connectTtl.ifPresent(duration -> config.set(S3_CONNECT_TTL, duration.toString())); - config.set(S3_SOCKET_TIMEOUT, socketTimeout.toString()); - config.set(S3_STAGING_DIRECTORY, stagingDirectory.getPath()); - config.setInt(S3_MAX_CONNECTIONS, maxConnections); - config.setLong(S3_MULTIPART_MIN_FILE_SIZE, multipartMinFileSize.toBytes()); - config.setLong(S3_MULTIPART_MIN_PART_SIZE, multipartMinPartSize.toBytes()); - config.setBoolean(S3_PIN_CLIENT_TO_CURRENT_REGION, pinClientToCurrentRegion); - config.set(S3_USER_AGENT_PREFIX, userAgentPrefix); - config.set(S3_ACL_TYPE, aclType.name()); - config.setBoolean(S3_SKIP_GLACIER_OBJECTS, skipGlacierObjects); - config.setBoolean(S3_REQUESTER_PAYS_ENABLED, requesterPaysEnabled); - config.setBoolean(S3_STREAMING_UPLOAD_ENABLED, s3StreamingUploadEnabled); - config.setLong(S3_STREAMING_UPLOAD_PART_SIZE, streamingPartSize.toBytes()); - if (s3proxyHost != null) { - config.set(S3_PROXY_HOST, s3proxyHost); - } - if (s3proxyPort > -1) { - config.setInt(S3_PROXY_PORT, s3proxyPort); - } - if (s3ProxyProtocol != null) { - config.set(S3_PROXY_PROTOCOL, s3ProxyProtocol.name()); - } - if (s3nonProxyHosts != null) { - config.set(S3_NON_PROXY_HOSTS, s3nonProxyHosts.stream().collect(joining("|"))); - } - if (s3proxyUsername != null) { - config.set(S3_PROXY_USERNAME, s3proxyUsername); - } - if (s3proxyPassword != null) { - config.set(S3_PROXY_PASSWORD, s3proxyPassword); - } - config.setBoolean(S3_PREEMPTIVE_BASIC_PROXY_AUTH, s3preemptiveBasicProxyAuth); - if (s3StsEndpoint != null) { - config.set(S3_STS_ENDPOINT, s3StsEndpoint); - } - if (s3StsRegion != null) { - config.set(S3_STS_REGION, s3StsRegion); - } - } -} diff --git a/lib/trino-hdfs/src/main/java/io/trino/hdfs/s3/TrinoS3FileSystem.java b/lib/trino-hdfs/src/main/java/io/trino/hdfs/s3/TrinoS3FileSystem.java deleted file mode 100644 index 9c8337753b59..000000000000 --- a/lib/trino-hdfs/src/main/java/io/trino/hdfs/s3/TrinoS3FileSystem.java +++ /dev/null @@ -1,2137 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package io.trino.hdfs.s3; - -import com.amazonaws.AbortedException; -import com.amazonaws.AmazonClientException; -import com.amazonaws.AmazonServiceException; -import com.amazonaws.ClientConfiguration; -import com.amazonaws.Protocol; -import com.amazonaws.Request; -import com.amazonaws.Response; -import com.amazonaws.SdkClientException; -import com.amazonaws.auth.AWSCredentials; -import com.amazonaws.auth.AWSCredentialsProvider; -import com.amazonaws.auth.AWSStaticCredentialsProvider; -import com.amazonaws.auth.BasicAWSCredentials; -import com.amazonaws.auth.BasicSessionCredentials; -import com.amazonaws.auth.DefaultAWSCredentialsProviderChain; -import com.amazonaws.auth.STSAssumeRoleSessionCredentialsProvider; -import com.amazonaws.auth.Signer; -import com.amazonaws.auth.SignerFactory; -import com.amazonaws.auth.WebIdentityTokenCredentialsProvider; -import com.amazonaws.client.builder.AwsClientBuilder.EndpointConfiguration; -import com.amazonaws.event.ProgressEvent; -import com.amazonaws.event.ProgressEventType; -import com.amazonaws.event.ProgressListener; -import com.amazonaws.handlers.RequestHandler2; -import com.amazonaws.metrics.RequestMetricCollector; -import com.amazonaws.regions.DefaultAwsRegionProviderChain; -import com.amazonaws.regions.Region; -import com.amazonaws.services.s3.AmazonS3; -import com.amazonaws.services.s3.AmazonS3Builder; -import com.amazonaws.services.s3.AmazonS3Client; -import com.amazonaws.services.s3.AmazonS3Encryption; -import com.amazonaws.services.s3.AmazonS3EncryptionClient; -import com.amazonaws.services.s3.AmazonS3EncryptionClientBuilder; -import com.amazonaws.services.s3.Headers; -import com.amazonaws.services.s3.internal.Constants; -import com.amazonaws.services.s3.model.AbortMultipartUploadRequest; -import com.amazonaws.services.s3.model.AmazonS3Exception; -import com.amazonaws.services.s3.model.CompleteMultipartUploadRequest; -import com.amazonaws.services.s3.model.CopyObjectRequest; -import com.amazonaws.services.s3.model.CryptoConfiguration; -import com.amazonaws.services.s3.model.DeleteObjectRequest; -import com.amazonaws.services.s3.model.DeleteObjectsRequest; -import com.amazonaws.services.s3.model.DeleteObjectsRequest.KeyVersion; -import com.amazonaws.services.s3.model.EncryptionMaterialsProvider; -import com.amazonaws.services.s3.model.GetObjectMetadataRequest; -import com.amazonaws.services.s3.model.GetObjectRequest; -import com.amazonaws.services.s3.model.InitiateMultipartUploadRequest; -import com.amazonaws.services.s3.model.InitiateMultipartUploadResult; -import com.amazonaws.services.s3.model.KMSEncryptionMaterialsProvider; -import com.amazonaws.services.s3.model.ListObjectsV2Request; -import com.amazonaws.services.s3.model.ListObjectsV2Result; -import com.amazonaws.services.s3.model.MultiObjectDeleteException; -import com.amazonaws.services.s3.model.ObjectMetadata; -import com.amazonaws.services.s3.model.PartETag; -import com.amazonaws.services.s3.model.PutObjectRequest; -import com.amazonaws.services.s3.model.S3ObjectInputStream; -import com.amazonaws.services.s3.model.S3ObjectSummary; -import com.amazonaws.services.s3.model.SSEAwsKeyManagementParams; -import com.amazonaws.services.s3.model.UploadPartRequest; -import com.amazonaws.services.s3.model.UploadPartResult; -import com.amazonaws.services.s3.transfer.Transfer; -import com.amazonaws.services.s3.transfer.TransferManager; -import com.amazonaws.services.s3.transfer.TransferManagerBuilder; -import com.amazonaws.services.s3.transfer.Upload; -import com.amazonaws.services.securitytoken.AWSSecurityTokenServiceClientBuilder; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Splitter; -import com.google.common.collect.AbstractSequentialIterator; -import com.google.common.collect.ImmutableSet; -import com.google.common.collect.Iterables; -import com.google.common.collect.Iterators; -import com.google.common.io.Closer; -import com.google.common.net.MediaType; -import io.airlift.log.Logger; -import io.airlift.units.DataSize; -import io.airlift.units.Duration; -import io.opentelemetry.api.OpenTelemetry; -import io.opentelemetry.instrumentation.awssdk.v1_11.AwsSdkTelemetry; -import io.trino.hdfs.FSDataInputStreamTail; -import io.trino.hdfs.FileSystemWithBatchDelete; -import io.trino.hdfs.MemoryAwareFileSystem; -import io.trino.hdfs.OpenTelemetryAwareFileSystem; -import io.trino.memory.context.AggregatedMemoryContext; -import io.trino.memory.context.LocalMemoryContext; -import org.apache.hadoop.conf.Configurable; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.BlockLocation; -import org.apache.hadoop.fs.BufferedFSInputStream; -import org.apache.hadoop.fs.FSDataInputStream; -import org.apache.hadoop.fs.FSDataOutputStream; -import org.apache.hadoop.fs.FSInputStream; -import org.apache.hadoop.fs.FileStatus; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.LocatedFileStatus; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.RemoteIterator; -import org.apache.hadoop.fs.permission.FsPermission; -import org.apache.hadoop.util.Progressable; -import org.gaul.modernizer_maven_annotations.SuppressModernizer; - -import java.io.BufferedOutputStream; -import java.io.ByteArrayInputStream; -import java.io.Closeable; -import java.io.EOFException; -import java.io.File; -import java.io.FileNotFoundException; -import java.io.FileOutputStream; -import java.io.FilterOutputStream; -import java.io.IOException; -import java.io.InputStream; -import java.io.InterruptedIOException; -import java.io.OutputStream; -import java.io.UncheckedIOException; -import java.net.URI; -import java.net.URISyntaxException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Base64; -import java.util.Collection; -import java.util.Date; -import java.util.Iterator; -import java.util.List; -import java.util.Map; -import java.util.Objects; -import java.util.Optional; -import java.util.OptionalInt; -import java.util.Set; -import java.util.concurrent.ExecutionException; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Future; -import java.util.concurrent.atomic.AtomicBoolean; -import java.util.function.Consumer; -import java.util.function.Supplier; - -import static com.amazonaws.regions.Regions.US_EAST_1; -import static com.amazonaws.services.s3.Headers.CRYPTO_KEYWRAP_ALGORITHM; -import static com.amazonaws.services.s3.Headers.SERVER_SIDE_ENCRYPTION; -import static com.amazonaws.services.s3.Headers.UNENCRYPTED_CONTENT_LENGTH; -import static com.amazonaws.services.s3.model.StorageClass.DeepArchive; -import static com.amazonaws.services.s3.model.StorageClass.Glacier; -import static com.google.common.base.Preconditions.checkArgument; -import static com.google.common.base.Preconditions.checkState; -import static com.google.common.base.Strings.isNullOrEmpty; -import static com.google.common.base.Strings.nullToEmpty; -import static com.google.common.base.Throwables.throwIfInstanceOf; -import static com.google.common.base.Throwables.throwIfUnchecked; -import static com.google.common.base.Verify.verify; -import static com.google.common.collect.ImmutableList.toImmutableList; -import static com.google.common.collect.Iterables.toArray; -import static com.google.common.hash.Hashing.md5; -import static io.airlift.concurrent.Threads.threadsNamed; -import static io.airlift.units.DataSize.Unit.MEGABYTE; -import static io.trino.hdfs.s3.AwsCurrentRegionHolder.getCurrentRegionFromEC2Metadata; -import static io.trino.hdfs.s3.RetryDriver.retry; -import static io.trino.memory.context.AggregatedMemoryContext.newSimpleAggregatedMemoryContext; -import static java.lang.Math.max; -import static java.lang.Math.min; -import static java.lang.Math.toIntExact; -import static java.lang.String.format; -import static java.lang.System.arraycopy; -import static java.net.HttpURLConnection.HTTP_BAD_REQUEST; -import static java.net.HttpURLConnection.HTTP_FORBIDDEN; -import static java.net.HttpURLConnection.HTTP_NOT_FOUND; -import static java.nio.file.Files.createDirectories; -import static java.nio.file.Files.createTempFile; -import static java.util.Objects.checkFromToIndex; -import static java.util.Objects.nonNull; -import static java.util.Objects.requireNonNull; -import static java.util.Objects.requireNonNullElse; -import static java.util.concurrent.Executors.newCachedThreadPool; -import static java.util.concurrent.TimeUnit.SECONDS; -import static java.util.stream.Collectors.joining; -import static java.util.stream.Collectors.toList; -import static org.apache.hadoop.fs.FSExceptionMessages.CANNOT_SEEK_PAST_EOF; -import static org.apache.hadoop.fs.FSExceptionMessages.NEGATIVE_SEEK; -import static org.apache.hadoop.fs.FSExceptionMessages.STREAM_IS_CLOSED; - -public class TrinoS3FileSystem - extends FileSystem - implements FileSystemWithBatchDelete, MemoryAwareFileSystem, OpenTelemetryAwareFileSystem -{ - public static final String S3_USER_AGENT_PREFIX = "trino.s3.user-agent-prefix"; - public static final String S3_CREDENTIALS_PROVIDER = "trino.s3.credentials-provider"; - public static final String S3_USE_WEB_IDENTITY_TOKEN_CREDENTIALS_PROVIDER = "trino.s3.use-web-identity-token-credentials-provider"; - public static final String S3_SSE_TYPE = "trino.s3.sse.type"; - public static final String S3_SSE_ENABLED = "trino.s3.sse.enabled"; - public static final String S3_SSE_KMS_KEY_ID = "trino.s3.sse.kms-key-id"; - public static final String S3_KMS_KEY_ID = "trino.s3.kms-key-id"; - public static final String S3_ENCRYPTION_MATERIALS_PROVIDER = "trino.s3.encryption-materials-provider"; - public static final String S3_PIN_CLIENT_TO_CURRENT_REGION = "trino.s3.pin-client-to-current-region"; - public static final String S3_MULTIPART_MIN_PART_SIZE = "trino.s3.multipart.min-part-size"; - public static final String S3_MULTIPART_MIN_FILE_SIZE = "trino.s3.multipart.min-file-size"; - public static final String S3_STAGING_DIRECTORY = "trino.s3.staging-directory"; - public static final String S3_MAX_CONNECTIONS = "trino.s3.max-connections"; - public static final String S3_SOCKET_TIMEOUT = "trino.s3.socket-timeout"; - public static final String S3_CONNECT_TIMEOUT = "trino.s3.connect-timeout"; - public static final String S3_CONNECT_TTL = "trino.s3.connect-ttl"; - public static final String S3_MAX_RETRY_TIME = "trino.s3.max-retry-time"; - public static final String S3_MAX_BACKOFF_TIME = "trino.s3.max-backoff-time"; - public static final String S3_MAX_CLIENT_RETRIES = "trino.s3.max-client-retries"; - public static final String S3_MAX_ERROR_RETRIES = "trino.s3.max-error-retries"; - public static final String S3_SSL_ENABLED = "trino.s3.ssl.enabled"; - public static final String S3_PATH_STYLE_ACCESS = "trino.s3.path-style-access"; - public static final String S3_SIGNER_TYPE = "trino.s3.signer-type"; - public static final String S3_SIGNER_CLASS = "trino.s3.signer-class"; - public static final String S3_ENDPOINT = "trino.s3.endpoint"; - public static final String S3_REGION = "trino.s3.region"; - public static final String S3_SECRET_KEY = "trino.s3.secret-key"; - public static final String S3_ACCESS_KEY = "trino.s3.access-key"; - public static final String S3_SESSION_TOKEN = "trino.s3.session-token"; - public static final String S3_IAM_ROLE = "trino.s3.iam-role"; - public static final String S3_EXTERNAL_ID = "trino.s3.external-id"; - public static final String S3_ACL_TYPE = "trino.s3.upload-acl-type"; - public static final String S3_SKIP_GLACIER_OBJECTS = "trino.s3.skip-glacier-objects"; - public static final String S3_REQUESTER_PAYS_ENABLED = "trino.s3.requester-pays.enabled"; - public static final String S3_STREAMING_UPLOAD_ENABLED = "trino.s3.streaming.enabled"; - public static final String S3_STREAMING_UPLOAD_PART_SIZE = "trino.s3.streaming.part-size"; - public static final String S3_STORAGE_CLASS = "trino.s3.storage-class"; - public static final String S3_ROLE_SESSION_NAME = "trino.s3.role-session-name"; - public static final String S3_PROXY_HOST = "trino.s3.proxy.host"; - public static final String S3_PROXY_PORT = "trino.s3.proxy.port"; - public static final String S3_PROXY_PROTOCOL = "trino.s3.proxy.protocol"; - public static final String S3_NON_PROXY_HOSTS = "trino.s3.proxy.non-proxy-hosts"; - public static final String S3_PROXY_USERNAME = "trino.s3.proxy.username"; - public static final String S3_PROXY_PASSWORD = "trino.s3.proxy.password"; - public static final String S3_PREEMPTIVE_BASIC_PROXY_AUTH = "trino.s3.proxy.preemptive-basic-auth"; - - public static final String S3_STS_ENDPOINT = "trino.s3.sts.endpoint"; - public static final String S3_STS_REGION = "trino.s3.sts.region"; - - private static final Logger log = Logger.get(TrinoS3FileSystem.class); - private static final TrinoS3FileSystemStats STATS = new TrinoS3FileSystemStats(); - private static final RequestMetricCollector METRIC_COLLECTOR = STATS.newRequestMetricCollector(); - private static final String DIRECTORY_SUFFIX = "_$folder$"; - private static final DataSize BLOCK_SIZE = DataSize.of(32, MEGABYTE); - private static final DataSize MAX_SKIP_SIZE = DataSize.of(1, MEGABYTE); - private static final String PATH_SEPARATOR = "/"; - private static final Duration BACKOFF_MIN_SLEEP = new Duration(1, SECONDS); - private static final int HTTP_RANGE_NOT_SATISFIABLE = 416; - private static final String S3_CUSTOM_SIGNER = "TrinoS3CustomSigner"; - private static final Set GLACIER_STORAGE_CLASSES = ImmutableSet.of(Glacier.toString(), DeepArchive.toString()); - private static final MediaType DIRECTORY_MEDIA_TYPE = MediaType.create("application", "x-directory"); - private static final String S3_DEFAULT_ROLE_SESSION_NAME = "trino-session"; - public static final int DELETE_BATCH_SIZE = 1000; - - static final String NO_SUCH_KEY_ERROR_CODE = "NoSuchKey"; - static final String NO_SUCH_BUCKET_ERROR_CODE = "NoSuchBucket"; - - private URI uri; - private Path workingDirectory; - private AmazonS3 s3; - private AWSCredentialsProvider credentialsProvider; - private File stagingDirectory; - private int maxAttempts; - private Duration maxBackoffTime; - private Duration maxRetryTime; - private String iamRole; - private String externalId; - private boolean pinS3ClientToCurrentRegion; - private boolean sseEnabled; - private TrinoS3SseType sseType; - private String sseKmsKeyId; - private boolean isPathStyleAccess; - private long multiPartUploadMinFileSize; - private long multiPartUploadMinPartSize; - private TrinoS3AclType s3AclType; - private boolean skipGlacierObjects; - private boolean requesterPaysEnabled; - private boolean streamingUploadEnabled; - private int streamingUploadPartSize; - private TrinoS3StorageClass s3StorageClass; - private String s3RoleSessionName; - - private final ExecutorService uploadExecutor = newCachedThreadPool(threadsNamed("s3-upload-%s")); - private final ForwardingRequestHandler forwardingRequestHandler = new ForwardingRequestHandler(); - - @Override - public void initialize(URI uri, Configuration conf) - throws IOException - { - requireNonNull(uri, "uri is null"); - requireNonNull(conf, "conf is null"); - super.initialize(uri, conf); - setConf(conf); - - try { - this.uri = new URI(uri.getScheme(), uri.getAuthority(), null, null, null); - } - catch (URISyntaxException e) { - throw new IllegalArgumentException("Invalid uri: " + uri, e); - } - this.workingDirectory = new Path(PATH_SEPARATOR).makeQualified(this.uri, new Path(PATH_SEPARATOR)); - - HiveS3Config defaults = new HiveS3Config(); - this.stagingDirectory = new File(conf.get(S3_STAGING_DIRECTORY, defaults.getS3StagingDirectory().getPath())); - this.maxAttempts = conf.getInt(S3_MAX_CLIENT_RETRIES, defaults.getS3MaxClientRetries()) + 1; - this.maxBackoffTime = Duration.valueOf(conf.get(S3_MAX_BACKOFF_TIME, defaults.getS3MaxBackoffTime().toString())); - this.maxRetryTime = Duration.valueOf(conf.get(S3_MAX_RETRY_TIME, defaults.getS3MaxRetryTime().toString())); - int maxErrorRetries = conf.getInt(S3_MAX_ERROR_RETRIES, defaults.getS3MaxErrorRetries()); - boolean sslEnabled = conf.getBoolean(S3_SSL_ENABLED, defaults.isS3SslEnabled()); - Duration connectTimeout = Duration.valueOf(conf.get(S3_CONNECT_TIMEOUT, defaults.getS3ConnectTimeout().toString())); - Duration socketTimeout = Duration.valueOf(conf.get(S3_SOCKET_TIMEOUT, defaults.getS3SocketTimeout().toString())); - int maxConnections = conf.getInt(S3_MAX_CONNECTIONS, defaults.getS3MaxConnections()); - this.multiPartUploadMinFileSize = conf.getLong(S3_MULTIPART_MIN_FILE_SIZE, defaults.getS3MultipartMinFileSize().toBytes()); - this.multiPartUploadMinPartSize = conf.getLong(S3_MULTIPART_MIN_PART_SIZE, defaults.getS3MultipartMinPartSize().toBytes()); - this.isPathStyleAccess = conf.getBoolean(S3_PATH_STYLE_ACCESS, defaults.isS3PathStyleAccess()); - this.iamRole = conf.get(S3_IAM_ROLE, defaults.getS3IamRole()); - this.externalId = conf.get(S3_EXTERNAL_ID, defaults.getS3ExternalId()); - this.pinS3ClientToCurrentRegion = conf.getBoolean(S3_PIN_CLIENT_TO_CURRENT_REGION, defaults.isPinS3ClientToCurrentRegion()); - verify(!pinS3ClientToCurrentRegion || conf.get(S3_ENDPOINT) == null, - "Invalid configuration: either endpoint can be set or S3 client can be pinned to the current region"); - this.sseEnabled = conf.getBoolean(S3_SSE_ENABLED, defaults.isS3SseEnabled()); - this.sseType = TrinoS3SseType.valueOf(conf.get(S3_SSE_TYPE, defaults.getS3SseType().name())); - this.sseKmsKeyId = conf.get(S3_SSE_KMS_KEY_ID, defaults.getS3SseKmsKeyId()); - this.s3AclType = TrinoS3AclType.valueOf(conf.get(S3_ACL_TYPE, defaults.getS3AclType().name())); - String userAgentPrefix = conf.get(S3_USER_AGENT_PREFIX, defaults.getS3UserAgentPrefix()); - this.skipGlacierObjects = conf.getBoolean(S3_SKIP_GLACIER_OBJECTS, defaults.isSkipGlacierObjects()); - this.requesterPaysEnabled = conf.getBoolean(S3_REQUESTER_PAYS_ENABLED, defaults.isRequesterPaysEnabled()); - this.streamingUploadEnabled = conf.getBoolean(S3_STREAMING_UPLOAD_ENABLED, defaults.isS3StreamingUploadEnabled()); - this.streamingUploadPartSize = toIntExact(conf.getLong(S3_STREAMING_UPLOAD_PART_SIZE, defaults.getS3StreamingPartSize().toBytes())); - this.s3StorageClass = conf.getEnum(S3_STORAGE_CLASS, defaults.getS3StorageClass()); - this.s3RoleSessionName = conf.get(S3_ROLE_SESSION_NAME, S3_DEFAULT_ROLE_SESSION_NAME); - - ClientConfiguration configuration = new ClientConfiguration() - .withMaxErrorRetry(maxErrorRetries) - .withProtocol(sslEnabled ? Protocol.HTTPS : Protocol.HTTP) - .withConnectionTimeout(toIntExact(connectTimeout.toMillis())) - .withSocketTimeout(toIntExact(socketTimeout.toMillis())) - .withMaxConnections(maxConnections) - .withUserAgentPrefix(userAgentPrefix) - .withUserAgentSuffix("Trino"); - - String connectTtlValue = conf.get(S3_CONNECT_TTL); - if (!isNullOrEmpty(connectTtlValue)) { - configuration.setConnectionTTL(Duration.valueOf(connectTtlValue).toMillis()); - } - - String proxyHost = conf.get(S3_PROXY_HOST); - if (nonNull(proxyHost)) { - configuration.setProxyHost(proxyHost); - configuration.setProxyPort(conf.getInt(S3_PROXY_PORT, defaults.getS3ProxyPort())); - String proxyProtocol = conf.get(S3_PROXY_PROTOCOL); - if (proxyProtocol != null) { - configuration.setProxyProtocol(TrinoS3Protocol.valueOf(proxyProtocol).getProtocol()); - } - String nonProxyHosts = conf.get(S3_NON_PROXY_HOSTS); - if (nonProxyHosts != null) { - configuration.setNonProxyHosts(nonProxyHosts); - } - String proxyUsername = conf.get(S3_PROXY_USERNAME); - if (proxyUsername != null) { - configuration.setProxyUsername(proxyUsername); - } - String proxyPassword = conf.get(S3_PROXY_PASSWORD); - if (proxyPassword != null) { - configuration.setProxyPassword(proxyPassword); - } - configuration.setPreemptiveBasicProxyAuth( - conf.getBoolean(S3_PREEMPTIVE_BASIC_PROXY_AUTH, defaults.getS3PreemptiveBasicProxyAuth())); - } - - this.credentialsProvider = createAwsCredentialsProvider(uri, conf); - this.s3 = createAmazonS3Client(conf, configuration); - } - - @Override - public void close() - throws IOException - { - try (Closer closer = Closer.create()) { - closer.register(this::closeSuper); - if (credentialsProvider instanceof Closeable closeable) { - closer.register(closeable); - } - closer.register(uploadExecutor::shutdown); - if (s3 != null) { - closer.register(s3::shutdown); - } - } - } - - @SuppressModernizer - private void closeSuper() - throws IOException - { - super.close(); - } - - @Override - public void setOpenTelemetry(OpenTelemetry openTelemetry) - { - requireNonNull(openTelemetry, "openTelemetry is null"); - forwardingRequestHandler.setDelegateIfAbsent(() -> - AwsSdkTelemetry.builder(openTelemetry) - .setCaptureExperimentalSpanAttributes(true) - .build() - .createRequestHandler()); - } - - @Override - public String getScheme() - { - return uri.getScheme(); - } - - @Override - public URI getUri() - { - return uri; - } - - @Override - public Path getWorkingDirectory() - { - return workingDirectory; - } - - @Override - public void setWorkingDirectory(Path path) - { - workingDirectory = path; - } - - @Override - public FileStatus[] listStatus(Path path) - throws IOException - { - STATS.newListStatusCall(); - List list = new ArrayList<>(); - RemoteIterator iterator = listLocatedStatus(path); - while (iterator.hasNext()) { - list.add(iterator.next()); - } - return toArray(list, LocatedFileStatus.class); - } - - @Override - public RemoteIterator listFiles(Path path, boolean recursive) - { - // Either a single level or full listing, depending on the recursive flag, no "directories" - // included in either path - return new S3ObjectsV2RemoteIterator(listPath(path, OptionalInt.empty(), recursive ? ListingMode.RECURSIVE_FILES_ONLY : ListingMode.SHALLOW_FILES_ONLY)); - } - - @Override - public RemoteIterator listLocatedStatus(Path path) - { - STATS.newListLocatedStatusCall(); - return new S3ObjectsV2RemoteIterator(listPath(path, OptionalInt.empty(), ListingMode.SHALLOW_ALL)); - } - - private static final class S3ObjectsV2RemoteIterator - implements RemoteIterator - { - private final Iterator iterator; - - public S3ObjectsV2RemoteIterator(Iterator iterator) - { - this.iterator = requireNonNull(iterator, "iterator is null"); - } - - @Override - public boolean hasNext() - throws IOException - { - try { - return iterator.hasNext(); - } - catch (AmazonClientException e) { - throw new IOException(e); - } - } - - @Override - public LocatedFileStatus next() - throws IOException - { - try { - return iterator.next(); - } - catch (AmazonClientException e) { - throw new IOException(e); - } - } - } - - @Override - public FileStatus getFileStatus(Path path) - throws IOException - { - if (path.getName().isEmpty()) { - // the bucket root requires special handling - if (getS3ObjectMetadata(path) != null) { - return new FileStatus(0, true, 1, 0, 0, qualifiedPath(path)); - } - throw new FileNotFoundException("File does not exist: " + path); - } - - ObjectMetadata metadata = getS3ObjectMetadata(path); - - if (metadata == null) { - // check if this path is a directory - Iterator iterator = listPath(path, OptionalInt.of(1), ListingMode.SHALLOW_ALL); - if (iterator.hasNext()) { - return new FileStatus(0, true, 1, 0, 0, qualifiedPath(path)); - } - throw new FileNotFoundException("File does not exist: " + path); - } - - return new FileStatus( - getObjectSize(path, metadata), - // Some directories (e.g. uploaded through S3 GUI) return a charset in the Content-Type header - isDirectoryMediaType(nullToEmpty(metadata.getContentType())), - 1, - BLOCK_SIZE.toBytes(), - lastModifiedTime(metadata), - qualifiedPath(path)); - } - - private static boolean isDirectoryMediaType(String contentType) - { - try { - return MediaType.parse(contentType).is(DIRECTORY_MEDIA_TYPE); - } - catch (IllegalArgumentException e) { - log.debug(e, "isDirectoryMediaType: failed to inspect contentType [%s], assuming not a directory", contentType); - return false; - } - } - - private long getObjectSize(Path path, ObjectMetadata metadata) - throws IOException - { - Map userMetadata = metadata.getUserMetadata(); - String length = userMetadata.get(UNENCRYPTED_CONTENT_LENGTH); - if (userMetadata.containsKey(SERVER_SIDE_ENCRYPTION) && length == null) { - throw new IOException(format("%s header is not set on an encrypted object: %s", UNENCRYPTED_CONTENT_LENGTH, path)); - } - - if (length != null) { - return Long.parseLong(length); - } - - long reportedObjectSize = metadata.getContentLength(); - // x-amz-unencrypted-content-length was not set, infer length for cse-kms encrypted objects by reading the tail until EOF - if (s3 instanceof AmazonS3Encryption && "kms".equalsIgnoreCase(userMetadata.get(CRYPTO_KEYWRAP_ALGORITHM))) { - try (FSDataInputStream in = open(path, FSDataInputStreamTail.MAX_SUPPORTED_PADDING_BYTES + 1)) { - return FSDataInputStreamTail.readTailForFileSize(path.toString(), reportedObjectSize, in); - } - } - return reportedObjectSize; - } - - @Override - public FSDataInputStream open(Path path, int bufferSize) - { - return new FSDataInputStream( - new BufferedFSInputStream( - new TrinoS3InputStream(s3, getBucketName(uri), path, requesterPaysEnabled, maxAttempts, maxBackoffTime, maxRetryTime), - bufferSize)); - } - - @Override - public FSDataOutputStream create(Path path, FsPermission permission, boolean overwrite, int bufferSize, short replication, long blockSize, Progressable progress) - throws IOException - { - // Ignore the overwrite flag, since Trino Hive connector *usually* writes to unique file names. - // Checking for file existence is thus an unnecessary, expensive operation. - return new FSDataOutputStream(createOutputStream(path, newSimpleAggregatedMemoryContext()), statistics); - } - - @Override - public FSDataOutputStream create(Path path, AggregatedMemoryContext aggregatedMemoryContext) - throws IOException - { - return new FSDataOutputStream(createOutputStream(path, aggregatedMemoryContext), statistics); - } - - private OutputStream createOutputStream(Path path, AggregatedMemoryContext memoryContext) - throws IOException - { - String bucketName = getBucketName(uri); - String key = keyFromPath(qualifiedPath(path)); - - if (streamingUploadEnabled) { - Supplier uploadIdFactory = () -> initMultipartUpload(bucketName, key).getUploadId(); - return new TrinoS3StreamingOutputStream(s3, bucketName, key, this::customizePutObjectRequest, uploadIdFactory, uploadExecutor, streamingUploadPartSize, memoryContext); - } - - if (!stagingDirectory.exists()) { - createDirectories(stagingDirectory.toPath()); - } - if (!stagingDirectory.isDirectory()) { - throw new IOException("Configured staging path is not a directory: " + stagingDirectory); - } - File tempFile = createTempFile(stagingDirectory.toPath(), "trino-s3-", ".tmp").toFile(); - return new TrinoS3StagingOutputStream(s3, bucketName, key, tempFile, this::customizePutObjectRequest, multiPartUploadMinFileSize, multiPartUploadMinPartSize); - } - - @Override - public FSDataOutputStream append(Path f, int bufferSize, Progressable progress) - { - throw new UnsupportedOperationException("append"); - } - - @Override - public boolean rename(Path src, Path dst) - throws IOException - { - boolean srcDirectory; - try { - srcDirectory = directory(src); - } - catch (FileNotFoundException e) { - return false; - } - - try { - if (!directory(dst)) { - // cannot copy a file to an existing file - return false; - } - // move source under destination directory - dst = new Path(dst, src.getName()); - } - catch (FileNotFoundException e) { - // destination does not exist - } - - if (keysEqual(src, dst)) { - return false; - } - - if (srcDirectory) { - for (FileStatus file : listStatus(src)) { - rename(file.getPath(), new Path(dst, file.getPath().getName())); - } - deleteObject(keyFromPath(src) + DIRECTORY_SUFFIX); - } - else { - s3.copyObject(new CopyObjectRequest(getBucketName(uri), keyFromPath(src), getBucketName(uri), keyFromPath(dst)) - .withRequesterPays(requesterPaysEnabled)); - delete(src, true); - } - - // TODO should we return true also when deleteObject() returned false? - return true; - } - - @Override - public boolean delete(Path path, boolean recursive) - throws IOException - { - String key = keyFromPath(path); - if (recursive) { - DeletePrefixResult deletePrefixResult; - try { - deletePrefixResult = deletePrefix(path); - } - catch (AmazonClientException e) { - throw new IOException("Failed to delete paths with the prefix path " + path, e); - } - if (deletePrefixResult == DeletePrefixResult.NO_KEYS_FOUND) { - // If the provided key is not a "directory" prefix, attempt to delete the object with the specified key - deleteObject(key); - } - else if (deletePrefixResult == DeletePrefixResult.DELETE_KEYS_FAILURE) { - return false; - } - deleteObject(key + DIRECTORY_SUFFIX); - } - else { - Iterator listingsIterator = listObjects(path, OptionalInt.of(2), true); - Iterator objectKeysIterator = Iterators.concat(Iterators.transform(listingsIterator, TrinoS3FileSystem::keysFromRecursiveListing)); - if (objectKeysIterator.hasNext()) { - String childKey = objectKeysIterator.next(); - if (!Objects.equals(childKey, key + PATH_SEPARATOR) || objectKeysIterator.hasNext()) { - throw new IOException("Directory " + path + " is not empty"); - } - deleteObject(childKey); - } - else { - // Avoid deleting the bucket in case that the provided path points to the bucket root - if (!key.isEmpty()) { - deleteObject(key); - } - } - deleteObject(key + DIRECTORY_SUFFIX); - } - // TODO should we return true also when deleteObject() returned false? (currently deleteObject's return value is never used) - return true; - } - - private DeletePrefixResult deletePrefix(Path prefix) - { - String bucketName = getBucketName(uri); - Iterator listings = listObjects(prefix, OptionalInt.empty(), true); - Iterator objectKeys = Iterators.concat(Iterators.transform(listings, TrinoS3FileSystem::keysFromRecursiveListing)); - Iterator> objectKeysBatches = Iterators.partition(objectKeys, DELETE_BATCH_SIZE); - if (!objectKeysBatches.hasNext()) { - return DeletePrefixResult.NO_KEYS_FOUND; - } - - boolean allKeysDeleted = true; - while (objectKeysBatches.hasNext()) { - String[] objectKeysBatch = objectKeysBatches.next().toArray(String[]::new); - try { - s3.deleteObjects(new DeleteObjectsRequest(bucketName) - .withKeys(objectKeysBatch) - .withRequesterPays(requesterPaysEnabled) - .withQuiet(true)); - } - catch (AmazonS3Exception e) { - log.debug(e, "Failed to delete objects from the bucket %s under the prefix '%s'", bucketName, prefix); - allKeysDeleted = false; - } - } - - return allKeysDeleted ? DeletePrefixResult.ALL_KEYS_DELETED : DeletePrefixResult.DELETE_KEYS_FAILURE; - } - - @VisibleForTesting - static Iterator keysFromRecursiveListing(ListObjectsV2Result listing) - { - checkState( - listing.getCommonPrefixes() == null || listing.getCommonPrefixes().isEmpty(), - "No common prefixes should be present when listing without a path delimiter"); - - return Iterators.transform(listing.getObjectSummaries().iterator(), S3ObjectSummary::getKey); - } - - private boolean directory(Path path) - throws IOException - { - return getFileStatus(path).isDirectory(); - } - - private boolean deleteObject(String key) - { - String bucketName = getBucketName(uri); - try { - DeleteObjectRequest deleteObjectRequest = new DeleteObjectRequest(bucketName, key); - if (requesterPaysEnabled) { - // TODO use deleteObjectRequest.setRequesterPays() when https://github.com/aws/aws-sdk-java/issues/1219 is fixed - // currently the method exists, but is ineffective (doesn't set the required HTTP header) - deleteObjectRequest.putCustomRequestHeader(Headers.REQUESTER_PAYS_HEADER, Constants.REQUESTER_PAYS); - } - - s3.deleteObject(deleteObjectRequest); - return true; - } - catch (AmazonClientException e) { - // TODO should we propagate this? - log.debug(e, "Failed to delete object from the bucket %s: %s", bucketName, key); - return false; - } - } - - @Override - public void deleteFiles(Collection paths) - throws IOException - { - try { - Iterable> partitions = Iterables.partition(paths, DELETE_BATCH_SIZE); - for (List currentBatch : partitions) { - deletePaths(currentBatch); - } - } - catch (MultiObjectDeleteException e) { - String errors = e.getErrors().stream() - .map(error -> format("key: %s, versionId: %s, code: %s, message: %s", error.getKey(), error.getVersionId(), error.getCode(), error.getMessage())) - .collect(joining(", ")); - throw new IOException("Exception while batch deleting paths: %s".formatted(errors), e); - } - catch (AmazonClientException e) { - throw new IOException("Exception while batch deleting paths", e); - } - } - - private void deletePaths(List paths) - { - List keys = paths.stream() - .map(TrinoS3FileSystem::keyFromPath) - .map(KeyVersion::new) - .collect(toImmutableList()); - DeleteObjectsRequest deleteObjectsRequest = new DeleteObjectsRequest(getBucketName(uri)) - .withRequesterPays(requesterPaysEnabled) - .withKeys(keys) - .withQuiet(true); - - s3.deleteObjects(deleteObjectsRequest); - } - - @Override - public boolean mkdirs(Path f, FsPermission permission) - { - // no need to do anything for S3 - return true; - } - - /** - * Enum representing the valid listing modes. This could be two booleans (recursive, filesOnly) except - * that (recursive=true, filesOnly=false) can't be translated directly to a natively supported behavior - */ - private enum ListingMode - { - SHALLOW_ALL, - SHALLOW_FILES_ONLY, - RECURSIVE_FILES_ONLY; - - public boolean isFilesOnly() - { - return (this == SHALLOW_FILES_ONLY || this == RECURSIVE_FILES_ONLY); - } - - public boolean isRecursive() - { - return this == RECURSIVE_FILES_ONLY; - } - } - - /** - * List all objects rooted at the provided path. - */ - private Iterator listPath(Path path, OptionalInt initialMaxKeys, ListingMode mode) - { - Iterator listings = listObjects(path, initialMaxKeys, mode.isRecursive()); - - Iterator results = Iterators.concat(Iterators.transform(listings, this::statusFromListing)); - if (mode.isFilesOnly()) { - // Even recursive listing can still contain empty "directory" objects, must filter them out - results = Iterators.filter(results, LocatedFileStatus::isFile); - } - return results; - } - - private Iterator listObjects(Path path, OptionalInt initialMaxKeys, boolean recursive) - { - String key = keyFromPath(path); - if (!key.isEmpty()) { - key += PATH_SEPARATOR; - } - - ListObjectsV2Request request = new ListObjectsV2Request() - .withBucketName(getBucketName(uri)) - .withPrefix(key) - .withDelimiter(recursive ? null : PATH_SEPARATOR) - .withMaxKeys(initialMaxKeys.isPresent() ? initialMaxKeys.getAsInt() : null) - .withRequesterPays(requesterPaysEnabled); - - STATS.newListObjectsCall(); - return new AbstractSequentialIterator<>(s3.listObjectsV2(request)) - { - @Override - protected ListObjectsV2Result computeNext(ListObjectsV2Result previous) - { - if (!previous.isTruncated()) { - return null; - } - // Clear any max keys after the first batch completes - request.withMaxKeys(null).setContinuationToken(previous.getNextContinuationToken()); - return s3.listObjectsV2(request); - } - }; - } - - private Iterator statusFromListing(ListObjectsV2Result listing) - { - List prefixes = listing.getCommonPrefixes(); - List objects = listing.getObjectSummaries(); - if (prefixes.isEmpty()) { - return statusFromObjects(objects); - } - if (objects.isEmpty()) { - return statusFromPrefixes(prefixes); - } - return Iterators.concat( - statusFromPrefixes(prefixes), - statusFromObjects(objects)); - } - - private Iterator statusFromPrefixes(List prefixes) - { - List list = new ArrayList<>(prefixes.size()); - for (String prefix : prefixes) { - Path path = qualifiedPath(new Path(PATH_SEPARATOR + prefix)); - FileStatus status = new FileStatus(0, true, 1, 0, 0, path); - list.add(createLocatedFileStatus(status)); - } - return list.iterator(); - } - - private Iterator statusFromObjects(List objects) - { - // NOTE: for encrypted objects, S3ObjectSummary.size() used below is NOT correct, - // however, to get the correct size we'd need to make an additional request to get - // user metadata, and in this case it doesn't matter. - return objects.stream() - .filter(object -> !object.getKey().endsWith(PATH_SEPARATOR)) - .filter(object -> !skipGlacierObjects || !isGlacierObject(object)) - .filter(object -> !isHadoopFolderMarker(object)) - .map(object -> new FileStatus( - object.getSize(), - false, - 1, - BLOCK_SIZE.toBytes(), - object.getLastModified().getTime(), - qualifiedPath(new Path(PATH_SEPARATOR + object.getKey())))) - .map(this::createLocatedFileStatus) - .iterator(); - } - - private static boolean isGlacierObject(S3ObjectSummary object) - { - return GLACIER_STORAGE_CLASSES.contains(object.getStorageClass()); - } - - private static boolean isHadoopFolderMarker(S3ObjectSummary object) - { - return object.getKey().endsWith("_$folder$"); - } - - /** - * This exception is for stopping retries for S3 calls that shouldn't be retried. - * For example, "Caused by: com.amazonaws.services.s3.model.AmazonS3Exception: Forbidden (Service: Amazon S3; Status Code: 403 ..." - */ - public static class UnrecoverableS3OperationException - extends IOException - { - public UnrecoverableS3OperationException(String bucket, String key, Throwable cause) - { - // append bucket and key to the message - super(format("%s (Bucket: %s, Key: %s)", cause, bucket, key)); - } - } - - @VisibleForTesting - ObjectMetadata getS3ObjectMetadata(Path path) - throws IOException - { - String bucketName = getBucketName(uri); - String key = keyFromPath(path); - ObjectMetadata s3ObjectMetadata = getS3ObjectMetadata(bucketName, key); - if (s3ObjectMetadata == null && !key.isEmpty()) { - return getS3ObjectMetadata(bucketName, key + PATH_SEPARATOR); - } - return s3ObjectMetadata; - } - - private ObjectMetadata getS3ObjectMetadata(String bucketName, String key) - throws IOException - { - try { - return retry() - .maxAttempts(maxAttempts) - .exponentialBackoff(BACKOFF_MIN_SLEEP, maxBackoffTime, maxRetryTime, 2.0) - .stopOn(InterruptedException.class, UnrecoverableS3OperationException.class, AbortedException.class) - .onRetry(STATS::newGetMetadataRetry) - .run("getS3ObjectMetadata", () -> { - try { - STATS.newMetadataCall(); - return s3.getObjectMetadata(new GetObjectMetadataRequest(bucketName, key) - .withRequesterPays(requesterPaysEnabled)); - } - catch (RuntimeException e) { - STATS.newGetMetadataError(); - if (e instanceof AmazonServiceException awsException) { - switch (awsException.getStatusCode()) { - case HTTP_FORBIDDEN: - case HTTP_BAD_REQUEST: - throw new UnrecoverableS3OperationException(bucketName, key, e); - } - } - if (e instanceof AmazonS3Exception s3Exception && - s3Exception.getStatusCode() == HTTP_NOT_FOUND) { - return null; - } - throw e; - } - }); - } - catch (InterruptedException | AbortedException e) { - Thread.currentThread().interrupt(); - throw new RuntimeException(e); - } - catch (Exception e) { - throwIfInstanceOf(e, IOException.class); - throwIfUnchecked(e); - throw new RuntimeException(e); - } - } - - private Path qualifiedPath(Path path) - { - return path.makeQualified(this.uri, getWorkingDirectory()); - } - - private LocatedFileStatus createLocatedFileStatus(FileStatus status) - { - try { - BlockLocation[] fakeLocation = getFileBlockLocations(status, 0, status.getLen()); - return new LocatedFileStatus(status, fakeLocation); - } - catch (IOException e) { - throw new UncheckedIOException(e); - } - } - - private static long lastModifiedTime(ObjectMetadata metadata) - { - Date date = metadata.getLastModified(); - return (date != null) ? date.getTime() : 0; - } - - private static boolean keysEqual(Path p1, Path p2) - { - return keyFromPath(p1).equals(keyFromPath(p2)); - } - - public static String keyFromPath(Path path) - { - checkArgument(path.isAbsolute(), "Path is not absolute: %s", path); - // hack to use path from fragment -- see IcebergSplitSource#hadoopPath() - String key = Optional.ofNullable(path.toUri().getFragment()) - .or(() -> Optional.ofNullable(path.toUri().getPath())) - .orElse(""); - if (key.startsWith(PATH_SEPARATOR)) { - key = key.substring(PATH_SEPARATOR.length()); - } - if (key.endsWith(PATH_SEPARATOR)) { - key = key.substring(0, key.length() - PATH_SEPARATOR.length()); - } - return key; - } - - private AmazonS3 createAmazonS3Client(Configuration hadoopConfig, ClientConfiguration clientConfig) - { - Optional encryptionMaterialsProvider = createEncryptionMaterialsProvider(hadoopConfig); - AmazonS3Builder, ? extends AmazonS3> clientBuilder; - - String signerType = hadoopConfig.get(S3_SIGNER_TYPE); - if (signerType != null) { - clientConfig.withSignerOverride(signerType); - } - - String signerClass = hadoopConfig.get(S3_SIGNER_CLASS); - if (signerClass != null) { - Class klass; - try { - klass = Class.forName(signerClass).asSubclass(Signer.class); - } - catch (ClassNotFoundException e) { - throw new RuntimeException("Signer class not found: " + signerClass, e); - } - SignerFactory.registerSigner(S3_CUSTOM_SIGNER, klass); - clientConfig.setSignerOverride(S3_CUSTOM_SIGNER); - } - - if (encryptionMaterialsProvider.isPresent()) { - clientBuilder = AmazonS3EncryptionClient.encryptionBuilder() - .withCredentials(credentialsProvider) - .withEncryptionMaterials(encryptionMaterialsProvider.get()) - .withClientConfiguration(clientConfig) - .withMetricsCollector(METRIC_COLLECTOR); - } - else { - clientBuilder = AmazonS3Client.builder() - .withCredentials(credentialsProvider) - .withClientConfiguration(clientConfig) - .withMetricsCollector(METRIC_COLLECTOR); - } - - boolean regionOrEndpointSet = false; - - // use local region when running inside of EC2 - if (pinS3ClientToCurrentRegion) { - Region region = getCurrentRegionFromEC2Metadata(); - clientBuilder.setRegion(region.getName()); - if (encryptionMaterialsProvider.isPresent()) { - CryptoConfiguration cryptoConfiguration = new CryptoConfiguration(); - cryptoConfiguration.setAwsKmsRegion(region); - ((AmazonS3EncryptionClientBuilder) clientBuilder).withCryptoConfiguration(cryptoConfiguration); - } - regionOrEndpointSet = true; - } - - String endpoint = hadoopConfig.get(S3_ENDPOINT); - String region = hadoopConfig.get(S3_REGION); - if (endpoint != null) { - clientBuilder.setEndpointConfiguration(new EndpointConfiguration(endpoint, region)); - regionOrEndpointSet = true; - } - else if (region != null) { - clientBuilder.setRegion(region); - regionOrEndpointSet = true; - } - - if (isPathStyleAccess) { - clientBuilder.enablePathStyleAccess(); - } - - if (!regionOrEndpointSet) { - clientBuilder.withRegion(US_EAST_1); - clientBuilder.setForceGlobalBucketAccessEnabled(true); - } - - clientBuilder.setRequestHandlers(forwardingRequestHandler); - - return clientBuilder.build(); - } - - private static Optional createEncryptionMaterialsProvider(Configuration hadoopConfig) - { - String kmsKeyId = hadoopConfig.get(S3_KMS_KEY_ID); - if (kmsKeyId != null) { - return Optional.of(new KMSEncryptionMaterialsProvider(kmsKeyId)); - } - - String empClassName = hadoopConfig.get(S3_ENCRYPTION_MATERIALS_PROVIDER); - if (empClassName == null) { - return Optional.empty(); - } - - try { - Object instance = Class.forName(empClassName).getConstructor().newInstance(); - if (!(instance instanceof EncryptionMaterialsProvider emp)) { - throw new RuntimeException("Invalid encryption materials provider class: " + instance.getClass().getName()); - } - if (emp instanceof Configurable configurable) { - configurable.setConf(hadoopConfig); - } - return Optional.of(emp); - } - catch (ReflectiveOperationException e) { - throw new RuntimeException("Unable to load or create S3 encryption materials provider: " + empClassName, e); - } - } - - private AWSCredentialsProvider createAwsCredentialsProvider(URI uri, Configuration conf) - { - // credentials embedded in the URI take precedence and are used alone - Optional credentials = getEmbeddedAwsCredentials(uri); - if (credentials.isPresent()) { - return new AWSStaticCredentialsProvider(credentials.get()); - } - - if (conf.getBoolean(S3_USE_WEB_IDENTITY_TOKEN_CREDENTIALS_PROVIDER, false)) { - return new WebIdentityTokenCredentialsProvider(); - } - - // a custom credential provider is also used alone - String providerClass = conf.get(S3_CREDENTIALS_PROVIDER); - if (!isNullOrEmpty(providerClass)) { - return getCustomAWSCredentialsProvider(uri, conf, providerClass); - } - - // use configured credentials or default chain with optional role - AWSCredentialsProvider provider = getAwsCredentials(conf) - .map(value -> (AWSCredentialsProvider) new AWSStaticCredentialsProvider(value)) - .orElseGet(DefaultAWSCredentialsProviderChain::getInstance); - - if (iamRole != null) { - String stsEndpointOverride = conf.get(S3_STS_ENDPOINT); - String stsRegionOverride = conf.get(S3_STS_REGION); - - AWSSecurityTokenServiceClientBuilder stsClientBuilder = AWSSecurityTokenServiceClientBuilder.standard() - .withCredentials(provider); - - String region; - if (!isNullOrEmpty(stsRegionOverride)) { - region = stsRegionOverride; - } - else { - DefaultAwsRegionProviderChain regionProviderChain = new DefaultAwsRegionProviderChain(); - try { - region = regionProviderChain.getRegion(); - } - catch (SdkClientException ex) { - log.warn("Falling back to default AWS region %s", US_EAST_1); - region = US_EAST_1.getName(); - } - } - - if (!isNullOrEmpty(stsEndpointOverride)) { - stsClientBuilder.withEndpointConfiguration(new EndpointConfiguration(stsEndpointOverride, region)); - } - else { - stsClientBuilder.withRegion(region); - } - - provider = new STSAssumeRoleSessionCredentialsProvider.Builder(iamRole, s3RoleSessionName) - .withExternalId(externalId) - .withStsClient(stsClientBuilder.build()) - .build(); - } - - return provider; - } - - private static AWSCredentialsProvider getCustomAWSCredentialsProvider(URI uri, Configuration conf, String providerClass) - { - try { - log.debug("Using AWS credential provider %s for URI %s", providerClass, uri); - return conf.getClassByName(providerClass) - .asSubclass(AWSCredentialsProvider.class) - .getConstructor(URI.class, Configuration.class) - .newInstance(uri, conf); - } - catch (ReflectiveOperationException e) { - throw new RuntimeException(format("Error creating an instance of %s for URI %s", providerClass, uri), e); - } - } - - private static Optional getEmbeddedAwsCredentials(URI uri) - { - String userInfo = nullToEmpty(uri.getUserInfo()); - List parts = Splitter.on(':').limit(2).splitToList(userInfo); - if (parts.size() == 2) { - String accessKey = parts.get(0); - String secretKey = parts.get(1); - if (!accessKey.isEmpty() && !secretKey.isEmpty()) { - return Optional.of(new BasicAWSCredentials(accessKey, secretKey)); - } - } - return Optional.empty(); - } - - private static Optional getAwsCredentials(Configuration conf) - { - String accessKey = conf.get(S3_ACCESS_KEY); - String secretKey = conf.get(S3_SECRET_KEY); - - if (isNullOrEmpty(accessKey) || isNullOrEmpty(secretKey)) { - return Optional.empty(); - } - - String sessionToken = conf.get(S3_SESSION_TOKEN); - if (!isNullOrEmpty(sessionToken)) { - return Optional.of(new BasicSessionCredentials(accessKey, secretKey, sessionToken)); - } - - return Optional.of(new BasicAWSCredentials(accessKey, secretKey)); - } - - private void customizePutObjectRequest(PutObjectRequest request) - { - if (request.getMetadata() == null) { - request.setMetadata(new ObjectMetadata()); - } - if (sseEnabled) { - switch (sseType) { - case KMS: - request.setSSEAwsKeyManagementParams(getSseKeyManagementParams()); - break; - case S3: - request.getMetadata().setSSEAlgorithm(ObjectMetadata.AES_256_SERVER_SIDE_ENCRYPTION); - break; - } - } - request.setCannedAcl(s3AclType.getCannedACL()); - request.setRequesterPays(requesterPaysEnabled); - request.setStorageClass(s3StorageClass.getS3StorageClass()); - } - - private InitiateMultipartUploadResult initMultipartUpload(String bucket, String key) - { - try { - return retry() - .maxAttempts(maxAttempts) - .exponentialBackoff(BACKOFF_MIN_SLEEP, maxBackoffTime, maxRetryTime, 2.0) - .stopOn(InterruptedException.class, UnrecoverableS3OperationException.class, AbortedException.class, FileNotFoundException.class) - .onRetry(STATS::newInitiateMultipartUploadRetry) - .run("initiateMultipartUpload", () -> { - try { - InitiateMultipartUploadRequest request = new InitiateMultipartUploadRequest(bucket, key) - .withObjectMetadata(new ObjectMetadata()) - .withCannedACL(s3AclType.getCannedACL()) - .withRequesterPays(requesterPaysEnabled) - .withStorageClass(s3StorageClass.getS3StorageClass()); - - if (sseEnabled) { - switch (sseType) { - case KMS: - request.setSSEAwsKeyManagementParams(getSseKeyManagementParams()); - break; - case S3: - request.getObjectMetadata().setSSEAlgorithm(ObjectMetadata.AES_256_SERVER_SIDE_ENCRYPTION); - break; - } - } - - return s3.initiateMultipartUpload(request); - } - catch (RuntimeException e) { - STATS.newInitiateMultipartUploadError(); - if (e instanceof AmazonS3Exception s3Exception) { - switch (s3Exception.getStatusCode()) { - case HTTP_FORBIDDEN, HTTP_BAD_REQUEST -> throw new UnrecoverableS3OperationException(bucket, key, e); - case HTTP_NOT_FOUND -> { - throwIfFileNotFound(bucket, key, s3Exception); - throw new UnrecoverableS3OperationException(bucket, key, e); - } - } - } - throw e; - } - }); - } - catch (InterruptedException | AbortedException e) { - Thread.currentThread().interrupt(); - throw new RuntimeException(e); - } - catch (Exception e) { - throwIfUnchecked(e); - throw new RuntimeException(e); - } - } - - private SSEAwsKeyManagementParams getSseKeyManagementParams() - { - return (sseKmsKeyId != null) ? new SSEAwsKeyManagementParams(sseKmsKeyId) : new SSEAwsKeyManagementParams(); - } - - private static class TrinoS3InputStream - extends FSInputStream - { - private final AmazonS3 s3; - private final String bucket; - private final Path path; - private final boolean requesterPaysEnabled; - private final int maxAttempts; - private final Duration maxBackoffTime; - private final Duration maxRetryTime; - - private final AtomicBoolean closed = new AtomicBoolean(); - - private InputStream in; - private long streamPosition; - private long nextReadPosition; - - public TrinoS3InputStream(AmazonS3 s3, String bucket, Path path, boolean requesterPaysEnabled, int maxAttempts, Duration maxBackoffTime, Duration maxRetryTime) - { - this.s3 = requireNonNull(s3, "s3 is null"); - this.bucket = requireNonNull(bucket, "bucket is null"); - this.path = requireNonNull(path, "path is null"); - this.requesterPaysEnabled = requesterPaysEnabled; - - checkArgument(maxAttempts >= 0, "maxAttempts cannot be negative"); - this.maxAttempts = maxAttempts; - this.maxBackoffTime = requireNonNull(maxBackoffTime, "maxBackoffTime is null"); - this.maxRetryTime = requireNonNull(maxRetryTime, "maxRetryTime is null"); - } - - @Override - public void close() - { - closed.set(true); - closeStream(); - } - - @Override - public int read(long position, byte[] buffer, int offset, int length) - throws IOException - { - checkClosed(); - if (position < 0) { - throw new EOFException(NEGATIVE_SEEK); - } - checkFromToIndex(offset, offset + length, buffer.length); - if (length == 0) { - return 0; - } - - try { - return retry() - .maxAttempts(maxAttempts) - .exponentialBackoff(BACKOFF_MIN_SLEEP, maxBackoffTime, maxRetryTime, 2.0) - .stopOn(InterruptedException.class, UnrecoverableS3OperationException.class, EOFException.class, AbortedException.class, FileNotFoundException.class) - .onRetry(STATS::newGetObjectRetry) - .run("getS3Object", () -> { - InputStream stream; - String key = keyFromPath(path); - try { - GetObjectRequest request = new GetObjectRequest(bucket, key) - .withRange(position, (position + length) - 1) - .withRequesterPays(requesterPaysEnabled); - stream = s3.getObject(request).getObjectContent(); - } - catch (RuntimeException e) { - STATS.newGetObjectError(); - if (e instanceof AmazonServiceException s3Exception) { - switch (s3Exception.getStatusCode()) { - case HTTP_FORBIDDEN: - case HTTP_BAD_REQUEST: - throw new UnrecoverableS3OperationException(bucket, key, e); - } - } - if (e instanceof AmazonS3Exception s3Exception) { - switch (s3Exception.getStatusCode()) { - case HTTP_RANGE_NOT_SATISFIABLE: - throw new EOFException(CANNOT_SEEK_PAST_EOF); - case HTTP_NOT_FOUND: - throwIfFileNotFound(bucket, key, s3Exception); - throw new UnrecoverableS3OperationException(bucket, key, e); - } - } - throw e; - } - - STATS.connectionOpened(); - try { - int read = 0; - while (read < length) { - int n = stream.read(buffer, offset + read, length - read); - if (n <= 0) { - if (read > 0) { - return read; - } - return -1; - } - read += n; - } - return read; - } - catch (Throwable t) { - STATS.newReadError(t); - abortStream(stream); - throw t; - } - finally { - STATS.connectionReleased(); - stream.close(); - } - }); - } - catch (Exception e) { - throw propagate(e); - } - } - - @Override - public void seek(long pos) - throws IOException - { - checkClosed(); - if (pos < 0) { - throw new EOFException(NEGATIVE_SEEK); - } - - // this allows a seek beyond the end of the stream but the next read will fail - nextReadPosition = pos; - } - - @Override - public long getPos() - { - return nextReadPosition; - } - - @Override - public int read() - { - // This stream is wrapped with BufferedInputStream, so this method should never be called - throw new UnsupportedOperationException(); - } - - @Override - public int read(byte[] buffer, int offset, int length) - throws IOException - { - checkClosed(); - try { - int bytesRead = retry() - .maxAttempts(maxAttempts) - .exponentialBackoff(BACKOFF_MIN_SLEEP, maxBackoffTime, maxRetryTime, 2.0) - .stopOn(InterruptedException.class, UnrecoverableS3OperationException.class, AbortedException.class, FileNotFoundException.class) - .onRetry(STATS::newReadRetry) - .run("readStream", () -> { - seekStream(); - try { - return in.read(buffer, offset, length); - } - catch (Exception e) { - STATS.newReadError(e); - closeStream(); - throw e; - } - }); - - if (bytesRead != -1) { - streamPosition += bytesRead; - nextReadPosition += bytesRead; - } - return bytesRead; - } - catch (Exception e) { - throw propagate(e); - } - } - - @Override - public boolean seekToNewSource(long targetPos) - { - return false; - } - - private void seekStream() - throws IOException - { - if ((in != null) && (nextReadPosition == streamPosition)) { - // already at specified position - return; - } - - if ((in != null) && (nextReadPosition > streamPosition)) { - // seeking forwards - long skip = nextReadPosition - streamPosition; - if (skip <= max(in.available(), MAX_SKIP_SIZE.toBytes())) { - // already buffered or seek is small enough - try { - if (in.skip(skip) == skip) { - streamPosition = nextReadPosition; - return; - } - } - catch (IOException _) { - // will retry by re-opening the stream - } - } - } - - // close the stream and open at desired position - streamPosition = nextReadPosition; - closeStream(); - openStream(); - } - - private void openStream() - throws IOException - { - if (in == null) { - in = openStream(path, nextReadPosition); - streamPosition = nextReadPosition; - STATS.connectionOpened(); - } - } - - private InputStream openStream(Path path, long start) - throws IOException - { - try { - return retry() - .maxAttempts(maxAttempts) - .exponentialBackoff(BACKOFF_MIN_SLEEP, maxBackoffTime, maxRetryTime, 2.0) - .stopOn(InterruptedException.class, UnrecoverableS3OperationException.class, AbortedException.class, FileNotFoundException.class) - .onRetry(STATS::newGetObjectRetry) - .run("getS3Object", () -> { - String key = keyFromPath(path); - try { - GetObjectRequest request = new GetObjectRequest(bucket, key) - .withRange(start) - .withRequesterPays(requesterPaysEnabled); - return s3.getObject(request).getObjectContent(); - } - catch (RuntimeException e) { - STATS.newGetObjectError(); - if (e instanceof AmazonServiceException awsException) { - switch (awsException.getStatusCode()) { - case HTTP_FORBIDDEN: - case HTTP_BAD_REQUEST: - throw new UnrecoverableS3OperationException(bucket, key, e); - } - } - if (e instanceof AmazonS3Exception s3Exception) { - switch (s3Exception.getStatusCode()) { - case HTTP_RANGE_NOT_SATISFIABLE: - // ignore request for start past end of object - return new ByteArrayInputStream(new byte[0]); - case HTTP_NOT_FOUND: - throwIfFileNotFound(bucket, key, s3Exception); - throw new UnrecoverableS3OperationException(bucket, key, e); - } - } - throw e; - } - }); - } - catch (Exception e) { - throw propagate(e); - } - } - - private void closeStream() - { - if (in != null) { - abortStream(in); - in = null; - STATS.connectionReleased(); - } - } - - private void checkClosed() - throws IOException - { - if (closed.get()) { - throw new IOException(STREAM_IS_CLOSED); - } - } - - private static void abortStream(InputStream in) - { - try { - if (in instanceof S3ObjectInputStream s3ObjectInputStream) { - s3ObjectInputStream.abort(); - } - else { - in.close(); - } - } - catch (IOException | AbortedException _) { - // thrown if the current thread is in the interrupted state - } - } - - private static RuntimeException propagate(Exception e) - throws IOException - { - if (e instanceof InterruptedException | e instanceof AbortedException) { - Thread.currentThread().interrupt(); - throw new InterruptedIOException(); - } - throwIfInstanceOf(e, IOException.class); - throwIfUnchecked(e); - throw new IOException(e); - } - } - - private static class TrinoS3StagingOutputStream - extends FilterOutputStream - { - private final TransferManager transferManager; - private final String bucket; - private final String key; - private final File tempFile; - private final Consumer requestCustomizer; - - private boolean closed; - - public TrinoS3StagingOutputStream( - AmazonS3 s3, - String bucket, - String key, - File tempFile, - Consumer requestCustomizer, - long multiPartUploadMinFileSize, - long multiPartUploadMinPartSize) - throws IOException - { - super(new BufferedOutputStream(new FileOutputStream(requireNonNull(tempFile, "tempFile is null")))); - - transferManager = TransferManagerBuilder.standard() - .withS3Client(requireNonNull(s3, "s3 is null")) - .withMinimumUploadPartSize(multiPartUploadMinPartSize) - .withMultipartUploadThreshold(multiPartUploadMinFileSize).build(); - - this.bucket = requireNonNull(bucket, "bucket is null"); - this.key = requireNonNull(key, "key is null"); - this.tempFile = tempFile; - this.requestCustomizer = requireNonNull(requestCustomizer, "requestCustomizer is null"); - - log.debug("OutputStream for key '%s' using file: %s", key, tempFile); - } - - @Override - public void close() - throws IOException - { - if (closed) { - return; - } - closed = true; - - try { - super.close(); - uploadObject(); - } - finally { - if (!tempFile.delete()) { - log.warn("Could not delete temporary file: %s", tempFile); - } - // close transfer manager but keep underlying S3 client open - transferManager.shutdownNow(false); - } - } - - private void uploadObject() - throws IOException - { - try { - log.debug("Starting upload for bucket: %s, key: %s, file: %s, size: %s", bucket, key, tempFile, tempFile.length()); - STATS.uploadStarted(); - - PutObjectRequest request = new PutObjectRequest(bucket, key, tempFile); - requestCustomizer.accept(request); - - Upload upload = transferManager.upload(request); - - if (log.isDebugEnabled()) { - upload.addProgressListener(createProgressListener(upload)); - } - - upload.waitForCompletion(); - STATS.uploadSuccessful(); - log.debug("Completed upload for bucket: %s, key: %s", bucket, key); - } - catch (AmazonClientException e) { - STATS.uploadFailed(); - throw new IOException(e); - } - catch (InterruptedException e) { - STATS.uploadFailed(); - Thread.currentThread().interrupt(); - throw new InterruptedIOException(); - } - } - - private ProgressListener createProgressListener(Transfer transfer) - { - return new ProgressListener() - { - private ProgressEventType previousType; - private double previousTransferred; - - @Override - public synchronized void progressChanged(ProgressEvent progressEvent) - { - ProgressEventType eventType = progressEvent.getEventType(); - if (previousType != eventType) { - log.debug("Upload progress event (%s/%s): %s", bucket, key, eventType); - previousType = eventType; - } - - double transferred = transfer.getProgress().getPercentTransferred(); - if (transferred >= (previousTransferred + 10.0)) { - log.debug("Upload percentage (%s/%s): %.0f%%", bucket, key, transferred); - previousTransferred = transferred; - } - } - }; - } - } - - private static class TrinoS3StreamingOutputStream - extends OutputStream - { - private final AmazonS3 s3; - private final String bucketName; - private final String key; - private final Consumer requestCustomizer; - private final Supplier uploadIdFactory; - private final ExecutorService uploadExecutor; - - private int currentPartNumber; - private byte[] buffer; - private int bufferSize; - - private boolean closed; - private boolean failed; - // Mutated and read by main thread; mutated just before scheduling upload to background thread (access does not need to be thread safe) - private boolean multipartUploadStarted; - // Mutated by background thread which does the multipart upload; read by both main thread and background thread; - // Visibility ensured by memory barrier via inProgressUploadFuture - private Optional uploadId = Optional.empty(); - private Future inProgressUploadFuture; - private final List parts = new ArrayList<>(); - private final int partSize; - private int initialBufferSize; - private final LocalMemoryContext memoryContext; - - public TrinoS3StreamingOutputStream( - AmazonS3 s3, - String bucketName, - String key, - Consumer requestCustomizer, - Supplier uploadIdFactory, - ExecutorService uploadExecutor, - int partSize, - AggregatedMemoryContext memoryContext) - { - STATS.uploadStarted(); - - this.s3 = requireNonNull(s3, "s3 is null"); - this.partSize = partSize; - this.bucketName = requireNonNull(bucketName, "bucketName is null"); - this.key = requireNonNull(key, "key is null"); - this.requestCustomizer = requireNonNull(requestCustomizer, "requestCustomizer is null"); - this.uploadIdFactory = requireNonNull(uploadIdFactory, "uploadIdFactory is null"); - this.uploadExecutor = requireNonNull(uploadExecutor, "uploadExecutor is null"); - this.buffer = new byte[0]; - this.initialBufferSize = 64; - this.memoryContext = requireNonNull(memoryContext, "memoryContext is null") - .newLocalMemoryContext(TrinoS3StreamingOutputStream.class.getSimpleName()); - } - - @Override - public void write(int b) - throws IOException - { - ensureExtraBytesCapacity(1); - flushBuffer(false); - buffer[bufferSize] = (byte) b; - bufferSize++; - } - - @Override - public void write(byte[] bytes, int offset, int length) - throws IOException - { - while (length > 0) { - ensureExtraBytesCapacity(min(partSize - bufferSize, length)); - int copied = min(buffer.length - bufferSize, length); - arraycopy(bytes, offset, buffer, bufferSize, copied); - bufferSize += copied; - - flushBuffer(false); - - offset += copied; - length -= copied; - } - } - - @Override - public void flush() - throws IOException - { - flushBuffer(false); - } - - @Override - public void close() - throws IOException - { - if (closed) { - return; - } - closed = true; - - if (failed) { - try { - abortUpload(); - return; - } - catch (RuntimeException e) { - throw new IOException(e); - } - } - - try { - flushBuffer(true); - memoryContext.close(); - waitForPreviousUploadFinish(); - } - catch (IOException | RuntimeException e) { - abortUploadSuppressed(e); - throw e; - } - - try { - uploadId.ifPresent(this::finishUpload); - } - catch (RuntimeException e) { - abortUploadSuppressed(e); - throw new IOException(e); - } - } - - private void ensureExtraBytesCapacity(int extraBytesCapacity) - { - int totalBytesCapacity = bufferSize + extraBytesCapacity; - checkArgument(totalBytesCapacity <= partSize); - if (buffer.length < totalBytesCapacity) { - // buffer length might be 0 - int newBytesLength = max(buffer.length, initialBufferSize); - if (totalBytesCapacity > newBytesLength) { - // grow array by 50% - newBytesLength = max(newBytesLength + (newBytesLength >> 1), totalBytesCapacity); - newBytesLength = min(newBytesLength, partSize); - } - buffer = Arrays.copyOf(buffer, newBytesLength); - memoryContext.setBytes(buffer.length); - } - } - - private void flushBuffer(boolean finished) - throws IOException - { - // Skip multipart upload if there would only be one part - if (finished && !multipartUploadStarted) { - InputStream in = new ByteArrayInputStream(buffer, 0, bufferSize); - - ObjectMetadata metadata = new ObjectMetadata(); - metadata.setContentLength(bufferSize); - metadata.setContentMD5(getMd5AsBase64(buffer, 0, bufferSize)); - - PutObjectRequest request = new PutObjectRequest(bucketName, key, in, metadata); - requestCustomizer.accept(request); - - try { - s3.putObject(request); - return; - } - catch (AmazonServiceException e) { - failed = true; - throw new IOException(e); - } - } - - // The multipart upload API only accept the last part to be less than 5MB - if (bufferSize == partSize || (finished && bufferSize > 0)) { - byte[] data = buffer; - int length = bufferSize; - - if (finished) { - this.buffer = null; - } - else { - this.buffer = new byte[0]; - this.initialBufferSize = partSize; - bufferSize = 0; - } - memoryContext.setBytes(0); - - try { - waitForPreviousUploadFinish(); - } - catch (IOException e) { - failed = true; - abortUploadSuppressed(e); - throw e; - } - multipartUploadStarted = true; - inProgressUploadFuture = uploadExecutor.submit(() -> uploadPage(data, length)); - } - } - - private void waitForPreviousUploadFinish() - throws IOException - { - if (inProgressUploadFuture == null) { - return; - } - - try { - inProgressUploadFuture.get(); - } - catch (InterruptedException e) { - Thread.currentThread().interrupt(); - throw new InterruptedIOException(); - } - catch (ExecutionException e) { - throw new IOException("Streaming upload failed", e); - } - } - - private UploadPartResult uploadPage(byte[] data, int length) - { - if (uploadId.isEmpty()) { - uploadId = Optional.of(uploadIdFactory.get()); - } - - currentPartNumber++; - UploadPartRequest uploadRequest = new UploadPartRequest() - .withBucketName(bucketName) - .withKey(key) - .withUploadId(uploadId.get()) - .withPartNumber(currentPartNumber) - .withInputStream(new ByteArrayInputStream(data, 0, length)) - .withPartSize(length) - .withMD5Digest(getMd5AsBase64(data, 0, length)); - - UploadPartResult partResult = s3.uploadPart(uploadRequest); - parts.add(partResult); - return partResult; - } - - private void finishUpload(String uploadId) - { - List etags = parts.stream() - .map(UploadPartResult::getPartETag) - .collect(toList()); - s3.completeMultipartUpload(new CompleteMultipartUploadRequest(bucketName, key, uploadId, etags)); - - STATS.uploadSuccessful(); - } - - private void abortUpload() - { - STATS.uploadFailed(); - - uploadId.ifPresent(id -> s3.abortMultipartUpload(new AbortMultipartUploadRequest(bucketName, key, id))); - } - - @SuppressWarnings("ObjectEquality") - private void abortUploadSuppressed(Throwable throwable) - { - try { - abortUpload(); - } - catch (Throwable t) { - if (throwable != t) { - throwable.addSuppressed(t); - } - } - } - } - - @VisibleForTesting - public AmazonS3 getS3Client() - { - return s3; - } - - @VisibleForTesting - void setS3Client(AmazonS3 client) - { - s3 = client; - } - - @VisibleForTesting - protected String getBucketName(URI uri) - { - return extractBucketName(uri); - } - - /** - * Helper function used to work around the fact that if you use an S3 bucket with an '_' that java.net.URI - * behaves differently and sets the host value to null whereas S3 buckets without '_' have a properly - * set host field. '_' is only allowed in S3 bucket names in us-east-1. - * - * @param uri The URI from which to extract a host value. - * @return The host value where uri.getAuthority() is used when uri.getHost() returns null as long as no UserInfo is present. - * @throws IllegalArgumentException If the bucket cannot be determined from the URI. - */ - public static String extractBucketName(URI uri) - { - if (uri.getHost() != null) { - return uri.getHost(); - } - - if (uri.getUserInfo() == null) { - return uri.getAuthority(); - } - - throw new IllegalArgumentException("Unable to determine S3 bucket from URI."); - } - - public static TrinoS3FileSystemStats getFileSystemStats() - { - return STATS; - } - - private static String getMd5AsBase64(byte[] data, int offset, int length) - { - @SuppressWarnings("deprecation") - byte[] md5 = md5().hashBytes(data, offset, length).asBytes(); - return Base64.getEncoder().encodeToString(md5); - } - - private static void throwIfFileNotFound(String bucket, String key, AmazonS3Exception s3Exception) - throws FileNotFoundException - { - String errorCode = s3Exception.getErrorCode(); - if (NO_SUCH_KEY_ERROR_CODE.equals(errorCode) || NO_SUCH_BUCKET_ERROR_CODE.equals(errorCode)) { - FileNotFoundException fileNotFoundException = new FileNotFoundException(format("%s (Bucket: %s, Key: %s)", requireNonNullElse(s3Exception.getMessage(), s3Exception), bucket, key)); - fileNotFoundException.initCause(s3Exception); - throw fileNotFoundException; - } - } - - private enum DeletePrefixResult - { - NO_KEYS_FOUND, - ALL_KEYS_DELETED, - DELETE_KEYS_FAILURE - } - - private static class ForwardingRequestHandler - extends RequestHandler2 - { - private volatile RequestHandler2 delegate; - - public synchronized void setDelegateIfAbsent(Supplier supplier) - { - if (delegate == null) { - delegate = supplier.get(); - } - } - - @Override - public void beforeRequest(Request request) - { - if (delegate != null) { - delegate.beforeRequest(request); - } - } - - @Override - public void afterResponse(Request request, Response response) - { - if (delegate != null) { - delegate.afterResponse(request, response); - } - } - - @Override - public void afterError(Request request, Response response, Exception e) - { - if (delegate != null) { - delegate.afterError(request, response, e); - } - } - } -} diff --git a/lib/trino-hdfs/src/main/java/io/trino/hdfs/s3/TrinoS3FileSystemStats.java b/lib/trino-hdfs/src/main/java/io/trino/hdfs/s3/TrinoS3FileSystemStats.java deleted file mode 100644 index 8957cb379486..000000000000 --- a/lib/trino-hdfs/src/main/java/io/trino/hdfs/s3/TrinoS3FileSystemStats.java +++ /dev/null @@ -1,291 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package io.trino.hdfs.s3; - -import com.amazonaws.AbortedException; -import com.amazonaws.metrics.RequestMetricCollector; -import io.airlift.stats.CounterStat; -import org.weakref.jmx.Flatten; -import org.weakref.jmx.Managed; -import org.weakref.jmx.Nested; - -import java.net.SocketException; -import java.net.SocketTimeoutException; - -public class TrinoS3FileSystemStats -{ - private final CounterStat activeConnections = new CounterStat(); - private final CounterStat startedUploads = new CounterStat(); - private final CounterStat failedUploads = new CounterStat(); - private final CounterStat successfulUploads = new CounterStat(); - private final CounterStat metadataCalls = new CounterStat(); - private final CounterStat listStatusCalls = new CounterStat(); - private final CounterStat listLocatedStatusCalls = new CounterStat(); - private final CounterStat listObjectsCalls = new CounterStat(); - private final CounterStat otherReadErrors = new CounterStat(); - private final CounterStat awsAbortedExceptions = new CounterStat(); - private final CounterStat socketExceptions = new CounterStat(); - private final CounterStat socketTimeoutExceptions = new CounterStat(); - private final CounterStat getObjectErrors = new CounterStat(); - private final CounterStat getMetadataErrors = new CounterStat(); - private final CounterStat initiateMultipartUploadErrors = new CounterStat(); - private final CounterStat getObjectRetries = new CounterStat(); - private final CounterStat getMetadataRetries = new CounterStat(); - private final CounterStat readRetries = new CounterStat(); - private final CounterStat initiateMultipartUploadRetries = new CounterStat(); - - // see AWSRequestMetrics - private final AwsSdkClientCoreStats clientCoreStats = new AwsSdkClientCoreStats(); - - @Managed - @Nested - public CounterStat getActiveConnections() - { - return activeConnections; - } - - @Managed - @Nested - public CounterStat getStartedUploads() - { - return startedUploads; - } - - @Managed - @Nested - public CounterStat getFailedUploads() - { - return failedUploads; - } - - @Managed - @Nested - public CounterStat getSuccessfulUploads() - { - return successfulUploads; - } - - @Managed - @Nested - public CounterStat getMetadataCalls() - { - return metadataCalls; - } - - @Managed - @Nested - public CounterStat getListStatusCalls() - { - return listStatusCalls; - } - - @Managed - @Nested - public CounterStat getListLocatedStatusCalls() - { - return listLocatedStatusCalls; - } - - @Managed - @Nested - public CounterStat getListObjectsCalls() - { - return listObjectsCalls; - } - - @Managed - @Nested - public CounterStat getGetObjectErrors() - { - return getObjectErrors; - } - - @Managed - @Nested - public CounterStat getGetMetadataErrors() - { - return getMetadataErrors; - } - - @Managed - @Nested - public CounterStat getInitiateMultipartUploadErrors() - { - return initiateMultipartUploadErrors; - } - - @Managed - @Nested - public CounterStat getOtherReadErrors() - { - return otherReadErrors; - } - - @Managed - @Nested - public CounterStat getSocketExceptions() - { - return socketExceptions; - } - - @Managed - @Nested - public CounterStat getSocketTimeoutExceptions() - { - return socketTimeoutExceptions; - } - - @Managed - @Nested - public CounterStat getAwsAbortedExceptions() - { - return awsAbortedExceptions; - } - - @Managed - @Flatten - public AwsSdkClientCoreStats getClientCoreStats() - { - return clientCoreStats; - } - - @Managed - @Nested - public CounterStat getGetObjectRetries() - { - return getObjectRetries; - } - - @Managed - @Nested - public CounterStat getGetMetadataRetries() - { - return getMetadataRetries; - } - - @Managed - @Nested - public CounterStat getReadRetries() - { - return readRetries; - } - - @Managed - @Nested - public CounterStat getInitiateMultipartUploadRetries() - { - return initiateMultipartUploadRetries; - } - - public RequestMetricCollector newRequestMetricCollector() - { - return clientCoreStats.newRequestMetricCollector(); - } - - public void connectionOpened() - { - activeConnections.update(1); - } - - public void connectionReleased() - { - activeConnections.update(-1); - } - - public void uploadStarted() - { - startedUploads.update(1); - } - - public void uploadFailed() - { - failedUploads.update(1); - } - - public void uploadSuccessful() - { - successfulUploads.update(1); - } - - public void newMetadataCall() - { - metadataCalls.update(1); - } - - public void newListStatusCall() - { - listStatusCalls.update(1); - } - - public void newListLocatedStatusCall() - { - listLocatedStatusCalls.update(1); - } - - public void newListObjectsCall() - { - listObjectsCalls.update(1); - } - - public void newReadError(Throwable t) - { - if (t instanceof SocketException) { - socketExceptions.update(1); - } - else if (t instanceof SocketTimeoutException) { - socketTimeoutExceptions.update(1); - } - else if (t instanceof AbortedException) { - awsAbortedExceptions.update(1); - } - else { - otherReadErrors.update(1); - } - } - - public void newGetObjectError() - { - getObjectErrors.update(1); - } - - public void newGetMetadataError() - { - getMetadataErrors.update(1); - } - - public void newInitiateMultipartUploadError() - { - initiateMultipartUploadErrors.update(1); - } - - public void newGetObjectRetry() - { - getObjectRetries.update(1); - } - - public void newGetMetadataRetry() - { - getMetadataRetries.update(1); - } - - public void newReadRetry() - { - readRetries.update(1); - } - - public void newInitiateMultipartUploadRetry() - { - initiateMultipartUploadRetries.update(1); - } -} diff --git a/lib/trino-hdfs/src/main/java/io/trino/hdfs/s3/TrinoS3Protocol.java b/lib/trino-hdfs/src/main/java/io/trino/hdfs/s3/TrinoS3Protocol.java deleted file mode 100644 index c88adf27fb45..000000000000 --- a/lib/trino-hdfs/src/main/java/io/trino/hdfs/s3/TrinoS3Protocol.java +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package io.trino.hdfs.s3; - -import com.amazonaws.Protocol; - -public enum TrinoS3Protocol -{ - HTTP(Protocol.HTTP), - HTTPS(Protocol.HTTPS); - - private final Protocol protocol; - - TrinoS3Protocol(Protocol protocol) - { - this.protocol = protocol; - } - - public Protocol getProtocol() - { - return protocol; - } - - @Override - public String toString() - { - return this.protocol.name(); - } -} diff --git a/lib/trino-hdfs/src/main/java/io/trino/hdfs/s3/TrinoS3SignerType.java b/lib/trino-hdfs/src/main/java/io/trino/hdfs/s3/TrinoS3SignerType.java deleted file mode 100644 index 9e60c415b1d8..000000000000 --- a/lib/trino-hdfs/src/main/java/io/trino/hdfs/s3/TrinoS3SignerType.java +++ /dev/null @@ -1,27 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package io.trino.hdfs.s3; - -// These are the exact names used by SignerFactory in the AWS library -// and thus cannot be renamed or use the normal naming convention. -@SuppressWarnings("EnumeratedConstantNamingConvention") -public enum TrinoS3SignerType -{ - S3SignerType, - AWS3SignerType, - AWS4SignerType, - AWSS3V4SignerType, - CloudFrontSignerType, - QueryStringSignerType, -} diff --git a/lib/trino-hdfs/src/main/java/io/trino/hdfs/s3/TrinoS3SseType.java b/lib/trino-hdfs/src/main/java/io/trino/hdfs/s3/TrinoS3SseType.java deleted file mode 100644 index 34c9b395490b..000000000000 --- a/lib/trino-hdfs/src/main/java/io/trino/hdfs/s3/TrinoS3SseType.java +++ /dev/null @@ -1,19 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package io.trino.hdfs.s3; - -public enum TrinoS3SseType -{ - KMS, S3 -} diff --git a/lib/trino-hdfs/src/main/java/io/trino/hdfs/s3/TrinoS3StorageClass.java b/lib/trino-hdfs/src/main/java/io/trino/hdfs/s3/TrinoS3StorageClass.java deleted file mode 100644 index d8cfa388c690..000000000000 --- a/lib/trino-hdfs/src/main/java/io/trino/hdfs/s3/TrinoS3StorageClass.java +++ /dev/null @@ -1,36 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package io.trino.hdfs.s3; - -import com.amazonaws.services.s3.model.StorageClass; - -import static java.util.Objects.requireNonNull; - -public enum TrinoS3StorageClass -{ - STANDARD(StorageClass.Standard), - INTELLIGENT_TIERING(StorageClass.IntelligentTiering); - - private final StorageClass s3StorageClass; - - TrinoS3StorageClass(StorageClass s3StorageClass) - { - this.s3StorageClass = requireNonNull(s3StorageClass, "s3StorageClass is null"); - } - - public StorageClass getS3StorageClass() - { - return s3StorageClass; - } -} diff --git a/lib/trino-hdfs/src/main/java/io/trino/hdfs/s3/UriBasedS3SecurityMappingsProvider.java b/lib/trino-hdfs/src/main/java/io/trino/hdfs/s3/UriBasedS3SecurityMappingsProvider.java deleted file mode 100644 index 6540a086eaac..000000000000 --- a/lib/trino-hdfs/src/main/java/io/trino/hdfs/s3/UriBasedS3SecurityMappingsProvider.java +++ /dev/null @@ -1,60 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package io.trino.hdfs.s3; - -import com.google.inject.Inject; -import io.airlift.http.client.HttpClient; -import io.airlift.http.client.HttpStatus; -import io.airlift.http.client.Request; -import io.airlift.http.client.StringResponseHandler.StringResponse; - -import java.net.URI; - -import static io.airlift.http.client.Request.Builder.prepareGet; -import static io.airlift.http.client.StringResponseHandler.createStringResponseHandler; -import static java.lang.String.format; -import static java.util.Objects.requireNonNull; - -public class UriBasedS3SecurityMappingsProvider - implements S3SecurityMappingsProvider -{ - private final URI configUri; - private final HttpClient httpClient; - private final S3SecurityMappingsParser parser; - - @Inject - public UriBasedS3SecurityMappingsProvider(S3SecurityMappingConfig config, @ForS3SecurityMapping HttpClient httpClient) - { - this.configUri = config.getConfigFilePath().map(URI::create).orElseThrow(() -> new IllegalArgumentException("configUri not set")); - this.httpClient = requireNonNull(httpClient, "httpClient is null"); - this.parser = new S3SecurityMappingsParser(config); - } - - String getRawJsonString() - { - Request request = prepareGet().setUri(configUri).build(); - StringResponse response = httpClient.execute(request, createStringResponseHandler()); - int status = response.getStatusCode(); - if (status != HttpStatus.OK.code()) { - throw new IllegalStateException(format("Request to '%s' returned unexpected status code: '%d'", configUri, status)); - } - return response.getBody(); - } - - @Override - public S3SecurityMappings get() - { - return parser.parseJSONString(getRawJsonString()); - } -} diff --git a/lib/trino-hdfs/src/test/java/io/trino/filesystem/hdfs/TestHdfsFileSystemManager.java b/lib/trino-hdfs/src/test/java/io/trino/filesystem/hdfs/TestHdfsFileSystemManager.java index 9e6637cd9a51..dd609d6958bb 100644 --- a/lib/trino-hdfs/src/test/java/io/trino/filesystem/hdfs/TestHdfsFileSystemManager.java +++ b/lib/trino-hdfs/src/test/java/io/trino/filesystem/hdfs/TestHdfsFileSystemManager.java @@ -38,13 +38,10 @@ void testManager() .put("hive.dfs.verify-checksum", "false") .put("hive.s3.region", "us-west-1") .buildOrThrow(), - true, - true, - true, "test", new TestingConnectorContext()); - assertThat(manager.configure().keySet()).containsExactly("hive.dfs.verify-checksum", "hive.s3.region"); + assertThat(manager.configure().keySet()).containsExactly("hive.dfs.verify-checksum"); TrinoFileSystemFactory factory = manager.create(); TrinoFileSystem fileSystem = factory.create(ConnectorIdentity.ofUser("test")); diff --git a/lib/trino-hdfs/src/test/java/io/trino/filesystem/hdfs/TestHdfsFileSystemS3Mock.java b/lib/trino-hdfs/src/test/java/io/trino/filesystem/hdfs/TestHdfsFileSystemS3Mock.java deleted file mode 100644 index 4b6d1eed0094..000000000000 --- a/lib/trino-hdfs/src/test/java/io/trino/filesystem/hdfs/TestHdfsFileSystemS3Mock.java +++ /dev/null @@ -1,161 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package io.trino.filesystem.hdfs; - -import com.adobe.testing.s3mock.testcontainers.S3MockContainer; -import io.airlift.units.DataSize; -import io.trino.filesystem.AbstractTestTrinoFileSystem; -import io.trino.filesystem.Location; -import io.trino.filesystem.TrinoFileSystem; -import io.trino.hdfs.ConfigurationInitializer; -import io.trino.hdfs.DynamicHdfsConfiguration; -import io.trino.hdfs.HdfsConfig; -import io.trino.hdfs.HdfsConfiguration; -import io.trino.hdfs.HdfsConfigurationInitializer; -import io.trino.hdfs.HdfsContext; -import io.trino.hdfs.HdfsEnvironment; -import io.trino.hdfs.TrinoHdfsFileSystemStats; -import io.trino.hdfs.authentication.NoHdfsAuthentication; -import io.trino.hdfs.s3.HiveS3Config; -import io.trino.hdfs.s3.TrinoS3ConfigurationInitializer; -import io.trino.spi.security.ConnectorIdentity; -import org.apache.hadoop.fs.FileStatus; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeAll; -import org.junit.jupiter.api.Test; -import org.testcontainers.junit.jupiter.Container; -import org.testcontainers.junit.jupiter.Testcontainers; - -import java.io.IOException; -import java.io.UncheckedIOException; -import java.util.Set; - -import static java.util.Collections.emptySet; -import static org.assertj.core.api.Assertions.assertThat; -import static org.assertj.core.api.Assertions.assertThatThrownBy; - -@Testcontainers -public class TestHdfsFileSystemS3Mock - extends AbstractTestTrinoFileSystem -{ - private static final String BUCKET = "test-bucket"; - - @Container - private static final S3MockContainer S3_MOCK = new S3MockContainer("4.10.0") - .withInitialBuckets(BUCKET); - - private HdfsEnvironment hdfsEnvironment; - private HdfsContext hdfsContext; - private TrinoFileSystem fileSystem; - - @BeforeAll - void beforeAll() - { - DataSize streamingPartSize = DataSize.valueOf("5.5MB"); - assertThat(streamingPartSize).describedAs("Configured part size should be less than test's larger file size") - .isLessThan(LARGER_FILE_DATA_SIZE); - HiveS3Config s3Config = new HiveS3Config() - .setS3AwsAccessKey("accesskey") - .setS3AwsSecretKey("secretkey") - .setS3Endpoint(S3_MOCK.getHttpEndpoint()) - .setS3PathStyleAccess(true) - .setS3StreamingPartSize(streamingPartSize); - - HdfsConfig hdfsConfig = new HdfsConfig(); - ConfigurationInitializer s3Initializer = new TrinoS3ConfigurationInitializer(s3Config); - HdfsConfigurationInitializer initializer = new HdfsConfigurationInitializer(hdfsConfig, Set.of(s3Initializer)); - HdfsConfiguration hdfsConfiguration = new DynamicHdfsConfiguration(initializer, emptySet()); - hdfsEnvironment = new HdfsEnvironment(hdfsConfiguration, hdfsConfig, new NoHdfsAuthentication()); - hdfsContext = new HdfsContext(ConnectorIdentity.ofUser("test")); - - fileSystem = new HdfsFileSystem(hdfsEnvironment, hdfsContext, new TrinoHdfsFileSystemStats()); - } - - @AfterEach - void afterEach() - throws IOException - { - Path root = new Path(getRootLocation().toString()); - FileSystem fs = hdfsEnvironment.getFileSystem(hdfsContext, root); - for (FileStatus status : fs.listStatus(root)) { - fs.delete(status.getPath(), true); - } - } - - @Override - protected final boolean isHierarchical() - { - return false; - } - - @Override - protected TrinoFileSystem getFileSystem() - { - return fileSystem; - } - - @Override - protected Location getRootLocation() - { - return Location.of("s3://%s/".formatted(BUCKET)); - } - - @Override - protected boolean isCreateExclusive() - { - return false; - } - - @Override - protected boolean supportsCreateExclusive() - { - return false; - } - - @Override - protected boolean normalizesListFilesResult() - { - return true; - } - - @Override - protected boolean seekPastEndOfFileFails() - { - return false; - } - - @Override - protected void verifyFileSystemIsEmpty() - { - try { - Path root = new Path(getRootLocation().toString()); - FileSystem fs = hdfsEnvironment.getFileSystem(hdfsContext, root); - assertThat(fs.listStatus(root)).isEmpty(); - } - catch (IOException e) { - throw new UncheckedIOException(e); - } - } - - @Test - @Override - public void testPaths() - { - // this is S3Mock bug, see https://github.com/adobe/S3Mock/issues/2788 - assertThatThrownBy(super::testPaths) - .hasMessageFindingMatch("Status Code: 400; Error Code: 400 .*\\Q(Bucket: test-bucket, Key: test/.././/file)"); - } -} diff --git a/lib/trino-hdfs/src/test/java/io/trino/hdfs/HdfsTestUtils.java b/lib/trino-hdfs/src/test/java/io/trino/hdfs/HdfsTestUtils.java index 971a98b2f691..ba1119a45f23 100644 --- a/lib/trino-hdfs/src/test/java/io/trino/hdfs/HdfsTestUtils.java +++ b/lib/trino-hdfs/src/test/java/io/trino/hdfs/HdfsTestUtils.java @@ -17,12 +17,6 @@ import com.google.common.net.HostAndPort; import io.trino.filesystem.hdfs.HdfsFileSystemFactory; import io.trino.hdfs.authentication.NoHdfsAuthentication; -import io.trino.hdfs.azure.HiveAzureConfig; -import io.trino.hdfs.azure.TrinoAzureConfigurationInitializer; -import io.trino.hdfs.gcs.GoogleGcsConfigurationInitializer; -import io.trino.hdfs.gcs.HiveGcsConfig; -import io.trino.hdfs.s3.HiveS3Config; -import io.trino.hdfs.s3.TrinoS3ConfigurationInitializer; import java.util.Optional; @@ -34,11 +28,7 @@ public final class HdfsTestUtils public static final DynamicHdfsConfiguration HDFS_CONFIGURATION = new DynamicHdfsConfiguration( new HdfsConfigurationInitializer( new HdfsConfig() - .setSocksProxy(SOCKS_PROXY.orElse(null)), - ImmutableSet.of( - new TrinoS3ConfigurationInitializer(new HiveS3Config()), - new GoogleGcsConfigurationInitializer(new HiveGcsConfig()), - new TrinoAzureConfigurationInitializer(new HiveAzureConfig()))), + .setSocksProxy(SOCKS_PROXY.orElse(null))), ImmutableSet.of()); public static final TrinoHdfsFileSystemStats HDFS_FILE_SYSTEM_STATS = new TrinoHdfsFileSystemStats(); diff --git a/lib/trino-hdfs/src/test/java/io/trino/hdfs/azure/TestHiveAzureConfig.java b/lib/trino-hdfs/src/test/java/io/trino/hdfs/azure/TestHiveAzureConfig.java deleted file mode 100644 index 1af1c01c0759..000000000000 --- a/lib/trino-hdfs/src/test/java/io/trino/hdfs/azure/TestHiveAzureConfig.java +++ /dev/null @@ -1,77 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package io.trino.hdfs.azure; - -import com.google.common.collect.ImmutableMap; -import com.google.common.net.HostAndPort; -import org.junit.jupiter.api.Test; - -import java.util.Map; - -import static io.airlift.configuration.testing.ConfigAssertions.assertFullMapping; -import static io.airlift.configuration.testing.ConfigAssertions.assertRecordedDefaults; -import static io.airlift.configuration.testing.ConfigAssertions.recordDefaults; - -public class TestHiveAzureConfig -{ - @Test - public void testDefaults() - { - assertRecordedDefaults(recordDefaults(HiveAzureConfig.class) - .setWasbAccessKey(null) - .setWasbStorageAccount(null) - .setAbfsStorageAccount(null) - .setAbfsAccessKey(null) - .setAdlClientId(null) - .setAdlCredential(null) - .setAdlProxyHost(null) - .setAdlRefreshUrl(null) - .setAbfsOAuthClientEndpoint(null) - .setAbfsOAuthClientId(null) - .setAbfsOAuthClientSecret(null)); - } - - @Test - public void testExplicitPropertyMappings() - { - Map properties = ImmutableMap.builder() - .put("hive.azure.wasb-storage-account", "testwasbstorage") - .put("hive.azure.wasb-access-key", "secret") - .put("hive.azure.abfs-storage-account", "abfsstorage") - .put("hive.azure.abfs-access-key", "abfssecret") - .put("hive.azure.adl-client-id", "adlclientid") - .put("hive.azure.adl-credential", "adlcredential") - .put("hive.azure.adl-refresh-url", "adlrefreshurl") - .put("hive.azure.adl-proxy-host", "proxy-host:9800") - .put("hive.azure.abfs.oauth.endpoint", "abfsoauthendpoint") - .put("hive.azure.abfs.oauth.client-id", "abfsoauthclientid") - .put("hive.azure.abfs.oauth.secret", "abfsoauthsecret") - .buildOrThrow(); - - HiveAzureConfig expected = new HiveAzureConfig() - .setWasbStorageAccount("testwasbstorage") - .setWasbAccessKey("secret") - .setAbfsStorageAccount("abfsstorage") - .setAbfsAccessKey("abfssecret") - .setAdlClientId("adlclientid") - .setAdlCredential("adlcredential") - .setAdlRefreshUrl("adlrefreshurl") - .setAdlProxyHost(HostAndPort.fromParts("proxy-host", 9800)) - .setAbfsOAuthClientEndpoint("abfsoauthendpoint") - .setAbfsOAuthClientId("abfsoauthclientid") - .setAbfsOAuthClientSecret("abfsoauthsecret"); - - assertFullMapping(properties, expected); - } -} diff --git a/lib/trino-hdfs/src/test/java/io/trino/hdfs/azure/TestTrinoAzureConfigurationInitializer.java b/lib/trino-hdfs/src/test/java/io/trino/hdfs/azure/TestTrinoAzureConfigurationInitializer.java deleted file mode 100644 index b6a249bef765..000000000000 --- a/lib/trino-hdfs/src/test/java/io/trino/hdfs/azure/TestTrinoAzureConfigurationInitializer.java +++ /dev/null @@ -1,110 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package io.trino.hdfs.azure; - -import org.junit.jupiter.api.Test; - -import java.util.Set; -import java.util.function.BiConsumer; - -import static com.google.common.collect.Sets.difference; -import static org.assertj.core.api.Assertions.assertThatThrownBy; - -public class TestTrinoAzureConfigurationInitializer -{ - @Test - public void testAdl() - { - testPropertyGroup( - "If any of ADL client ID, credential, and refresh URL are set, all must be set", - HiveAzureConfig::setAdlClientId, - HiveAzureConfig::setAdlCredential, - HiveAzureConfig::setAdlRefreshUrl); - } - - @Test - public void testWasb() - { - testPropertyGroup( - "If WASB storage account or access key is set, both must be set", - HiveAzureConfig::setWasbAccessKey, - HiveAzureConfig::setWasbStorageAccount); - } - - @Test - public void testAbfsAccessKey() - { - testPropertyGroup( - "If ABFS storage account or access key is set, both must be set", - HiveAzureConfig::setAbfsAccessKey, - HiveAzureConfig::setAbfsStorageAccount); - } - - @Test - public void testAbfsOAuth() - { - testPropertyGroup( - "If any of ABFS OAuth2 Client endpoint, ID, and secret are set, all must be set.", - HiveAzureConfig::setAbfsOAuthClientEndpoint, - HiveAzureConfig::setAbfsOAuthClientId, - HiveAzureConfig::setAbfsOAuthClientSecret); - } - - @Test - public void testExclusiveProperties() - { - assertThatThrownBy(() -> testProperties( - HiveAzureConfig::setAbfsAccessKey, - HiveAzureConfig::setAbfsStorageAccount, - HiveAzureConfig::setAbfsOAuthClientEndpoint, - HiveAzureConfig::setAbfsOAuthClientId, - HiveAzureConfig::setAbfsOAuthClientSecret)) - .isInstanceOf(IllegalArgumentException.class) - .hasMessage("Multiple ABFS authentication methods configured: access key and OAuth2"); - } - - @SafeVarargs - private static void testPropertyGroup(String expectedErrorMessage, BiConsumer... setters) - { - testPropertyGroup(expectedErrorMessage, Set.of(setters)); - } - - private static void testPropertyGroup(String expectedErrorMessage, Set> setters) - { - // All properties work together - testProperties(setters); - - // Dropping any one property fails - for (BiConsumer setter : setters) { - assertThatThrownBy(() -> testProperties(difference(setters, Set.of(setter)))) - .isInstanceOf(IllegalArgumentException.class) - .hasMessage(expectedErrorMessage); - } - } - - @SafeVarargs - private static void testProperties(BiConsumer... setters) - { - testProperties(Set.of(setters)); - } - - private static void testProperties(Set> setters) - { - HiveAzureConfig config = new HiveAzureConfig(); - for (BiConsumer setter : setters) { - setter.accept(config, "test value"); - } - new TrinoAzureConfigurationInitializer(config); - } -} diff --git a/lib/trino-hdfs/src/test/java/io/trino/hdfs/cos/TestHiveCosServiceConfig.java b/lib/trino-hdfs/src/test/java/io/trino/hdfs/cos/TestHiveCosServiceConfig.java deleted file mode 100644 index 697dedafd054..000000000000 --- a/lib/trino-hdfs/src/test/java/io/trino/hdfs/cos/TestHiveCosServiceConfig.java +++ /dev/null @@ -1,50 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package io.trino.hdfs.cos; - -import com.google.common.collect.ImmutableMap; -import org.junit.jupiter.api.Test; - -import java.io.IOException; -import java.nio.file.Path; -import java.util.Map; - -import static io.airlift.configuration.testing.ConfigAssertions.assertFullMapping; -import static io.airlift.configuration.testing.ConfigAssertions.assertRecordedDefaults; -import static io.airlift.configuration.testing.ConfigAssertions.recordDefaults; -import static java.nio.file.Files.createTempFile; - -public class TestHiveCosServiceConfig -{ - @Test - public void testDefaults() - { - assertRecordedDefaults(recordDefaults(HiveCosServiceConfig.class) - .setServiceConfig(null)); - } - - @Test - public void testExplicitPropertyMappings() - throws IOException - { - Path serviceConfig = createTempFile(null, null); - - Map properties = ImmutableMap.of("hive.cos.service-config", serviceConfig.toString()); - - HiveCosServiceConfig expected = new HiveCosServiceConfig() - .setServiceConfig(serviceConfig.toFile()); - - assertFullMapping(properties, expected); - } -} diff --git a/lib/trino-hdfs/src/test/java/io/trino/hdfs/cos/TestHiveCosServiceConfigurationProvider.java b/lib/trino-hdfs/src/test/java/io/trino/hdfs/cos/TestHiveCosServiceConfigurationProvider.java deleted file mode 100644 index 1372986bc94b..000000000000 --- a/lib/trino-hdfs/src/test/java/io/trino/hdfs/cos/TestHiveCosServiceConfigurationProvider.java +++ /dev/null @@ -1,99 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package io.trino.hdfs.cos; - -import com.amazonaws.auth.AWSCredentialsProvider; -import com.amazonaws.auth.AWSStaticCredentialsProvider; -import com.google.common.collect.ImmutableSet; -import io.airlift.testing.TempFile; -import io.trino.hdfs.DynamicConfigurationProvider; -import io.trino.hdfs.DynamicHdfsConfiguration; -import io.trino.hdfs.HdfsConfig; -import io.trino.hdfs.HdfsConfiguration; -import io.trino.hdfs.HdfsConfigurationInitializer; -import io.trino.hdfs.HdfsContext; -import io.trino.hdfs.s3.HiveS3Config; -import io.trino.hdfs.s3.TrinoS3ConfigurationInitializer; -import io.trino.spi.security.ConnectorIdentity; -import org.apache.hadoop.conf.Configuration; -import org.junit.jupiter.api.Test; - -import java.io.FileOutputStream; -import java.io.IOException; -import java.net.URI; -import java.util.Properties; - -import static io.trino.hdfs.s3.TestTrinoS3FileSystem.getAwsCredentialsProvider; -import static org.assertj.core.api.Assertions.assertThat; - -public class TestHiveCosServiceConfigurationProvider -{ - @Test - public void testPerBucketCredentialsIntegrated() - throws Exception - { - HdfsConfiguration hiveHdfsConfiguration = getCosHdfsConfiguration(); - - try (TrinoCosFileSystem fs = new TrinoCosFileSystem()) { - verifyStaticCredentials(hiveHdfsConfiguration, fs, "cos://test-bucket/", "test-bucket", "test_access_key", "test_secret_key"); - verifyStaticCredentials(hiveHdfsConfiguration, fs, "cos://test-bucket.a/", "test-bucket", "cos_a_access_key", "cos_a_secret_key"); - verifyStaticCredentials(hiveHdfsConfiguration, fs, "cos://test-bucket.b/", "test-bucket", "cos_b_access_key", "cos_b_secret_key"); - verifyStaticCredentials(hiveHdfsConfiguration, fs, "cos://a/", "a", "test_access_key", "test_secret_key"); - } - } - - private HdfsConfiguration getCosHdfsConfiguration() - throws IOException - { - HdfsConfigurationInitializer initializer = new HdfsConfigurationInitializer(new HdfsConfig(), ImmutableSet.of( - new TrinoS3ConfigurationInitializer(new HiveS3Config() - .setS3AwsAccessKey("test_access_key") - .setS3AwsSecretKey("test_secret_key")), - new CosConfigurationInitializer())); - - DynamicConfigurationProvider provider; - try (TempFile cosServiceConfig = new TempFile()) { - Properties cosServiceProperties = new Properties(); - cosServiceProperties.put("a.access-key", "cos_a_access_key"); - cosServiceProperties.put("a.secret-key", "cos_a_secret_key"); - cosServiceProperties.put("b.access-key", "cos_b_access_key"); - cosServiceProperties.put("b.secret-key", "cos_b_secret_key"); - try (FileOutputStream out = new FileOutputStream(cosServiceConfig.file())) { - cosServiceProperties.store(out, "S3 bucket"); - } - - provider = new CosServiceConfigurationProvider(new HiveCosServiceConfig().setServiceConfig(cosServiceConfig.file())); - } - - return new DynamicHdfsConfiguration(initializer, ImmutableSet.of(provider)); - } - - private static void verifyStaticCredentials(HdfsConfiguration hiveHdfsConfiguration, - TrinoCosFileSystem fileSystem, - String uri, - String expectedBucket, - String expectedAccessKey, - String expectedSecretKey) - throws IOException - { - HdfsContext hdfsContext = new HdfsContext(ConnectorIdentity.forUser("test").build()); - Configuration configuration = hiveHdfsConfiguration.getConfiguration(hdfsContext, URI.create(uri)); - fileSystem.initialize(URI.create(uri), configuration); - assertThat(fileSystem.getBucketName(URI.create(uri))).isEqualTo(expectedBucket); - AWSCredentialsProvider awsCredentialsProvider = getAwsCredentialsProvider(fileSystem); - assertThat(awsCredentialsProvider).isInstanceOf(AWSStaticCredentialsProvider.class); - assertThat(awsCredentialsProvider.getCredentials().getAWSAccessKeyId()).isEqualTo(expectedAccessKey); - assertThat(awsCredentialsProvider.getCredentials().getAWSSecretKey()).isEqualTo(expectedSecretKey); - } -} diff --git a/lib/trino-hdfs/src/test/java/io/trino/hdfs/cos/TestServiceConfig.java b/lib/trino-hdfs/src/test/java/io/trino/hdfs/cos/TestServiceConfig.java deleted file mode 100644 index c49c951eaab6..000000000000 --- a/lib/trino-hdfs/src/test/java/io/trino/hdfs/cos/TestServiceConfig.java +++ /dev/null @@ -1,129 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package io.trino.hdfs.cos; - -import com.google.common.collect.ImmutableMap; -import com.google.common.collect.ImmutableSet; -import io.airlift.testing.TempFile; -import org.junit.jupiter.api.Test; - -import java.io.FileOutputStream; -import java.io.IOException; -import java.util.Map; -import java.util.Optional; -import java.util.Properties; - -import static org.assertj.core.api.Assertions.assertThat; -import static org.assertj.core.api.Assertions.assertThatThrownBy; - -public class TestServiceConfig -{ - @Test - public void testInitializeConfiguration() - { - testInitializeConfiguration2("accessValue", "secretValue", Optional.of("endpointValue")); - testInitializeConfiguration2("accessValue", "secretValue", Optional.empty()); - } - - private static void testInitializeConfiguration2(String accessValue, String secretValue, Optional endpointValue) - { - ServiceConfig serviceConfig = new ServiceConfig("name", accessValue, secretValue, endpointValue); - assertConfig(serviceConfig, accessValue, secretValue, endpointValue); - } - - private static void assertConfig( - ServiceConfig serviceConfig, - String accessValue, - String secretValue, - Optional endpointValue) - { - assertThat(serviceConfig.getAccessKey()).isEqualTo(accessValue); - assertThat(serviceConfig.getSecretKey()).isEqualTo(secretValue); - assertThat(serviceConfig.getEndpoint()).isEqualTo(endpointValue); - } - - @Test - public void testLoad() - throws IOException - { - try (TempFile tempFile = new TempFile()) { - assertThat(ServiceConfig.loadServiceConfigs(tempFile.file())).isEmpty(); - - writeProperties(tempFile, ImmutableMap.builder() - .put("a.access-key", "a-accessValue") - .put("a.secret-key", "a-secretValue") - .put("a.endpoint", "a-endpointValue") - - .put("b.access-key", "b-accessValue") - .put("b.secret-key", "b-secretValue") - - .put("c.access-key", "c-accessValue") - .put("c.secret-key", "c-secretValue") - .put("c.endpoint", "c-endpointValue") - .buildOrThrow()); - - Map bucketConfigs = ServiceConfig.loadServiceConfigs(tempFile.file()); - assertThat(bucketConfigs.keySet()).isEqualTo(ImmutableSet.of("a", "b", "c")); - - assertConfig(bucketConfigs.get("a"), "a-accessValue", "a-secretValue", Optional.of("a-endpointValue")); - assertConfig(bucketConfigs.get("b"), "b-accessValue", "b-secretValue", Optional.empty()); - assertConfig(bucketConfigs.get("c"), "c-accessValue", "c-secretValue", Optional.of("c-endpointValue")); - } - } - - @Test - public void testLoadInvalid() - throws IOException - { - assertInvalidLoad( - "a.secret-key", - ImmutableMap.of("a.access-key", "a-accessValue")); - assertInvalidLoad( - "a.unknown", - ImmutableMap.builder() - .put("a.access-key", "a-accessValue") - .put("a.secret-key", "a-secretValue") - .put("a.unknown", "value") - .buildOrThrow()); - assertInvalidLoad( - "unknown", - ImmutableMap.builder() - .put("a.access-key", "a-accessValue") - .put("a.secret-key", "a-secretValue") - .put("unknown", "value") - .buildOrThrow()); - } - - private static void assertInvalidLoad(String message, Map properties) - throws IOException - { - try (TempFile tempFile = new TempFile()) { - writeProperties(tempFile, properties); - - assertThatThrownBy(() -> ServiceConfig.loadServiceConfigs(tempFile.file())) - .hasMessageContaining(message) - .isInstanceOf(IllegalArgumentException.class); - } - } - - private static void writeProperties(TempFile tempFile, Map map) - throws IOException - { - try (FileOutputStream out = new FileOutputStream(tempFile.file())) { - Properties properties = new Properties(); - properties.putAll(map); - properties.store(out, "test"); - } - } -} diff --git a/lib/trino-hdfs/src/test/java/io/trino/hdfs/gcs/TestHiveGcsConfig.java b/lib/trino-hdfs/src/test/java/io/trino/hdfs/gcs/TestHiveGcsConfig.java deleted file mode 100644 index 305e529189b3..000000000000 --- a/lib/trino-hdfs/src/test/java/io/trino/hdfs/gcs/TestHiveGcsConfig.java +++ /dev/null @@ -1,84 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package io.trino.hdfs.gcs; - -import com.google.common.collect.ImmutableMap; -import org.junit.jupiter.api.Test; - -import java.io.IOException; -import java.nio.file.Files; -import java.nio.file.Path; -import java.util.Map; - -import static io.airlift.configuration.testing.ConfigAssertions.assertFullMapping; -import static io.airlift.configuration.testing.ConfigAssertions.assertRecordedDefaults; -import static io.airlift.configuration.testing.ConfigAssertions.recordDefaults; -import static org.assertj.core.api.Assertions.assertThatThrownBy; - -public class TestHiveGcsConfig -{ - @Test - public void testDefaults() - { - assertRecordedDefaults(recordDefaults(HiveGcsConfig.class) - .setUseGcsAccessToken(false) - .setJsonKey(null) - .setJsonKeyFilePath(null)); - } - - @Test - public void testExplicitPropertyMappings() - throws IOException - { - Path jsonKeyFile = Files.createTempFile(null, null); - - Map properties = ImmutableMap.builder() - .put("hive.gcs.use-access-token", "true") - .put("hive.gcs.json-key", "{}") - .put("hive.gcs.json-key-file-path", jsonKeyFile.toString()) - .buildOrThrow(); - - HiveGcsConfig expected = new HiveGcsConfig() - .setUseGcsAccessToken(true) - .setJsonKey("{}") - .setJsonKeyFilePath(jsonKeyFile.toString()); - - assertFullMapping(properties, expected); - } - - @Test - public void testValidation() - { - assertThatThrownBy( - new HiveGcsConfig() - .setUseGcsAccessToken(true) - .setJsonKey("{}}")::validate) - .isInstanceOf(IllegalStateException.class) - .hasMessage("Cannot specify 'hive.gcs.json-key' when 'hive.gcs.use-access-token' is set"); - - assertThatThrownBy( - new HiveGcsConfig() - .setUseGcsAccessToken(true) - .setJsonKeyFilePath("/dev/null")::validate) - .isInstanceOf(IllegalStateException.class) - .hasMessage("Cannot specify 'hive.gcs.json-key-file-path' when 'hive.gcs.use-access-token' is set"); - - assertThatThrownBy( - new HiveGcsConfig() - .setJsonKey("{}") - .setJsonKeyFilePath("/dev/null")::validate) - .isInstanceOf(IllegalStateException.class) - .hasMessage("'hive.gcs.json-key' and 'hive.gcs.json-key-file-path' cannot be both set"); - } -} diff --git a/lib/trino-hdfs/src/test/java/io/trino/hdfs/s3/AbstractTestTrinoS3FileSystem.java b/lib/trino-hdfs/src/test/java/io/trino/hdfs/s3/AbstractTestTrinoS3FileSystem.java deleted file mode 100644 index 4f41ada007dc..000000000000 --- a/lib/trino-hdfs/src/test/java/io/trino/hdfs/s3/AbstractTestTrinoS3FileSystem.java +++ /dev/null @@ -1,592 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package io.trino.hdfs.s3; - -import com.amazonaws.services.s3.AmazonS3; -import com.amazonaws.services.s3.model.ListObjectsV2Request; -import com.amazonaws.services.s3.model.ListObjectsV2Result; -import com.amazonaws.services.s3.model.ObjectMetadata; -import com.amazonaws.services.s3.model.PutObjectRequest; -import com.amazonaws.services.s3.model.S3ObjectSummary; -import com.google.common.net.MediaType; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.Path; -import org.junit.jupiter.api.Test; - -import java.io.ByteArrayInputStream; -import java.io.InputStream; -import java.net.URI; -import java.util.ArrayList; -import java.util.List; - -import static com.google.common.collect.ImmutableList.toImmutableList; -import static io.airlift.testing.Closeables.closeAllSuppress; -import static io.trino.testing.TestingNames.randomNameSuffix; -import static org.assertj.core.api.Assertions.assertThat; -import static org.assertj.core.api.Assertions.assertThatThrownBy; - -public abstract class AbstractTestTrinoS3FileSystem -{ - private static final MediaType DIRECTORY_MEDIA_TYPE = MediaType.create("application", "x-directory"); - private static final String PATH_SEPARATOR = "/"; - private static final String DIRECTORY_SUFFIX = "_$folder$"; - - protected abstract String getBucketName(); - - protected abstract Configuration s3Configuration(); - - @Test - public void testDeleteRecursivelyMissingObjectPath() - throws Exception - { - String prefix = "test-delete-recursively-missing-object-" + randomNameSuffix(); - - try (TrinoS3FileSystem fs = createFileSystem()) { - // Follow Amazon S3 behavior if attempting to delete an object that does not exist - // and return a success message - assertThat(fs.delete(new Path("s3://%s/%s".formatted(getBucketName(), prefix)), true)).isTrue(); - } - } - - @Test - public void testDeleteNonRecursivelyMissingObjectPath() - throws Exception - { - String prefix = "test-delete-non-recursively-missing-object-" + randomNameSuffix(); - - try (TrinoS3FileSystem fs = createFileSystem()) { - // Follow Amazon S3 behavior if attempting to delete an object that does not exist - // and return a success message - assertThat(fs.delete(new Path("s3://%s/%s".formatted(getBucketName(), prefix)), false)).isTrue(); - } - } - - @Test - public void testDeleteRecursivelyObjectPath() - throws Exception - { - String prefix = "test-delete-recursively-object-" + randomNameSuffix(); - String prefixPath = "s3://%s/%s".formatted(getBucketName(), prefix); - - try (TrinoS3FileSystem fs = createFileSystem()) { - try { - String filename = "file.txt"; - String fileKey = "%s/%s".formatted(prefix, filename); - String filePath = "s3://%s/%s/%s".formatted(getBucketName(), prefix, filename); - fs.createNewFile(new Path(prefixPath, filename)); - List paths = listPaths(fs.getS3Client(), getBucketName(), prefix, true); - assertThat(paths).containsOnly(fileKey); - - assertThat(fs.delete(new Path(filePath), true)).isTrue(); - - assertThat(listPaths(fs.getS3Client(), getBucketName(), prefix, true)).isEmpty(); - } - finally { - fs.delete(new Path(prefixPath), true); - } - } - } - - @Test - public void testDeleteNonRecursivelyObjectPath() - throws Exception - { - String prefix = "test-delete-non-recursively-object-" + randomNameSuffix(); - String prefixPath = "s3://%s/%s".formatted(getBucketName(), prefix); - - try (TrinoS3FileSystem fs = createFileSystem()) { - try { - String filename = "file.txt"; - String fileKey = "%s/%s".formatted(prefix, filename); - String filePath = "s3://%s/%s".formatted(getBucketName(), fileKey); - fs.createNewFile(new Path(prefixPath, filename)); - List paths = listPaths(fs.getS3Client(), getBucketName(), prefix, true); - assertThat(paths).containsOnly(fileKey); - - assertThat(fs.delete(new Path(filePath), false)).isTrue(); - - assertThat(listPaths(fs.getS3Client(), getBucketName(), prefix, true)).isEmpty(); - } - finally { - fs.delete(new Path(prefixPath), true); - } - } - } - - @Test - public void testDeleteNonRecursivelyObjectNamePrefixingAnotherObjectName() - throws Exception - { - String prefix = "test-delete-non-recursively-object-delete-only-requested-object-" + randomNameSuffix(); - String prefixPath = "s3://%s/%s".formatted(getBucketName(), prefix); - - try (TrinoS3FileSystem fs = createFileSystem()) { - try { - fs.createNewFile(new Path(prefixPath, "foo")); - fs.createNewFile(new Path(prefixPath, "foobar")); - List paths = listPaths(fs.getS3Client(), getBucketName(), prefix, true); - assertThat(paths).containsOnly( - "%s/foo".formatted(prefix), - "%s/foobar".formatted(prefix)); - - assertThat(fs.delete(new Path("s3://%s/%s/foo".formatted(getBucketName(), prefix)), false)).isTrue(); - - paths = listPaths(fs.getS3Client(), getBucketName(), prefix, true); - assertThat(paths).containsOnly( - "%s/foobar".formatted(prefix)); - } - finally { - fs.delete(new Path(prefixPath), true); - } - } - } - - @Test - public void testDeleteNonRecursivelyDirectoryNamePrefixingAnotherDirectoryName() - throws Exception - { - String prefix = "test-delete-non-recursively-object-delete-only-requested-directory-" + randomNameSuffix(); - String prefixPath = "s3://%s/%s".formatted(getBucketName(), prefix); - - try (TrinoS3FileSystem fs = createFileSystem()) { - try { - createDirectory(fs.getS3Client(), getBucketName(), "%s/foo".formatted(prefix)); - createDirectory(fs.getS3Client(), getBucketName(), "%s/foobar".formatted(prefix)); - List paths = listPaths(fs.getS3Client(), getBucketName(), prefix, true); - assertThat(paths).containsOnly( - "%s/foo/".formatted(prefix), - "%s/foobar/".formatted(prefix)); - - assertThat(fs.delete(new Path("s3://%s/%s/foo".formatted(getBucketName(), prefix)), true)).isTrue(); - - paths = listPaths(fs.getS3Client(), getBucketName(), prefix, true); - assertThat(paths).containsOnly( - "%s/foobar/".formatted(prefix)); - } - finally { - fs.delete(new Path(prefixPath), true); - } - } - } - - @Test - public void testDeleteNonRecursivelyEmptyDirectory() - throws Exception - { - String prefix = "test-delete-non-recursively-empty-directory-" + randomNameSuffix(); - String prefixPath = "s3://%s/%s".formatted(getBucketName(), prefix); - - try (TrinoS3FileSystem fs = createFileSystem()) { - try { - createDirectory(fs.getS3Client(), getBucketName(), prefix); - List paths = listPaths(fs.getS3Client(), getBucketName(), prefix, false); - assertThat(paths).containsOnly(prefix + PATH_SEPARATOR); - - assertThat(fs.delete(new Path(prefixPath), false)).isTrue(); - - assertThat(listPaths(fs.getS3Client(), getBucketName(), prefix, true)).isEmpty(); - } - finally { - fs.delete(new Path(prefixPath), true); - } - } - } - - @Test - public void testDeleteNonRecursivelyEmptyDirectoryWithAdditionalDirectorySuffixPlaceholder() - throws Exception - { - String directoryName = "test-delete-non-recursively-empty-directory-" + randomNameSuffix(); - String directoryPath = "s3://%s/%s".formatted(getBucketName(), directoryName); - - try (TrinoS3FileSystem fs = createFileSystem()) { - try { - createDirectory(fs.getS3Client(), getBucketName(), directoryName); - fs.createNewFile(new Path(directoryPath + DIRECTORY_SUFFIX)); - List paths = listPaths(fs.getS3Client(), getBucketName(), directoryName, true); - assertThat(paths).containsOnly( - directoryName + PATH_SEPARATOR, - directoryName + DIRECTORY_SUFFIX); - - assertThat(fs.delete(new Path(directoryPath), false)).isTrue(); - - assertThat(listPaths(fs.getS3Client(), getBucketName(), directoryName, true)).isEmpty(); - } - finally { - fs.delete(new Path(directoryPath), true); - } - } - } - - @Test - public void testDeleteRecursivelyObjectNamePrefixingAnotherObjectName() - throws Exception - { - String prefix = "test-delete-recursively-object-delete-only-requested-object-" + randomNameSuffix(); - String prefixPath = "s3://%s/%s".formatted(getBucketName(), prefix); - - try (TrinoS3FileSystem fs = createFileSystem()) { - try { - fs.createNewFile(new Path(prefixPath, "foo")); - fs.createNewFile(new Path(prefixPath, "foobar")); - List paths = listPaths(fs.getS3Client(), getBucketName(), prefix, true); - assertThat(paths).containsOnly( - "%s/foo".formatted(prefix), - "%s/foobar".formatted(prefix)); - - assertThat(fs.delete(new Path("s3://%s/%s/foo".formatted(getBucketName(), prefix)), true)).isTrue(); - - paths = listPaths(fs.getS3Client(), getBucketName(), prefix, true); - assertThat(paths).containsOnly( - "%s/foobar".formatted(prefix)); - } - finally { - fs.delete(new Path(prefixPath), true); - } - } - } - - @Test - public void testDeleteRecursivelyDirectoryNamePrefixingAnotherDirectoryName() - throws Exception - { - String prefix = "test-delete-recursively-object-delete-only-requested-directory-" + randomNameSuffix(); - String prefixPath = "s3://%s/%s".formatted(getBucketName(), prefix); - - try (TrinoS3FileSystem fs = createFileSystem()) { - try { - createDirectory(fs.getS3Client(), getBucketName(), "%s/foo".formatted(prefix)); - createDirectory(fs.getS3Client(), getBucketName(), "%s/foobar".formatted(prefix)); - List paths = listPaths(fs.getS3Client(), getBucketName(), prefix, true); - assertThat(paths).containsOnly( - "%s/foo/".formatted(prefix), - "%s/foobar/".formatted(prefix)); - - assertThat(fs.delete(new Path("s3://%s/%s/foo".formatted(getBucketName(), prefix)), true)).isTrue(); - - paths = listPaths(fs.getS3Client(), getBucketName(), prefix, true); - assertThat(paths).containsOnly("%s/foobar/".formatted(prefix)); - } - finally { - fs.delete(new Path(prefixPath), true); - } - } - } - - @Test - public void testDeleteRecursivelyPrefixContainingMultipleObjectsPlain() - throws Exception - { - String prefix = "test-delete-recursively-path-multiple-objects-plain-" + randomNameSuffix(); - String prefixPath = "s3://%s/%s".formatted(getBucketName(), prefix); - - try (TrinoS3FileSystem fs = createFileSystem()) { - try { - String filename1 = "file1.txt"; - String filename2 = "file2.txt"; - fs.createNewFile(new Path(prefixPath, filename1)); - fs.createNewFile(new Path(prefixPath, filename2)); - List paths = listPaths(fs.getS3Client(), getBucketName(), prefix, true); - assertThat(paths).containsOnly( - "%s/%s".formatted(prefix, filename1), - "%s/%s".formatted(prefix, filename2)); - - assertThat(fs.delete(new Path(prefixPath), true)).isTrue(); - - assertThat(listPaths(fs.getS3Client(), getBucketName(), prefix, true)).isEmpty(); - } - finally { - fs.delete(new Path(prefixPath), true); - } - } - } - - @Test - public void testDeleteRecursivelyPrefixWithSpecialCharacters() - throws Exception - { - String prefix = "test-delete-recursively-path-with-special characters |" + randomNameSuffix(); - String prefixPath = "s3://%s/%s".formatted(getBucketName(), prefix); - - try (TrinoS3FileSystem fs = createFileSystem()) { - try { - String filename1 = "file1.txt"; - String filename2 = "file2.txt"; - fs.createNewFile(new Path(prefixPath, filename1)); - fs.createNewFile(new Path(prefixPath, filename2)); - List paths = listPaths(fs.getS3Client(), getBucketName(), prefix, true); - assertThat(paths).containsOnly( - "%s/%s".formatted(prefix, filename1), - "%s/%s".formatted(prefix, filename2)); - - assertThat(fs.delete(new Path(prefixPath), true)).isTrue(); - - assertThat(listPaths(fs.getS3Client(), getBucketName(), prefix, true)).isEmpty(); - } - finally { - fs.delete(new Path(prefixPath), true); - } - } - } - - @Test - public void testDeleteRecursivelyDirectoryWithDeepHierarchy() - throws Exception - { - String prefix = "test-delete-recursively-directory-deep-hierarchy-" + randomNameSuffix(); - String prefixPath = "s3://%s/%s".formatted(getBucketName(), prefix); - - try (TrinoS3FileSystem fs = createFileSystem()) { - try { - String directoryKey = prefix + "/directory"; - String directoryPath = "s3://%s/%s".formatted(getBucketName(), directoryKey); - createDirectory(fs.getS3Client(), getBucketName(), directoryKey); - - String filename1 = "file1.txt"; - String filename2 = "file2.txt"; - String filename3 = "file3.txt"; - fs.createNewFile(new Path(directoryPath, filename1)); - fs.createNewFile(new Path(directoryPath, filename2)); - fs.createNewFile(new Path(directoryPath + "/dir3", filename3)); - createDirectory(fs.getS3Client(), getBucketName(), directoryKey + "/dir4"); - - assertThat(fs.delete(new Path(directoryPath), true)).isTrue(); - - assertThat(listPaths(fs.getS3Client(), getBucketName(), prefix, true)).isEmpty(); - } - finally { - fs.delete(new Path(prefixPath), true); - } - } - } - - @Test - public void testDeleteRecursivelyEmptyDirectory() - throws Exception - { - String prefix = "test-delete-recursively-empty-directory-" + randomNameSuffix(); - String prefixPath = "s3://%s/%s".formatted(getBucketName(), prefix); - - try (TrinoS3FileSystem fs = createFileSystem()) { - try { - String directoryKey = prefix + "/directory"; - createDirectory(fs.getS3Client(), getBucketName(), directoryKey); - fs.createNewFile(new Path("s3://%s/%s%s".formatted(getBucketName(), directoryKey, DIRECTORY_SUFFIX))); - List paths = listPaths(fs.getS3Client(), getBucketName(), prefix, true); - assertThat(paths).containsOnly( - directoryKey + PATH_SEPARATOR, - directoryKey + DIRECTORY_SUFFIX); - - assertThat(fs.delete(new Path(prefixPath + "/directory"), true)).isTrue(); - - assertThat(listPaths(fs.getS3Client(), getBucketName(), prefix, true)).isEmpty(); - } - finally { - fs.delete(new Path(prefixPath), true); - } - } - } - - @Test - public void testDeleteRecursivelyDirectoryWithObjectsAndDirectorySuffixPlaceholder() - throws Exception - { - String prefix = "test-delete-recursively-directory-multiple-objects-" + randomNameSuffix(); - String prefixPath = "s3://%s/%s".formatted(getBucketName(), prefix); - - try (TrinoS3FileSystem fs = createFileSystem()) { - try { - String directoryKey = prefix + "/directory"; - String directoryPath = "s3://%s/%s".formatted(getBucketName(), directoryKey); - createDirectory(fs.getS3Client(), getBucketName(), directoryKey); - fs.createNewFile(new Path(directoryPath + DIRECTORY_SUFFIX)); - - String filename1 = "file1.txt"; - String filename2 = "file2.txt"; - String filename3 = "file3.txt"; - fs.createNewFile(new Path(directoryPath, filename1)); - fs.createNewFile(new Path(directoryPath, filename2)); - fs.createNewFile(new Path(directoryPath + "/dir3", filename3)); - fs.createNewFile(new Path(directoryPath + "/dir3" + DIRECTORY_SUFFIX)); - createDirectory(fs.getS3Client(), getBucketName(), directoryKey + "/dir4"); - fs.createNewFile(new Path(directoryPath + "/dir4" + DIRECTORY_SUFFIX)); - - assertThat(fs.delete(new Path(directoryPath), true)).isTrue(); - - assertThat(listPaths(fs.getS3Client(), getBucketName(), prefix, true)).isEmpty(); - } - finally { - fs.delete(new Path(prefixPath), true); - } - } - } - - @Test - public void testDeleteRecursivelyPrefixContainingDeepHierarchy() - throws Exception - { - String prefix = "test-delete-recursively-prefix-deep-hierarchy-" + randomNameSuffix(); - String prefixPath = "s3://%s/%s".formatted(getBucketName(), prefix); - - try (TrinoS3FileSystem fs = createFileSystem()) { - try { - String filename1 = "file1.txt"; - String filename2 = "file2.txt"; - String filename3 = "file3.txt"; - fs.createNewFile(new Path("s3://%s/%s/dir1".formatted(getBucketName(), prefix), filename1)); - fs.createNewFile(new Path("s3://%s/%s/dir2/dir22".formatted(getBucketName(), prefix), filename2)); - fs.createNewFile(new Path("s3://%s/%s/dir3/dir33/dir333".formatted(getBucketName(), prefix), filename3)); - List paths = listPaths(fs.getS3Client(), getBucketName(), prefix, true); - assertThat(paths).containsOnly( - "%s/dir1/%s".formatted(prefix, filename1), - "%s/dir2/dir22/%s".formatted(prefix, filename2), - "%s/dir3/dir33/dir333/%s".formatted(prefix, filename3)); - - assertThat(fs.delete(new Path(prefixPath), true)).isTrue(); - - assertThat(listPaths(fs.getS3Client(), getBucketName(), prefix, true)).isEmpty(); - } - finally { - fs.delete(new Path(prefixPath), true); - } - } - } - - @Test - public void testDeleteNonRecursivelyNonEmptyDirectory() - throws Exception - { - String prefix = "test-illegal-delete-non-recursively-directory-non-empty-" + randomNameSuffix(); - String prefixPath = "s3://%s/%s".formatted(getBucketName(), prefix); - - try (TrinoS3FileSystem fs = createFileSystem()) { - try { - String directoryKey = prefix + "/directory"; - String directoryPath = "s3://%s/%s".formatted(getBucketName(), directoryKey); - createDirectory(fs.getS3Client(), getBucketName(), directoryKey); - - fs.createNewFile(new Path(directoryPath, "file1.txt")); - - assertThatThrownBy(() -> fs.delete(new Path(directoryPath), false)) - .hasMessage("Directory %s is not empty".formatted(directoryPath)); - - List paths = listPaths(fs.getS3Client(), getBucketName(), prefix, true); - assertThat(paths).containsOnly( - "%s/directory/".formatted(prefix), - "%s/directory/file1.txt".formatted(prefix)); - } - finally { - fs.delete(new Path(prefixPath), true); - } - } - } - - @Test - public void testDeleteNonRecursivelyNonEmptyPath() - throws Exception - { - String prefix = "test-illegal-delete-non-recursively-path-non-empty-" + randomNameSuffix(); - String prefixPath = "s3://%s/%s".formatted(getBucketName(), prefix); - - try (TrinoS3FileSystem fs = createFileSystem()) { - try { - fs.createNewFile(new Path(prefixPath, "file1.txt")); - - assertThatThrownBy(() -> fs.delete(new Path("s3://%s/%s".formatted(getBucketName(), prefix)), false)) - .hasMessage("Directory s3://%s/%s is not empty".formatted(getBucketName(), prefix)); - - List paths = listPaths(fs.getS3Client(), getBucketName(), prefix, true); - assertThat(paths).containsOnly( - "%s/file1.txt".formatted(prefix)); - } - finally { - fs.delete(new Path(prefixPath), true); - } - } - } - - @Test - public void testDeleteNonRecursivelyNonEmptyDeepPath() - throws Exception - { - String prefix = "test-illegal-delete-non-recursively-deep-path-non-empty-" + randomNameSuffix(); - String prefixPath = "s3://%s/%s".formatted(getBucketName(), prefix); - - try (TrinoS3FileSystem fs = createFileSystem()) { - try { - String filename1 = "file1.txt"; - String filename2 = "file2.txt"; - fs.createNewFile(new Path(prefixPath + "/dir1/", filename1)); - fs.createNewFile(new Path(prefixPath + "/dir2/", filename2)); - List paths = listPaths(fs.getS3Client(), getBucketName(), prefix, true); - assertThat(paths).containsOnly( - "%s/dir1/%s".formatted(prefix, filename1), - "%s/dir2/%s".formatted(prefix, filename2)); - - assertThatThrownBy(() -> fs.delete(new Path("s3://%s/%s".formatted(getBucketName(), prefix)), false)) - .hasMessage("Directory s3://%s/%s is not empty".formatted(getBucketName(), prefix)); - - paths = listPaths(fs.getS3Client(), getBucketName(), prefix, true); - assertThat(paths).containsOnly( - "%s/dir1/%s".formatted(prefix, filename1), - "%s/dir2/%s".formatted(prefix, filename2)); - } - finally { - fs.delete(new Path(prefixPath), true); - } - } - } - - protected TrinoS3FileSystem createFileSystem() - throws Exception - { - TrinoS3FileSystem fs = new TrinoS3FileSystem(); - try { - fs.initialize(new URI("s3://%s/".formatted(getBucketName())), s3Configuration()); - } - catch (Throwable e) { - closeAllSuppress(e, fs); - throw e; - } - return fs; - } - - protected static void createDirectory(AmazonS3 client, String bucketName, String key) - { - // create meta-data for your folder and set content-length to 0 - ObjectMetadata metadata = new ObjectMetadata(); - metadata.setContentLength(0); - metadata.setContentType(DIRECTORY_MEDIA_TYPE.toString()); - // create empty content - InputStream emptyContent = new ByteArrayInputStream(new byte[0]); - // create a PutObjectRequest passing the folder name suffixed by / - PutObjectRequest putObjectRequest = new PutObjectRequest(bucketName, key + PATH_SEPARATOR, emptyContent, metadata); - // send request to S3 to create folder - client.putObject(putObjectRequest); - } - - protected static List listPaths(AmazonS3 s3, String bucketName, String prefix, boolean recursive) - { - ListObjectsV2Request request = new ListObjectsV2Request() - .withBucketName(bucketName) - .withPrefix(prefix) - .withDelimiter(recursive ? null : PATH_SEPARATOR); - ListObjectsV2Result listing = s3.listObjectsV2(request); - - List paths = new ArrayList<>(); - paths.addAll(listing.getCommonPrefixes()); - paths.addAll(listing.getObjectSummaries().stream().map(S3ObjectSummary::getKey).collect(toImmutableList())); - return paths; - } -} diff --git a/lib/trino-hdfs/src/test/java/io/trino/hdfs/s3/MockAmazonS3.java b/lib/trino-hdfs/src/test/java/io/trino/hdfs/s3/MockAmazonS3.java deleted file mode 100644 index de29a4a54921..000000000000 --- a/lib/trino-hdfs/src/test/java/io/trino/hdfs/s3/MockAmazonS3.java +++ /dev/null @@ -1,215 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package io.trino.hdfs.s3; - -import com.amazonaws.SdkClientException; -import com.amazonaws.services.s3.AbstractAmazonS3; -import com.amazonaws.services.s3.model.AbortMultipartUploadRequest; -import com.amazonaws.services.s3.model.AmazonS3Exception; -import com.amazonaws.services.s3.model.CannedAccessControlList; -import com.amazonaws.services.s3.model.CompleteMultipartUploadRequest; -import com.amazonaws.services.s3.model.CompleteMultipartUploadResult; -import com.amazonaws.services.s3.model.GetObjectMetadataRequest; -import com.amazonaws.services.s3.model.GetObjectRequest; -import com.amazonaws.services.s3.model.InitiateMultipartUploadRequest; -import com.amazonaws.services.s3.model.InitiateMultipartUploadResult; -import com.amazonaws.services.s3.model.ListObjectsV2Request; -import com.amazonaws.services.s3.model.ListObjectsV2Result; -import com.amazonaws.services.s3.model.ObjectMetadata; -import com.amazonaws.services.s3.model.PutObjectRequest; -import com.amazonaws.services.s3.model.PutObjectResult; -import com.amazonaws.services.s3.model.S3Object; -import com.amazonaws.services.s3.model.S3ObjectSummary; -import com.amazonaws.services.s3.model.StorageClass; -import com.amazonaws.services.s3.model.UploadPartRequest; -import com.amazonaws.services.s3.model.UploadPartResult; - -import java.util.ArrayList; -import java.util.Date; -import java.util.List; - -import static java.net.HttpURLConnection.HTTP_OK; -import static java.util.UUID.randomUUID; - -public class MockAmazonS3 - extends AbstractAmazonS3 -{ - private int getObjectHttpCode = HTTP_OK; - private String getObjectS3ErrorCode; - private int getObjectMetadataHttpCode = HTTP_OK; - private GetObjectMetadataRequest getObjectMetadataRequest; - private CannedAccessControlList acl; - private boolean hasGlacierObjects; - private boolean hasHadoopFolderMarkerObjects; - private final List uploadParts = new ArrayList<>(); - - public void setGetObjectHttpErrorCode(int getObjectHttpErrorCode) - { - this.getObjectHttpCode = getObjectHttpErrorCode; - } - - public void setGetObjectS3ErrorCode(String getObjectS3ErrorCode) - { - this.getObjectS3ErrorCode = getObjectS3ErrorCode; - } - - public void setGetObjectMetadataHttpCode(int getObjectMetadataHttpCode) - { - this.getObjectMetadataHttpCode = getObjectMetadataHttpCode; - } - - public CannedAccessControlList getAcl() - { - return this.acl; - } - - public void setHasGlacierObjects(boolean hasGlacierObjects) - { - this.hasGlacierObjects = hasGlacierObjects; - } - - public void setHasHadoopFolderMarkerObjects(boolean hasHadoopFolderMarkerObjects) - { - this.hasHadoopFolderMarkerObjects = hasHadoopFolderMarkerObjects; - } - - public GetObjectMetadataRequest getGetObjectMetadataRequest() - { - return getObjectMetadataRequest; - } - - public List getUploadParts() - { - return uploadParts; - } - - @Override - public ObjectMetadata getObjectMetadata(GetObjectMetadataRequest getObjectMetadataRequest) - { - this.getObjectMetadataRequest = getObjectMetadataRequest; - if (getObjectMetadataHttpCode != HTTP_OK) { - AmazonS3Exception exception = new AmazonS3Exception("Failing getObjectMetadata call with " + getObjectMetadataHttpCode); - exception.setStatusCode(getObjectMetadataHttpCode); - throw exception; - } - return null; - } - - @Override - public S3Object getObject(GetObjectRequest getObjectRequest) - { - if (getObjectHttpCode != HTTP_OK) { - AmazonS3Exception exception = new AmazonS3Exception("Failing getObject call with status code:" + getObjectHttpCode + "; error code:" + getObjectS3ErrorCode); - exception.setStatusCode(getObjectHttpCode); - exception.setErrorCode(getObjectS3ErrorCode); - throw exception; - } - return null; - } - - @Override - public PutObjectResult putObject(PutObjectRequest putObjectRequest) - { - this.acl = putObjectRequest.getCannedAcl(); - return new PutObjectResult(); - } - - @Override - public ListObjectsV2Result listObjectsV2(ListObjectsV2Request listObjectsV2Request) - { - final String continuationToken = "continue"; - - ListObjectsV2Result listingV2 = new ListObjectsV2Result(); - - if (continuationToken.equals(listObjectsV2Request.getContinuationToken())) { - S3ObjectSummary standardTwo = new S3ObjectSummary(); - standardTwo.setStorageClass(StorageClass.Standard.toString()); - standardTwo.setKey("test/standardTwo"); - standardTwo.setLastModified(new Date()); - listingV2.getObjectSummaries().add(standardTwo); - - if (hasGlacierObjects) { - S3ObjectSummary glacier = new S3ObjectSummary(); - glacier.setStorageClass(StorageClass.Glacier.toString()); - glacier.setKey("test/glacier"); - glacier.setLastModified(new Date()); - listingV2.getObjectSummaries().add(glacier); - - S3ObjectSummary deepArchive = new S3ObjectSummary(); - deepArchive.setStorageClass(StorageClass.DeepArchive.toString()); - deepArchive.setKey("test/deepArchive"); - deepArchive.setLastModified(new Date()); - listingV2.getObjectSummaries().add(deepArchive); - } - } - else { - S3ObjectSummary standardOne = new S3ObjectSummary(); - standardOne.setStorageClass(StorageClass.Standard.toString()); - standardOne.setKey("test/standardOne"); - standardOne.setLastModified(new Date()); - listingV2.getObjectSummaries().add(standardOne); - listingV2.setTruncated(true); - listingV2.setNextContinuationToken(continuationToken); - - if (hasHadoopFolderMarkerObjects) { - S3ObjectSummary hadoopFolderMarker = new S3ObjectSummary(); - hadoopFolderMarker.setStorageClass(StorageClass.Standard.toString()); - hadoopFolderMarker.setKey("test/test_$folder$"); - hadoopFolderMarker.setLastModified(new Date()); - listingV2.getObjectSummaries().add(hadoopFolderMarker); - } - } - - return listingV2; - } - - @Override - public PutObjectResult putObject(String bucketName, String key, String content) - { - return new PutObjectResult(); - } - - @Override - public InitiateMultipartUploadResult initiateMultipartUpload(InitiateMultipartUploadRequest request) - throws SdkClientException - { - this.acl = request.getCannedACL(); - - InitiateMultipartUploadResult result = new InitiateMultipartUploadResult(); - result.setUploadId(randomUUID().toString()); - return result; - } - - @Override - public UploadPartResult uploadPart(UploadPartRequest request) - throws SdkClientException - { - uploadParts.add(request); - return new UploadPartResult(); - } - - @Override - public CompleteMultipartUploadResult completeMultipartUpload(CompleteMultipartUploadRequest request) - throws SdkClientException - { - return new CompleteMultipartUploadResult(); - } - - @Override - public void abortMultipartUpload(AbortMultipartUploadRequest request) - throws SdkClientException {} - - @Override - public void shutdown() {} -} diff --git a/lib/trino-hdfs/src/test/java/io/trino/hdfs/s3/TestHiveS3Config.java b/lib/trino-hdfs/src/test/java/io/trino/hdfs/s3/TestHiveS3Config.java deleted file mode 100644 index 4228984a3512..000000000000 --- a/lib/trino-hdfs/src/test/java/io/trino/hdfs/s3/TestHiveS3Config.java +++ /dev/null @@ -1,185 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package io.trino.hdfs.s3; - -import com.google.common.base.StandardSystemProperty; -import com.google.common.collect.ImmutableList; -import com.google.common.collect.ImmutableMap; -import io.airlift.units.DataSize; -import io.airlift.units.DataSize.Unit; -import io.airlift.units.Duration; -import org.junit.jupiter.api.Test; - -import java.io.File; -import java.io.IOException; -import java.nio.file.Files; -import java.nio.file.Path; -import java.util.Map; -import java.util.concurrent.TimeUnit; - -import static io.airlift.configuration.testing.ConfigAssertions.assertFullMapping; -import static io.airlift.configuration.testing.ConfigAssertions.assertRecordedDefaults; -import static io.airlift.configuration.testing.ConfigAssertions.recordDefaults; - -public class TestHiveS3Config -{ - @Test - public void testDefaults() - { - assertRecordedDefaults(recordDefaults(HiveS3Config.class) - .setS3AwsAccessKey(null) - .setS3AwsSecretKey(null) - .setS3Endpoint(null) - .setS3Region(null) - .setS3SignerType(null) - .setS3SignerClass(null) - .setS3PathStyleAccess(false) - .setS3IamRole(null) - .setS3ExternalId(null) - .setS3StorageClass(TrinoS3StorageClass.STANDARD) - .setS3SslEnabled(true) - .setS3SseEnabled(false) - .setS3SseType(TrinoS3SseType.S3) - .setS3SseKmsKeyId(null) - .setS3KmsKeyId(null) - .setS3EncryptionMaterialsProvider(null) - .setS3MaxClientRetries(5) - .setS3MaxErrorRetries(10) - .setS3MaxBackoffTime(new Duration(10, TimeUnit.MINUTES)) - .setS3MaxRetryTime(new Duration(10, TimeUnit.MINUTES)) - .setS3ConnectTimeout(new Duration(5, TimeUnit.SECONDS)) - .setS3ConnectTtl(null) - .setS3SocketTimeout(new Duration(5, TimeUnit.SECONDS)) - .setS3MultipartMinFileSize(DataSize.of(16, Unit.MEGABYTE)) - .setS3MultipartMinPartSize(DataSize.of(5, Unit.MEGABYTE)) - .setS3MaxConnections(500) - .setS3StagingDirectory(new File(StandardSystemProperty.JAVA_IO_TMPDIR.value())) - .setPinS3ClientToCurrentRegion(false) - .setS3UserAgentPrefix("") - .setS3AclType(TrinoS3AclType.PRIVATE) - .setSkipGlacierObjects(false) - .setRequesterPaysEnabled(false) - .setS3StreamingUploadEnabled(true) - .setS3StreamingPartSize(DataSize.of(32, Unit.MEGABYTE)) - .setS3ProxyHost(null) - .setS3ProxyPort(-1) - .setS3ProxyProtocol("HTTPS") - .setS3NonProxyHosts(ImmutableList.of()) - .setS3ProxyUsername(null) - .setS3ProxyPassword(null) - .setS3PreemptiveBasicProxyAuth(false) - .setS3StsEndpoint(null) - .setS3StsRegion(null)); - } - - @Test - public void testExplicitPropertyMappings() - throws IOException - { - Path stagingDirectory = Files.createTempDirectory(null); - - Map properties = ImmutableMap.builder() - .put("hive.s3.aws-access-key", "abc123") - .put("hive.s3.aws-secret-key", "secret") - .put("hive.s3.endpoint", "endpoint.example.com") - .put("hive.s3.region", "eu-central-1") - .put("hive.s3.signer-type", "S3SignerType") - .put("hive.s3.signer-class", "com.amazonaws.services.s3.internal.AWSS3V4Signer") - .put("hive.s3.path-style-access", "true") - .put("hive.s3.iam-role", "roleArn") - .put("hive.s3.external-id", "externalId") - .put("hive.s3.storage-class", "INTELLIGENT_TIERING") - .put("hive.s3.ssl.enabled", "false") - .put("hive.s3.sse.enabled", "true") - .put("hive.s3.sse.type", "KMS") - .put("hive.s3.sse.kms-key-id", "KMS_KEY_ID") - .put("hive.s3.encryption-materials-provider", "EMP_CLASS") - .put("hive.s3.kms-key-id", "KEY_ID") - .put("hive.s3.max-client-retries", "9") - .put("hive.s3.max-error-retries", "8") - .put("hive.s3.max-backoff-time", "4m") - .put("hive.s3.max-retry-time", "20m") - .put("hive.s3.connect-timeout", "8s") - .put("hive.s3.connect-ttl", "30m") - .put("hive.s3.socket-timeout", "4m") - .put("hive.s3.multipart.min-file-size", "32MB") - .put("hive.s3.multipart.min-part-size", "15MB") - .put("hive.s3.max-connections", "77") - .put("hive.s3.staging-directory", stagingDirectory.toString()) - .put("hive.s3.pin-client-to-current-region", "true") - .put("hive.s3.user-agent-prefix", "user-agent-prefix") - .put("hive.s3.upload-acl-type", "PUBLIC_READ") - .put("hive.s3.skip-glacier-objects", "true") - .put("hive.s3.requester-pays.enabled", "true") - .put("hive.s3.streaming.enabled", "false") - .put("hive.s3.streaming.part-size", "15MB") - .put("hive.s3.proxy.host", "localhost") - .put("hive.s3.proxy.port", "14000") - .put("hive.s3.proxy.protocol", "HTTP") - .put("hive.s3.proxy.non-proxy-hosts", "test,test2,test3") - .put("hive.s3.proxy.username", "test") - .put("hive.s3.proxy.password", "test") - .put("hive.s3.proxy.preemptive-basic-auth", "true") - .put("hive.s3.sts.endpoint", "http://minio:9000") - .put("hive.s3.sts.region", "eu-central-1") - .buildOrThrow(); - - HiveS3Config expected = new HiveS3Config() - .setS3AwsAccessKey("abc123") - .setS3AwsSecretKey("secret") - .setS3Endpoint("endpoint.example.com") - .setS3Region("eu-central-1") - .setS3SignerType(TrinoS3SignerType.S3SignerType) - .setS3SignerClass("com.amazonaws.services.s3.internal.AWSS3V4Signer") - .setS3PathStyleAccess(true) - .setS3IamRole("roleArn") - .setS3ExternalId("externalId") - .setS3StorageClass(TrinoS3StorageClass.INTELLIGENT_TIERING) - .setS3SslEnabled(false) - .setS3SseEnabled(true) - .setS3SseType(TrinoS3SseType.KMS) - .setS3SseKmsKeyId("KMS_KEY_ID") - .setS3EncryptionMaterialsProvider("EMP_CLASS") - .setS3KmsKeyId("KEY_ID") - .setS3MaxClientRetries(9) - .setS3MaxErrorRetries(8) - .setS3MaxBackoffTime(new Duration(4, TimeUnit.MINUTES)) - .setS3MaxRetryTime(new Duration(20, TimeUnit.MINUTES)) - .setS3ConnectTimeout(new Duration(8, TimeUnit.SECONDS)) - .setS3ConnectTtl(new Duration(30, TimeUnit.MINUTES)) - .setS3SocketTimeout(new Duration(4, TimeUnit.MINUTES)) - .setS3MultipartMinFileSize(DataSize.of(32, Unit.MEGABYTE)) - .setS3MultipartMinPartSize(DataSize.of(15, Unit.MEGABYTE)) - .setS3MaxConnections(77) - .setS3StagingDirectory(stagingDirectory.toFile()) - .setPinS3ClientToCurrentRegion(true) - .setS3UserAgentPrefix("user-agent-prefix") - .setS3AclType(TrinoS3AclType.PUBLIC_READ) - .setSkipGlacierObjects(true) - .setRequesterPaysEnabled(true) - .setS3StreamingUploadEnabled(false) - .setS3StreamingPartSize(DataSize.of(15, Unit.MEGABYTE)) - .setS3ProxyHost("localhost") - .setS3ProxyPort(14000) - .setS3ProxyProtocol("HTTP") - .setS3NonProxyHosts(ImmutableList.of("test", "test2", "test3")) - .setS3ProxyUsername("test") - .setS3ProxyPassword("test") - .setS3PreemptiveBasicProxyAuth(true) - .setS3StsEndpoint("http://minio:9000") - .setS3StsRegion("eu-central-1"); - - assertFullMapping(properties, expected); - } -} diff --git a/lib/trino-hdfs/src/test/java/io/trino/hdfs/s3/TestHiveS3TypeConfig.java b/lib/trino-hdfs/src/test/java/io/trino/hdfs/s3/TestHiveS3TypeConfig.java deleted file mode 100644 index 9084b6562473..000000000000 --- a/lib/trino-hdfs/src/test/java/io/trino/hdfs/s3/TestHiveS3TypeConfig.java +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package io.trino.hdfs.s3; - -import com.google.common.collect.ImmutableMap; -import org.junit.jupiter.api.Test; - -import java.util.Map; - -import static io.airlift.configuration.testing.ConfigAssertions.assertFullMapping; -import static io.airlift.configuration.testing.ConfigAssertions.assertRecordedDefaults; -import static io.airlift.configuration.testing.ConfigAssertions.recordDefaults; - -public class TestHiveS3TypeConfig -{ - @Test - public void testDefaults() - { - assertRecordedDefaults(recordDefaults(HiveS3TypeConfig.class) - .setS3FileSystemType(S3FileSystemType.TRINO)); - } - - @Test - public void testExplicitPropertyMappings() - { - Map properties = ImmutableMap.of("hive.s3-file-system-type", "EMRFS"); - - HiveS3TypeConfig expected = new HiveS3TypeConfig() - .setS3FileSystemType(S3FileSystemType.EMRFS); - - assertFullMapping(properties, expected); - } -} diff --git a/lib/trino-hdfs/src/test/java/io/trino/hdfs/s3/TestS3HadoopPaths.java b/lib/trino-hdfs/src/test/java/io/trino/hdfs/s3/TestS3HadoopPaths.java deleted file mode 100644 index ba4265ff915d..000000000000 --- a/lib/trino-hdfs/src/test/java/io/trino/hdfs/s3/TestS3HadoopPaths.java +++ /dev/null @@ -1,81 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package io.trino.hdfs.s3; - -import io.trino.filesystem.Location; -import org.apache.hadoop.fs.Path; -import org.junit.jupiter.api.Test; - -import java.net.URI; - -import static io.trino.filesystem.hdfs.HadoopPaths.hadoopPath; -import static org.assertj.core.api.Assertions.assertThat; - -public class TestS3HadoopPaths -{ - @Test - public void testNonS3Path() - { - assertThat(hadoopPath(Location.of("gcs://test/abc//xyz"))) - .isEqualTo(new Path("gcs://test/abc/xyz")); - } - - @Test - public void testS3NormalPath() - { - assertThat(hadoopPath(Location.of("s3://test/abc/xyz.csv"))) - .isEqualTo(new Path("s3://test/abc/xyz.csv")) - .extracting(TrinoS3FileSystem::keyFromPath) - .isEqualTo("abc/xyz.csv"); - } - - @Test - public void testS3NormalPathWithInvalidUriEscape() - { - assertThat(hadoopPath(Location.of("s3://test/abc%xyz"))) - .isEqualTo(new Path("s3://test/abc%xyz")) - .extracting(TrinoS3FileSystem::keyFromPath) - .isEqualTo("abc%xyz"); - } - - @Test - public void testS3NonCanonicalPath() - { - assertThat(hadoopPath(Location.of("s3://test/abc//xyz.csv"))) - .isEqualTo(new Path(URI.create("s3://test/abc/xyz.csv#abc//xyz.csv"))) - .hasToString("s3://test/abc/xyz.csv#abc//xyz.csv") - .extracting(TrinoS3FileSystem::keyFromPath) - .isEqualTo("abc//xyz.csv"); - } - - @Test - public void testS3NonCanonicalPathWithInvalidUriEscape() - { - assertThat(hadoopPath(Location.of("s3://test/abc%xyz//test"))) - .isEqualTo(new Path(URI.create("s3://test/abc%25xyz/test#abc%25xyz//test"))) - .hasToString("s3://test/abc%xyz/test#abc%xyz//test") - .extracting(TrinoS3FileSystem::keyFromPath) - .isEqualTo("abc%xyz//test"); - } - - @Test - public void testS3NonCanonicalPathWithDotDigitBucketName() - { - assertThat(hadoopPath(Location.of("s3://test.123/abc//xyz.csv"))) - .isEqualTo(new Path(URI.create("s3://test.123/abc/xyz.csv#abc//xyz.csv"))) - .hasToString("s3://test.123/abc/xyz.csv#abc//xyz.csv") - .extracting(TrinoS3FileSystem::keyFromPath) - .isEqualTo("abc//xyz.csv"); - } -} diff --git a/lib/trino-hdfs/src/test/java/io/trino/hdfs/s3/TestS3SecurityMapping.java b/lib/trino-hdfs/src/test/java/io/trino/hdfs/s3/TestS3SecurityMapping.java deleted file mode 100644 index 7797d83b8044..000000000000 --- a/lib/trino-hdfs/src/test/java/io/trino/hdfs/s3/TestS3SecurityMapping.java +++ /dev/null @@ -1,548 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package io.trino.hdfs.s3; - -import com.google.common.collect.ImmutableMap; -import com.google.common.collect.ImmutableSet; -import io.trino.hdfs.DynamicConfigurationProvider; -import io.trino.hdfs.HdfsContext; -import io.trino.spi.connector.ConnectorSession; -import io.trino.spi.security.AccessDeniedException; -import io.trino.spi.security.ConnectorIdentity; -import io.trino.testing.TestingConnectorSession; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.Path; -import org.junit.jupiter.api.Test; - -import java.util.Optional; -import java.util.Set; - -import static com.google.common.io.Resources.getResource; -import static io.trino.hdfs.s3.TestS3SecurityMapping.MappingResult.clusterDefaultRole; -import static io.trino.hdfs.s3.TestS3SecurityMapping.MappingResult.credentials; -import static io.trino.hdfs.s3.TestS3SecurityMapping.MappingResult.role; -import static io.trino.hdfs.s3.TestS3SecurityMapping.MappingSelector.empty; -import static io.trino.hdfs.s3.TestS3SecurityMapping.MappingSelector.path; -import static io.trino.hdfs.s3.TrinoS3FileSystem.S3_ACCESS_KEY; -import static io.trino.hdfs.s3.TrinoS3FileSystem.S3_ENDPOINT; -import static io.trino.hdfs.s3.TrinoS3FileSystem.S3_IAM_ROLE; -import static io.trino.hdfs.s3.TrinoS3FileSystem.S3_KMS_KEY_ID; -import static io.trino.hdfs.s3.TrinoS3FileSystem.S3_REGION; -import static io.trino.hdfs.s3.TrinoS3FileSystem.S3_ROLE_SESSION_NAME; -import static io.trino.hdfs.s3.TrinoS3FileSystem.S3_SECRET_KEY; -import static java.util.Objects.requireNonNull; -import static org.assertj.core.api.Assertions.assertThat; -import static org.assertj.core.api.Assertions.assertThatThrownBy; - -public class TestS3SecurityMapping -{ - private static final String IAM_ROLE_CREDENTIAL_NAME = "IAM_ROLE_CREDENTIAL_NAME"; - private static final String KMS_KEY_ID_CREDENTIAL_NAME = "KMS_KEY_ID_CREDENTIAL_NAME"; - private static final String DEFAULT_PATH = "s3://default"; - private static final String DEFAULT_USER = "testuser"; - - @Test - public void testMapping() - { - S3SecurityMappingConfig mappingConfig = new S3SecurityMappingConfig() - .setConfigFilePath(getResource(getClass(), "security-mapping.json").getPath()) - .setRoleCredentialName(IAM_ROLE_CREDENTIAL_NAME) - .setKmsKeyIdCredentialName(KMS_KEY_ID_CREDENTIAL_NAME) - .setColonReplacement("#"); - - DynamicConfigurationProvider provider = new S3SecurityMappingConfigurationProvider(mappingConfig, - new FileBasedS3SecurityMappingsProvider(mappingConfig)); - - // matches prefix -- mapping provides credentials - assertMapping( - provider, - path("s3://foo/data/test.csv"), - credentials("AKIAxxxaccess", "iXbXxxxsecret") - .withKmsKeyId("kmsKey_10")); - - // matches prefix exactly -- mapping provides credentials - assertMapping( - provider, - path("s3://foo"), - credentials("AKIAxxxaccess", "iXbXxxxsecret") - .withKmsKeyId("kmsKey_10")); - - // matches prefix exactly -- mapping provides credentials, kms key from extra credentials matching default - assertMapping( - provider, - path("s3://foo").withExtraCredentialKmsKeyId("kmsKey_10"), - credentials("AKIAxxxaccess", "iXbXxxxsecret") - .withKmsKeyId("kmsKey_10")); - - // matches prefix exactly -- mapping provides credentials, kms key from extra credentials, allowed, different than default - assertMapping( - provider, - path("s3://foo").withExtraCredentialKmsKeyId("kmsKey_11"), - credentials("AKIAxxxaccess", "iXbXxxxsecret") - .withKmsKeyId("kmsKey_11")); - - // matches prefix exactly -- mapping provides credentials, kms key from extra credentials, not allowed - assertMappingFails( - provider, - path("s3://foo").withExtraCredentialKmsKeyId("kmsKey_not_allowed"), - "Selected KMS Key ID is not allowed"); - - // matches prefix exactly -- mapping provides credentials, kms key from extra credentials, all keys are allowed, different than default - assertMapping( - provider, - path("s3://foo_all_keys_allowed").withExtraCredentialKmsKeyId("kmsKey_777"), - credentials("AKIAxxxaccess", "iXbXxxxsecret") - .withKmsKeyId("kmsKey_777")); - - // matches prefix exactly -- mapping provides credentials, kms key from extra credentials, allowed, no default key - assertMapping( - provider, - path("s3://foo_no_default_key").withExtraCredentialKmsKeyId("kmsKey_12"), - credentials("AKIAxxxaccess", "iXbXxxxsecret") - .withKmsKeyId("kmsKey_12")); - - // no role selected and mapping has no default role - assertMappingFails( - provider, - path("s3://bar/test"), - "No S3 role selected and mapping has no default role"); - - // matches prefix and user selected one of allowed roles - assertMapping( - provider, - path("s3://bar/test").withExtraCredentialIamRole("arn:aws:iam::123456789101:role/allow_bucket_2"), - role("arn:aws:iam::123456789101:role/allow_bucket_2")); - - // user selected role not in allowed list - assertMappingFails( - provider, - path("s3://bar/test").withUser("bob").withExtraCredentialIamRole("bogus"), - "Selected S3 role is not allowed: bogus"); - - // verify that colon replacement works - String roleWithoutColon = "arn#aws#iam##123456789101#role/allow_bucket_2"; - assertThat(roleWithoutColon).doesNotContain(":"); - assertMapping( - provider, - path("s3://bar/test").withExtraCredentialIamRole(roleWithoutColon), - role("arn:aws:iam::123456789101:role/allow_bucket_2")); - - // matches prefix -- default role used - assertMapping( - provider, - path("s3://bar/abc/data/test.csv"), - role("arn:aws:iam::123456789101:role/allow_path")); - - // matches empty rule at end -- default role used - assertMapping( - provider, - empty(), - role("arn:aws:iam::123456789101:role/default")); - - // matches prefix -- default role used - assertMapping( - provider, - path("s3://xyz/default"), - role("arn:aws:iam::123456789101:role/allow_default")); - - // matches prefix and user selected one of allowed roles - assertMapping( - provider, - path("s3://xyz/foo").withExtraCredentialIamRole("arn:aws:iam::123456789101:role/allow_foo"), - role("arn:aws:iam::123456789101:role/allow_foo")); - - // matches prefix and user selected one of allowed roles - assertMapping( - provider, - path("s3://xyz/bar").withExtraCredentialIamRole("arn:aws:iam::123456789101:role/allow_bar"), - role("arn:aws:iam::123456789101:role/allow_bar")); - - // matches user -- default role used - assertMapping( - provider, - empty().withUser("alice"), - role("alice_role")); - - // matches user and user selected default role - assertMapping( - provider, - empty().withUser("alice").withExtraCredentialIamRole("alice_role"), - role("alice_role")); - - // matches user and selected role not allowed - assertMappingFails( - provider, - empty().withUser("alice").withExtraCredentialIamRole("bogus"), - "Selected S3 role is not allowed: bogus"); - - // verify that first matching rule is used - // matches prefix earlier in file and selected role not allowed - assertMappingFails( - provider, - path("s3://bar/test").withUser("alice").withExtraCredentialIamRole("alice_role"), - "Selected S3 role is not allowed: alice_role"); - - // matches user regex -- default role used - assertMapping( - provider, - empty().withUser("bob"), - role("bob_and_charlie_role")); - - // matches group -- default role used - assertMapping( - provider, - empty().withGroups("finance"), - role("finance_role")); - - // matches group regex -- default role used - assertMapping( - provider, - empty().withGroups("eng"), - role("hr_and_eng_group")); - - // verify that all constraints must match - // matches user but not group -- uses empty mapping at end - assertMapping( - provider, - empty().withUser("danny"), - role("arn:aws:iam::123456789101:role/default")); - - // matches group but not user -- uses empty mapping at end - assertMapping( - provider, - empty().withGroups("hq"), - role("arn:aws:iam::123456789101:role/default")); - - // matches user and group - assertMapping( - provider, - empty().withUser("danny").withGroups("hq"), - role("danny_hq_role")); - - // matches prefix -- mapping provides credentials and endpoint - assertMapping( - provider, - path("s3://endpointbucket/bar"), - credentials("AKIAxxxaccess", "iXbXxxxsecret").withEndpoint("http://localhost:7753")); - - // matches prefix -- mapping provides credentials and region - assertMapping( - provider, - path("s3://regionalbucket/bar"), - credentials("AKIAxxxaccess", "iXbXxxxsecret").withRegion("us-west-2")); - - // matches role session name - assertMapping( - provider, - path("s3://somebucket"), - role("arn:aws:iam::1234567891012:role/default").withRoleSessionName("iam-trino-session")); - } - - @Test - public void testMappingWithFallbackToClusterDefault() - { - S3SecurityMappingConfig mappingConfig = new S3SecurityMappingConfig() - .setConfigFilePath(getResource(getClass(), "security-mapping-with-fallback-to-cluster-default.json").getPath()); - - DynamicConfigurationProvider provider = new S3SecurityMappingConfigurationProvider(mappingConfig, - new FileBasedS3SecurityMappingsProvider(mappingConfig)); - - // matches prefix - returns role from the mapping - assertMapping( - provider, - path("s3://bar/abc/data/test.csv"), - role("arn:aws:iam::123456789101:role/allow_path")); - - // doesn't match any rule except default rule at the end - assertMapping( - provider, - empty(), - clusterDefaultRole()); - } - - @Test - public void testMappingWithoutFallback() - { - S3SecurityMappingConfig mappingConfig = new S3SecurityMappingConfig() - .setConfigFilePath(getResource(getClass(), "security-mapping-without-fallback.json").getPath()); - - DynamicConfigurationProvider provider = new S3SecurityMappingConfigurationProvider(mappingConfig, - new FileBasedS3SecurityMappingsProvider(mappingConfig)); - - // matches prefix - returns role from the mapping - assertMapping( - provider, - path("s3://bar/abc/data/test.csv"), - role("arn:aws:iam::123456789101:role/allow_path")); - - // doesn't match any rule - assertMappingFails( - provider, - empty(), - "No matching S3 security mapping"); - } - - @Test - public void testMappingWithoutRoleCredentialsFallbackShouldFail() - { - assertThatThrownBy(() -> - new S3SecurityMapping(Optional.empty(), Optional.empty(), Optional.empty(), Optional.empty(), Optional.empty(), Optional.empty(), Optional.empty(), - Optional.empty(), Optional.empty(), Optional.empty(), Optional.empty(), Optional.empty(), Optional.empty())) - .isInstanceOf(IllegalArgumentException.class) - .hasMessage("must either allow useClusterDefault role or provide role and/or credentials"); - } - - @Test - public void testMappingWithRoleAndFallbackShouldFail() - { - Optional iamRole = Optional.of("arn:aws:iam::123456789101:role/allow_path"); - Optional useClusterDefault = Optional.of(true); - - assertThatThrownBy(() -> - new S3SecurityMapping(Optional.empty(), Optional.empty(), Optional.empty(), iamRole, Optional.empty(), Optional.empty(), Optional.empty(), Optional.empty(), - Optional.empty(), Optional.empty(), useClusterDefault, Optional.empty(), Optional.empty())) - .isInstanceOf(IllegalArgumentException.class) - .hasMessage("must either allow useClusterDefault role or provide role and/or credentials"); - } - - @Test - public void testMappingWithEncryptionKeysAndFallbackShouldFail() - { - Optional useClusterDefault = Optional.of(true); - Optional kmsKeyId = Optional.of("CLIENT_S3CRT_KEY_ID"); - - assertThatThrownBy(() -> - new S3SecurityMapping(Optional.empty(), Optional.empty(), Optional.empty(), Optional.empty(), Optional.empty(), Optional.empty(), kmsKeyId, Optional.empty(), - Optional.empty(), Optional.empty(), useClusterDefault, Optional.empty(), Optional.empty())) - .isInstanceOf(IllegalArgumentException.class) - .hasMessage("KMS key ID cannot be provided together with useClusterDefault"); - } - - @Test - public void testMappingWithRoleSessionNameWithoutIamRoleShouldFail() - { - Optional roleSessionName = Optional.of("iam-trino-session"); - - assertThatThrownBy(() -> - new S3SecurityMapping(Optional.empty(), Optional.empty(), Optional.empty(), Optional.empty(), roleSessionName, Optional.empty(), Optional.empty(), - Optional.empty(), Optional.empty(), Optional.empty(), Optional.empty(), Optional.empty(), Optional.empty())) - .isInstanceOf(IllegalArgumentException.class) - .hasMessage("iamRole must be provided when roleSessionName is provided"); - } - - private static void assertMapping(DynamicConfigurationProvider provider, MappingSelector selector, MappingResult mappingResult) - { - Configuration configuration = new Configuration(false); - - assertThat(configuration.get(S3_ACCESS_KEY)).isNull(); - assertThat(configuration.get(S3_SECRET_KEY)).isNull(); - assertThat(configuration.get(S3_IAM_ROLE)).isNull(); - assertThat(configuration.get(S3_KMS_KEY_ID)).isNull(); - - applyMapping(provider, selector, configuration); - - assertThat(configuration.get(S3_ACCESS_KEY)).isEqualTo(mappingResult.getAccessKey().orElse(null)); - assertThat(configuration.get(S3_SECRET_KEY)).isEqualTo(mappingResult.getSecretKey().orElse(null)); - assertThat(configuration.get(S3_IAM_ROLE)).isEqualTo(mappingResult.getRole().orElse(null)); - assertThat(configuration.get(S3_KMS_KEY_ID)).isEqualTo(mappingResult.getKmsKeyId().orElse(null)); - assertThat(configuration.get(S3_ENDPOINT)).isEqualTo(mappingResult.getEndpoint().orElse(null)); - assertThat(configuration.get(S3_ROLE_SESSION_NAME)).isEqualTo(mappingResult.getRoleSessionName().orElse(null)); - assertThat(configuration.get(S3_REGION)).isEqualTo(mappingResult.getRegion().orElse(null)); - } - - private static void assertMappingFails(DynamicConfigurationProvider provider, MappingSelector selector, String message) - { - Configuration configuration = new Configuration(false); - - assertThatThrownBy(() -> applyMapping(provider, selector, configuration)) - .isInstanceOf(AccessDeniedException.class) - .hasMessage("Access Denied: " + message); - } - - private static void applyMapping(DynamicConfigurationProvider provider, MappingSelector selector, Configuration configuration) - { - provider.updateConfiguration(configuration, selector.getHdfsContext(), selector.getPath().toUri()); - } - - public static class MappingSelector - { - public static MappingSelector empty() - { - return path(DEFAULT_PATH); - } - - public static MappingSelector path(String path) - { - return new MappingSelector(DEFAULT_USER, ImmutableSet.of(), new Path(path), Optional.empty(), Optional.empty()); - } - - private final String user; - private final Set groups; - private final Path path; - private final Optional extraCredentialIamRole; - private final Optional extraCredentialKmsKeyId; - - private MappingSelector(String user, Set groups, Path path, Optional extraCredentialIamRole, Optional extraCredentialKmsKeyId) - { - this.user = requireNonNull(user, "user is null"); - this.groups = ImmutableSet.copyOf(requireNonNull(groups, "groups is null")); - this.path = requireNonNull(path, "path is null"); - this.extraCredentialIamRole = requireNonNull(extraCredentialIamRole, "extraCredentialIamRole is null"); - this.extraCredentialKmsKeyId = requireNonNull(extraCredentialKmsKeyId, "extraCredentialKmsKeyId is null"); - } - - public Path getPath() - { - return path; - } - - public MappingSelector withExtraCredentialIamRole(String role) - { - return new MappingSelector(user, groups, path, Optional.of(role), extraCredentialKmsKeyId); - } - - public MappingSelector withExtraCredentialKmsKeyId(String kmsKeyId) - { - return new MappingSelector(user, groups, path, extraCredentialIamRole, Optional.of(kmsKeyId)); - } - - public MappingSelector withUser(String user) - { - return new MappingSelector(user, groups, path, extraCredentialIamRole, extraCredentialKmsKeyId); - } - - public MappingSelector withGroups(String... groups) - { - return new MappingSelector(user, ImmutableSet.copyOf(groups), path, extraCredentialIamRole, extraCredentialKmsKeyId); - } - - public HdfsContext getHdfsContext() - { - ImmutableMap.Builder extraCredentials = ImmutableMap.builder(); - extraCredentialIamRole.ifPresent(role -> extraCredentials.put(IAM_ROLE_CREDENTIAL_NAME, role)); - extraCredentialKmsKeyId.ifPresent(kmsKeyId -> extraCredentials.put(KMS_KEY_ID_CREDENTIAL_NAME, kmsKeyId)); - - ConnectorSession connectorSession = TestingConnectorSession.builder() - .setIdentity(ConnectorIdentity.forUser(user) - .withGroups(groups) - .withExtraCredentials(extraCredentials.buildOrThrow()) - .build()) - .build(); - return new HdfsContext(connectorSession); - } - } - - public static class MappingResult - { - public static MappingResult credentials(String accessKey, String secretKey) - { - return new MappingResult(Optional.of(accessKey), Optional.of(secretKey), Optional.empty(), Optional.empty(), Optional.empty(), Optional.empty(), Optional.empty()); - } - - public static MappingResult role(String role) - { - return new MappingResult(Optional.empty(), Optional.empty(), Optional.of(role), Optional.empty(), Optional.empty(), Optional.empty(), Optional.empty()); - } - - public static MappingResult clusterDefaultRole() - { - return new MappingResult(Optional.empty(), Optional.empty(), Optional.empty(), Optional.empty(), Optional.empty(), Optional.empty(), Optional.empty()); - } - - public static MappingResult endpoint(String endpoint) - { - return new MappingResult(Optional.empty(), Optional.empty(), Optional.empty(), Optional.empty(), Optional.of(endpoint), Optional.empty(), Optional.empty()); - } - - public static MappingResult region(String region) - { - return new MappingResult(Optional.empty(), Optional.empty(), Optional.empty(), Optional.empty(), Optional.empty(), Optional.empty(), Optional.of(region)); - } - - private final Optional accessKey; - private final Optional secretKey; - private final Optional role; - private final Optional kmsKeyId; - private final Optional endpoint; - private final Optional roleSessionName; - private final Optional region; - - private MappingResult(Optional accessKey, Optional secretKey, Optional role, Optional kmsKeyId, Optional endpoint, - Optional roleSessionName, Optional region) - { - this.accessKey = requireNonNull(accessKey, "accessKey is null"); - this.secretKey = requireNonNull(secretKey, "secretKey is null"); - this.role = requireNonNull(role, "role is null"); - this.kmsKeyId = requireNonNull(kmsKeyId, "kmsKeyId is null"); - this.endpoint = requireNonNull(endpoint, "endpoint is null"); - this.roleSessionName = requireNonNull(roleSessionName, "roleSessionName is null"); - this.region = requireNonNull(region, "region is null"); - } - - public MappingResult withEndpoint(String endpoint) - { - return new MappingResult(accessKey, secretKey, role, kmsKeyId, Optional.of(endpoint), Optional.empty(), region); - } - - public MappingResult withKmsKeyId(String kmsKeyId) - { - return new MappingResult(accessKey, secretKey, role, Optional.of(kmsKeyId), endpoint, Optional.empty(), region); - } - - public MappingResult withRegion(String region) - { - return new MappingResult(accessKey, secretKey, role, kmsKeyId, endpoint, Optional.empty(), Optional.of(region)); - } - - public MappingResult withRoleSessionName(String roleSessionName) - { - return new MappingResult(accessKey, secretKey, role, kmsKeyId, Optional.empty(), Optional.of(roleSessionName), region); - } - - public Optional getAccessKey() - { - return accessKey; - } - - public Optional getSecretKey() - { - return secretKey; - } - - public Optional getRole() - { - return role; - } - - public Optional getKmsKeyId() - { - return kmsKeyId; - } - - public Optional getEndpoint() - { - return endpoint; - } - - public Optional getRoleSessionName() - { - return roleSessionName; - } - - public Optional getRegion() - { - return region; - } - } -} diff --git a/lib/trino-hdfs/src/test/java/io/trino/hdfs/s3/TestS3SecurityMappingConfig.java b/lib/trino-hdfs/src/test/java/io/trino/hdfs/s3/TestS3SecurityMappingConfig.java deleted file mode 100644 index 1634c8eb2a5e..000000000000 --- a/lib/trino-hdfs/src/test/java/io/trino/hdfs/s3/TestS3SecurityMappingConfig.java +++ /dev/null @@ -1,92 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package io.trino.hdfs.s3; - -import com.google.common.collect.ImmutableMap; -import io.airlift.units.Duration; -import org.junit.jupiter.api.Test; - -import java.io.IOException; -import java.nio.file.Files; -import java.nio.file.Path; -import java.util.Map; - -import static io.airlift.configuration.testing.ConfigAssertions.assertFullMapping; -import static io.airlift.configuration.testing.ConfigAssertions.assertRecordedDefaults; -import static io.airlift.configuration.testing.ConfigAssertions.recordDefaults; -import static java.util.concurrent.TimeUnit.SECONDS; - -public class TestS3SecurityMappingConfig -{ - @Test - public void testDefaults() - { - assertRecordedDefaults(recordDefaults(S3SecurityMappingConfig.class) - .setJsonPointer("") - .setConfigFilePath(null) - .setRoleCredentialName(null) - .setKmsKeyIdCredentialName(null) - .setRefreshPeriod(null) - .setColonReplacement(null)); - } - - @Test - public void testExplicitPropertyMappingsWithFile() - throws IOException - { - Path securityMappingConfigFile = Files.createTempFile(null, null); - - Map properties = ImmutableMap.builder() - .put("hive.s3.security-mapping.config-file", securityMappingConfigFile.toString()) - .put("hive.s3.security-mapping.json-pointer", "/data") - .put("hive.s3.security-mapping.iam-role-credential-name", "iam-role-credential-name") - .put("hive.s3.security-mapping.kms-key-id-credential-name", "kms-key-id-credential-name") - .put("hive.s3.security-mapping.refresh-period", "1s") - .put("hive.s3.security-mapping.colon-replacement", "#") - .buildOrThrow(); - - S3SecurityMappingConfig expected = new S3SecurityMappingConfig() - .setConfigFilePath(securityMappingConfigFile.toString()) - .setJsonPointer("/data") - .setRoleCredentialName("iam-role-credential-name") - .setKmsKeyIdCredentialName("kms-key-id-credential-name") - .setRefreshPeriod(new Duration(1, SECONDS)) - .setColonReplacement("#"); - - assertFullMapping(properties, expected); - } - - @Test - public void testExplicitPropertyMappingsWithUrl() - { - Map properties = ImmutableMap.builder() - .put("hive.s3.security-mapping.config-file", "http://test:1234/example") - .put("hive.s3.security-mapping.json-pointer", "/data") - .put("hive.s3.security-mapping.iam-role-credential-name", "iam-role-credential-name") - .put("hive.s3.security-mapping.kms-key-id-credential-name", "kms-key-id-credential-name") - .put("hive.s3.security-mapping.refresh-period", "1s") - .put("hive.s3.security-mapping.colon-replacement", "#") - .buildOrThrow(); - - S3SecurityMappingConfig expected = new S3SecurityMappingConfig() - .setConfigFilePath("http://test:1234/example") - .setJsonPointer("/data") - .setRoleCredentialName("iam-role-credential-name") - .setKmsKeyIdCredentialName("kms-key-id-credential-name") - .setRefreshPeriod(new Duration(1, SECONDS)) - .setColonReplacement("#"); - - assertFullMapping(properties, expected); - } -} diff --git a/lib/trino-hdfs/src/test/java/io/trino/hdfs/s3/TestS3SecurityMappingsParser.java b/lib/trino-hdfs/src/test/java/io/trino/hdfs/s3/TestS3SecurityMappingsParser.java deleted file mode 100644 index c94108c4502f..000000000000 --- a/lib/trino-hdfs/src/test/java/io/trino/hdfs/s3/TestS3SecurityMappingsParser.java +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package io.trino.hdfs.s3; - -import io.trino.spi.security.ConnectorIdentity; -import org.junit.jupiter.api.Test; - -import java.net.URI; -import java.util.Optional; - -import static org.assertj.core.api.Assertions.assertThat; - -public class TestS3SecurityMappingsParser -{ - @Test - public void testParse() - { - S3SecurityMappingConfig conf = new S3SecurityMappingConfig() - .setJsonPointer("/data"); - - S3SecurityMappingsParser provider = new S3SecurityMappingsParser(conf); - S3SecurityMappings mappings = - provider.parseJSONString("{\"data\": {\"mappings\": [{\"iamRole\":\"arn:aws:iam::test\",\"user\":\"test\"}]}, \"time\": \"30s\"}"); - - Optional mapping = mappings.getMapping(ConnectorIdentity.ofUser("test"), URI.create("http://trino")); - assertThat(mapping).isPresent(); - } - - @Test - public void testParseDefault() - { - S3SecurityMappingConfig conf = new S3SecurityMappingConfig(); - - S3SecurityMappingsParser provider = new S3SecurityMappingsParser(conf); - S3SecurityMappings mappings = - provider.parseJSONString("{\"mappings\": [{\"iamRole\":\"arn:aws:iam::test\",\"user\":\"test\"}]}"); - - Optional mapping = mappings.getMapping(ConnectorIdentity.ofUser("test"), URI.create("http://trino")); - assertThat(mapping).isPresent(); - } -} diff --git a/lib/trino-hdfs/src/test/java/io/trino/hdfs/s3/TestTrinoS3FileSystem.java b/lib/trino-hdfs/src/test/java/io/trino/hdfs/s3/TestTrinoS3FileSystem.java deleted file mode 100644 index ea416427e50a..000000000000 --- a/lib/trino-hdfs/src/test/java/io/trino/hdfs/s3/TestTrinoS3FileSystem.java +++ /dev/null @@ -1,1051 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package io.trino.hdfs.s3; - -import com.amazonaws.AmazonWebServiceClient; -import com.amazonaws.ClientConfiguration; -import com.amazonaws.Protocol; -import com.amazonaws.auth.AWSCredentials; -import com.amazonaws.auth.AWSCredentialsProvider; -import com.amazonaws.auth.AWSSessionCredentials; -import com.amazonaws.auth.AWSStaticCredentialsProvider; -import com.amazonaws.auth.DefaultAWSCredentialsProviderChain; -import com.amazonaws.auth.STSAssumeRoleSessionCredentialsProvider; -import com.amazonaws.auth.WebIdentityTokenCredentialsProvider; -import com.amazonaws.services.s3.AmazonS3Client; -import com.amazonaws.services.s3.AmazonS3EncryptionClient; -import com.amazonaws.services.s3.S3ClientOptions; -import com.amazonaws.services.s3.model.AmazonS3Exception; -import com.amazonaws.services.s3.model.CannedAccessControlList; -import com.amazonaws.services.s3.model.EncryptionMaterials; -import com.amazonaws.services.s3.model.EncryptionMaterialsProvider; -import com.amazonaws.services.s3.model.GetObjectMetadataRequest; -import com.amazonaws.services.s3.model.ListObjectsV2Request; -import com.amazonaws.services.s3.model.ListObjectsV2Result; -import com.amazonaws.services.s3.model.ObjectMetadata; -import com.amazonaws.services.s3.model.S3ObjectSummary; -import com.amazonaws.services.s3.model.StorageClass; -import com.amazonaws.services.s3.model.UploadPartRequest; -import com.amazonaws.services.securitytoken.AWSSecurityTokenService; -import com.amazonaws.services.securitytoken.AWSSecurityTokenServiceClient; -import com.google.common.base.VerifyException; -import com.google.common.collect.ImmutableList; -import com.google.common.util.concurrent.ListenableFuture; -import io.trino.hdfs.s3.TrinoS3FileSystem.UnrecoverableS3OperationException; -import io.trino.memory.context.AggregatedMemoryContext; -import io.trino.memory.context.MemoryReservationHandler; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FSDataInputStream; -import org.apache.hadoop.fs.FSDataOutputStream; -import org.apache.hadoop.fs.FileStatus; -import org.apache.hadoop.fs.LocatedFileStatus; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.RemoteIterator; -import org.junit.jupiter.api.Test; - -import javax.crypto.spec.SecretKeySpec; - -import java.io.ByteArrayInputStream; -import java.io.FileNotFoundException; -import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; -import java.io.SequenceInputStream; -import java.lang.reflect.Field; -import java.net.URI; -import java.net.URISyntaxException; -import java.nio.file.Files; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Date; -import java.util.List; -import java.util.Map; - -import static com.google.common.base.Preconditions.checkArgument; -import static com.google.common.io.MoreFiles.deleteRecursively; -import static com.google.common.io.RecursiveDeleteOption.ALLOW_INSECURE; -import static io.trino.hdfs.s3.TrinoS3FileSystem.NO_SUCH_BUCKET_ERROR_CODE; -import static io.trino.hdfs.s3.TrinoS3FileSystem.NO_SUCH_KEY_ERROR_CODE; -import static io.trino.hdfs.s3.TrinoS3FileSystem.S3_ACCESS_KEY; -import static io.trino.hdfs.s3.TrinoS3FileSystem.S3_ACL_TYPE; -import static io.trino.hdfs.s3.TrinoS3FileSystem.S3_CREDENTIALS_PROVIDER; -import static io.trino.hdfs.s3.TrinoS3FileSystem.S3_ENCRYPTION_MATERIALS_PROVIDER; -import static io.trino.hdfs.s3.TrinoS3FileSystem.S3_ENDPOINT; -import static io.trino.hdfs.s3.TrinoS3FileSystem.S3_EXTERNAL_ID; -import static io.trino.hdfs.s3.TrinoS3FileSystem.S3_IAM_ROLE; -import static io.trino.hdfs.s3.TrinoS3FileSystem.S3_KMS_KEY_ID; -import static io.trino.hdfs.s3.TrinoS3FileSystem.S3_MAX_BACKOFF_TIME; -import static io.trino.hdfs.s3.TrinoS3FileSystem.S3_MAX_CLIENT_RETRIES; -import static io.trino.hdfs.s3.TrinoS3FileSystem.S3_MAX_RETRY_TIME; -import static io.trino.hdfs.s3.TrinoS3FileSystem.S3_PATH_STYLE_ACCESS; -import static io.trino.hdfs.s3.TrinoS3FileSystem.S3_PIN_CLIENT_TO_CURRENT_REGION; -import static io.trino.hdfs.s3.TrinoS3FileSystem.S3_REGION; -import static io.trino.hdfs.s3.TrinoS3FileSystem.S3_SECRET_KEY; -import static io.trino.hdfs.s3.TrinoS3FileSystem.S3_SESSION_TOKEN; -import static io.trino.hdfs.s3.TrinoS3FileSystem.S3_SKIP_GLACIER_OBJECTS; -import static io.trino.hdfs.s3.TrinoS3FileSystem.S3_STAGING_DIRECTORY; -import static io.trino.hdfs.s3.TrinoS3FileSystem.S3_STREAMING_UPLOAD_ENABLED; -import static io.trino.hdfs.s3.TrinoS3FileSystem.S3_STREAMING_UPLOAD_PART_SIZE; -import static io.trino.hdfs.s3.TrinoS3FileSystem.S3_USER_AGENT_PREFIX; -import static io.trino.hdfs.s3.TrinoS3FileSystem.S3_USE_WEB_IDENTITY_TOKEN_CREDENTIALS_PROVIDER; -import static io.trino.memory.context.AggregatedMemoryContext.newRootAggregatedMemoryContext; -import static java.net.HttpURLConnection.HTTP_FORBIDDEN; -import static java.net.HttpURLConnection.HTTP_INTERNAL_ERROR; -import static java.net.HttpURLConnection.HTTP_NOT_FOUND; -import static java.nio.charset.StandardCharsets.US_ASCII; -import static java.nio.file.Files.createTempDirectory; -import static java.nio.file.Files.createTempFile; -import static org.assertj.core.api.Assertions.assertThat; -import static org.assertj.core.api.Assertions.assertThatThrownBy; -import static org.junit.jupiter.api.Assumptions.abort; - -public class TestTrinoS3FileSystem -{ - private static final int HTTP_RANGE_NOT_SATISFIABLE = 416; - private static final String S3_DIRECTORY_OBJECT_CONTENT_TYPE = "application/x-directory; charset=UTF-8"; - - @Test - public void testEmbeddedCredentials() - throws Exception - { - Configuration config = new Configuration(false); - try (TrinoS3FileSystem fs = new TrinoS3FileSystem()) { - AWSCredentials credentials = getStaticCredentials(config, fs, "s3n://testAccess:testSecret@test-bucket/"); - assertThat(credentials.getAWSAccessKeyId()).isEqualTo("testAccess"); - assertThat(credentials.getAWSSecretKey()).isEqualTo("testSecret"); - assertThat(credentials).isNotInstanceOf(AWSSessionCredentials.class); - } - } - - @Test - public void testStaticCredentials() - throws Exception - { - Configuration config = new Configuration(false); - config.set(S3_ACCESS_KEY, "test_access_key"); - config.set(S3_SECRET_KEY, "test_secret_key"); - - try (TrinoS3FileSystem fs = new TrinoS3FileSystem()) { - AWSCredentials credentials = getStaticCredentials(config, fs, "s3n://test-bucket/"); - assertThat(credentials.getAWSAccessKeyId()).isEqualTo("test_access_key"); - assertThat(credentials.getAWSSecretKey()).isEqualTo("test_secret_key"); - assertThat(credentials).isNotInstanceOf(AWSSessionCredentials.class); - } - - config.set(S3_SESSION_TOKEN, "test_token"); - try (TrinoS3FileSystem fs = new TrinoS3FileSystem()) { - AWSCredentials credentials = getStaticCredentials(config, fs, "s3n://test-bucket/"); - assertThat(credentials.getAWSAccessKeyId()).isEqualTo("test_access_key"); - assertThat(credentials.getAWSSecretKey()).isEqualTo("test_secret_key"); - assertThat(credentials).isInstanceOfSatisfying(AWSSessionCredentials.class, sessionCredentials -> - assertThat(sessionCredentials.getSessionToken()).isEqualTo("test_token")); - } - } - - private static AWSCredentials getStaticCredentials(Configuration config, TrinoS3FileSystem fileSystem, String uri) - throws IOException, URISyntaxException - { - fileSystem.initialize(new URI(uri), config); - AWSCredentialsProvider awsCredentialsProvider = getAwsCredentialsProvider(fileSystem); - assertThat(awsCredentialsProvider).isInstanceOf(AWSStaticCredentialsProvider.class); - return awsCredentialsProvider.getCredentials(); - } - - @Test - public void testEndpointWithPinToCurrentRegionConfiguration() - throws Exception - { - Configuration config = new Configuration(false); - config.set(S3_ENDPOINT, "test.example.endpoint.com"); - config.set(S3_PIN_CLIENT_TO_CURRENT_REGION, "true"); - try (TrinoS3FileSystem fs = new TrinoS3FileSystem()) { - assertThatThrownBy(() -> fs.initialize(new URI("s3a://test-bucket/"), config)) - .isInstanceOf(VerifyException.class) - .hasMessage("Invalid configuration: either endpoint can be set or S3 client can be pinned to the current region"); - } - } - - @Test - public void testEndpointWithExplicitRegionConfiguration() - throws Exception - { - Configuration config = new Configuration(false); - - // Only endpoint set - config.set(S3_ENDPOINT, "test.example.endpoint.com"); - try (TrinoS3FileSystem fs = new TrinoS3FileSystem()) { - fs.initialize(new URI("s3a://test-bucket/"), config); - assertThat(((AmazonS3Client) fs.getS3Client()).getSignerRegionOverride()).isNull(); - } - - // Endpoint and region set - config.set(S3_ENDPOINT, "test.example.endpoint.com"); - config.set(S3_REGION, "region1"); - try (TrinoS3FileSystem fs = new TrinoS3FileSystem()) { - fs.initialize(new URI("s3a://test-bucket/"), config); - assertThat(((AmazonS3Client) fs.getS3Client()).getSignerRegionOverride()).isEqualTo("region1"); - } - - // Only region set - config.set(S3_REGION, "region1"); - try (TrinoS3FileSystem fs = new TrinoS3FileSystem()) { - fs.initialize(new URI("s3a://test-bucket/"), config); - assertThat(((AmazonS3Client) fs.getS3Client()).getSignerRegionOverride()).isEqualTo("region1"); - } - } - - @Test - public void testAssumeRoleDefaultCredentials() - throws Exception - { - Configuration config = new Configuration(false); - config.set(S3_IAM_ROLE, "test_role"); - - try (TrinoS3FileSystem fs = new TrinoS3FileSystem()) { - fs.initialize(new URI("s3n://test-bucket/"), config); - AWSCredentialsProvider tokenService = getStsCredentialsProvider(fs, "test_role"); - assertThat(tokenService).isInstanceOf(DefaultAWSCredentialsProviderChain.class); - } - } - - @Test - public void testAssumeRoleStaticCredentials() - throws Exception - { - Configuration config = new Configuration(false); - config.set(S3_ACCESS_KEY, "test_access_key"); - config.set(S3_SECRET_KEY, "test_secret_key"); - config.set(S3_IAM_ROLE, "test_role"); - - try (TrinoS3FileSystem fs = new TrinoS3FileSystem()) { - fs.initialize(new URI("s3n://test-bucket/"), config); - AWSCredentialsProvider tokenService = getStsCredentialsProvider(fs, "test_role"); - assertThat(tokenService).isInstanceOf(AWSStaticCredentialsProvider.class); - - AWSCredentials credentials = tokenService.getCredentials(); - assertThat(credentials.getAWSAccessKeyId()).isEqualTo("test_access_key"); - assertThat(credentials.getAWSSecretKey()).isEqualTo("test_secret_key"); - } - } - - private static AWSCredentialsProvider getStsCredentialsProvider(TrinoS3FileSystem fs, String expectedRole) - { - AWSCredentialsProvider awsCredentialsProvider = getAwsCredentialsProvider(fs); - assertThat(awsCredentialsProvider).isInstanceOf(STSAssumeRoleSessionCredentialsProvider.class); - - assertThat(getFieldValue(awsCredentialsProvider, "roleArn", String.class)).isEqualTo(expectedRole); - - AWSSecurityTokenService tokenService = getFieldValue(awsCredentialsProvider, "securityTokenService", AWSSecurityTokenService.class); - assertThat(tokenService).isInstanceOf(AWSSecurityTokenServiceClient.class); - return getFieldValue(tokenService, "awsCredentialsProvider", AWSCredentialsProvider.class); - } - - @Test - public void testAssumeRoleCredentialsWithExternalId() - throws Exception - { - Configuration config = new Configuration(false); - config.set(S3_IAM_ROLE, "role"); - config.set(S3_EXTERNAL_ID, "externalId"); - - try (TrinoS3FileSystem fs = new TrinoS3FileSystem()) { - fs.initialize(new URI("s3n://test-bucket/"), config); - AWSCredentialsProvider awsCredentialsProvider = getAwsCredentialsProvider(fs); - assertThat(awsCredentialsProvider).isInstanceOf(STSAssumeRoleSessionCredentialsProvider.class); - assertThat(getFieldValue(awsCredentialsProvider, "roleArn", String.class)).isEqualTo("role"); - assertThat(getFieldValue(awsCredentialsProvider, "roleExternalId", String.class)).isEqualTo("externalId"); - } - } - - @Test - public void testDefaultCredentials() - throws Exception - { - Configuration config = new Configuration(false); - - try (TrinoS3FileSystem fs = new TrinoS3FileSystem()) { - fs.initialize(new URI("s3n://test-bucket/"), config); - assertThat(getAwsCredentialsProvider(fs)).isInstanceOf(DefaultAWSCredentialsProviderChain.class); - } - } - - @Test - public void testPathStyleAccess() - throws Exception - { - Configuration config = new Configuration(false); - config.setBoolean(S3_PATH_STYLE_ACCESS, true); - - try (TrinoS3FileSystem fs = new TrinoS3FileSystem()) { - fs.initialize(new URI("s3n://test-bucket/"), config); - S3ClientOptions clientOptions = getFieldValue(fs.getS3Client(), AmazonS3Client.class, "clientOptions", S3ClientOptions.class); - assertThat(clientOptions.isPathStyleAccess()).isTrue(); - } - } - - @Test - public void testUnderscoreBucket() - throws Exception - { - Configuration config = new Configuration(false); - config.setBoolean(S3_PATH_STYLE_ACCESS, true); - - try (TrinoS3FileSystem fs = new TrinoS3FileSystem()) { - MockAmazonS3 s3 = new MockAmazonS3(); - String expectedBucketName = "test-bucket_underscore"; - URI uri = new URI("s3n://" + expectedBucketName + "/"); - assertThat(fs.getBucketName(uri)).isEqualTo(expectedBucketName); - fs.initialize(uri, config); - fs.setS3Client(s3); - fs.getS3ObjectMetadata(new Path("/test/path")); - assertThat(expectedBucketName).isEqualTo(s3.getGetObjectMetadataRequest().getBucketName()); - } - } - - @SuppressWarnings({"ResultOfMethodCallIgnored", "OverlyStrongTypeCast", "ConstantConditions"}) - @Test - public void testReadRetryCounters() - throws Exception - { - try (TrinoS3FileSystem fs = new TrinoS3FileSystem()) { - int maxRetries = 2; - MockAmazonS3 s3 = new MockAmazonS3(); - s3.setGetObjectHttpErrorCode(HTTP_INTERNAL_ERROR); - Configuration configuration = new Configuration(false); - configuration.set(S3_MAX_BACKOFF_TIME, "1ms"); - configuration.set(S3_MAX_RETRY_TIME, "5s"); - configuration.setInt(S3_MAX_CLIENT_RETRIES, maxRetries); - fs.initialize(new URI("s3n://test-bucket/"), configuration); - fs.setS3Client(s3); - try (FSDataInputStream inputStream = fs.open(new Path("s3n://test-bucket/test"))) { - inputStream.read(); - } - catch (Throwable expected) { - assertThat(expected).isInstanceOf(AmazonS3Exception.class); - assertThat(((AmazonS3Exception) expected).getStatusCode()).isEqualTo(HTTP_INTERNAL_ERROR); - assertThat(TrinoS3FileSystem.getFileSystemStats().getReadRetries().getTotalCount()).isEqualTo(maxRetries); - assertThat(TrinoS3FileSystem.getFileSystemStats().getGetObjectRetries().getTotalCount()).isEqualTo((maxRetries + 1L) * maxRetries); - } - } - } - - @SuppressWarnings({"OverlyStrongTypeCast", "ConstantConditions"}) - @Test - public void testGetMetadataRetryCounter() - { - int maxRetries = 2; - try (TrinoS3FileSystem fs = new TrinoS3FileSystem()) { - MockAmazonS3 s3 = new MockAmazonS3(); - s3.setGetObjectMetadataHttpCode(HTTP_INTERNAL_ERROR); - Configuration configuration = new Configuration(false); - configuration.set(S3_MAX_BACKOFF_TIME, "1ms"); - configuration.set(S3_MAX_RETRY_TIME, "5s"); - configuration.setInt(S3_MAX_CLIENT_RETRIES, maxRetries); - fs.initialize(new URI("s3n://test-bucket/"), configuration); - fs.setS3Client(s3); - fs.getS3ObjectMetadata(new Path("s3n://test-bucket/test")); - } - catch (Throwable expected) { - assertThat(expected).isInstanceOf(AmazonS3Exception.class); - assertThat(((AmazonS3Exception) expected).getStatusCode()).isEqualTo(HTTP_INTERNAL_ERROR); - assertThat(TrinoS3FileSystem.getFileSystemStats().getGetMetadataRetries().getTotalCount()).isEqualTo(maxRetries); - } - } - - @Test - public void testReadNotFound() - throws Exception - { - testReadObject(IOException.class, HTTP_NOT_FOUND, null); - } - - @Test - public void testNoSuchKeyFound() - throws Exception - { - testReadObject(FileNotFoundException.class, HTTP_NOT_FOUND, NO_SUCH_KEY_ERROR_CODE); - } - - @Test - public void testNoSuchBucketFound() - throws Exception - { - testReadObject(FileNotFoundException.class, HTTP_NOT_FOUND, NO_SUCH_BUCKET_ERROR_CODE); - } - - @Test - public void testReadForbidden() - throws Exception - { - testReadObject(IOException.class, HTTP_FORBIDDEN, null); - } - - @SuppressWarnings("ResultOfMethodCallIgnored") - private static void testReadObject(Class exceptionClass, int httpErrorCode, String s3ErrorCode) - throws Exception - { - try (TrinoS3FileSystem fs = new TrinoS3FileSystem()) { - MockAmazonS3 s3 = new MockAmazonS3(); - s3.setGetObjectHttpErrorCode(httpErrorCode); - s3.setGetObjectS3ErrorCode(s3ErrorCode); - fs.initialize(new URI("s3n://test-bucket/"), new Configuration(false)); - fs.setS3Client(s3); - try (FSDataInputStream inputStream = fs.open(new Path("s3n://test-bucket/test"))) { - assertThatThrownBy(inputStream::read) - .isInstanceOf(exceptionClass) - .hasMessageContaining("Failing getObject call with status code:" + httpErrorCode + "; error code:" + s3ErrorCode); - } - } - } - - @Test - public void testCreateWithNonexistentStagingDirectory() - throws Exception - { - java.nio.file.Path stagingParent = createTempDirectory("test"); - java.nio.file.Path staging = java.nio.file.Path.of(stagingParent.toString(), "staging"); - // stagingParent = /tmp/testXXX - // staging = /tmp/testXXX/staging - - try (TrinoS3FileSystem fs = new TrinoS3FileSystem()) { - MockAmazonS3 s3 = new MockAmazonS3(); - Configuration conf = new Configuration(false); - conf.set(S3_STAGING_DIRECTORY, staging.toString()); - conf.set(S3_STREAMING_UPLOAD_ENABLED, "false"); - fs.initialize(new URI("s3n://test-bucket/"), conf); - fs.setS3Client(s3); - FSDataOutputStream stream = fs.create(new Path("s3n://test-bucket/test")); - stream.close(); - assertThat(Files.exists(staging)).isTrue(); - } - finally { - deleteRecursively(stagingParent, ALLOW_INSECURE); - } - } - - @Test - public void testCreateWithStagingDirectoryFile() - throws Exception - { - java.nio.file.Path staging = createTempFile("staging", null); - // staging = /tmp/stagingXXX.tmp - - try (TrinoS3FileSystem fs = new TrinoS3FileSystem()) { - MockAmazonS3 s3 = new MockAmazonS3(); - Configuration conf = new Configuration(false); - conf.set(S3_STAGING_DIRECTORY, staging.toString()); - conf.set(S3_STREAMING_UPLOAD_ENABLED, "false"); - fs.initialize(new URI("s3n://test-bucket/"), conf); - fs.setS3Client(s3); - assertThatThrownBy(() -> fs.create(new Path("s3n://test-bucket/test"))) - .isInstanceOf(IOException.class) - .hasMessageStartingWith("Configured staging path is not a directory:"); - } - finally { - Files.deleteIfExists(staging); - } - } - - @Test - public void testCreateWithStagingDirectorySymlink() - throws Exception - { - java.nio.file.Path staging = createTempDirectory("staging"); - java.nio.file.Path link = java.nio.file.Path.of(staging + ".symlink"); - // staging = /tmp/stagingXXX - // link = /tmp/stagingXXX.symlink -> /tmp/stagingXXX - - try { - try { - Files.createSymbolicLink(link, staging); - } - catch (UnsupportedOperationException e) { - abort("Filesystem does not support symlinks"); - } - - try (TrinoS3FileSystem fs = new TrinoS3FileSystem()) { - MockAmazonS3 s3 = new MockAmazonS3(); - Configuration conf = new Configuration(false); - conf.set(S3_STAGING_DIRECTORY, link.toString()); - fs.initialize(new URI("s3n://test-bucket/"), conf); - fs.setS3Client(s3); - FSDataOutputStream stream = fs.create(new Path("s3n://test-bucket/test")); - stream.close(); - assertThat(Files.exists(link)).isTrue(); - } - } - finally { - deleteRecursively(link, ALLOW_INSECURE); - deleteRecursively(staging, ALLOW_INSECURE); - } - } - - @Test - public void testReadRequestRangeNotSatisfiable() - throws Exception - { - try (TrinoS3FileSystem fs = new TrinoS3FileSystem()) { - MockAmazonS3 s3 = new MockAmazonS3(); - s3.setGetObjectHttpErrorCode(HTTP_RANGE_NOT_SATISFIABLE); - fs.initialize(new URI("s3n://test-bucket/"), new Configuration(false)); - fs.setS3Client(s3); - try (FSDataInputStream inputStream = fs.open(new Path("s3n://test-bucket/test"))) { - assertThat(inputStream.read()).isEqualTo(-1); - } - } - } - - @Test - public void testGetMetadataForbidden() - throws Exception - { - try (TrinoS3FileSystem fs = new TrinoS3FileSystem()) { - MockAmazonS3 s3 = new MockAmazonS3(); - s3.setGetObjectMetadataHttpCode(HTTP_FORBIDDEN); - fs.initialize(new URI("s3n://test-bucket/"), new Configuration(false)); - fs.setS3Client(s3); - assertThatThrownBy(() -> fs.getS3ObjectMetadata(new Path("s3n://test-bucket/test"))) - .isInstanceOf(IOException.class) - .hasMessageContaining("Failing getObjectMetadata call with " + HTTP_FORBIDDEN); - } - } - - @Test - public void testGetMetadataNotFound() - throws Exception - { - try (TrinoS3FileSystem fs = new TrinoS3FileSystem()) { - MockAmazonS3 s3 = new MockAmazonS3(); - s3.setGetObjectMetadataHttpCode(HTTP_NOT_FOUND); - fs.initialize(new URI("s3n://test-bucket/"), new Configuration(false)); - fs.setS3Client(s3); - assertThat(fs.getS3ObjectMetadata(new Path("s3n://test-bucket/test"))).isNull(); - } - } - - @Test - public void testEncryptionMaterialsProvider() - throws Exception - { - Configuration config = new Configuration(false); - config.set(S3_ENCRYPTION_MATERIALS_PROVIDER, TestEncryptionMaterialsProvider.class.getName()); - - try (TrinoS3FileSystem fs = new TrinoS3FileSystem()) { - fs.initialize(new URI("s3n://test-bucket/"), config); - assertThat(fs.getS3Client()).isInstanceOf(AmazonS3EncryptionClient.class); - } - } - - @Test - public void testKMSEncryptionMaterialsProvider() - throws Exception - { - Configuration config = new Configuration(false); - config.set(S3_KMS_KEY_ID, "test-key-id"); - - try (TrinoS3FileSystem fs = new TrinoS3FileSystem()) { - fs.initialize(new URI("s3n://test-bucket/"), config); - assertThat(fs.getS3Client()).isInstanceOf(AmazonS3EncryptionClient.class); - } - } - - @Test - public void testUnrecoverableS3ExceptionMessage() - { - assertThat(new UnrecoverableS3OperationException("my-bucket", "tmp/test/path", new IOException("test io exception"))) - .hasMessage("java.io.IOException: test io exception (Bucket: my-bucket, Key: tmp/test/path)"); - } - - @Test - public void testWebIdentityTokenCredentialsProvider() - throws Exception - { - Configuration config = new Configuration(false); - config.setBoolean(S3_USE_WEB_IDENTITY_TOKEN_CREDENTIALS_PROVIDER, true); - try (TrinoS3FileSystem fs = new TrinoS3FileSystem()) { - fs.initialize(new URI("s3n://test-bucket/"), config); - assertThat(getAwsCredentialsProvider(fs)).isInstanceOf(WebIdentityTokenCredentialsProvider.class); - } - } - - @Test - public void testCustomCredentialsProvider() - throws Exception - { - Configuration config = new Configuration(false); - config.set(S3_CREDENTIALS_PROVIDER, TestCredentialsProvider.class.getName()); - try (TrinoS3FileSystem fs = new TrinoS3FileSystem()) { - fs.initialize(new URI("s3n://test-bucket/"), config); - assertThat(getAwsCredentialsProvider(fs)).isInstanceOf(TestCredentialsProvider.class); - } - } - - @Test - public void testCustomCredentialsClassCannotBeFound() - throws Exception - { - Configuration config = new Configuration(false); - config.set(S3_CREDENTIALS_PROVIDER, "com.example.DoesNotExist"); - try (TrinoS3FileSystem fs = new TrinoS3FileSystem()) { - assertThatThrownBy(() -> fs.initialize(new URI("s3n://test-bucket/"), config)) - .isInstanceOf(RuntimeException.class) - .hasMessage("Error creating an instance of com.example.DoesNotExist for URI s3n://test-bucket/") - .cause() - .isInstanceOf(ClassNotFoundException.class) - .hasMessage("Class com.example.DoesNotExist not found"); - } - } - - @Test - public void testUserAgentPrefix() - throws Exception - { - String userAgentPrefix = "agent_prefix"; - Configuration config = new Configuration(false); - config.set(S3_USER_AGENT_PREFIX, userAgentPrefix); - try (TrinoS3FileSystem fs = new TrinoS3FileSystem()) { - fs.initialize(new URI("s3n://test-bucket/"), config); - ClientConfiguration clientConfig = getFieldValue(fs.getS3Client(), AmazonWebServiceClient.class, "clientConfiguration", ClientConfiguration.class); - assertThat(clientConfig.getUserAgentSuffix()).isEqualTo("Trino"); - assertThat(clientConfig.getUserAgentPrefix()).isEqualTo(userAgentPrefix); - } - } - - @Test - public void testDefaultS3ClientConfiguration() - throws Exception - { - HiveS3Config defaults = new HiveS3Config(); - try (TrinoS3FileSystem fs = new TrinoS3FileSystem()) { - fs.initialize(new URI("s3n://test-bucket/"), new Configuration(false)); - ClientConfiguration config = getFieldValue(fs.getS3Client(), AmazonWebServiceClient.class, "clientConfiguration", ClientConfiguration.class); - assertThat(config.getMaxErrorRetry()).isEqualTo(defaults.getS3MaxErrorRetries()); - assertThat(config.getConnectionTimeout()).isEqualTo(defaults.getS3ConnectTimeout().toMillis()); - assertThat(config.getSocketTimeout()).isEqualTo(defaults.getS3SocketTimeout().toMillis()); - assertThat(config.getMaxConnections()).isEqualTo(defaults.getS3MaxConnections()); - assertThat(config.getUserAgentSuffix()).isEqualTo("Trino"); - assertThat(config.getUserAgentPrefix()).isEqualTo(""); - } - } - - @Test - public void testSkipGlacierObjectsEnabled() - throws Exception - { - assertSkipGlacierObjects(true); - assertSkipGlacierObjects(false); - } - - @Test - public void testProxyDefaultsS3ClientConfiguration() - throws Exception - { - HiveS3Config hiveS3Config = new HiveS3Config(); - - TrinoS3ConfigurationInitializer configurationInitializer = new TrinoS3ConfigurationInitializer(hiveS3Config); - Configuration trinoFsConfiguration = new Configuration(false); - configurationInitializer.initializeConfiguration(trinoFsConfiguration); - - try (TrinoS3FileSystem fs = new TrinoS3FileSystem()) { - fs.initialize(new URI("s3n://test-bucket/"), trinoFsConfiguration); - ClientConfiguration config = getFieldValue(fs.getS3Client(), AmazonWebServiceClient.class, "clientConfiguration", ClientConfiguration.class); - assertThat(config.getProxyHost()).isNull(); - assertThat(config.getProxyPort()).isEqualTo(-1); - assertThat(config.getProxyProtocol()).isEqualTo(Protocol.HTTP); - assertThat(config.getNonProxyHosts()).isEqualTo(System.getProperty("http.nonProxyHosts")); - assertThat(config.getProxyUsername()).isNull(); - assertThat(config.getProxyPassword()).isNull(); - assertThat(config.isPreemptiveBasicProxyAuth()).isFalse(); - } - } - - @Test - public void testOnNoHostProxyDefaultsS3ClientConfiguration() - throws Exception - { - HiveS3Config hiveS3Config = new HiveS3Config(); - hiveS3Config.setS3ProxyHost(null); - hiveS3Config.setS3ProxyPort(40000); - hiveS3Config.setS3ProxyProtocol("https"); - hiveS3Config.setS3NonProxyHosts(ImmutableList.of("firsthost.com", "secondhost.com")); - hiveS3Config.setS3ProxyUsername("dummy_username"); - hiveS3Config.setS3ProxyPassword("dummy_password"); - hiveS3Config.setS3PreemptiveBasicProxyAuth(true); - - TrinoS3ConfigurationInitializer configurationInitializer = new TrinoS3ConfigurationInitializer(hiveS3Config); - Configuration trinoFsConfiguration = new Configuration(false); - configurationInitializer.initializeConfiguration(trinoFsConfiguration); - - try (TrinoS3FileSystem fs = new TrinoS3FileSystem()) { - fs.initialize(new URI("s3n://test-bucket/"), trinoFsConfiguration); - ClientConfiguration config = getFieldValue(fs.getS3Client(), AmazonWebServiceClient.class, "clientConfiguration", ClientConfiguration.class); - assertThat(config.getProxyHost()).isNull(); - assertThat(config.getProxyPort()).isEqualTo(-1); - assertThat(config.getProxyProtocol()).isEqualTo(Protocol.HTTP); - assertThat(config.getNonProxyHosts()).isEqualTo(System.getProperty("http.nonProxyHosts")); - assertThat(config.getProxyUsername()).isNull(); - assertThat(config.getProxyPassword()).isNull(); - assertThat(config.isPreemptiveBasicProxyAuth()).isFalse(); - } - } - - @Test - public void testExplicitProxyS3ClientConfiguration() - throws Exception - { - HiveS3Config hiveS3Config = new HiveS3Config(); - hiveS3Config.setS3ProxyHost("dummy.com"); - hiveS3Config.setS3ProxyPort(40000); - hiveS3Config.setS3ProxyProtocol("https"); - hiveS3Config.setS3NonProxyHosts(ImmutableList.of("firsthost.com", "secondhost.com")); - hiveS3Config.setS3ProxyUsername("dummy_username"); - hiveS3Config.setS3ProxyPassword("dummy_password"); - hiveS3Config.setS3PreemptiveBasicProxyAuth(true); - - TrinoS3ConfigurationInitializer configurationInitializer = new TrinoS3ConfigurationInitializer(hiveS3Config); - Configuration trinoFsConfiguration = new Configuration(false); - configurationInitializer.initializeConfiguration(trinoFsConfiguration); - - try (TrinoS3FileSystem fs = new TrinoS3FileSystem()) { - fs.initialize(new URI("s3n://test-bucket/"), trinoFsConfiguration); - ClientConfiguration config = getFieldValue(fs.getS3Client(), AmazonWebServiceClient.class, "clientConfiguration", ClientConfiguration.class); - assertThat(config.getProxyHost()).isEqualTo("dummy.com"); - assertThat(config.getProxyPort()).isEqualTo(40000); - assertThat(config.getProxyProtocol()).isEqualTo(Protocol.HTTPS); - assertThat(config.getNonProxyHosts()).isEqualTo("firsthost.com|secondhost.com"); - assertThat(config.getProxyUsername()).isEqualTo("dummy_username"); - assertThat(config.getProxyPassword()).isEqualTo("dummy_password"); - assertThat(config.isPreemptiveBasicProxyAuth()).isTrue(); - } - } - - private static void assertSkipGlacierObjects(boolean skipGlacierObjects) - throws Exception - { - Configuration config = new Configuration(false); - config.set(S3_SKIP_GLACIER_OBJECTS, String.valueOf(skipGlacierObjects)); - - try (TrinoS3FileSystem fs = new TrinoS3FileSystem()) { - MockAmazonS3 s3 = new MockAmazonS3(); - s3.setHasGlacierObjects(true); - fs.initialize(new URI("s3n://test-bucket/"), config); - fs.setS3Client(s3); - FileStatus[] statuses = fs.listStatus(new Path("s3n://test-bucket/test")); - assertThat(statuses.length).isEqualTo(skipGlacierObjects ? 2 : 4); - } - } - - @Test - public void testSkipHadoopFolderMarkerObjectsEnabled() - throws Exception - { - Configuration config = new Configuration(false); - - try (TrinoS3FileSystem fs = new TrinoS3FileSystem()) { - MockAmazonS3 s3 = new MockAmazonS3(); - s3.setHasHadoopFolderMarkerObjects(true); - fs.initialize(new URI("s3n://test-bucket/"), config); - fs.setS3Client(s3); - FileStatus[] statuses = fs.listStatus(new Path("s3n://test-bucket/test")); - assertThat(statuses.length).isEqualTo(2); - } - } - - public static AWSCredentialsProvider getAwsCredentialsProvider(TrinoS3FileSystem fs) - { - return getFieldValue(fs.getS3Client(), "awsCredentialsProvider", AWSCredentialsProvider.class); - } - - private static T getFieldValue(Object instance, String name, Class type) - { - return getFieldValue(instance, instance.getClass(), name, type); - } - - @SuppressWarnings("unchecked") - private static T getFieldValue(Object instance, Class clazz, String name, Class type) - { - try { - Field field = clazz.getDeclaredField(name); - checkArgument(field.getType() == type, "expected %s but found %s", type, field.getType()); - field.setAccessible(true); - return (T) field.get(instance); - } - catch (ReflectiveOperationException e) { - throw new RuntimeException(e); - } - } - - private static class TestEncryptionMaterialsProvider - implements EncryptionMaterialsProvider - { - private final EncryptionMaterials encryptionMaterials; - - public TestEncryptionMaterialsProvider() - { - encryptionMaterials = new EncryptionMaterials(new SecretKeySpec(new byte[] {1, 2, 3}, "AES")); - } - - @Override - public void refresh() {} - - @Override - public EncryptionMaterials getEncryptionMaterials(Map materialsDescription) - { - return encryptionMaterials; - } - - @Override - public EncryptionMaterials getEncryptionMaterials() - { - return encryptionMaterials; - } - } - - private static class TestCredentialsProvider - implements AWSCredentialsProvider - { - @SuppressWarnings("UnusedParameters") - public TestCredentialsProvider(URI uri, Configuration conf) {} - - @Override - public AWSCredentials getCredentials() - { - return null; - } - - @Override - public void refresh() {} - } - - @Test - public void testDefaultAcl() - throws Exception - { - Configuration config = new Configuration(false); - - try (TrinoS3FileSystem fs = new TrinoS3FileSystem()) { - MockAmazonS3 s3 = new MockAmazonS3(); - String expectedBucketName = "test-bucket"; - fs.initialize(new URI("s3n://" + expectedBucketName + "/"), config); - fs.setS3Client(s3); - try (FSDataOutputStream stream = fs.create(new Path("s3n://test-bucket/test"))) { - // initiate an upload by creating a stream & closing it immediately - } - assertThat(CannedAccessControlList.Private).isEqualTo(s3.getAcl()); - } - } - - @Test - public void testFullBucketOwnerControlAcl() - throws Exception - { - Configuration config = new Configuration(false); - config.set(S3_ACL_TYPE, "BUCKET_OWNER_FULL_CONTROL"); - - try (TrinoS3FileSystem fs = new TrinoS3FileSystem()) { - MockAmazonS3 s3 = new MockAmazonS3(); - String expectedBucketName = "test-bucket"; - fs.initialize(new URI("s3n://" + expectedBucketName + "/"), config); - fs.setS3Client(s3); - try (FSDataOutputStream stream = fs.create(new Path("s3n://test-bucket/test"))) { - // initiate an upload by creating a stream & closing it immediately - } - assertThat(CannedAccessControlList.BucketOwnerFullControl).isEqualTo(s3.getAcl()); - } - } - - @Test - public void testStreamingUpload() - throws Exception - { - Configuration config = new Configuration(false); - config.set(S3_STREAMING_UPLOAD_ENABLED, "true"); - - try (TrinoS3FileSystem fs = new TrinoS3FileSystem()) { - MockAmazonS3 s3 = new MockAmazonS3(); - String expectedBucketName = "test-bucket"; - config.set(S3_STREAMING_UPLOAD_PART_SIZE, "128"); - fs.initialize(new URI("s3n://" + expectedBucketName + "/"), config); - fs.setS3Client(s3); - String objectKey = "test"; - try (FSDataOutputStream stream = fs.create(new Path("s3n://test-bucket/" + objectKey))) { - stream.write('a'); - stream.write("foo".repeat(21).getBytes(US_ASCII)); // 63 bytes = "foo" * 21 - stream.write("bar".repeat(44).getBytes(US_ASCII)); // 132 bytes = "bar" * 44 - stream.write("orange".repeat(25).getBytes(US_ASCII), 6, 132); // 132 bytes = "orange" * 22 - } - - List parts = s3.getUploadParts(); - assertThat(parts).size().isEqualTo(3); - - InputStream concatInputStream = parts.stream() - .map(UploadPartRequest::getInputStream) - .reduce(new ByteArrayInputStream(new byte[0]), SequenceInputStream::new); - String data = new String(concatInputStream.readAllBytes(), US_ASCII); - assertThat(data).isEqualTo("a" + "foo".repeat(21) + "bar".repeat(44) + "orange".repeat(22)); - } - } - - @Test - public void testEmptyDirectory() - throws Exception - { - try (TrinoS3FileSystem fs = new TrinoS3FileSystem()) { - MockAmazonS3 s3 = new MockAmazonS3() - { - @Override - public ObjectMetadata getObjectMetadata(GetObjectMetadataRequest getObjectMetadataRequest) - { - if (getObjectMetadataRequest.getKey().equals("empty-dir/")) { - ObjectMetadata objectMetadata = new ObjectMetadata(); - objectMetadata.setContentType(S3_DIRECTORY_OBJECT_CONTENT_TYPE); - return objectMetadata; - } - return super.getObjectMetadata(getObjectMetadataRequest); - } - }; - fs.initialize(new URI("s3n://test-bucket/"), new Configuration(false)); - fs.setS3Client(s3); - - FileStatus fileStatus = fs.getFileStatus(new Path("s3n://test-bucket/empty-dir/")); - assertThat(fileStatus.isDirectory()).isTrue(); - - fileStatus = fs.getFileStatus(new Path("s3n://test-bucket/empty-dir")); - assertThat(fileStatus.isDirectory()).isTrue(); - } - } - - @Test - public void testListPrefixModes() - throws Exception - { - S3ObjectSummary rootObject = new S3ObjectSummary(); - rootObject.setStorageClass(StorageClass.Standard.toString()); - rootObject.setKey("standard-object-at-root.txt"); - rootObject.setLastModified(new Date()); - - S3ObjectSummary childObject = new S3ObjectSummary(); - childObject.setStorageClass(StorageClass.Standard.toString()); - childObject.setKey("prefix/child-object.txt"); - childObject.setLastModified(new Date()); - - try (TrinoS3FileSystem fs = new TrinoS3FileSystem()) { - MockAmazonS3 s3 = new MockAmazonS3() - { - @Override - public ListObjectsV2Result listObjectsV2(ListObjectsV2Request listObjectsV2Request) - { - ListObjectsV2Result listing = new ListObjectsV2Result(); - // Shallow listing - if ("/".equals(listObjectsV2Request.getDelimiter())) { - listing.getCommonPrefixes().add("prefix"); - listing.getObjectSummaries().add(rootObject); - return listing; - } - // Recursive listing of object keys only - listing.getObjectSummaries().addAll(Arrays.asList(childObject, rootObject)); - return listing; - } - }; - Path rootPath = new Path("s3n://test-bucket/"); - fs.initialize(rootPath.toUri(), new Configuration(false)); - fs.setS3Client(s3); - - List shallowAll = remoteIteratorToList(fs.listLocatedStatus(rootPath)); - assertThat(shallowAll).hasSize(2); - assertThat(shallowAll.get(0).isDirectory()).isTrue(); - assertThat(shallowAll.get(1).isDirectory()).isFalse(); - assertThat(shallowAll.get(0).getPath()).isEqualTo(new Path(rootPath, "prefix")); - assertThat(shallowAll.get(1).getPath()).isEqualTo(new Path(rootPath, rootObject.getKey())); - - List shallowFiles = remoteIteratorToList(fs.listFiles(rootPath, false)); - assertThat(shallowFiles).hasSize(1); - assertThat(shallowFiles.get(0).isDirectory()).isFalse(); - assertThat(shallowFiles.get(0).getPath()).isEqualTo(new Path(rootPath, rootObject.getKey())); - - List recursiveFiles = remoteIteratorToList(fs.listFiles(rootPath, true)); - assertThat(recursiveFiles).hasSize(2); - assertThat(recursiveFiles.get(0).isDirectory()).isFalse(); - assertThat(recursiveFiles.get(1).isDirectory()).isFalse(); - assertThat(recursiveFiles.get(0).getPath()).isEqualTo(new Path(rootPath, childObject.getKey())); - assertThat(recursiveFiles.get(1).getPath()).isEqualTo(new Path(rootPath, rootObject.getKey())); - } - } - - @Test - public void testThatTrinoS3FileSystemReportsConsumedMemory() - throws IOException - { - TestMemoryReservationHandler memoryReservationHandler = new TestMemoryReservationHandler(); - AggregatedMemoryContext memoryContext = newRootAggregatedMemoryContext(memoryReservationHandler, 1024 * 1000 * 1000); - try (TrinoS3FileSystem fs = new TrinoS3FileSystem()) { - MockAmazonS3 s3 = new MockAmazonS3(); - Path rootPath = new Path("s3n://test-bucket/"); - fs.initialize(rootPath.toUri(), new Configuration(false)); - fs.setS3Client(s3); - OutputStream outputStream = fs.create(new Path("s3n://test-bucket/test1"), memoryContext); - outputStream.write(new byte[] {1, 2, 3, 4, 5, 6}, 0, 6); - outputStream.close(); - } - assertThat(memoryReservationHandler.getReserved()).isEqualTo(0); - assertThat(memoryReservationHandler.getMaxReserved()).isGreaterThan(0); - } - - private static List remoteIteratorToList(RemoteIterator statuses) - throws IOException - { - List result = new ArrayList<>(); - while (statuses.hasNext()) { - result.add(statuses.next()); - } - return result; - } - - private static class TestMemoryReservationHandler - implements MemoryReservationHandler - { - private long reserved; - private long maxReserved; - - @Override - public ListenableFuture reserveMemory(String allocationTag, long delta) - { - reserved += delta; - if (delta > maxReserved) { - maxReserved = delta; - } - return null; - } - - @Override - public boolean tryReserveMemory(String allocationTag, long delta) - { - reserved += delta; - if (delta > maxReserved) { - maxReserved = delta; - } - return true; - } - - public long getReserved() - { - return reserved; - } - - public long getMaxReserved() - { - return maxReserved; - } - } -} diff --git a/lib/trino-hdfs/src/test/java/io/trino/hdfs/s3/TestTrinoS3FileSystemAwsS3.java b/lib/trino-hdfs/src/test/java/io/trino/hdfs/s3/TestTrinoS3FileSystemAwsS3.java deleted file mode 100644 index 69b5e96bd53f..000000000000 --- a/lib/trino-hdfs/src/test/java/io/trino/hdfs/s3/TestTrinoS3FileSystemAwsS3.java +++ /dev/null @@ -1,51 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package io.trino.hdfs.s3; - -import org.apache.hadoop.conf.Configuration; - -import static io.trino.testing.SystemEnvironmentUtils.requireEnv; - -/** - * Tests file system operations on AWS S3 storage. - *

- * Requires AWS credentials, which can be provided any way supported by the DefaultProviderChain - * See https://docs.aws.amazon.com/sdk-for-java/v1/developer-guide/credentials.html#credentials-default - */ -public class TestTrinoS3FileSystemAwsS3 - extends AbstractTestTrinoS3FileSystem -{ - private final String bucketName; - private final String s3Endpoint; - - public TestTrinoS3FileSystemAwsS3() - { - bucketName = requireEnv("S3_BUCKET"); - s3Endpoint = requireEnv("S3_BUCKET_ENDPOINT"); - } - - @Override - protected String getBucketName() - { - return bucketName; - } - - @Override - protected Configuration s3Configuration() - { - Configuration config = new Configuration(false); - config.set("fs.s3.endpoint", s3Endpoint); - return config; - } -} diff --git a/lib/trino-hdfs/src/test/java/io/trino/hdfs/s3/TestTrinoS3FileSystemMinio.java b/lib/trino-hdfs/src/test/java/io/trino/hdfs/s3/TestTrinoS3FileSystemMinio.java deleted file mode 100644 index a14dcbe7108e..000000000000 --- a/lib/trino-hdfs/src/test/java/io/trino/hdfs/s3/TestTrinoS3FileSystemMinio.java +++ /dev/null @@ -1,154 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package io.trino.hdfs.s3; - -import com.amazonaws.services.s3.AmazonS3; -import io.trino.plugin.base.util.AutoCloseableCloser; -import io.trino.testing.containers.Minio; -import io.trino.testing.minio.MinioClient; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.Path; -import org.junit.jupiter.api.AfterAll; -import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.TestInstance; - -import java.net.URI; - -import static io.trino.testing.TestingNames.randomNameSuffix; -import static io.trino.testing.containers.Minio.MINIO_ROOT_PASSWORD; -import static io.trino.testing.containers.Minio.MINIO_ROOT_USER; -import static org.assertj.core.api.Assertions.assertThat; -import static org.assertj.core.api.Assertions.assertThatThrownBy; -import static org.junit.jupiter.api.TestInstance.Lifecycle.PER_CLASS; - -@TestInstance(PER_CLASS) -public class TestTrinoS3FileSystemMinio - extends AbstractTestTrinoS3FileSystem -{ - private final String bucketName = "test-bucket-" + randomNameSuffix(); - - private final Minio minio; - - private final MinioClient minioClient; - - public TestTrinoS3FileSystemMinio() - { - minio = Minio.builder().build(); - minio.start(); - - minioClient = minio.createMinioClient(); - minio.createBucket(bucketName); - } - - @AfterAll - public void tearDown() - throws Exception - { - try (AutoCloseableCloser closer = AutoCloseableCloser.create()) { - closer.register(minio); - closer.register(minioClient); - } - } - - @Override - protected String getBucketName() - { - return bucketName; - } - - @Override - protected Configuration s3Configuration() - { - Configuration config = new Configuration(false); - config.set("trino.s3.endpoint", minio.getMinioAddress()); - config.set("trino.s3.access-key", MINIO_ROOT_USER); - config.set("trino.s3.secret-key", MINIO_ROOT_PASSWORD); - config.set("trino.s3.path-style-access", "true"); - - return config; - } - - @Test - public void testDeleteNonRecursivelyEmptyBucketRoot() - throws Exception - { - String testBucketName = "trino-delete-bucket-root-empty" + randomNameSuffix(); - minioClient.makeBucket(testBucketName); - String testBucketPath = "s3://%s/".formatted(testBucketName); - try (TrinoS3FileSystem fs = new TrinoS3FileSystem()) { - fs.initialize(new URI(testBucketPath), s3Configuration()); - - AmazonS3 s3 = fs.getS3Client(); - - assertThat(listPaths(s3, testBucketName, "", true)).isEmpty(); - - fs.delete(new Path(testBucketPath), false); - - assertThat(listPaths(s3, testBucketName, "", true)).isEmpty(); - } - } - - @Test - public void testDeleteNonRecursivelyNonEmptyBucketRoot() - throws Exception - { - String testBucketName = "trino-delete-bucket-root-non-empty" + randomNameSuffix(); - minioClient.makeBucket(testBucketName); - String testBucketPath = "s3://%s/".formatted(testBucketName); - try (TrinoS3FileSystem fs = new TrinoS3FileSystem()) { - fs.initialize(new URI(testBucketPath), s3Configuration()); - - AmazonS3 s3 = fs.getS3Client(); - fs.createNewFile(new Path("s3://%s/file1.txt".formatted(testBucketName))); - String directory2Path = testBucketPath + "directory2"; - createDirectory(fs.getS3Client(), testBucketName, "directory2"); - String filename2 = "file2.txt"; - fs.createNewFile(new Path(directory2Path, filename2)); - - assertThat(listPaths(s3, testBucketName, "", true)) - .containsOnly("file1.txt", "directory2/", "directory2/file2.txt"); - - assertThatThrownBy(() -> fs.delete(new Path(testBucketPath), false)) - .hasMessage("Directory %s is not empty".formatted(testBucketPath)); - - assertThat(listPaths(s3, testBucketName, "", true)) - .containsOnly("file1.txt", "directory2/", "directory2/file2.txt"); - } - } - - @Test - public void testDeleteRecursivelyBucketRoot() - throws Exception - { - String testBucketName = "trino-delete-recursive-bucket-root" + randomNameSuffix(); - minioClient.makeBucket(testBucketName); - String testBucketPath = "s3://" + testBucketName; - try (TrinoS3FileSystem fs = new TrinoS3FileSystem()) { - fs.initialize(new URI(testBucketPath), s3Configuration()); - - AmazonS3 s3 = fs.getS3Client(); - fs.createNewFile(new Path("s3://%s/file1.txt".formatted(testBucketName))); - String directory2Path = testBucketPath + "/directory2"; - createDirectory(fs.getS3Client(), testBucketName, "directory2"); - fs.createNewFile(new Path(directory2Path, "file2.txt")); - - assertThat(listPaths(s3, testBucketName, "", true)) - .containsOnly("file1.txt", "directory2/", "directory2/file2.txt"); - - assertThat(fs.delete(new Path(testBucketPath + Path.SEPARATOR), true)).isTrue(); - - assertThat(listPaths(s3, testBucketName, "", true)).isEmpty(); - } - } -} diff --git a/lib/trino-hdfs/src/test/java/io/trino/hdfs/s3/TestUriBasedS3SecurityMappingsProvider.java b/lib/trino-hdfs/src/test/java/io/trino/hdfs/s3/TestUriBasedS3SecurityMappingsProvider.java deleted file mode 100644 index 5a2b0282933e..000000000000 --- a/lib/trino-hdfs/src/test/java/io/trino/hdfs/s3/TestUriBasedS3SecurityMappingsProvider.java +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package io.trino.hdfs.s3; - -import io.airlift.http.client.HttpStatus; -import io.airlift.http.client.Response; -import io.airlift.http.client.testing.TestingHttpClient; -import org.junit.jupiter.api.Test; - -import static com.google.common.net.MediaType.JSON_UTF_8; -import static io.airlift.http.client.testing.TestingResponse.mockResponse; -import static org.assertj.core.api.Assertions.assertThat; - -public class TestUriBasedS3SecurityMappingsProvider -{ - private static final String MOCK_MAPPINGS_RESPONSE = - "{\"mappings\": [{\"iamRole\":\"arn:aws:iam::test\",\"user\":\"test\"}]}"; - - @Test - public void testGetRawJSON() - { - final Response response = mockResponse(HttpStatus.OK, JSON_UTF_8, MOCK_MAPPINGS_RESPONSE); - S3SecurityMappingConfig conf = new S3SecurityMappingConfig().setConfigFilePath("http://test:1234/api/endpoint"); - UriBasedS3SecurityMappingsProvider provider = new UriBasedS3SecurityMappingsProvider(conf, new TestingHttpClient(request -> response)); - String result = provider.getRawJsonString(); - assertThat(result).isEqualTo(MOCK_MAPPINGS_RESPONSE); - } -} diff --git a/lib/trino-hdfs/src/test/resources/io/trino/hdfs/s3/security-mapping-with-fallback-to-cluster-default.json b/lib/trino-hdfs/src/test/resources/io/trino/hdfs/s3/security-mapping-with-fallback-to-cluster-default.json deleted file mode 100644 index 45b98ed39849..000000000000 --- a/lib/trino-hdfs/src/test/resources/io/trino/hdfs/s3/security-mapping-with-fallback-to-cluster-default.json +++ /dev/null @@ -1,11 +0,0 @@ -{ - "mappings": [ - { - "prefix": "s3://bar/abc", - "iamRole": "arn:aws:iam::123456789101:role/allow_path" - }, - { - "useClusterDefault": "true" - } - ] -} diff --git a/lib/trino-hdfs/src/test/resources/io/trino/hdfs/s3/security-mapping-without-fallback.json b/lib/trino-hdfs/src/test/resources/io/trino/hdfs/s3/security-mapping-without-fallback.json deleted file mode 100644 index 6700f8b1dec0..000000000000 --- a/lib/trino-hdfs/src/test/resources/io/trino/hdfs/s3/security-mapping-without-fallback.json +++ /dev/null @@ -1,8 +0,0 @@ -{ - "mappings": [ - { - "prefix": "s3://bar/abc", - "iamRole": "arn:aws:iam::123456789101:role/allow_path" - } - ] -} diff --git a/lib/trino-hdfs/src/test/resources/io/trino/hdfs/s3/security-mapping.json b/lib/trino-hdfs/src/test/resources/io/trino/hdfs/s3/security-mapping.json deleted file mode 100644 index 36bc766cfd85..000000000000 --- a/lib/trino-hdfs/src/test/resources/io/trino/hdfs/s3/security-mapping.json +++ /dev/null @@ -1,86 +0,0 @@ -{ - "mappings": [ - { - "prefix": "s3://bar/abc", - "iamRole": "arn:aws:iam::123456789101:role/allow_path" - }, - { - "prefix": "s3://bar", - "allowedIamRoles": [ - "arn:aws:iam::123456789101:role/allow_bucket_1", - "arn:aws:iam::123456789101:role/allow_bucket_2", - "arn:aws:iam::123456789101:role/allow_bucket_3" - ] - }, - { - "prefix": "s3://xyz", - "iamRole": "arn:aws:iam::123456789101:role/allow_default", - "allowedIamRoles": [ - "arn:aws:iam::123456789101:role/allow_foo", - "arn:aws:iam::123456789101:role/allow_bar" - ] - }, - { - "prefix": "s3://foo", - "accessKey": "AKIAxxxaccess", - "secretKey": "iXbXxxxsecret", - "kmsKeyId": "kmsKey_10", - "allowedKmsKeyIds": ["kmsKey_11"] - }, - { - "prefix": "s3://foo_all_keys_allowed", - "accessKey": "AKIAxxxaccess", - "secretKey": "iXbXxxxsecret", - "kmsKeyId": "kmsKey_10", - "allowedKmsKeyIds": ["*"] - }, - { - "prefix": "s3://foo_no_default_key", - "accessKey": "AKIAxxxaccess", - "secretKey": "iXbXxxxsecret", - "allowedKmsKeyIds": ["kmsKey_11", "kmsKey_12"] - }, - { - "user": "alice", - "iamRole": "alice_role" - }, - { - "user": "bob|charlie", - "iamRole": "bob_and_charlie_role" - }, - { - "group": "finance", - "iamRole": "finance_role" - }, - { - "group": "hr|eng", - "iamRole": "hr_and_eng_group" - }, - { - "user": "danny", - "group": "hq", - "iamRole": "danny_hq_role" - }, - { - "prefix": "s3://endpointbucket", - "accessKey": "AKIAxxxaccess", - "secretKey": "iXbXxxxsecret", - "endpoint": "http://localhost:7753" - }, - { - "prefix": "s3://regionalbucket", - "accessKey": "AKIAxxxaccess", - "secretKey": "iXbXxxxsecret", - "region": "us-west-2" - }, - { - "prefix": "s3://somebucket", - "iamRole": "arn:aws:iam::1234567891012:role/default", - "roleSessionName": "iam-trino-session" - }, - { - "useClusterDefault": "false", - "iamRole": "arn:aws:iam::123456789101:role/default" - } - ] -} diff --git a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/catalog/glue/TestIcebergGlueCatalogConnectorSmokeTest.java b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/catalog/glue/TestIcebergGlueCatalogConnectorSmokeTest.java index 45925df1f6c4..5b59fa3544c2 100644 --- a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/catalog/glue/TestIcebergGlueCatalogConnectorSmokeTest.java +++ b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/catalog/glue/TestIcebergGlueCatalogConnectorSmokeTest.java @@ -14,18 +14,7 @@ package io.trino.plugin.iceberg.catalog.glue; import com.google.common.collect.ImmutableMap; -import com.google.common.collect.ImmutableSet; import io.trino.filesystem.Location; -import io.trino.filesystem.TrinoFileSystem; -import io.trino.filesystem.TrinoFileSystemFactory; -import io.trino.filesystem.hdfs.HdfsFileSystemFactory; -import io.trino.hdfs.DynamicHdfsConfiguration; -import io.trino.hdfs.HdfsConfig; -import io.trino.hdfs.HdfsConfiguration; -import io.trino.hdfs.HdfsConfigurationInitializer; -import io.trino.hdfs.HdfsEnvironment; -import io.trino.hdfs.TrinoHdfsFileSystemStats; -import io.trino.hdfs.authentication.NoHdfsAuthentication; import io.trino.plugin.iceberg.BaseIcebergConnectorSmokeTest; import io.trino.plugin.iceberg.IcebergQueryRunner; import io.trino.plugin.iceberg.SchemaInitializer; @@ -52,7 +41,6 @@ import static io.trino.plugin.hive.metastore.glue.GlueConverter.getTableTypeNullable; import static io.trino.plugin.iceberg.IcebergTestUtils.checkParquetFileSorting; import static io.trino.testing.SystemEnvironmentUtils.requireEnv; -import static io.trino.testing.TestingConnectorSession.SESSION; import static io.trino.testing.TestingNames.randomNameSuffix; import static java.lang.String.format; import static java.util.Objects.requireNonNull; @@ -72,7 +60,6 @@ public class TestIcebergGlueCatalogConnectorSmokeTest private final String bucketName; private final String schemaName; private final GlueClient glueClient; - private final TrinoFileSystemFactory fileSystemFactory; public TestIcebergGlueCatalogConnectorSmokeTest() { @@ -80,10 +67,6 @@ public TestIcebergGlueCatalogConnectorSmokeTest() this.bucketName = requireEnv("S3_BUCKET"); this.schemaName = "test_iceberg_smoke_" + randomNameSuffix(); glueClient = GlueClient.create(); - - HdfsConfigurationInitializer initializer = new HdfsConfigurationInitializer(new HdfsConfig(), ImmutableSet.of()); - HdfsConfiguration hdfsConfiguration = new DynamicHdfsConfiguration(initializer, ImmutableSet.of()); - this.fileSystemFactory = new HdfsFileSystemFactory(new HdfsEnvironment(hdfsConfiguration, new HdfsConfig(), new NoHdfsAuthentication()), new TrinoHdfsFileSystemStats()); } @Override @@ -96,6 +79,7 @@ protected QueryRunner createQueryRunner() "iceberg.file-format", format.name(), "iceberg.catalog.type", "glue", "hive.metastore.glue.default-warehouse-dir", schemaPath(), + "fs.native-s3.enabled", "true", "iceberg.register-table-procedure.enabled", "true", "iceberg.writer-sort-buffer-size", "1MB")) .setSchemaInitializer( @@ -231,7 +215,6 @@ protected void deleteDirectory(String location) @Override protected boolean isFileSorted(Location path, String sortColumnName) { - TrinoFileSystem fileSystem = fileSystemFactory.create(SESSION); return checkParquetFileSorting(fileSystem.newInputFile(path), sortColumnName); }